Commit 429672d3 authored by Daniel Verkamp, committed by Jim Harris
Browse files

json: add function to write UTF-16LE strings



spdk_json_write_string_utf16le() writes a UTF-16LE string to a
JSON write context.

Change-Id: I413ffb8a3dee6e1b44ec96ce2415fd1b9c36320f
Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-on: https://review.gerrithub.io/368625


Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent 229f6494
Loading
Loading
Loading
Loading
+21 −0
Original line number Diff line number Diff line
@@ -198,6 +198,27 @@ int spdk_json_write_int64(struct spdk_json_write_ctx *w, int64_t val);
int spdk_json_write_uint64(struct spdk_json_write_ctx *w, uint64_t val);
int spdk_json_write_string(struct spdk_json_write_ctx *w, const char *val);
int spdk_json_write_string_raw(struct spdk_json_write_ctx *w, const char *val, size_t len);

/**
 * Write null-terminated UTF-16LE string.
 *
 * The length is found by scanning for the first zero code unit; that terminator
 * is not part of the written string. The input is validated while it is written,
 * so the call fails if it contains an invalid surrogate sequence (for example a
 * high surrogate that is not followed by a low surrogate).
 *
 * \param w JSON write context.
 * \param val UTF-16LE string; must be null terminated.
 * \return 0 on success or negative on failure.
 */
int spdk_json_write_string_utf16le(struct spdk_json_write_ctx *w, const uint16_t *val);

/**
 * Write UTF-16LE string.
 *
 * Unlike the null-terminated variant, the length is given explicitly, so zero
 * code units inside val are treated as ordinary characters. The input is
 * validated while it is written, so the call fails if it contains an invalid
 * surrogate sequence (for example a leading low surrogate).
 *
 * \param w JSON write context.
 * \param val UTF-16LE string; may contain embedded null characters.
 * \param len Length of val in 16-bit code units (i.e. size of string in bytes divided by 2).
 * \return 0 on success or negative on failure.
 */
int spdk_json_write_string_utf16le_raw(struct spdk_json_write_ctx *w, const uint16_t *val,
				       size_t len);

int spdk_json_write_string_fmt(struct spdk_json_write_ctx *w, const char *fmt,
			       ...) __attribute__((__format__(__printf__, 2, 3)));
int spdk_json_write_array_begin(struct spdk_json_write_ctx *w);
+44 −0
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@

#include "spdk/stdinc.h"

#include "spdk/endian.h"
#include "spdk/json.h"
#include "spdk/likely.h"
#include "spdk/string.h"
@@ -251,6 +252,49 @@ utf16_valid_surrogate_low(uint32_t val)
	return val >= 0xDC00 && val <= 0xDFFF;
}

/*
 * Validate the UTF-16LE encoding of the single codepoint that begins at start.
 *
 * \param start First 16-bit code unit to examine.
 * \param end One past the last code unit that may be read.
 * \return Number of 16-bit code units making up the codepoint (1 or 2),
 * 0 if there is no input (start == end), or negative if the sequence is invalid.
 */
static inline int
utf16le_valid(const uint16_t *start, const uint16_t *end)
{
	uint16_t lead, trail;

	if (start == end) {
		return 0;
	}

	lead = from_le16(start);

	/* Anything outside the surrogate range D800..DFFF is a complete BMP codepoint. */
	if (lead < 0xD800 || lead > 0xDFFF) {
		return 1;
	}

	/* A sequence must not open with a low surrogate (DC00..DFFF). */
	if (lead > 0xDBFF) {
		return -1;
	}

	assert(utf16_valid_surrogate_high(lead));

	/* A high surrogate needs one more code unit to be available... */
	if (start + 1 == end) {
		return -1;
	}

	/* ...and that code unit has to be a low surrogate to form a valid pair. */
	trail = from_le16(start + 1);

	return utf16_valid_surrogate_low(trail) ? 2 : -1;
}

static inline uint32_t
utf16_decode_surrogate_pair(uint32_t high, uint32_t low)
{
+91 −33
Original line number Diff line number Diff line
@@ -275,11 +275,9 @@ write_hex_4(void *dest, uint16_t val)
	p[3] = hex[val & 0xF];
}

static int
write_string_or_name(struct spdk_json_write_ctx *w, const char *val, size_t len)
static inline int
write_codepoint(struct spdk_json_write_ctx *w, uint32_t codepoint)
{
	const uint8_t *p = val;
	const uint8_t *end = val + len;
	static const uint8_t escapes[] = {
		['\b'] = 'b',
		['\f'] = 'f',
@@ -293,34 +291,10 @@ write_string_or_name(struct spdk_json_write_ctx *w, const char *val, size_t len)
		 *  (it is valid unescaped).
		 */
	};

	if (emit(w, "\"", 1)) return fail(w);

	while (p != end) {
		int codepoint_len;
		uint32_t codepoint;
	uint16_t high, low;
	char out[13];
	size_t out_len;

		codepoint_len = utf8_valid(p, end);
		switch (codepoint_len) {
		case 1:
			codepoint = utf8_decode_unsafe_1(p);
			break;
		case 2:
			codepoint = utf8_decode_unsafe_2(p);
			break;
		case 3:
			codepoint = utf8_decode_unsafe_3(p);
			break;
		case 4:
			codepoint = utf8_decode_unsafe_4(p);
			break;
		default:
			return fail(w);
		}

	if (codepoint < sizeof(escapes) && escapes[codepoint]) {
		out[0] = '\\';
		out[1] = escapes[codepoint];
@@ -348,7 +322,71 @@ write_string_or_name(struct spdk_json_write_ctx *w, const char *val, size_t len)
		out_len = 12;
	}

		if (emit(w, out, out_len)) return fail(w);
	return emit(w, out, out_len);
}

/*
 * Emit a UTF-8 buffer as a double-quoted JSON string.
 *
 * Each UTF-8 sequence is checked with utf8_valid(), decoded to a codepoint and
 * handed to write_codepoint(). An invalid sequence, or any failed write, ends
 * the operation through fail(w).
 *
 * \param w JSON write context.
 * \param val UTF-8 bytes; need not be null terminated.
 * \param len Number of bytes in val.
 * \return Result of the final emit() on success, or the result of fail(w) on error.
 */
static int
write_string_or_name(struct spdk_json_write_ctx *w, const char *val, size_t len)
{
	const uint8_t *cur = (const uint8_t *)val;
	const uint8_t *stop = (const uint8_t *)val + len;
	int seq_len = 0;
	uint32_t cp;

	/* Opening quote. */
	if (emit(w, "\"", 1)) {
		return fail(w);
	}

	for (; cur != stop; cur += seq_len) {
		/* utf8_valid() yields the sequence length in bytes; anything but 1..4 is rejected. */
		seq_len = utf8_valid(cur, stop);

		if (seq_len == 1) {
			cp = utf8_decode_unsafe_1(cur);
		} else if (seq_len == 2) {
			cp = utf8_decode_unsafe_2(cur);
		} else if (seq_len == 3) {
			cp = utf8_decode_unsafe_3(cur);
		} else if (seq_len == 4) {
			cp = utf8_decode_unsafe_4(cur);
		} else {
			return fail(w);
		}

		if (write_codepoint(w, cp)) {
			return fail(w);
		}
	}

	/* Closing quote. */
	return emit(w, "\"", 1);
}

static int
write_string_or_name_utf16le(struct spdk_json_write_ctx *w, const uint16_t *val, size_t len)
{
	const uint16_t *p = val;
	const uint16_t *end = val + len;

	if (emit(w, "\"", 1)) return fail(w);

	while (p != end) {
		int codepoint_len;
		uint32_t codepoint;

		codepoint_len = utf16le_valid(p, end);
		switch (codepoint_len) {
		case 1:
			codepoint = from_le16(&p[0]);
			break;
		case 2:
			codepoint = utf16_decode_surrogate_pair(from_le16(&p[0]), from_le16(&p[1]));
			break;
		default:
			return fail(w);
		}

		if (write_codepoint(w, codepoint)) return fail(w);
		p += codepoint_len;
	}

@@ -368,6 +406,26 @@ spdk_json_write_string(struct spdk_json_write_ctx *w, const char *val)
	return spdk_json_write_string_raw(w, val, strlen(val));
}

/*
 * Write a UTF-16LE string of len 16-bit code units as a JSON string value.
 *
 * begin_value() is called first; if it reports an error the write is
 * abandoned through fail(w). Otherwise the result of
 * write_string_or_name_utf16le() is returned.
 */
int
spdk_json_write_string_utf16le_raw(struct spdk_json_write_ctx *w, const uint16_t *val, size_t len)
{
	if (begin_value(w) != 0) {
		return fail(w);
	}

	return write_string_or_name_utf16le(w, val, len);
}

/*
 * Write a null-terminated UTF-16LE string as a JSON string value.
 *
 * The length is measured here and the actual work is delegated to
 * spdk_json_write_string_utf16le_raw().
 */
int
spdk_json_write_string_utf16le(struct spdk_json_write_ctx *w, const uint16_t *val)
{
	size_t num_units = 0;

	/* Count code units up to, but not including, the terminating zero. */
	while (val[num_units] != 0) {
		num_units++;
	}

	return spdk_json_write_string_utf16le_raw(w, val, num_units);
}

int
spdk_json_write_string_fmt(struct spdk_json_write_ctx *w, const char *fmt, ...)
{
+44 −0
Original line number Diff line number Diff line
@@ -86,6 +86,18 @@ write_cb(void *cb_ctx, const void *data, size_t size)
/* Full failing-string case: VAL_STRING_FAIL(in) wrapped in BEGIN() ... END_FAIL(). */
#define STR_FAIL(in) \
	BEGIN(); VAL_STRING_FAIL(in); END_FAIL()

/*
 * Assert that writing the UTF-16LE data in `str` succeeds (returns 0).
 * `str` must be an actual array, not a pointer: its length in 16-bit code units
 * is computed with sizeof, and the final code unit (the null terminator in the
 * test inputs) is excluded from the length passed to the writer.
 */
#define VAL_STRING_UTF16LE(str) \
	CU_ASSERT(spdk_json_write_string_utf16le_raw(w, (const uint16_t *)str, sizeof(str) / sizeof(uint16_t) - 1) == 0)

/* Same as VAL_STRING_UTF16LE, but asserts that the write fails (returns a negative value). */
#define VAL_STRING_UTF16LE_FAIL(str) \
	CU_ASSERT(spdk_json_write_string_utf16le_raw(w, (const uint16_t *)str, sizeof(str) / sizeof(uint16_t) - 1) < 0)

/*
 * Full passing case: write `in` and pass the expected text, `out` surrounded by
 * double quotes, to END().
 */
#define STR_UTF16LE_PASS(in, out) \
	BEGIN(); VAL_STRING_UTF16LE(in); END("\"" out "\"")

/* Full failing case: the write of `in` must fail, followed by END_FAIL(). */
#define STR_UTF16LE_FAIL(in) \
	BEGIN(); VAL_STRING_UTF16LE_FAIL(in); END_FAIL()

/* Assert that writing `name` as an object name succeeds; sizeof - 1 drops the literal's trailing NUL. */
#define VAL_NAME(name) \
	CU_ASSERT(spdk_json_write_name_raw(w, name, sizeof(name) - 1) == 0)

@@ -248,6 +260,37 @@ test_write_string_escapes(void)
	STR_FAIL("\xED\xA1\x8C\xED\xBE\xB4"); /* U+233B4 (invalid surrogate pair encoding) */
}

/*
 * Exercise spdk_json_write_string_utf16le_raw() with valid and invalid
 * UTF-16LE input.
 *
 * Every input is spelled out as little-endian bytes and ends with a null code
 * unit (0, 0), which the STR_UTF16LE_* macros drop from the length.
 */
static void
test_write_string_utf16le(void)
{
	struct spdk_json_write_ctx *w;

	/* "Hello" followed by U+FE15; every character is in the BMP. */
	const uint8_t in_bmp_only[] = {
		'H', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, 0x15, 0xFE, 0, 0
	};
	/* "Hi", the surrogate pair D834 DD1E (U+1D11E), then "!". */
	const uint8_t in_surrogate_pair[] = {
		'H', 0, 'i', 0,  0x34, 0xD8, 0x1E, 0xDD, '!', 0, 0, 0
	};
	/* U+D800 alone: a high surrogate with nothing after it. */
	const uint8_t in_lone_high[] = {
		0x00, 0xD8, 0, 0
	};
	/* U+DC00 U+DC00: the string starts with a low surrogate. */
	const uint8_t in_leading_low[] = {
		0x00, 0xDC, 0x00, 0xDC, 0, 0
	};
	/* U+D800 U+D800: a high surrogate followed by another high surrogate. */
	const uint8_t in_double_high[] = {
		0x00, 0xD8, 0x00, 0xD8, 0, 0
	};

	/* Valid input: the non-ASCII codepoints come out as \u escapes. */
	STR_UTF16LE_PASS(in_bmp_only, "Hello\\uFE15");
	STR_UTF16LE_PASS(in_surrogate_pair, "Hi\\uD834\\uDD1E!");

	/* Invalid surrogate sequences must be rejected. */
	STR_UTF16LE_FAIL(in_lone_high);
	STR_UTF16LE_FAIL(in_leading_low);
	STR_UTF16LE_FAIL(in_double_high);
}

static void
test_write_number_int32(void)
{
@@ -618,6 +661,7 @@ int main(int argc, char **argv)
		CU_add_test(suite, "write_literal", test_write_literal) == NULL ||
		CU_add_test(suite, "write_string_simple", test_write_string_simple) == NULL ||
		CU_add_test(suite, "write_string_escapes", test_write_string_escapes) == NULL ||
		CU_add_test(suite, "write_string_utf16le", test_write_string_utf16le) == NULL ||
		CU_add_test(suite, "write_number_int32", test_write_number_int32) == NULL ||
		CU_add_test(suite, "write_number_uint32", test_write_number_uint32) == NULL ||
		CU_add_test(suite, "write_array", test_write_array) == NULL ||