Escape control characters in JSON strings (#427) (1b5453f8) · Commits · Public Repositories / Smithy Rust

rust-runtime/smithy-json/src/escape.rs

+53 −29

Original line number	Diff line number	Diff line
		@@ -5,35 +5,44 @@

		use std::borrow::Cow;

		const ESCAPES: &[char] = &['"', '\\', '\u{08}', '\u{0C}', '\n', '\r', '\t'];

		/// Escapes a string for embedding in a JSON string value.
		///
		/// The quotation mark (`"`), the reverse solidus (`\`), and every control
		/// character in the range U+0000..=U+001F are replaced by an escape sequence.
		/// Backspace, form feed, line feed, carriage return, and tab use their short
		/// forms (`\b`, `\f`, `\n`, `\r`, `\t`); every other control character is
		/// written as `\u00XX` with lowercase hex digits.
		///
		/// Returns `Cow::Borrowed` (no allocation) when nothing in `value` needs
		/// escaping, and `Cow::Owned` otherwise. The surrounding quotes of the JSON
		/// string are NOT added.
		pub fn escape_string(value: &str) -> Cow<'_, str> {
		    let bytes = value.as_bytes();
		    // Scanning bytes rather than chars is sound: every byte of a multi-byte
		    // UTF-8 sequence is >= 0x80, so it can never match an escapable ASCII byte.
		    let first_escape = bytes
		        .iter()
		        .position(|&byte| matches!(byte, 0..=0x1F | b'"' | b'\\'));
		    match first_escape {
		        Some(index) => Cow::Owned(escape_string_inner(&bytes[..index], &bytes[index..])),
		        None => Cow::Borrowed(value),
		    }
		}

		/// Builds the escaped copy of a string that is known to need escaping.
		///
		/// `start` is the leading run of bytes that needs no escaping and is copied
		/// verbatim; `rest` begins at the first byte that must be escaped and is
		/// processed byte by byte. Both slices must come from the same valid UTF-8
		/// string, split at an ASCII byte.
		fn escape_string_inner(start: &[u8], rest: &[u8]) -> String {
		    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

		    // At least one byte grows into a multi-byte escape, hence the `+ 1`.
		    let mut escaped = Vec::with_capacity(start.len() + rest.len() + 1);
		    escaped.extend_from_slice(start);

		    for &byte in rest {
		        match byte {
		            b'"' => escaped.extend_from_slice(b"\\\""),
		            b'\\' => escaped.extend_from_slice(b"\\\\"),
		            0x08 => escaped.extend_from_slice(b"\\b"),
		            0x0C => escaped.extend_from_slice(b"\\f"),
		            b'\n' => escaped.extend_from_slice(b"\\n"),
		            b'\r' => escaped.extend_from_slice(b"\\r"),
		            b'\t' => escaped.extend_from_slice(b"\\t"),
		            // Remaining control characters have no short form. Emit `\u00XX`
		            // from a digit table instead of allocating through `format!`.
		            0..=0x1F => escaped.extend_from_slice(&[
		                b'\\',
		                b'u',
		                b'0',
		                b'0',
		                HEX_DIGITS[usize::from(byte >> 4)],
		                HEX_DIGITS[usize::from(byte & 0x0F)],
		            ]),
		            _ => escaped.push(byte),
		        }
		    }

		    debug_assert!(std::str::from_utf8(&escaped).is_ok());
		    // SAFETY:
		    // - The original input was valid UTF-8 since it came in as a `&str`
		    // - Only single-byte (ASCII) code points were replaced
		    // - The escape sequences themselves are ASCII, hence valid UTF-8
		    unsafe { String::from_utf8_unchecked(escaped) }
		}

		#[cfg(test)]
		@@ -53,16 +62,31 @@ mod test {
		escape_string("\u{08}f\u{0C}o\to\r\n").as_ref()
		);
		assert_eq!("\\\"test\\\"", escape_string("\"test\"").as_ref());
		assert_eq!("\\u0000", escape_string("\u{0}").as_ref());
		assert_eq!("\\u001f", escape_string("\u{1f}").as_ref());
		}

		use proptest::proptest;
		proptest! {
		#[test]
		fn matches_serde_json(s: String) {
		assert_eq!(
		serde_json::to_string(&s).unwrap(),
		format!(r#""{}""#, escape_string(&s))
		)
		fn matches_serde_json(s in ".*") {
		let serde_escaped = serde_json::to_string(&s).unwrap();
		let serde_escaped = &serde_escaped[1..(serde_escaped.len() - 1)];
		assert_eq!(serde_escaped,escape_string(&s))
		}
		}

		/// Exhaustively checks that every Unicode scalar value is escaped the same
		/// way `serde_json` escapes it.
		#[test]
		#[ignore] // This tests escaping of all codepoints, but can take a long time in debug builds
		fn all_codepoints() {
		    // Iterate the `char` range directly: it yields only valid scalar values
		    // (skipping the surrogate gap) and stops at `char::MAX`, instead of probing
		    // all ~4.3 billion `u32` values and discarding the invalid ones.
		    for chr in '\u{0}'..=char::MAX {
		        let string = String::from(chr);
		        let escaped = escape_string(&string);
		        let serde_escaped = serde_json::to_string(&string).unwrap();
		        // serde_json wraps the value in double quotes; compare only the contents.
		        let serde_escaped = &serde_escaped[1..(serde_escaped.len() - 1)];
		        assert_eq!(&escaped, serde_escaped);
		    }
		}
		}