Implement XML normalization for protocol-test-helpers (#381) (1b5d3204) · Commits · Public Repositories / Smithy Rust

rust-runtime/protocol-test-helpers/Cargo.toml

+3 −0

Original line number	Diff line number	Diff line
		@@ -12,3 +12,6 @@ thiserror = "1"
		serde_json = "1"
		# Not perfect for our needs, but good for now
		assert-json-diff = "1"

		pretty_assertions = "0.7.2"
		roxmltree = "0.14.1"

rust-runtime/protocol-test-helpers/src/lib.rs

+58 −10

Original line number	Diff line number	Diff line
		@@ -3,8 +3,12 @@
		* SPDX-License-Identifier: Apache-2.0.
		*/

		mod xml;

		use crate::xml::try_xml_equivalent;
		use assert_json_diff::assert_json_eq_no_panic;
		use http::{Request, Uri};
		use pretty_assertions::Comparison;
		use std::collections::HashSet;
		use thiserror::Error;

		@@ -30,10 +34,11 @@ pub enum ProtocolTestFailure {
		MissingHeader { expected: String },
		#[error("Header `{forbidden}` was forbidden but found: `{found}`")]
		ForbiddenHeader { forbidden: String, found: String },
		#[error("body did not match. Hint:\n{hint}. Expected:\n `{expected}`\n Actual: \n`{found}")]
		#[error("body did not match. {comparison:?} \n == hint:\n{hint}.")]
		BodyDidNotMatch {
		expected: String,
		found: String,
		// the comparison includes colorized escapes. PrettyString ensures that even during
		// debug printing, these appear
		comparison: PrettyString,
		hint: String,
		},
		#[error("Expected body to be valid {expected} but instead: {found}")]
		@@ -211,8 +216,9 @@ pub fn require_headers<B>(
		pub enum MediaType {
		/// JSON bodies are deserialized and compared as JSON values
		Json,
		/// XML bodies are normalized and their normalized forms are compared
		Xml,
		/// Any other media type: bodies are compared literally as strings.
		/// The payload is the media type string that was supplied.
		// TODO: add structured comparison for further media types.
		Other(String),
		}

		@@ -220,6 +226,7 @@ impl<T: AsRef<str>> From<T> for MediaType {
		fn from(inp: T) -> Self {
		match inp.as_ref() {
		"application/json" => MediaType::Json,
		"application/xml" => MediaType::Xml,
		other => MediaType::Other(other.to_string()),
		}
		}
		@@ -232,16 +239,20 @@ pub fn validate_body<T: AsRef<[u8]>>(
		) -> Result<(), ProtocolTestFailure> {
		let body_str = std::str::from_utf8(actual_body.as_ref());
		match (media_type, body_str) {
		(MediaType::Json, Ok(actual_body)) => validate_json_body(actual_body, expected_body),
		(MediaType::Json, Ok(actual_body)) => try_json_eq(actual_body, expected_body),
		(MediaType::Xml, Ok(actual_body)) => try_xml_equivalent(actual_body, expected_body),
		(MediaType::Json, Err(_)) => Err(ProtocolTestFailure::InvalidBodyFormat {
		expected: "json".to_owned(),
		found: "input was not valid UTF-8".to_owned(),
		}),
		(MediaType::Xml, Err(_)) => Err(ProtocolTestFailure::InvalidBodyFormat {
		expected: "XML".to_owned(),
		found: "input was not valid UTF-8".to_owned(),
		}),
		(MediaType::Other(media_type), Ok(actual_body)) => {
		if actual_body != expected_body {
		Err(ProtocolTestFailure::BodyDidNotMatch {
		expected: expected_body.to_string(),
		found: actual_body.to_string(),
		comparison: pretty_comparison(actual_body, expected_body),
		hint: format!("media type: {}", media_type),
		})
		} else {
		@@ -256,7 +267,31 @@ pub fn validate_body<T: AsRef<[u8]>>(
		}
		}

		fn validate_json_body(actual: &str, expected: &str) -> Result<(), ProtocolTestFailure> {
		use std::fmt::{self, Debug};
		/// Borrowed string whose `Debug` output is the text itself.
		///
		/// The standard `Debug` impl for `str` quotes and escapes its contents; this wrapper
		/// writes the wrapped text to the formatter unchanged.
		#[derive(Eq, PartialEq)]
		struct PrettyStr<'a>(&'a str);

		impl<'a> Debug for PrettyStr<'a> {
		    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
		        let PrettyStr(raw) = self;
		        formatter.write_str(raw)
		    }
		}

		/// Owned string whose `Debug` output is the text itself.
		///
		/// Unlike the standard `Debug` impl for `String`, nothing is quoted or escaped, so
		/// escape sequences stored in the string reach the output unchanged.
		#[derive(Eq, PartialEq)]
		pub struct PrettyString(String);

		impl Debug for PrettyString {
		    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
		        let PrettyString(raw) = self;
		        formatter.write_str(raw.as_str())
		    }
		}

		/// Render a `pretty_assertions` comparison of `left` and `right` as a string.
		///
		/// Both sides are wrapped in `PrettyStr` so the comparison sees their raw text rather
		/// than quoted/escaped `Debug` output; the rendered result is returned as a `PrettyString`.
		fn pretty_comparison(left: &str, right: &str) -> PrettyString {
		    let (lhs, rhs) = (PrettyStr(left), PrettyStr(right));
		    let rendered = Comparison::new(&lhs, &rhs).to_string();
		    PrettyString(rendered)
		}

		fn try_json_eq(actual: &str, expected: &str) -> Result<(), ProtocolTestFailure> {
		let actual_json: serde_json::Value =
		serde_json::from_str(actual).map_err(\|e\| ProtocolTestFailure::InvalidBodyFormat {
		expected: "json".to_owned(),
		@@ -267,8 +302,7 @@ fn validate_json_body(actual: &str, expected: &str) -> Result<(), ProtocolTestFa
		match assert_json_eq_no_panic(&actual_json, &expected_json) {
		Ok(()) => Ok(()),
		Err(message) => Err(ProtocolTestFailure::BodyDidNotMatch {
		expected: expected.to_string(),
		found: actual.to_string(),
		comparison: pretty_comparison(actual, expected),
		hint: message,
		}),
		}
		@@ -400,6 +434,20 @@ mod tests {
		validate_body(&actual, expected, MediaType::Json).expect_err("bodies do not match");
		}

		// XML bodies are compared after normalization: whitespace around the text content
		// does not matter, but a change in the text itself does.
		#[test]
		fn test_validate_xml_body() {
		// Same text content, different surrounding whitespace => accepted.
		let expected = r#"<a>
		hello123
		</a>"#;
		let actual = "<a>hello123</a>";
		validate_body(&actual, expected, MediaType::Xml).expect("inputs match as XML");
		// Text content differs (hello123 vs hello124) => rejected.
		let expected = r#"<a>
		hello123
		</a>"#;
		let actual = "<a>hello124</a>";
		validate_body(&actual, expected, MediaType::Xml).expect_err("inputs are different");
		}

		#[test]
		fn test_validate_non_json_body() {
		let expected = r#"asdf"#;

rust-runtime/protocol-test-helpers/src/xml.rs

0 → 100644

+308 −0

Original line number	Diff line number	Diff line
		/*
		* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
		* SPDX-License-Identifier: Apache-2.0.
		*/

		use crate::{pretty_comparison, ProtocolTestFailure};
		use roxmltree::{Node, NodeType};
		use std::fmt::Write;

		/// Assert that two XML documents are equivalent
		///
		/// This will normalize documents and attempts to determine if it is OK to sort members or not by
		/// using a heuristic to determine if the tag represents a list (which should not be reordered)
		pub fn try_xml_equivalent(d1: &str, d2: &str) -> Result<(), ProtocolTestFailure> {
		let norm_1 = normalize_xml(d1).map_err(\|e\| ProtocolTestFailure::InvalidBodyFormat {
		expected: "left document to be valid XML".to_string(),
		found: format!("{}", e),
		})?;
		let norm_2 = normalize_xml(d2).map_err(\|e\| ProtocolTestFailure::InvalidBodyFormat {
		expected: "right document to be valid XML".to_string(),
		found: format!("{}", e),
		})?;
		if norm_1 == norm_2 {
		Ok(())
		} else {
		Err(ProtocolTestFailure::BodyDidNotMatch {
		comparison: pretty_comparison(&norm_1, &norm_2),
		hint: "".to_string(),
		})
		}
		}

		/// Normalizes XML for comparison during Smithy Protocol tests
		///
		/// The document element is rendered recursively into a canonical string: attributes are
		/// sorted, whitespace-only text is dropped, and the children of every element are sorted
		/// unless a heuristic decides the element represents a list (which must not be reordered).
		///
		/// Comments or processing instructions that precede the document element are ignored.
		///
		/// # Errors
		/// Returns the `roxmltree::Error` produced when `s` cannot be parsed as XML.
		pub fn normalize_xml(s: &str) -> Result<String, roxmltree::Error> {
		    let document = roxmltree::Document::parse(s)?;
		    // `root_element()` always yields the document element. Taking the first child of the
		    // document root instead would pick up a leading comment or processing instruction and
		    // render it as an empty tag, hiding the real content from the comparison.
		    Ok(unparse_tag(document.root_element(), 1))
		}

		/// Unparse a "tag" (a subtree) of an XML document
		///
		/// This function will first convert each of the tag's children into a normalized string
		/// then, assuming the node does not represent a list, it will simply lexicographically sort the fully
		/// rendered nodes themselves (avoiding the need to sort on keys then values then attributes, etc.).
		///
		/// This is not a fast algorithm ;-), but the test data it's running on is not large.
		fn unparse_tag(tag: Node, depth: usize) -> String {
		let mut out = String::new();
		out.push_str(&unparse_start_element(tag));
		let mut child_nodes = tag
		.children()
		// flat_map over empty/ignored nodes
		.flat_map(\|node\| unparse_node(node, depth + 1))
		.collect::<Vec<_>>();
		if !is_list(tag) {
		child_nodes.sort();
		}
		for node in child_nodes {
		out.push('\n');
		for _ in 0..depth {
		out.push_str(" ");
		}
		out.push_str(&node)
		}
		out.push('\n');
		for _ in 0..depth - 1 {
		out.push_str(" ");
		}
		write!(&mut out, "</{}>", tag.tag_name().name()).unwrap();
		out
		}

		/// Render a single node as text, recursing into elements
		///
		/// - Element: rendered (together with all of its children) via `unparse_tag`
		/// - Text: the text with surrounding whitespace trimmed; `None` when nothing is left
		/// - Anything else: ignored (`None`)
		fn unparse_node(n: Node, depth: usize) -> Option<String> {
		    if n.is_element() {
		        return Some(unparse_tag(n, depth));
		    }
		    if !n.is_text() {
		        return None;
		    }
		    let trimmed = n.text()?.trim();
		    Some(trimmed)
		        .filter(|text| !text.is_empty())
		        .map(str::to_string)
		}

		/// Render the opening tag of an element. Attributes are sorted by key, value, and namespace
		///
		/// The element's namespace (if any) is written as an `xmlns` attribute, and a namespaced
		/// attribute is prefixed with its namespace followed by `:`.
		///
		/// Produces output like: `<a key="foo">`
		fn unparse_start_element(n: Node) -> String {
		    let tag_name = n.tag_name();
		    let mut out = format!("<{}", tag_name.name());
		    if let Some(ns) = tag_name.namespace() {
		        write!(&mut out, " xmlns=\"{}\"", ns).unwrap();
		    }

		    let mut attributes: Vec<_> = n.attributes().iter().collect();
		    attributes.sort_by_key(|attrib| (attrib.name(), attrib.value(), attrib.namespace()));
		    for attribute in attributes {
		        out.push(' ');
		        if let Some(ns) = attribute.namespace() {
		            out.push_str(ns);
		            out.push(':');
		        }
		        write!(&mut out, "{}=\"{}\"", attribute.name(), attribute.value()).unwrap();
		    }

		    out.push('>');
		    out
		}

		/// Heuristic: does `node` represent a list, whose children must not be reordered?
		///
		/// A node is treated as a list when it has at least one non-whitespace child and every
		/// such child is an element, carries the same tag name as the first child, and itself has
		/// exactly one non-whitespace child.
		///
		/// An element with no children, or with only whitespace text, is not a list.
		fn is_list(node: Node) -> bool {
		    // a flat list looks like:
		    // <Foo>
		    //   <flat>example1</flat>
		    //   <flat>example2</flat>
		    //   <flat>example3</flat>
		    // </Foo>

		    // a regular list looks like:
		    // <values>
		    //   <Item>example1</Item>
		    //   <Item>example2</Item>
		    //   <Item>example3</Item>
		    // </values>

		    // `has_children()` is not a sufficient guard: an element containing only whitespace
		    // text has children, but none of them survive `non_empty_children`.
		    let first_child = match non_empty_children(node).next() {
		        Some(child) => child,
		        None => return false,
		    };

		    // in both of these cases, we don't want to reorder because list ordering is actually
		    // important
		    non_empty_children(node).all(|child| {
		        child.node_type() == NodeType::Element
		            && child.tag_name() == first_child.tag_name()
		            && non_empty_children(child).count() == 1
		    })
		}

		/// Children of `node` that are not whitespace text nodes
		fn non_empty_children<'a, 'input: 'a>(
		node: Node<'a, 'input>,
		) -> impl Iterator<Item = Node<'a, 'input>> {
		node.children()
		.filter(\|c\| !c.is_text() \|\| !c.text().unwrap().trim().is_empty())
		}

		// Unit tests for XML normalization and equivalence checking.
		#[cfg(test)]
		mod test {
		use crate::xml::{is_list, normalize_xml, try_xml_equivalent};
		use pretty_assertions::{assert_eq, assert_ne};
		use std::error::Error;

		// Sibling fields that appear in a different order normalize to the same string.
		#[test]
		fn normalize_field_order() -> Result<(), Box<dyn Error>> {
		let d1 = r#"<SimpleScalarPropertiesInputOutput xmlns="https://example.com" test="test" a="a">
		<stringValue>string</stringValue>
		<trueBooleanValue>true</trueBooleanValue>
		<falseBooleanValue>false</falseBooleanValue>
		<Nested xmlns:xsi="https://example.com" xsi:someName="nestedAttrValue"><a></a></Nested>
		</SimpleScalarPropertiesInputOutput>"#;
		// d2 swaps trueBooleanValue and falseBooleanValue relative to d1
		let d2 = r#"<SimpleScalarPropertiesInputOutput xmlns="https://example.com" test="test" a="a">
		<stringValue>string</stringValue>
		<falseBooleanValue>false</falseBooleanValue>
		<trueBooleanValue>true</trueBooleanValue>
		<Nested xmlns:xsi="https://example.com" xsi:someName="nestedAttrValue"><a></a></Nested>
		</SimpleScalarPropertiesInputOutput>"#;
		// sanity check ;-)
		assert_ne!(d1, d2);
		assert_eq!(normalize_xml(d1).unwrap(), normalize_xml(d2).unwrap());
		Ok(())
		}

		// An element whose children all share one tag name and each hold a single value
		// is detected as a list.
		#[test]
		fn detect_lists() {
		let d1 = r#"<values>
		<Item>example1</Item>
		<Item>example2</Item>
		<Item>example3</Item>
		</values>"#;

		let rotree = roxmltree::Document::parse(d1).unwrap();
		let root = rotree.root().first_child().unwrap();
		assert!(is_list(root));
		}

		// List members keep their order, so documents whose list items are permuted differ.
		#[test]
		fn dont_reorder_lists() {
		let d1 = r#"<Foo>
		<values>
		<Item>example1</Item>
		<Item>example2</Item>
		<Item>example3</Item>
		</values>
		</Foo>"#;
		// d2 swaps example2 and example3
		let d2 = r#"<Foo>
		<values>
		<Item>example1</Item>
		<Item>example3</Item>
		<Item>example2</Item>
		</values>
		</Foo>"#;
		try_xml_equivalent(d1, d2).expect_err("lists are out of order");
		}

		// Map entries each have two children (key and value), so the wrapper is not treated
		// as a list and the entries may appear in any order.
		#[test]
		fn reorder_wrapped_maps() {
		let d1 = r#"<Foo>
		<values>
		<entry>
		<key>example-key1</key>
		<value>example1</value>
		</entry>
		<entry>
		<key>example-key2</key>
		<value>example2</value>
		</entry>
		</values>
		</Foo>"#;
		// d2 lists the same entries in the opposite order
		let d2 = r#"<Foo>
		<values>
		<entry>
		<key>example-key2</key>
		<value>example2</value>
		</entry>
		<entry>
		<key>example-key1</key>
		<value>example1</value>
		</entry>
		</values>
		</Foo>"#;
		assert_eq!(normalize_xml(d1).unwrap(), normalize_xml(d2).unwrap());
		}

		// Flattened map entries may be reordered, and key/value may be swapped within an entry.
		#[test]
		fn reorder_flat_maps() {
		let d1 = r#"
		<Bar>
		<flatMap>
		<key>example-key1</key>
		<value>example1</value>
		</flatMap>
		<flatMap>
		<key>example-key2</key>
		<value>example2</value>
		</flatMap>
		<flatMap>
		<key>example-key3</key>
		<value>example3</value>
		</flatMap>
		</Bar>"#;
		let d2 = r#"
		<Bar>
		<flatMap>
		<value>example1</value>
		<key>example-key1</key>
		</flatMap>
		<flatMap>
		<key>example-key3</key>
		<value>example3</value>
		</flatMap>
		<flatMap>
		<value>example2</value>
		<key>example-key2</key>
		</flatMap>
		</Bar>"#;
		try_xml_equivalent(d1, d2).expect("XML is equivalent except for reordering");
		}

		// `<a/>` and `<a></a>` normalize to the same output.
		#[test]
		fn normalize_self_closing_elements() {
		try_xml_equivalent("<a/>", "<a></a>").expect("xml was equivalent");
		}

		// A differing attribute value makes the documents different.
		#[test]
		fn different_attributes_are_different() {
		let d1 = r#"<XmlAttributesInputOutput test="test">
		<foo>hi</foo>
		</XmlAttributesInputOutput>"#;
		let d2 = r#"<XmlAttributesInputOutput test="other">
		<foo>hi</foo>
		</XmlAttributesInputOutput>"#;
		try_xml_equivalent(d1, d2).expect_err("differing attributes");
		}

		// The namespace bound to an attribute prefix is part of the comparison: the same
		// prefix bound to different URIs (example2.com vs example3.com) is a mismatch.
		#[test]
		fn nested_namespaces() {
		let d1 = r#"<root xmlns="https://example.com/foo">
		<Nested xmlns:xsi="https://example2.com" xsi:someName="nestedAttrValue"></Nested>
		</root>"#;
		let d2 = r#"<root xmlns="https://example.com/foo">
		<Nested xmlns:xsi="https://example3.com" xsi:someName="nestedAttrValue"></Nested>
		</root>"#;
		try_xml_equivalent(d1, d2).expect_err("namespaces differ");
		}
		}