Unverified Commit edb5808e authored by Russell Cohen's avatar Russell Cohen Committed by GitHub
Browse files

Write XML Encoding Abstractions (#389)

* Write XML Encoding Abstractions

This commit creates an XML encode & escaping abstraction that allows "fearless code generation"
(invalid XML is a compile error).

* Rename TagWriter to ScopeWriter
parent 1b5d3204
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -10,3 +10,5 @@ thiserror = "1"

[dev-dependencies]
proptest = "1"
base64 = "0.13.0"
protocol-test-helpers = { path = "../protocol-test-helpers" }
+165 −0
Original line number Diff line number Diff line
/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0.
 */

//! XML Encoding module that uses Rust lifetimes to make
//! generating malformed XML a compile error

use crate::escape::escape;
use std::fmt::{self, Display, Formatter, Write};

// currently there's actually no way that encoding can fail but give it time :-)
/// Error type for XML encoding.
///
/// The enum has no variants, so a value of this type cannot be constructed today;
/// it exists so that serializer signatures can already return `Result<_, Error>`.
#[derive(Debug)]
pub enum Error {}

impl Display for Error {
    /// Writes a fixed, human-readable description of the error.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        write!(f, "Xml Encoding Error")
    }
}

/// Implementing the standard error trait lets callers box this error
/// (`Box<dyn std::error::Error>`) and propagate it with `?` alongside other errors.
impl std::error::Error for Error {}

/// Root of the XML writing API.
///
/// `XmlWriter` and the writers it hands out (`ElWriter`, `ScopeWriter`) are designed so that
/// producing a structurally invalid document is a type error. Every nested writer mutably
/// borrows from its parent, so the parent cannot be written to until the nested writer is gone.
/// Dropping a `ScopeWriter` appends the terminator (e.g. the closing element).
///
/// Names are the one exception: nothing here validates or escapes an element or attribute
/// name, so an invalid XML name can still be written. Names are expected to be static and known
/// ahead of time, which makes such a mistake visible in the calling code.
///
/// Furthermore, once `const panic` stabilizes, we'll be able to make an invalid XmlName a compiler
/// error.
///
/// ## Example
/// ```rust
/// use smithy_xml::encode::XmlWriter;
/// let mut s = String::new();
/// let mut doc = XmlWriter::new(&mut s);
/// let mut start_el = doc.start_el("Root");
/// start_el.write_ns("http://example.com");
/// let mut start_tag = start_el.finish();
/// start_tag.data("hello");
/// start_tag.finish();
/// assert_eq!(s, "<Root xmlns=\"http://example.com\">hello</Root>");
/// ```
///
/// See `tests/handwritten_serializers.rs` for more usage examples.
pub struct XmlWriter<'a> {
    doc: &'a mut String,
}

impl<'a> XmlWriter<'a> {
    /// Create a writer that appends its output to `doc`.
    pub fn new(doc: &'a mut String) -> Self {
        XmlWriter { doc }
    }

    /// Begin an element named `tag`.
    ///
    /// Appends `<tag` (written verbatim, without the closing `>`) and returns an `ElWriter`
    /// that borrows this writer until it is finished or dropped.
    pub fn start_el<'b, 'c>(&'c mut self, tag: &'b str) -> ElWriter<'c, 'b> {
        let doc = &mut *self.doc;
        write!(doc, "<{}", tag).unwrap();
        ElWriter { start: tag, doc }
    }
}

/// Writer for the opening tag of an element, positioned after `<name` and before `>`.
///
/// Attributes and a default namespace can be appended while the tag is still open. Only
/// `finish` writes the closing `>`, so it must be called to complete the tag.
pub struct ElWriter<'a, 'b> {
    start: &'b str,
    doc: &'a mut String,
}

impl<'a, 'b> ElWriter<'a, 'b> {
    /// Append ` key="value"` to the open tag.
    ///
    /// `value` is passed through `escape`; `key` is written verbatim. Returns `self` so calls
    /// can be chained.
    pub fn write_attribute(&mut self, key: &str, value: &str) -> &mut Self {
        let escaped_value = escape(value);
        write!(self.doc, " {}=\"{}\"", key, escaped_value).unwrap();
        self
    }

    /// Append ` xmlns="namespace"` to the open tag, with `namespace` passed through `escape`.
    ///
    /// Returns `self` so calls can be chained.
    pub fn write_ns(&mut self, namespace: &str) -> &mut Self {
        let escaped_ns = escape(namespace);
        write!(self.doc, " xmlns=\"{}\"", escaped_ns).unwrap();
        self
    }

    /// Close the opening tag with `>` and return a `ScopeWriter` for the element's content.
    pub fn finish(self) -> ScopeWriter<'a, 'b> {
        let ElWriter { start, doc } = self;
        write!(doc, ">").unwrap();
        ScopeWriter { doc, start }
    }
}

/// Writer for the content of an element whose opening tag has already been written.
///
/// Together with its `Drop` impl this covers a full tag pair `<a></a>`: content is written
/// through this value, and the closing tag is appended when the value is dropped.
pub struct ScopeWriter<'a, 'b> {
    doc: &'a mut String,
    start: &'b str,
}

impl Drop for ScopeWriter<'_, '_> {
    /// Append the closing tag `</name>` for the element this scope belongs to.
    fn drop(&mut self) {
        let element_name = self.start;
        write!(self.doc, "</{}>", element_name).unwrap();
    }
}

impl ScopeWriter<'_, '_> {
    /// Append `data` as character content, after passing it through `escape`.
    pub fn data(&mut self, data: &str) {
        let escaped = escape(data);
        self.doc.write_str(&escaped).unwrap();
    }

    /// Close the element. Consuming `self` runs `Drop`, which writes the closing tag.
    pub fn finish(self) {
        drop(self);
    }

    /// Begin a child element named `tag`.
    ///
    /// Appends `<tag` (written verbatim) and returns an `ElWriter` that borrows this scope
    /// until it is finished or dropped.
    pub fn start_el<'b, 'c>(&'c mut self, tag: &'b str) -> ElWriter<'c, 'b> {
        let doc = &mut *self.doc;
        write!(doc, "<{}", tag).unwrap();
        ElWriter { start: tag, doc }
    }
}

#[cfg(test)]
mod test {
    use crate::encode::XmlWriter;

    /// A root element with an attribute, a namespace and two children (one with text, one
    /// empty) is written in document order with every tag closed.
    #[test]
    fn basic_document_encoding() {
        let mut out = String::new();
        let mut writer = XmlWriter::new(&mut out);

        let mut root_el = writer.start_el("Hello");
        root_el
            .write_attribute("key", "foo")
            .write_ns("http://example.com");
        let mut root = root_el.finish();

        let mut first_child = root.start_el("inner").finish();
        first_child.data("hello world!");
        first_child.finish();

        let second_child = root.start_el("inner").finish();
        second_child.finish();

        root.finish();

        let expected = "<Hello key=\"foo\" xmlns=\"http://example.com\"><inner>hello world!</inner><inner></inner></Hello>";
        assert_eq!(out, expected);
    }

    /// Attribute values and character data are escaped; the closing tag is produced by the
    /// scope being dropped rather than by an explicit `finish`.
    #[test]
    fn escape_data() {
        let mut s = String::new();
        {
            let mut writer = XmlWriter::new(&mut s);
            let mut hello_el = writer.start_el("Hello");
            hello_el.write_attribute("key", "<key=\"value\">");
            let mut hello = hello_el.finish();
            hello.data("\n\r&");
            // `hello` goes out of scope here, which writes `</Hello>`.
        }
        let expected = r#"<Hello key="&lt;key=&quot;value&quot;&gt;">&#xA;&#xD;&amp;</Hello>"#;
        assert_eq!(s, expected)
    }
}
+78 −0
Original line number Diff line number Diff line
/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0.
 */

use std::borrow::Cow;
use std::fmt::Write;

/// Characters that `escape` replaces: the five XML special characters plus carriage return
/// (U+000D), line feed (U+000A), next line (U+0085) and line separator (U+2028).
const ESCAPES: &[char] = &[
    '&', '\'', '\"', '<', '>', '\u{00D}', '\u{00A}', '\u{0085}', '\u{2028}',
];

/// Escape `s` for use as XML character data or as an attribute value.
///
/// `&`, `<`, `>`, `"` and `'` become their named entities; every other character listed in
/// `ESCAPES` becomes an uppercase hexadecimal character reference such as `&#xA;`.
///
/// Returns `Cow::Borrowed(s)` without allocating when `s` contains nothing to escape, and
/// `Cow::Owned` otherwise.
pub fn escape(s: &str) -> Cow<str> {
    // Fast path: nothing to replace, so hand the input straight back.
    let first = match s.find(ESCAPES) {
        None => return Cow::Borrowed(s),
        Some(pos) => pos,
    };

    let mut escaped = String::new();
    escaped.push_str(&s[..first]);
    let mut rest = &s[first..];
    loop {
        // Invariant: `rest` starts with a character from `ESCAPES`.
        let mut chars = rest.chars();
        let special = chars.next().expect("must not be none");
        match special {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            // Everything else in `ESCAPES` is emitted as a hex character reference.
            other => write!(&mut escaped, "&#x{:X};", other as u32)
                .expect("write to string cannot fail"),
        }
        // Continue after the character just handled; `as_str` keeps us on a char boundary.
        rest = chars.as_str();
        match rest.find(ESCAPES) {
            Some(next) => {
                escaped.push_str(&rest[..next]);
                rest = &rest[next..];
            }
            None => break,
        }
    }
    // Whatever is left contains no escapable characters.
    escaped.push_str(rest);
    Cow::Owned(escaped)
}

#[cfg(test)]
mod test {
    use crate::escape::escape;
    use proptest::proptest;

    /// The five XML special characters are replaced by their named entities.
    #[test]
    fn escape_basic() {
        let escaped = escape("<helo>&\"'");
        assert_eq!(escaped, "&lt;helo&gt;&amp;&quot;&apos;");
    }

    /// Line-ending and separator characters are replaced by hex character references.
    ///
    /// Inputs are stored base64-encoded; the decoded text is shown next to each case.
    #[test]
    fn escape_eol_encoding_sep() {
        let test_cases = vec![
            ("CiAK", "&#xA; &#xA;"),                                      // '\n \n'
            ("YQ0KIGIKIGMN", "a&#xD;&#xA; b&#xA; c&#xD;"),                // 'a\r\n b\n c\r'
            ("YQ3ChSBiwoU", "a&#xD;&#x85; b&#x85;"),                      // 'a\r\u0085 b\u0085'
            ("YQ3igKggYsKFIGPigKg=", "a&#xD;&#x2028; b&#x85; c&#x2028;"), // 'a\r\u2028 b\u0085 c\u2028'
        ];
        for (base64_encoded, expected_xml_output) in test_cases {
            let raw = base64::decode(base64_encoded).expect("valid base64");
            let text = String::from_utf8(raw).expect("valid utf-8");
            let escaped = escape(&text);
            assert_eq!(escaped, expected_xml_output);
        }
    }

    proptest! {
        /// Test that arbitrary strings round trip after being escaped and unescaped
        #[test]
        fn round_trip(s: String) {
            let escaped = escape(&s);
            let restored = crate::unescape::unescape(&escaped).expect("encoded should be valid decoded");
            assert_eq!(restored, s);
        }
    }
}
+2 −0
Original line number Diff line number Diff line
//! Abstractions for Smithy
//! [XML Binding Traits](https://awslabs.github.io/smithy/1.0/spec/core/xml-traits.html)
pub mod decode;
pub mod encode;
mod escape;
mod unescape;
+100 −0
Original line number Diff line number Diff line
/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0.
 */

use protocol_test_helpers::{validate_body, MediaType};
use smithy_xml::encode;
use smithy_xml::encode::ScopeWriter;

/// Hand-written test fixture: a structure with two string members, serialized by
/// `serialize_with_namespace` and `with_namespace_inner` as `<foo>` and `<bar>` child elements.
///
/// NOTE(review): the annotation below names `http://www.example.com`, but
/// `serialize_with_namespace` writes `http://foo.com` and the test expects that value —
/// confirm which one is intended.
// @namespace http://www.example.com
struct WithNamespace {
    foo: String,
    bar: String,
}

/// Hand-written test fixture: a structure with one member written as an XML attribute and one
/// nested structure written as a child element (see `serialize_nested`).
struct Nested {
    // Written by `serialize_nested` as the attribute `a` on the root element.
    // @xmlAttribute("a")
    a: String,
    // Written by `serialize_nested` inside an `<inner>` child element.
    inner: WithNamespace,
}

/// Serialize `nested` as a `<Nested>` element carrying attribute `a`, with the members of
/// `nested.inner` written inside an `<inner>` child element.
fn serialize_nested(nested: &Nested) -> Result<String, smithy_xml::encode::Error> {
    let mut xml = String::new();
    let mut doc = encode::XmlWriter::new(&mut xml);

    let mut nested_el = doc.start_el("Nested");
    nested_el.write_attribute("a", &nested.a);
    let mut nested_scope = nested_el.finish();

    let mut inner_scope = nested_scope.start_el("inner").finish();
    with_namespace_inner(&mut inner_scope, &nested.inner);

    // Close innermost first: finishing a scope appends its closing tag.
    inner_scope.finish();
    nested_scope.finish();

    Ok(xml)
}

/// Serialize `with_namespace` as a `<MyStructure>` root element that declares the default
/// namespace `http://foo.com` and contains the structure's members as child elements.
fn serialize_with_namespace(
    with_namespace: &WithNamespace,
) -> Result<String, smithy_xml::encode::Error> {
    let mut xml = String::new();
    let mut doc = encode::XmlWriter::new(&mut xml);

    let mut root_el = doc.start_el("MyStructure");
    root_el.write_ns("http://foo.com");

    let mut members = root_el.finish();
    with_namespace_inner(&mut members, with_namespace);
    // Finishing the scope appends `</MyStructure>`.
    members.finish();

    Ok(xml)
}

/// Write the members of `with_namespace` into `tag` as `<foo>…</foo>` followed by
/// `<bar>…</bar>`, each containing the member's value as character data.
fn with_namespace_inner(tag: &mut ScopeWriter, with_namespace: &WithNamespace) {
    let members = [
        ("foo", &with_namespace.foo),
        ("bar", &with_namespace.bar),
    ];
    for &(name, value) in members.iter() {
        let mut member_scope = tag.start_el(name).finish();
        member_scope.data(value);
        member_scope.finish();
    }
}

/// The namespaced structure serializes to a body that `validate_body` accepts as matching
/// the expected XML document.
#[test]
fn test_serialize_with_namespace() {
    let foo = "FooFoo".to_string();
    let bar = "BarBar".to_string();
    let actual = serialize_with_namespace(&WithNamespace { foo, bar }).unwrap();
    let expected = r#"<MyStructure xmlns="http://foo.com">
            <foo>FooFoo</foo>
            <bar>BarBar</bar>
        </MyStructure>"#;

    validate_body(actual, expected, MediaType::Xml).expect("correct XML should be generated");
}

/// The nested structure serializes to a body that `validate_body` accepts as matching the
/// expected XML document, including the attribute on the root element.
#[test]
fn test_serialize_nested() {
    let inner = WithNamespace {
        foo: "foovalue".to_string(),
        bar: "barvalue".to_string(),
    };
    let a = "avalue".to_string();
    let actual = serialize_nested(&Nested { a, inner }).unwrap();
    let expected = r#"<Nested a="avalue">
            <inner>
                <foo>foovalue</foo>
                <bar>barvalue</bar>
            </inner>
        </Nested>"#;

    validate_body(actual, expected, MediaType::Xml).expect("correct XML should be generated");
}