Loading aws/sdk-codegen/src/main/kotlin/software/amazon/smithy/rustsdk/AwsReadmeDecorator.kt +179 −8 Original line number Diff line number Diff line Loading @@ -6,6 +6,8 @@ package software.amazon.smithy.rustsdk import org.jsoup.Jsoup import org.jsoup.nodes.Element import org.jsoup.nodes.TextNode import software.amazon.smithy.model.traits.DocumentationTrait import software.amazon.smithy.rust.codegen.rustlang.raw import software.amazon.smithy.rust.codegen.smithy.CodegenContext Loading @@ -13,6 +15,13 @@ import software.amazon.smithy.rust.codegen.smithy.RustCrate import software.amazon.smithy.rust.codegen.smithy.customize.RustCodegenDecorator import software.amazon.smithy.rust.codegen.smithy.generators.ManifestCustomizations import software.amazon.smithy.rust.codegen.util.getTrait import java.lang.StringBuilder import java.util.logging.Logger // Use a sigil that should always be unique in the text to fix line breaks and spaces // since Jsoup doesn't preserve whitespace at all. private const val LINE_BREAK_SIGIL = "[[smithy-rs-br]]" private const val SPACE_SIGIL = "[[smithy-rs-nbsp]]" /** * Generates a README.md for each service crate for display on crates.io. Loading @@ -21,16 +30,17 @@ class AwsReadmeDecorator : RustCodegenDecorator { override val name: String = "AwsReadmeDecorator" override val order: Byte = 0 private val logger: Logger = Logger.getLogger(javaClass.name) override fun crateManifestCustomizations(codegenContext: CodegenContext): ManifestCustomizations = mapOf("package" to mapOf("readme" to "README.md")) override fun extras(codegenContext: CodegenContext, rustCrate: RustCrate) { rustCrate.withFile("README.md") { writer -> // Strip HTML from the doc trait value. In the future when it's written, we can use our Rustdoc // documentation normalization code to convert this to Markdown. 
val description = Jsoup.parse( val description = normalizeDescription( codegenContext.moduleName, codegenContext.settings.getService(codegenContext.model).getTrait<DocumentationTrait>()?.value ?: "" ).text() ) val moduleName = codegenContext.settings.moduleName writer.raw( Loading @@ -39,9 +49,9 @@ class AwsReadmeDecorator : RustCodegenDecorator { **Please Note: The SDK is currently released as an alpha and is intended strictly for feedback purposes only. Do not use this SDK for production workloads.** $description """.trimIndent() + "\n\n$description\n\n" + """ ## Getting Started > Examples are availble for many services and operations, check out the Loading Loading @@ -78,4 +88,165 @@ class AwsReadmeDecorator : RustCodegenDecorator { ) } }

    /**
     * Strips HTML from the description and makes it human-readable Markdown.
     *
     * The input is parsed with Jsoup, rewritten in place by a fixed sequence of passes, and then
     * flattened to text. Because flattening collapses whitespace, line breaks and indentation are
     * carried through as [LINE_BREAK_SIGIL] / [SPACE_SIGIL] and expanded at the very end.
     *
     * @param moduleName only used to prefix the warning logged for unrecognized tags
     * @param input the HTML documentation text to convert
     * @return the Markdown text, without leading or trailing newlines
     */
    internal fun normalizeDescription(moduleName: String, input: String): String {
        val doc = Jsoup.parse(input)
        doc.body().apply {
            // The order of operations here is important:
            stripUndesiredNodes() // Remove `<fullname>`, blank whitespace nodes, etc
            normalizeInlineStyles() // Convert bold/italics tags to Markdown equivalents
            normalizeAnchors() // Convert anchor tags into Markdown links
            normalizeBreaks() // Convert `<br>` tags into newlines
            normalizeLists() // Convert HTML lists into Markdown lists
            normalizeDescriptionLists() // Converts HTML <dl> description lists into Markdown
            normalizeParagraphs() // Replace paragraph tags into blocks of text separated by newlines
            warnOnUnrecognizedElements(moduleName) // Log a warning if we missed something
        }
        // normalizeLineWhitespace() expands both sigils itself, so no separate replace pass is needed here.
        return doc.body().text().normalizeLineWhitespace()
    }

    /**
     * Drops `<fullname>` and `<note>` elements, unwraps `<important>` into a `<span>`, and removes
     * blank text nodes that are direct children of this element.
     *
     * @throws IllegalStateException if a `<note>` element has no child elements
     */
    private fun Element.stripUndesiredNodes() {
        // Remove the `<fullname>` tag
        getElementsByTag("fullname").forEach { it.remove() }

        // Unwrap `<important>` tags
        getElementsByTag("important").forEach { it.changeInto("span") }

        // Remove the `<note>` tag
        getElementsByTag("note").forEach { note ->
            // NOTE(review): the guard fires when the note has NO child elements, but the message used
            // to say it "unexpectedly had children". The condition is kept as-is (flipping it would
            // change which inputs are rejected) and the message now describes what is actually
            // checked; confirm which of the two was intended.
            check(note.children().isNotEmpty()) { "<note> tag unexpectedly had no children" }
            note.remove()
        }

        // Eliminate empty whitespace
        textNodes().filter { it.isBlank }.forEach { it.remove() }
    }

    /**
     * Replaces this element with a fresh [tagName] element that holds copies of this element's
     * child nodes. Only the children are copied over to the replacement.
     */
    private fun Element.changeInto(tagName: String) {
        replaceWith(Element(tagName).also { elem -> elem.appendChildren(childNodesCopy()) })
    }

    /** Rewrites `<b>` as `__…__` and `<i>` as `_…_`. */
    private fun Element.normalizeInlineStyles() {
        getElementsByTag("b").forEach { normalizeInlineStyleTag("__", it) }
        getElementsByTag("i").forEach { normalizeInlineStyleTag("_", it) }
    }

    /** Replaces [tag] with a `<span>` containing copies of its children wrapped in [surround]. */
    private fun normalizeInlineStyleTag(surround: String, tag: Element) {
        tag.replaceWith(
            Element("span").also { span ->
                span.append(surround)
                span.appendChildren(tag.childNodesCopy())
                span.append(surround)
            }
        )
    }

    /**
     * Replaces every `<a>` with a text node: `[text](href)` when the `href` attribute is not blank,
     * otherwise just the anchor's text.
     */
    private fun Element.normalizeAnchors() {
        getElementsByTag("a").forEach { anchor ->
            val text = anchor.text()
            val link = anchor.attr("href")
            val markdown = if (link.isNotBlank()) "[$text]($link)" else text
            anchor.replaceWith(TextNode(markdown))
        }
    }

    /** Replaces every `<br>` with a [LINE_BREAK_SIGIL] text node. */
    private fun Element.normalizeBreaks() {
        getElementsByTag("br").forEach { lineBreak -> lineBreak.replaceWith(TextNode(LINE_BREAK_SIGIL)) }
    }

    /** True for `<ul>` and `<ol>` elements. */
    private fun Element.isList(): Boolean = tagName() == "ul" || tagName() == "ol"

    /** Converts every outermost `<ul>`/`<ol>` (and, through recursion, the lists nested in it). */
    private fun Element.normalizeLists() {
        (getElementsByTag("ul") + getElementsByTag("ol"))
            // Only operate on lists that are top-level (are not nested within other lists)
            .filter { list -> list.parents().none { it.isList() } }
            .forEach { list -> list.normalizeList() }
    }

    /**
     * Replaces this list element with a single text node holding its Markdown form.
     *
     * @param indent nesting depth, starting at 1; each level adds two [SPACE_SIGIL]s of indentation
     */
    private fun Element.normalizeList(indent: Int = 1) {
        // First, replace nested lists so that each item's text already contains their Markdown
        children()
            .filter { it.tagName() == "li" }
            .flatMap { item -> item.children() }
            .filter { it.isList() }
            .forEach { nested -> nested.normalizeList(indent + 1) }

        // Then format the list items down to Markdown
        val prefix = if (tagName() == "ul") "- " else "1. "
        val indentText = SPACE_SIGIL.repeat(indent * 2)
        val items = children()
        val markdown = buildString {
            // A nested list has to start on its own line below the parent item's text
            if (indent != 1) {
                append(LINE_BREAK_SIGIL)
            }
            for (item in items) {
                append("$indentText$prefix${item.text().trim()}$LINE_BREAK_SIGIL")
            }
        }
        replaceWith(TextNode(markdown))
    }

    /** Converts every `<dl>` below this element. */
    private fun Element.normalizeDescriptionLists() {
        getElementsByTag("dl").forEach { list -> list.normalizeDescriptionList() }
    }

    /**
     * Turns each `<dt>` into a bold term on its own line and each `<dd>` into a `<p>`, then
     * replaces the list itself with a `<span>` that ends in a line break.
     */
    private fun Element.normalizeDescriptionList() {
        getElementsByTag("dt").forEach { dt ->
            dt.text("${LINE_BREAK_SIGIL}__${dt.text()}__$LINE_BREAK_SIGIL")
            dt.changeInto("span")
        }
        getElementsByTag("dd").forEach { dd -> dd.changeInto("p") }
        appendChild(TextNode(LINE_BREAK_SIGIL))
        changeInto("span")
    }

    /** Replaces every `<p>` with its text, surrounded by [LINE_BREAK_SIGIL]s. */
    private fun Element.normalizeParagraphs() {
        getElementsByTag("p").forEach { paragraph ->
            paragraph.replaceWith(TextNode(LINE_BREAK_SIGIL + paragraph.text() + LINE_BREAK_SIGIL))
        }
    }

    /**
     * Logs one warning listing the distinct tag names still present, in sorted order.
     * `body` (always present) and `span` (produced by the passes above) are not reported.
     */
    private fun Element.warnOnUnrecognizedElements(moduleName: String) {
        val tags = allElements
            .map { elem -> elem.tagName() }
            .filter { tag -> tag != "body" && tag != "span" }
            .toSortedSet()
            .joinToString(", ")
        if (tags.isNotEmpty()) {
            logger.warning { "[$moduleName] Unrecognized HTML tags encountered when normalizing text: $tags" }
        }
    }

    /** Expands the sigils into real whitespace and tidies up blank lines and line edges. */
    private fun String.normalizeLineWhitespace(): String =
        // Convert sigils back into whitespace
        replace(LINE_BREAK_SIGIL, "\n")
            .replace(SPACE_SIGIL, " ")
            // Replace long runs of linebreaks with just two line breaks
            .replace(Regex("\n\n\n+"), "\n\n")
            // Remove trailing whitespace from each line
            .replace(Regex("[ \t]+\n"), "\n")
            // Remove leading whitespace from each line when it's not a list item
            .replace(Regex("\n[ \t]+([^ \t\\-1])"), "\n$1")
            // Chop off leading newlines
            .replace(Regex("^\n+"), "")
            // Chop off trailing newlines
            .replace(Regex("\n+$"), "")
}
aws/sdk-codegen/src/test/kotlin/AwsReadmeDecoratorTest.kt 0 → 100644 +105 −0 Original line number Diff line number Diff line /* * Copyright Amazon.com, Inc. or its affiliates.
All Rights Reserved. * SPDX-License-Identifier: Apache-2.0. */

import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
import software.amazon.smithy.rustsdk.AwsReadmeDecorator

/**
 * Checks the HTML-to-Markdown conversion done by `AwsReadmeDecorator.normalizeDescription`.
 *
 * The expected values are multi-line: paragraphs are separated by a blank line and list items are
 * indented by two spaces per nesting level.
 */
class AwsReadmeDecoratorTest {
    /** `<fullname>` is dropped; bold, italics and anchors become their Markdown forms. */
    @Test
    fun `it converts description HTML into Markdown`() {
        assertEquals(
            """
            This is __some paragraph__ of information.

            This is _another_ paragraph of information.

            More information [can be found here](https://example.com).
            """.trimIndent(),
            AwsReadmeDecorator().normalizeDescription(
                "",
                """
                <fullname>Some service</fullname>
                <p>This is <b>some paragraph</b> of information.</p>
                <p>This is <i>another</i> paragraph of information.</p>
                <p>More information <a href="https://example.com">can be found here</a>.</p>
                """.trimIndent()
            )
        )
    }

    /** Nested lists are indented below their parent item. */
    @Test
    fun `it converts lists`() {
        assertEquals(
            """
            Some text introducing a list:
              - foo bar baz
              - baz bar foo
                1. nested item
                1. another

            More text.
            """.trimIndent(),
            AwsReadmeDecorator().normalizeDescription(
                "",
                """
                <p>Some text introducing a list:
                <ul>
                    <li>foo bar baz</li>
                    <li>baz bar foo
                        <ol><li>nested item</li><li>another</li></ol>
                    </li>
                </ul>
                More text.</p>
                """.trimIndent()
            )
        )
    }

    /** Terms become bold lines and each description becomes its own paragraph. */
    @Test
    fun `it converts description lists`() {
        assertEquals(
            """
            Some text introducing a description list:

            __Something__

            Some description of [something](test).

            __Another thing__

            Some description of another thing.

            A second paragraph that describes another thing.

            __MDN says these can be wrapped in divs__

            So here we are

            Some trailing text.
            """.trimIndent(),
            AwsReadmeDecorator().normalizeDescription(
                "",
                """
                <p>Some text introducing a description list:
                <dl>
                    <dt>Something</dt>
                    <dd>Some description of <a href="test">something</a>.</dd>
                    <dt>Another thing</dt>
                    <dd>Some description of another thing.</dd>
                    <dd>A second paragraph that describes another thing.</dd>
                    <div>
                        <dt>MDN says these can be wrapped in divs</dt>
                        <dd>So here we are</dd>
                    </div>
                </dl>
                Some trailing text.
                </p>
                """.trimIndent()
            )
        )
    }
}
Loading
aws/sdk-codegen/src/main/kotlin/software/amazon/smithy/rustsdk/AwsReadmeDecorator.kt +179 −8 Original line number Diff line number Diff line Loading @@ -6,6 +6,8 @@ package software.amazon.smithy.rustsdk import org.jsoup.Jsoup import org.jsoup.nodes.Element import org.jsoup.nodes.TextNode import software.amazon.smithy.model.traits.DocumentationTrait import software.amazon.smithy.rust.codegen.rustlang.raw import software.amazon.smithy.rust.codegen.smithy.CodegenContext Loading @@ -13,6 +15,13 @@ import software.amazon.smithy.rust.codegen.smithy.RustCrate import software.amazon.smithy.rust.codegen.smithy.customize.RustCodegenDecorator import software.amazon.smithy.rust.codegen.smithy.generators.ManifestCustomizations import software.amazon.smithy.rust.codegen.util.getTrait import java.lang.StringBuilder import java.util.logging.Logger // Use a sigil that should always be unique in the text to fix line breaks and spaces // since Jsoup doesn't preserve whitespace at all. private const val LINE_BREAK_SIGIL = "[[smithy-rs-br]]" private const val SPACE_SIGIL = "[[smithy-rs-nbsp]]" /** * Generates a README.md for each service crate for display on crates.io. Loading @@ -21,16 +30,17 @@ class AwsReadmeDecorator : RustCodegenDecorator { override val name: String = "AwsReadmeDecorator" override val order: Byte = 0 private val logger: Logger = Logger.getLogger(javaClass.name) override fun crateManifestCustomizations(codegenContext: CodegenContext): ManifestCustomizations = mapOf("package" to mapOf("readme" to "README.md")) override fun extras(codegenContext: CodegenContext, rustCrate: RustCrate) { rustCrate.withFile("README.md") { writer -> // Strip HTML from the doc trait value. In the future when it's written, we can use our Rustdoc // documentation normalization code to convert this to Markdown. 
val description = Jsoup.parse( val description = normalizeDescription( codegenContext.moduleName, codegenContext.settings.getService(codegenContext.model).getTrait<DocumentationTrait>()?.value ?: "" ).text() ) val moduleName = codegenContext.settings.moduleName writer.raw( Loading @@ -39,9 +49,9 @@ class AwsReadmeDecorator : RustCodegenDecorator { **Please Note: The SDK is currently released as an alpha and is intended strictly for feedback purposes only. Do not use this SDK for production workloads.** $description """.trimIndent() + "\n\n$description\n\n" + """ ## Getting Started > Examples are availble for many services and operations, check out the Loading Loading @@ -78,4 +88,165 @@ class AwsReadmeDecorator : RustCodegenDecorator { ) } }

    /**
     * Strips HTML from the description and makes it human-readable Markdown.
     *
     * The input is parsed with Jsoup, rewritten in place by a fixed sequence of passes, and then
     * flattened to text. Because flattening collapses whitespace, line breaks and indentation are
     * carried through as [LINE_BREAK_SIGIL] / [SPACE_SIGIL] and expanded at the very end.
     *
     * @param moduleName only used to prefix the warning logged for unrecognized tags
     * @param input the HTML documentation text to convert
     * @return the Markdown text, without leading or trailing newlines
     */
    internal fun normalizeDescription(moduleName: String, input: String): String {
        val doc = Jsoup.parse(input)
        doc.body().apply {
            // The order of operations here is important:
            stripUndesiredNodes() // Remove `<fullname>`, blank whitespace nodes, etc
            normalizeInlineStyles() // Convert bold/italics tags to Markdown equivalents
            normalizeAnchors() // Convert anchor tags into Markdown links
            normalizeBreaks() // Convert `<br>` tags into newlines
            normalizeLists() // Convert HTML lists into Markdown lists
            normalizeDescriptionLists() // Converts HTML <dl> description lists into Markdown
            normalizeParagraphs() // Replace paragraph tags into blocks of text separated by newlines
            warnOnUnrecognizedElements(moduleName) // Log a warning if we missed something
        }
        // normalizeLineWhitespace() expands both sigils itself, so no separate replace pass is needed here.
        return doc.body().text().normalizeLineWhitespace()
    }

    /**
     * Drops `<fullname>` and `<note>` elements, unwraps `<important>` into a `<span>`, and removes
     * blank text nodes that are direct children of this element.
     *
     * @throws IllegalStateException if a `<note>` element has no child elements
     */
    private fun Element.stripUndesiredNodes() {
        // Remove the `<fullname>` tag
        getElementsByTag("fullname").forEach { it.remove() }

        // Unwrap `<important>` tags
        getElementsByTag("important").forEach { it.changeInto("span") }

        // Remove the `<note>` tag
        getElementsByTag("note").forEach { note ->
            // NOTE(review): the guard fires when the note has NO child elements, but the message used
            // to say it "unexpectedly had children". The condition is kept as-is (flipping it would
            // change which inputs are rejected) and the message now describes what is actually
            // checked; confirm which of the two was intended.
            check(note.children().isNotEmpty()) { "<note> tag unexpectedly had no children" }
            note.remove()
        }

        // Eliminate empty whitespace
        textNodes().filter { it.isBlank }.forEach { it.remove() }
    }

    /**
     * Replaces this element with a fresh [tagName] element that holds copies of this element's
     * child nodes. Only the children are copied over to the replacement.
     */
    private fun Element.changeInto(tagName: String) {
        replaceWith(Element(tagName).also { elem -> elem.appendChildren(childNodesCopy()) })
    }

    /** Rewrites `<b>` as `__…__` and `<i>` as `_…_`. */
    private fun Element.normalizeInlineStyles() {
        getElementsByTag("b").forEach { normalizeInlineStyleTag("__", it) }
        getElementsByTag("i").forEach { normalizeInlineStyleTag("_", it) }
    }

    /** Replaces [tag] with a `<span>` containing copies of its children wrapped in [surround]. */
    private fun normalizeInlineStyleTag(surround: String, tag: Element) {
        tag.replaceWith(
            Element("span").also { span ->
                span.append(surround)
                span.appendChildren(tag.childNodesCopy())
                span.append(surround)
            }
        )
    }

    /**
     * Replaces every `<a>` with a text node: `[text](href)` when the `href` attribute is not blank,
     * otherwise just the anchor's text.
     */
    private fun Element.normalizeAnchors() {
        getElementsByTag("a").forEach { anchor ->
            val text = anchor.text()
            val link = anchor.attr("href")
            val markdown = if (link.isNotBlank()) "[$text]($link)" else text
            anchor.replaceWith(TextNode(markdown))
        }
    }

    /** Replaces every `<br>` with a [LINE_BREAK_SIGIL] text node. */
    private fun Element.normalizeBreaks() {
        getElementsByTag("br").forEach { lineBreak -> lineBreak.replaceWith(TextNode(LINE_BREAK_SIGIL)) }
    }

    /** True for `<ul>` and `<ol>` elements. */
    private fun Element.isList(): Boolean = tagName() == "ul" || tagName() == "ol"

    /** Converts every outermost `<ul>`/`<ol>` (and, through recursion, the lists nested in it). */
    private fun Element.normalizeLists() {
        (getElementsByTag("ul") + getElementsByTag("ol"))
            // Only operate on lists that are top-level (are not nested within other lists)
            .filter { list -> list.parents().none { it.isList() } }
            .forEach { list -> list.normalizeList() }
    }

    /**
     * Replaces this list element with a single text node holding its Markdown form.
     *
     * @param indent nesting depth, starting at 1; each level adds two [SPACE_SIGIL]s of indentation
     */
    private fun Element.normalizeList(indent: Int = 1) {
        // First, replace nested lists so that each item's text already contains their Markdown
        children()
            .filter { it.tagName() == "li" }
            .flatMap { item -> item.children() }
            .filter { it.isList() }
            .forEach { nested -> nested.normalizeList(indent + 1) }

        // Then format the list items down to Markdown
        val prefix = if (tagName() == "ul") "- " else "1. "
        val indentText = SPACE_SIGIL.repeat(indent * 2)
        val items = children()
        val markdown = buildString {
            // A nested list has to start on its own line below the parent item's text
            if (indent != 1) {
                append(LINE_BREAK_SIGIL)
            }
            for (item in items) {
                append("$indentText$prefix${item.text().trim()}$LINE_BREAK_SIGIL")
            }
        }
        replaceWith(TextNode(markdown))
    }

    /** Converts every `<dl>` below this element. */
    private fun Element.normalizeDescriptionLists() {
        getElementsByTag("dl").forEach { list -> list.normalizeDescriptionList() }
    }

    /**
     * Turns each `<dt>` into a bold term on its own line and each `<dd>` into a `<p>`, then
     * replaces the list itself with a `<span>` that ends in a line break.
     */
    private fun Element.normalizeDescriptionList() {
        getElementsByTag("dt").forEach { dt ->
            dt.text("${LINE_BREAK_SIGIL}__${dt.text()}__$LINE_BREAK_SIGIL")
            dt.changeInto("span")
        }
        getElementsByTag("dd").forEach { dd -> dd.changeInto("p") }
        appendChild(TextNode(LINE_BREAK_SIGIL))
        changeInto("span")
    }

    /** Replaces every `<p>` with its text, surrounded by [LINE_BREAK_SIGIL]s. */
    private fun Element.normalizeParagraphs() {
        getElementsByTag("p").forEach { paragraph ->
            paragraph.replaceWith(TextNode(LINE_BREAK_SIGIL + paragraph.text() + LINE_BREAK_SIGIL))
        }
    }

    /**
     * Logs one warning listing the distinct tag names still present, in sorted order.
     * `body` (always present) and `span` (produced by the passes above) are not reported.
     */
    private fun Element.warnOnUnrecognizedElements(moduleName: String) {
        val tags = allElements
            .map { elem -> elem.tagName() }
            .filter { tag -> tag != "body" && tag != "span" }
            .toSortedSet()
            .joinToString(", ")
        if (tags.isNotEmpty()) {
            logger.warning { "[$moduleName] Unrecognized HTML tags encountered when normalizing text: $tags" }
        }
    }

    /** Expands the sigils into real whitespace and tidies up blank lines and line edges. */
    private fun String.normalizeLineWhitespace(): String =
        // Convert sigils back into whitespace
        replace(LINE_BREAK_SIGIL, "\n")
            .replace(SPACE_SIGIL, " ")
            // Replace long runs of linebreaks with just two line breaks
            .replace(Regex("\n\n\n+"), "\n\n")
            // Remove trailing whitespace from each line
            .replace(Regex("[ \t]+\n"), "\n")
            // Remove leading whitespace from each line when it's not a list item
            .replace(Regex("\n[ \t]+([^ \t\\-1])"), "\n$1")
            // Chop off leading newlines
            .replace(Regex("^\n+"), "")
            // Chop off trailing newlines
            .replace(Regex("\n+$"), "")
}
aws/sdk-codegen/src/test/kotlin/AwsReadmeDecoratorTest.kt 0 → 100644 +105 −0 Original line number Diff line number Diff line
/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0.
 */

import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
import software.amazon.smithy.rustsdk.AwsReadmeDecorator

/**
 * Checks the HTML-to-Markdown conversion done by `AwsReadmeDecorator.normalizeDescription`.
 *
 * The expected values are multi-line: paragraphs are separated by a blank line and list items are
 * indented by two spaces per nesting level.
 */
class AwsReadmeDecoratorTest {
    /** `<fullname>` is dropped; bold, italics and anchors become their Markdown forms. */
    @Test
    fun `it converts description HTML into Markdown`() {
        assertEquals(
            """
            This is __some paragraph__ of information.

            This is _another_ paragraph of information.

            More information [can be found here](https://example.com).
            """.trimIndent(),
            AwsReadmeDecorator().normalizeDescription(
                "",
                """
                <fullname>Some service</fullname>
                <p>This is <b>some paragraph</b> of information.</p>
                <p>This is <i>another</i> paragraph of information.</p>
                <p>More information <a href="https://example.com">can be found here</a>.</p>
                """.trimIndent()
            )
        )
    }

    /** Nested lists are indented below their parent item. */
    @Test
    fun `it converts lists`() {
        assertEquals(
            """
            Some text introducing a list:
              - foo bar baz
              - baz bar foo
                1. nested item
                1. another

            More text.
            """.trimIndent(),
            AwsReadmeDecorator().normalizeDescription(
                "",
                """
                <p>Some text introducing a list:
                <ul>
                    <li>foo bar baz</li>
                    <li>baz bar foo
                        <ol><li>nested item</li><li>another</li></ol>
                    </li>
                </ul>
                More text.</p>
                """.trimIndent()
            )
        )
    }

    /** Terms become bold lines and each description becomes its own paragraph. */
    @Test
    fun `it converts description lists`() {
        assertEquals(
            """
            Some text introducing a description list:

            __Something__

            Some description of [something](test).

            __Another thing__

            Some description of another thing.

            A second paragraph that describes another thing.

            __MDN says these can be wrapped in divs__

            So here we are

            Some trailing text.
            """.trimIndent(),
            AwsReadmeDecorator().normalizeDescription(
                "",
                """
                <p>Some text introducing a description list:
                <dl>
                    <dt>Something</dt>
                    <dd>Some description of <a href="test">something</a>.</dd>
                    <dt>Another thing</dt>
                    <dd>Some description of another thing.</dd>
                    <dd>A second paragraph that describes another thing.</dd>
                    <div>
                        <dt>MDN says these can be wrapped in divs</dt>
                        <dd>So here we are</dd>
                    </div>
                </dl>
                Some trailing text.
                </p>
                """.trimIndent()
            )
        )
    }
}