Update `toSnakeCase` to better handle plurals, version numbers, and other... (e61fb6f0) · Commits · Public Repositories / Smithy Rust

CHANGELOG.next.toml

+6 −0

Original line number	Diff line number	Diff line
		@@ -297,3 +297,9 @@ message = "The `customize()` method is now sync and infallible. Remove any `awai
		references = ["smithy-rs#3039"]
		meta = { "breaking" = true, "tada" = false, "bug" = false, "target" = "client" }
		author = "jdisanti"

		[[smithy-rs]]
		message = "Our algorithm for converting identifiers to `snake_case` has been updated. This may result in a small change for some identifiers, particularly acronyms ending in `s`, e.g. `ACLs`."
		references = ["smithy-rs#3037", "aws-sdk-rust#756"]
		meta = { "breaking" = true, "tada" = false, "bug" = true, "target" = "all" }
		author = "rcoh"

aws/sdk-codegen/src/main/kotlin/software/amazon/smithy/rustsdk/AwsDocs.kt

+2 −3

Original line number	Diff line number	Diff line
		@@ -12,7 +12,6 @@ import software.amazon.smithy.rust.codegen.core.rustlang.docsTemplate
		import software.amazon.smithy.rust.codegen.core.rustlang.rustTemplate
		import software.amazon.smithy.rust.codegen.core.rustlang.writable
		import software.amazon.smithy.rust.codegen.core.smithy.customize.writeCustomizationsOrElse
		import software.amazon.smithy.rust.codegen.core.util.toSnakeCase

		object AwsDocs {
		/**
		@@ -27,7 +26,7 @@ object AwsDocs {
		).contains(codegenContext.serviceShape.id)

		fun constructClient(codegenContext: ClientCodegenContext, indent: String): Writable {
		val crateName = codegenContext.moduleName.toSnakeCase()
		val crateName = codegenContext.moduleUseName()
		return writable {
		writeCustomizationsOrElse(
		codegenContext.rootDecorator.extraSections(codegenContext),
		@@ -48,7 +47,7 @@ object AwsDocs {

		fun clientConstructionDocs(codegenContext: ClientCodegenContext): Writable = {
		if (canRelyOnAwsConfig(codegenContext)) {
		val crateName = codegenContext.moduleName.toSnakeCase()
		val crateName = codegenContext.moduleUseName()
		docsTemplate(
		"""
		#### Constructing a `Client`

codegen-core/src/main/kotlin/software/amazon/smithy/rust/codegen/core/util/Strings.kt

+101 −5

Original line number	Diff line number	Diff line
		@@ -8,7 +8,8 @@ package software.amazon.smithy.rust.codegen.core.util
		import software.amazon.smithy.utils.CaseUtils
		import software.amazon.smithy.utils.StringUtils

		fun String.doubleQuote(): String = StringUtils.escapeJavaString(this, "").replace(Regex("""\\u([0-9a-f]{4})""")) { matchResult: MatchResult ->
		fun String.doubleQuote(): String =
		StringUtils.escapeJavaString(this, "").replace(Regex("""\\u([0-9a-f]{4})""")) { matchResult: MatchResult ->
		"\\u{" + matchResult.groupValues[1] + "}" as CharSequence
		}

		@@ -17,11 +18,106 @@ fun String.doubleQuote(): String = StringUtils.escapeJavaString(this, "").replac
		*/
		fun String.dq(): String = this.doubleQuote()

		// String extensions
		private fun String.splitOnWordBoundaries(): List<String> {
		val out = mutableListOf<String>()
		// These are whole words but cased differently, e.g. `IPv4`, `MiB`, `GiB`, `TtL`
		val completeWords = listOf("ipv4", "ipv6", "sigv4", "mib", "gib", "kib", "ttl")
		var currentWord = ""

		// emit the current word and update from the next character
		val emit = { next: Char ->
		if (currentWord.isNotEmpty()) {
		out += currentWord.lowercase()
		}
		currentWord = if (next.isLetterOrDigit()) {
		next.toString()
		} else {
		""
		}
		}
		val allLowerCase = this.lowercase() == this
		this.forEachIndexed { index, nextCharacter ->
		val peek = this.getOrNull(index + 1)
		val doublePeek = this.getOrNull(index + 2)
		val completeWordInProgress = completeWords.any {
		(currentWord + this.substring(index)).lowercase().startsWith(
		it,
		)
		} && !completeWords.contains(currentWord.lowercase())
		when {
		// [C] in these docs indicates the value of nextCharacter
		// A[_]B
		!nextCharacter.isLetterOrDigit() -> emit(nextCharacter)

		// If we have no letters so far, push the next letter (we already know it's a letter or digit)
		currentWord.isEmpty() -> currentWord += nextCharacter.toString()

		// Abc[D]ef or Ab2[D]ef
		!completeWordInProgress && loweredFollowedByUpper(currentWord, nextCharacter) -> emit(nextCharacter)

		// s3[k]ey
		!completeWordInProgress && allLowerCase && digitFollowedByLower(currentWord, nextCharacter) -> emit(
		nextCharacter,
		)

		// DB[P]roxy, or `IAM[U]ser` but not AC[L]s
		endOfAcronym(currentWord, nextCharacter, peek, doublePeek) -> emit(nextCharacter)

		// If we haven't found a word boundary, push it and keep going
		else -> currentWord += nextCharacter.toString()
		}
		}
		if (currentWord.isNotEmpty()) {
		out += currentWord
		}
		return out
		}

		/**
		* Handle cases like `DB[P]roxy`, `ARN[S]upport`, `AC[L]s`
		*/
		private fun endOfAcronym(current: String, nextChar: Char, peek: Char?, doublePeek: Char?): Boolean {
		if (!current.last().isUpperCase()) {
		// Not an acronym in progress
		return false
		}
		if (!nextChar.isUpperCase()) {
		// We aren't at the next word yet
		return false
		}

		if (peek?.isLowerCase() != true) {
		return false
		}

		// Skip cases like `AR[N]s`, `AC[L]s` but not `IAM[U]ser`
		if (peek == 's' && (doublePeek == null \|\| !doublePeek.isLowerCase())) {
		return false
		}

		// Skip cases like `DynamoD[B]v2`
		if (peek == 'v' && doublePeek?.isDigit() == true) {
		return false
		}
		return true
		}

		private fun loweredFollowedByUpper(current: String, nextChar: Char): Boolean {
		if (!nextChar.isUpperCase()) {
		return false
		}
		return current.last().isLowerCase() \|\| current.last().isDigit()
		}

		private fun digitFollowedByLower(current: String, nextChar: Char): Boolean {
		return (current.last().isDigit() && nextChar.isLowerCase())
		}

		fun String.toSnakeCase(): String {
		return CaseUtils.toSnakeCase(this)
		return this.splitOnWordBoundaries().joinToString("_") { it.lowercase() }
		}

		fun String.toPascalCase(): String {
		// TODO(https://github.com/awslabs/smithy-rs/issues/3047): consider using our updated toSnakeCase (but need to audit diff)
		return CaseUtils.toSnakeCase(this).let { CaseUtils.toPascalCase(it) }
		}

codegen-core/src/test/kotlin/software/amazon/smithy/rust/codegen/core/util/StringsTest.kt

+81 −0

Original line number	Diff line number	Diff line
		@@ -7,6 +7,14 @@ package software.amazon.smithy.rust.codegen.core.util

		import io.kotest.matchers.shouldBe
		import org.junit.jupiter.api.Test
		import org.junit.jupiter.api.extension.ExtensionContext
		import org.junit.jupiter.api.fail
		import org.junit.jupiter.params.ParameterizedTest
		import org.junit.jupiter.params.provider.Arguments
		import org.junit.jupiter.params.provider.ArgumentsProvider
		import org.junit.jupiter.params.provider.ArgumentsSource
		import java.io.File
		import java.util.stream.Stream

		internal class StringsTest {

		@@ -18,4 +26,77 @@ internal class StringsTest {
		"{\"nested\": \"{\\\"nested\\\": 5}\"}\"}"
		""".trimIndent().trim()
		}

		@Test
		fun correctlyConvertToSnakeCase() {
		"NotificationARNs".toSnakeCase() shouldBe "notification_arns"
		}

		@Test
		fun testAllNames() {
		// Set this to true to write a new test expectation file
		val publishUpdate = false
		val allNames = this::class.java.getResource("/testOutput.txt")?.readText()!!
		val errors = mutableListOf<String>()
		val output = StringBuilder()
		allNames.lines().filter { it.isNotBlank() }.forEach {
		val split = it.split(',')
		val input = split[0]
		val expectation = split[1]
		val actual = input.toSnakeCase()
		if (input.toSnakeCase() != expectation) {
		errors += "$it => $actual (expected $expectation)"
		}
		output.appendLine("$input,$actual")
		}
		if (publishUpdate) {
		File("testOutput.txt").writeText(output.toString())
		}
		if (errors.isNotEmpty()) {
		fail(errors.joinToString("\n"))
		}
		}

		@ParameterizedTest
		@ArgumentsSource(TestCasesProvider::class)
		fun testSnakeCase(input: String, output: String) {
		input.toSnakeCase() shouldBe output
		}
		}

		class TestCasesProvider : ArgumentsProvider {
		override fun provideArguments(context: ExtensionContext?): Stream<out Arguments> =
		listOf(
		"ACLs" to "acls",
		"ACLsUpdateStatus" to "acls_update_status",
		"AllowedAllVPCs" to "allowed_all_vpcs",
		"BluePrimaryX" to "blue_primary_x",
		"CIDRs" to "cidrs",
		"AuthTtL" to "auth_ttl",
		"CNAMEPrefix" to "cname_prefix",
		"S3Location" to "s3_location",
		"signatureS" to "signature_s",
		"signatureR" to "signature_r",
		"M3u8Settings" to "m3u8_settings",
		"IAMUser" to "iam_user",
		"OtaaV1_0_x" to "otaa_v1_0_x",
		"DynamoDBv2Action" to "dynamo_dbv2_action",
		"SessionKeyEmv2000" to "session_key_emv2000",
		"SupportsClassB" to "supports_class_b",
		"UnassignIpv6AddressesRequest" to "unassign_ipv6_addresses_request",
		"TotalGpuMemoryInMiB" to "total_gpu_memory_in_mib",
		"WriteIOs" to "write_ios",
		"dynamoDBv2" to "dynamo_dbv2",
		"ipv4Address" to "ipv4_address",
		"sigv4" to "sigv4",
		"s3key" to "s3_key",
		"sha256sum" to "sha256_sum",
		"Av1QvbrSettings" to "av1_qvbr_settings",
		"Av1Settings" to "av1_settings",
		"AwsElbv2LoadBalancer" to "aws_elbv2_load_balancer",
		"SigV4Authorization" to "sigv4_authorization",
		"IpV6Address" to "ipv6_address",
		"IpV6Cidr" to "ipv6_cidr",
		"IpV4Addresses" to "ipv4_addresses",
		).map { Arguments.of(it.first, it.second) }.stream()
		}

codegen-core/src/test/resources/allNames.txt

0 → 100644

+62988 −0

File added.

Preview size limit exceeded, changes collapsed.