Unverified Commit e61fb6f0 authored by Russell Cohen's avatar Russell Cohen Committed by GitHub
Browse files

Update `toSnakeCase` to better handle plurals, version numbers, and other...

Update `toSnakeCase` to better handle plurals, version numbers, and other pathological cases (#3037)

## Motivation and Context
There are currently a lot of fields in the SDK that are clearly
converted to snake-case wrong:
<img width="738" alt="Screenshot 2023-10-09 at 12 00 43 PM"
src="https://github.com/awslabs/smithy-rs/assets/492903/796b5dac-ca26-4a55-b9da-b88a976251d5">
<img width="417" alt="Screenshot 2023-10-09 at 12 02 48 PM"
src="https://github.com/awslabs/smithy-rs/assets/492903/b091bf50-2e6b-4ada-b2f8-8c7158f18f7d">


## Description
- Author a new splitWords algorithm
- Add snapshot test
- Add lots of individual tests.

## Testing
- snapshot testing, compared with current behavior.

## Checklist
<!--- If a checkbox below is not applicable, then please DELETE it
rather than leaving it unchecked -->
- [ ] I have updated `CHANGELOG.next.toml` if I made changes to the
smithy-rs codegen or runtime crates
- [ ] I have updated `CHANGELOG.next.toml` if I made changes to the AWS
SDK, generated SDK code, or SDK runtime crates

----

_By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice._
parent 8bfc4c61
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -297,3 +297,9 @@ message = "The `customize()` method is now sync and infallible. Remove any `awai
references = ["smithy-rs#3039"]
meta = { "breaking" = true, "tada" = false, "bug" = false, "target" = "client" }
author = "jdisanti"

[[smithy-rs]]
message = "Our algorithm for converting identifiers to `snake_case` has been updated. This may result in a small change for some identifiers, particularly acronyms ending in `s`, e.g. `ACLs`."
references = ["smithy-rs#3037", "aws-sdk-rust#756"]
meta = { "breaking" = true, "tada" = false, "bug" = true, "target" = "all" }
author = "rcoh"
+2 −3
Original line number Diff line number Diff line
@@ -12,7 +12,6 @@ import software.amazon.smithy.rust.codegen.core.rustlang.docsTemplate
import software.amazon.smithy.rust.codegen.core.rustlang.rustTemplate
import software.amazon.smithy.rust.codegen.core.rustlang.writable
import software.amazon.smithy.rust.codegen.core.smithy.customize.writeCustomizationsOrElse
import software.amazon.smithy.rust.codegen.core.util.toSnakeCase

object AwsDocs {
    /**
@@ -27,7 +26,7 @@ object AwsDocs {
            ).contains(codegenContext.serviceShape.id)

    fun constructClient(codegenContext: ClientCodegenContext, indent: String): Writable {
        val crateName = codegenContext.moduleName.toSnakeCase()
        val crateName = codegenContext.moduleUseName()
        return writable {
            writeCustomizationsOrElse(
                codegenContext.rootDecorator.extraSections(codegenContext),
@@ -48,7 +47,7 @@ object AwsDocs {

    fun clientConstructionDocs(codegenContext: ClientCodegenContext): Writable = {
        if (canRelyOnAwsConfig(codegenContext)) {
            val crateName = codegenContext.moduleName.toSnakeCase()
            val crateName = codegenContext.moduleUseName()
            docsTemplate(
                """
                #### Constructing a `Client`
+101 −5
Original line number Diff line number Diff line
@@ -8,7 +8,8 @@ package software.amazon.smithy.rust.codegen.core.util
import software.amazon.smithy.utils.CaseUtils
import software.amazon.smithy.utils.StringUtils

fun String.doubleQuote(): String = StringUtils.escapeJavaString(this, "").replace(Regex("""\\u([0-9a-f]{4})""")) { matchResult: MatchResult ->
fun String.doubleQuote(): String =
    StringUtils.escapeJavaString(this, "").replace(Regex("""\\u([0-9a-f]{4})""")) { matchResult: MatchResult ->
        "\\u{" + matchResult.groupValues[1] + "}" as CharSequence
    }

@@ -17,11 +18,106 @@ fun String.doubleQuote(): String = StringUtils.escapeJavaString(this, "").replac
 */
fun String.dq(): String = this.doubleQuote()

// String extensions
private fun String.splitOnWordBoundaries(): List<String> {
    val out = mutableListOf<String>()
    // These are whole words but cased differently, e.g. `IPv4`, `MiB`, `GiB`, `TtL`
    val completeWords = listOf("ipv4", "ipv6", "sigv4", "mib", "gib", "kib", "ttl")
    var currentWord = ""

    // emit the current word and update from the next character
    val emit = { next: Char ->
        if (currentWord.isNotEmpty()) {
            out += currentWord.lowercase()
        }
        currentWord = if (next.isLetterOrDigit()) {
            next.toString()
        } else {
            ""
        }
    }
    val allLowerCase = this.lowercase() == this
    this.forEachIndexed { index, nextCharacter ->
        val peek = this.getOrNull(index + 1)
        val doublePeek = this.getOrNull(index + 2)
        val completeWordInProgress = completeWords.any {
            (currentWord + this.substring(index)).lowercase().startsWith(
                it,
            )
        } && !completeWords.contains(currentWord.lowercase())
        when {
            // [C] in these docs indicates the value of nextCharacter
            // A[_]B
            !nextCharacter.isLetterOrDigit() -> emit(nextCharacter)

            // If we have no letters so far, push the next letter (we already know it's a letter or digit)
            currentWord.isEmpty() -> currentWord += nextCharacter.toString()

            // Abc[D]ef or Ab2[D]ef
            !completeWordInProgress && loweredFollowedByUpper(currentWord, nextCharacter) -> emit(nextCharacter)

            // s3[k]ey
            !completeWordInProgress && allLowerCase && digitFollowedByLower(currentWord, nextCharacter) -> emit(
                nextCharacter,
            )

            // DB[P]roxy, or `IAM[U]ser` but not AC[L]s
            endOfAcronym(currentWord, nextCharacter, peek, doublePeek) -> emit(nextCharacter)

            // If we haven't found a word boundary, push it and keep going
            else -> currentWord += nextCharacter.toString()
        }
    }
    if (currentWord.isNotEmpty()) {
        out += currentWord
    }
    return out
}

/**
 * Handle cases like `DB[P]roxy`, `ARN[S]upport`, `AC[L]s`
 */
private fun endOfAcronym(current: String, nextChar: Char, peek: Char?, doublePeek: Char?): Boolean {
    if (!current.last().isUpperCase()) {
        // Not an acronym in progress
        return false
    }
    if (!nextChar.isUpperCase()) {
        // We aren't at the next word yet
        return false
    }

    if (peek?.isLowerCase() != true) {
        return false
    }

    // Skip cases like `AR[N]s`, `AC[L]s` but not `IAM[U]ser`
    if (peek == 's' && (doublePeek == null || !doublePeek.isLowerCase())) {
        return false
    }

    // Skip cases like `DynamoD[B]v2`
    if (peek == 'v' && doublePeek?.isDigit() == true) {
        return false
    }
    return true
}

private fun loweredFollowedByUpper(current: String, nextChar: Char): Boolean {
    if (!nextChar.isUpperCase()) {
        return false
    }
    return current.last().isLowerCase() || current.last().isDigit()
}

private fun digitFollowedByLower(current: String, nextChar: Char): Boolean {
    return (current.last().isDigit() && nextChar.isLowerCase())
}

fun String.toSnakeCase(): String {
    return CaseUtils.toSnakeCase(this)
    return this.splitOnWordBoundaries().joinToString("_") { it.lowercase() }
}

fun String.toPascalCase(): String {
    // TODO(https://github.com/awslabs/smithy-rs/issues/3047): consider using our updated toSnakeCase (but need to audit diff)
    return CaseUtils.toSnakeCase(this).let { CaseUtils.toPascalCase(it) }
}
+81 −0
Original line number Diff line number Diff line
@@ -7,6 +7,14 @@ package software.amazon.smithy.rust.codegen.core.util

import io.kotest.matchers.shouldBe
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.extension.ExtensionContext
import org.junit.jupiter.api.fail
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.Arguments
import org.junit.jupiter.params.provider.ArgumentsProvider
import org.junit.jupiter.params.provider.ArgumentsSource
import java.io.File
import java.util.stream.Stream

internal class StringsTest {

@@ -18,4 +26,77 @@ internal class StringsTest {
        "{\"nested\": \"{\\\"nested\\\": 5}\"}\"}"
        """.trimIndent().trim()
    }

    @Test
    fun correctlyConvertToSnakeCase() {
        "NotificationARNs".toSnakeCase() shouldBe "notification_arns"
    }

    @Test
    fun testAllNames() {
        // Set this to true to write a new test expectation file
        val publishUpdate = false
        val allNames = this::class.java.getResource("/testOutput.txt")?.readText()!!
        val errors = mutableListOf<String>()
        val output = StringBuilder()
        allNames.lines().filter { it.isNotBlank() }.forEach {
            val split = it.split(',')
            val input = split[0]
            val expectation = split[1]
            val actual = input.toSnakeCase()
            if (input.toSnakeCase() != expectation) {
                errors += "$it => $actual (expected $expectation)"
            }
            output.appendLine("$input,$actual")
        }
        if (publishUpdate) {
            File("testOutput.txt").writeText(output.toString())
        }
        if (errors.isNotEmpty()) {
            fail(errors.joinToString("\n"))
        }
    }

    @ParameterizedTest
    @ArgumentsSource(TestCasesProvider::class)
    fun testSnakeCase(input: String, output: String) {
        input.toSnakeCase() shouldBe output
    }
}

class TestCasesProvider : ArgumentsProvider {
    override fun provideArguments(context: ExtensionContext?): Stream<out Arguments> =
        listOf(
            "ACLs" to "acls",
            "ACLsUpdateStatus" to "acls_update_status",
            "AllowedAllVPCs" to "allowed_all_vpcs",
            "BluePrimaryX" to "blue_primary_x",
            "CIDRs" to "cidrs",
            "AuthTtL" to "auth_ttl",
            "CNAMEPrefix" to "cname_prefix",
            "S3Location" to "s3_location",
            "signatureS" to "signature_s",
            "signatureR" to "signature_r",
            "M3u8Settings" to "m3u8_settings",
            "IAMUser" to "iam_user",
            "OtaaV1_0_x" to "otaa_v1_0_x",
            "DynamoDBv2Action" to "dynamo_dbv2_action",
            "SessionKeyEmv2000" to "session_key_emv2000",
            "SupportsClassB" to "supports_class_b",
            "UnassignIpv6AddressesRequest" to "unassign_ipv6_addresses_request",
            "TotalGpuMemoryInMiB" to "total_gpu_memory_in_mib",
            "WriteIOs" to "write_ios",
            "dynamoDBv2" to "dynamo_dbv2",
            "ipv4Address" to "ipv4_address",
            "sigv4" to "sigv4",
            "s3key" to "s3_key",
            "sha256sum" to "sha256_sum",
            "Av1QvbrSettings" to "av1_qvbr_settings",
            "Av1Settings" to "av1_settings",
            "AwsElbv2LoadBalancer" to "aws_elbv2_load_balancer",
            "SigV4Authorization" to "sigv4_authorization",
            "IpV6Address" to "ipv6_address",
            "IpV6Cidr" to "ipv6_cidr",
            "IpV4Addresses" to "ipv4_addresses",
        ).map { Arguments.of(it.first, it.second) }.stream()
}
+62988 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading