First pass at optimizing SplitOnWordBoundaries (#3140) (59a47833) · Commits · Public Repositories / Smithy Rust

codegen-core/src/main/kotlin/software/amazon/smithy/rust/codegen/core/util/Strings.kt

+17 −10

Original line number	Diff line number	Diff line
		@@ -19,13 +19,16 @@ fun String.doubleQuote(): String =
		/** Shorthand for [doubleQuote]: returns the result of calling `doubleQuote()` on this string. */
		fun String.dq(): String = this.doubleQuote()

		// Lowercase words that `splitOnWordBoundaries` compares against, ignoring case, when deciding
		// whether a complete word is still in progress.
		private val completeWords: List<String> = listOf("ipv4", "ipv6", "sigv4", "mib", "gib", "kib", "ttl")

		private fun String.splitOnWordBoundaries(): List<String> {
		val out = mutableListOf<String>()
		// These are whole words but cased differently, e.g. `IPv4`, `MiB`, `GiB`, `TtL`
		var currentWord = ""

		var completeWordInProgress = true
		// emit the current word and update from the next character
		val emit = { next: Char ->
		completeWordInProgress = true
		if (currentWord.isNotEmpty()) {
		out += currentWord.lowercase()
		}
		@@ -37,13 +40,17 @@ private fun String.splitOnWordBoundaries(): List<String> {
		}
		val allLowerCase = this.lowercase() == this
		this.forEachIndexed { index, nextCharacter ->
		val peek = this.getOrNull(index + 1)
		val doublePeek = this.getOrNull(index + 2)
		val completeWordInProgress = completeWords.any {
		(currentWord + this.substring(index)).lowercase().startsWith(
		val computeWordInProgress = {
		val result = completeWordInProgress && currentWord.isNotEmpty() && completeWords.any {
		it.startsWith(currentWord, ignoreCase = true) && (currentWord + this.substring(index)).startsWith(
		it,
		)
		} && !completeWords.contains(currentWord.lowercase())
		ignoreCase = true,
		) && !it.equals(currentWord, ignoreCase = true)
		}

		completeWordInProgress = result
		result
		}
		when {
		// [C] in these docs indicates the value of nextCharacter
		// A[_]B
		@@ -53,15 +60,15 @@ private fun String.splitOnWordBoundaries(): List<String> {
		currentWord.isEmpty() -> currentWord += nextCharacter.toString()

		// Abc[D]ef or Ab2[D]ef
		!completeWordInProgress && loweredFollowedByUpper(currentWord, nextCharacter) -> emit(nextCharacter)
		!computeWordInProgress() && loweredFollowedByUpper(currentWord, nextCharacter) -> emit(nextCharacter)

		// s3[k]ey
		!completeWordInProgress && allLowerCase && digitFollowedByLower(currentWord, nextCharacter) -> emit(
		!computeWordInProgress() && allLowerCase && digitFollowedByLower(currentWord, nextCharacter) -> emit(
		nextCharacter,
		)

		// DB[P]roxy, or `IAM[U]ser` but not AC[L]s
		endOfAcronym(currentWord, nextCharacter, peek, doublePeek) -> emit(nextCharacter)
		endOfAcronym(currentWord, nextCharacter, this.getOrNull(index + 1), this.getOrNull(index + 2)) -> emit(nextCharacter)

		// If we haven't found a word boundary, push it and keep going
		else -> currentWord += nextCharacter.toString()