-
Notifications
You must be signed in to change notification settings - Fork 32
Add string extensions quote and reverse #998
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -137,6 +137,17 @@ public enum Function { | |
| SimpleType.STRING, | ||
| SimpleType.STRING)), | ||
| CelFunctionBinding.from("string_lower_ascii", String.class, Ascii::toLowerCase)), | ||
| QUOTE( | ||
| CelFunctionDecl.newFunctionDeclaration( | ||
| "strings.quote", | ||
| CelOverloadDecl.newGlobalOverload( | ||
| "strings_quote", | ||
| "Takes the given string and makes it safe to print (without any formatting" | ||
| + " due to escape sequences). If any invalid UTF-8 characters are" | ||
| + " encountered, they are replaced with \\uFFFD.", | ||
| SimpleType.STRING, | ||
| ImmutableList.of(SimpleType.STRING))), | ||
| CelFunctionBinding.from("strings_quote", String.class, CelStringExtensions::quote)), | ||
| REPLACE( | ||
| CelFunctionDecl.newFunctionDeclaration( | ||
| "replace", | ||
|
|
@@ -164,6 +175,16 @@ public enum Function { | |
| "string_replace_string_string_int", | ||
| ImmutableList.of(String.class, String.class, String.class, Long.class), | ||
| CelStringExtensions::replace)), | ||
| REVERSE( | ||
| CelFunctionDecl.newFunctionDeclaration( | ||
| "reverse", | ||
| CelOverloadDecl.newMemberOverload( | ||
| "string_reverse", | ||
| "Returns a new string whose characters are the same as the target string," | ||
| + " only formatted in reverse order.", | ||
| SimpleType.STRING, | ||
| SimpleType.STRING)), | ||
| CelFunctionBinding.from("string_reverse", String.class, CelStringExtensions::reverse)), | ||
| SPLIT( | ||
| CelFunctionDecl.newFunctionDeclaration( | ||
| "split", | ||
|
|
@@ -449,6 +470,57 @@ private static Long lastIndexOf(CelCodePointArray str, CelCodePointArray substr, | |
| return -1L; | ||
| } | ||
|
|
||
| private static String quote(String s) { | ||
| StringBuilder sb = new StringBuilder(s.length() + 2); | ||
| sb.append('"'); | ||
| for (int i = 0; i < s.length(); ) { | ||
| int codePoint = s.codePointAt(i); | ||
| if (!Character.isValidCodePoint(codePoint) | ||
| || Character.isLowSurrogate(s.charAt(i)) | ||
| || (Character.isHighSurrogate(s.charAt(i)) | ||
| && (i + 1 >= s.length() || !Character.isLowSurrogate(s.charAt(i + 1))))) { | ||
| sb.append('\uFFFD'); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add a test for the replacement behavior here? Something similar to cel-go's test here should work: https://github.com/google/cel-go/blob/7114ed27a63255f33c689fbff0ee8a08298f70ab/ext/strings_test.go#L623 We should also add tests for low/high surrogate boundaries: |
||
| i++; | ||
| continue; | ||
| } | ||
| switch (codePoint) { | ||
| case '\u0007': | ||
| sb.append("\\a"); | ||
| break; | ||
| case '\b': | ||
| sb.append("\\b"); | ||
| break; | ||
| case '\f': | ||
| sb.append("\\f"); | ||
| break; | ||
| case '\n': | ||
| sb.append("\\n"); | ||
| break; | ||
| case '\r': | ||
| sb.append("\\r"); | ||
| break; | ||
| case '\t': | ||
| sb.append("\\t"); | ||
| break; | ||
| case '\u000B': | ||
| sb.append("\\v"); | ||
| break; | ||
| case '\\': | ||
| sb.append("\\\\"); | ||
| break; | ||
| case '"': | ||
| sb.append("\\\""); | ||
| break; | ||
| default: | ||
| sb.appendCodePoint(codePoint); | ||
| break; | ||
| } | ||
| i += Character.charCount(codePoint); | ||
| } | ||
| sb.append('"'); | ||
| return sb.toString(); | ||
| } | ||
|
|
||
| private static String replaceAll(Object[] objects) { | ||
| return replace((String) objects[0], (String) objects[1], (String) objects[2], -1); | ||
| } | ||
|
|
@@ -504,6 +576,10 @@ private static String replace(String text, String searchString, String replaceme | |
| return sb.append(textCpa.slice(start, textCpa.length())).toString(); | ||
| } | ||
|
|
||
| private static String reverse(String s) { | ||
| return new StringBuilder(s).reverse().toString(); | ||
| } | ||
|
|
||
| private static List<String> split(String str, String separator) { | ||
| return split(str, separator, Integer.MAX_VALUE); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,7 +70,9 @@ public void library() { | |
| "lastIndexOf", | ||
| "lowerAscii", | ||
| "replace", | ||
| "reverse", | ||
| "split", | ||
| "strings.quote", | ||
| "substring", | ||
| "trim", | ||
| "upperAscii"); | ||
|
|
@@ -1467,6 +1469,58 @@ public void stringExtension_functionSubset_success() throws Exception { | |
| assertThat(evaluatedResult).isEqualTo(true); | ||
| } | ||
|
|
||
| @Test | ||
| @TestParameters("{string: 'abcd', expectedResult: 'dcba'}") | ||
| @TestParameters("{string: '', expectedResult: ''}") | ||
| @TestParameters("{string: 'a', expectedResult: 'a'}") | ||
| @TestParameters("{string: 'hello world', expectedResult: 'dlrow olleh'}") | ||
| @TestParameters("{string: 'ab가cd', expectedResult: 'dc가ba'}") | ||
| public void reverse_success(String string, String expectedResult) throws Exception { | ||
| CelAbstractSyntaxTree ast = COMPILER.compile("s.reverse()").getAst(); | ||
| CelRuntime.Program program = RUNTIME.createProgram(ast); | ||
|
|
||
| Object evaluatedResult = program.eval(ImmutableMap.of("s", string)); | ||
|
|
||
| assertThat(evaluatedResult).isEqualTo(expectedResult); | ||
| } | ||
|
|
||
| @Test | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we also port the test over for invisible formatting characters? |
||
| public void reverse_unicode() throws Exception { | ||
| CelAbstractSyntaxTree ast = COMPILER.compile("s.reverse()").getAst(); | ||
| CelRuntime.Program program = RUNTIME.createProgram(ast); | ||
|
|
||
| Object evaluatedResult = program.eval(ImmutableMap.of("s", "😁😑😦")); | ||
|
|
||
| assertThat(evaluatedResult).isEqualTo("😦😑😁"); | ||
| } | ||
|
|
||
| @Test | ||
| @TestParameters("{string: 'hello', expectedResult: '\"hello\"'}") | ||
| @TestParameters("{string: '', expectedResult: '\"\"'}") | ||
| @TestParameters("{string: 'contains \\\"quotes\\\"', expectedResult: '\"contains \\\\\\\"quotes\\\\\\\"\"'}") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add a test for single quote case as well: |
||
| public void quote_success(String string, String expectedResult) throws Exception { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Boundary cases for backslashes appearing at the very beginning or the end are also worth adding: |
||
| CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst(); | ||
| CelRuntime.Program program = RUNTIME.createProgram(ast); | ||
|
|
||
| Object evaluatedResult = program.eval(ImmutableMap.of("s", string)); | ||
|
|
||
| assertThat(evaluatedResult).isEqualTo(expectedResult); | ||
| } | ||
|
|
||
| @Test | ||
| public void quote_escapesSpecialCharacters() throws Exception { | ||
| CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst(); | ||
| CelRuntime.Program program = RUNTIME.createProgram(ast); | ||
|
|
||
| Object evaluatedResult = | ||
| program.eval( | ||
| ImmutableMap.of( | ||
| "s", "\u0007bell\u000Bvtab\bback\ffeed\rret\nline\ttab\\slash 가 😁")); | ||
|
|
||
| assertThat(evaluatedResult) | ||
| .isEqualTo("\"\\abell\\vvtab\\bback\\ffeed\\rret\\nline\\ttab\\\\slash 가 😁\""); | ||
| } | ||
|
|
||
| @Test | ||
| public void stringExtension_compileUnallowedFunction_throws() { | ||
| CelCompiler celCompiler = | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For readability, would you mind extracting this into a separate helper method? The last line checking for unpaired high surrogate is especially worth pulling out into a named variable or adding a comment around it: