Skip to content

Commit 8d3ad57

Browse files
authored
feat: Mark array_compact as Compatible and improve test coverage (#3889)
1 parent ef99fe8 commit 8d3ad57

3 files changed

Lines changed: 28 additions & 4 deletions

File tree

spark/src/main/scala/org/apache/comet/serde/arrays.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -314,7 +314,7 @@ object CometArrayRepeat extends CometExpressionSerde[ArrayRepeat] {
314314

315315
object CometArrayCompact extends CometExpressionSerde[Expression] {
316316

317-
override def getSupportLevel(expr: Expression): SupportLevel = Incompatible(None)
317+
override def getSupportLevel(expr: Expression): SupportLevel = Compatible()
318318

319319
override def convert(
320320
expr: Expression,

spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,12 @@ trait CometExprShim extends CommonStringExprs {
113113
// val optExpr = scalarFunctionExprToProto("width_bucket", childExprs: _*)
114114
// optExprWithInfo(optExpr, wb, wb.children: _*)
115115

116+
// KnownNotContainsNull is a TaggingExpression added in Spark 4.0 that only
117+
// changes schema metadata (containsNull = false). It has no runtime effect,
118+
// so we pass through to the child expression.
119+
case k: KnownNotContainsNull =>
120+
exprToProtoInternal(k.child, inputs, binding)
121+
116122
case _ => None
117123
}
118124
}

spark/src/test/resources/sql-tests/expressions/array/array_compact.sql

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,17 +15,35 @@
1515
-- specific language governing permissions and limitations
1616
-- under the License.
1717

18-
-- ConfigMatrix: parquet.enable.dictionary=false,true
1918

2019
statement
2120
CREATE TABLE test_array_compact(arr array<int>) USING parquet
2221

2322
statement
2423
INSERT INTO test_array_compact VALUES (array(1, NULL, 2, NULL, 3)), (array()), (NULL), (array(NULL, NULL)), (array(1, 2, 3))
2524

26-
query spark_answer_only
25+
-- column argument
26+
query
2727
SELECT array_compact(arr) FROM test_array_compact
2828

2929
-- literal arguments
30-
query spark_answer_only
30+
query
3131
SELECT array_compact(array(1, NULL, 2, NULL, 3))
32+
33+
-- string element type
34+
statement
35+
CREATE TABLE test_array_compact_str(arr array<string>) USING parquet
36+
37+
statement
38+
INSERT INTO test_array_compact_str VALUES (array('a', NULL, 'b', NULL, 'c')), (array()), (NULL), (array(NULL, NULL)), (array('', NULL, '', NULL))
39+
40+
query
41+
SELECT array_compact(arr) FROM test_array_compact_str
42+
43+
-- double element type
44+
query
45+
SELECT array_compact(array(1.0, NULL, 2.0, NULL, 3.0))
46+
47+
-- nested array type (removes null arrays from outer, preserves null elements in inner)
48+
query
49+
SELECT array_compact(array(array(1, NULL, 3), NULL, array(NULL, 2, 3)))

0 commit comments

Comments
 (0)