Skip to content

Commit 1da18dd

Browse files
authored
chore: Remove deprecated SCAN_NATIVE_COMET constant and related test code (#3671)
1 parent ae8e57c commit 1da18dd

17 files changed

Lines changed: 45 additions & 820 deletions

File tree

common/src/main/scala/org/apache/comet/CometConf.scala

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -114,12 +114,6 @@ object CometConf extends ShimCometConf {
114114
.booleanConf
115115
.createWithEnvVarOrDefault("ENABLE_COMET_WRITE", false)
116116

117-
// Deprecated: native_comet uses mutable buffers incompatible with Arrow FFI best practices
118-
// and does not support complex types. Use native_iceberg_compat or auto instead.
119-
// This will be removed in a future release.
120-
// See: https://github.com/apache/datafusion-comet/issues/2186
121-
@deprecated("Use SCAN_AUTO instead. native_comet will be removed in a future release.", "0.9.0")
122-
val SCAN_NATIVE_COMET = "native_comet"
123117
val SCAN_NATIVE_DATAFUSION = "native_datafusion"
124118
val SCAN_NATIVE_ICEBERG_COMPAT = "native_iceberg_compat"
125119
val SCAN_AUTO = "auto"

common/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -271,7 +271,7 @@ object Utils extends CometTypeShim {
271271
throw new SparkException(
272272
s"Comet execution only takes Arrow Arrays, but got ${c.getClass}. " +
273273
"This typically happens when a Comet scan falls back to Spark due to unsupported " +
274-
"data types (e.g., complex types like structs, arrays, or maps with native_comet). " +
274+
"data types (e.g., complex types like structs, arrays, or maps). " +
275275
"To resolve this, you can: " +
276276
"(1) enable spark.comet.scan.allowIncompatible=true to use a compatible native " +
277277
"scan variant, or " +

spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ import org.apache.comet.parquet.CometParquetFileFormat
5151
*
5252
* This is a hybrid scan where the native plan will contain a `ScanExec` that reads batches of
5353
* data from the JVM via JNI. The ultimate source of data may be a JVM implementation such as
54-
* Spark readers, or could be the `native_comet` or `native_iceberg_compat` native scans.
54+
* Spark readers, or could be the `native_iceberg_compat` native scan.
5555
*
5656
* Note that scanImpl can only be `native_datafusion` after CometScanRule runs and before
5757
* CometExecRule runs. It will never be set to `native_datafusion` at execution time

spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -812,13 +812,7 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
812812

813813
// https://github.com/apache/datafusion-comet/issues/2612
814814
test("array_reverse - fallback for binary array") {
815-
val fallbackReason =
816-
if (CometConf.COMET_NATIVE_SCAN_IMPL.key == CometConf.SCAN_NATIVE_COMET || sys.env
817-
.getOrElse("COMET_PARQUET_SCAN_IMPL", "") == CometConf.SCAN_NATIVE_COMET) {
818-
"Unsupported schema"
819-
} else {
820-
CometArrayReverse.unsupportedReason
821-
}
815+
val fallbackReason = CometArrayReverse.unsupportedReason
822816
withTable("t1") {
823817
sql("""create table t1 using parquet as
824818
select cast(null as array<binary>) c1, cast(array() as array<binary>) c2

spark/src/test/scala/org/apache/comet/CometCastSuite.scala

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1205,14 +1205,8 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
12051205
|USING parquet
12061206
""".stripMargin)
12071207
sql("INSERT INTO TABLE tab1 SELECT named_struct('col1','1','col2','2')")
1208-
if (!usingLegacyNativeCometScan) {
1209-
checkSparkAnswerAndOperator(
1210-
"SELECT CAST(s AS struct<field1:string, field2:string>) AS new_struct FROM tab1")
1211-
} else {
1212-
// Should just fall back to Spark since non-DataSourceExec scan does not support nested types.
1213-
checkSparkAnswer(
1214-
"SELECT CAST(s AS struct<field1:string, field2:string>) AS new_struct FROM tab1")
1215-
}
1208+
checkSparkAnswerAndOperator(
1209+
"SELECT CAST(s AS struct<field1:string, field2:string>) AS new_struct FROM tab1")
12161210
}
12171211
}
12181212

spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala

Lines changed: 6 additions & 185 deletions
Original file line number | Diff line number | Diff line change
@@ -185,22 +185,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
185185
}
186186
}
187187

188-
// ignored: native_comet scan is no longer supported
189-
ignore("basic data type support") {
190-
// this test requires native_comet scan due to unsigned u8/u16 issue
191-
withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
192-
Seq(true, false).foreach { dictionaryEnabled =>
193-
withTempDir { dir =>
194-
val path = new Path(dir.toURI.toString, "test.parquet")
195-
makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = dictionaryEnabled, 10000)
196-
withParquetTable(path.toString, "tbl") {
197-
checkSparkAnswerAndOperator("select * FROM tbl WHERE _2 > 100")
198-
}
199-
}
200-
}
201-
}
202-
}
203-
204188
test("basic data type support - excluding u8/u16") {
205189
// variant that skips _9 (UINT_8) and _10 (UINT_16) for default scan impl
206190
Seq(true, false).foreach { dictionaryEnabled =>
@@ -217,27 +201,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
217201
}
218202
}
219203

220-
// ignored: native_comet scan is no longer supported
221-
ignore("uint data type support") {
222-
// this test requires native_comet scan due to unsigned u8/u16 issue
223-
withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
224-
Seq(true, false).foreach { dictionaryEnabled =>
225-
withTempDir { dir =>
226-
val path = new Path(dir.toURI.toString, "testuint.parquet")
227-
makeParquetFileAllPrimitiveTypes(
228-
path,
229-
dictionaryEnabled = dictionaryEnabled,
230-
Byte.MinValue,
231-
Byte.MaxValue)
232-
withParquetTable(path.toString, "tbl") {
233-
val qry = "select _9 from tbl order by _11"
234-
checkSparkAnswerAndOperator(qry)
235-
}
236-
}
237-
}
238-
}
239-
}
240-
241204
test("uint data type support - excluding u8/u16") {
242205
// variant that tests UINT_32 and UINT_64, skipping _9 (UINT_8) and _10 (UINT_16)
243206
Seq(true, false).foreach { dictionaryEnabled =>
@@ -1491,57 +1454,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
14911454
}
14921455
}
14931456

1494-
// ignored: native_comet scan is no longer supported
1495-
ignore("round") {
1496-
// https://github.com/apache/datafusion-comet/issues/1441
1497-
assume(usingLegacyNativeCometScan)
1498-
Seq(true, false).foreach { dictionaryEnabled =>
1499-
withTempDir { dir =>
1500-
val path = new Path(dir.toURI.toString, "test.parquet")
1501-
makeParquetFileAllPrimitiveTypes(
1502-
path,
1503-
dictionaryEnabled = dictionaryEnabled,
1504-
-128,
1505-
128,
1506-
randomSize = 100)
1507-
// this test requires native_comet scan due to unsigned u8/u16 issue
1508-
withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
1509-
withParquetTable(path.toString, "tbl") {
1510-
for (s <- Seq(-5, -1, 0, 1, 5, -1000, 1000, -323, -308, 308, -15, 15, -16, 16,
1511-
null)) {
1512-
// array tests
1513-
// TODO: enable test for floats (_6, _7, _8, _13)
1514-
for (c <- Seq(2, 3, 4, 5, 9, 10, 11, 12, 15, 16, 17)) {
1515-
checkSparkAnswerAndOperator(s"select _${c}, round(_${c}, ${s}) FROM tbl")
1516-
}
1517-
// scalar tests
1518-
// Exclude the constant folding optimizer in order to actually execute the native round
1519-
// operations for scalar (literal) values.
1520-
// TODO: comment in the tests for float once supported
1521-
withSQLConf(
1522-
"spark.sql.optimizer.excludedRules" -> "org.apache.spark.sql.catalyst.optimizer.ConstantFolding") {
1523-
for (n <- Seq("0.0", "-0.0", "0.5", "-0.5", "1.2", "-1.2")) {
1524-
checkSparkAnswerAndOperator(
1525-
s"select round(cast(${n} as tinyint), ${s}) FROM tbl")
1526-
// checkSparkAnswerAndCometOperators(s"select round(cast(${n} as float), ${s}) FROM tbl")
1527-
checkSparkAnswerAndOperator(
1528-
s"select round(cast(${n} as decimal(38, 18)), ${s}) FROM tbl")
1529-
checkSparkAnswerAndOperator(
1530-
s"select round(cast(${n} as decimal(20, 0)), ${s}) FROM tbl")
1531-
}
1532-
// checkSparkAnswer(s"select round(double('infinity'), ${s}) FROM tbl")
1533-
// checkSparkAnswer(s"select round(double('-infinity'), ${s}) FROM tbl")
1534-
// checkSparkAnswer(s"select round(double('NaN'), ${s}) FROM tbl")
1535-
// checkSparkAnswer(
1536-
// s"select round(double('0.000000000000000000000000000000000001'), ${s}) FROM tbl")
1537-
}
1538-
}
1539-
}
1540-
}
1541-
}
1542-
}
1543-
}
1544-
15451457
test("md5") {
15461458
Seq(false, true).foreach { dictionary =>
15471459
withSQLConf("parquet.enable.dictionary" -> dictionary.toString) {
@@ -1556,25 +1468,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
15561468
}
15571469
}
15581470

1559-
// ignored: native_comet scan is no longer supported
1560-
ignore("hex") {
1561-
// https://github.com/apache/datafusion-comet/issues/1441
1562-
assume(usingLegacyNativeCometScan)
1563-
Seq(true, false).foreach { dictionaryEnabled =>
1564-
withTempDir { dir =>
1565-
val path = new Path(dir.toURI.toString, "hex.parquet")
1566-
// this test requires native_comet scan due to unsigned u8/u16 issue
1567-
withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
1568-
makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = dictionaryEnabled, 10000)
1569-
withParquetTable(path.toString, "tbl") {
1570-
checkSparkAnswerAndOperator(
1571-
"SELECT hex(_1), hex(_2), hex(_3), hex(_4), hex(_5), hex(_6), hex(_7), hex(_8), hex(_9), hex(_10), hex(_11), hex(_12), hex(_13), hex(_14), hex(_15), hex(_16), hex(_17), hex(_18), hex(_19), hex(_20) FROM tbl")
1572-
}
1573-
}
1574-
}
1575-
}
1576-
}
1577-
15781471
test("unhex") {
15791472
val table = "unhex_table"
15801473
withTable(table) {
@@ -2442,13 +2335,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
24422335
df.write.parquet(dir.toString())
24432336
}
24442337
val df = spark.read.parquet(dir.toString()).select("nested1.id")
2445-
// Comet's original scan does not support structs.
2446-
// The plan will have a Comet Scan only if scan impl is native_full or native_recordbatch
2447-
if (!scanImpl.equals(CometConf.SCAN_NATIVE_COMET)) {
2448-
checkSparkAnswerAndOperator(df)
2449-
} else {
2450-
checkSparkAnswer(df)
2451-
}
2338+
checkSparkAnswerAndOperator(df)
24522339
}
24532340
}
24542341

@@ -2474,19 +2361,10 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
24742361
}
24752362

24762363
val df = spark.read.parquet(dir.toString())
2477-
// Comet's original scan does not support structs.
2478-
// The plan will have a Comet Scan only if scan impl is native_full or native_recordbatch
2479-
if (scanImpl != CometConf.SCAN_NATIVE_COMET) {
2480-
checkSparkAnswerAndOperator(df.select("nested1.id"))
2481-
checkSparkAnswerAndOperator(df.select("nested1.nested2"))
2482-
checkSparkAnswerAndOperator(df.select("nested1.nested2.id"))
2483-
checkSparkAnswerAndOperator(df.select("nested1.id", "nested1.nested2.id"))
2484-
} else {
2485-
checkSparkAnswer(df.select("nested1.id"))
2486-
checkSparkAnswer(df.select("nested1.nested2"))
2487-
checkSparkAnswer(df.select("nested1.nested2.id"))
2488-
checkSparkAnswer(df.select("nested1.id", "nested1.nested2.id"))
2489-
}
2364+
checkSparkAnswerAndOperator(df.select("nested1.id"))
2365+
checkSparkAnswerAndOperator(df.select("nested1.nested2"))
2366+
checkSparkAnswerAndOperator(df.select("nested1.nested2.id"))
2367+
checkSparkAnswerAndOperator(df.select("nested1.id", "nested1.nested2.id"))
24902368
}
24912369
}
24922370

@@ -2512,13 +2390,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
25122390
}
25132391

25142392
val df = spark.read.parquet(dir.toString()).select("nested1.id")
2515-
// Comet's original scan does not support structs.
2516-
// The plan will have a Comet Scan only if scan impl is native_full or native_recordbatch
2517-
if (scanImpl != CometConf.SCAN_NATIVE_COMET) {
2518-
checkSparkAnswerAndOperator(df)
2519-
} else {
2520-
checkSparkAnswer(df)
2521-
}
2393+
checkSparkAnswerAndOperator(df)
25222394
}
25232395
}
25242396

@@ -2595,7 +2467,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
25952467
}
25962468

25972469
test("get_struct_field with DataFusion ParquetExec - read entire struct") {
2598-
assume(!usingLegacyNativeCometScan(conf))
25992470
withTempPath { dir =>
26002471
// create input file with Comet disabled
26012472
withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
@@ -2632,7 +2503,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
26322503
}
26332504

26342505
test("read array[int] from parquet") {
2635-
assume(!usingLegacyNativeCometScan(conf))
26362506

26372507
withTempPath { dir =>
26382508
// create input file with Comet disabled
@@ -2773,55 +2643,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
27732643
}
27742644
}
27752645

2776-
// ignored: native_comet scan is no longer supported
2777-
ignore("test integral divide") {
2778-
// this test requires native_comet scan due to unsigned u8/u16 issue
2779-
withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
2780-
Seq(true, false).foreach { dictionaryEnabled =>
2781-
withTempDir { dir =>
2782-
val path1 = new Path(dir.toURI.toString, "test1.parquet")
2783-
val path2 = new Path(dir.toURI.toString, "test2.parquet")
2784-
makeParquetFileAllPrimitiveTypes(
2785-
path1,
2786-
dictionaryEnabled = dictionaryEnabled,
2787-
0,
2788-
0,
2789-
randomSize = 10000)
2790-
makeParquetFileAllPrimitiveTypes(
2791-
path2,
2792-
dictionaryEnabled = dictionaryEnabled,
2793-
0,
2794-
0,
2795-
randomSize = 10000)
2796-
withParquetTable(path1.toString, "tbl1") {
2797-
withParquetTable(path2.toString, "tbl2") {
2798-
checkSparkAnswerAndOperator("""
2799-
|select
2800-
| t1._2 div t2._2, div(t1._2, t2._2),
2801-
| t1._3 div t2._3, div(t1._3, t2._3),
2802-
| t1._4 div t2._4, div(t1._4, t2._4),
2803-
| t1._5 div t2._5, div(t1._5, t2._5),
2804-
| t1._9 div t2._9, div(t1._9, t2._9),
2805-
| t1._10 div t2._10, div(t1._10, t2._10),
2806-
| t1._11 div t2._11, div(t1._11, t2._11)
2807-
| from tbl1 t1 join tbl2 t2 on t1._id = t2._id
2808-
| order by t1._id""".stripMargin)
2809-
2810-
checkSparkAnswerAndOperator("""
2811-
|select
2812-
| t1._12 div t2._12, div(t1._12, t2._12),
2813-
| t1._15 div t2._15, div(t1._15, t2._15),
2814-
| t1._16 div t2._16, div(t1._16, t2._16),
2815-
| t1._17 div t2._17, div(t1._17, t2._17)
2816-
| from tbl1 t1 join tbl2 t2 on t1._id = t2._id
2817-
| order by t1._id""".stripMargin)
2818-
}
2819-
}
2820-
}
2821-
}
2822-
}
2823-
}
2824-
28252646
test("ANSI support for add") {
28262647
val data = Seq((Integer.MAX_VALUE, 1), (Integer.MIN_VALUE, -1))
28272648
withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") {

0 commit comments

Comments
 (0)