Skip to content

Commit c0d3839

Browse files
committed
refactor: remove dead nonIdentityTransform detection from CometScanRule
1 parent 21d6521 commit c0d3839

1 file changed

Lines changed: 13 additions & 59 deletions

File tree

spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala

Lines changed: 13 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -531,32 +531,12 @@ case class CometScanRule(session: SparkSession)
531531
false
532532
}
533533

534-
// Check for non-identity transform functions in residual expressions.
535-
// Non-identity transforms (truncate, bucket, year, month, day, hour) are handled
536-
// differently based on whether delete files are present:
537-
// - Read-only (no delete files): native scan proceeds; iceberg-rust skips row-group
538-
// filtering for these predicates and CometFilter applies them post-scan.
539-
// - With delete files: must fall back to Spark. convertIcebergExpression() cannot
540-
// convert BoundTerm with transforms, causing residuals to be silently dropped,
541-
// which breaks delete correctness.
542-
// Future: convert transforms to Spark expressions (bucket->pmod, year->Year, etc.)
543-
// to enable native execution with delete files too.
544-
val transformFunctionsSupported = taskValidation.nonIdentityTransform match {
545-
case Some(transformType) =>
546-
if (!taskValidation.deleteFiles.isEmpty) {
547-
fallbackReasons +=
548-
s"Iceberg transform '$transformType' with delete files present. " +
549-
"Falling back to ensure correct delete operation."
550-
false
551-
} else {
552-
logInfo(
553-
s"Iceberg residual contains transform '$transformType' - " +
554-
"post-scan filtering will apply.")
555-
true
556-
}
557-
case None =>
558-
true
559-
}
534+
// TODO: A safety guard for non-identity transforms (truncate, bucket, year, etc.)
535+
// with delete files should be re-implemented using PartitionSpec inspection.
536+
// The previous residual-based detection was ineffective because Iceberg residuals
537+
// use NamedReference terms without transform metadata. Correctness is currently
538+
// maintained by Iceberg/Spark's MOR delete processing and CometFilter post-scan
539+
// filtering. See: https://github.com/apache/datafusion-comet/issues/XXXX
560540

561541
// Check for unsupported struct types in delete files
562542
val deleteFileTypesSupported = {
@@ -639,7 +619,7 @@ case class CometScanRule(session: SparkSession)
639619

640620
if (schemaSupported && fileIOCompatible && formatVersionSupported &&
641621
taskValidation.allParquet && allSupportedFilesystems && partitionTypesSupported &&
642-
complexTypePredicatesSupported && transformFunctionsSupported &&
622+
complexTypePredicatesSupported &&
643623
deleteFileTypesSupported && dppSubqueriesSupported) {
644624
CometBatchScanExec(
645625
scanExec.clone().asInstanceOf[BatchScanExec],
@@ -786,23 +766,19 @@ object CometScanRule extends Logging {
786766
val contentScanTaskClass = Class.forName(IcebergReflection.ClassNames.CONTENT_SCAN_TASK)
787767
val contentFileClass = Class.forName(IcebergReflection.ClassNames.CONTENT_FILE)
788768
val fileScanTaskClass = Class.forName(IcebergReflection.ClassNames.FILE_SCAN_TASK)
789-
val unboundPredicateClass = Class.forName(IcebergReflection.ClassNames.UNBOUND_PREDICATE)
790769
// scalastyle:on classforname
791770

792771
// Cache all method lookups outside the loop
793772
val fileMethod = contentScanTaskClass.getMethod("file")
794773
val formatMethod = contentFileClass.getMethod("format")
795774
val pathMethod = contentFileClass.getMethod("path")
796-
val residualMethod = contentScanTaskClass.getMethod("residual")
797775
val deletesMethod = fileScanTaskClass.getMethod("deletes")
798-
val termMethod = unboundPredicateClass.getMethod("term")
799776

800777
val supportedSchemes =
801778
Set("file", "s3", "s3a", "gs", "gcs", "oss", "abfss", "abfs", "wasbs", "wasb")
802779

803780
var allParquet = true
804781
val unsupportedSchemes = mutable.Set[String]()
805-
var nonIdentityTransform: Option[String] = None
806782
val deleteFiles = new java.util.ArrayList[Any]()
807783

808784
tasks.asScala.foreach { task =>
@@ -826,28 +802,11 @@ object CometScanRule extends Logging {
826802
case _: java.net.URISyntaxException => // ignore
827803
}
828804

829-
// Residual transform check (short-circuit if already found unsupported)
830-
if (nonIdentityTransform.isEmpty && fileScanTaskClass.isInstance(task)) {
831-
try {
832-
val residual = residualMethod.invoke(task)
833-
if (unboundPredicateClass.isInstance(residual)) {
834-
val term = termMethod.invoke(residual)
835-
try {
836-
val transformMethod = term.getClass.getMethod("transform")
837-
transformMethod.setAccessible(true)
838-
val transform = transformMethod.invoke(term)
839-
val transformStr = transform.toString
840-
if (transformStr != IcebergReflection.Transforms.IDENTITY) {
841-
nonIdentityTransform = Some(transformStr)
842-
}
843-
} catch {
844-
case _: NoSuchMethodException => // No transform = simple reference, OK
845-
}
846-
}
847-
} catch {
848-
case _: Exception => // Skip tasks where we can't get residual
849-
}
850-
}
805+
// TODO: Non-identity transform detection via residual inspection was removed
806+
// because Iceberg residuals use NamedReference terms (not BoundTransform),
807+
// so the transform() method is never available. To properly detect non-identity
808+
// transforms, inspect the table's PartitionSpec instead of residual expressions.
809+
// See: https://github.com/apache/datafusion-comet/issues/XXXX
851810

852811
// Collect delete files and check their schemes
853812
if (fileScanTaskClass.isInstance(task)) {
@@ -875,11 +834,7 @@ object CometScanRule extends Logging {
875834
}
876835
}
877836

878-
IcebergTaskValidationResult(
879-
allParquet,
880-
unsupportedSchemes.toSet,
881-
nonIdentityTransform,
882-
deleteFiles)
837+
IcebergTaskValidationResult(allParquet, unsupportedSchemes.toSet, deleteFiles)
883838
}
884839
}
885840

@@ -889,5 +844,4 @@ object CometScanRule extends Logging {
889844
case class IcebergTaskValidationResult(
890845
allParquet: Boolean,
891846
unsupportedSchemes: Set[String],
892-
nonIdentityTransform: Option[String],
893847
deleteFiles: java.util.List[_])

0 commit comments

Comments
 (0)