@@ -531,32 +531,12 @@ case class CometScanRule(session: SparkSession)
531531 false
532532 }
533533
534- // Check for non-identity transform functions in residual expressions.
535- // Non-identity transforms (truncate, bucket, year, month, day, hour) are handled
536- // differently based on whether delete files are present:
537- // - Read-only (no delete files): native scan proceeds; iceberg-rust skips row-group
538- // filtering for these predicates and CometFilter applies them post-scan.
539- // - With delete files: must fall back to Spark. convertIcebergExpression() cannot
540- // convert BoundTerm with transforms, causing residuals to be silently dropped,
541- // which breaks delete correctness.
542- // Future: convert transforms to Spark expressions (bucket->pmod, year->Year, etc.)
543- // to enable native execution with delete files too.
544- val transformFunctionsSupported = taskValidation.nonIdentityTransform match {
545- case Some(transformType) =>
546- if (!taskValidation.deleteFiles.isEmpty) {
547- fallbackReasons +=
548- s"Iceberg transform '$transformType' with delete files present. " +
549- "Falling back to ensure correct delete operation."
550- false
551- } else {
552- logInfo(
553- s"Iceberg residual contains transform '$transformType' - " +
554- "post-scan filtering will apply.")
555- true
556- }
557- case None =>
558- true
559- }
534+ // TODO: A safety guard for non-identity transforms (truncate, bucket, year, etc.)
535+ // with delete files should be re-implemented using PartitionSpec inspection.
536+ // The previous residual-based detection was ineffective because Iceberg residuals
537+ // use NamedReference terms without transform metadata. Correctness is currently
538+ // maintained by Iceberg/Spark's MOR delete processing and CometFilter post-scan
539+ // filtering. See: https://github.com/apache/datafusion-comet/issues/XXXX
560540
561541 // Check for unsupported struct types in delete files
562542 val deleteFileTypesSupported = {
@@ -639,7 +619,7 @@ case class CometScanRule(session: SparkSession)
639619
640620 if (schemaSupported && fileIOCompatible && formatVersionSupported &&
641621 taskValidation.allParquet && allSupportedFilesystems && partitionTypesSupported &&
642- complexTypePredicatesSupported && transformFunctionsSupported &&
622+ complexTypePredicatesSupported &&
643623 deleteFileTypesSupported && dppSubqueriesSupported) {
644624 CometBatchScanExec(
645625 scanExec.clone().asInstanceOf[BatchScanExec],
@@ -786,23 +766,19 @@ object CometScanRule extends Logging {
786766 val contentScanTaskClass = Class.forName(IcebergReflection.ClassNames.CONTENT_SCAN_TASK)
787767 val contentFileClass = Class.forName(IcebergReflection.ClassNames.CONTENT_FILE)
788768 val fileScanTaskClass = Class.forName(IcebergReflection.ClassNames.FILE_SCAN_TASK)
789- val unboundPredicateClass = Class.forName(IcebergReflection.ClassNames.UNBOUND_PREDICATE)
790769 // scalastyle:on classforname
791770
792771 // Cache all method lookups outside the loop
793772 val fileMethod = contentScanTaskClass.getMethod("file")
794773 val formatMethod = contentFileClass.getMethod("format")
795774 val pathMethod = contentFileClass.getMethod("path")
796- val residualMethod = contentScanTaskClass.getMethod("residual")
797775 val deletesMethod = fileScanTaskClass.getMethod("deletes")
798- val termMethod = unboundPredicateClass.getMethod("term")
799776
800777 val supportedSchemes =
801778 Set("file", "s3", "s3a", "gs", "gcs", "oss", "abfss", "abfs", "wasbs", "wasb")
802779
803780 var allParquet = true
804781 val unsupportedSchemes = mutable.Set[String]()
805- var nonIdentityTransform: Option[String] = None
806782 val deleteFiles = new java.util.ArrayList[Any]()
807783
808784 tasks.asScala.foreach { task =>
@@ -826,28 +802,11 @@ object CometScanRule extends Logging {
826802 case _ : java.net.URISyntaxException => // ignore
827803 }
828804
829- // Residual transform check (short-circuit if already found unsupported)
830- if (nonIdentityTransform.isEmpty && fileScanTaskClass.isInstance(task)) {
831- try {
832- val residual = residualMethod.invoke(task)
833- if (unboundPredicateClass.isInstance(residual)) {
834- val term = termMethod.invoke(residual)
835- try {
836- val transformMethod = term.getClass.getMethod("transform")
837- transformMethod.setAccessible(true)
838- val transform = transformMethod.invoke(term)
839- val transformStr = transform.toString
840- if (transformStr != IcebergReflection.Transforms.IDENTITY) {
841- nonIdentityTransform = Some(transformStr)
842- }
843- } catch {
844- case _: NoSuchMethodException => // No transform = simple reference, OK
845- }
846- }
847- } catch {
848- case _: Exception => // Skip tasks where we can't get residual
849- }
850- }
805+ // TODO: Non-identity transform detection via residual inspection was removed
806+ // because Iceberg residuals use NamedReference terms (not BoundTransform),
807+ // so the transform() method is never available. To properly detect non-identity
808+ // transforms, inspect the table's PartitionSpec instead of residual expressions.
809+ // See: https://github.com/apache/datafusion-comet/issues/XXXX
851810
852811 // Collect delete files and check their schemes
853812 if (fileScanTaskClass.isInstance(task)) {
@@ -875,11 +834,7 @@ object CometScanRule extends Logging {
875834 }
876835 }
877836
878- IcebergTaskValidationResult(
879- allParquet,
880- unsupportedSchemes.toSet,
881- nonIdentityTransform,
882- deleteFiles)
837+ IcebergTaskValidationResult(allParquet, unsupportedSchemes.toSet, deleteFiles)
883838 }
884839}
885840
@@ -889,5 +844,4 @@ object CometScanRule extends Logging {
889844case class IcebergTaskValidationResult(
890845 allParquet: Boolean,
891846 unsupportedSchemes: Set[String],
892- nonIdentityTransform: Option[String],
893847 deleteFiles: java.util.List[_])
0 commit comments