@@ -131,6 +131,9 @@ pub struct SparkCastOptions {
131131 pub timezone : String ,
132132 /// Allow casts that are supported but not guaranteed to be 100% compatible
133133 pub allow_incompat : bool ,
134+ /// True when running against Spark 4.0+. Enables version-specific cast behaviour
135+ /// such as the handling of leading whitespace before T-prefixed time-only strings.
136+ pub is_spark4_plus : bool ,
134137 /// Support casting unsigned ints to signed ints (used by Parquet SchemaAdapter)
135138 pub allow_cast_unsigned_ints : bool ,
136139 /// We also use the cast logic for adapting Parquet schemas, so this flag is used
@@ -148,6 +151,7 @@ impl SparkCastOptions {
148151 eval_mode,
149152 timezone : timezone. to_string ( ) ,
150153 allow_incompat,
154+ is_spark4_plus : false ,
151155 allow_cast_unsigned_ints : false ,
152156 is_adapting_schema : false ,
153157 null_string : "null" . to_string ( ) ,
@@ -160,12 +164,25 @@ impl SparkCastOptions {
160164 eval_mode,
161165 timezone : "" . to_string ( ) ,
162166 allow_incompat,
167+ is_spark4_plus : false ,
163168 allow_cast_unsigned_ints : false ,
164169 is_adapting_schema : false ,
165170 null_string : "null" . to_string ( ) ,
166171 binary_output_style : None ,
167172 }
168173 }
174+
175+ pub fn new_with_version (
176+ eval_mode : EvalMode ,
177+ timezone : & str ,
178+ allow_incompat : bool ,
179+ is_spark4_plus : bool ,
180+ ) -> Self {
181+ Self {
182+ is_spark4_plus,
183+ ..Self :: new ( eval_mode, timezone, allow_incompat)
184+ }
185+ }
169186}
170187
171188/// Spark-compatible cast implementation. Defers to DataFusion's cast where that is known
@@ -296,9 +313,13 @@ pub(crate) fn cast_array(
296313 let cast_result = match ( & from_type, to_type) {
297314 ( Utf8 , Boolean ) => spark_cast_utf8_to_boolean :: < i32 > ( & array, eval_mode) ,
298315 ( LargeUtf8 , Boolean ) => spark_cast_utf8_to_boolean :: < i64 > ( & array, eval_mode) ,
299- ( Utf8 , Timestamp ( _, _) ) => {
300- cast_string_to_timestamp ( & array, to_type, eval_mode, & cast_options. timezone )
301- }
316+ ( Utf8 , Timestamp ( _, _) ) => cast_string_to_timestamp (
317+ & array,
318+ to_type,
319+ eval_mode,
320+ & cast_options. timezone ,
321+ cast_options. is_spark4_plus ,
322+ ) ,
302323 ( Utf8 , Date32 ) => cast_string_to_date ( & array, to_type, eval_mode) ,
303324 ( Date32 , Int32 ) => {
304325 // Date32 is stored as days since epoch (i32), so this is a simple reinterpret cast
0 commit comments