@@ -700,8 +700,7 @@ def _build_command(self) -> List[str]:
700700 # When input data is provided, we don't add source parameters
701701 # The sling binary will auto-detect stdin
702702 # Set source format to Arrow if using Arrow mode with input data
703- # BUT only if we're streaming to stdout or the target explicitly uses Arrow
704- if HAS_ARROW and self ._should_use_arrow () and self ._should_use_arrow_for_input ():
703+ if HAS_ARROW and self ._should_use_arrow ():
705704 if self .src_options is None :
706705 self .src_options = SourceOptions (format = Format .ARROW , null_if = '\\ N' )
707706 elif isinstance (self .src_options , dict ):
@@ -784,7 +783,7 @@ def _build_command(self) -> List[str]:
784783
785784 def _write_input_data_sync (self , stdin : IO , input_data : Any ):
786785 """Write input data to stdin, using Arrow IPC format if available, otherwise CSV"""
787- if HAS_ARROW and self ._should_use_arrow () and self . _should_use_arrow_for_input () :
786+ if HAS_ARROW and self ._should_use_arrow ():
788787 self ._write_input_data_arrow (stdin , input_data )
789788 else :
790789 self ._write_input_data_csv (stdin , input_data )
@@ -794,27 +793,15 @@ def _should_use_arrow(self) -> bool:
794793 # Use Arrow if available and not disabled via env var
795794 return HAS_ARROW and os .environ .get ('SLING_USE_ARROW' , 'true' ).lower () != 'false'
796795
797- def _should_use_arrow_for_input (self ) -> bool :
798- """Determine if Arrow format should be used for input data"""
799- # Only use Arrow for input if:
800- # 1. We're streaming to stdout (no target object)
801- # 2. Or the target explicitly requests Arrow format
802- if not self .tgt_object :
803- # Streaming to stdout
804- return True
805-
806- # Check if target format is explicitly set to Arrow
807- if self .tgt_options :
808- if isinstance (self .tgt_options , dict ) and self .tgt_options .get ('format' ) == Format .ARROW :
809- return True
810- elif hasattr (self .tgt_options , 'format' ) and self .tgt_options .format == Format .ARROW :
811- return True
812-
813- # For file targets, don't use Arrow for input unless explicitly requested
814- return False
815-
816796 def _convert_to_arrow_table (self , input_data : Any ) -> pa .Table :
817797 """Convert input data to Arrow Table"""
798+ # Return the input unchanged if it is already an Arrow Table (the Dataset check below is commented out)
799+ if HAS_ARROW and pa is not None :
800+ # if isinstance(input_data, pa.Dataset):
801+ # return input_data.to_table()
802+ if isinstance (input_data , pa .Table ):
803+ return input_data
804+
818805 # Check for pandas DataFrame first
819806 if HAS_PANDAS and pd is not None and isinstance (input_data , pd .DataFrame ):
820807 return pa .Table .from_pandas (input_data , preserve_index = False )
0 commit comments