@@ -35,10 +35,13 @@ use datafusion::execution::object_store::ObjectStoreUrl;
3535use datafusion:: execution:: runtime_env:: RuntimeEnv ;
3636use datafusion:: physical_plan:: ColumnarValue ;
3737use datafusion_comet_spark_expr:: EvalMode ;
38+ use log:: debug;
3839use object_store:: path:: Path ;
3940use object_store:: { parse_url, ObjectStore } ;
4041use std:: collections:: HashMap ;
42+ use std:: sync:: OnceLock ;
4143use std:: time:: Duration ;
44+ use std:: { collections:: hash_map:: DefaultHasher , hash:: Hasher , sync:: RwLock } ;
4245use std:: { fmt:: Debug , hash:: Hash , sync:: Arc } ;
4346use url:: Url ;
4447
@@ -444,6 +447,56 @@ fn create_hdfs_object_store(
444447 } )
445448}
446449
450+ type ObjectStoreCache = RwLock < HashMap < ( String , u64 ) , Arc < dyn ObjectStore > > > ;
451+
452+ /// Process-wide cache of object stores, keyed by `(scheme://host:port, config_hash)`.
453+ ///
454+ /// ## Why static / process lifetime?
455+ ///
456+ /// Comet's JNI architecture calls `initRecordBatchReader` once per Parquet file, and each
457+ /// call constructs a fresh `RuntimeEnv`. There is therefore no executor-scoped Rust object
458+ /// with a lifetime longer than a single file read that could own this cache. The executor
459+ /// process itself is the natural scope for HTTP connection-pool reuse, so process lifetime
460+ /// (i.e. `static`) is the appropriate choice here. In the standard Spark-on-Kubernetes
461+ /// deployment model each executor process is dedicated to a single Spark application, so
462+ /// process lifetime and application lifetime are equivalent; the cache is reclaimed when
463+ /// the executor pod terminates.
464+ ///
465+ /// ## Unbounded size
466+ ///
467+ /// Cache entries are indexed by `(scheme://host:port, hash-of-configs)`. A typical Spark
468+ /// job accesses a small, fixed set of buckets with a stable configuration, so the number of
469+ /// distinct keys is O(buckets × credential-configs) and remains small throughout the job.
470+ /// Entries are cheap relative to the cost of creating a new object store (new HTTP
471+ /// connection pool + DNS resolution), and there is no meaningful benefit from eviction, so
472+ /// no eviction policy is applied.
473+ ///
474+ /// ## Credential invalidation
475+ ///
476+ /// Object stores that use dynamic credentials (IMDS, WebIdentity, ECS role, STS assume-role)
477+ /// delegate credential refresh to a `CometCredentialProvider` that fetches fresh credentials
478+ /// on every request, so credential rotation is transparent and requires no cache
479+ /// invalidation. Object stores whose credentials are embedded in the Hadoop configuration
480+ /// (e.g. `fs.s3a.access.key` / `fs.s3a.secret.key`) produce a different `config_hash` when
481+ /// those values change, which causes a new store to be created and inserted under the new
482+ /// key; the old entry is harmlessly superseded.
483+ fn object_store_cache ( ) -> & ' static ObjectStoreCache {
484+ static CACHE : OnceLock < ObjectStoreCache > = OnceLock :: new ( ) ;
485+ CACHE . get_or_init ( || RwLock :: new ( HashMap :: new ( ) ) )
486+ }
487+
/// Derives the `u64` fingerprint of an object store configuration for use in cache keys.
///
/// Key/value pairs are fed to the hasher in ascending key order, so the result does not
/// depend on the iteration order of the `HashMap`.
fn hash_object_store_configs(configs: &HashMap<String, String>) -> u64 {
    // Gather the entries once and order them by key; keys are unique, so an unstable sort
    // produces the same sequence as a stable one.
    let mut entries: Vec<(&String, &String)> = configs.iter().collect();
    entries.sort_unstable_by(|left, right| left.0.cmp(right.0));

    let mut state = DefaultHasher::new();
    for (name, value) in entries {
        name.hash(&mut state);
        value.hash(&mut state);
    }
    state.finish()
}
499+
447500/// Parses the url, registers the object store with configurations, and returns a tuple of the object store url
448501/// and object store path
449502pub ( crate ) fn prepare_object_store_with_configs (
@@ -467,17 +520,45 @@ pub(crate) fn prepare_object_store_with_configs(
467520 & url[ url:: Position :: BeforeHost ..url:: Position :: AfterPort ] ,
468521 ) ;
469522
470- let ( object_store, object_store_path) : ( Box < dyn ObjectStore > , Path ) = if is_hdfs_scheme {
471- create_hdfs_object_store ( & url)
472- } else if scheme == "s3" {
473- objectstore:: s3:: create_store ( & url, object_store_configs, Duration :: from_secs ( 300 ) )
474- } else {
475- parse_url ( & url)
476- }
477- . map_err ( |e| ExecutionError :: GeneralError ( e. to_string ( ) ) ) ?;
523+ let config_hash = hash_object_store_configs ( object_store_configs) ;
524+ let cache_key = ( url_key. clone ( ) , config_hash) ;
525+
526+ // Check the cache first to reuse existing object store instances.
527+ // This enables HTTP connection pooling and avoids redundant DNS lookups.
528+ let cached = {
529+ let cache = object_store_cache ( )
530+ . read ( )
531+ . map_err ( |e| ExecutionError :: GeneralError ( format ! ( "Object store cache error: {e}" ) ) ) ?;
532+ cache. get ( & cache_key) . cloned ( )
533+ } ;
534+
535+ let ( object_store, object_store_path) : ( Arc < dyn ObjectStore > , Path ) =
536+ if let Some ( store) = cached {
537+ debug ! ( "Reusing cached object store for {url_key}" ) ;
538+ let path = Path :: from_url_path ( url. path ( ) )
539+ . map_err ( |e| ExecutionError :: GeneralError ( e. to_string ( ) ) ) ?;
540+ ( store, path)
541+ } else {
542+ debug ! ( "Creating new object store for {url_key}" ) ;
543+ let ( store, path) : ( Box < dyn ObjectStore > , Path ) = if is_hdfs_scheme {
544+ create_hdfs_object_store ( & url)
545+ } else if scheme == "s3" {
546+ objectstore:: s3:: create_store ( & url, object_store_configs, Duration :: from_secs ( 300 ) )
547+ } else {
548+ parse_url ( & url)
549+ }
550+ . map_err ( |e| ExecutionError :: GeneralError ( e. to_string ( ) ) ) ?;
551+
552+ let store: Arc < dyn ObjectStore > = Arc :: from ( store) ;
553+ // Insert into cache
554+ if let Ok ( mut cache) = object_store_cache ( ) . write ( ) {
555+ cache. insert ( cache_key, Arc :: clone ( & store) ) ;
556+ }
557+ ( store, path)
558+ } ;
478559
479560 let object_store_url = ObjectStoreUrl :: parse ( url_key. clone ( ) ) ?;
480- runtime_env. register_object_store ( & url, Arc :: from ( object_store) ) ;
561+ runtime_env. register_object_store ( & url, object_store) ;
481562 Ok ( ( object_store_url, object_store_path) )
482563}
483564
0 commit comments