1818
1919package org .apache .hudi .metadata ;
2020
21+ import org .apache .hudi .avro .model .HoodieCleanMetadata ;
2122import org .apache .hudi .avro .model .HoodieMetadataRecord ;
2223import org .apache .hudi .client .FailOnFirstErrorWriteStatus ;
2324import org .apache .hudi .client .transaction .lock .InProcessLockProvider ;
7374import org .apache .hudi .config .metrics .HoodieMetricsPrometheusConfig ;
7475import org .apache .hudi .exception .HoodieException ;
7576import org .apache .hudi .exception .HoodieMetadataException ;
77+ import org .apache .hudi .metadata .index .Indexer ;
7678import org .apache .hudi .stats .HoodieColumnRangeMetadata ;
7779import org .apache .hudi .storage .StoragePath ;
7880import org .apache .hudi .storage .StoragePathInfo ;
106108import static org .apache .hudi .metadata .HoodieTableMetadataUtil .PARTITION_NAME_PARTITION_STATS ;
107109import static org .apache .hudi .metadata .HoodieTableMetadataUtil .convertMetadataToBloomFilterRecords ;
108110import static org .apache .hudi .metadata .HoodieTableMetadataUtil .convertMetadataToColumnStatsRecords ;
111+ import static org .apache .hudi .metadata .HoodieTableMetadataUtil .convertMetadataToExpressionIndexRecords ;
109112import static org .apache .hudi .metadata .HoodieTableMetadataUtil .convertMetadataToFilesPartitionRecords ;
110113import static org .apache .hudi .metadata .HoodieTableMetadataUtil .convertMetadataToPartitionStatsRecords ;
111114import static org .apache .hudi .metadata .HoodieTableMetadataUtil .convertMetadataToRecordIndexRecords ;
@@ -362,6 +365,41 @@ public static HoodieWriteConfig createMetadataWriteConfig(
362365 return metadataWriteConfig ;
363366 }
364367
368+ /**
369+ * Convert the clean action to metadata records.
370+ */
371+ public static Map <String , HoodieData <HoodieRecord >> convertMetadataToRecords (HoodieEngineContext engineContext ,
372+ HoodieCleanMetadata cleanMetadata ,
373+ String instantTime ,
374+ HoodieTableMetaClient dataMetaClient ,
375+ HoodieMetadataConfig metadataConfig ,
376+ Map <MetadataPartitionType , Indexer > enabledIndexBuilderMap ,
377+ int bloomIndexParallelism ,
378+ Option <HoodieRecord .HoodieRecordType > recordTypeOpt ) {
379+ final Map <String , HoodieData <HoodieRecord >> partitionToRecordsMap = new HashMap <>();
380+ final HoodieData <HoodieRecord > filesPartitionRecordsRDD = engineContext .parallelize (
381+ convertMetadataToFilesPartitionRecords (cleanMetadata , instantTime ), 1 );
382+ partitionToRecordsMap .put (MetadataPartitionType .FILES .getPartitionPath (), filesPartitionRecordsRDD );
383+ if (enabledIndexBuilderMap .containsKey (MetadataPartitionType .BLOOM_FILTERS )) {
384+ final HoodieData <HoodieRecord > metadataBloomFilterRecordsRDD =
385+ convertMetadataToBloomFilterRecords (cleanMetadata , engineContext , instantTime , bloomIndexParallelism );
386+ partitionToRecordsMap .put (MetadataPartitionType .BLOOM_FILTERS .getPartitionPath (), metadataBloomFilterRecordsRDD );
387+ }
388+
389+ if (enabledIndexBuilderMap .containsKey (MetadataPartitionType .COLUMN_STATS )) {
390+ final HoodieData <HoodieRecord > metadataColumnStatsRDD =
391+ convertMetadataToColumnStatsRecords (cleanMetadata , engineContext ,
392+ dataMetaClient , metadataConfig , recordTypeOpt );
393+ partitionToRecordsMap .put (MetadataPartitionType .COLUMN_STATS .getPartitionPath (), metadataColumnStatsRDD );
394+ }
395+ if (enabledIndexBuilderMap .containsKey (MetadataPartitionType .EXPRESSION_INDEX )) {
396+ convertMetadataToExpressionIndexRecords (engineContext , cleanMetadata , instantTime , dataMetaClient , metadataConfig , bloomIndexParallelism , partitionToRecordsMap ,
397+ recordTypeOpt );
398+ }
399+
400+ return partitionToRecordsMap ;
401+ }
402+
365403 /**
366404 * Convert commit action to metadata records for the enabled partition types.
367405 *
@@ -370,12 +408,14 @@ public static HoodieWriteConfig createMetadataWriteConfig(
370408 * @param commitMetadata - Commit action metadata
371409 * @param instantTime - Action instant time
372410 * @param dataMetaClient - HoodieTableMetaClient for data
373- * @param tableMetadata
411+ * @param tableMetadata - metadata table reader
374412 * @param metadataConfig - HoodieMetadataConfig
375413 * @param enabledPartitionTypes - Set of enabled MDT partitions to update
376414 * @param bloomFilterType - Type of generated bloom filter records
377415 * @param bloomIndexParallelism - Parallelism for bloom filter record generation
378- * @param enableOptimizeLogBlocksScan - flag used to enable scanInternalV2 for log blocks in data table
416+ * @param writesFileIdEncoding - file id encoding used while generating record index records
417+ * @param engineType - execution engine type
418+ * @param recordTypeOpt - record type override for generated metadata records
379419 * @return Map of partition to metadata records for the commit action
380420 */
381421 public static Map <String , HoodieData <HoodieRecord >> convertMetadataToRecords (HoodieEngineContext context , HoodieWriteConfig dataWriteConfig , HoodieCommitMetadata commitMetadata ,
0 commit comments