@@ -251,6 +251,45 @@ def test_for_loop_repeatable(self):
251251 super ().test_for_loop_repeatable ()
252252
253253
254+ class GrainArrayRecordTiktokenTest (GrainArrayRecordProcessingTest ):
255+ """Test grain data processing with the tiktoken tokenizer."""
256+
257+ def setUp (self ):
258+ super ().setUp ()
259+ self .config = self ._make_config (
260+ tokenizer_type = "tiktoken" ,
261+ tokenizer_path = os .path .join (MAXTEXT_ASSETS_ROOT , "tokenizers" , "tokenizer_llama3.tiktoken" ),
262+ )
263+ self .train_iter = grain_data_processing .make_grain_train_iterator (self .config , self .mesh , self .process_indices )
264+
265+ # Only runs test_train_ds from parent class, skip other tests
266+ @pytest .mark .skip (reason = "skip for tokenizer testing" )
267+ def test_batch_determinism (self ):
268+ pass
269+
270+ @pytest .mark .skip (reason = "skip for tokenizer testing" )
271+ def test_for_loop_repeatable (self ):
272+ pass
273+
274+
275+ class GrainArrayRecordHFTokenizerTest (GrainArrayRecordProcessingTest ):
276+ """Test grain data processing with a HuggingFace tokenizer."""
277+
278+ def setUp (self ):
279+ super ().setUp ()
280+ self .config = self ._make_config (tokenizer_type = "huggingface" , tokenizer_path = "deepseek-ai/DeepSeek-V3" )
281+ self .train_iter = grain_data_processing .make_grain_train_iterator (self .config , self .mesh , self .process_indices )
282+
283+ # Only runs test_train_ds from parent class, skip other tests
284+ @pytest .mark .skip (reason = "skip for tokenizer testing" )
285+ def test_batch_determinism (self ):
286+ pass
287+
288+ @pytest .mark .skip (reason = "skip for tokenizer testing" )
289+ def test_for_loop_repeatable (self ):
290+ pass
291+
292+
254293class GrainArrayRecordBestFitPackingTest (GrainArrayRecordProcessingTest ):
255294 """Test grain data processing with best_fit packing strategy."""
256295
0 commit comments