-# AlphaDiffract trainer configuration (paper-aligned defaults provided here)
-# This file is required by src/trainer/train_paper.py. It contains all parameters with no script-side defaults.
+# AlphaDiffract trainer configuration — ConvNeXt (paper-matching lightweight variant)
+# Use with: PYTHONPATH=src python -m trainer.train_paper configs/trainer_convnext_paper.yaml
 
 # --- Data / Manifests ---
-manifest_dir: "../../data/manifests"
-dataset_root: "../../data/dataset"  # used when auto_generate_manifests is true
+manifest_dir: "../../../ad_data/manifests"
+dataset_root: "../../../ad_data/data/dataset"
+extra_val_file: "rruff.jsonl"
 auto_generate_manifests: true
 train_ratio: 0.8
 val_ratio: 0.1
 test_ratio: 0.1
 seed: 42
 
 # --- DataLoader ---
-batch_size: 256  # paper used 64
+batch_size: 64  # match OG run (64 per process)
 num_workers: 8
 pin_memory: true
 persistent_workers: true
@@ -31,22 +32,33 @@ labels_key_map:
   lp_alpha: "_cell_angle_alpha"
   lp_beta: "_cell_angle_beta"
   lp_gamma: "_cell_angle_gamma"
-dtype: "float32"  # one of: float32, float64, float16, bfloat16
-mmap_mode: null  # NumPy memmap mode: 'r', 'r+', 'w+', or null to disable
-floor_at_zero: True  # Clamp negative counts to 0 before any normalization
-normalize_log1p: True  # If true, apply log1p(x) to compress dynamic range
+dtype: "float32"
+mmap_mode: null
+floor_at_zero: true
+normalize_log1p: false  # paper used log1p; disabled here to match the OG pipeline (noise + standardization below)
+model_type: "multiscale"
 
-# --- Model architecture ---
-depths: [3, 3, 9, 3]
-dims: [80, 160, 320, 640]
-kernel_sizes: [7, 7, 7, 7]
-strides: [4, 2, 2, 2]
+# --- ConvNeXt (OG-equivalent configuration) ---
+# 3 stages; one block per stage; large kernels; stride-5 downsampling
+# Matches OG multiscale_cnn_cls_regr_convnextBlock_LeakyReLU.json exactly
+depths: [1, 1, 1]
+dims: [80, 80, 80]
+kernel_sizes: [100, 50, 25]
+strides: [5, 5, 5]
 dropout_rate: 0.3
-layer_scale_init_value: 1.0e-6
-drop_path_rate: 0.1
+# OG uses layer_scale_init_value=0 (disabled)
+layer_scale_init_value: 0.0
+# OG uses constant drop_path_rate=0.3 (not ramped)
+drop_path_rate: 0.3
+ramped_dropout_rate: false
+block_type: "convnext"
+pooling_type: "average"
+final_pool: true
+use_batchnorm: false
+output_type: "flatten"

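For intuition, the stage hyperparameters above compose into a small multiscale 1-D stack: each stage is a stride-5 downsample followed by one large-kernel block. The PyTorch sketch below is illustrative only; the class and argument names are assumptions rather than the repo's actual model code, GroupNorm stands in for the channel LayerNorm, plain Dropout stands in for stochastic depth, and layer scale is omitted because its init value is 0 here.

```python
import torch.nn as nn

class ConvNeXtBlock1d(nn.Module):
    """Illustrative 1-D ConvNeXt-style block with LeakyReLU (hypothetical name)."""
    def __init__(self, dim: int, kernel_size: int, drop_path: float = 0.3):
        super().__init__()
        self.dwconv = nn.Conv1d(dim, dim, kernel_size, padding="same", groups=dim)
        self.norm = nn.GroupNorm(1, dim)      # LayerNorm-like over channels
        self.pwconv1 = nn.Conv1d(dim, 4 * dim, 1)
        self.act = nn.LeakyReLU()
        self.pwconv2 = nn.Conv1d(4 * dim, dim, 1)
        self.drop = nn.Dropout(drop_path)     # stand-in for stochastic depth

    def forward(self, x):
        return x + self.drop(self.pwconv2(self.act(self.pwconv1(self.norm(self.dwconv(x))))))

def make_stages(in_ch=1, dims=(80, 80, 80), kernels=(100, 50, 25), strides=(5, 5, 5)):
    """Assemble stages per the config: stride-5 downsampling conv + one block each."""
    stages, ch = [], in_ch
    for dim, k, s in zip(dims, kernels, strides):
        stages += [nn.Conv1d(ch, dim, kernel_size=s, stride=s), ConvNeXtBlock1d(dim, k)]
        ch = dim
    return nn.Sequential(*stages)
```

With three stride-5 stages the input pattern is downsampled 125x before the flattened features reach the heads.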
 # Heads
-head_dropout: 0.2
+head_dropout: 0.5
 cs_hidden: [2300, 1150]
 sg_hidden: [2300, 1150]
 lp_hidden: [512, 256]
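The head widths above suggest plain MLP towers on the flattened backbone features. A minimal sketch under that assumption (mlp_head and flat_dim are hypothetical names, not the trainer's API):

```python
import torch.nn as nn

def mlp_head(in_dim: int, hidden: list[int], out_dim: int, p_drop: float = 0.5) -> nn.Sequential:
    """Hypothetical head builder: Linear -> LeakyReLU -> Dropout per hidden layer."""
    layers, d = [], in_dim
    for h in hidden:
        layers += [nn.Linear(d, h), nn.LeakyReLU(), nn.Dropout(p_drop)]
        d = h
    layers.append(nn.Linear(d, out_dim))
    return nn.Sequential(*layers)

# cs_head = mlp_head(flat_dim, [2300, 1150], num_cs_outputs, p_drop=0.5)
# sg_head = mlp_head(flat_dim, [2300, 1150], num_sg_outputs, p_drop=0.5)
# lp_head = mlp_head(flat_dim, [512, 256], num_lp_outputs, p_drop=0.5)
```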
@@ -58,7 +70,7 @@ num_lp_outputs: 6
 
 # LP output bounds
 lp_bounds_min: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
-lp_bounds_max: [500.0, 500.0, 500.0, 180.0, 180.0, 180.0]
+lp_bounds_max: [300.0, 300.0, 300.0, 180.0, 180.0, 180.0]
 bound_lp_with_sigmoid: true
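With bound_lp_with_sigmoid enabled, the six lattice-parameter outputs are presumably squashed into [lp_bounds_min, lp_bounds_max]. A one-line sketch of the usual construction (an assumption, not the verified trainer code):

```python
import torch

def bound_lp(raw, lp_min, lp_max):
    # Map unbounded regression outputs element-wise into [lp_min, lp_max];
    # with the values above, a, b, c land in [0, 300] and the angles in [0, 180].
    return lp_min + torch.sigmoid(raw) * (lp_max - lp_min)
```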
 
 # Loss weights
@@ -68,30 +80,39 @@ lambda_lp: 1.0
 
 # Optional GEMD term on SG
 gemd_mu: 0.0
-gemd_distance_matrix_path:  # e.g., "path/to/space_group_distance_matrix.npy" to enable GEMD
+gemd_distance_matrix_path: null
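If gemd_mu is raised above zero and a distance matrix is supplied, an extra ground-metric penalty is added on the space-group head. The exact formulation lives in the trainer; the sketch below shows one common "expected ground distance" variant and is an assumption, not the confirmed implementation:

```python
import torch

def gemd_term(sg_logits, sg_target, D, mu):
    # sg_logits: (B, num_sg) logits; sg_target: (B,) true class indices;
    # D: (num_sg, num_sg) distances loaded from the .npy file named above.
    probs = sg_logits.softmax(dim=-1)
    # Expected distance between the predicted distribution and the true class:
    return mu * (probs * D[sg_target]).sum(dim=-1).mean()
```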
 
-# Optimizer
-lr: 0.00015  # paper used 2e-4
-weight_decay: 0.01  # paper used 0.01
+# Optimizer (paper): AdamW, lr=2e-4, wd=0.01
+lr: 0.0002
+weight_decay: 0.01
 use_adamw: true
 gradient_clip_val: 1.0
 gradient_clip_algorithm: "norm"
 
+# --- Noise augmentation (training split only; matches paper) ---
+# If provided, noise is applied dynamically per-sample in the DataModule using the same
+# sequencing as the paper: Poisson -> normalize -> add Gaussian -> renormalize -> rescale.
+# Set ranges to null to disable.
+noise_poisson_range: [1.0, 100.0]
+noise_gaussian_range: [0.001, 0.1]
+
+# Standardize after noise to match OG CLI (--standardize-to 0 100)
+standardize_to: [0.0, 100.0]
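The sequencing comment above implies roughly the per-pattern transform below. This NumPy sketch is illustrative only: how the ranges are sampled and how the final rescale interacts with standardize_to are my assumptions, not the DataModule's verified behavior.

```python
import numpy as np

rng = np.random.default_rng()

def add_noise(x, poisson_range=(1.0, 100.0), gaussian_range=(0.001, 0.1),
              standardize_to=(0.0, 100.0)):
    """Poisson -> normalize -> add Gaussian -> renormalize -> rescale, per sample."""
    dose = rng.uniform(*poisson_range)             # sampled Poisson scale (assumed uniform)
    y = rng.poisson(np.clip(x, 0, None) * dose).astype(np.float64)
    y /= max(y.max(), 1e-12)                       # normalize to [0, 1]
    sigma = rng.uniform(*gaussian_range)           # sampled Gaussian sigma (assumed uniform)
    y += rng.normal(0.0, sigma, size=y.shape)      # add Gaussian noise
    y -= y.min()                                   # renormalize to [0, 1]
    y /= max(y.max(), 1e-12)
    lo, hi = standardize_to
    return lo + y * (hi - lo)                      # rescale to [0, 100]
```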
 # --- Logging ---
-logger: "mlflow"  # 'csv' or 'mlflow'
-csv_logger_name: "model_logs"
-mlflow_experiment_name: "OpenAlphaDiffract_ConvFUll"
-mlflow_tracking_uri: null  # null uses MLflow default (file:./mlruns)
-mlflow_run_name: "OpenAlphaDiffract_Run"
+logger: "mlflow"
+csv_logger_name: "model_logs_convnext_paper"
+mlflow_experiment_name: "AlphaDiffract_Paper_ConvNeXt"
+mlflow_tracking_uri: null
+mlflow_run_name: "ConvNeXt_Paper_Run"
 
 # --- Trainer settings ---
-default_root_dir: "outputs/model"
-max_epochs: 50
+default_root_dir: "outputs/convnext_paper"
+max_epochs: 100
 accumulate_grad_batches: 1
-precision: "bf16-mixed"  # e.g., '32', '16-mixed', 'bf16-mixed'
+precision: "32"  # match OG (AMP disabled)
 accelerator: "gpu"
 devices: 1
-log_every_n_steps: 50
+log_every_n_steps: 200
 deterministic: false
 benchmark: true
 
@@ -102,5 +123,5 @@ save_top_k: 1
 every_n_epochs: 1
 
 # --- Evaluation ---
-resume_from:  # e.g., "outputs/paper_model/checkpoints/epochXYZ.ckpt"
+resume_from: null
 test_after_train: true