Skip to content

Commit abe3b94

Browse files
committed
feat: Use correct reduction for GSAS and pass base parameters via an embedded comment in the .cif file
1 parent 5b70399 commit abe3b94

5 files changed

Lines changed: 56 additions & 51 deletions

File tree

configs/simulator.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@ worker_base_dir: "/data/workers"
1515
parallel_jobs: 10
1616
sims_per_file: 2
1717
master_seed: 42 # Reproducible parameter sampling
18-
cleanup_worker_dirs: true # If false, worker directories will be kept
18+
cleanup_worker_dirs: true
19+
20+
# If true, read the space group (SG) from the "# _original_symmetry_space_group_name_H-M" comment in the CIF
21+
# If using custom CIFs without this comment, set to false to read the standard CIF symmetry tags instead
22+
parse_from_comment: true
1923

2024
# --- Simulation Parameter Ranges ---
2125
# The format is ALWAYS (smaller_number, larger_number)

docker/downloader.Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ WORKDIR /app
1010
COPY src/downloader/requirements.txt /tmp/requirements.txt
1111
RUN pip install --no-cache-dir -r /tmp/requirements.txt
1212

13+
ENV MPLCONFIGDIR=/app/.cache/matplotlib
14+
1315
# Code and configs are mounted at runtime via docker-compose volumes:
1416
# - ./src/downloader -> /app
1517
# - ./configs -> /configs

src/downloader/downloader.py

Lines changed: 15 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -36,46 +36,28 @@ def _write_cif_from_struct(
3636
out_path: Path,
3737
sg_num: Optional[int] = None,
3838
sg_symbol: Optional[str] = None,
39-
symprec: float = 0.01,
40-
angle_tolerance: float = 5.0,
4139
) -> None:
4240
"""
43-
Write structure to CIF using pymatgen's CifWriter, ensuring symmetry metadata (_symmetry_* tags)
44-
are present and reflect the detected space group from SpacegroupAnalyzer.
41+
Write structure to CIF using pymatgen's CifWriter without forcing symmetry detection (emit P1/minimal symmetry).
42+
Then annotate the file with the original space group symbol as a comment line so downstream CIF parsers ignore it.
4543
"""
4644
try:
47-
# Write CIF with symmetry detection enabled
48-
CifWriter(structure, symprec=symprec, angle_tolerance=angle_tolerance).write_file(out_path)
45+
# Write CIF without forcing symmetry detection (emit P1/minimal symmetry with fully expanded sites)
46+
CifWriter(structure).write_file(out_path)
4947
except Exception as e:
5048
raise RuntimeError(f"Failed to write CIF to {out_path}: {e}")
5149

52-
# Post-process CIF to force writing of symmetry tags
53-
try:
54-
with open(out_path, "r", errors="ignore") as f:
55-
lines = f.readlines()
56-
57-
def set_or_insert(tag: str, value: Any, quote: bool = False) -> None:
58-
val_str = f"'{value}'" if quote else str(value)
59-
for i, line in enumerate(lines):
60-
if line.strip().startswith(tag):
61-
lines[i] = f"{tag} {val_str}\n"
62-
break
63-
else:
64-
# Insert after the first line (typically the data_ line)
65-
insert_idx = 1 if len(lines) >= 1 else 0
66-
lines.insert(insert_idx, f"{tag} {val_str}\n")
67-
68-
if sg_num is not None:
69-
set_or_insert("_symmetry_Int_Tables_number", sg_num, quote=False)
70-
if sg_symbol:
71-
set_or_insert("_symmetry_space_group_name_H-M", sg_symbol, quote=True)
72-
73-
with open(out_path, "w") as f:
74-
f.writelines(lines)
75-
except Exception:
76-
# Non-fatal if symmetry tag insertion fails; CIF already written
77-
pass
50+
# Post-process CIF to add original symmetry as comments (ignored by CIF parsers)
51+
with open(out_path, "r", errors="ignore") as f:
52+
lines = f.readlines()
53+
54+
comment = f"# _original_symmetry_space_group_name_H-M '{str(sg_symbol)}'\n"
55+
56+
insert_idx = 1 if len(lines) >= 1 else 0
57+
lines.insert(insert_idx, comment)
7858

59+
with open(out_path, "w") as f:
60+
f.writelines(lines)
7961

8062
def niggli_reduce(
8163
structure: Structure,
@@ -348,9 +330,7 @@ def download_cifs_entire_db(
348330
std_struct,
349331
out_path,
350332
sg_num=sg_num,
351-
sg_symbol=sga_conv.get_space_group_symbol(),
352-
symprec=symprec,
353-
angle_tolerance=angle_tolerance,
333+
sg_symbol=sga_conv.get_space_group_symbol()
354334
)
355335
summary["downloaded"] += 1
356336

src/simulator/diffraction_generator.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def _generate_simulation_tasks(self, n_sims_per_file, master_seed, cleanup_worke
8585
'constant_noise_range': param_ranges['constant_noise_range'],
8686
'cleanup_worker_dir': cleanup_worker_dirs,
8787
'output_filename': f"{stem}-{variation_index}.npy",
88+
'parse_from_comment': bool(kwargs.get('parse_from_comment', False)),
8889
}
8990
tasks_to_run.append(params)
9091
job_id_counter += 1
@@ -164,6 +165,7 @@ def main() -> None:
164165
sims_per_file = int(cfg["sims_per_file"])
165166
master_seed = int(cfg["master_seed"])
166167
cleanup_worker_dirs = bool(cfg["cleanup_worker_dirs"])
168+
parse_from_comment = bool(cfg.get("parse_from_comment", False))
167169

168170
# Parameter ranges
169171
ranges = {
@@ -192,6 +194,7 @@ def main() -> None:
192194
print(f"Sims per file: {sims_per_file}")
193195
print(f"Master seed: {master_seed}")
194196
print(f"Cleanup worker dirs: {cleanup_worker_dirs}")
197+
print(f"Parse from comment: {parse_from_comment}")
195198
print("Parameter ranges:")
196199
for k, v in ranges.items():
197200
print(f" {k}: {v}")
@@ -211,6 +214,7 @@ def main() -> None:
211214
n_sims_per_file=sims_per_file,
212215
master_seed=master_seed,
213216
cleanup_worker_dirs=cleanup_worker_dirs,
217+
parse_from_comment=parse_from_comment,
214218
**ranges,
215219
)
216220
except Exception as e:

src/simulator/simulation_worker.py

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -87,25 +87,40 @@ def get_sg_num_from_symbol(sg_symbol):
8787
try: return SpaceGroup.sg_encoding[sg_symbol]['int_number']
8888
except KeyError: return None
8989

90-
def _parse_cif_file(file_path):
90+
def _parse_cif_file(file_path, parse_from_comment: bool = False):
9191
"""Parses a CIF file to get the space group number and crystal system."""
9292
sg_num = None
9393
with open(file_path, 'r', errors='ignore') as f:
94-
for line in f:
95-
if '_symmetry_Int_Tables_number' in line or '_space_group_IT_number' in line:
96-
try:
97-
sg_num = int(line.split()[-1])
94+
for raw_line in f:
95+
line = raw_line.strip()
96+
97+
if parse_from_comment:
98+
# Prefer provenance comment inserted by downloader, e.g.:
99+
# # _original_symmetry_space_group_name_H-M 'I-43m'
100+
if line.startswith("# _original_symmetry_space_group_name_H-M"):
101+
if '"' in line:
102+
sg_sym = line.split('"')[1].replace(' ', '')
103+
elif "'" in line:
104+
sg_sym = line.split("'")[1].replace(' ', '')
105+
sg_num = get_sg_num_from_symbol(sg_sym)
98106
return sg_num, get_crystal_system(sg_num)
99-
except (ValueError, IndexError): continue
107+
108+
else:
109+
# Standard CIF tags (non-comment)
110+
if '_symmetry_Int_Tables_number' in line or '_space_group_IT_number' in line:
111+
try:
112+
sg_num = int(line.split()[-1])
113+
return sg_num, get_crystal_system(sg_num)
114+
except (ValueError, IndexError): continue
100115

101-
# H-M name tag with quoted values: handle original double quotes and the provided single-quote example
102-
if '_symmetry_space_group_name_H-M' in line:
103-
if '"' in line:
104-
sg_sym = line.split('"')[1].replace(' ', '')
105-
elif "'" in line:
106-
sg_sym = line.split("'")[1].replace(' ', '')
107-
sg_num = get_sg_num_from_symbol(sg_sym)
108-
return sg_num, get_crystal_system(sg_num)
116+
# H-M name tag with quoted values (ignore commented lines)
117+
if '_symmetry_space_group_name_H-M' in line and not line.startswith("#"):
118+
if '"' in line:
119+
sg_sym = line.split('"')[1].replace(' ', '')
120+
elif "'" in line:
121+
sg_sym = line.split("'")[1].replace(' ', '')
122+
sg_num = get_sg_num_from_symbol(sg_sym)
123+
return sg_num, get_crystal_system(sg_num)
109124

110125
return None, None
111126

@@ -212,7 +227,7 @@ def run_single_simulation(params):
212227
try:
213228
if input_file.suffix.lower() == '.cif':
214229
file_type = 'cif'
215-
sg_num, cs = _parse_cif_file(input_file)
230+
sg_num, cs = _parse_cif_file(input_file, parse_from_comment=params['parse_from_comment'])
216231
if sg_num is None: raise ValueError("Could not parse space group number from CIF.")
217232
elif input_file.suffix.lower() in ['.dif', '.txt']:
218233
file_type = 'dif'

0 commit comments

Comments
 (0)