#!/usr/bin/env python3
# update_modulus_csv.py
# Usage: python update_modulus_csv.py PID
#
# Reads PID-prefixed modulus result files in the *current directory*:
#   {PID}_K_result.dat, {PID}_G1_result.dat, {PID}_G2_result.dat,
#   {PID}_G_result.dat, {PID}_nu_result.dat, {PID}_E_result.dat
#
# Writes/updates six separate CSVs in ../../RESULTS/:
#   K_MD.csv, G1_MD.csv, G2_MD.csv, G_MD.csv, NU_MD.csv, E_MD.csv
#
# Each CSV has columns: PID, SMILES, <property>  (e.g. "K(GPa)")
# SMILES is looked up from ../../../SMILES.csv (expects headers: PID,SMILES)

import sys
import csv
import re
from pathlib import Path
from typing import Optional

# --- Config ---
# All paths are relative to the current working directory, not to this script.
OUT_DIR = Path("../../RESULTS")
# NOTE(review): the original comment read "run from POLYMER_DATA/MODULUS//";
# presumably a "<PID>" placeholder was lost, i.e. POLYMER_DATA/MODULUS/<PID>/.
SMILES_CSV = Path("../../..") / "SMILES.csv"
PID_COL = "PID"
SMILES_COL = "SMILES"

# Input .dat files (PID-prefixed): property column name -> file name suffix
PROP_FILES_BASE = {
    "K(GPa)": "K_result.dat",
    "G1(GPa)": "G1_result.dat",
    "G2(GPa)": "G2_result.dat",
    "G(GPa)": "G_result.dat",  # combined shear modulus (Shear Modulus = ...)
    "nu": "nu_result.dat",
    "E(GPa)": "E_result.dat",
}

# Output CSV filenames per property
PROP_OUTFILE = {
    "K(GPa)": "K_MD.csv",
    "G1(GPa)": "G1_MD.csv",
    "G2(GPa)": "G2_MD.csv",
    "G(GPa)": "G_MD.csv",  # new CSV for combined shear modulus
    "nu": "NU_MD.csv",
    "E(GPa)": "E_MD.csv",
}

# Optionally signed decimal / scientific-notation number.  The lookbehind sits
# *after* the optional sign so that "-0.1" keeps its sign, while digits glued
# to a label ("G1", "v2.5") are rejected instead of being taken as the value.
FLOAT_RE = re.compile(
    r"[-+]?(?<![\w.])(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
)


def usage():
    """Print the command-line usage line and exit with status 1."""
    print("Usage: python update_modulus_csv.py PID")
    sys.exit(1)


def first_number(text: str) -> Optional[str]:
    """Return the first standalone number in *text*, or None if there is none.

    The number is returned as the matched string (not converted to float) so
    the CSV keeps exactly the digits that were in the result file.  Digits
    that directly follow a letter, digit, underscore or dot are skipped, so
    a label such as "G1 = 2.5" yields "2.5" rather than "1".
    """
    match = FLOAT_RE.search(text)
    return match.group(0) if match else None


def load_smiles(pid: str, smiles_csv: Optional[Path] = None) -> str:
    """Look up the SMILES string for *pid*.

    Args:
        pid: Identifier to match against the PID column (after stripping
            whitespace from the cell).
        smiles_csv: CSV file to search; defaults to the module-level
            SMILES_CSV.

    Returns:
        The stripped SMILES cell of the first matching row, or "" when the
        file is missing, has no PID/SMILES headers, or has no matching row.
    """
    path = SMILES_CSV if smiles_csv is None else smiles_csv
    if not path.exists():
        return ""
    # utf-8-sig: a leading BOM would otherwise become part of the first
    # header name ("\ufeffPID") and make every lookup fail silently.
    with path.open(newline="", encoding="utf-8-sig") as fh:
        rdr = csv.DictReader(fh)
        if not rdr.fieldnames:
            return ""
        # Header match is case-insensitive and ignores surrounding spaces.
        lower = {name.strip().lower(): name for name in rdr.fieldnames}
        pid_k, smi_k = lower.get("pid"), lower.get("smiles")
        if not pid_k or not smi_k:
            return ""
        for row in rdr:
            if (row.get(pid_k) or "").strip() == pid:
                return (row.get(smi_k) or "").strip()
    return ""


def read_existing_prop(csv_path: Path, prop_col: str) -> dict:
    """Read an existing per-property CSV into a dict keyed by PID.

    Args:
        csv_path: CSV to read; a missing file yields an empty dict.
        prop_col: Name of the property column to keep.

    Returns:
        Mapping PID -> {PID_COL, SMILES_COL, prop_col}, all values stripped
        strings.  Rows with an empty PID are dropped, later duplicates of a
        PID overwrite earlier ones, and any other columns are discarded.
    """
    rows = {}
    if not csv_path.exists():
        return rows
    with csv_path.open(newline="", encoding="utf-8-sig") as fh:
        for row in csv.DictReader(fh):
            key = (row.get(PID_COL) or "").strip()
            if not key:
                continue
            rows[key] = {
                PID_COL: key,
                SMILES_COL: (row.get(SMILES_COL) or "").strip(),
                prop_col: (row.get(prop_col) or "").strip(),
            }
    return rows


def write_prop(csv_path: Path, rows: dict, prop_col: str):
    """Write *rows* to *csv_path* with columns PID, SMILES, *prop_col*.

    Parent directories are created as needed and rows are written in sorted
    PID order.  The data goes to a sibling "<name>.tmp" file first and is
    then moved over *csv_path*, so an interrupted run cannot leave the
    accumulated results file truncated.
    """
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = csv_path.with_name(csv_path.name + ".tmp")
    try:
        with tmp_path.open("w", newline="", encoding="utf-8") as fh:
            writer = csv.DictWriter(
                fh, fieldnames=[PID_COL, SMILES_COL, prop_col]
            )
            writer.writeheader()
            # Write in stable PID order
            for pid_key in sorted(rows):
                writer.writerow(rows[pid_key])
        tmp_path.replace(csv_path)
    except BaseException:
        # Do not leave a half-written temp file behind; re-raise unchanged.
        if tmp_path.exists():
            tmp_path.unlink()
        raise


def _collect_updates(pid: str) -> dict:
    """Read each {pid}_<suffix> file in the cwd; return {property: value}."""
    updates = {}
    for prop_col, base in PROP_FILES_BASE.items():
        fname = f"{pid}_{base}"
        p = Path(fname)
        if not p.exists():
            print(f"[SKIP] {fname}: not found")
            continue
        try:
            # errors="ignore" means decoding cannot fail; only OS-level
            # errors (permissions, path is a directory, ...) are expected.
            txt = p.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            print(f"[SKIP] {fname}: cannot read")
            continue
        val = first_number(txt)
        if val is None:
            print(f"[SKIP] {fname}: no numeric value found")
            continue
        updates[prop_col] = val
        print(f"[OK] {fname}: {prop_col}={val}")
    return updates


def main():
    """Upsert the modulus values for the PID given on the command line.

    Exits via usage() unless exactly one non-blank argument is given.  Each
    property with a readable value is inserted or updated in its own CSV
    under OUT_DIR; properties whose result file is missing, unreadable or
    contains no number are skipped with a message.
    """
    if len(sys.argv) != 2:
        usage()
    pid = sys.argv[1].strip()
    if not pid:
        usage()

    # Gather values from PID-prefixed files in current directory
    updates = _collect_updates(pid)
    if not updates:
        print("[INFO] No modulus values found; nothing to write.")
        return

    smiles = load_smiles(pid)

    # Upsert each property into its own CSV
    for prop_col, val in updates.items():
        out_csv = OUT_DIR / PROP_OUTFILE[prop_col]
        rows = read_existing_prop(out_csv, prop_col)
        row = rows.get(pid, {PID_COL: pid, SMILES_COL: "", prop_col: ""})
        row[PID_COL] = pid
        # only overwrite SMILES if we have a non-empty lookup, else keep existing
        row[SMILES_COL] = smiles or row.get(SMILES_COL, "")
        row[prop_col] = val
        rows[pid] = row
        write_prop(out_csv, rows, prop_col)
        print(f"[DONE] Updated {out_csv.name} for PID={pid}")


if __name__ == "__main__":
    main()