#!/usr/bin/env python3
# update_modulus_csv.py
# Usage: python update_modulus_csv.py PID
#
# Reads PID-prefixed modulus result files in the *current directory*:
# {PID}_K_result.dat, {PID}_G1_result.dat, {PID}_G2_result.dat,
# {PID}_G_result.dat, {PID}_nu_result.dat, {PID}_E_result.dat
#
# Writes/updates six separate CSVs in ../../RESULTS/:
# K_MD.csv, G1_MD.csv, G2_MD.csv, G_MD.csv, NU_MD.csv, E_MD.csv
#
# Each CSV has columns: PID, SMILES, <property>
# SMILES is looked up from ../../../SMILES.csv (expects headers: PID,SMILES)
import sys, csv, re
from pathlib import Path
# --- Configuration ---
# Relative paths below assume the script is run from
# POLYMER_DATA/MODULUS/<PID>/.
OUT_DIR = Path("../../RESULTS")
SMILES_CSV = Path("../../..") / "SMILES.csv"

# Header names of the identifier and structure columns in the output CSVs.
PID_COL = "PID"
SMILES_COL = "SMILES"

# Property column -> suffix of the input file; the file actually read is
# "<PID>_<suffix>" in the current directory.
PROP_FILES_BASE = {
    "K(GPa)": "K_result.dat",
    "G1(GPa)": "G1_result.dat",
    "G2(GPa)": "G2_result.dat",
    # Combined shear modulus (Shear Modulus = ...).
    "G(GPa)": "G_result.dat",
    "nu": "nu_result.dat",
    "E(GPa)": "E_result.dat",
}

# Property column -> name of the CSV (inside OUT_DIR) that collects it.
PROP_OUTFILE = {
    "K(GPa)": "K_MD.csv",
    "G1(GPa)": "G1_MD.csv",
    "G2(GPa)": "G2_MD.csv",
    # CSV for the combined shear modulus.
    "G(GPa)": "G_MD.csv",
    "nu": "NU_MD.csv",
    "E(GPa)": "E_MD.csv",
}
# Matches a decimal number with an optional sign, fraction and exponent.
# The leading negative lookbehind rejects digits glued to a preceding word
# character or dot, so the "1" in a label such as "G1 = 2.5" is not mistaken
# for the value that follows it.
FLOAT_RE = re.compile(r"(?<![\w.])[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")
def usage():
    """Print the command-line synopsis and terminate with exit status 1."""
    print("Usage: python update_modulus_csv.py PID")
    raise SystemExit(1)
def first_number(text: str):
    """Return the first substring of *text* matched by ``FLOAT_RE``.

    The match is returned as text (it is not converted to ``float``);
    ``None`` is returned when *text* contains no match.
    """
    found = FLOAT_RE.search(text)
    if found is None:
        return None
    return found.group(0)
def load_smiles(pid: str, smiles_csv=None) -> str:
    """Return the SMILES string recorded for *pid*, or "" when unavailable.

    Args:
        pid: Identifier to look up; compared against the stripped PID cell.
        smiles_csv: Optional path of the lookup table. When omitted, the
            module-level ``SMILES_CSV`` is used.

    Returns:
        The stripped SMILES cell of the first row whose PID matches, or an
        empty string when the file does not exist, lacks a PID or SMILES
        header, or has no matching row.
    """
    csv_path = Path(SMILES_CSV if smiles_csv is None else smiles_csv)
    if not csv_path.exists():
        return ""
    # "utf-8-sig" drops a leading byte-order mark (and reads BOM-less files
    # unchanged); otherwise the first header would be "\ufeffPID" and the
    # lookup below would silently fail.
    with csv_path.open(newline="", encoding="utf-8-sig") as fh:
        reader = csv.DictReader(fh)
        # Match header names case-insensitively and ignore padding, but keep
        # the original spelling because that is the key DictReader uses.
        columns = {
            name.strip().lower(): name for name in reader.fieldnames or ()
        }
        pid_key = columns.get("pid")
        smiles_key = columns.get("smiles")
        if pid_key is None or smiles_key is None:
            return ""
        for row in reader:
            # Short rows yield None for missing cells; treat those as "".
            if (row.get(pid_key) or "").strip() == pid:
                return (row.get(smiles_key) or "").strip()
    return ""
def read_existing_prop(csv_path: Path, prop_col: str):
    """Load a per-property CSV into a dict of row dicts keyed by PID.

    Each stored row holds exactly the PID, SMILES and *prop_col* cells,
    stripped of surrounding whitespace (missing cells become ""). Rows with
    an empty PID are dropped. A missing file yields an empty dict.
    """
    existing = {}
    if not csv_path.exists():
        return existing

    def cell(record, column):
        # DictReader yields None for absent cells; normalise to "".
        return (record.get(column) or "").strip()

    with csv_path.open(newline="", encoding="utf-8") as handle:
        for record in csv.DictReader(handle):
            key = cell(record, PID_COL)
            if key:
                existing[key] = {
                    PID_COL: key,
                    SMILES_COL: cell(record, SMILES_COL),
                    prop_col: cell(record, prop_col),
                }
    return existing
def write_prop(csv_path: Path, rows: dict, prop_col: str):
    """Rewrite *csv_path* with a header plus every row in *rows*.

    The parent directory is created when needed. Columns are PID, SMILES and
    *prop_col*; rows are emitted in sorted order of their PID keys so the
    file layout stays stable between runs.
    """
    columns = [PID_COL, SMILES_COL, prop_col]
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    with csv_path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows[key] for key in sorted(rows))
def _collect_updates(pid: str) -> dict:
    """Return {property column: value text} read from the PID-prefixed files.

    Files are looked up in the current directory. A file that is missing,
    unreadable, or contains no number is reported with a [SKIP] line and
    left out of the result.
    """
    updates = {}
    for prop_col, base in PROP_FILES_BASE.items():
        fname = f"{pid}_{base}"
        path = Path(fname)
        if not path.exists():
            print(f"[SKIP] {fname}: not found")
            continue
        try:
            # errors="ignore" drops undecodable bytes, so only OS-level
            # failures (permissions, path is a directory, ...) can occur.
            txt = path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            print(f"[SKIP] {fname}: cannot read")
            continue
        val = first_number(txt)
        if val is None:
            print(f"[SKIP] {fname}: no numeric value found")
            continue
        updates[prop_col] = val
        print(f"[OK] {fname}: {prop_col}={val}")
    return updates


def _upsert_property(pid: str, smiles: str, prop_col: str, val: str) -> None:
    """Insert or replace the row for *pid* in the CSV that tracks *prop_col*."""
    out_csv = OUT_DIR / PROP_OUTFILE[prop_col]
    rows = read_existing_prop(out_csv, prop_col)
    previous_smiles = rows.get(pid, {}).get(SMILES_COL, "")
    rows[pid] = {
        PID_COL: pid,
        # Only overwrite SMILES when the lookup produced one; otherwise keep
        # whatever the existing row already had.
        SMILES_COL: smiles or previous_smiles,
        prop_col: val,
    }
    write_prop(out_csv, rows, prop_col)
    print(f"[DONE] Updated {out_csv.name} for PID={pid}")


def main():
    """Command-line entry point: ``python update_modulus_csv.py PID``.

    Collects the modulus values available for the given PID, looks up its
    SMILES string, and upserts one row into each per-property CSV. Exits via
    ``usage()`` when the argument list is wrong or the PID is blank; returns
    without writing anything when no value could be read.
    """
    if len(sys.argv) != 2:
        usage()
    pid = sys.argv[1].strip()
    if not pid:
        usage()

    updates = _collect_updates(pid)
    if not updates:
        print("[INFO] No modulus values found; nothing to write.")
        return

    smiles = load_smiles(pid)
    for prop_col, val in updates.items():
        _upsert_property(pid, smiles, prop_col, val)


if __name__ == "__main__":
    main()
|