File size: 3,173 Bytes
930ea3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python3
# update_tc_csv.py
# Usage: python update_tc_csv.py PID
#
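# Reads the thermal conductivity result from the *current directory*:
#   {PID}_TC_result.dat  -> ../../RESULTS/TC_MD.csv  (columns: PID,SMILES,TC)
# The first numeric token found in the .dat file is stored as TC.
#
# Looks up SMILES for PID in ../../../SMILES.csv (expects headers: PID,SMILES;
# header names are matched case-insensitively). SMILES is left empty when the
# file, the columns, or the PID cannot be found.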

import sys, csv, re
from pathlib import Path

# Column names of the results CSV.
PID_COL = "PID"
SMILES_COL = "SMILES"
VALUE_COL = "TC"

# All paths are relative to the working directory, which is expected to be
# POLYMER_DATA/THERMAL_CONDUCTIVITY/<PID>/.
SMILES_CSV = Path("../../..") / "SMILES.csv"
OUT_DIR = Path("../../RESULTS")
OUT_CSV = OUT_DIR / "TC_MD.csv"

# Optionally signed decimal number with an optional exponent,
# e.g. "12", "-0.5", ".25", "1.2e-3".
FLOAT_RE = re.compile(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")

def usage():
    """Print the command-line synopsis and terminate with exit status 1."""
    print("Usage: python update_tc_csv.py PID")
    raise SystemExit(1)

def first_number(text: str):
    """Return the first numeric token in *text* as a string, or None.

    The token is whatever ``FLOAT_RE`` matches first; it is returned
    verbatim (not converted to ``float``).
    """
    match = FLOAT_RE.search(text)
    if match is None:
        return None
    return match.group(0)

def load_smiles(pid: str) -> str:
    """Return the SMILES string recorded for *pid* in ``SMILES_CSV``.

    Header names are matched case-insensitively and with surrounding
    whitespace ignored. The file is decoded as ``utf-8-sig`` so that a
    leading UTF-8 byte-order mark, if present, is dropped instead of
    becoming part of the first header name (which would make the PID
    column undetectable).

    Args:
        pid: Polymer ID to look up; compared against the stripped PID cell.

    Returns:
        The stripped SMILES value of the first matching row, or ``""`` when
        the file does not exist, has no header row, lacks a PID or SMILES
        column, or contains no row for *pid*.
    """
    if not SMILES_CSV.exists():
        return ""
    with SMILES_CSV.open(newline="", encoding="utf-8-sig") as fh:
        reader = csv.DictReader(fh)
        headers = reader.fieldnames
        if not headers:
            return ""
        # Map normalized header -> actual header so rows can be indexed
        # with the exact key DictReader uses.
        by_name = {name.strip().lower(): name for name in headers}
        pid_key = by_name.get("pid")
        smi_key = by_name.get("smiles")
        if not pid_key or not smi_key:
            return ""
        for row in reader:
            # Cells may be None for short rows; treat those as empty.
            if (row.get(pid_key) or "").strip() == pid:
                return (row.get(smi_key) or "").strip()
    return ""

def upsert(csv_path: Path, pid: str, smiles: str, value: str):
    """Insert or replace the row for *pid* in the CSV at *csv_path*.

    Existing rows are loaded only when the file's header contains all of
    ``PID_COL``, ``SMILES_COL`` and ``VALUE_COL``; otherwise the file is
    rewritten with just the new row. Rows with an empty PID are dropped,
    and any additional columns are not carried over. The file is always
    rewritten in full with the three-column header.

    The existing file is decoded as ``utf-8-sig`` so that a leading UTF-8
    byte-order mark does not corrupt the first header name; with plain
    ``utf-8`` such a file would fail the header check and every existing
    row would be silently discarded on rewrite.

    Args:
        csv_path: Destination CSV; parent directories are created if needed.
        pid: Key of the row to insert or overwrite.
        smiles: SMILES string stored for *pid*.
        value: Value stored in the ``VALUE_COL`` column for *pid*.
    """
    required = {PID_COL, SMILES_COL, VALUE_COL}
    rows = {}
    if csv_path.exists():
        with csv_path.open(newline="", encoding="utf-8-sig") as fh:
            reader = csv.DictReader(fh)
            if reader.fieldnames and required.issubset(reader.fieldnames):
                for row in reader:
                    key = (row.get(PID_COL) or "").strip()
                    if key:
                        # Later duplicates of a PID overwrite earlier ones.
                        rows[key] = {
                            PID_COL: key,
                            SMILES_COL: row.get(SMILES_COL, ""),
                            VALUE_COL: row.get(VALUE_COL, ""),
                        }

    # An existing PID keeps its position; a new PID is appended at the end.
    rows[pid] = {PID_COL: pid, SMILES_COL: smiles, VALUE_COL: value}

    csv_path.parent.mkdir(parents=True, exist_ok=True)
    with csv_path.open("w", newline="", encoding="utf-8") as fh:
        writer = csv.DictWriter(fh, fieldnames=[PID_COL, SMILES_COL, VALUE_COL])
        writer.writeheader()
        writer.writerows(rows.values())

def main():
    """Record the TC value for the PID given on the command line.

    Reads ``{PID}_TC_result.dat`` from the current directory, takes the
    first numeric token in it, looks up the SMILES for the PID and upserts
    the ``(PID, SMILES, TC)`` row into ``OUT_CSV``.

    Exits with status 1 (via ``usage``) when the PID argument is missing or
    blank. Exits with status 0 after printing a ``[SKIP]`` line when the
    result file is missing, unreadable, or contains no number.
    """
    if len(sys.argv) != 2:
        usage()
    pid = sys.argv[1].strip()
    if not pid:
        usage()

    src = Path(f"{pid}_TC_result.dat")
    if not src.exists():
        print(f"[SKIP] {src.name}: not found")
        sys.exit(0)

    try:
        # Undecodable bytes are dropped, so only I/O failures can occur here.
        text = src.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        print(f"[SKIP] {src.name}: cannot read")
        sys.exit(0)

    val = first_number(text)
    if val is None:
        print(f"[SKIP] {src.name}: no numeric value found")
        sys.exit(0)

    smiles = load_smiles(pid)
    upsert(OUT_CSV, pid, smiles, val)
    # Separator between source and destination names was missing, which
    # fused them into a single unreadable token.
    print(f"[OK] {src.name} -> {OUT_CSV.name}: PID={pid} {VALUE_COL}={val}")

# Run the updater only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()