| | from __future__ import annotations |
| |
|
| | import pandas as pd |
| | import streamlit as st |
| | from rdkit import Chem |
| | from rdkit import RDLogger |
| |
|
# Turn off every RDKit log channel matching "rdApp.*" at import time.
# NOTE(review): presumably this keeps RDKit's parse warnings for invalid
# SMILES out of the app output -- confirm that silencing errors is intended.
RDLogger.DisableLog("rdApp.*")
| |
|
| | |
| | |
| | |
# Human-readable label for each data-source key. Insertion order is the
# canonical source order used throughout this module.
SOURCE_LABELS = {
    "EXP": "Experimental",
    "MD": "Molecular Dynamics",
    "DFT": "Density Functional Theory",
    "GC": "Group Contribution",
}

# Source keys, in the same order as the labels above; each key names a
# data/{KEY}.csv file read by load_source_csv().
SOURCES = list(SOURCE_LABELS)

# Location of the optional polymer name/class lookup table.
POLYINFO_FILE = "data/POLYINFO.csv"
| |
|
| |
|
def canonicalize_smiles(smiles: str) -> str | None:
    """Return the RDKit canonical form of *smiles*.

    Surrounding whitespace is ignored. ``None`` is returned when the input
    is empty/falsy, blank after stripping, or cannot be parsed by RDKit.
    """
    cleaned = smiles.strip() if smiles else ""
    if cleaned:
        parsed = Chem.MolFromSmiles(cleaned)
        # MolFromSmiles signals a parse failure by returning None.
        if parsed is not None:
            return Chem.MolToSmiles(parsed, canonical=True)
    return None
| |
|
| |
|
| | |
# Display metadata for every supported property column.
# Keys are the lowercase column names produced by load_source_csv(); each
# entry gives the human-readable name and the unit string shown to the user.
# Unit strings use the real middle dot (U+00B7) and angstrom sign (U+00C5);
# the previous values were mojibake from a UTF-8 -> GBK mis-decoding.
PROPERTY_META = {
    # Thermal
    "tm": {"name": "Melting temperature", "unit": "K"},
    "tg": {"name": "Glass transition temperature", "unit": "K"},
    "td": {"name": "Thermal diffusivity", "unit": "m^2/s"},
    "tc": {"name": "Thermal conductivity", "unit": "W/m·K"},
    "cp": {"name": "Specific heat capacity", "unit": "J/kg·K"},
    # Mechanical
    "young": {"name": "Young's modulus", "unit": "GPa"},
    "shear": {"name": "Shear modulus", "unit": "GPa"},
    "bulk": {"name": "Bulk modulus", "unit": "GPa"},
    "poisson": {"name": "Poisson ratio", "unit": "-"},
    # Transport
    "visc": {"name": "Viscosity", "unit": "Pa·s"},
    "dif": {"name": "Diffusivity", "unit": "cm^2/s"},
    # Gas permeability
    "phe": {"name": "He permeability", "unit": "Barrer"},
    "ph2": {"name": "H2 permeability", "unit": "Barrer"},
    "pco2": {"name": "CO2 permeability", "unit": "Barrer"},
    "pn2": {"name": "N2 permeability", "unit": "Barrer"},
    "po2": {"name": "O2 permeability", "unit": "Barrer"},
    "pch4": {"name": "CH4 permeability", "unit": "Barrer"},
    # Electronic, optical and dielectric
    "alpha": {"name": "Polarizability", "unit": "a.u."},
    "homo": {"name": "HOMO energy", "unit": "eV"},
    "lumo": {"name": "LUMO energy", "unit": "eV"},
    "bandgap": {"name": "Band gap", "unit": "eV"},
    "mu": {"name": "Dipole moment", "unit": "Debye"},
    "etotal": {"name": "Total electronic energy", "unit": "eV"},
    "ri": {"name": "Refractive index", "unit": "-"},
    "dc": {"name": "Dielectric constant", "unit": "-"},
    "pe": {"name": "Permittivity", "unit": "-"},
    # Structural
    "rg": {"name": "Radius of gyration", "unit": "Å"},
    "rho": {"name": "Density", "unit": "g/cm^3"},
}
| |
|
| |
|
@st.cache_data
def load_source_csv(source: str) -> pd.DataFrame:
    """
    Read data/{source}.csv and return it in normalized form:
      - the SMILES column is named 'smiles'
      - every other column name is lowercased
      - a 'smiles_canon' column holds the canonical SMILES of each row

    Rows whose SMILES cannot be canonicalized are dropped and the index is
    renumbered from 0. Raises ValueError when the file has neither a
    'SMILES' nor a 'smiles' column.
    """
    csv_path = f"data/{source}.csv"
    table = pd.read_csv(csv_path)

    # Guard first: the file must carry the structure column in one of the
    # two accepted spellings.
    if "SMILES" not in table.columns and "smiles" not in table.columns:
        raise ValueError(f"{csv_path} missing SMILES column")

    # A single lowercase pass covers both steps: "SMILES" becomes "smiles"
    # and every property column is lowercased.
    lowered = {name: name.lower() for name in table.columns}
    table = table.rename(columns=lowered)

    # Canonicalize from the string form of each cell; entries that fail to
    # canonicalize come back as None and are removed below.
    canonical = table["smiles"].astype(str).apply(canonicalize_smiles)
    table["smiles_canon"] = canonical
    kept = table.dropna(subset=["smiles_canon"])
    return kept.reset_index(drop=True)
| |
|
| |
|
@st.cache_data
def build_index(df: pd.DataFrame) -> dict[str, int]:
    """Map each canonical SMILES in df['smiles_canon'] to the position of
    its first occurrence; falsy entries (None, '') are skipped."""
    first_seen: dict[str, int] = {}
    for position, canon in enumerate(df["smiles_canon"].tolist()):
        if not canon:
            continue
        # setdefault keeps the earliest position when a SMILES repeats.
        first_seen.setdefault(canon, position)
    return first_seen
| |
|
| |
|
@st.cache_data
def load_polyinfo_csv() -> pd.DataFrame:
    """
    Read the optional POLYINFO table (POLYINFO_FILE), expected columns:
        SMILES, Polymer_Class, Polymer_Name

    The result uses the column names 'smiles', 'polymer_class' and
    'polymer_name' (the latter two are filled with pd.NA when absent) plus
    a 'smiles_canon' column; rows whose SMILES cannot be canonicalized are
    dropped. An empty frame with those four columns is returned when the
    file cannot be read or has no SMILES column.
    """
    fallback_columns = ["smiles", "polymer_class", "polymer_name", "smiles_canon"]

    try:
        table = pd.read_csv(POLYINFO_FILE)
    except Exception:
        # Best-effort: any read failure is treated as "no POLYINFO data".
        return pd.DataFrame(columns=fallback_columns)

    if "SMILES" not in table.columns and "smiles" not in table.columns:
        return pd.DataFrame(columns=fallback_columns)

    # rename() ignores keys that are not present, so one call normalizes
    # whichever of the expected headers the file actually has.
    table = table.rename(
        columns={
            "SMILES": "smiles",
            "Polymer_Class": "polymer_class",
            "Polymer_Name": "polymer_name",
        }
    )

    # Guarantee the optional descriptive columns exist.
    for optional in ("polymer_class", "polymer_name"):
        if optional not in table.columns:
            table[optional] = pd.NA

    canonical = table["smiles"].astype(str).apply(canonicalize_smiles)
    table["smiles_canon"] = canonical
    kept = table.dropna(subset=["smiles_canon"])
    return kept.reset_index(drop=True)
| |
|
| |
|
@st.cache_data
def load_all_sources():
    """
    Load every data table together with its SMILES lookup index.

    Returns a dict with one entry per key in SOURCES plus "POLYINFO"; each
    entry has the form {"df": DataFrame, "idx": {canonical smiles -> row}}.
    The POLYINFO index is an empty dict when that table is empty.
    """
    db = {}
    for src in SOURCES:
        frame = load_source_csv(src)
        db[src] = {"df": frame, "idx": build_index(frame)}

    polyinfo = load_polyinfo_csv()
    # Skip index building for an empty POLYINFO table.
    db["POLYINFO"] = {
        "df": polyinfo,
        "idx": {} if polyinfo.empty else build_index(polyinfo),
    }
    return db
| |
|
| |
|
def get_value(db, source: str, smiles_canon: str, prop_key: str) -> float | None:
    """Look up one property value for one structure in one source table.

    Args:
        db: Mapping of source key -> {"df": DataFrame, "idx": dict}, where
            "idx" maps canonical SMILES to a row position in "df".
        source: Key of the table to query; must be present in ``db``.
        smiles_canon: Canonical SMILES of the structure.
        prop_key: Column name of the requested property.

    Returns:
        The value as ``float``, or ``None`` when the structure is not in the
        index, the column does not exist, the cell is missing, or the cell
        cannot be converted to a number.

    Raises:
        KeyError: If ``source`` is not a key of ``db``.
    """
    pack = db[source]
    df, idx = pack["df"], pack["idx"]

    row_i = idx.get(smiles_canon)
    if row_i is None or prop_key not in df.columns:
        return None

    # Read the single cell straight from its column; this avoids building a
    # full row Series just to extract one value.
    val = df[prop_key].iat[row_i]
    if pd.isna(val):
        return None

    try:
        return float(val)
    except (TypeError, ValueError):
        # A non-numeric cell (e.g. "-" or ">500") is reported as "no value"
        # rather than raising out of the lookup.
        return None
| |
|
| |
|
def get_polyinfo(db, smiles_canon: str) -> tuple[str | None, str | None]:
    """
    Return (polymer_name, polymer_class) for a canonical SMILES.

    Each element is the stripped text from the POLYINFO table, or None when
    it is missing or blank. (None, None) is returned when there is no
    POLYINFO entry in db, the table is empty, or the SMILES is not indexed.
    No placeholder text such as 'not available' is produced here.
    """
    polyinfo = db.get("POLYINFO")
    if polyinfo is None:
        return None, None

    table, lookup = polyinfo["df"], polyinfo["idx"]
    if table is None or table.empty:
        return None, None

    position = lookup.get(smiles_canon)
    if position is None:
        return None, None

    record = table.iloc[position]
    cleaned = []
    for column in ("polymer_name", "polymer_class"):
        raw = record.get(column, None)
        # Missing cells and whitespace-only text both collapse to None.
        text = None if pd.isna(raw) else str(raw).strip()
        cleaned.append(text or None)

    return cleaned[0], cleaned[1]
| |
|