singhn9 committed on
Commit
a39c7fd
·
verified ·
1 Parent(s): 6ff080e

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +62 -100
src/streamlit_app.py CHANGED
@@ -29,38 +29,33 @@ import shap
29
  # -------------------------
30
  # Config & paths
31
  # -------------------------
 
32
  st.set_page_config(page_title="Steel Authority of India Limited (MODEX)", layout="wide")
33
 
34
- # Base directory and persistent logs
35
- BASE_DIR = "./"
36
- LOG_DIR = os.path.join(BASE_DIR, "logs")
37
  os.makedirs(LOG_DIR, exist_ok=True)
38
 
39
- # Timestamped run subfolder
40
- run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
41
- RUN_DIR = os.path.join(LOG_DIR, f"run_{run_id}")
42
- os.makedirs(RUN_DIR, exist_ok=True)
43
-
44
- # File paths for this run
45
- CSV_PATH = os.path.join(RUN_DIR, "flatfile_universe_advanced.csv")
46
- META_PATH = os.path.join(RUN_DIR, "feature_metadata_advanced.json")
47
- ENSEMBLE_ARTIFACT = os.path.join(RUN_DIR, "ensemble_models.joblib")
48
- LOG_PATH = os.path.join(RUN_DIR, "run.log")
49
 
 
50
  def log(msg: str):
 
51
  with open(LOG_PATH, "a", encoding="utf-8") as f:
52
- f.write(f"[{datetime.now():%Y-%m-%d %H:%M:%S}] {msg}\n")
53
  print(msg)
54
 
55
- log(f" Streamlit session started | run_id={run_id}")
56
- log(f"Run directory: {RUN_DIR}")
57
 
58
 
59
- # Confirm storage mount
60
  if os.path.exists("/data"):
61
- st.sidebar.success(f" Using persistent storage | Run directory: {RUN_DIR}")
62
  else:
63
- st.sidebar.warning(f" Using ephemeral storage | Run directory: {RUN_DIR}. Data will be lost on rebuild.")
64
 
65
 
66
  # -------------------------
@@ -86,7 +81,7 @@ def generate_advanced_flatfile(
86
  variance_overrides: dict mapping feature name or substring → stddev multiplier
87
  """
88
  np.random.seed(random_seed)
89
- os.makedirs(RUN_DIR, exist_ok=True)
90
  if variance_overrides is None:
91
  variance_overrides = {}
92
 
@@ -260,25 +255,26 @@ def generate_advanced_flatfile(
260
  df.fillna(0, inplace=True)
261
 
262
  # save CSV & metadata
263
- df.to_csv(CSV_PATH, index=False)
264
- meta = []
265
- for col in df.columns:
266
- if col in natural_feats:
267
- source = "natural"
268
- elif col.startswith("poly__") or col.startswith("pca_") or col in ["operating_mode"]:
269
- source = "advanced_synthetic"
270
- else:
271
- source = "synthetic"
272
- meta.append({
273
- "feature_name": col,
274
- "source_type": source,
275
- "linked_use_cases": ["All" if source!="natural" else "Mapped"],
276
- "units": "-",
277
- "formula": "see generator logic",
278
- "remarks": "auto-generated or simulated"
279
- })
280
- with open(META_PATH, "w") as f:
281
- json.dump(meta, f, indent=2)
 
282
 
283
  PDF_PATH = None
284
  # annotated bibliography
@@ -772,16 +768,10 @@ with tabs[4]:
772
  st.pyplot(fig)
773
 
774
  # Save trained stack artifacts
775
- stack_artifact = os.path.join(RUN_DIR, f"stacked_{use_case.replace(' ', '_')}.joblib")
776
- to_save = {
777
- "base_models": {bm["family"]: bm["model"] for bm in base_models if bm["family"] in selected},
778
- "meta": meta,
779
- "features": features,
780
- "selected": selected,
781
- "target": target,
782
- }
783
- joblib.dump(to_save, stack_artifact)
784
- st.caption(f" Stacked ensemble saved: {stack_artifact}")
785
 
786
  # Explainability
787
  st.markdown("### Explainability (approximate)")
@@ -906,64 +896,36 @@ in metallurgical AI modeling. Click any title to open the official paper.
906
  st.markdown("---")
907
  st.markdown("**Notes:** This dataset is synthetic and for demo/prototyping. Real plant integration requires NDA, data on-boarding, sensor mapping, and plant safety checks before any control actions.")
908
 
 
909
  # ----- Download tab
910
- tabs.append("Download Saved Runs")
911
  with tabs[-1]:
912
- st.subheader("Reproducibility & Run Exports")
913
-
914
- run_folders = sorted(
915
- [f for f in os.listdir(LOG_DIR) if f.startswith("run_")],
916
- reverse=True
917
- )
918
 
919
- if not run_folders:
920
- st.info("No completed runs found yet.")
 
921
  else:
922
- selected_run = st.selectbox("Select run folder", run_folders, index=0)
923
- selected_path = os.path.join(LOG_DIR, selected_run)
924
-
925
- # Show contained files
926
- files = [
927
- f for f in os.listdir(selected_path)
928
- if os.path.isfile(os.path.join(selected_path, f))
929
- ]
930
- st.write(f"Files in `{selected_run}`:")
931
- st.write(", ".join(files))
932
-
933
- # Zip the folder in-memory for download
934
- zip_buffer = io.BytesIO()
935
- with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
936
- for root, _, filenames in os.walk(selected_path):
937
- for fname in filenames:
938
- file_path = os.path.join(root, fname)
939
- zipf.write(file_path, arcname=os.path.relpath(file_path, selected_path))
940
- zip_buffer.seek(0)
941
-
942
- st.download_button(
943
- label=f"Download full run ({selected_run}.zip)",
944
- data=zip_buffer,
945
- file_name=f"{selected_run}.zip",
946
- mime="application/zip"
947
- )
948
 
949
 
950
  # ----- Logs tab
951
  tabs.append("View Logs")
952
  with tabs[-1]:
953
- st.subheader(" Session & Model Logs")
954
- st.markdown("Each run creates a timestamped log file in `/logs/` inside this Space. Use this panel to review run progress and debug output.")
955
-
956
- log_files = sorted(
957
- [f for f in os.listdir(LOG_DIR) if f.endswith(".log")],
958
- reverse=True
959
- )
960
-
961
- if not log_files:
962
- st.info("No logs yet. Run an AutoML job first.")
963
- else:
964
- latest = st.selectbox("Select log file", log_files, index=0)
965
- path = os.path.join(LOG_DIR, latest)
966
- with open(path, "r", encoding="utf-8") as f:
967
  content = f.read()
968
- st.text_area("Log Output", content, height=400)
969
- st.download_button(" Download Log", content, file_name=latest)
 
 
 
29
  # -------------------------
30
  # Config & paths
31
  # -------------------------
32
+
33
  st.set_page_config(page_title="Steel Authority of India Limited (MODEX)", layout="wide")
34
 
35
+ LOG_DIR = "./logs"
 
 
36
  os.makedirs(LOG_DIR, exist_ok=True)
37
 
38
+ # Permanent artifact filenames (never change)
39
+ CSV_PATH = os.path.join(LOG_DIR, "flatfile_universe_advanced.csv")
40
+ META_PATH = os.path.join(LOG_DIR, "feature_metadata_advanced.json")
41
+ ENSEMBLE_PATH = os.path.join(LOG_DIR, "ensemble_models.joblib")
42
+ LOG_PATH = os.path.join(LOG_DIR, "run_master.log")
 
 
 
 
 
43
 
44
+ # Simple logger that time-stamps inside one file
45
  def log(msg: str):
46
+ stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
47
  with open(LOG_PATH, "a", encoding="utf-8") as f:
48
+ f.write(f"[{stamp}] {msg}\n")
49
  print(msg)
50
 
51
+ log("=== Streamlit session started ===")
52
+
53
 
54
 
 
55
  if os.path.exists("/data"):
56
+ st.sidebar.success(f" Using persistent storage | Logs directory: {LOG_DIR}")
57
  else:
58
+ st.sidebar.warning(f" Using ephemeral storage | Logs directory: {LOG_DIR}. Data will be lost on rebuild.")
59
 
60
 
61
  # -------------------------
 
81
  variance_overrides: dict mapping feature name or substring → stddev multiplier
82
  """
83
  np.random.seed(random_seed)
84
+ os.makedirs(LOG_DIR, exist_ok=True)
85
  if variance_overrides is None:
86
  variance_overrides = {}
87
 
 
255
  df.fillna(0, inplace=True)
256
 
257
  # save CSV & metadata
258
+ df["run_timestamp"] = datetime.now().strftime("%Y%m%d_%H%M%S")
259
+ if os.path.exists(CSV_PATH):
260
+ df.to_csv(CSV_PATH, mode="a", index=False, header=False)
261
+ else:
262
+ df.to_csv(CSV_PATH, index=False)
263
+
264
+ # append run-summary entry to metadata JSON
265
+ meta_entry = {
266
+ "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
267
+ "features": len(df.columns),
268
+ "rows_added": len(df),
269
+ "note": "auto-generated block appended"
270
+ }
271
+ if os.path.exists(META_PATH):
272
+ existing = json.load(open(META_PATH))
273
+ existing.append(meta_entry)
274
+ else:
275
+ existing = [meta_entry]
276
+ json.dump(existing, open(META_PATH, "w"), indent=2)
277
+
278
 
279
  PDF_PATH = None
280
  # annotated bibliography
 
768
  st.pyplot(fig)
769
 
770
  # Save trained stack artifacts
771
+ joblib.dump(meta, ENSEMBLE_PATH)
772
+ st.caption(f"Stacked ensemble snapshot updated → {ENSEMBLE_PATH}")
773
+ log(f"Ensemble model updated for use case: {use_case}")
774
+
 
 
 
 
 
 
775
 
776
  # Explainability
777
  st.markdown("### Explainability (approximate)")
 
896
  st.markdown("---")
897
  st.markdown("**Notes:** This dataset is synthetic and for demo/prototyping. Real plant integration requires NDA, data on-boarding, sensor mapping, and plant safety checks before any control actions.")
898
 
899
+
900
  # ----- Download tab
901
+ tabs.append("Download Saved Files")
902
  with tabs[-1]:
903
+ st.subheader(" Download Saved Files (Flat Log Mode)")
 
 
 
 
 
904
 
905
+ available_files = [f for f in os.listdir(LOG_DIR) if os.path.isfile(os.path.join(LOG_DIR, f))]
906
+ if not available_files:
907
+ st.info("No files found yet — run AutoML once to generate outputs.")
908
  else:
909
+ for f in sorted(available_files):
910
+ path = os.path.join(LOG_DIR, f)
911
+ with open(path, "rb") as fp:
912
+ st.download_button(
913
+ label=f" Download {f}",
914
+ data=fp,
915
+ file_name=f,
916
+ mime="application/octet-stream"
917
+ )
918
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
919
 
920
 
921
  # ----- Logs tab
922
  tabs.append("View Logs")
923
  with tabs[-1]:
924
+ st.subheader(" Master Log (append-in-place)")
925
+ if os.path.exists(LOG_PATH):
926
+ with open(LOG_PATH, "r", encoding="utf-8") as f:
 
 
 
 
 
 
 
 
 
 
 
927
  content = f.read()
928
+ st.text_area("Master Log Output", content, height=400)
929
+ st.download_button("Download Log", content, file_name="run_master.log")
930
+ else:
931
+ st.info("No log file yet — run AutoML once to start logging.")