singhn9 committed on
Commit
a39c7fd
·
verified ·
1 Parent(s): 6ff080e

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +62 -100
src/streamlit_app.py CHANGED
@@ -29,38 +29,33 @@ import shap
29
  # -------------------------
30
  # Config & paths
31
  # -------------------------
 
32
  st.set_page_config(page_title="Steel Authority of India Limited (MODEX)", layout="wide")
33
 
34
- # Base directory and persistent logs
35
- BASE_DIR = "./"
36
- LOG_DIR = os.path.join(BASE_DIR, "logs")
37
  os.makedirs(LOG_DIR, exist_ok=True)
38
 
39
- # Timestamped run subfolder
40
- run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
41
- RUN_DIR = os.path.join(LOG_DIR, f"run_{run_id}")
42
- os.makedirs(RUN_DIR, exist_ok=True)
43
-
44
- # File paths for this run
45
- CSV_PATH = os.path.join(RUN_DIR, "flatfile_universe_advanced.csv")
46
- META_PATH = os.path.join(RUN_DIR, "feature_metadata_advanced.json")
47
- ENSEMBLE_ARTIFACT = os.path.join(RUN_DIR, "ensemble_models.joblib")
48
- LOG_PATH = os.path.join(RUN_DIR, "run.log")
49
 
 
50
  def log(msg: str):
 
51
  with open(LOG_PATH, "a", encoding="utf-8") as f:
52
- f.write(f"[{datetime.now():%Y-%m-%d %H:%M:%S}] {msg}\n")
53
  print(msg)
54
 
55
- log(f" Streamlit session started | run_id={run_id}")
56
- log(f"Run directory: {RUN_DIR}")
57
 
58
 
59
- # Confirm storage mount
60
  if os.path.exists("/data"):
61
- st.sidebar.success(f" Using persistent storage | Run directory: {RUN_DIR}")
62
  else:
63
- st.sidebar.warning(f" Using ephemeral storage | Run directory: {RUN_DIR}. Data will be lost on rebuild.")
64
 
65
 
66
  # -------------------------
@@ -86,7 +81,7 @@ def generate_advanced_flatfile(
86
  variance_overrides: dict mapping feature name or substring → stddev multiplier
87
  """
88
  np.random.seed(random_seed)
89
- os.makedirs(RUN_DIR, exist_ok=True)
90
  if variance_overrides is None:
91
  variance_overrides = {}
92
 
@@ -260,25 +255,26 @@ def generate_advanced_flatfile(
260
  df.fillna(0, inplace=True)
261
 
262
  # save CSV & metadata
263
- df.to_csv(CSV_PATH, index=False)
264
- meta = []
265
- for col in df.columns:
266
- if col in natural_feats:
267
- source = "natural"
268
- elif col.startswith("poly__") or col.startswith("pca_") or col in ["operating_mode"]:
269
- source = "advanced_synthetic"
270
- else:
271
- source = "synthetic"
272
- meta.append({
273
- "feature_name": col,
274
- "source_type": source,
275
- "linked_use_cases": ["All" if source!="natural" else "Mapped"],
276
- "units": "-",
277
- "formula": "see generator logic",
278
- "remarks": "auto-generated or simulated"
279
- })
280
- with open(META_PATH, "w") as f:
281
- json.dump(meta, f, indent=2)
 
282
 
283
  PDF_PATH = None
284
  # annotated bibliography
@@ -772,16 +768,10 @@ with tabs[4]:
772
  st.pyplot(fig)
773
 
774
  # Save trained stack artifacts
775
- stack_artifact = os.path.join(RUN_DIR, f"stacked_{use_case.replace(' ', '_')}.joblib")
776
- to_save = {
777
- "base_models": {bm["family"]: bm["model"] for bm in base_models if bm["family"] in selected},
778
- "meta": meta,
779
- "features": features,
780
- "selected": selected,
781
- "target": target,
782
- }
783
- joblib.dump(to_save, stack_artifact)
784
- st.caption(f" Stacked ensemble saved: {stack_artifact}")
785
 
786
  # Explainability
787
  st.markdown("### Explainability (approximate)")
@@ -906,64 +896,36 @@ in metallurgical AI modeling. Click any title to open the official paper.
906
  st.markdown("---")
907
  st.markdown("**Notes:** This dataset is synthetic and for demo/prototyping. Real plant integration requires NDA, data on-boarding, sensor mapping, and plant safety checks before any control actions.")
908
 
 
909
  # ----- Download tab
910
- tabs.append("Download Saved Runs")
911
  with tabs[-1]:
912
- st.subheader("Reproducibility & Run Exports")
913
-
914
- run_folders = sorted(
915
- [f for f in os.listdir(LOG_DIR) if f.startswith("run_")],
916
- reverse=True
917
- )
918
 
919
- if not run_folders:
920
- st.info("No completed runs found yet.")
 
921
  else:
922
- selected_run = st.selectbox("Select run folder", run_folders, index=0)
923
- selected_path = os.path.join(LOG_DIR, selected_run)
924
-
925
- # Show contained files
926
- files = [
927
- f for f in os.listdir(selected_path)
928
- if os.path.isfile(os.path.join(selected_path, f))
929
- ]
930
- st.write(f"Files in `{selected_run}`:")
931
- st.write(", ".join(files))
932
-
933
- # Zip the folder in-memory for download
934
- zip_buffer = io.BytesIO()
935
- with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
936
- for root, _, filenames in os.walk(selected_path):
937
- for fname in filenames:
938
- file_path = os.path.join(root, fname)
939
- zipf.write(file_path, arcname=os.path.relpath(file_path, selected_path))
940
- zip_buffer.seek(0)
941
-
942
- st.download_button(
943
- label=f"Download full run ({selected_run}.zip)",
944
- data=zip_buffer,
945
- file_name=f"{selected_run}.zip",
946
- mime="application/zip"
947
- )
948
 
949
 
950
  # ----- Logs tab
951
  tabs.append("View Logs")
952
  with tabs[-1]:
953
- st.subheader(" Session & Model Logs")
954
- st.markdown("Each run creates a timestamped log file in `/logs/` inside this Space. Use this panel to review run progress and debug output.")
955
-
956
- log_files = sorted(
957
- [f for f in os.listdir(LOG_DIR) if f.endswith(".log")],
958
- reverse=True
959
- )
960
-
961
- if not log_files:
962
- st.info("No logs yet. Run an AutoML job first.")
963
- else:
964
- latest = st.selectbox("Select log file", log_files, index=0)
965
- path = os.path.join(LOG_DIR, latest)
966
- with open(path, "r", encoding="utf-8") as f:
967
  content = f.read()
968
- st.text_area("Log Output", content, height=400)
969
- st.download_button(" Download Log", content, file_name=latest)
 
 
 
29
  # -------------------------
30
  # Config & paths
31
  # -------------------------
32
+
33
  st.set_page_config(page_title="Steel Authority of India Limited (MODEX)", layout="wide")
34
 
35
+ LOG_DIR = "./logs"
 
 
36
  os.makedirs(LOG_DIR, exist_ok=True)
37
 
38
+ # Permanent artifact filenames (never change)
39
+ CSV_PATH = os.path.join(LOG_DIR, "flatfile_universe_advanced.csv")
40
+ META_PATH = os.path.join(LOG_DIR, "feature_metadata_advanced.json")
41
+ ENSEMBLE_PATH = os.path.join(LOG_DIR, "ensemble_models.joblib")
42
+ LOG_PATH = os.path.join(LOG_DIR, "run_master.log")
 
 
 
 
 
43
 
44
+ # Simple logger that time-stamps inside one file
45
  def log(msg: str):
46
+ stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
47
  with open(LOG_PATH, "a", encoding="utf-8") as f:
48
+ f.write(f"[{stamp}] {msg}\n")
49
  print(msg)
50
 
51
+ log("=== Streamlit session started ===")
52
+
53
 
54
 
 
55
  if os.path.exists("/data"):
56
+ st.sidebar.success(f" Using persistent storage | Logs directory: {LOG_DIR}")
57
  else:
58
+ st.sidebar.warning(f" Using ephemeral storage | Logs directory: {LOG_DIR}. Data will be lost on rebuild.")
59
 
60
 
61
  # -------------------------
 
81
  variance_overrides: dict mapping feature name or substring → stddev multiplier
82
  """
83
  np.random.seed(random_seed)
84
+ os.makedirs(LOG_DIR, exist_ok=True)
85
  if variance_overrides is None:
86
  variance_overrides = {}
87
 
 
255
  df.fillna(0, inplace=True)
256
 
257
  # save CSV & metadata
258
+ df["run_timestamp"] = datetime.now().strftime("%Y%m%d_%H%M%S")
259
+ if os.path.exists(CSV_PATH):
260
+ df.to_csv(CSV_PATH, mode="a", index=False, header=False)
261
+ else:
262
+ df.to_csv(CSV_PATH, index=False)
263
+
264
+ # append run-summary entry to metadata JSON
265
+ meta_entry = {
266
+ "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
267
+ "features": len(df.columns),
268
+ "rows_added": len(df),
269
+ "note": "auto-generated block appended"
270
+ }
271
+ if os.path.exists(META_PATH):
272
+ existing = json.load(open(META_PATH))
273
+ existing.append(meta_entry)
274
+ else:
275
+ existing = [meta_entry]
276
+ json.dump(existing, open(META_PATH, "w"), indent=2)
277
+
278
 
279
  PDF_PATH = None
280
  # annotated bibliography
 
768
  st.pyplot(fig)
769
 
770
  # Save trained stack artifacts
771
+ joblib.dump(meta, ENSEMBLE_PATH)
772
+ st.caption(f"Stacked ensemble snapshot updated → {ENSEMBLE_PATH}")
773
+ log(f"Ensemble model updated for use case: {use_case}")
774
+
 
 
 
 
 
 
775
 
776
  # Explainability
777
  st.markdown("### Explainability (approximate)")
 
896
  st.markdown("---")
897
  st.markdown("**Notes:** This dataset is synthetic and for demo/prototyping. Real plant integration requires NDA, data on-boarding, sensor mapping, and plant safety checks before any control actions.")
898
 
899
+
900
  # ----- Download tab
901
+ tabs.append("Download Saved Files")
902
  with tabs[-1]:
903
+ st.subheader(" Download Saved Files (Flat Log Mode)")
 
 
 
 
 
904
 
905
+ available_files = [f for f in os.listdir(LOG_DIR) if os.path.isfile(os.path.join(LOG_DIR, f))]
906
+ if not available_files:
907
+ st.info("No files found yet — run AutoML once to generate outputs.")
908
  else:
909
+ for f in sorted(available_files):
910
+ path = os.path.join(LOG_DIR, f)
911
+ with open(path, "rb") as fp:
912
+ st.download_button(
913
+ label=f" Download {f}",
914
+ data=fp,
915
+ file_name=f,
916
+ mime="application/octet-stream"
917
+ )
918
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
919
 
920
 
921
  # ----- Logs tab
922
  tabs.append("View Logs")
923
  with tabs[-1]:
924
+ st.subheader(" Master Log (append-in-place)")
925
+ if os.path.exists(LOG_PATH):
926
+ with open(LOG_PATH, "r", encoding="utf-8") as f:
 
 
 
 
 
 
 
 
 
 
 
927
  content = f.read()
928
+ st.text_area("Master Log Output", content, height=400)
929
+ st.download_button("Download Log", content, file_name="run_master.log")
930
+ else:
931
+ st.info("No log file yet — run AutoML once to start logging.")