singhn9 committed on
Commit
cb7a53e
·
verified ·
1 Parent(s): 594f122

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +166 -63
src/streamlit_app.py CHANGED
@@ -261,7 +261,7 @@ def generate_advanced_flatfile(
261
 
262
  # clean NaN and infinite
263
  df.replace([np.inf, -np.inf], np.nan, inplace=True)
264
- df.fillna(method="bfill", inplace=True)
265
  df.fillna(0, inplace=True)
266
 
267
  # save CSV & metadata
@@ -847,13 +847,117 @@ with tabs[4]:
847
  st.stop()
848
 
849
  # Meta prediction
850
- y_meta_pred = meta.predict(X_meta_val)
851
 
852
  # Final evaluation
853
  final_r2 = r2_score(y_val, y_meta_pred)
854
  final_rmse = float(np.sqrt(mean_squared_error(y_val, y_meta_pred)))
855
  st.success("AutoML + Stacking complete — metrics, artifacts, and SHAP ready.")
856
  log(f"Completed stacking. Final R2={final_r2:.4f}, RMSE={final_rmse:.4f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
857
 
858
 
859
  c1, c2 = st.columns(2)
@@ -896,75 +1000,69 @@ with tabs[4]:
896
  st.success(" AutoML + Stacking complete — metrics, artifacts, and SHAP ready.")
897
 
898
  # --- Store AutoML summary for optional LLM advisory ---
899
- st.session_state["automl_summary"] = {
900
- "leaderboard": lb[["family", "cv_r2"]].round(4).to_dict(orient="records"),
901
- "final_r2": float(final_r2),
902
- "final_rmse": float(final_rmse),
903
- "target": target,
904
- "use_case": use_case
905
- }
 
 
 
 
 
 
 
 
 
 
906
 
907
- # --- Optional: AI Model Recommendation Assistant ---
 
 
 
 
 
 
908
  st.markdown("---")
909
- st.subheader("AI Recommendation Assistant (cached local model)")
910
- st.caption("Get quick local AI suggestions without internet cached inside ./logs")
911
 
912
  if st.button("Get AI Recommendation (tiny local LLM)", key="ai_reco"):
913
  summary = st.session_state.get("automl_summary", {})
914
- st.info("Loading local model... first time may take ~10s.")
915
- try:
916
- import importlib.util, os
917
- from pathlib import Path
918
 
919
- # Ensure transformers is available
920
- if importlib.util.find_spec("transformers") is None:
921
- st.error("Transformers not installed. Run `pip install transformers`.")
922
- else:
923
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
924
 
925
- MODEL_NAME = "sshleifer/tiny-gpt2" # very small 6 MB model
926
- MODEL_DIR = Path(LOG_DIR) / "cached_tiny_llm"
927
- os.makedirs(MODEL_DIR, exist_ok=True)
928
 
929
- # If model is already cached locally, load from there
930
- if (MODEL_DIR / "config.json").exists():
931
- st.caption("Loading tiny model from local cache...")
932
- model = AutoModelForCausalLM.from_pretrained(MODEL_DIR)
933
- tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
934
- else:
935
- st.caption("☁️ Downloading tiny model (once only)...")
936
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
937
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
938
- model.save_pretrained(MODEL_DIR)
939
- tokenizer.save_pretrained(MODEL_DIR)
940
- st.success("Cached tiny LLM in ./logs/cached_tiny_llm")
941
 
942
- assistant = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 
 
 
943
 
944
- prompt = f"""
945
- You are an ML model tuning assistant.
946
- Given this AutoML summary, provide 3 actionable steps for improvement if overfitting,
947
- underfitting, or data quality issues are suspected.
948
 
949
- Use case: {summary.get('use_case')}
950
- Target: {summary.get('target')}
951
- Final R²: {summary.get('final_r2')}
952
- Final RMSE: {summary.get('final_rmse')}
953
- Leaderboard: {summary.get('leaderboard')}
954
 
955
- Respond in concise numbered steps.
956
- """
957
- out = assistant(prompt, max_new_tokens=90, temperature=0.7, do_sample=True)[0]["generated_text"]
958
- st.success("LLM Recommendation:")
959
- st.markdown(out)
960
- log("Tiny LLM recommendation generated successfully.")
961
  except Exception as e:
962
  st.error(f"LLM generation failed: {e}")
963
- st.info("If the model download failed, rerun once it will cache afterward.")
964
-
965
-
966
-
967
-
968
 
969
  # ----- Target & Business Impact tab
970
  with tabs[5]:
@@ -1042,12 +1140,17 @@ in metallurgical AI modeling. Click any title to open the official paper.
1042
  bib_df = pd.DataFrame(bib_data)
1043
  bib_df["Paper Title"] = bib_df.apply(lambda x: f"[{x['title']}]({x['url']})", axis=1)
1044
 
1045
- st.dataframe(
1046
- bib_df[["Paper Title", "authors", "notes"]]
1047
- .rename(columns={"authors": "Authors / Year", "notes": "Relevance"}),
1048
- width="stretch",
1049
- hide_index=True
1050
- )
 
 
 
 
 
1051
 
1052
  st.markdown("""
1053
  **Feature ↔ Target Justification**
 
261
 
262
  # clean NaN and infinite
263
  df.replace([np.inf, -np.inf], np.nan, inplace=True)
264
+ df.bfill(inplace=True)
265
  df.fillna(0, inplace=True)
266
 
267
  # save CSV & metadata
 
847
  st.stop()
848
 
849
  # Meta prediction
850
+ y_meta_pred = meta.predict(pd.DataFrame(X_meta_val, columns=X_stack.columns))
851
 
852
  # Final evaluation
853
  final_r2 = r2_score(y_val, y_meta_pred)
854
  final_rmse = float(np.sqrt(mean_squared_error(y_val, y_meta_pred)))
855
  st.success("AutoML + Stacking complete — metrics, artifacts, and SHAP ready.")
856
  log(f"Completed stacking. Final R2={final_r2:.4f}, RMSE={final_rmse:.4f}")
857
+ # ===============================
858
+ # OPERATOR ADVISORY SYSTEM
859
+ # ===============================
860
+ st.markdown("---")
861
+ st.subheader("Operator Advisory System — Real-Time Shift Recommendations")
862
+
863
+ try:
864
+ # Use top base model already identified
865
+ top_base = next((b for b in base_models if b["family"] == selected[0]), None)
866
+ if top_base and hasattr(top_base["model"], "predict"):
867
+ sample_X = X_val.sample(min(300, len(X_val)), random_state=42)
868
+ model = top_base["model"]
869
+
870
+ # SHAP direction analysis
871
+ expl = shap.TreeExplainer(model)
872
+ shap_vals = expl.shap_values(sample_X)
873
+ mean_abs = np.abs(shap_vals).mean(axis=0)
874
+ mean_sign = np.sign(shap_vals).mean(axis=0)
875
+ importance = pd.DataFrame({
876
+ "Feature": sample_X.columns,
877
+ "Mean |SHAP|": mean_abs,
878
+ "Mean SHAP Sign": mean_sign
879
+ }).sort_values("Mean |SHAP|", ascending=False)
880
+
881
+ # Display Top 5 Drivers
882
+ st.markdown("### Top 5 Operational Drivers Influencing Target")
883
+ st.dataframe(importance.head(5).style.format({"Mean |SHAP|": "{:.3f}", "Mean SHAP Sign": "{:.3f}"}))
884
+
885
+ # Direction-based recommendations
886
+ recommendations = []
887
+ for _, row in importance.head(5).iterrows():
888
+ f = row["Feature"]
889
+ s = row["Mean SHAP Sign"]
890
+ if s > 0.05:
891
+ recommendations.append(f"Increase `{f}` likely increases `{target}`")
892
+ elif s < -0.05:
893
+ recommendations.append(f"Decrease `{f}` likely increases `{target}`")
894
+ else:
895
+ recommendations.append(f" `{f}` is neutral or nonlinear for `{target}`")
896
+
897
+ st.markdown("### Suggested Operator Adjustments (Model-Inferred)")
898
+ st.write("\n".join(recommendations))
899
+
900
+ # Delta recommendations vs previous shift
901
+ prev_shift = df.tail(200).mean(numeric_only=True)
902
+ recommended_shift = prev_shift.copy()
903
+ for rec in recommendations:
904
+ if "Increase" in rec:
905
+ name = rec.split('`')[1]
906
+ if name in recommended_shift:
907
+ recommended_shift[name] *= 1.03 # +3%
908
+ elif "Decrease" in rec:
909
+ name = rec.split('`')[1]
910
+ if name in recommended_shift:
911
+ recommended_shift[name] *= 0.97 # -3%
912
+
913
+ # Delta table
914
+ st.markdown("### 🧾 Shift Adjustment Summary (vs Previous 200 Samples)")
915
+ deltas = pd.DataFrame({
916
+ "Current Avg": prev_shift,
917
+ "Suggested": recommended_shift,
918
+ "Δ (%)": ((recommended_shift - prev_shift) / prev_shift * 100)
919
+ }).loc[[r.split('`')[1] for r in recommendations if '`' in r]].round(2)
920
+
921
+ st.dataframe(deltas.fillna(0).style.format("{:.2f}"))
922
+ log("Operator advisory system executed successfully.")
923
+
924
+ # Optional: LLM-generated human-friendly summary
925
+ st.markdown("### Natural Language Operator Note")
926
+ try:
927
+ import importlib.util
928
+ if importlib.util.find_spec("transformers"):
929
+ from transformers import pipeline
930
+ tiny_llm_path = os.path.join(LOG_DIR, "cached_tiny_llm")
931
+ if os.path.exists(os.path.join(tiny_llm_path, "config.json")):
932
+ from transformers import AutoModelForCausalLM, AutoTokenizer
933
+ model = AutoModelForCausalLM.from_pretrained(tiny_llm_path)
934
+ tokenizer = AutoTokenizer.from_pretrained(tiny_llm_path)
935
+ assistant = pipeline("text-generation", model=model, tokenizer=tokenizer)
936
+ else:
937
+ assistant = pipeline("text-generation", model="sshleifer/tiny-gpt2")
938
+
939
+ llm_prompt = f"""
940
+ You are a metallurgical process advisor working in a steel manufacturing unit.
941
+ Based on these recommendations:
942
+ {recommendations}
943
+ and these shift averages:
944
+ {deltas.to_dict(orient='index')}
945
+ Write a concise 3-line message to the operator suggesting what to adjust this shift.
946
+ """
947
+ resp = assistant(llm_prompt, max_new_tokens=80, do_sample=True, temperature=0.6)[0]["generated_text"]
948
+ st.info(resp)
949
+ log("Operator LLM advisory note generated successfully.")
950
+ else:
951
+ st.warning("Transformers not available — install it for text generation.")
952
+ except Exception as e:
953
+ st.warning(f"LLM advisory generation skipped: {e}")
954
+
955
+ else:
956
+ st.info("No suitable model found for operator advisory system.")
957
+ except Exception as e:
958
+ st.error(f"Operator advisory system failed: {e}")
959
+ log(f"Operator advisory error: {e}")
960
+
961
 
962
 
963
  c1, c2 = st.columns(2)
 
1000
  st.success(" AutoML + Stacking complete — metrics, artifacts, and SHAP ready.")
1001
 
1002
  # --- Store AutoML summary for optional LLM advisory ---
1003
+ if "automl_summary" not in st.session_state:
1004
+ st.session_state["automl_summary"] = {
1005
+ "leaderboard": lb[["family", "cv_r2"]].round(4).to_dict(orient="records"),
1006
+ "final_r2": float(final_r2),
1007
+ "final_rmse": float(final_rmse),
1008
+ "target": target,
1009
+ "use_case": use_case
1010
+ }
1011
+ else:
1012
+ # Always refresh with latest metrics after each run
1013
+ st.session_state["automl_summary"].update({
1014
+ "leaderboard": lb[["family", "cv_r2"]].round(4).to_dict(orient="records"),
1015
+ "final_r2": float(final_r2),
1016
+ "final_rmse": float(final_rmse),
1017
+ "target": target,
1018
+ "use_case": use_case
1019
+ })
1020
 
1021
+ # Persist SHAP-based recommendations for reuse across reruns
1022
+ if "shap_recommendations" not in st.session_state:
1023
+ st.session_state["shap_recommendations"] = recommendations
1024
+ else:
1025
+ st.session_state["shap_recommendations"] = recommendations
1026
+
1027
+ # --- AI Recommendation Assistant (in-memory safe for Hugging Face) ---
1028
  st.markdown("---")
1029
+ st.subheader("AI Recommendation Assistant (in-memory mode)")
1030
+ st.caption("Generates quick local AI suggestions no file writes required.")
1031
 
1032
  if st.button("Get AI Recommendation (tiny local LLM)", key="ai_reco"):
1033
  summary = st.session_state.get("automl_summary", {})
1034
+ if not summary:
1035
+ st.warning("Please run AutoML first to generate context.")
1036
+ st.stop()
 
1037
 
1038
+ try:
1039
+ from transformers import pipeline
 
 
 
1040
 
1041
+ st.info("Loading tiny model in-memory (ephemeral)...")
1042
+ assistant = pipeline("text-generation", model="sshleifer/tiny-gpt2")
 
1043
 
1044
+ prompt = f"""
1045
+ You are an ML model tuning assistant.
1046
+ Given this AutoML summary, provide 3 actionable steps for improvement
1047
+ if overfitting, underfitting, or data quality issues are suspected.
 
 
 
 
 
 
 
 
1048
 
1049
+ Use case: {summary.get('use_case')}
1050
+ Target: {summary.get('target')}
1051
+ Final R²: {summary.get('final_r2')}
1052
+ Final RMSE: {summary.get('final_rmse')}
1053
+ Leaderboard: {summary.get('leaderboard')}
1054
 
1055
+ Respond in concise numbered steps.
1056
+ """
 
 
1057
 
1058
+ result = assistant(prompt, max_new_tokens=90, temperature=0.7, do_sample=True)[0]["generated_text"]
1059
+ st.success("LLM Recommendation:")
1060
+ st.markdown(result)
1061
+ log("Tiny LLM in-memory advisory generated successfully.")
 
1062
 
 
 
 
 
 
 
1063
  except Exception as e:
1064
  st.error(f"LLM generation failed: {e}")
1065
+ st.info("Make sure `transformers` is installed in your Space environment.")
 
 
 
 
1066
 
1067
  # ----- Target & Business Impact tab
1068
  with tabs[5]:
 
1140
  bib_df = pd.DataFrame(bib_data)
1141
  bib_df["Paper Title"] = bib_df.apply(lambda x: f"[{x['title']}]({x['url']})", axis=1)
1142
 
1143
+ st.markdown("### Annotated Bibliography — Justification for Target Variables")
1144
+
1145
+ for _, row in bib_df.iterrows():
1146
+ st.markdown(
1147
+ f"**[{row['title']}]({row['url']})** \n"
1148
+ f"*{row['authors']}* \n"
1149
+ f" _{row['notes']}_ \n",
1150
+ unsafe_allow_html=True
1151
+ )
1152
+ st.info("Click any paper title above to open it in a new tab.")
1153
+
1154
 
1155
  st.markdown("""
1156
  **Feature ↔ Target Justification**