Update src/streamlit_app.py
Browse files- src/streamlit_app.py +166 -63
src/streamlit_app.py
CHANGED
|
@@ -261,7 +261,7 @@ def generate_advanced_flatfile(
|
|
| 261 |
|
| 262 |
# clean NaN and infinite
|
| 263 |
df.replace([np.inf, -np.inf], np.nan, inplace=True)
|
| 264 |
-
df.
|
| 265 |
df.fillna(0, inplace=True)
|
| 266 |
|
| 267 |
# save CSV & metadata
|
|
@@ -847,13 +847,117 @@ with tabs[4]:
|
|
| 847 |
st.stop()
|
| 848 |
|
| 849 |
# Meta prediction
|
| 850 |
-
y_meta_pred = meta.predict(X_meta_val)
|
| 851 |
|
| 852 |
# Final evaluation
|
| 853 |
final_r2 = r2_score(y_val, y_meta_pred)
|
| 854 |
final_rmse = float(np.sqrt(mean_squared_error(y_val, y_meta_pred)))
|
| 855 |
st.success("AutoML + Stacking complete — metrics, artifacts, and SHAP ready.")
|
| 856 |
log(f"Completed stacking. Final R2={final_r2:.4f}, RMSE={final_rmse:.4f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 857 |
|
| 858 |
|
| 859 |
c1, c2 = st.columns(2)
|
|
@@ -896,75 +1000,69 @@ with tabs[4]:
|
|
| 896 |
st.success(" AutoML + Stacking complete — metrics, artifacts, and SHAP ready.")
|
| 897 |
|
| 898 |
# --- Store AutoML summary for optional LLM advisory ---
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
|
| 904 |
-
|
| 905 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 906 |
|
| 907 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 908 |
st.markdown("---")
|
| 909 |
-
st.subheader("AI Recommendation Assistant (
|
| 910 |
-
st.caption("
|
| 911 |
|
| 912 |
if st.button("Get AI Recommendation (tiny local LLM)", key="ai_reco"):
|
| 913 |
summary = st.session_state.get("automl_summary", {})
|
| 914 |
-
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
from pathlib import Path
|
| 918 |
|
| 919 |
-
|
| 920 |
-
|
| 921 |
-
st.error("Transformers not installed. Run `pip install transformers`.")
|
| 922 |
-
else:
|
| 923 |
-
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
|
| 924 |
|
| 925 |
-
|
| 926 |
-
|
| 927 |
-
os.makedirs(MODEL_DIR, exist_ok=True)
|
| 928 |
|
| 929 |
-
|
| 930 |
-
|
| 931 |
-
|
| 932 |
-
|
| 933 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
|
| 934 |
-
else:
|
| 935 |
-
st.caption("☁️ Downloading tiny model (once only)...")
|
| 936 |
-
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
|
| 937 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 938 |
-
model.save_pretrained(MODEL_DIR)
|
| 939 |
-
tokenizer.save_pretrained(MODEL_DIR)
|
| 940 |
-
st.success("Cached tiny LLM in ./logs/cached_tiny_llm")
|
| 941 |
|
| 942 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 943 |
|
| 944 |
-
|
| 945 |
-
|
| 946 |
-
Given this AutoML summary, provide 3 actionable steps for improvement if overfitting,
|
| 947 |
-
underfitting, or data quality issues are suspected.
|
| 948 |
|
| 949 |
-
|
| 950 |
-
|
| 951 |
-
|
| 952 |
-
|
| 953 |
-
Leaderboard: {summary.get('leaderboard')}
|
| 954 |
|
| 955 |
-
Respond in concise numbered steps.
|
| 956 |
-
"""
|
| 957 |
-
out = assistant(prompt, max_new_tokens=90, temperature=0.7, do_sample=True)[0]["generated_text"]
|
| 958 |
-
st.success("LLM Recommendation:")
|
| 959 |
-
st.markdown(out)
|
| 960 |
-
log("Tiny LLM recommendation generated successfully.")
|
| 961 |
except Exception as e:
|
| 962 |
st.error(f"LLM generation failed: {e}")
|
| 963 |
-
st.info("
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
|
| 968 |
|
| 969 |
# ----- Target & Business Impact tab
|
| 970 |
with tabs[5]:
|
|
@@ -1042,12 +1140,17 @@ in metallurgical AI modeling. Click any title to open the official paper.
|
|
| 1042 |
bib_df = pd.DataFrame(bib_data)
|
| 1043 |
bib_df["Paper Title"] = bib_df.apply(lambda x: f"[{x['title']}]({x['url']})", axis=1)
|
| 1044 |
|
| 1045 |
-
st.
|
| 1046 |
-
|
| 1047 |
-
|
| 1048 |
-
|
| 1049 |
-
|
| 1050 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1051 |
|
| 1052 |
st.markdown("""
|
| 1053 |
**Feature ↔ Target Justification**
|
|
|
|
| 261 |
|
| 262 |
# clean NaN and infinite
|
| 263 |
df.replace([np.inf, -np.inf], np.nan, inplace=True)
|
| 264 |
+
df.bfill(inplace=True)
|
| 265 |
df.fillna(0, inplace=True)
|
| 266 |
|
| 267 |
# save CSV & metadata
|
|
|
|
| 847 |
st.stop()
|
| 848 |
|
| 849 |
# Meta prediction: wrap X_meta_val in a DataFrame labelled with X_stack's
# column names before handing it to the meta-learner.
meta_val_frame = pd.DataFrame(X_meta_val, columns=X_stack.columns)
y_meta_pred = meta.predict(meta_val_frame)

# Final evaluation on the validation targets: R² plus RMSE (square root of
# the mean squared error, cast to a plain float).
final_r2 = r2_score(y_val, y_meta_pred)
val_mse = mean_squared_error(y_val, y_meta_pred)
final_rmse = float(np.sqrt(val_mse))
st.success("AutoML + Stacking complete — metrics, artifacts, and SHAP ready.")
log(f"Completed stacking. Final R2={final_r2:.4f}, RMSE={final_rmse:.4f}")
|
| 857 |
+
# ===============================
# OPERATOR ADVISORY SYSTEM
# ===============================
# Turns the SHAP values of the top-ranked base model into plain-language
# "increase / decrease" hints, a +/-3% set-point table built from the mean of
# the last 200 rows of `df`, and (when `transformers` is importable) a short
# LLM-written operator note. Any failure is reported in the UI and logged.
st.markdown("---")
st.subheader("Operator Advisory System — Real-Time Shift Recommendations")

# Bound before the try so code further down the script that reads
# `recommendations` still finds a (possibly empty) list when the advisory
# fails or no suitable model is available.
recommendations = []

try:
    # Use top base model already identified
    top_base = next((b for b in base_models if b["family"] == selected[0]), None)
    if top_base and hasattr(top_base["model"], "predict"):
        # Cap the explanation sample at 300 rows; fixed seed keeps it repeatable.
        sample_X = X_val.sample(min(300, len(X_val)), random_state=42)
        model = top_base["model"]

        # SHAP direction analysis
        # NOTE(review): TreeExplainer is used unconditionally; presumably every
        # candidate family is tree-based — confirm, otherwise this raises and
        # the outer handler reports the failure.
        expl = shap.TreeExplainer(model)
        shap_vals = expl.shap_values(sample_X)
        importance = pd.DataFrame({
            "Feature": sample_X.columns,
            "Mean |SHAP|": np.abs(shap_vals).mean(axis=0),
            "Mean SHAP Sign": np.sign(shap_vals).mean(axis=0),
        }).sort_values("Mean |SHAP|", ascending=False)
        top_drivers = importance.head(5)
        top_features = list(top_drivers["Feature"])

        # Display Top 5 Drivers
        st.markdown("### Top 5 Operational Drivers Influencing Target")
        st.dataframe(top_drivers.style.format({"Mean |SHAP|": "{:.3f}", "Mean SHAP Sign": "{:.3f}"}))

        # Direction-based recommendations.
        # The scaling factor is recorded next to each message so the set-point
        # table below never has to parse the display text (a feature whose name
        # contains "Increase" or "Decrease" would otherwise be adjusted by mistake).
        # NOTE(review): the mean sign of the SHAP values says how often a feature
        # pushed predictions up in this sample, not necessarily that raising the
        # feature raises the target — confirm the wording with a domain owner.
        adjustments = {}  # feature name -> multiplicative factor
        for feature, sign in zip(top_features, top_drivers["Mean SHAP Sign"]):
            if sign > 0.05:
                recommendations.append(f"Increase `{feature}` likely increases `{target}`")
                adjustments[feature] = 1.03  # +3%
            elif sign < -0.05:
                recommendations.append(f"Decrease `{feature}` likely increases `{target}`")
                adjustments[feature] = 0.97  # -3%
            else:
                recommendations.append(f" `{feature}` is neutral or nonlinear for `{target}`")

        st.markdown("### Suggested Operator Adjustments (Model-Inferred)")
        # One bullet per recommendation: Markdown folds single newlines into a
        # space, which would run all messages together in one paragraph.
        st.markdown("\n".join(f"- {rec}" for rec in recommendations))

        # Delta recommendations vs previous shift
        prev_shift = df.tail(200).mean(numeric_only=True)
        recommended_shift = prev_shift.copy()
        for feature, factor in adjustments.items():
            # Features without a numeric column in `df` have no average to scale.
            if feature in recommended_shift.index:
                recommended_shift[feature] *= factor

        # Delta table
        st.markdown("### 🧾 Shift Adjustment Summary (vs Previous 200 Samples)")
        # reindex (not .loc) so a top feature missing from `df`'s numeric columns
        # becomes a NaN row, shown as 0 below, instead of raising KeyError.
        deltas = pd.DataFrame({
            "Current Avg": prev_shift,
            "Suggested": recommended_shift,
            "Δ (%)": (recommended_shift - prev_shift) / prev_shift * 100,
        }).reindex(top_features).round(2)

        st.dataframe(deltas.fillna(0).style.format("{:.2f}"))
        log("Operator advisory system executed successfully.")

        # Optional: LLM-generated human-friendly summary
        st.markdown("### Natural Language Operator Note")
        try:
            import importlib.util
            if importlib.util.find_spec("transformers"):
                from transformers import pipeline
                tiny_llm_path = os.path.join(LOG_DIR, "cached_tiny_llm")
                if os.path.exists(os.path.join(tiny_llm_path, "config.json")):
                    # A cached copy exists on disk: load it instead of the hub model.
                    # Distinct names keep the base model in `model` untouched.
                    from transformers import AutoModelForCausalLM, AutoTokenizer
                    llm_model = AutoModelForCausalLM.from_pretrained(tiny_llm_path)
                    llm_tokenizer = AutoTokenizer.from_pretrained(tiny_llm_path)
                    assistant = pipeline("text-generation", model=llm_model, tokenizer=llm_tokenizer)
                else:
                    assistant = pipeline("text-generation", model="sshleifer/tiny-gpt2")

                llm_prompt = (
                    "\n"
                    "You are a metallurgical process advisor working in a steel manufacturing unit.\n"
                    "Based on these recommendations:\n"
                    f"{recommendations}\n"
                    "and these shift averages:\n"
                    f"{deltas.to_dict(orient='index')}\n"
                    "Write a concise 3-line message to the operator suggesting what to adjust this shift.\n"
                )
                # return_full_text=False: show only the generated continuation,
                # not the prompt (with its raw dict dump) echoed back.
                resp = assistant(
                    llm_prompt,
                    max_new_tokens=80,
                    do_sample=True,
                    temperature=0.6,
                    return_full_text=False,
                )[0]["generated_text"]
                st.info(resp)
                log("Operator LLM advisory note generated successfully.")
            else:
                st.warning("Transformers not available — install it for text generation.")
        except Exception as e:
            # The note is optional; a failure here must not hide the tables above.
            st.warning(f"LLM advisory generation skipped: {e}")

    else:
        st.info("No suitable model found for operator advisory system.")
except Exception as e:
    st.error(f"Operator advisory system failed: {e}")
    log(f"Operator advisory error: {e}")
|
| 960 |
+
|
| 961 |
|
| 962 |
|
| 963 |
c1, c2 = st.columns(2)
|
|
|
|
| 1000 |
st.success(" AutoML + Stacking complete — metrics, artifacts, and SHAP ready.")
|
| 1001 |
|
| 1002 |
# --- Store AutoML summary for optional LLM advisory ---
# The summary is built once; the session entry is then either created or
# refreshed in place, so any other keys already stored under
# "automl_summary" are left untouched.
latest_summary = {
    "leaderboard": lb[["family", "cv_r2"]].round(4).to_dict(orient="records"),
    "final_r2": float(final_r2),
    "final_rmse": float(final_rmse),
    "target": target,
    "use_case": use_case,
}
if "automl_summary" in st.session_state:
    # Always refresh with latest metrics after each run
    st.session_state["automl_summary"].update(latest_summary)
else:
    st.session_state["automl_summary"] = latest_summary
|
| 1020 |
|
| 1021 |
+
# Persist SHAP-based recommendations for reuse across reruns.
# Written unconditionally so the session always holds the latest run's list.
# NOTE(review): `recommendations` is bound by the operator advisory section
# earlier in the script — confirm it cannot be missing when this line runs.
st.session_state["shap_recommendations"] = recommendations
|
| 1026 |
+
|
| 1027 |
+
# --- AI Recommendation Assistant (in-memory safe for Hugging Face) ---
# On button click, feeds the AutoML summary stored in the session to a tiny
# text-generation model loaded in memory and renders its reply.
st.markdown("---")
st.subheader("AI Recommendation Assistant (in-memory mode)")
st.caption("Generates quick local AI suggestions — no file writes required.")

if st.button("Get AI Recommendation (tiny local LLM)", key="ai_reco"):
    summary = st.session_state.get("automl_summary", {})
    if not summary:
        # Warn and fall through instead of calling st.stop(): stopping here
        # would also abort everything that follows this section in the script
        # (e.g. the later tabs), leaving them unrendered.
        st.warning("Please run AutoML first to generate context.")
    else:
        try:
            from transformers import pipeline

            st.info("Loading tiny model in-memory (ephemeral)...")
            assistant = pipeline("text-generation", model="sshleifer/tiny-gpt2")

            prompt = (
                "\n"
                "You are an ML model tuning assistant.\n"
                "Given this AutoML summary, provide 3 actionable steps for improvement\n"
                "if overfitting, underfitting, or data quality issues are suspected.\n"
                "\n"
                f"Use case: {summary.get('use_case')}\n"
                f"Target: {summary.get('target')}\n"
                f"Final R²: {summary.get('final_r2')}\n"
                f"Final RMSE: {summary.get('final_rmse')}\n"
                f"Leaderboard: {summary.get('leaderboard')}\n"
                "\n"
                "Respond in concise numbered steps.\n"
            )

            # return_full_text=False: display only the model's continuation,
            # not the prompt echoed back in front of it.
            result = assistant(
                prompt,
                max_new_tokens=90,
                temperature=0.7,
                do_sample=True,
                return_full_text=False,
            )[0]["generated_text"]
            st.success("LLM Recommendation:")
            st.markdown(result)
            log("Tiny LLM in-memory advisory generated successfully.")
        except ImportError as e:
            # Only a missing package warrants the installation hint.
            st.error(f"LLM generation failed: {e}")
            st.info("Make sure `transformers` is installed in your Space environment.")
        except Exception as e:
            st.error(f"LLM generation failed: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1066 |
|
| 1067 |
# ----- Target & Business Impact tab
|
| 1068 |
with tabs[5]:
|
|
|
|
| 1140 |
bib_df = pd.DataFrame(bib_data)
|
| 1141 |
bib_df["Paper Title"] = bib_df.apply(lambda x: f"[{x['title']}]({x['url']})", axis=1)
|
| 1142 |
|
| 1143 |
+
# Render the bibliography as one Markdown entry per paper: linked bold title,
# italic authors, italic notes.
st.markdown("### Annotated Bibliography — Justification for Target Variables")

for entry in bib_df.to_dict(orient="records"):
    # Two trailing spaces before each "\n" form a Markdown hard line break, so
    # title, authors and notes land on separate lines instead of one paragraph.
    st.markdown(
        f"**[{entry['title']}]({entry['url']})**  \n"
        f"*{entry['authors']}*  \n"
        f" _{entry['notes']}_  \n",
        unsafe_allow_html=True,
    )
st.info("Click any paper title above to open it in a new tab.")
|
| 1153 |
+
|
| 1154 |
|
| 1155 |
st.markdown("""
|
| 1156 |
**Feature ↔ Target Justification**
|