|
|
|
|
|
import os |
|
|
import json |
|
|
import time |
|
|
from datetime import datetime |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import streamlit as st |
|
|
import matplotlib.pyplot as plt |
|
|
import seaborn as sns |
|
|
import joblib |
|
|
import zipfile |
|
|
import io |
|
|
import gc |
|
|
|
|
|
|
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.linear_model import LinearRegression, Ridge |
|
|
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor |
|
|
from sklearn.preprocessing import StandardScaler, PolynomialFeatures |
|
|
from sklearn.decomposition import PCA |
|
|
from sklearn.cluster import KMeans |
|
|
from sklearn.metrics import mean_squared_error, r2_score |
|
|
|
|
|
|
|
|
import shap |
|
|
|
|
|
|
|
|
import optuna |
|
|
from sklearn.model_selection import cross_val_score, KFold |
|
|
from sklearn.neural_network import MLPRegressor |
|
|
|
|
|
|
|
|
# Default values for the session-state keys this app relies on.
# `setdefault` only writes a key when it is absent, so values already stored
# in the session are preserved when the script is re-executed.
defaults = {
    "llm_result": None,
    "automl_summary": {},
    "shap_recommendations": [],
    "hf_clicked": False,
    "hf_ran_once": False,
    "run_automl_clicked": False,
}
for k, v in defaults.items():
    st.session_state.setdefault(k, v)

# The per-key `if "<key>" not in st.session_state` blocks that used to follow
# were unreachable: every one of those keys is already set by the loop above.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Page chrome and plotting theme ---------------------------------------
st.set_page_config(
    page_title="Steel Authority of India Limited (MODEX)",
    layout="wide",
)
plt.style.use("seaborn-v0_8-muted")
sns.set_palette("muted")
sns.set_style("whitegrid")

# --- Output locations ------------------------------------------------------
# Every artifact written by this script lives under LOG_DIR.
LOG_DIR = "./logs"
os.makedirs(LOG_DIR, exist_ok=True)

CSV_PATH, META_PATH, ENSEMBLE_PATH, LOG_PATH = (
    os.path.join(LOG_DIR, artifact_name)
    for artifact_name in (
        "flatfile_universe_advanced.csv",
        "feature_metadata_advanced.json",
        "ensemble_models.joblib",
        "run_master.log",
    )
)
|
|
|
|
|
|
|
|
# Flipped to True by `log` once the session banner has been written.
SESSION_STARTED = False


def log(msg: str):
    """Append a timestamped line to LOG_PATH and echo the message to stdout.

    The first call after SESSION_STARTED is False also writes a
    "New Session Started" banner before the message.

    Args:
        msg: Text to record.
    """
    global SESSION_STARTED
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(LOG_PATH, "a", encoding="utf-8") as handle:
        banner_needed = not SESSION_STARTED
        if banner_needed:
            handle.write("\n\n===== New Session Started at {} =====\n".format(stamp))
            SESSION_STARTED = True
        handle.write(f"[{stamp}] {msg}\n")
    print(msg)
|
|
|
|
|
log("=== Streamlit session started ===")

# Tell the user whether a /data mount is present.
# NOTE(review): LOG_DIR is "./logs" in both cases, so the message only
# reflects the presence of /data, not where files are actually written.
has_data_mount = os.path.exists("/data")
if not has_data_mount:
    st.sidebar.warning(f" Using ephemeral storage | Logs directory: {LOG_DIR}. Data will be lost on rebuild.")
else:
    st.sidebar.success(f" Using persistent storage | Logs directory: {LOG_DIR}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_advanced_flatfile(
    n_rows=3000,
    random_seed=42,
    max_polynomial_new=60,
    global_variance_multiplier=1.0,
    variance_overrides=None,
):
    """Generate a synthetic process dataset with engineered features and save it.

    Base ("natural") columns are sampled from per-feature distributions, then
    derived ratios, rolling/lag features, interaction terms, polynomial
    features, PCA components, a K-Means operating mode, two random-forest
    surrogate predictions and a few flags are added. The rows are appended to
    CSV_PATH (created if missing) and a summary entry is appended to the JSON
    list stored at META_PATH.

    Args:
        n_rows: Number of rows to generate.
        random_seed: Seed passed to ``np.random.seed``.
        max_polynomial_new: Maximum number of new polynomial columns kept.
        global_variance_multiplier: Factor applied to each base standard
            deviation when no override matches.
        variance_overrides: Optional mapping ``name -> standard deviation``.
            An exact key match wins; otherwise the first key that is a
            substring of the sampling key is used.

    Returns:
        Tuple ``(CSV_PATH, META_PATH, PDF_PATH)``; ``PDF_PATH`` is always None.
    """
    np.random.seed(random_seed)
    os.makedirs(LOG_DIR, exist_ok=True)
    if variance_overrides is None:
        variance_overrides = {}

    # ------------------------------------------------------------------
    # Base feature names
    # ------------------------------------------------------------------
    natural_feats = [
        "vibration_x","vibration_y","motor_current","rpm","bearing_temp","ambient_temp","lube_pressure","power_factor",
        "furnace_temp","tap_temp","slag_temp","offgas_co","offgas_co2","o2_probe_pct","c_feed_rate","arc_power","furnace_pressure","feed_time",
        "mold_temp","casting_speed","nozzle_pressure","cooling_water_temp","billet_length","chemical_C","chemical_Mn","chemical_Si","chemical_S",
        "roll_speed","motor_load","coolant_flow","exit_temp","strip_thickness","line_tension","roller_vibration",
        "lighting_intensity","surface_temp","image_entropy_proxy",
        "spectro_Fe","spectro_C","spectro_Mn","spectro_Si","time_since_last_sample",
        "batch_id_numeric","weight_input","weight_output","time_in_queue","conveyor_speed",
        "shell_temp","lining_thickness","water_flow","cooling_out_temp","heat_flux"
    ]
    # De-duplicate while keeping first-seen order.
    natural_feats = list(dict.fromkeys(natural_feats))

    def effective_sd(feature_name, base_sd):
        """Resolve the standard deviation to use for ``feature_name``."""
        if feature_name in variance_overrides:
            return float(variance_overrides[feature_name])
        for key, val in variance_overrides.items():
            if key in feature_name:
                return float(val)
        return float(base_sd) * float(global_variance_multiplier)

    def sample_col(name, n):
        """Draw ``n`` samples for column ``name`` from its distribution."""
        name_l = name.lower()

        # Specific temperature sensors must be tested BEFORE the generic
        # "*_temp" rule. Previously the generic rule came first and matched
        # every "*_temp" name, so the two branches below were unreachable
        # (bearing_temp, for example, was sampled around 1550).
        if "bearing_temp" in name_l:
            sd = effective_sd("bearing_temp", 5)
            return np.random.normal(65, sd, n)
        # tap_temp is intentionally not listed here: the generic rule below
        # names it explicitly, so it keeps the furnace-like distribution.
        if name_l in ("mold_temp","shell_temp","cooling_out_temp","exit_temp"):
            sd = effective_sd(name_l, 30)
            return np.random.normal(200 if "mold" not in name_l else 1500, sd, n)
        if "furnace_temp" in name_l or name_l.endswith("_temp") or "tap_temp" in name_l:
            sd = effective_sd("furnace_temp", 50)
            return np.random.normal(1550, sd, n)

        # "offgas_co2" must be tested before "offgas_co" (substring of it).
        if "offgas_co2" in name_l:
            sd = effective_sd("offgas_co2", 4)
            return np.abs(np.random.normal(15, sd, n))
        if "offgas_co" in name_l:
            sd = effective_sd("offgas_co", 5)
            return np.abs(np.random.normal(20, sd, n))
        if "o2" in name_l:
            sd = effective_sd("o2_probe_pct", 1)
            return np.clip(np.random.normal(5, sd, n), 0.01, 60)
        if "arc_power" in name_l or "motor_load" in name_l:
            sd = effective_sd("arc_power", 120)
            return np.abs(np.random.normal(600, sd, n))
        if "rpm" in name_l:
            sd = effective_sd("rpm", 30)
            return np.abs(np.random.normal(120, sd, n))
        if "vibration" in name_l:
            sd = effective_sd("vibration", 0.15)
            return np.abs(np.random.normal(0.4, sd, n))
        if "chemical" in name_l or "spectro" in name_l:
            sd = effective_sd("chemical", 0.15)
            return np.random.normal(0.7, sd, n)
        if "weight" in name_l:
            sd = effective_sd("weight", 100)
            return np.random.normal(1000, sd, n)
        if "conveyor_speed" in name_l or "casting_speed" in name_l:
            sd = effective_sd("casting_speed", 0.6)
            return np.random.normal(2.5, sd, n)
        if "power_factor" in name_l:
            sd = effective_sd("power_factor", 0.03)
            return np.clip(np.random.normal(0.92, sd, n), 0.6, 1.0)
        if "image_entropy_proxy" in name_l:
            sd = effective_sd("image_entropy_proxy", 0.25)
            return np.abs(np.random.normal(0.5, sd, n))
        if "batch_id" in name_l:
            return np.random.randint(1000,9999,n)
        if "time_since" in name_l or "time_in_queue" in name_l:
            sd = effective_sd("time_since", 20)
            return np.abs(np.random.normal(30, sd, n))
        if "heat_flux" in name_l:
            sd = effective_sd("heat_flux", 300)
            return np.abs(np.random.normal(1000, sd, n))
        # Anything unmatched: zero-mean noise.
        return np.random.normal(0, effective_sd(name_l, 1), n)

    # ------------------------------------------------------------------
    # Base table
    # ------------------------------------------------------------------
    df = pd.DataFrame({c: sample_col(c, n_rows) for c in natural_feats})

    # Time index and plant metadata.
    start = pd.Timestamp("2025-01-01T00:00:00")
    df["timestamp"] = pd.date_range(start, periods=n_rows, freq="min")
    df["cycle_minute"] = np.mod(np.arange(n_rows), 80)
    df["meta_plant_name"] = np.random.choice(["Rourkela","Bhilai","Durgapur","Bokaro","Burnpur","Salem"], n_rows)
    df["meta_country"] = "India"

    # Derived ratios; the small constants in the denominators avoid
    # division by zero.
    df["carbon_proxy"] = df["offgas_co"] / (df["offgas_co2"] + 1.0)
    df["oxygen_utilization"] = df["offgas_co2"] / (df["offgas_co"] + 1.0)
    df["power_density"] = df["arc_power"] / (df["weight_input"] + 1.0)
    df["energy_efficiency"] = df["furnace_temp"] / (df["arc_power"] + 1.0)
    df["slag_foaming_index"] = (df["slag_temp"] * df["offgas_co"]) / (df["o2_probe_pct"] + 1.0)
    df["yield_ratio"] = df["weight_output"] / (df["weight_input"] + 1e-9)

    # Rolling mean/std, lag and rate-of-change features.
    rolling_cols = ["arc_power","furnace_temp","offgas_co","offgas_co2","motor_current","vibration_x","weight_input"]
    for rc in rolling_cols:
        if rc in df.columns:
            df[f"{rc}_roll_mean_3"] = df[rc].rolling(3, min_periods=1).mean()
            df[f"{rc}_roll_std_5"] = df[rc].rolling(5, min_periods=1).std().fillna(0)
            df[f"{rc}_lag1"] = df[rc].shift(1).bfill()
            df[f"{rc}_roc_1"] = df[rc].diff().fillna(0)

    # Interaction terms.
    df["arc_o2_interaction"] = df["arc_power"] * df["o2_probe_pct"]
    df["carbon_power_ratio"] = df["carbon_proxy"] / (df["arc_power"] + 1e-6)
    df["temp_power_sqrt"] = df["furnace_temp"] * np.sqrt(np.abs(df["arc_power"]) + 1e-6)

    # Degree-2 polynomial features over the first 12 numeric columns; only
    # terms that are not one of the source columns are kept, capped at
    # max_polynomial_new.
    numeric = df.select_dtypes(include=[np.number]).fillna(0)
    poly_source_cols = numeric.columns[:12].tolist()
    poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
    poly_mat = poly.fit_transform(numeric[poly_source_cols])
    poly_names = poly.get_feature_names_out(poly_source_cols)
    poly_df = pd.DataFrame(poly_mat, columns=[f"poly__{n}" for n in poly_names], index=df.index)
    keep_poly = [c for c in poly_df.columns if c.replace("poly__","") not in poly_source_cols]
    poly_df = poly_df[keep_poly].iloc[:, :max_polynomial_new] if len(keep_poly) > 0 else poly_df.iloc[:, :0]
    df = pd.concat([df, poly_df], axis=1)

    # PCA on the standardized numeric snapshot taken before the polynomial
    # columns were appended.
    scaler = StandardScaler()
    scaled = scaler.fit_transform(numeric)
    pca = PCA(n_components=6, random_state=42)
    pca_cols = pca.fit_transform(scaled)
    for i in range(pca_cols.shape[1]):
        df[f"pca_{i+1}"] = pca_cols[:, i]

    # K-Means cluster label on the same standardized matrix.
    kmeans = KMeans(n_clusters=6, random_state=42, n_init=10)
    df["operating_mode"] = kmeans.fit_predict(scaled)

    # Surrogate 1: random forest predicting the next row's furnace_temp.
    surrogate_df = df.copy()
    surrogate_df["furnace_temp_next"] = surrogate_df["furnace_temp"].shift(-1).ffill()
    features_for_surrogate = [c for c in ["furnace_temp","arc_power","o2_probe_pct","offgas_co","offgas_co2"] if c in df.columns]
    if len(features_for_surrogate) >= 2:
        X = surrogate_df[features_for_surrogate].fillna(0)
        y = surrogate_df["furnace_temp_next"]
        rf = RandomForestRegressor(n_estimators=50, random_state=42, n_jobs=-1)
        rf.fit(X, y)
        df["pred_temp_30s"] = rf.predict(X)
    else:
        df["pred_temp_30s"] = df["furnace_temp"]

    # Surrogate 2: random forest predicting carbon_proxy from off-gas signals.
    if all(c in df.columns for c in ["offgas_co","offgas_co2","o2_probe_pct"]):
        X2 = df[["offgas_co","offgas_co2","o2_probe_pct"]].fillna(0)
        rf2 = RandomForestRegressor(n_estimators=50, random_state=1, n_jobs=-1)
        rf2.fit(X2, df["carbon_proxy"])
        df["pred_carbon_5min"] = rf2.predict(X2)
    else:
        df["pred_carbon_5min"] = df["carbon_proxy"]

    # Flags and limits.
    df["refractory_limit_flag"] = (df["lining_thickness"] < 140).astype(int)
    df["max_allowed_power_delta"] = np.clip(df["arc_power"].diff().abs().fillna(0), 0, 2000)

    df["ARC_ON"] = ((df["arc_power"] > df["arc_power"].median()) & (df["carbon_proxy"] < 1.0)).astype(int)
    df["prediction_confidence"] = np.clip(np.random.beta(2,5, n_rows), 0.05, 0.99)

    # Final clean-up: infinities -> NaN, back-fill, remaining NaN -> 0.
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.bfill(inplace=True)
    df.fillna(0, inplace=True)

    # Persist: append without a header when the CSV already exists.
    # NOTE(review): appending assumes the existing file has the same columns
    # in the same order (e.g. same max_polynomial_new) — confirm with callers.
    df["run_timestamp"] = datetime.now().strftime("%Y%m%d_%H%M%S")
    if os.path.exists(CSV_PATH):
        df.to_csv(CSV_PATH, mode="a", index=False, header=False)
    else:
        df.to_csv(CSV_PATH, index=False)

    # Append a run summary to the JSON metadata list. Files are opened with
    # context managers so the handles are closed deterministically.
    meta_entry = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "features": len(df.columns),
        "rows_added": len(df),
        "note": "auto-generated block appended"
    }
    existing = []
    if os.path.exists(META_PATH):
        with open(META_PATH, "r", encoding="utf-8") as meta_in:
            existing = json.load(meta_in)
        if not isinstance(existing, list):
            # Tolerate a file holding a single object instead of a list.
            existing = [existing]
    existing.append(meta_entry)
    with open(META_PATH, "w", encoding="utf-8") as meta_out:
        json.dump(existing, meta_out, indent=2)

    PDF_PATH = None
    return CSV_PATH, META_PATH, PDF_PATH
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Build the dataset on first run: both the CSV and its metadata must exist,
# otherwise they are (re)generated.
if not (os.path.exists(CSV_PATH) and os.path.exists(META_PATH)):
    with st.spinner("Generating synthetic features (this may take ~20-60s)..."):
        CSV_PATH, META_PATH, PDF_PATH = generate_advanced_flatfile(
            n_rows=3000,
            random_seed=42,
            max_polynomial_new=80,
        )
        st.success(f"Generated dataset and metadata: {CSV_PATH}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_data
def load_data(csv_path=CSV_PATH, meta_path=META_PATH):
    """Read the dataset CSV and the JSON metadata file.

    Args:
        csv_path: Path of the CSV to read with ``pd.read_csv``.
        meta_path: Path of the JSON file to read.

    Returns:
        Tuple ``(dataset frame, metadata frame)``; the metadata frame is
        built by passing the parsed JSON to ``pd.DataFrame``.
    """
    table = pd.read_csv(csv_path)
    with open(meta_path, "r") as handle:
        records = json.load(handle)
    meta_table = pd.DataFrame(records)
    return table, meta_table
|
|
|
|
|
df, meta_df = load_data()

# Keep only the first occurrence of each column name.
is_repeat_column = df.columns.duplicated()
df = df.loc[:, ~is_repeat_column]

st.sidebar.title("Feature Explorer - Advanced + SHAP")
|
|
|
|
|
def ensure_feature_metadata(df: pd.DataFrame, meta_df: pd.DataFrame) -> pd.DataFrame:
    """Return feature-level metadata with one row per column of ``df``.

    If ``meta_df`` is None or has fewer rows than ``df`` has columns, the
    metadata is rebuilt from the column names and a sidebar warning is shown.
    Otherwise a copy of ``meta_df`` is truncated to ``len(df.columns)`` rows,
    any missing required column is added (filled with None), and
    ``feature_name`` is filled from ``df.columns`` when it is entirely null.

    Args:
        df: Dataset whose columns are the features.
        meta_df: Existing metadata, or None.

    Returns:
        Metadata frame containing at least ``feature_name``, ``source_type``,
        ``formula`` and ``remarks``. The caller's ``meta_df`` is not modified.
    """
    required_cols = ["feature_name", "source_type", "formula", "remarks"]
    n_features = len(df.columns)

    def _is_engineered(col_name) -> bool:
        # Match the naming patterns used for generated columns. Anchored
        # patterns are used because bare substrings ("roll", "lag") also hit
        # measured columns such as roll_speed, roller_vibration or slag_temp.
        name = str(col_name)
        return (
            name.startswith(("poly__", "pca_"))
            or "_roll_" in name
            or "_lag" in name
        )

    if meta_df is None or len(meta_df) < n_features:
        meta_df = pd.DataFrame({
            "feature_name": df.columns,
            "source_type": [
                "engineered" if _is_engineered(c) else "measured"
                for c in df.columns
            ],
            "formula": ["" for _ in df.columns],
            "remarks": ["auto-inferred synthetic feature metadata" for _ in df.columns],
        })
        st.sidebar.warning("Metadata was summary-only — rebuilt feature-level metadata.")
        return meta_df

    # Work on a copy so the caller's frame is left untouched.
    meta_df = meta_df.copy()

    # Truncate BEFORE filling feature_name: assigning df.columns to a longer
    # frame raises a length-mismatch ValueError.
    if len(meta_df) > n_features:
        meta_df = meta_df.iloc[:n_features].copy()

    for col in required_cols:
        if col not in meta_df.columns:
            meta_df[col] = None

    if meta_df["feature_name"].isna().all():
        meta_df["feature_name"] = list(df.columns)

    return meta_df
|
|
|
|
|
meta_df = ensure_feature_metadata(df, meta_df)

# Sidebar filter over the distinct, non-null source types.
known_source_types = meta_df["source_type"].dropna()
feat_types = sorted(known_source_types.unique().tolist())
selected_types = st.sidebar.multiselect("Feature type", feat_types, default=feat_types)

# Filter only when there is type information to filter on; otherwise show
# the metadata unfiltered.
has_type_info = "source_type" in meta_df.columns and not meta_df["source_type"].dropna().empty
if has_type_info:
    type_mask = meta_df["source_type"].isin(selected_types)
    filtered_meta = meta_df[type_mask]
else:
    filtered_meta = meta_df.copy()

numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Tab order matters: the sections below address tabs by index.
tab_labels = [
    "Features",
    "Visualization",
    "Correlations",
    "Statistics",
    "AutoML + SHAP",
    "Business Impact",
    "Bibliography",
    "Download Saved Files",
    "View Logs",
    "Smart Advisor",
]
tabs = st.tabs(tab_labels)
|
|
|
|
|
|
|
|
with tabs[0]:
    # Features tab: metadata table filtered by the sidebar selection.
    st.subheader("Feature metadata")
    metadata_columns = ["feature_name", "source_type", "formula", "remarks"]
    metadata_view = filtered_meta[metadata_columns].rename(columns={"feature_name": "Feature"})
    st.dataframe(metadata_view, height=400)
    st.markdown(f"Total features loaded: **{df.shape[1]}** | Rows: **{df.shape[0]}**")
|
|
|
|
|
|
|
|
|
|
|
with tabs[1]:
    # Visualization tab: histogram of one feature plus the PCA/cluster view.
    st.subheader("Feature Visualization")
    col = st.selectbox("Choose numeric feature", numeric_cols, index=0)
    bins = st.slider("Histogram bins", 10, 200, 50)

    fig, ax = plt.subplots(figsize=(8, 4))
    sns.histplot(df[col], bins=bins, kde=True, ax=ax, color="#2C6E91", alpha=0.8)
    ax.set_title(f"Distribution of {col}", fontsize=12)
    st.pyplot(fig, clear_figure=True)
    # pyplot keeps every figure registered until it is closed; without this
    # each script rerun would leave another figure in memory.
    plt.close(fig)
    st.write(df[col].describe().to_frame().T)

    if all(x in df.columns for x in ["pca_1", "pca_2", "operating_mode"]):
        st.markdown("### PCA Feature Space — Colored by Operating Mode")
        fig2, ax2 = plt.subplots(figsize=(6, 5))
        sns.scatterplot(
            data=df.sample(min(1000, len(df)), random_state=42),
            x="pca_1", y="pca_2", hue="operating_mode",
            palette="tab10", alpha=0.7, s=40, ax=ax2
        )
        ax2.set_title("Operating Mode Clusters (PCA Projection)")
        st.pyplot(fig2, clear_figure=True)
        plt.close(fig2)

        st.markdown("""
        **Interpretation – Operating Mode Clusters**

        This PCA-based projection compresses over 100 process features into two principal dimensions,
        revealing the dominant patterns in furnace operation. Each color represents an automatically discovered
        *operating mode* (via K-Means clustering).

        - **Distinct clusters (colors)** → different operating regimes (e.g., high-power melt, refining, tapping, idle)
        - **Overlaps** → transitional phases or process variability
        - **Compact clusters** → stable operation; **spread-out clusters** → drift or unstable control
        - **Shifts between colors** over time may reflect raw-material change or arc power adjustment

        Understanding these clusters helps metallurgists and control engineers associate process signatures
        with efficient or energy-intensive operating conditions.
        """)

        # Loadings for the "top variables" summary. The stored pca_1/pca_2
        # columns were produced from standardized inputs, so this refit
        # standardizes as well; on raw values the components would simply
        # follow the largest-magnitude columns. The PCA outputs and the
        # cluster label are excluded so they cannot rank as their own drivers.
        # This is a refit on the loaded data, i.e. an approximation of the
        # plotted axes rather than the exact stored decomposition.
        num_df = df.select_dtypes(include=[np.number]).fillna(0)
        projection_outputs = [
            c for c in num_df.columns
            if str(c).startswith("pca_") or c == "operating_mode"
        ]
        num_df = num_df.drop(columns=projection_outputs)
        pca = PCA(n_components=2, random_state=42)
        pca.fit(StandardScaler().fit_transform(num_df))
        comp_df = pd.DataFrame(pca.components_.T, index=num_df.columns, columns=["PC1", "PC2"])
        top_pc1 = comp_df["PC1"].abs().nlargest(5).index.tolist()
        top_pc2 = comp_df["PC2"].abs().nlargest(5).index.tolist()
        st.info(f"**Top variables driving PCA-1 (X-axis):** {', '.join(top_pc1)}")
        st.info(f"**Top variables driving PCA-2 (Y-axis):** {', '.join(top_pc2)}")
|
|
|
|
|
|
|
|
|
|
|
with tabs[2]:
    # Correlations tab: heatmap of the pairwise correlation matrix for the
    # selected numeric features.
    st.subheader("Correlation explorer")
    # Slicing already returns the whole list when it has fewer than 20 items.
    default_corr = numeric_cols[:20]
    corr_sel = st.multiselect("Select features (min 2)", numeric_cols, default=default_corr)
    if len(corr_sel) >= 2:
        corr = df[corr_sel].corr()
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(corr, cmap="RdBu_r", center=0, annot=True, fmt=".2f",
                    linewidths=0.5, cbar_kws={"shrink": 0.7}, ax=ax)
        st.pyplot(fig, clear_figure=True)
        # Release the figure; pyplot otherwise keeps it registered across
        # script reruns.
        plt.close(fig)
    else:
        st.info("Choose at least 2 numeric features to compute correlation.")
|
|
|
|
|
|
|
|
with tabs[3]:
    # Statistics tab: transposed describe() table, three decimals.
    st.subheader("Summary statistics (numeric features)")
    summary_table = df.describe().T
    styled_summary = summary_table.style.format("{:.3f}")
    st.dataframe(styled_summary, height=500)
|
|
|
|
|
|
|
|
|
|
|
with tabs[4]:
    st.subheader("AutoML Ensemble — Expanded Families + Stacking + SHAP")

    def clean_entire_df(df):
        """Cleans dataframe of bracketed/scientific string numbers like '[1.551E3]'.

        Object columns have brackets and commas stripped and are converted
        with ``pd.to_numeric(errors="coerce")``; every remaining NaN becomes
        0.0 and the whole frame is cast to float. Returns a new frame.
        """
        df_clean = df.copy()
        for col in df_clean.columns:
            if df_clean[col].dtype == object:
                df_clean[col] = (
                    df_clean[col]
                    .astype(str)
                    .str.replace("[", "", regex=False)
                    .str.replace("]", "", regex=False)
                    .str.replace(",", "", regex=False)
                    .str.strip()
                    .replace(["nan", "NaN", "None", "null", "N/A", "", " "], np.nan)
                )
                df_clean[col] = pd.to_numeric(df_clean[col], errors="coerce")
        df_clean = df_clean.fillna(0.0).astype(float)
        return df_clean

    df = clean_entire_df(df)
    st.caption(" Dataset cleaned globally — all numeric-like values converted safely.")

    # ------------------------------------------------------------------
    # Use case -> target column and model-family hint
    # ------------------------------------------------------------------
    use_case = st.selectbox(
        "Select Use Case",
        [
            "Predictive Maintenance",
            "EAF Data Intelligence",
            "Casting Quality Optimization",
            "Rolling Mill Energy Optimization",
            "Surface Defect Detection (Vision AI)",
            "Material Composition & Alloy Mix AI",
            "Inventory & Yield Optimization",
            "Refractory & Cooling Loss Prediction",
        ],
        index=1,
    )

    use_case_config = {
        "Predictive Maintenance": {"target": "bearing_temp", "model_hint": "RandomForest"},
        "EAF Data Intelligence": {"target": "furnace_temp", "model_hint": "GradientBoosting"},
        "Casting Quality Optimization": {"target": "surface_temp", "model_hint": "GradientBoosting"},
        "Rolling Mill Energy Optimization": {"target": "energy_efficiency", "model_hint": "ExtraTrees"},
        "Surface Defect Detection (Vision AI)": {"target": "image_entropy_proxy", "model_hint": "GradientBoosting"},
        "Material Composition & Alloy Mix AI": {"target": "chemical_C", "model_hint": "RandomForest"},
        "Inventory & Yield Optimization": {"target": "yield_ratio", "model_hint": "GradientBoosting"},
        "Refractory & Cooling Loss Prediction": {"target": "lining_thickness", "model_hint": "ExtraTrees"},
    }

    cfg = use_case_config.get(use_case, {"target": numeric_cols[0], "model_hint": "RandomForest"})
    target, model_hint = cfg["target"], cfg["model_hint"]

    # Suggest inputs: columns sharing a token with the target name, then a
    # fixed keyword list, then simply the first 50 numeric columns.
    suggested = [c for c in numeric_cols if any(k in c for k in target.split("_"))]
    if len(suggested) < 6:
        suggested = [c for c in numeric_cols if any(k in c for k in ["temp", "power", "energy", "pressure", "yield"])]
    if len(suggested) < 6:
        suggested = numeric_cols[:50]

    features = st.multiselect("Model input features (auto-suggested)", numeric_cols, default=suggested)
    st.markdown(f"Auto target: `{target}` · Suggested family hint: `{model_hint}`")

    max_rows = min(df.shape[0], 20000)
    sample_size = st.slider("Sample rows", 500, max_rows, min(1500, max_rows), step=100)

    # Resolve the target column: exact name first, then case-insensitive
    # substring match.
    target_col = target if target in df.columns else next((c for c in df.columns if target.lower() in c.lower()), None)
    if not target_col:
        st.error(f"Target `{target}` not found in dataframe.")
        st.stop()

    cols_needed = [c for c in features if c in df.columns and c != target_col]
    sub_df = df.loc[:, cols_needed + [target_col]].sample(n=sample_size, random_state=42).reset_index(drop=True)

    X = sub_df.drop(columns=[target_col])
    y = pd.Series(np.ravel(sub_df[target_col]), name=target_col)

    # Drop known leakage/identifier columns and columns with a single value.
    leak_cols = ["furnace_temp_next", "pred_temp_30s", "run_timestamp", "timestamp", "batch_id_numeric", "batch_id"]
    X = X.drop(columns=[c for c in leak_cols if c in X.columns], errors="ignore")
    X = X.loc[:, X.nunique() > 1]

    # ------------------------------------------------------------------
    # Ensemble settings
    # ------------------------------------------------------------------
    st.markdown("### Ensemble & AutoML Settings")
    max_trials = st.slider("Optuna trials per family", 5, 80, 20, step=5)
    top_k = st.slider("Max base models in ensemble", 2, 8, 5)
    allow_advanced = st.checkbox("Include advanced families (XGBoost, LightGBM, CatBoost)", value=True)

    available_models = ["RandomForest", "ExtraTrees"]
    optional_families = {}
    if allow_advanced:
        # A family is listed only if its package imports cleanly.
        try:
            import xgboost as xgb
            optional_families["XGBoost"] = True
            available_models.append("XGBoost")
        except Exception:
            optional_families["XGBoost"] = False
        try:
            import lightgbm as lgb
            optional_families["LightGBM"] = True
            available_models.append("LightGBM")
        except Exception:
            optional_families["LightGBM"] = False
        try:
            import catboost as cb
            optional_families["CatBoost"] = True
            available_models.append("CatBoost")
        except Exception:
            optional_families["CatBoost"] = False

    st.markdown(f"Available families: {', '.join(available_models)}")

    def tune_family(fam, X_local, y_local, n_trials=20):
        """Tune one model family with Optuna (3-fold CV R²) and return it unfitted.

        Args:
            fam: Family name; "RandomForest" and "ExtraTrees" are tuned over
                ``n_estimators`` and ``max_depth``.
            X_local: Feature matrix.
            y_local: Target values.
            n_trials: Number of Optuna trials.

        Returns:
            Dict with keys ``family``, ``model_obj`` (unfitted estimator built
            from the best parameters), ``best_params`` and ``cv_score``
            (-999.0 when tuning failed).
        """
        def obj(trial):
            if fam == "RandomForest":
                m = RandomForestRegressor(
                    n_estimators=trial.suggest_int("n_estimators", 100, 800),
                    max_depth=trial.suggest_int("max_depth", 4, 30),
                    random_state=42, n_jobs=-1)
            elif fam == "ExtraTrees":
                m = ExtraTreesRegressor(
                    n_estimators=trial.suggest_int("n_estimators", 100, 800),
                    max_depth=trial.suggest_int("max_depth", 4, 30),
                    random_state=42, n_jobs=-1)
            else:
                # NOTE(review): any other family name (XGBoost, LightGBM,
                # CatBoost) is evaluated as an untuned RandomForest here and
                # in the final model below — the optional packages are
                # imported above but never used to build a model.
                m = RandomForestRegressor(random_state=42)

            try:
                return np.mean(cross_val_score(m, X_local, y_local, cv=3, scoring="r2"))
            except Exception:
                # Sentinel score so a failing trial never wins.
                return -999.0

        study = optuna.create_study(direction="maximize")
        try:
            study.optimize(obj, n_trials=n_trials, show_progress_bar=False)
            params = study.best_trial.params if study.trials else {}
            best_score = study.best_value if study.trials else -999.0
        except Exception as e:
            st.warning(f"Optuna failed for {fam}: {e}")
            params, best_score = {}, -999.0

        if fam == "RandomForest":
            model = RandomForestRegressor(**params, random_state=42, n_jobs=-1)
        elif fam == "ExtraTrees":
            model = ExtraTreesRegressor(**params, random_state=42, n_jobs=-1)
        else:
            model = RandomForestRegressor(random_state=42, n_jobs=-1)

        return {
            "family": fam,
            "model_obj": model,
            "best_params": params,
            "cv_score": best_score
        }

    # ------------------------------------------------------------------
    # Run: tune -> select features -> stack -> SHAP advisory -> LLM note
    # ------------------------------------------------------------------
    run_clicked = st.button("Run AutoML + SHAP")
    if run_clicked and X.shape[1] == 0:
        # Scaling/selection below cannot run on a zero-column matrix.
        st.error("No usable input features — select at least one non-constant feature other than the target.")
    elif run_clicked:
        with st.spinner("Training and stacking..."):
            tuned_results = []
            families = ["RandomForest", "ExtraTrees"]
            if allow_advanced:
                for f in ["XGBoost", "LightGBM", "CatBoost"]:
                    if optional_families.get(f):
                        families.append(f)

            for fam in families:
                tuned_results.append(tune_family(fam, X, y, n_trials=max_trials))

            lb = pd.DataFrame(
                [{"family": r["family"], "cv_r2": r["cv_score"]} for r in tuned_results]
            ).sort_values("cv_r2", ascending=False)
            st.dataframe(lb.round(4))

            from sklearn.feature_selection import SelectKBest, f_regression
            from sklearn.linear_model import LinearRegression
            from sklearn.model_selection import KFold
            from sklearn.metrics import r2_score

            # Standardize, then keep at most 40 features by univariate F-score.
            scaler = StandardScaler()
            X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
            selector = SelectKBest(f_regression, k=min(40, X_scaled.shape[1]))
            X_sel = pd.DataFrame(
                selector.fit_transform(X_scaled, y),
                columns=[X.columns[i] for i in selector.get_support(indices=True)]
            )

            kf = KFold(n_splits=5, shuffle=True, random_state=42)
            oof_preds = pd.DataFrame(index=X_sel.index)
            base_models = []

            # Honor the "Max base models in ensemble" slider: follow the
            # leaderboard order (best CV R² first) and keep the top_k
            # families. This also makes base_models[0] the best-scoring model.
            leaderboard_rank = {fam_name: pos for pos, fam_name in enumerate(lb["family"])}
            ranked_results = sorted(tuned_results, key=lambda r: leaderboard_rank[r["family"]])[:top_k]

            valid_results = [
                (r["family"], r) for r in ranked_results
                if r.get("model_obj") is not None and hasattr(r["model_obj"], "fit")
            ]

            for fam, entry in valid_results:
                model = entry["model_obj"]
                # Out-of-fold predictions; a failed fold leaves zeros.
                preds = np.zeros(X_sel.shape[0])
                for tr, va in kf.split(X_sel):
                    try:
                        model.fit(X_sel.iloc[tr], y.iloc[tr])
                        preds[va] = model.predict(X_sel.iloc[va])
                    except Exception as e:
                        st.warning(f"⚠️ {fam} failed in fold: {e}")
                oof_preds[f"{fam}_oof"] = preds
                try:
                    # Refit on all rows for later use (SHAP).
                    model.fit(X_sel, y)
                    base_models.append({"family": fam, "model": model})
                except Exception as e:
                    st.warning(f"⚠️ {fam} full-fit failed: {e}")

            # Non-negative linear blend of the out-of-fold predictions.
            meta = LinearRegression(positive=True)
            meta.fit(oof_preds, y)
            y_pred = meta.predict(oof_preds)
            final_r2 = r2_score(y, y_pred)
            st.success(f"Stacked Ensemble R² = {final_r2:.4f}")

        st.markdown("---")
        st.subheader("Operator Advisory — Real-Time Recommendations")

        try:
            top_base = base_models[0]["model"]
            sample_X = X_sel.sample(min(300, len(X_sel)), random_state=42)
            expl = shap.TreeExplainer(top_base)
            shap_vals = expl.shap_values(sample_X)
            if isinstance(shap_vals, list):
                shap_vals = shap_vals[0]
            imp = pd.DataFrame({
                "Feature": sample_X.columns,
                "Mean |SHAP|": np.abs(shap_vals).mean(axis=0),
                "Mean SHAP Sign": np.sign(shap_vals).mean(axis=0)
            }).sort_values("Mean |SHAP|", ascending=False)

            st.dataframe(imp.head(5))
            # NOTE(review): the direction below is derived from the average
            # sign of the SHAP values only, not from how the SHAP value moves
            # with the feature value — confirm this heuristic is intended.
            recs = []
            for _, r in imp.head(5).iterrows():
                if r["Mean SHAP Sign"] > 0.05:
                    recs.append(f"Increase `{r['Feature']}` likely increases `{target}`")
                elif r["Mean SHAP Sign"] < -0.05:
                    recs.append(f"Decrease `{r['Feature']}` likely increases `{target}`")
                else:
                    recs.append(f"`{r['Feature']}` neutral for `{target}`")
            st.write("\n".join(recs))

            # Persist the results of this run in the session.
            st.session_state["recs"] = recs
            st.session_state["final_r2"] = final_r2
            st.session_state["use_case"] = use_case
            st.session_state["target"] = target
            st.session_state["last_automl_ts"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            import requests
            import textwrap

            # Only the environment variable is consulted for the token.
            HF_TOKEN = os.getenv("HF_TOKEN")
            if not HF_TOKEN:
                st.error("HF_TOKEN not detected in environment or secrets.toml.")
            else:
                API_URL = "https://router.huggingface.co/v1/chat/completions"
                headers = {
                    "Authorization": f"Bearer {HF_TOKEN}",
                    "Content-Type": "application/json",
                }

                prompt = textwrap.dedent(f"""
                    You are an expert metallurgical process advisor.
                    Analyze these SHAP-based operator recommendations and rewrite them
                    as a concise 3-line professional advisory note.

                    Recommendations: {recs}
                    Target variable: {target}
                    Use case: {use_case}
                """)

                payload = {
                    "model": "meta-llama/Meta-Llama-3-8B-Instruct",
                    "messages": [
                        {"role": "system", "content": "You are a concise metallurgical advisor."},
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.5,
                    "max_tokens": 200,
                    "stream": False
                }

                with st.spinner("Generating operator advisory (Llama 3-8B)…"):
                    try:
                        resp = requests.post(API_URL, headers=headers, json=payload, timeout=90)
                        if resp.status_code != 200:
                            st.warning(f"HF API error {resp.status_code}: {resp.text}")
                        else:
                            try:
                                data = resp.json()
                                msg = (
                                    data.get("choices", [{}])[0]
                                    .get("message", {})
                                    .get("content", "")
                                    .strip()
                                )
                                if msg:
                                    st.success("✅ Operator Advisory Generated:")
                                    st.info(msg)
                                else:
                                    st.warning(f"Operator advisory skipped: empty response.\nRaw: {data}")
                            except Exception as e:
                                st.warning(f"Operator advisory skipped: JSON parse error — {e}")
                    except Exception as e:
                        st.warning(f"Operator advisory skipped: {e}")

        except Exception as e:
            # Best-effort section: any failure (including no fitted base
            # model) is reported instead of aborting the page.
            st.warning(f"Operator advisory skipped: {e}")
|
|
|
|
|
|
|
|
|
|
|
# Tab 5: static table of business-impact estimates per use case.
with tabs[5]:
    st.subheader("Business Impact Metrics")

    impact_columns = ["Use Case", "Target Variable", "Why It’s Ideal", "Business Leverage"]
    impact_rows = [
        ["EAF Data Intelligence", "furnace_temp / tap_temp",
         "Central control variable", "₹20–60 L/year"],
        ["Casting Optimization", "surface_temp / cooling_water_temp",
         "Controls billet quality", "₹50 L/year"],
        ["Rolling Mill", "energy_efficiency",
         "Energy optimization", "₹5–10 L/year"],
        ["Refractory Loss Prediction", "lining_thickness / heat_loss_rate",
         "Wear and downtime", "₹40 L/year"],
    ]

    # One row per use case, columns in the order declared above.
    target_table = pd.DataFrame(impact_rows, columns=impact_columns)
    st.dataframe(target_table, width="stretch")
|
|
|
|
|
|
|
|
# Tab 6: annotated bibliography rendered as Markdown links.
with tabs[6]:
    st.subheader("Annotated Bibliography")

    # Each entry is (title, authors, annotation, url).
    refs = [
        ("A Survey of Data-Driven Soft Sensing in Ironmaking Systems",
         "Yan et al. (2024)",
         "Soft sensors validate `furnace_temp` and `tap_temp`.",
         "https://doi.org/10.1021/acsomega.4c01254"),
        ("Optimisation of Operator Support Systems",
         "Ojeda Roldán et al. (2022)",
         "Reinforcement learning for endpoint control.",
         "https://doi.org/10.3390/jmmp6020034"),
        ("Analyzing the Energy Efficiency of Electric Arc Furnace Steelmaking",
         "Zhuo et al. (2024)",
         "Links arc power and energy KPIs.",
         "https://doi.org/10.3390/met15010113"),
        ("Dynamic EAF Modeling and Slag Foaming Index Prediction",
         "MacRosty et al.",
         "Supports refractory wear modeling.",
         "https://www.sciencedirect.com/science/article/pii/S0921883123004019"),
    ]

    # Markdown needs TWO trailing spaces before the newline for a hard line
    # break; with a single space the annotation runs onto the author line.
    hard_break = "  \n"
    for title, authors, note, url in refs:
        st.markdown(f"**[{title}]({url})** — *{authors}*{hard_break}_{note}_")
|
|
|
|
|
|
|
|
# Tab 7: one download button per regular file found in LOG_DIR.
with tabs[7]:
    st.subheader("Download Saved Files")

    # Sub-directories are ignored; names are listed alphabetically.
    saved_names = sorted(
        entry
        for entry in os.listdir(LOG_DIR)
        if os.path.isfile(os.path.join(LOG_DIR, entry))
    )

    if saved_names:
        for entry in saved_names:
            with open(os.path.join(LOG_DIR, entry), "rb") as handle:
                st.download_button(f"Download {entry}", handle, file_name=entry)
    else:
        st.info("No files yet — run AutoML first.")
|
|
|
|
|
|
|
|
# Tab 8: show the master run log and offer it as a download.
with tabs[8]:
    st.subheader("Master Log")

    # Read inside a context manager so the handle is always closed, and go
    # straight to open() instead of exists()+open() so a log removed between
    # the two calls is handled like a missing one. Undecodable bytes are
    # replaced rather than crashing the tab.
    try:
        with open(LOG_PATH, errors="replace") as log_file:
            txt = log_file.read()
    except FileNotFoundError:
        txt = None

    if txt is None:
        st.info("No logs yet — run AutoML once.")
    else:
        st.text_area("Log Output", txt, height=400)
        st.download_button("Download Log", txt, file_name="run_master.log")
|
|
|
|
|
|
|
|
|
|
|
# Tab 9: role-based "Smart Advisor".
# Reads the recommendations cached by the AutoML + SHAP step, lets the user pick
# a plant role, asks a hosted chat model for a role-specific advisory, and shows
# role-dependent KPI tiles. Nothing here retrains a model.
with tabs[9]:
    st.subheader(" Smart Advisor — Role-Based Insights")
    if "last_automl_ts" in st.session_state:
        st.caption(f" Model baseline last trained: {st.session_state['last_automl_ts']}")

    # Context cached in session state by the operator-advisory step.
    recs = st.session_state.get("recs", [])
    final_r2 = st.session_state.get("final_r2", 0)
    use_case = st.session_state.get("use_case", "N/A")
    target = st.session_state.get("target", "N/A")

    # Role -> one-line description shown under the selector. Dict order is the
    # order of the selectbox options.
    roles = {
        "Furnace Operator": "Runs daily EAF heats, manages electrodes, slag foaming, and tap timing.",
        "Shift Engineer": "Coordinates furnace, casting, and maintenance operations during the shift.",
        "Process Metallurgist": "Optimizes chemistry, refining, and metallurgical balance across heats.",
        "Maintenance Engineer": "Monitors vibration, bearings, and schedules preventive maintenance.",
        "Quality Engineer": "Tracks billet surface, composition, and defect rates from casting to rolling.",

        "Energy Manager": "Analyzes power, load factor, and energy cost per ton of steel.",
        "Production Head": "Supervises throughput, yield, and adherence to shift-level production targets.",
        "Reliability Manager": "Oversees equipment reliability, predictive maintenance, and downtime prevention.",
        "Chief Process Engineer": "Links metallurgical parameters to standard operating conditions.",
        "Process Optimization Head (PP&C)": "Balances yield, power, and reliability across EAF, caster, and rolling units.",
        "Chief General Manager – PP&C": "Oversees planning, process, and control at plant level — coordinating all shops for optimal energy, yield, and reliability.",
        "Deputy General Manager (Operations)": "Supervises multi-shop coordination, productivity, and manpower scheduling.",
        "Plant Head": "Oversees plant-wide KPIs — production, energy, quality, and modernization progress.",

        "Executive Director (Works)": "Integrates operations, people, and safety across all plants.",
        "Chief Operating Officer (COO)": "Ensures alignment between production efficiency and business goals.",
        "Chief Sustainability Officer (CSO)": "Monitors CO₂ intensity, waste recovery, and environmental compliance.",
        "Chief Financial Officer (CFO)": "Links operational performance to cost efficiency and ROI.",
        "Chief Executive Officer (CEO)": "Focuses on long-term performance, modernization, and shareholder impact.",
    }

    # Role -> persona brief appended to the LLM prompt. Each brief is dedented
    # and stripped before use, so the indentation below is cosmetic.
    role_prompts = {
        "Furnace Operator": """
            You are the EAF furnace operator responsible for maintaining a stable arc and safe melting.
            Translate model recommendations into clear, actionable controls: electrode movement, oxygen flow,
            slag foaming, or power adjustment. Focus on operational safety and tap timing.
        """,
        "Shift Engineer": """
            You are the shift engineer overseeing melting, casting, and maintenance coordination.
            Interpret model insights for operational actions — mention if inter-shop coordination is required.
        """,
        "Process Metallurgist": """
            You are the process metallurgist. Evaluate the data-driven SHAP patterns to interpret metallurgical
            balance, oxidation behavior, and refining efficiency. Suggest chemistry or process tuning.
        """,
        "Maintenance Engineer": """
            You are the maintenance engineer responsible for reliability. Identify potential failure risks
            (e.g., vibration anomalies, overheating, current imbalance) and propose proactive checks.
        """,
        "Quality Engineer": """
            You are the quality engineer monitoring casting and rolling outcomes. Translate process variables
            into expected surface or composition quality impacts and preventive measures.
        """,
        "Energy Manager": """
            You are the energy manager. Interpret how SHAP signals influence energy per ton and power factor.
            Quantify efficiency deviations and suggest scheduling or load adjustments.
        """,
        "Production Head": """
            You are the production head tracking yield and throughput. Connect SHAP insights to bottlenecks
            in productivity, heat timing, or equipment utilization. Suggest optimization steps.
        """,
        "Reliability Manager": """
            You are the reliability manager. Evaluate if process trends suggest equipment stress, overheating,
            or wear. Recommend intervention plans and projected downtime avoidance.
        """,
        "Chief Process Engineer": """
            You are the chief process engineer. Convert SHAP outputs into process standardization insights.
            Flag anomalies that require SOP review and coordinate with metallurgical and control teams.
        """,
        "Process Optimization Head (PP&C)": """
            You are the Process Optimization Head in PP&C. Assess SHAP signals across multiple units to improve
            system-level yield, energy, and reliability. Recommend balanced actions and inter-shop alignment.
        """,
        "Chief General Manager – PP&C": """
            You are the Chief General Manager (PP&C) responsible for overall plant coordination,
            planning, process control, and modernization. Interpret model insights as if briefing
            senior management and section heads before a shift review.

            Your response must:
            - Translate technical terms into operational themes (e.g., “arc instability”)
            - Identify cross-functional effects (EAF ↔ Caster ↔ Rolling)
            - Suggest coordination steps (maintenance, power, metallurgist)
            - Conclude with KPI or strategic impact (yield, energy, reliability)
            - If any data pattern seems implausible, mention it and propose review.
        """,
        "Deputy General Manager (Operations)": """
            You are the DGM (Operations). Summarize SHAP-derived insights into actionable instructions
            for shop heads. Emphasize throughput, manpower planning, and heat plan adherence.
        """,
        "Plant Head": """
            You are the Plant Head. Translate technical findings into KPI performance trends and upcoming
            operational risks. Recommend cross-departmental actions and expected impact on production targets.
        """,
        "Executive Director (Works)": """
            You are the Executive Director (Works). Summarize how the plant is performing overall and where
            immediate leadership attention is required. Use a governance-level tone, referencing key KPIs.
        """,
        "Chief Operating Officer (COO)": """
            You are the COO. Interpret model insights at a strategic level — efficiency, tonnage, cost, reliability.
            Highlight systemic improvements, risk areas, and financial implications across plants.
        """,
        "Chief Sustainability Officer (CSO)": """
            You are the CSO. Relate operational insights to environmental impact, carbon efficiency,
            and sustainability metrics. Quantify potential emission reduction.
        """,
        "Chief Financial Officer (CFO)": """
            You are the CFO. Interpret operational SHAP findings in terms of cost efficiency, asset utilization,
            and ROI. Provide an executive financial perspective on potential savings or risks.
        """,
        "Chief Executive Officer (CEO)": """
            You are the CEO of a major integrated steel producer.
            Provide a concise narrative (2–3 paragraphs) summarizing plant performance trends,
            operational risks, and opportunities — linking them to strategic goals in the annual report:
            productivity, sustainability, cost leadership, and modernization.
        """,
    }

    # Role groups. These MUST use the exact selectbox labels (the keys of
    # `roles`); short forms such as "CEO" never match the selected value.
    cgm_role = "Chief General Manager – PP&C"
    systems_roles = (cgm_role, "Process Optimization Head (PP&C)", "Plant Head")
    strategic_roles = (
        "Chief Operating Officer (COO)",
        "Chief Financial Officer (CFO)",
        "Chief Executive Officer (CEO)",
    )
    executive_kpi_roles = ("Chief Executive Officer (CEO)", "Chief Operating Officer (COO)")
    shop_floor_roles = ("Furnace Operator", "Shift Engineer")

    def _advisory_text(response_json):
        """Return the stripped assistant message of a chat-completions reply.

        Returns "" when the payload is not a dict, has no choices, or carries a
        missing/None message or content, so callers can treat all of these as
        an empty response instead of crashing.
        """
        if not isinstance(response_json, dict):
            return ""
        choices = response_json.get("choices") or [{}]
        message = choices[0].get("message") or {}
        return (message.get("content") or "").strip()

    def _pct_change(current, previous):
        """Return the percent change of *current* vs *previous* (0.0 if no usable baseline)."""
        if pd.isna(previous) or previous == 0:
            return 0.0
        return (current - previous) / previous * 100

    def trend_symbol(val):
        """Format a percent delta with an arrow; |val| <= 0.5 counts as flat."""
        if val > 0.5:
            return f"↑ +{val:.2f}%"
        elif val < -0.5:
            return f"↓ {val:.2f}%"
        else:
            return f"→ {val:.2f}%"

    def _render_shift_highlights():
        """Render the data-driven shift summary and its SHAP cross-check.

        Compares the last 500 rows of `df` with the 500 rows before them. Raises
        whatever pandas raises (e.g. KeyError for a missing column); the caller
        turns that into a warning.
        """
        latest_df = df.tail(500)

        furnace_temp_mean = latest_df["furnace_temp"].mean()
        furnace_temp_std = latest_df["furnace_temp"].std()
        energy_eff_mean = latest_df["energy_efficiency"].mean()
        yield_mean = latest_df["yield_ratio"].mean()
        # Share of flagged rows scaled by 8 — presumably hours per shift; confirm.
        downtime_proxy = np.mean(latest_df["refractory_limit_flag"]) * 8

        # A previous window exists as soon as there are 1000 rows (>=, not >).
        if len(df) >= 1000:
            prev_df = df.tail(1000).head(500)
            delta_temp = _pct_change(furnace_temp_mean, prev_df["furnace_temp"].mean())
            delta_eff = _pct_change(energy_eff_mean, prev_df["energy_efficiency"].mean())
            delta_yield = _pct_change(yield_mean, prev_df["yield_ratio"].mean())
        else:
            delta_temp, delta_eff, delta_yield = 0, 0, 0

        highlights = pd.DataFrame([
            ["Furnace Temp Stability",
             "Stable" if furnace_temp_std < 50 else "Fluctuating",
             f"Avg: {furnace_temp_mean:.1f}°C ({trend_symbol(delta_temp)})"],
            ["Energy Efficiency",
             "Improved" if delta_eff > 0 else "Declined",
             f"{energy_eff_mean:.4f} ({trend_symbol(delta_eff)})"],
            ["Yield Ratio",
             "Nominal" if abs(delta_yield) < 1 else ("↑" if delta_yield > 0 else "↓"),
             f"{yield_mean*100:.2f}% ({trend_symbol(delta_yield)})"],
            ["Refractory Limit Flag",
             "Within Safe Limit" if downtime_proxy < 1 else "Check Lining",
             f"Active Alerts: {downtime_proxy:.1f}/shift"],
        ], columns=["Parameter", "Status", "Observation"])

        # width="stretch" matches the other tables in this file.
        st.dataframe(highlights, width="stretch")
        st.caption("Derived from live dataset trends (last 500 vs previous 500 rows).")

        if isinstance(recs, list) and recs:
            st.markdown("#### Cross-Verification with SHAP Insights")
            keywords = ("furnace", "energy", "yield", "slag", "power")
            matches = [rec for rec in recs if any(word in rec for word in keywords)]
            if matches:
                st.info("Aligned SHAP Recommendations:\n\n- " + "\n- ".join(matches))
            else:
                st.warning("No direct SHAP alignment found — potential anomaly or unseen pattern.")

    # Default to the CGM role by label rather than by a hard-coded position.
    role_names = list(roles)
    role = st.selectbox(
        "Select Your Role",
        role_names,
        index=role_names.index(cgm_role),
        key="selected_role",
    )
    if "last_role" in st.session_state and st.session_state["last_role"] != role:
        st.session_state["hf_ran_once"] = False
    st.caption(f" Context: {roles[role]}")

    if not recs:
        st.warning("Please run the AutoML + SHAP step first to generate recommendations.")
    else:
        generate_clicked = st.button("Generate Role-Based Advisory")
        if generate_clicked:
            st.session_state["hf_ran_once"] = True
            HF_TOKEN = os.getenv("HF_TOKEN")
            if not HF_TOKEN:
                st.error("HF_TOKEN not found. Please set it as an environment variable or in secrets.toml.")
            else:
                # Imported lazily: only needed once an advisory is requested.
                import requests
                import textwrap

                API_URL = "https://router.huggingface.co/v1/chat/completions"
                headers = {"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"}

                if role in systems_roles:
                    reasoning_context = (
                        "Think like a systems integrator balancing EAF, caster, and rolling mill performance.\n"
                        "Evaluate interdependencies and recommend coordinated actions across departments."
                    )
                elif role in strategic_roles:
                    reasoning_context = (
                        "Think strategically. Connect operational drivers to business KPIs,\n"
                        "and quantify financial or sustainability implications."
                    )
                else:
                    reasoning_context = ""

                role_brief = textwrap.dedent(
                    role_prompts.get(role, "Provide a professional metallurgical advisory summary.")
                ).strip()

                # Each section is cleaned BEFORE joining. Dedenting the fully
                # composed text does nothing, because the interpolated JSON has
                # lines starting at column 0, which zeroes the common margin.
                prompt_sections = [
                    "\n".join([
                        f"Role: {role}",
                        f"Use case: {use_case}",
                        f"Target variable: {target}",
                        f"Ensemble model confidence (R²): {final_r2:.3f}",
                    ]),
                    reasoning_context,
                    "Model-derived recommendations:\n" + json.dumps(recs, indent=2),
                    role_brief,
                    "\n".join([
                        "Your response should cover:",
                        "1. What’s happening (interpreted simply)",
                        "2. What should be done",
                        "3. What outcomes to expect and why",
                    ]),
                ]
                prompt = "\n\n".join(section for section in prompt_sections if section)

                payload = {
                    "model": "meta-llama/Meta-Llama-3-8B-Instruct",
                    "messages": [
                        {"role": "system", "content": "You are a multi-role metallurgical advisor connecting data to human decisions."},
                        {"role": "user", "content": prompt},
                    ],
                    "temperature": 0.4,
                    "max_tokens": 350,
                }

                with st.spinner(f"Generating role-based advisory for {role}..."):
                    # Timeouts and connection errors must not crash the tab.
                    try:
                        resp = requests.post(API_URL, headers=headers, json=payload, timeout=120)
                    except requests.exceptions.RequestException as exc:
                        resp = None
                        st.error(f"HF API request failed: {exc}")

                    if resp is None:
                        pass  # already reported above
                    elif resp.status_code != 200:
                        st.error(f"HF API error {resp.status_code}: {resp.text}")
                    else:
                        try:
                            data = resp.json()
                        except ValueError as exc:
                            data = None
                            st.warning(f"Advisory skipped: response was not valid JSON — {exc}")

                        if data is not None:
                            msg = _advisory_text(data)
                            if msg:
                                st.markdown(f"### Advisory for {role}")
                                st.info(msg)

                                # Cache so the advisory survives Streamlit reruns.
                                st.session_state["last_advisory_msg"] = msg
                                st.session_state["last_role"] = role
                                st.session_state["last_advisory_ts"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                                st.caption(f"🕒 Last updated: {st.session_state['last_advisory_ts']}")

                                if role in systems_roles:
                                    st.markdown("#### 🔍 Shift Highlights — Data-Driven Summary")
                                    try:
                                        _render_shift_highlights()
                                    except Exception as e:
                                        st.warning(f"Shift Highlights unavailable: {e}")
                            else:
                                st.warning(f"Empty response.\nRaw: {data}")

    # Most recent advisory kept in session state.
    if "last_advisory_msg" in st.session_state:
        st.markdown(f"### Last Advisory ({st.session_state.get('last_role', 'N/A')})")
        st.info(st.session_state["last_advisory_msg"])
        if "last_advisory_ts" in st.session_state:
            st.caption(f"Last updated: {st.session_state['last_advisory_ts']}")
        if "last_automl_ts" in st.session_state:
            st.caption(f"Model baseline last run at: {st.session_state['last_automl_ts']}")

    # Role-dependent KPI tiles. The CGM and executive figures are hard-coded
    # literals; only the shop-floor tiles read the last row of `df`.
    if role == cgm_role:
        col1, col2, col3 = st.columns(3)
        col1.metric("Plant Yield (Rolling 24h)", "96.8%", "↑0.7% vs yesterday")
        col2.metric("Energy per ton", "4.92 MWh/t", "↓2.3% week-on-week")
        col3.metric("Unplanned Downtime", "3.1 hrs", "↓1.2 hrs")
        st.caption("KPIs aligned with PP&C Balanced Scorecard — Yield • Energy • Reliability")

    elif role in executive_kpi_roles:
        col1, col2, col3 = st.columns(3)
        col1.metric("EBITDA per ton", "₹7,420", "↑3.1% QoQ")
        col2.metric("CO₂ Intensity", "1.79 tCO₂/t", "↓2.4% YoY")
        col3.metric("Modernization CapEx", "₹122 Cr", "On track")
        st.caption("Strategic alignment: cost leadership • sustainability • modernization")

    elif role in shop_floor_roles:
        col1, col2, col3 = st.columns(3)
        col1.metric("Furnace Temp", f"{df['furnace_temp'].iloc[-1]:.1f} °C")
        col2.metric("Arc Power", f"{df['arc_power'].iloc[-1]:.0f} kW")
        col3.metric("Power Factor", f"{df['power_factor'].iloc[-1]:.2f}")
        st.caption("Live operational parameters — monitor stability and foaming balance.")
|
|
|
|
|
# Page footer: a horizontal rule followed by the standing data disclaimer,
# emitted as two separate Markdown elements.
for _footer_markdown in (
    "---",
    "**Note:** Synthetic demo dataset for educational use only. Real deployment requires plant data, NDA, and safety validation.",
):
    st.markdown(_footer_markdown)