|
|
|
|
|
|
|
|
import os |
|
|
import json |
|
|
import time |
|
|
from datetime import datetime |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import streamlit as st |
|
|
import matplotlib.pyplot as plt |
|
|
import seaborn as sns |
|
|
import joblib |
|
|
|
|
|
|
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.linear_model import LinearRegression |
|
|
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor |
|
|
from sklearn.preprocessing import StandardScaler, PolynomialFeatures |
|
|
from sklearn.decomposition import PCA |
|
|
from sklearn.cluster import KMeans |
|
|
from sklearn.metrics import mean_squared_error, r2_score |
|
|
|
|
|
|
|
|
import shap |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Configure the page title and the wide layout.
st.set_page_config(
    layout="wide",
    page_title="AI Feature Universe Explorer — Advanced + SHAP",
)

# Every artifact the app reads or writes lives under one data directory.
DATA_DIR = "/mnt/data"
CSV_PATH, META_PATH, PDF_PATH, ENSEMBLE_ARTIFACT = (
    os.path.join(DATA_DIR, artifact_name)
    for artifact_name in (
        "flatfile_universe_advanced.csv",
        "feature_metadata_advanced.json",
        "annotated_bibliography.pdf",
        "ensemble_models.joblib",
    )
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _sample_natural_feature(name, n):
    """Draw ``n`` synthetic values for the raw sensor column ``name``.

    The distribution is picked from substrings of the column name. Specific
    temperature columns are matched *before* the generic ``*_temp`` rule so
    that their dedicated ranges are actually used (a generic rule placed first
    would shadow them). Columns that match no rule get standard-normal noise.
    """
    name_l = name.lower()
    # --- temperatures: most specific rules first ---
    if "furnace_temp" in name_l or "tap_temp" in name_l:
        return np.random.normal(1550, 50, n)
    if "bearing_temp" in name_l:
        return np.random.normal(65, 5, n)
    if name_l in ("mold_temp", "shell_temp", "cooling_out_temp", "exit_temp"):
        return np.random.normal(1500 if "mold" in name_l else 200, 30, n)
    if name_l.endswith("_temp"):
        # Remaining temperature columns keep the generic range they always had.
        return np.random.normal(1550, 50, n)
    # --- gas analysis: "offgas_co2" must be tested before "offgas_co" / "o2" ---
    if "offgas_co2" in name_l:
        return np.abs(np.random.normal(15, 4, n))
    if "offgas_co" in name_l:
        return np.abs(np.random.normal(20, 5, n))
    if "o2" in name_l:
        return np.clip(np.random.normal(5, 1, n), 0.01, 60)
    # --- electrical / mechanical ---
    if "arc_power" in name_l or "motor_load" in name_l:
        return np.abs(np.random.normal(600, 120, n))
    if "rpm" in name_l:
        return np.abs(np.random.normal(120, 30, n))
    if "vibration" in name_l:
        return np.abs(np.random.normal(0.4, 0.15, n))
    # --- chemistry, logistics, misc ---
    if "chemical" in name_l or "spectro" in name_l:
        return np.random.normal(0.7, 0.15, n)
    if "weight" in name_l:
        return np.random.normal(1000, 100, n)
    if "conveyor_speed" in name_l or "casting_speed" in name_l:
        return np.random.normal(2.5, 0.6, n)
    if "power_factor" in name_l:
        return np.clip(np.random.normal(0.92, 0.03, n), 0.6, 1.0)
    if "image_entropy_proxy" in name_l:
        return np.abs(np.random.normal(0.5, 0.25, n))
    if "batch_id" in name_l:
        return np.random.randint(1000, 9999, n)
    if "time_since" in name_l or "time_in_queue" in name_l:
        return np.abs(np.random.normal(30, 20, n))
    if "heat_flux" in name_l:
        return np.abs(np.random.normal(1000, 300, n))
    return np.random.normal(0, 1, n)


def _write_bibliography(pdf_path):
    """Write the annotated bibliography and return the path actually written.

    Tries to produce a PDF with ``fpdf``. If that fails for any reason (package
    missing, encoding error, ...), a ``.txt`` file with the same stem is written
    instead, including the failure reason, and its path is returned.
    """
    from datetime import timezone

    bib_items = [
        ("A Survey of Data-Driven Soft Sensing in Ironmaking Systems", "Yan et al. (2024)", "Review of soft-sensors; supports gas proxies, lags, PCA."),
        ("Optimisation of Oxygen Blowing Process using RL", "Ojeda Roldan et al. (2022)", "RL for oxygen control; motivates surrogate predicted states & safety indices."),
        ("Analyzing the Energy Efficiency of Electric Arc Furnace", "Zhuo et al. (2024)", "Energy KPIs (kWh/t) motivate power_density & energy_efficiency features."),
        ("BOF/Endpoint prediction techniques", "Springer (2024)", "Endpoint prediction; supports temporal lags and cycle encoding."),
        ("Dynamic EAF modeling & slag foaming", "MacRosty et al.", "Physics priors for slag_foaming_index and refractory health modeling."),
    ]
    try:
        from fpdf import FPDF

        pdf = FPDF("P", "mm", "A4")
        pdf.add_page()
        pdf.set_font("Helvetica", "B", 14)
        pdf.cell(0, 8, "Annotated Bibliography - Metallurgical AI (Selected Papers)", ln=True)
        pdf.ln(2)
        pdf.set_font("Helvetica", "", 10)
        generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
        pdf.cell(0, 6, "Generated: " + generated_at, ln=True)
        pdf.ln(4)
        for title, auth, note in bib_items:
            pdf.set_font("Helvetica", "B", 11)
            # Plain hyphen on purpose: the core Helvetica font is Latin-1 only,
            # and an em dash here made PDF generation fail.
            pdf.multi_cell(0, 6, f"{title} - {auth}")
            pdf.set_font("Helvetica", "", 10)
            pdf.multi_cell(0, 5, f"Notes: {note}")
            pdf.ln(2)
        pdf.output(pdf_path)
        return pdf_path
    except Exception as exc:
        # Best-effort artifact: fall back to text, but record why instead of
        # hiding the failure.
        fallback_path = pdf_path.replace(".pdf", ".txt")
        with open(fallback_path, "w", encoding="utf-8") as tf:
            tf.write("Annotated bibliography generated. Install fpdf for PDF output.\n")
            tf.write(f"PDF generation failed: {exc!r}\n\n")
            for title, auth, note in bib_items:
                tf.write(f"{title} - {auth}\nNotes: {note}\n\n")
        return fallback_path


def generate_advanced_flatfile(n_rows=3000, random_seed=42, max_polynomial_new=60):
    """Generate the synthetic feature dataset and its companion artifacts.

    Builds ``n_rows`` of simulated raw sensor columns, then layers engineered
    features on top (ratios, rolling/lag statistics, interactions, degree-2
    polynomial terms, PCA components, a KMeans operating mode, random-forest
    surrogate predictions, and simple flags). The result is written to
    ``CSV_PATH``; per-column metadata goes to ``META_PATH``; a short annotated
    bibliography goes to ``PDF_PATH`` (or a ``.txt`` fallback).

    Args:
        n_rows: Number of rows to simulate.
        random_seed: Seed passed to ``np.random.seed`` (global NumPy RNG).
        max_polynomial_new: Upper bound on the number of new polynomial
            columns appended to the frame.

    Returns:
        Tuple ``(csv_path, meta_path, bibliography_path)``. The third element
        is the file that was actually written, so it ends in ``.txt`` when PDF
        generation was not possible.
    """
    np.random.seed(random_seed)
    os.makedirs(DATA_DIR, exist_ok=True)

    natural_feats = [
        "vibration_x", "vibration_y", "motor_current", "rpm", "bearing_temp", "ambient_temp", "lube_pressure", "power_factor",
        "furnace_temp", "tap_temp", "slag_temp", "offgas_co", "offgas_co2", "o2_probe_pct", "c_feed_rate", "arc_power", "furnace_pressure", "feed_time",
        "mold_temp", "casting_speed", "nozzle_pressure", "cooling_water_temp", "billet_length", "chemical_C", "chemical_Mn", "chemical_Si", "chemical_S",
        "roll_speed", "motor_load", "coolant_flow", "exit_temp", "strip_thickness", "line_tension", "roller_vibration",
        "lighting_intensity", "surface_temp", "image_entropy_proxy",
        "spectro_Fe", "spectro_C", "spectro_Mn", "spectro_Si", "time_since_last_sample",
        "batch_id_numeric", "weight_input", "weight_output", "time_in_queue", "conveyor_speed",
        "shell_temp", "lining_thickness", "water_flow", "cooling_out_temp", "heat_flux",
    ]
    # Drop accidental duplicates while keeping the declared order.
    natural_feats = list(dict.fromkeys(natural_feats))

    df = pd.DataFrame({c: _sample_natural_feature(c, n_rows) for c in natural_feats})

    # --- time axis and plant metadata ---
    start = pd.Timestamp("2025-01-01T00:00:00")
    # "min" replaces the deprecated "T" alias for one-minute frequency.
    df["timestamp"] = pd.date_range(start, periods=n_rows, freq="min")
    df["cycle_minute"] = np.mod(np.arange(n_rows), 80)
    df["meta_plant_name"] = np.random.choice(
        ["Rourkela", "Jamshedpur", "VSP", "Bokaro", "Kalinganagar", "Salem"], n_rows
    )
    df["meta_country"] = "India"

    # --- ratio-style proxies (small offsets keep denominators away from zero) ---
    df["carbon_proxy"] = df["offgas_co"] / (df["offgas_co2"] + 1.0)
    df["oxygen_utilization"] = df["offgas_co2"] / (df["offgas_co"] + 1.0)
    df["power_density"] = df["arc_power"] / (df["weight_input"] + 1.0)
    df["energy_efficiency"] = df["furnace_temp"] / (df["arc_power"] + 1.0)
    df["slag_foaming_index"] = (df["slag_temp"] * df["offgas_co"]) / (df["o2_probe_pct"] + 1.0)
    df["yield_ratio"] = df["weight_output"] / (df["weight_input"] + 1e-9)

    # --- rolling / lag / rate-of-change features ---
    rolling_cols = ["arc_power", "furnace_temp", "offgas_co", "offgas_co2", "motor_current", "vibration_x", "weight_input"]
    for rc in rolling_cols:
        if rc in df.columns:
            df[f"{rc}_roll_mean_3"] = df[rc].rolling(3, min_periods=1).mean()
            df[f"{rc}_roll_std_5"] = df[rc].rolling(5, min_periods=1).std().fillna(0)
            # .bfill() replaces the deprecated fillna(method="bfill").
            df[f"{rc}_lag1"] = df[rc].shift(1).bfill()
            df[f"{rc}_roc_1"] = df[rc].diff().fillna(0)

    # --- hand-written interactions ---
    df["arc_o2_interaction"] = df["arc_power"] * df["o2_probe_pct"]
    df["carbon_power_ratio"] = df["carbon_proxy"] / (df["arc_power"] + 1e-6)
    df["temp_power_sqrt"] = df["furnace_temp"] * np.sqrt(np.abs(df["arc_power"]) + 1e-6)

    # --- degree-2 polynomial expansion of the first 12 numeric columns ---
    # `numeric` is a snapshot taken here; PCA and KMeans below use the same
    # snapshot, so they do not see the polynomial columns.
    numeric = df.select_dtypes(include=[np.number]).fillna(0)
    poly_source_cols = numeric.columns[:12].tolist()
    poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
    poly_mat = poly.fit_transform(numeric[poly_source_cols])
    poly_names = poly.get_feature_names_out(poly_source_cols)
    poly_df = pd.DataFrame(poly_mat, columns=[f"poly__{n}" for n in poly_names], index=df.index)
    # Keep only genuinely new terms (squares / products), capped in number.
    keep_poly = [c for c in poly_df.columns if c.replace("poly__", "") not in poly_source_cols]
    poly_df = poly_df[keep_poly].iloc[:, :max_polynomial_new]
    df = pd.concat([df, poly_df], axis=1)

    # --- PCA components and KMeans operating mode on standardized numerics ---
    scaled = StandardScaler().fit_transform(numeric)
    pca_cols = PCA(n_components=6, random_state=42).fit_transform(scaled)
    for i in range(pca_cols.shape[1]):
        df[f"pca_{i+1}"] = pca_cols[:, i]
    kmeans = KMeans(n_clusters=6, random_state=42, n_init=10)
    df["operating_mode"] = kmeans.fit_predict(scaled)

    # --- surrogate: next-row furnace temperature ---
    surrogate_df = df.copy()
    surrogate_df["furnace_temp_next"] = surrogate_df["furnace_temp"].shift(-1).ffill()
    features_for_surrogate = [
        c for c in ["furnace_temp", "arc_power", "o2_probe_pct", "offgas_co", "offgas_co2"] if c in df.columns
    ]
    if len(features_for_surrogate) >= 2:
        X = surrogate_df[features_for_surrogate].fillna(0)
        y = surrogate_df["furnace_temp_next"]
        rf = RandomForestRegressor(n_estimators=50, random_state=42, n_jobs=-1)
        rf.fit(X, y)
        # NOTE: predictions are made on the training rows themselves.
        df["pred_temp_30s"] = rf.predict(X)
    else:
        df["pred_temp_30s"] = df["furnace_temp"]

    # --- surrogate: carbon proxy from gas readings ---
    if all(c in df.columns for c in ["offgas_co", "offgas_co2", "o2_probe_pct"]):
        X2 = df[["offgas_co", "offgas_co2", "o2_probe_pct"]].fillna(0)
        rf2 = RandomForestRegressor(n_estimators=50, random_state=1, n_jobs=-1)
        rf2.fit(X2, df["carbon_proxy"])
        df["pred_carbon_5min"] = rf2.predict(X2)
    else:
        df["pred_carbon_5min"] = df["carbon_proxy"]

    # --- simple flags and limits ---
    df["refractory_limit_flag"] = (df["lining_thickness"] < 140).astype(int)
    df["max_allowed_power_delta"] = np.clip(df["arc_power"].diff().abs().fillna(0), 0, 2000)
    df["ARC_ON"] = ((df["arc_power"] > df["arc_power"].median()) & (df["carbon_proxy"] < 1.0)).astype(int)
    df["prediction_confidence"] = np.clip(np.random.beta(2, 5, n_rows), 0.05, 0.99)

    # --- final clean-up: no infinities, no gaps ---
    df = df.replace([np.inf, -np.inf], np.nan).bfill().fillna(0)

    df.to_csv(CSV_PATH, index=False)

    # --- per-column metadata ---
    natural_set = set(natural_feats)
    meta = []
    for col in df.columns:
        if col in natural_set:
            source = "natural"
        elif col.startswith(("poly__", "pca_")) or col == "operating_mode":
            source = "advanced_synthetic"
        else:
            source = "synthetic"
        meta.append({
            "feature_name": col,
            "source_type": source,
            "linked_use_cases": ["Mapped" if source == "natural" else "All"],
            "units": "-",
            "formula": "see generator logic",
            "remarks": "auto-generated or simulated",
        })
    with open(META_PATH, "w", encoding="utf-8") as f:
        json.dump(meta, f, indent=2)

    bibliography_path = _write_bibliography(PDF_PATH)
    return CSV_PATH, META_PATH, bibliography_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Build the dataset unless both the CSV and its metadata file already exist.
artifacts_ready = os.path.exists(CSV_PATH) and os.path.exists(META_PATH)
if not artifacts_ready:
    with st.spinner("Generating advanced feature universe (this may take ~20-60s)..."):
        CSV_PATH, META_PATH, PDF_PATH = generate_advanced_flatfile(
            n_rows=3000,
            random_seed=42,
            max_polynomial_new=80,
        )
        st.success(f"Generated dataset and metadata: {CSV_PATH}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_data
def load_data(csv_path=CSV_PATH, meta_path=META_PATH):
    """Load the feature table and its metadata.

    Args:
        csv_path: Location of the feature CSV.
        meta_path: Location of the metadata JSON.

    Returns:
        ``(features, metadata)`` where both elements are DataFrames; the
        metadata frame is built from the parsed JSON content.
    """
    feature_frame = pd.read_csv(csv_path)
    with open(meta_path, "r") as meta_file:
        metadata_records = json.load(meta_file)
    metadata_frame = pd.DataFrame(metadata_records)
    return feature_frame, metadata_frame
|
|
|
|
|
# Load the feature table and metadata used by every tab below.
df, meta_df = load_data()

# Sidebar: filter on metadata source type (all types selected initially).
st.sidebar.title("🔎 Feature Explorer - Advanced + SHAP")
feat_types = sorted(set(meta_df["source_type"]))
selected_types = st.sidebar.multiselect(
    "Feature type",
    feat_types,
    default=feat_types,
)

# Names of all numeric columns, reused by the plotting and modeling tabs.
numeric_cols = list(df.select_dtypes(include=[np.number]).columns)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Page heading and the seven top-level tabs (indexed 0..6 below).
st.title("Steel Authority of India Limited (SHAP-enabled)")
tab_labels = [
    "Features",
    "Visualize",
    "Correlations",
    "Stats",
    "Ensemble + SHAP",
    "Target & Business Impact",
    "Bibliography",
]
tabs = st.tabs(tab_labels)
|
|
|
|
|
|
|
|
# Tab 0: metadata table restricted to the source types chosen in the sidebar.
with tabs[0]:
    st.subheader("Feature metadata")
    type_mask = meta_df["source_type"].isin(selected_types)
    filtered_meta = meta_df[type_mask]
    shown_columns = ["feature_name", "source_type", "formula", "remarks"]
    meta_view = filtered_meta[shown_columns].rename(columns={"feature_name": "Feature"})
    st.dataframe(meta_view, height=400)
    row_count, feature_count = df.shape
    st.markdown(f"Total features loaded: **{feature_count}** | Rows: **{row_count}**")
|
|
|
|
|
|
|
|
# Tab 1: histogram (with KDE) and summary statistics of one numeric feature.
with tabs[1]:
    st.subheader("Feature visualization")
    col = st.selectbox("Choose numeric feature", numeric_cols, index=0)
    bins = st.slider("Histogram bins", 10, 200, 50)
    fig, ax = plt.subplots(figsize=(8, 4))
    sns.histplot(df[col], bins=bins, kde=True, ax=ax)
    ax.set_title(col)
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; without
    # this, each Streamlit rerun would leave another figure in memory.
    plt.close(fig)
    st.write(df[col].describe().to_frame().T)
|
|
|
|
|
|
|
|
# Tab 2: correlation heatmap over a user-selected set of numeric features.
with tabs[2]:
    st.subheader("Correlation explorer")
    # Pre-select at most the first 20 numeric columns to keep the plot legible.
    default_corr = numeric_cols[:20]
    corr_sel = st.multiselect("Select features (min 2)", numeric_cols, default=default_corr)
    if len(corr_sel) >= 2:
        corr = df[corr_sel].corr()
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(corr, cmap="coolwarm", center=0, ax=ax)
        st.pyplot(fig)
        # Release the figure; pyplot would otherwise keep one per rerun.
        plt.close(fig)
    else:
        st.info("Choose at least 2 numeric features to compute correlation.")
|
|
|
|
|
|
|
|
# Tab 3: describe() output, one row per feature, shown with 3 decimals.
with tabs[3]:
    st.subheader("Summary statistics (numeric features)")
    summary_stats = df.describe().transpose()
    styled_stats = summary_stats.style.format("{:.3f}")
    st.dataframe(styled_stats, height=500)
|
|
|
|
|
|
|
|
# Tab 4: train four regressors plus their simple average, then explain one of
# them with SHAP.
#
# The trained artifacts are stored in st.session_state. A Streamlit button is
# only True on the rerun triggered by the click, so widgets nested under
# `if train_button:` would vanish as soon as the user touched any of them.
with tabs[4]:
    st.subheader("Ensemble modeling sandbox (fast) + SHAP explainability")

    target = st.selectbox(
        "Target variable",
        numeric_cols,
        index=numeric_cols.index("furnace_temp") if "furnace_temp" in numeric_cols else 0,
    )
    default_features = [c for c in numeric_cols if c != target][:50]
    features = st.multiselect(
        "Model input features (select many; start with defaults)",
        numeric_cols,
        default=default_features,
    )
    max_train_rows = min(4000, df.shape[0])
    sample_size = st.slider(
        "Sample rows to use for training (speed vs fidelity)",
        min_value=200,
        max_value=max_train_rows,
        # Keep the default inside the slider range for small datasets.
        value=min(1000, max_train_rows),
        step=100,
    )
    train_button = st.button("Train ensemble & compute SHAP (recommended sample only)")

    if train_button:
        # The target must not be one of its own inputs: it would leak the
        # answer and create duplicate column labels in the sampled frame.
        model_features = [c for c in features if c != target]
        if not model_features:
            st.error("Select at least one input feature other than the target.")
        else:
            with st.spinner("Preparing data and training ensemble..."):
                sub_df = df[model_features + [target]].sample(n=sample_size, random_state=42)
                X = sub_df[model_features].fillna(0)
                y = sub_df[target].fillna(0)
                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

                models = {
                    "Linear": LinearRegression(),
                    "RandomForest": RandomForestRegressor(n_estimators=150, random_state=42, n_jobs=-1),
                    "GradientBoosting": GradientBoostingRegressor(n_estimators=150, random_state=42),
                    "ExtraTrees": ExtraTreesRegressor(n_estimators=150, random_state=42, n_jobs=-1),
                }
                preds = {}
                results = []
                for name, m in models.items():
                    m.fit(X_train, y_train)
                    p = m.predict(X_test)
                    preds[name] = p
                    results.append({
                        "Model": name,
                        "R2": r2_score(y_test, p),
                        "RMSE": float(np.sqrt(mean_squared_error(y_test, p))),
                    })

                # Unweighted average of all model predictions.
                ensemble_pred = np.column_stack(list(preds.values())).mean(axis=1)
                results.append({
                    "Model": "EnsembleAvg",
                    "R2": r2_score(y_test, ensemble_pred),
                    "RMSE": float(np.sqrt(mean_squared_error(y_test, ensemble_pred))),
                })

                joblib.dump(models, ENSEMBLE_ARTIFACT)

            st.session_state["ensemble_run"] = {
                "target": target,
                "models": models,
                "results": pd.DataFrame(results).set_index("Model").round(4),
                "X_train": X_train,
                "X_test": X_test,
                "y_test": y_test,
                "ensemble_pred": ensemble_pred,
            }
            st.success(f"Saved ensemble models to {ENSEMBLE_ARTIFACT}")

    run = st.session_state.get("ensemble_run")
    if run is not None:
        models = run["models"]
        X_train, X_test = run["X_train"], run["X_test"]
        y_test, ensemble_pred = run["y_test"], run["ensemble_pred"]

        # Results may be older than the current widget selection; say so.
        st.caption(f"Showing results of the last training run (target: {run['target']}).")
        st.dataframe(run["results"])

        fig, ax = plt.subplots(figsize=(8, 4))
        ax.scatter(y_test, ensemble_pred, alpha=0.5)
        ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "r--")
        ax.set_xlabel("Actual")
        ax.set_ylabel("Predicted (Ensemble)")
        st.pyplot(fig)
        plt.close(fig)

        st.markdown("### SHAP Explainability — pick a model to explain (Tree models recommended)")
        model_names = list(models)
        explain_model_name = st.selectbox(
            "Model to explain",
            model_names,
            index=model_names.index("RandomForest") if "RandomForest" in models else 0,
        )
        model_to_explain = models[explain_model_name]
        is_tree_model = explain_model_name in ("RandomForest", "ExtraTrees", "GradientBoosting")

        # SHAP rows come from the test split, so the slider is bounded by its
        # size (not by the training sample size), and the default is clamped.
        shap_max_rows = min(1500, X_test.shape[0])
        if shap_max_rows > 50:
            explainer_sample = st.slider(
                "Number of rows to use for SHAP explanation (memory heavy)",
                50,
                shap_max_rows,
                value=min(300, shap_max_rows),
                step=10,
            )
        else:
            explainer_sample = shap_max_rows

        if explainer_sample < X_test.shape[0]:
            X_shap_for = X_test.sample(n=explainer_sample, random_state=42)
        else:
            X_shap_for = X_test

        with st.spinner("Computing SHAP values (this may take a while for large SHAP sample)..."):
            try:
                if is_tree_model:
                    explainer = shap.TreeExplainer(model_to_explain)
                    shap_values = explainer.shap_values(X_shap_for)
                else:
                    # Model-agnostic fallback with a 100-row background sample.
                    explainer = shap.KernelExplainer(model_to_explain.predict, shap.sample(X_train, 100))
                    shap_values = explainer.shap_values(X_shap_for, nsamples=100)

                import warnings

                fig_shap = plt.figure(figsize=(8, 6))
                try:
                    with warnings.catch_warnings():
                        warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")
                        shap.summary_plot(shap_values, X_shap_for, show=False)
                    st.pyplot(fig_shap)
                finally:
                    plt.close(fig_shap)
                st.success("SHAP summary plotted.")
            except Exception as e:
                st.error(f"SHAP failed: {e}")

        st.markdown("#### Explain a single prediction (waterfall):")
        # Bounded by the SHAP sample, which is the frame actually indexed below.
        idx_choice = st.number_input(
            "Row index in SHAP sample (0..n-1)",
            min_value=0,
            max_value=X_shap_for.shape[0] - 1,
            value=0,
        )
        if not is_tree_model:
            st.info("Per-instance waterfall not available for this model type in fallback.")
        else:
            try:
                row = X_shap_for.iloc[[int(idx_choice)]]
                expl = shap.TreeExplainer(model_to_explain)
                shap_vals = expl.shap_values(row)
                if isinstance(shap_vals, list):
                    shap_vals = shap_vals[0]
                # expected_value may be a scalar, a list or an array.
                exp_val = float(np.ravel(expl.expected_value)[0])

                explanation = shap.Explanation(
                    values=shap_vals[0],
                    base_values=exp_val,
                    data=row.iloc[0].to_numpy(),
                    feature_names=row.columns.tolist(),
                )
                plot_obj = shap.plots.waterfall(explanation, show=False)
                # Fall back to the current figure when nothing usable is returned.
                fig2 = getattr(plot_obj, "figure", None) or plt.gcf()
                st.pyplot(fig2)
                plt.close(fig2)
            except Exception as e:
                st.warning(f"Could not plot waterfall: {e}")
|
|
|
|
|
|
|
|
|
|
|
# Tab 5: two static reference tables (targets per use case, business framing).
with tabs[5]:
    st.subheader("🎯 Recommended Target Variables by Use Case")
    st.markdown("Each use case maps to a practical target variable that drives measurable business impact.")

    target_columns = ["Use Case", "Target Variable", "Why It’s Ideal", "Business Leverage"]
    target_rows = [
        ("Predictive Maintenance (Mills, Motors, Compressors)", "bearing_temp / time_to_failure", "Rises before mechanical failure; early warning", "₹10–30 L per asset/year"),
        ("Blast Furnace / EAF Data Intelligence", "furnace_temp / tap_temp", "Central control variable, linked to energy and quality", "₹20–60 L/year"),
        ("Casting Quality Optimization", "defect_probability / solidification_rate", "Determines billet quality; control nozzle & cooling", "₹50 L/year yield gain"),
        ("Rolling Mill Energy Optimization", "energy_per_ton / exit_temp", "Directly tied to energy efficiency", "₹5–10 L/year per kWh/t"),
        ("Surface Defect Detection (Vision AI)", "defect_probability", "Quality metric from CNN", "1–2 % yield gain"),
        ("Material Composition & Alloy Mix AI", "deviation_from_target_grade", "Predict deviation, suggest corrections", "₹20 L/year raw material savings"),
        ("Inventory & Yield Optimization", "yield_ratio (output/input)", "Linked to WIP and process yield", "₹1 Cr+/year"),
        ("Refractory & Cooling Loss Prediction", "lining_thickness / heat_loss_rate", "Predict wear for planned maintenance", "₹40 L/year downtime savings"),
    ]
    target_table = pd.DataFrame(target_rows, columns=target_columns)
    st.dataframe(target_table, use_container_width=True)

    st.markdown("---")
    st.subheader(" Business Framing for Clients")
    st.markdown("These metrics show approximate annual benefits from small process improvements.")

    business_columns = ["Metric", "Typical Value (EAF India)", "5 % Improvement → Annual ₹ Value"]
    business_rows = [
        ("Energy consumption", "400 kWh/ton", "₹35–60 L"),
        ("Electrode wear", "1.8 kg/ton", "₹10 L"),
        ("Refractory wear", "3 mm/heat", "₹15 L"),
        ("Oxygen usage", "40 Nm³/ton", "₹20 L"),
        ("Yield loss", "2 %", "₹50 L – ₹1 Cr"),
    ]
    business_table = pd.DataFrame(business_rows, columns=business_columns)
    st.dataframe(business_table, use_container_width=True)
    st.info("These numbers are indicative averages; actual benefits depend on plant capacity and process efficiency.")
|
|
|
|
|
|
|
|
# Tab 6: literature table plus a short feature-to-research mapping.
with tabs[6]:
    st.subheader("📚 Annotated Bibliography & Feature Justification")
    st.markdown("""
    This section summarizes published research supporting the feature design and modeling choices.
    """)

    # (title, authors/year, relevance) per paper.
    bib_data = [
        ("A Survey of Data-Driven Soft Sensing in Ironmaking Systems", "Yan et al. (2024)", "Supports gas proxies, lags, PCA for off-gas and temperature correlation."),
        ("Optimisation of Oxygen Blowing Process using RL", "Ojeda Roldan et al. (2022)", "Reinforcement learning for oxygen control; motivates surrogate predicted states & safety indices."),
        ("Analyzing the Energy Efficiency of Electric Arc Furnace", "Zhuo et al. (2024)", "Energy KPIs (kWh/t) motivate power_density & energy_efficiency features."),
        ("BOF/Endpoint Prediction Techniques", "Springer (2024)", "Endpoint prediction; supports temporal lags and cycle encoding."),
        ("Dynamic EAF Modeling & Slag Foaming", "MacRosty et al.", "Physics priors for slag_foaming_index and refractory health modeling."),
    ]
    bib_columns = ["Paper Title", "Authors / Year", "Relevance to Feature Engineering"]
    bib_df = pd.DataFrame.from_records(bib_data, columns=bib_columns)
    st.dataframe(bib_df, use_container_width=True)

    st.markdown("""
    **Feature-to-Research Mapping Summary:**
    - Gas probes & soft-sensing → `carbon_proxy`, `oxygen_utilization`
    - Power & energy proxies → `power_density`, `energy_efficiency`
    - Temporal features → rolling means, lags, cycle progress indicators
    - Surrogate features → `pred_temp_30s`, `pred_carbon_5min`
    - PCA / clustering → operating mode compression
    """)
|
|
|
|
|
|
|
|
|
|
|
# Page footer: horizontal rule followed by the synthetic-data disclaimer.
footer_parts = (
    "---",
    "**Notes:** This dataset is synthetic and for demo/prototyping. Real plant integration requires NDA, data on-boarding, sensor mapping, and plant safety checks before any control actions.",
)
for footer_md in footer_parts:
    st.markdown(footer_md)
|
|
|