|
|
|
|
|
|
|
|
import os |
|
|
import json |
|
|
import time |
|
|
from datetime import datetime |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import streamlit as st |
|
|
import matplotlib.pyplot as plt |
|
|
import seaborn as sns |
|
|
import joblib |
|
|
import zipfile |
|
|
import io |
|
|
|
|
|
|
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.linear_model import LinearRegression |
|
|
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor |
|
|
from sklearn.preprocessing import StandardScaler, PolynomialFeatures |
|
|
from sklearn.decomposition import PCA |
|
|
from sklearn.cluster import KMeans |
|
|
from sklearn.metrics import mean_squared_error, r2_score |
|
|
|
|
|
|
|
|
import shap |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Page configuration ---------------------------------------------------
st.set_page_config(page_title="Steel Authority of India Limited (MODEX)", layout="wide")

# --- Log root ---------------------------------------------------------------
BASE_DIR = "./"
LOG_DIR = os.path.join(BASE_DIR, "logs")
os.makedirs(LOG_DIR, exist_ok=True)

# --- Per-session run directory ----------------------------------------------
# Streamlit re-executes this script from the top on every widget interaction.
# Deriving run_id from datetime.now() on each execution would create a fresh
# run directory (and therefore force the dataset to be regenerated) on every
# rerun, so the id is created once and kept in session state.
if "run_id" not in st.session_state:
    st.session_state["run_id"] = datetime.now().strftime("%Y%m%d_%H%M%S")
run_id = st.session_state["run_id"]
RUN_DIR = os.path.join(LOG_DIR, f"run_{run_id}")
os.makedirs(RUN_DIR, exist_ok=True)

# --- Artifact paths inside the run directory ---------------------------------
CSV_PATH = os.path.join(RUN_DIR, "flatfile_universe_advanced.csv")
META_PATH = os.path.join(RUN_DIR, "feature_metadata_advanced.json")
ENSEMBLE_ARTIFACT = os.path.join(RUN_DIR, "ensemble_models.joblib")
LOG_PATH = os.path.join(RUN_DIR, "run.log")
|
|
|
|
|
def log(msg: str):
    """Append a timestamped copy of ``msg`` to the run log and echo ``msg`` to stdout.

    The file at LOG_PATH is opened in append mode with UTF-8 encoding; the
    printed copy carries no timestamp.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{stamp}] {msg}\n"
    with open(LOG_PATH, mode="a", encoding="utf-8") as log_file:
        log_file.write(entry)
    print(msg)
|
|
|
|
|
# Record the start of this script execution in the run log.
for _startup_line in (
    f" Streamlit session started | run_id={run_id}",
    f"Run directory: {RUN_DIR}",
):
    log(_startup_line)

# Tell the user whether a /data mount exists on this host.
if not os.path.exists("/data"):
    st.sidebar.warning(f" Using ephemeral storage | Run directory: {RUN_DIR}. Data will be lost on rebuild.")
else:
    st.sidebar.success(f" Using persistent storage | Run directory: {RUN_DIR}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_advanced_flatfile(
    n_rows=3000,
    random_seed=42,
    max_polynomial_new=60,
    global_variance_multiplier=1.0,
    variance_overrides=None,
):
    """
    Generate a synthetic steel-plant dataset with engineered features and write
    it, together with a per-column metadata file, into the current run directory.

    The frame is built from randomly sampled "natural" sensor columns, then
    extended with ratio features, rolling/lag features, interaction terms,
    degree-2 polynomial terms, PCA components, a KMeans operating mode,
    random-forest surrogate predictions and a few flag columns.

    Args:
        n_rows: number of samples (one row per minute starting 2025-01-01).
        random_seed: seed passed to ``np.random.seed``.
        max_polynomial_new: maximum number of polynomial expansion columns kept.
        global_variance_multiplier: multiplier applied to every default stddev
            that is not overridden.
        variance_overrides: dict mapping a sampler key (e.g. ``"vibration"``,
            ``"chemical"``, ``"furnace_temp"``) or a substring of it to the
            standard deviation to use. The value replaces the default stddev
            outright; ``global_variance_multiplier`` is not applied to it.

    Returns:
        Tuple ``(CSV_PATH, META_PATH, PDF_PATH)``; ``PDF_PATH`` is always None.
    """
    np.random.seed(random_seed)
    os.makedirs(RUN_DIR, exist_ok=True)
    if variance_overrides is None:
        variance_overrides = {}

    # ---- Natural (directly "measured") features ----
    natural_feats = [
        "vibration_x","vibration_y","motor_current","rpm","bearing_temp","ambient_temp","lube_pressure","power_factor",
        "furnace_temp","tap_temp","slag_temp","offgas_co","offgas_co2","o2_probe_pct","c_feed_rate","arc_power","furnace_pressure","feed_time",
        "mold_temp","casting_speed","nozzle_pressure","cooling_water_temp","billet_length","chemical_C","chemical_Mn","chemical_Si","chemical_S",
        "roll_speed","motor_load","coolant_flow","exit_temp","strip_thickness","line_tension","roller_vibration",
        "lighting_intensity","surface_temp","image_entropy_proxy",
        "spectro_Fe","spectro_C","spectro_Mn","spectro_Si","time_since_last_sample",
        "batch_id_numeric","weight_input","weight_output","time_in_queue","conveyor_speed",
        "shell_temp","lining_thickness","water_flow","cooling_out_temp","heat_flux"
    ]
    # De-duplicate while preserving order.
    natural_feats = list(dict.fromkeys(natural_feats))

    def effective_sd(feature_name, base_sd):
        """Return the stddev to sample with: an override if one matches, else the scaled default."""
        # Exact key match wins over substring matches.
        if feature_name in variance_overrides:
            return float(variance_overrides[feature_name])
        # Otherwise the first override key that is a substring of the name.
        for key, val in variance_overrides.items():
            if key in feature_name:
                return float(val)
        return float(base_sd) * float(global_variance_multiplier)

    def sample_col(name, n):
        """Draw ``n`` random values for column ``name`` from its name-based distribution."""
        name_l = name.lower()
        # Temperature columns: the specific cases must be tested before the
        # generic "*_temp" rule, otherwise that rule captures every temperature
        # column and the mold/shell/exit/bearing branches can never run.
        if "furnace_temp" in name_l or "tap_temp" in name_l:
            sd = effective_sd("furnace_temp", 50)
            return np.random.normal(1550, sd, n)
        if name_l in ("mold_temp","shell_temp","cooling_out_temp","exit_temp"):
            sd = effective_sd(name_l, 30)
            return np.random.normal(200 if "mold" not in name_l else 1500, sd, n)
        if "bearing_temp" in name_l:
            sd = effective_sd("bearing_temp", 5)
            return np.random.normal(65, sd, n)
        if name_l.endswith("_temp"):
            # Remaining temperature columns keep the original generic distribution.
            sd = effective_sd("furnace_temp", 50)
            return np.random.normal(1550, sd, n)
        # "offgas_co2" must be tested before "offgas_co" (substring of it).
        if "offgas_co2" in name_l:
            sd = effective_sd("offgas_co2", 4)
            return np.abs(np.random.normal(15, sd, n))
        if "offgas_co" in name_l:
            sd = effective_sd("offgas_co", 5)
            return np.abs(np.random.normal(20, sd, n))
        if "o2" in name_l:
            sd = effective_sd("o2_probe_pct", 1)
            return np.clip(np.random.normal(5, sd, n), 0.01, 60)
        if "arc_power" in name_l or "motor_load" in name_l:
            sd = effective_sd("arc_power", 120)
            return np.abs(np.random.normal(600, sd, n))
        if "rpm" in name_l:
            sd = effective_sd("rpm", 30)
            return np.abs(np.random.normal(120, sd, n))
        if "vibration" in name_l:
            sd = effective_sd("vibration", 0.15)
            return np.abs(np.random.normal(0.4, sd, n))
        if "chemical" in name_l or "spectro" in name_l:
            sd = effective_sd("chemical", 0.15)
            return np.random.normal(0.7, sd, n)
        if "weight" in name_l:
            sd = effective_sd("weight", 100)
            return np.random.normal(1000, sd, n)
        if "conveyor_speed" in name_l or "casting_speed" in name_l:
            sd = effective_sd("casting_speed", 0.6)
            return np.random.normal(2.5, sd, n)
        if "power_factor" in name_l:
            sd = effective_sd("power_factor", 0.03)
            return np.clip(np.random.normal(0.92, sd, n), 0.6, 1.0)
        if "image_entropy_proxy" in name_l:
            sd = effective_sd("image_entropy_proxy", 0.25)
            return np.abs(np.random.normal(0.5, sd, n))
        if "batch_id" in name_l:
            return np.random.randint(1000,9999,n)
        if "time_since" in name_l or "time_in_queue" in name_l:
            sd = effective_sd("time_since", 20)
            return np.abs(np.random.normal(30, sd, n))
        if "heat_flux" in name_l:
            sd = effective_sd("heat_flux", 300)
            return np.abs(np.random.normal(1000, sd, n))
        # Anything without a dedicated rule: zero-mean noise.
        return np.random.normal(0, effective_sd(name_l, 1), n)

    # ---- Base frame ----
    df = pd.DataFrame({c: sample_col(c, n_rows) for c in natural_feats})

    # ---- Time axis and plant metadata ----
    start = pd.Timestamp("2025-01-01T00:00:00")
    df["timestamp"] = pd.date_range(start, periods=n_rows, freq="min")
    df["cycle_minute"] = np.mod(np.arange(n_rows), 80)
    df["meta_plant_name"] = np.random.choice(["Rourkela","Bhilai","Durgapur","Bokaro","Burnpur","Salem"], n_rows)
    df["meta_country"] = "India"

    # ---- Ratio features (small offsets in the denominators avoid division by zero) ----
    df["carbon_proxy"] = df["offgas_co"] / (df["offgas_co2"] + 1.0)
    df["oxygen_utilization"] = df["offgas_co2"] / (df["offgas_co"] + 1.0)
    df["power_density"] = df["arc_power"] / (df["weight_input"] + 1.0)
    df["energy_efficiency"] = df["furnace_temp"] / (df["arc_power"] + 1.0)
    df["slag_foaming_index"] = (df["slag_temp"] * df["offgas_co"]) / (df["o2_probe_pct"] + 1.0)
    df["yield_ratio"] = df["weight_output"] / (df["weight_input"] + 1e-9)

    # ---- Rolling / lag / rate-of-change features ----
    rolling_cols = ["arc_power","furnace_temp","offgas_co","offgas_co2","motor_current","vibration_x","weight_input"]
    for rc in rolling_cols:
        if rc in df.columns:
            df[f"{rc}_roll_mean_3"] = df[rc].rolling(3, min_periods=1).mean()
            df[f"{rc}_roll_std_5"] = df[rc].rolling(5, min_periods=1).std().fillna(0)
            df[f"{rc}_lag1"] = df[rc].shift(1).bfill()
            df[f"{rc}_roc_1"] = df[rc].diff().fillna(0)

    # ---- Interaction terms ----
    df["arc_o2_interaction"] = df["arc_power"] * df["o2_probe_pct"]
    df["carbon_power_ratio"] = df["carbon_proxy"] / (df["arc_power"] + 1e-6)
    df["temp_power_sqrt"] = df["furnace_temp"] * np.sqrt(np.abs(df["arc_power"]) + 1e-6)

    # ---- Polynomial expansion of the first 12 numeric columns ----
    # `numeric` is a snapshot taken here; columns added below are not part of it.
    numeric = df.select_dtypes(include=[np.number]).fillna(0)
    poly_source_cols = numeric.columns[:12].tolist()
    poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
    poly_mat = poly.fit_transform(numeric[poly_source_cols])
    poly_names = poly.get_feature_names_out(poly_source_cols)
    poly_df = pd.DataFrame(poly_mat, columns=[f"poly__{n}" for n in poly_names], index=df.index)
    # Drop the degree-1 terms (copies of the source columns) and cap the count.
    keep_poly = [c for c in poly_df.columns if c.replace("poly__","") not in poly_source_cols]
    poly_df = poly_df[keep_poly].iloc[:, :max_polynomial_new] if len(keep_poly) > 0 else poly_df.iloc[:, :0]
    df = pd.concat([df, poly_df], axis=1)

    # ---- PCA components on the standardized numeric snapshot ----
    scaler = StandardScaler()
    scaled = scaler.fit_transform(numeric)
    pca = PCA(n_components=6, random_state=42)
    pca_cols = pca.fit_transform(scaled)
    for i in range(pca_cols.shape[1]):
        df[f"pca_{i+1}"] = pca_cols[:, i]

    # ---- Operating mode via KMeans on the same standardized matrix ----
    kmeans = KMeans(n_clusters=6, random_state=42, n_init=10)
    df["operating_mode"] = kmeans.fit_predict(scaled)

    # ---- Surrogate model: next-row furnace temperature ----
    # Predictions are made on the same rows the forest was fitted on.
    surrogate_df = df.copy()
    surrogate_df["furnace_temp_next"] = surrogate_df["furnace_temp"].shift(-1).ffill()
    features_for_surrogate = [c for c in ["furnace_temp","arc_power","o2_probe_pct","offgas_co","offgas_co2"] if c in df.columns]
    if len(features_for_surrogate) >= 2:
        X = surrogate_df[features_for_surrogate].fillna(0)
        y = surrogate_df["furnace_temp_next"]
        rf = RandomForestRegressor(n_estimators=50, random_state=42, n_jobs=-1)
        rf.fit(X, y)
        df["pred_temp_30s"] = rf.predict(X)
    else:
        df["pred_temp_30s"] = df["furnace_temp"]

    # ---- Surrogate model: carbon proxy from off-gas readings ----
    if all(c in df.columns for c in ["offgas_co","offgas_co2","o2_probe_pct"]):
        X2 = df[["offgas_co","offgas_co2","o2_probe_pct"]].fillna(0)
        rf2 = RandomForestRegressor(n_estimators=50, random_state=1, n_jobs=-1)
        rf2.fit(X2, df["carbon_proxy"])
        df["pred_carbon_5min"] = rf2.predict(X2)
    else:
        df["pred_carbon_5min"] = df["carbon_proxy"]

    # ---- Safety / constraint columns ----
    # NOTE(review): lining_thickness has no dedicated sampling rule and is drawn
    # as zero-mean noise, so with default settings this flag is 1 on
    # practically every row — confirm the intended distribution/threshold.
    df["refractory_limit_flag"] = (df["lining_thickness"] < 140).astype(int)
    df["max_allowed_power_delta"] = np.clip(df["arc_power"].diff().abs().fillna(0), 0, 2000)

    # ---- Rule-based flag and random confidence ----
    df["ARC_ON"] = ((df["arc_power"] > df["arc_power"].median()) & (df["carbon_proxy"] < 1.0)).astype(int)
    df["prediction_confidence"] = np.clip(np.random.beta(2,5, n_rows), 0.05, 0.99)

    # ---- Clean-up: infinities -> NaN, back-fill, then zero-fill leftovers ----
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    # DataFrame.bfill replaces the deprecated fillna(method="bfill").
    df.bfill(inplace=True)
    df.fillna(0, inplace=True)

    # ---- Persist dataset and per-column metadata ----
    df.to_csv(CSV_PATH, index=False)
    meta = []
    for col in df.columns:
        if col in natural_feats:
            source = "natural"
        elif col.startswith("poly__") or col.startswith("pca_") or col in ["operating_mode"]:
            source = "advanced_synthetic"
        else:
            source = "synthetic"
        meta.append({
            "feature_name": col,
            "source_type": source,
            "linked_use_cases": ["All" if source!="natural" else "Mapped"],
            "units": "-",
            "formula": "see generator logic",
            "remarks": "auto-generated or simulated"
        })
    with open(META_PATH, "w") as f:
        json.dump(meta, f, indent=2)

    # No PDF report is produced; the slot is kept so callers can unpack three values.
    PDF_PATH = None

    return CSV_PATH, META_PATH, PDF_PATH
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Build the dataset and metadata only when either file is absent from the run directory.
if not (os.path.exists(CSV_PATH) and os.path.exists(META_PATH)):
    with st.spinner("Generating synthetic features (this may take ~20-60s)..."):
        CSV_PATH, META_PATH, PDF_PATH = generate_advanced_flatfile(
            n_rows=3000,
            random_seed=42,
            max_polynomial_new=80,
        )
    st.success(f"Generated dataset and metadata: {CSV_PATH}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_data
def load_data(csv_path=CSV_PATH, meta_path=META_PATH):
    """Read the generated CSV and its JSON metadata.

    Returns a pair: the dataset as a DataFrame, and the metadata records
    wrapped in a DataFrame.
    """
    dataset = pd.read_csv(csv_path)
    with open(meta_path, "r") as meta_file:
        meta_records = json.load(meta_file)
    meta_frame = pd.DataFrame(meta_records)
    return dataset, meta_frame
|
|
|
|
|
# Load the dataset and metadata (cached by Streamlit).
df, meta_df = load_data()

# ---- Sidebar filters ----
st.sidebar.title("Feature Explorer - Advanced + SHAP")
feat_types = sorted(meta_df["source_type"].unique())
selected_types = st.sidebar.multiselect("Feature type", feat_types, default=feat_types)
numeric_cols = list(df.select_dtypes(include=[np.number]).columns)

# ---- Main layout: title plus one tab per section ----
st.title("Steel Authority of India Limited (SHAP-enabled)")
_tab_labels = [
    "Features",
    "Visualize",
    "Correlations",
    "Stats",
    "Ensemble + SHAP",
    "Target & Business Impact",
    "Bibliography",
]
tabs = st.tabs(_tab_labels)
|
|
|
|
|
|
|
|
# ---- Tab 0: metadata table filtered by the sidebar's feature-type selection ----
with tabs[0]:
    st.subheader("Feature metadata")
    filtered_meta = meta_df[meta_df["source_type"].isin(selected_types)]
    _meta_view = filtered_meta[["feature_name","source_type","formula","remarks"]]
    _meta_view = _meta_view.rename(columns={"feature_name":"Feature"})
    st.dataframe(_meta_view, height=400)
    st.markdown(f"Total features loaded: **{df.shape[1]}** | Rows: **{df.shape[0]}**")
|
|
|
|
|
|
|
|
# ---- Tab 1: histogram + KDE and summary row for one numeric feature ----
with tabs[1]:
    st.subheader("Feature visualization")
    col = st.selectbox("Choose numeric feature", numeric_cols, index=0)
    bins = st.slider("Histogram bins", 10, 200, 50)
    fig, ax = plt.subplots(figsize=(8,4))
    sns.histplot(df[col], bins=bins, kde=True, ax=ax)
    ax.set_title(col)
    st.pyplot(fig)
    # pyplot keeps every figure registered until it is closed; without this
    # each script rerun would leave another open figure behind.
    plt.close(fig)
    st.write(df[col].describe().to_frame().T)
|
|
|
|
|
|
|
|
# ---- Tab 2: correlation heatmap over a user-selected set of numeric features ----
with tabs[2]:
    st.subheader("Correlation explorer")
    # Pre-select at most the first 20 numeric columns.
    default_corr = numeric_cols[:20]
    corr_sel = st.multiselect("Select features (min 2)", numeric_cols, default=default_corr)
    if len(corr_sel) >= 2:
        corr = df[corr_sel].corr()
        fig, ax = plt.subplots(figsize=(10,8))
        sns.heatmap(corr, cmap="coolwarm", center=0, ax=ax)
        st.pyplot(fig)
        # Release the figure so reruns do not accumulate open pyplot figures.
        plt.close(fig)
    else:
        st.info("Choose at least 2 numeric features to compute correlation.")
|
|
|
|
|
|
|
|
# ---- Tab 3: describe() table, one row per feature, three decimals ----
with tabs[3]:
    st.subheader("Summary statistics (numeric features)")
    _stats_table = df.describe().T
    _styled_stats = _stats_table.style.format("{:.3f}")
    st.dataframe(_styled_stats, height=500)
|
|
|
|
|
|
|
|
|
|
|
# ---- Tab 4: AutoML ensemble set-up (use case, features, sampling, model families) ----
with tabs[4]:
    st.subheader(" AutoML Ensemble — Expanded Families + Stacking + SHAP")

    # ---- Use-case selection ----
    st.markdown("### Choose Industrial Use Case ")
    use_case = st.selectbox(
        "Select Use Case",
        [
            "Predictive Maintenance",
            "EAF Data Intelligence",
            "Casting Quality Optimization",
            "Rolling Mill Energy Optimization",
            "Surface Defect Detection (Vision AI)",
            "Material Composition & Alloy Mix AI",
            "Inventory & Yield Optimization",
            "Refractory & Cooling Loss Prediction"
        ],
        index=1
    )

    # ---- Use case -> target column and suggested model family ----
    use_case_config = {
        "Predictive Maintenance": {"target": "bearing_temp", "model_hint": "RandomForest"},
        "EAF Data Intelligence": {"target": "furnace_temp", "model_hint": "GradientBoosting"},
        "Casting Quality Optimization": {"target": "surface_temp" if "surface_temp" in numeric_cols else "furnace_temp", "model_hint": "GradientBoosting"},
        "Rolling Mill Energy Optimization": {"target": "energy_efficiency", "model_hint": "ExtraTrees"},
        "Surface Defect Detection (Vision AI)": {"target": "image_entropy_proxy", "model_hint": "GradientBoosting"},
        "Material Composition & Alloy Mix AI": {"target": "chemical_C", "model_hint": "RandomForest"},
        "Inventory & Yield Optimization": {"target": "yield_ratio", "model_hint": "GradientBoosting"},
        "Refractory & Cooling Loss Prediction": {"target": "lining_thickness", "model_hint": "ExtraTrees"},
    }
    cfg = use_case_config.get(use_case, {"target": numeric_cols[0], "model_hint": "RandomForest"})
    target = cfg["target"]
    model_hint = cfg["model_hint"]

    # ---- Feature suggestion ----
    # The target itself must never be offered as an input: it would leak the
    # answer into X, and df[features + [target]] would carry a duplicated
    # column label, turning sub_df[target] into a two-column frame.
    # NOTE(review): columns derived from the target (lags, rolling means,
    # polynomial terms) remain selectable and may still leak information.
    candidate_cols = [c for c in numeric_cols if c != target]
    suggested = [c for c in candidate_cols if any(k in c for k in target.split('_'))]
    if len(suggested) < 6:
        suggested = [c for c in candidate_cols if any(k in c for k in ["temp", "power", "energy", "pressure", "yield"])]
    if len(suggested) < 6:
        suggested = candidate_cols[:50]

    features = st.multiselect("Model input features (auto-suggested)", candidate_cols, default=suggested)
    st.markdown(f"Auto target: `{target}` · Suggested family hint: `{model_hint}`")

    # ---- Row sampling (training speed vs fidelity) ----
    max_rows = min(df.shape[0], 20000)
    sample_size = st.slider("Sample rows (train speed vs fidelity)", 500, max_rows, min(1500, max_rows), step=100)

    sub_df = df[features + [target]].sample(n=sample_size, random_state=42).reset_index(drop=True)
    X = sub_df[features].fillna(0)
    y = sub_df[target].fillna(0)

    # ---- Ensemble / AutoML settings ----
    st.markdown("### Ensemble & AutoML Settings")
    max_trials = st.slider("Optuna trials per family (total trials grow with families)", 5, 80, 20, step=5)
    top_k = st.slider("Max base models to keep in final ensemble", 2, 8, 5)
    allow_advanced = st.checkbox("Include advanced families (XGBoost, LightGBM, CatBoost, TabPFN if installed)", value=True)

    # ---- Optional model families: probe each import, record availability ----
    available_models = ["RandomForest", "ExtraTrees"]
    optional_families = {}
    if allow_advanced:
        try:
            import xgboost as xgb
            optional_families["XGBoost"] = True
            available_models.append("XGBoost")
        except Exception:
            optional_families["XGBoost"] = False
        try:
            import lightgbm as lgb
            optional_families["LightGBM"] = True
            available_models.append("LightGBM")
        except Exception:
            optional_families["LightGBM"] = False
        try:
            import catboost as cb
            optional_families["CatBoost"] = True
            available_models.append("CatBoost")
        except Exception:
            optional_families["CatBoost"] = False
        try:
            import tabpfn
            optional_families["TabPFN"] = True
            available_models.append("TabPFN")
        except Exception:
            optional_families["TabPFN"] = False
        try:
            from pytorch_tabular.models import transformers
            optional_families["FTTransformer"] = True
            available_models.append("FTTransformer")
        except Exception:
            optional_families["FTTransformer"] = False

    st.markdown(f"Available model families: {', '.join(available_models)}")

    # ---- Imports used by the tuning and stacking code below ----
    import optuna
    from sklearn.model_selection import cross_val_score, KFold
    from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
    from sklearn.linear_model import Ridge
    from sklearn.neural_network import MLPRegressor
    from sklearn.metrics import r2_score, mean_squared_error
|
|
|
|
|
def tune_family(family_name, X_local, y_local, n_trials=20, random_state=42):
    """Tune one model family with Optuna and re-score the best configuration.

    Args:
        family_name: one of "RandomForest", "ExtraTrees", "XGBoost", "LightGBM",
            "CatBoost", "MLP" or "TabPFN". Any other name (or an optional family
            that is not installed) is tuned as a fixed RandomForest with no
            search space.
        X_local: feature matrix passed to ``cross_val_score``.
        y_local: target passed to ``cross_val_score``.
        n_trials: number of Optuna trials.
        random_state: seed used for the Optuna sampler and for every estimator,
            both during the search and for the returned model.

    Returns:
        dict with keys ``model_obj`` (unfitted estimator, or the string
        ``"TabPFN_placeholder"``), ``cv_score`` (mean 3-fold R², -999.0 if
        scoring failed), ``best_params``, ``family`` and ``study``.
    """

    def is_placeholder_family():
        # TabPFN is not actually tuned; callers substitute their own model.
        return family_name == "TabPFN" and bool(optional_families.get("TabPFN"))

    def suggest_params(trial):
        """Sample this family's hyper-parameters; empty dict when it has no search space."""
        if family_name in ("RandomForest", "ExtraTrees"):
            return {
                "n_estimators": trial.suggest_int("n_estimators", 100, 800),
                "max_depth": trial.suggest_int("max_depth", 4, 30),
            }
        if family_name == "XGBoost" and optional_families.get("XGBoost"):
            return {
                "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
                "max_depth": trial.suggest_int("max_depth", 3, 12),
                "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            }
        if family_name == "LightGBM" and optional_families.get("LightGBM"):
            return {
                "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
                "max_depth": trial.suggest_int("max_depth", 3, 16),
                "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            }
        if family_name == "CatBoost" and optional_families.get("CatBoost"):
            return {
                "iterations": trial.suggest_int("iterations", 200, 1000),
                "depth": trial.suggest_int("depth", 4, 10),
                "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            }
        if family_name == "MLP":
            return {
                "hidden_layer_sizes": trial.suggest_int("hidden_layer_sizes", 32, 512, log=True),
                "learning_rate_init": trial.suggest_float("learning_rate_init", 1e-4, 1e-1, log=True),
            }
        return {}

    def fallback_model():
        return RandomForestRegressor(n_estimators=200, max_depth=8, random_state=random_state, n_jobs=-1)

    def build_model(params):
        """Build an unfitted estimator for this family; missing params fall back to defaults."""
        if family_name == "RandomForest":
            return RandomForestRegressor(n_estimators=params.get("n_estimators", 200), max_depth=params.get("max_depth", 8), n_jobs=-1, random_state=random_state)
        if family_name == "ExtraTrees":
            return ExtraTreesRegressor(n_estimators=params.get("n_estimators", 200), max_depth=params.get("max_depth", 8), n_jobs=-1, random_state=random_state)
        if family_name == "XGBoost" and optional_families.get("XGBoost"):
            return xgb.XGBRegressor(n_estimators=params.get("n_estimators", 200), max_depth=params.get("max_depth", 6), learning_rate=params.get("learning_rate", 0.1), tree_method="hist", verbosity=0, random_state=random_state, n_jobs=1)
        if family_name == "LightGBM" and optional_families.get("LightGBM"):
            return lgb.LGBMRegressor(n_estimators=params.get("n_estimators", 200), max_depth=params.get("max_depth", 8), learning_rate=params.get("learning_rate", 0.1), n_jobs=1, random_state=random_state)
        if family_name == "CatBoost" and optional_families.get("CatBoost"):
            return cb.CatBoostRegressor(iterations=params.get("iterations", 200), depth=params.get("depth", 6), learning_rate=params.get("learning_rate", 0.1), verbose=0, random_state=random_state)
        if family_name == "MLP":
            return MLPRegressor(hidden_layer_sizes=(params.get("hidden_layer_sizes", 128),), learning_rate_init=params.get("learning_rate_init", 0.001), max_iter=500, random_state=random_state)
        if is_placeholder_family():
            return "TabPFN_placeholder"
        return fallback_model()

    def cv_r2(estimator):
        """Mean 3-fold R²; -999.0 marks a configuration that failed to fit or score."""
        try:
            scores = cross_val_score(estimator, X_local, y_local, scoring="r2", cv=3, n_jobs=1)
            return float(np.mean(scores))
        except Exception:
            return -999.0

    def obj(trial):
        if is_placeholder_family():
            return 0.0
        return cv_r2(build_model(suggest_params(trial)))

    # Seed the sampler so the same random_state reproduces the same search.
    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(seed=random_state),
    )
    study.optimize(obj, n_trials=n_trials, show_progress_bar=False)
    best = study.best_trial.params if study.trials else {}

    # Rebuild the winning configuration; any construction error degrades to the fallback forest.
    try:
        model = build_model(best)
    except Exception:
        model = fallback_model()

    # Re-score the final model (the placeholder string cannot be scored and yields -999.0).
    score = cv_r2(model)

    return {"model_obj": model, "cv_score": score, "best_params": best, "family": family_name, "study": study}
|
|
|
|
|
|
|
|
run_btn = st.button(" Run expanded AutoML + Stacking") |
|
|
if run_btn: |
|
|
log("AutoML + Stacking initiated.") |
|
|
with st.spinner("Tuning multiple families (this may take a while depending on choices)..."): |
|
|
families_to_try = ["RandomForest", "ExtraTrees", "MLP"] |
|
|
if allow_advanced: |
|
|
if optional_families.get("XGBoost"): families_to_try.append("XGBoost") |
|
|
if optional_families.get("LightGBM"): families_to_try.append("LightGBM") |
|
|
if optional_families.get("CatBoost"): families_to_try.append("CatBoost") |
|
|
if optional_families.get("TabPFN"): families_to_try.append("TabPFN") |
|
|
if optional_families.get("FTTransformer"): families_to_try.append("FTTransformer") |
|
|
|
|
|
tuned_results = [] |
|
|
for fam in families_to_try: |
|
|
log(f"Tuning family: {fam}") |
|
|
st.caption(f"Tuning family: {fam}") |
|
|
res = tune_family(fam, X, y, n_trials=max_trials) |
|
|
|
|
|
if isinstance(res, dict) and "model_obj" in res: |
|
|
tuned_results.append(res) |
|
|
else: |
|
|
st.warning(f"Family {fam} returned unexpected tune result: {res}") |
|
|
log("All families tuned successfully.") |
|
|
|
|
|
|
|
|
lb = pd.DataFrame([{"family": r["family"], "cv_r2": r["cv_score"], "params": r["best_params"]} for r in tuned_results]) |
|
|
lb = lb.sort_values("cv_r2", ascending=False).reset_index(drop=True) |
|
|
st.markdown("### Tuning Leaderboard (by CV R²)") |
|
|
st.dataframe(lb[["family","cv_r2"]].round(4)) |
|
|
|
|
|
|
|
|
st.markdown("### Building base models & out-of-fold predictions for stacking") |
|
|
kf = KFold(n_splits=5, shuffle=True, random_state=42) |
|
|
base_models = [] |
|
|
oof_preds = pd.DataFrame(index=X.index) |
|
|
|
|
|
for idx, row in lb.iterrows(): |
|
|
fam = row["family"] |
|
|
model_entry = next((r for r in tuned_results if r["family"] == fam), None) |
|
|
if model_entry is None: |
|
|
continue |
|
|
model_obj = model_entry["model_obj"] |
|
|
|
|
|
oof = np.zeros(X.shape[0]) |
|
|
for tr_idx, val_idx in kf.split(X): |
|
|
X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx] |
|
|
y_tr = y.iloc[tr_idx] |
|
|
|
|
|
if model_obj == "TabPFN_placeholder": |
|
|
try: |
|
|
|
|
|
tmp = RandomForestRegressor(n_estimators=200, max_depth=8, random_state=42, n_jobs=-1) |
|
|
tmp.fit(X_tr, y_tr) |
|
|
oof[val_idx] = tmp.predict(X_val) |
|
|
except Exception: |
|
|
oof[val_idx] = np.mean(y_tr) |
|
|
else: |
|
|
try: |
|
|
model_obj.fit(X_tr, y_tr) |
|
|
oof[val_idx] = model_obj.predict(X_val) |
|
|
except Exception: |
|
|
|
|
|
oof[val_idx] = np.mean(y_tr) |
|
|
oof_preds[f"{fam}_oof"] = oof |
|
|
|
|
|
|
|
|
try: |
|
|
if model_entry["model_obj"] == "TabPFN_placeholder": |
|
|
|
|
|
fitted = RandomForestRegressor(n_estimators=200, max_depth=8, random_state=42, n_jobs=-1) |
|
|
fitted.fit(X, y) |
|
|
else: |
|
|
model_entry["model_obj"].fit(X, y) |
|
|
fitted = model_entry["model_obj"] |
|
|
except Exception: |
|
|
fitted = RandomForestRegressor(n_estimators=200, max_depth=8, random_state=42, n_jobs=-1) |
|
|
fitted.fit(X, y) |
|
|
|
|
|
base_models.append({"family": fam, "model": fitted, "cv_r2": model_entry["cv_score"]}) |
|
|
|
|
|
|
|
|
if oof_preds.shape[1] == 0: |
|
|
st.error("No base models created — aborting stacking.") |
|
|
else: |
|
|
corr_matrix = oof_preds.corr().abs() |
|
|
|
|
|
diversity = {col: 1 - corr_matrix[col].drop(col).mean() for col in corr_matrix.columns} |
|
|
summary = [] |
|
|
for bm in base_models: |
|
|
col = f"{bm['family']}_oof" |
|
|
summary.append({"family": bm["family"], "cv_r2": bm["cv_r2"], "diversity": diversity.get(col, 0.0)}) |
|
|
summary_df = pd.DataFrame(summary).sort_values(["cv_r2", "diversity"], ascending=[False, False]).reset_index(drop=True) |
|
|
st.markdown("### Base Model Summary (cv_r2, diversity)") |
|
|
st.dataframe(summary_df.round(4)) |
|
|
|
|
|
|
|
|
selected = summary_df.sort_values(["cv_r2","diversity"], ascending=[False, False]).head(top_k)["family"].tolist() |
|
|
st.markdown(f"Selected for stacking (top {top_k}): {selected}") |
|
|
|
|
|
|
|
|
selected_cols = [f"{s}_oof" for s in selected] |
|
|
X_stack = oof_preds[selected_cols].fillna(0) |
|
|
meta = Ridge(alpha=1.0) |
|
|
meta.fit(X_stack, y) |
|
|
|
|
|
|
|
|
|
|
|
X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2, random_state=42) |
|
|
|
|
|
|
|
|
def scalar_mean(arr):
    """Return the mean of *arr* as a plain Python ``float``.

    The direct ``np.mean`` result is used when it converts to ``float``;
    if that raises (for example because the mean is not a scalar), the
    input is flattened with ``np.ravel`` first and the mean is retried.
    """
    try:
        direct_mean = np.mean(arr)
        return float(direct_mean)
    except Exception:
        flattened = np.ravel(arr)
        return float(np.mean(flattened))
|
|
|
|
|
|
|
|
# --- Build the meta-learner's validation inputs and predict -----------------
# One column per selected family, in the same order that was used to fit
# `meta`. Any family that cannot produce usable predictions contributes a
# constant column (the mean of y_tr) so the column count still lines up.
base_model_map = {bm["family"]: bm["model"] for bm in base_models}

meta_inputs = []
missing_families = []   # selected families with no fitted base model
failed_families = []    # selected families whose predict() raised
n_meta_features_trained = X_stack.shape[1]

# Loop-invariant values, computed once.
safe_mean = scalar_mean(y_tr)
n_val_rows = len(X_val)

for fam in selected:
    bm = base_model_map.get(fam)
    if bm is None:
        missing_families.append(fam)
        meta_inputs.append(np.full(n_val_rows, safe_mean))
        continue

    try:
        preds = np.asarray(bm.predict(X_val))
        # Collapse 2-D output to one value per row, then force 1-D.
        if preds.ndim == 2:
            preds = preds.mean(axis=1)
        preds = preds.reshape(-1)
        # A length mismatch cannot be stacked; fall back to the constant.
        if preds.shape[0] != n_val_rows:
            preds = np.full(n_val_rows, safe_mean)
        meta_inputs.append(preds)
    except Exception as e:
        # Best-effort: keep stacking alive, but record the failure instead
        # of silently substituting the mean.
        failed_families.append(fam)
        log(f"Base model '{fam}' failed to predict on validation split: {e}")
        meta_inputs.append(np.full(n_val_rows, safe_mean))

if missing_families:
    st.warning(f"Missing base models: {missing_families}. Using mean predictions.")

if failed_families:
    st.warning(f"Base models failed to predict: {failed_families}. Using mean predictions.")

if not meta_inputs:
    st.error("No meta features to predict — aborting.")
    st.stop()

X_meta_val = np.column_stack(meta_inputs)
n_meta_features_val = X_meta_val.shape[1]

# Defensive alignment with the training width. Every selected family adds
# exactly one column above, so these branches are not expected to trigger.
if n_meta_features_val < n_meta_features_trained:
    pad_cols = n_meta_features_trained - n_meta_features_val
    pad = np.full((n_val_rows, pad_cols), safe_mean)
    X_meta_val = np.hstack([X_meta_val, pad])
elif n_meta_features_val > n_meta_features_trained:
    X_meta_val = X_meta_val[:, :n_meta_features_trained]

if X_meta_val.shape[1] != n_meta_features_trained:
    st.error(f"Stack alignment failed: {X_meta_val.shape[1]} != {n_meta_features_trained}")
    st.stop()

y_meta_pred = meta.predict(X_meta_val)
|
|
|
|
|
|
|
|
# --- Score the stacked ensemble on the validation split and plot it ---------
final_r2 = r2_score(y_val, y_meta_pred)
# The `squared=False` keyword of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6; taking the square root of the MSE
# gives the same RMSE on every version.
final_rmse = float(np.sqrt(mean_squared_error(y_val, y_meta_pred)))

# Only the metrics are final at this point, so nothing is announced as
# "complete" here; the completion banner is shown after the artifact and
# SHAP steps that follow.
log(f"Completed stacking. Final R2={final_r2:.4f}, RMSE={final_rmse:.4f}")

c1, c2 = st.columns(2)
c1.metric("Stacked Ensemble R² (holdout)", f"{final_r2:.4f}")
c2.metric("Stacked Ensemble RMSE (holdout)", f"{final_rmse:.4f}")

# Actual vs. predicted scatter with the y = x reference line.
fig, ax = plt.subplots(figsize=(7, 4))
ax.scatter(y_val, y_meta_pred, alpha=0.6)
y_low, y_high = y_val.min(), y_val.max()
ax.plot([y_low, y_high], [y_low, y_high], "r--")
ax.set_xlabel("Actual")
ax.set_ylabel("Stacked Predicted")
st.pyplot(fig)
# Streamlit re-executes the script on every interaction; close the figure
# so pyplot does not accumulate open figures across reruns.
plt.close(fig)
|
|
|
|
|
|
|
|
# --- Persist the stacked ensemble into the current run directory ------------
# The file name is derived from the use-case label. Spaces become "_" as
# before; path separators are additionally replaced with "-" because a label
# containing "/" would otherwise point into a sub-directory of RUN_DIR that
# does not exist and make joblib.dump fail.
# NOTE(review): the set of possible `use_case` values is defined outside this
# section — confirm whether any other characters need sanitising.
artifact_stem = use_case.replace(" ", "_")
for path_sep in ("/", "\\"):
    artifact_stem = artifact_stem.replace(path_sep, "-")
stack_artifact = os.path.join(RUN_DIR, f"stacked_{artifact_stem}.joblib")

to_save = {
    # Only the base models that actually feed the meta-learner.
    "base_models": {bm["family"]: bm["model"] for bm in base_models if bm["family"] in selected},
    "meta": meta,
    "features": features,
    "selected": selected,
    "target": target,
}
joblib.dump(to_save, stack_artifact)
st.caption(f" Stacked ensemble saved: {stack_artifact}")
|
|
|
|
|
|
|
|
# --- SHAP summary for the top-ranked base model (best effort) ---------------
st.markdown("### Explainability (approximate)")
try:
    # The first selected family is the highest-ranked base model.
    top_base = next((b for b in base_models if b["family"] == selected[0]), None)
    if top_base and hasattr(top_base["model"], "predict"):
        # Cap the sample at 300 rows to keep the SHAP computation short.
        sample_X = X_val.sample(min(300, len(X_val)), random_state=42)
        tree_family_keys = ("XGBoost", "LightGBM", "RandomForest", "ExtraTrees", "CatBoost")
        if any(k in top_base["family"] for k in tree_family_keys):
            expl = shap.TreeExplainer(top_base["model"])
            shap_vals = expl.shap_values(sample_X)
            fig_sh = plt.figure(figsize=(8, 6))
            try:
                # summary_plot draws into the current figure (fig_sh).
                shap.summary_plot(shap_vals, sample_X, show=False)
                st.pyplot(fig_sh)
            finally:
                # Release the figure even when plotting fails, so reruns
                # do not pile up open matplotlib figures.
                plt.close(fig_sh)
        else:
            st.info("Top model not tree-based; skipping SHAP summary.")
    else:
        st.info("No suitable base model for SHAP explanation.")
except Exception as e:
    # Explainability is optional: report and record the reason, carry on.
    log(f"SHAP computation skipped: {e}")
    st.warning(f"SHAP computation skipped: {e}")

st.success(" AutoML + Stacking complete — metrics, artifacts, and SHAP ready.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with tabs[5]:
    # Static reference tab: recommended target variable per use case, plus
    # indicative business-value figures. Nothing here depends on model state.
    st.subheader("Recommended Target Variables by Use Case")
    st.markdown("Each use case maps to a practical target variable that drives measurable business impact.")

    target_columns = ["Use Case", "Target Variable", "Why It’s Ideal", "Business Leverage"]
    target_rows = [
        ("Predictive Maintenance (Mills, Motors, Compressors)", "bearing_temp / time_to_failure",
         "Rises before mechanical failure; early warning", "₹10–30 L per asset/year"),
        ("Blast Furnace / EAF Data Intelligence", "furnace_temp / tap_temp",
         "Central control variable, linked to energy and quality", "₹20–60 L/year"),
        ("Casting Quality Optimization", "defect_probability / solidification_rate",
         "Determines billet quality; control nozzle & cooling", "₹50 L/year yield gain"),
        ("Rolling Mill Energy Optimization", "energy_per_ton / exit_temp",
         "Directly tied to energy efficiency", "₹5–10 L/year per kWh/t"),
        ("Surface Defect Detection (Vision AI)", "defect_probability",
         "Quality metric from CNN", "1–2 % yield gain"),
        ("Material Composition & Alloy Mix AI", "deviation_from_target_grade",
         "Predict deviation, suggest corrections", "₹20 L/year raw material savings"),
        ("Inventory & Yield Optimization", "yield_ratio (output/input)",
         "Linked to WIP and process yield", "₹1 Cr+/year"),
        ("Refractory & Cooling Loss Prediction", "lining_thickness / heat_loss_rate",
         "Predict wear for planned maintenance", "₹40 L/year downtime savings"),
    ]
    target_table = pd.DataFrame(target_rows, columns=target_columns)
    st.dataframe(target_table, width="stretch")

    st.markdown("---")
    st.subheader("Business Framing for Clients")
    st.markdown("These metrics show approximate annual benefits from small process improvements.")

    business_columns = ["Metric", "Typical Value (EAF India)", "5 % Improvement → Annual ₹ Value"]
    business_rows = [
        ("Energy consumption", "400 kWh/ton", "₹35–60 L"),
        ("Electrode wear", "1.8 kg/ton", "₹10 L"),
        ("Refractory wear", "3 mm/heat", "₹15 L"),
        ("Oxygen usage", "40 Nm³/ton", "₹20 L"),
        ("Yield loss", "2 %", "₹50 L – ₹1 Cr"),
    ]
    business_table = pd.DataFrame(business_rows, columns=business_columns)
    st.dataframe(business_table, width="stretch")

    st.info("These numbers are indicative averages; actual benefits depend on plant capacity and process efficiency.")
|
|
|
|
|
|
|
|
with tabs[6]:
    # Static bibliography tab: papers cited as justification for the target
    # variables, rendered as a table whose titles are clickable links.
    st.subheader("Annotated Bibliography — Justification for Target Variables")
    st.markdown("""
    These papers justify the chosen target variables (temperature, yield, efficiency, refractory wear)
    in metallurgical AI modeling. Click any title to open the official paper.
    """)

    bib_data = [
        {
            "title": "A Survey of Data-Driven Soft Sensing in Ironmaking Systems",
            "authors": "Yan et al. (2024)",
            "notes": "Soft sensors for furnace and tap temperature; validates `furnace_temp` and `tap_temp` targets.",
            "url": "https://doi.org/10.1021/acsomega.4c01254"
        },
        {
            "title": "Optimisation of Operator Support Systems through Artificial Intelligence for the Cast Steel Industry",
            "authors": "Ojeda Roldán et al. (2022)",
            "notes": "Reinforcement learning for oxygen blowing and endpoint control; supports temperature and carbon targets.",
            "url": "https://doi.org/10.3390/jmmp6020034"
        },
        {
            "title": "Analyzing the Energy Efficiency of Electric Arc Furnace Steelmaking",
            "authors": "Zhuo et al. (2024)",
            "notes": "Links arc power, temperature, and energy KPIs — validates `energy_efficiency` and `power_density`.",
            "url": "https://doi.org/10.3390/met15010113"
        },
        {
            "title": "Dynamic EAF Modeling and Slag Foaming Index Prediction",
            "authors": "MacRosty et al.",
            "notes": "Supports refractory and heat-flux-based wear prediction — validates `lining_thickness` target.",
            "url": "https://www.sciencedirect.com/science/article/pii/S0921883123004019"
        },
        {
            "title": "Machine Learning for Yield Optimization in Continuous Casting",
            "authors": "Springer (2023)",
            "notes": "ML for yield ratio and defect minimization; supports `yield_ratio` target.",
            "url": "https://link.springer.com/article/10.1007/s40964-023-00592-7"
        }
    ]

    bib_df = pd.DataFrame(bib_data)
    bib_df["Paper Title"] = bib_df.apply(lambda x: f"[{x['title']}]({x['url']})", axis=1)

    # st.dataframe displays cell text verbatim, so a "[title](url)" cell
    # would show as raw text rather than a link. Rendering the same three
    # columns as a Markdown table keeps the titles clickable.
    table_lines = [
        "| Paper Title | Authors / Year | Relevance |",
        "| --- | --- | --- |",
    ]
    for cells in bib_df[["Paper Title", "authors", "notes"]].itertuples(index=False, name=None):
        # A literal pipe inside a cell would break the table row.
        escaped = [str(cell).replace("|", "\\|") for cell in cells]
        table_lines.append("| " + " | ".join(escaped) + " |")
    st.markdown("\n".join(table_lines))

    st.markdown("""
    **Feature ↔ Target Justification**
    - `furnace_temp`, `tap_temp` → Process temperature (Yan 2024, Ojeda 2022)
    - `yield_ratio` → Production yield (Springer 2023)
    - `energy_efficiency`, `power_density` → Energy KPIs (Zhuo 2024)
    - `lining_thickness`, `slag_foaming_index` → Refractory & process health (MacRosty et al.)
    """)

    st.info("Click any paper title above to open it in a new tab.")
    log("Bibliography tab rendered successfully.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Page footer: horizontal rule followed by the synthetic-data disclaimer.
st.markdown("---")
footer_disclaimer = (
    "**Notes:** This dataset is synthetic and for demo/prototyping. "
    "Real plant integration requires NDA, data on-boarding, sensor mapping, "
    "and plant safety checks before any control actions."
)
st.markdown(footer_disclaimer)
|
|
|
|
|
|
|
|
# --- Download Saved Runs -----------------------------------------------------
# Appending a label string to `tabs` does not create a Streamlit container,
# and `with tabs[-1]:` on that string fails at runtime because `str` is not a
# context manager. The section is rendered in its own expander instead.
with st.expander("Download Saved Runs"):
    st.subheader("Reproducibility & Run Exports")

    # Newest first: run folder names embed a sortable timestamp. Only
    # directories qualify, since each one is walked and zipped below.
    run_folders = sorted(
        (
            entry for entry in os.listdir(LOG_DIR)
            if entry.startswith("run_") and os.path.isdir(os.path.join(LOG_DIR, entry))
        ),
        reverse=True,
    )

    if not run_folders:
        st.info("No completed runs found yet.")
    else:
        selected_run = st.selectbox("Select run folder", run_folders, index=0)
        selected_path = os.path.join(LOG_DIR, selected_run)

        # Top-level files of the chosen run, shown as a quick overview.
        files = sorted(
            f for f in os.listdir(selected_path)
            if os.path.isfile(os.path.join(selected_path, f))
        )
        st.write(f"Files in `{selected_run}`:")
        st.write(", ".join(files))

        # Zip the whole run folder in memory; archive names are relative to
        # the run folder so the zip unpacks without the logs/ prefix.
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
            for root, _, filenames in os.walk(selected_path):
                for fname in filenames:
                    file_path = os.path.join(root, fname)
                    zipf.write(file_path, arcname=os.path.relpath(file_path, selected_path))
        zip_buffer.seek(0)

        st.download_button(
            label=f"Download full run ({selected_run}.zip)",
            data=zip_buffer.getvalue(),
            file_name=f"{selected_run}.zip",
            mime="application/zip",
        )
|
|
|
|
|
|
|
|
|
|
|
# --- View Logs -----------------------------------------------------------------
# Appending a label string to `tabs` does not create a Streamlit container,
# and `with tabs[-1]:` on that string fails at runtime because `str` is not a
# context manager. The section is rendered in its own expander instead.
with st.expander("View Logs"):
    st.subheader(" Session & Model Logs")
    st.markdown("Each run creates a timestamped log file in `/logs/` inside this Space. Use this panel to review run progress and debug output.")

    # Log files are written inside the per-run sub-folders of LOG_DIR
    # (LOG_DIR/run_<id>/run.log), so the search has to be recursive; paths
    # are kept relative to LOG_DIR and sorted newest run first.
    log_files = sorted(
        (
            os.path.relpath(os.path.join(root, fname), LOG_DIR)
            for root, _, filenames in os.walk(LOG_DIR)
            for fname in filenames
            if fname.endswith(".log")
        ),
        reverse=True,
    )

    if not log_files:
        st.info("No logs yet. Run an AutoML job first.")
    else:
        latest = st.selectbox("Select log file", log_files, index=0)
        path = os.path.join(LOG_DIR, latest)
        # errors="replace" keeps the viewer usable even if a log contains
        # bytes that are not valid UTF-8.
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            content = f.read()
        st.text_area("Log Output", content, height=400)
        # The relative path may contain a directory separator, which is not
        # valid in a download file name.
        st.download_button(" Download Log", content, file_name=latest.replace(os.sep, "_"))
|
|
|