import streamlit as st
import pandas as pd
import numpy as np
import re
import io
import os
import joblib
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler, RobustScaler
from sklearn.metrics import (
accuracy_score, confusion_matrix, silhouette_score,
classification_report, f1_score, precision_score, recall_score
)
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from sklearn.feature_selection import mutual_info_classif
from sklearn.utils import resample
# ==========================================================
# PAGE CONFIG
# ==========================================================
st.set_page_config(
    page_title="AI AutoML Platform",
    page_icon="🤖",
    layout="wide",
)
# ==========================================================
# SESSION STATE
# ==========================================================
# One place for every session-state key the app relies on:
#   history          - all model runs shown in the History table
#   last_model_name  - most recent model (drives the Downloads section)
#   last_score       - score of the most recent run
#   model_results    - detailed per-run entries used by the reports
#   selected_target  - target column chosen by the user
#   cleaned_df       - cleaned dataframe reference for report generation
_SESSION_DEFAULTS = {
    "history": [],
    "last_model_name": None,
    "last_score": None,
    "model_results": [],
    "selected_target": None,
    "cleaned_df": None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# ==========================================================
# THEME CSS
# ==========================================================
st.markdown("""
""", unsafe_allow_html=True)
# ==========================================================
# HEADER
# ==========================================================
st.markdown('
🤖 AI AutoML Platform
', unsafe_allow_html=True)
st.markdown('upload csv select model download trained model
', unsafe_allow_html=True)
# ==========================================================
# HELPERS
# ==========================================================
def smart_clean(df):
    """Basic cleaning: drop duplicate rows and impute missing values.

    Object columns are filled with their mode (most frequent value);
    numeric columns with the median, which is more robust to outliers
    than the mean.
    """
    df = df.copy()
    df = df.drop_duplicates()
    for col in df.columns:
        if df[col].dtype == "object":
            mode = df[col].mode()
            # mode() is empty when the column is entirely NaN; the
            # original mode[0] would raise IndexError in that case.
            if not mode.empty:
                df[col] = df[col].fillna(mode[0])
        else:
            df[col] = df[col].fillna(df[col].median())
    return df
def convert_units(value):
    """Convert strings like "5km" / "10cm" / "3mm" to metres (float).

    Values containing no digits are returned unchanged; plain numbers
    (or metre values) are returned as floats.
    """
    try:
        txt = str(value).lower().strip()
        nums = re.findall(r'[\d.]+', txt)
        if not nums:
            return value
        num = float(nums[0])
        if "km" in txt:
            return num * 1000
        elif "cm" in txt:
            return num / 100
        elif "mm" in txt:
            return num / 1000
        # Metre values and unit-less numbers fall through unchanged
        # (the original had two branches that both returned num).
        return num
    except ValueError:
        # float() failed (e.g. "1.2.3"): keep the raw value rather than
        # swallowing every exception with a bare except.
        return value
def detect_unit_columns(df):
    """Auto-convert object columns whose sample value mentions a length unit.

    Detection inspects the first non-null value of each object column; if
    it contains "km", "cm", "mm" or " m" the whole column is run through
    convert_units.
    """
    df = df.copy()
    for col in df.columns:
        if df[col].dtype == "object":
            non_null = df[col].dropna()
            # Original used iloc[0], which raises IndexError on an empty
            # column and misses unit columns whose first value is NaN.
            if non_null.empty:
                continue
            sample = str(non_null.iloc[0]).lower()
            if any(x in sample for x in ["km", "cm", "mm", " m"]):
                df[col] = df[col].apply(convert_units)
    return df
def detect_best_target(df):
    """Heuristically rank columns by suitability as a supervised target.

    Returns (best_column, top_five) where top_five is a list of
    (column, score) pairs sorted best-first.
    """
    n_rows = len(df)

    def _score(col):
        # Favour low-cardinality (2-15 unique) and categorical columns;
        # penalise ID-like columns (mostly unique or very high cardinality).
        uniq = df[col].nunique()
        s = 0
        if 2 <= uniq <= 15:
            s += 6
        if df[col].dtype == "object":
            s += 3
        if uniq / n_rows > 0.9:
            s -= 10
        if uniq > 50:
            s -= 5
        return s

    scores = {col: _score(col) for col in df.columns}
    best = max(scores, key=scores.get)
    ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
    return best, ranked[:5]
def prepare_for_supervised(df, target):
    """Label-encode object columns, then split into features X and target y.

    Returns (X, y, encoded_frame) where encoded_frame still contains the
    target column.
    """
    data = df.copy()
    object_cols = [c for c in data.columns if data[c].dtype == "object"]
    for col in object_cols:
        data[col] = LabelEncoder().fit_transform(data[col].astype(str))
    return data.drop(columns=[target]), data[target], data
# --- ACCURACY HELPER FUNCTIONS ---
def clip_outliers_iqr(df):
    """Clip numeric outliers to the 1.5*IQR fences instead of dropping rows.

    Returns (clipped_frame, info) where info maps each touched column to
    the number of values that were clipped.
    """
    clipped = df.copy()
    info = {}
    for col in clipped.select_dtypes(include=[np.number]).columns:
        q1, q3 = clipped[col].quantile([0.25, 0.75])
        spread = q3 - q1
        lo = q1 - 1.5 * spread
        hi = q3 + 1.5 * spread
        outliers = ((clipped[col] < lo) | (clipped[col] > hi)).sum()
        if outliers > 0:
            clipped[col] = clipped[col].clip(lower=lo, upper=hi)
            info[col] = outliers
    return clipped, info
def remove_low_variance(X, threshold=0.01):
    """Drop features whose variance falls below ``threshold``.

    Returns (X_filtered, dropped_column_names).
    """
    dropped = [c for c, v in X.var().items() if v < threshold]
    if dropped:
        X = X.drop(columns=dropped)
    return X, dropped
def remove_high_correlation(X, threshold=0.95):
    """Drop one column from every pair correlated above ``threshold``.

    Only the upper triangle of the |correlation| matrix is scanned, so
    each pair is considered once and the earlier column is the one kept.
    Returns (X_filtered, dropped_column_names).
    """
    abs_corr = X.corr().abs()
    triangle = abs_corr.where(np.triu(np.ones(abs_corr.shape), k=1).astype(bool))
    dropped = [col for col in triangle.columns if (triangle[col] > threshold).any()]
    if dropped:
        X = X.drop(columns=dropped)
    return X, dropped
def balance_classes(X, y):
    """Random-oversample minority classes up to the majority class count.

    Balancing only activates when the majority/minority ratio is >= 2.
    Returns (X, y, balanced_flag); when no balancing happens the inputs
    are returned untouched with the flag False.
    """
    classes, counts = np.unique(y, return_counts=True)
    if len(classes) < 2:
        return X, y, False
    majority = counts.max()
    if majority / counts.min() < 2:
        return X, y, False
    # Collect oversampled rows per minority class, then concat once.
    X_parts, y_parts = [X.copy()], [y.copy()]
    for cls, cnt in zip(classes, counts):
        if cnt >= majority:
            continue
        deficit = majority - cnt
        rows = resample(
            X.loc[y[y == cls].index],
            replace=True,
            n_samples=deficit,
            random_state=42,
        )
        X_parts.append(rows)
        y_parts.append(pd.Series([cls] * deficit, index=rows.index))
    return pd.concat(X_parts), pd.concat(y_parts), True
def select_top_features(X, y, max_features=20):
    """Keep at most ``max_features`` columns, ranked by mutual information.

    Returns (X_selected, selected_column_names). If X already has few
    enough columns it is returned unchanged.
    """
    if X.shape[1] <= max_features:
        return X, list(X.columns)
    mi_scores = pd.Series(mutual_info_classif(X, y, random_state=42), index=X.columns)
    keep = mi_scores.sort_values(ascending=False).head(max_features).index.tolist()
    return X[keep], keep
def preprocess_for_model(df, target):
"""Full accuracy-boosting preprocessing pipeline."""
X, y, transformed = prepare_for_supervised(df, target)
# Clip outliers
transformed_clipped, outlier_info = clip_outliers_iqr(transformed)
X = transformed_clipped.drop(columns=[target])
y = transformed_clipped[target]
# Remove low variance
X, low_var = remove_low_variance(X)
# Remove high correlation
X, high_corr = remove_high_correlation(X)
# Balance classes
X, y, balanced = balance_classes(X, y)
# Feature selection
X, selected = select_top_features(X, y)
return X, y, transformed, {
"outliers_clipped": outlier_info,
"low_var_removed": low_var,
"high_corr_removed": high_corr,
"class_balanced": balanced,
"features_used": list(X.columns),
}
def show_confusion(y_true, y_pred, title):
    """Render a centred confusion-matrix heatmap and return the figure."""
    fig, _ = plt.subplots(figsize=(5, 4))
    sns.heatmap(
        confusion_matrix(y_true, y_pred),
        annot=True,
        fmt="d",
        cmap="Blues",
        linewidths=1,
    )
    plt.title(title)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    # Centre the plot in the middle column of a 1:2:1 layout.
    _, centre, _ = st.columns([1, 2, 1])
    with centre:
        st.pyplot(fig)
    return fig
def compact_bar(labels, values, title):
    """Render a small centred bar chart and return the figure."""
    fig, _ = plt.subplots(figsize=(6, 3))
    sns.barplot(x=labels, y=values)
    plt.xticks(rotation=20)
    plt.title(title)
    # Centre the plot in the middle column of a 1:2:1 layout.
    _, centre, _ = st.columns([1, 2, 1])
    with centre:
        st.pyplot(fig)
    return fig
def save_result(name, score, target_col, features_used, extra_info=None):
    """Record a model run in session state for the history and report views.

    ``extra_info`` (optional dict) is merged into the entry so callers can
    attach per-model metrics such as Precision/Recall/BestParams.
    """
    st.session_state.last_model_name = name
    st.session_state.last_score = score
    entry = dict(
        Model=name,
        Score=score,
        Target=target_col,
        Features=features_used,
        Timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    )
    entry.update(extra_info or {})
    st.session_state.history.append(entry)
    st.session_state.model_results.append(entry)
# --- REPORT GENERATORS ---
def generate_text_report(df, target, model_results):
    """Generate a comprehensive TXT report with every detail.

    Builds a plain-text report covering the dataset summary, per-column
    stats, every recorded model run, the best run, and the preprocessing
    pipeline. Returns the report as a single newline-joined string.
    """
    # Highest-scoring run, used for the BEST MODEL section below.
    best = max(model_results, key=lambda x: x["Score"]) if model_results else None
    lines = []
    lines.append("=" * 70)
    lines.append(" DARK AI AUTOML PLATFORM - FULL REPORT")
    lines.append("=" * 70)
    lines.append(f" Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    lines.append("")
    lines.append("-" * 70)
    lines.append(" DATASET SUMMARY")
    lines.append("-" * 70)
    lines.append(f" Rows: {df.shape[0]}")
    lines.append(f" Columns: {df.shape[1]}")
    lines.append(f" Target Column: {target}")
    lines.append(f" Target Unique Values: {df[target].nunique()}")
    lines.append("")
    lines.append("-" * 70)
    lines.append(" COLUMN DETAILS")
    lines.append("-" * 70)
    # Per-column dtype / cardinality / missing-value summary.
    for col in df.columns:
        dtype = str(df[col].dtype)
        nunique = df[col].nunique()
        missing = df[col].isnull().sum()
        lines.append(f" {col}: type={dtype}, unique={nunique}, missing={missing}")
    lines.append("")
    lines.append("-" * 70)
    lines.append(" MODEL RESULTS (ALL RUNS)")
    lines.append("-" * 70)
    # One section per recorded run; optional keys (metrics, params,
    # preprocessing flags) are printed only when the run recorded them.
    for i, r in enumerate(model_results, 1):
        lines.append("")
        lines.append(f" Run #{i}")
        lines.append(f" Model: {r['Model']}")
        lines.append(f" Accuracy/Score: {r['Score']:.2f}%")
        lines.append(f" Target Feature: {r.get('Target', 'N/A')}")
        lines.append(f" Features Used: {r.get('Features', 'N/A')}")
        lines.append(f" Timestamp: {r.get('Timestamp', 'N/A')}")
        if "Precision" in r:
            lines.append(f" Precision: {r['Precision']:.2f}%")
        if "Recall" in r:
            lines.append(f" Recall: {r['Recall']:.2f}%")
        if "F1Score" in r:
            lines.append(f" F1 Score: {r['F1Score']:.2f}%")
        if "BestParams" in r:
            lines.append(f" Best Hyperparameters: {r['BestParams']}")
        if "OutliersClipped" in r:
            lines.append(f" Outliers Clipped: {r['OutliersClipped']} columns")
        if "LowVarRemoved" in r:
            lines.append(f" Low Variance Features Removed: {r['LowVarRemoved']}")
        if "HighCorrRemoved" in r:
            lines.append(f" High Correlation Features Removed: {r['HighCorrRemoved']}")
        if "ClassBalanced" in r:
            lines.append(f" Class Balancing Applied: {r['ClassBalanced']}")
        if "BestK" in r:
            lines.append(f" Optimal Clusters (k): {r['BestK']}")
    if best:
        lines.append("")
        lines.append("-" * 70)
        lines.append(" BEST MODEL")
        lines.append("-" * 70)
        lines.append(f" Model: {best['Model']}")
        lines.append(f" Score: {best['Score']:.2f}%")
        lines.append(f" Target: {best.get('Target', 'N/A')}")
    lines.append("")
    lines.append("-" * 70)
    lines.append(" PREPROCESSING PIPELINE")
    lines.append("-" * 70)
    # Static description of the pipeline implemented elsewhere in this file.
    lines.append(" - Duplicate removal")
    lines.append(" - Missing values handled (median for numeric, mode for categorical)")
    lines.append(" - Unit conversion (km/cm/mm -> m)")
    lines.append(" - Categorical encoding (LabelEncoder)")
    lines.append(" - Outlier clipping (IQR method)")
    lines.append(" - Low variance feature removal")
    lines.append(" - High correlation feature removal")
    lines.append(" - Class imbalance handling (oversampling)")
    lines.append(" - Feature selection (mutual information, top 20)")
    lines.append(" - Scaling where required (StandardScaler / RobustScaler)")
    lines.append(" - Hyperparameter tuning (GridSearchCV)")
    lines.append(" - Stratified cross-validation (5-fold)")
    lines.append("")
    lines.append("=" * 70)
    lines.append(" END OF REPORT")
    lines.append("=" * 70)
    return "\n".join(lines)
def generate_xlsx_report(df, target, model_results):
    """Build an in-memory multi-sheet XLSX report and return it as BytesIO."""
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
        # Sheet 1: high-level dataset facts.
        pd.DataFrame({
            "Property": ["Rows", "Columns", "Target Column", "Target Unique Values"],
            "Value": [df.shape[0], df.shape[1], target, df[target].nunique()],
        }).to_excel(writer, sheet_name="Dataset Summary", index=False)
        # Sheet 2: per-column type / cardinality / missing-value stats.
        details = [
            {
                "Column": col,
                "Type": str(df[col].dtype),
                "Unique Values": df[col].nunique(),
                "Missing Values": df[col].isnull().sum(),
            }
            for col in df.columns
        ]
        pd.DataFrame(details).to_excel(writer, sheet_name="Column Details", index=False)
        # Sheet 3: every recorded model run.
        pd.DataFrame(model_results).to_excel(writer, sheet_name="Model Results", index=False)
        # Sheet 4: the single best run by score.
        if model_results:
            top = max(model_results, key=lambda r: r["Score"])
            pd.DataFrame([top]).to_excel(writer, sheet_name="Best Model", index=False)
    buffer.seek(0)
    return buffer
# ==========================================================
# UPLOAD
# ==========================================================
# NOTE(review): the header string was an unterminated single-quoted
# literal spanning two lines (a SyntaxError); repaired to one line.
st.markdown("📁 Upload Dataset", unsafe_allow_html=True)
file = st.file_uploader("Upload CSV File", type=["csv"])
# ==========================================================
# MAIN APP
# ==========================================================
if file:
    raw = pd.read_csv(file)
    # NOTE(review): section-header strings in this span were unterminated
    # single-quoted literals (SyntaxErrors); repaired to one-line strings.
    st.markdown("📌 Dataset Preview", unsafe_allow_html=True)
    st.dataframe(raw.head(), use_container_width=True)

    # Clean + unit-normalise, and keep the cleaned frame for reports.
    df = smart_clean(raw)
    df = detect_unit_columns(df)
    st.session_state.cleaned_df = df

    # ------------------------------------------------------
    # TARGET DETECTION
    # ------------------------------------------------------
    st.markdown("🎯 AI Target Detection", unsafe_allow_html=True)
    best_target, top5 = detect_best_target(df)
    st.success(f"Recommended Target Column: {best_target}")
    st.write("Top Suggestions:")
    for n, s in top5:
        st.write(f"• {n} (score: {s})")
    # Dropdown with AI recommendation pre-selected; the user can override.
    target = st.selectbox(
        "Choose Target Column (AI recommended is pre-selected - change if needed)",
        [best_target] + [c for c in df.columns if c != best_target]
    )
    st.session_state.selected_target = target
# ------------------------------------------------------
# MODEL SELECT
# ------------------------------------------------------
st.markdown('🤖 Choose Model
', unsafe_allow_html=True)
model_choice = st.selectbox(
"Select One Model",
[
"Random Forest",
"SVM",
"Logistic Regression",
"Decision Tree",
"KMeans Clustering"
]
)
# ------------------------------------------------------
# APPLY MODEL
# ------------------------------------------------------
if st.button("🚀 Apply Model"):
# Each model result is in its own container so
# applying a second model shows results separately beneath the first
# RANDOM FOREST
if model_choice == "Random Forest":
X, y, transformed, pp_info = preprocess_for_model(df, target)
features_used = pp_info["features_used"]
result_box = st.container()
with result_box:
st.markdown('', unsafe_allow_html=True)
st.markdown(f"### Random Forest Results (Target: {target})")
col1, col2 = st.columns(2)
with col1:
st.write("Original")
st.dataframe(raw.head())
with col2:
st.write("Processed")
st.dataframe(transformed.head())
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42, stratify=y
)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
model = GridSearchCV(
RandomForestClassifier(),
{
"n_estimators":[100,200,300],
"max_depth":[5,10,15,None],
"min_samples_split":[2,5],
"min_samples_leaf":[1,2]
},
cv=cv,
n_jobs=-1
)
model.fit(X_train, y_train)
pred = model.predict(X_test)
acc = accuracy_score(y_test, pred)*100
prec = precision_score(y_test, pred, average="weighted", zero_division=0)*100
rec = recall_score(y_test, pred, average="weighted", zero_division=0)*100
f1 = f1_score(y_test, pred, average="weighted", zero_division=0)*100
st.success(f"Accuracy: {acc:.2f}%")
st.info(f"Precision: {prec:.2f}% | Recall: {rec:.2f}% | F1: {f1:.2f}%")
show_confusion(y_test, pred, "Random Forest Matrix")
imp = pd.Series(
model.best_estimator_.feature_importances_,
index=X.columns
).sort_values(ascending=False).head(8)
compact_bar(imp.index, imp.values, "Feature Importance")
st.write("**Classification Report:**")
st.text(classification_report(y_test, pred, zero_division=0))
st.markdown('
', unsafe_allow_html=True)
joblib.dump(model.best_estimator_, "random_forest.pkl")
save_result("Random Forest", acc, target, ", ".join(features_used), {
"Precision": prec,
"Recall": rec,
"F1Score": f1,
"BestParams": str(model.best_params_),
"OutliersClipped": len(pp_info["outliers_clipped"]),
"LowVarRemoved": str(pp_info["low_var_removed"]),
"HighCorrRemoved": str(pp_info["high_corr_removed"]),
"ClassBalanced": pp_info["class_balanced"],
})
# SVM
elif model_choice == "SVM":
X, y, transformed, pp_info = preprocess_for_model(df, target)
features_used = pp_info["features_used"]
result_box = st.container()
with result_box:
st.markdown('', unsafe_allow_html=True)
st.markdown(f"### SVM Results (Target: {target})")
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42, stratify=y
)
# RobustScaler for SVM (handles outliers better)
sc = RobustScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
model = GridSearchCV(
SVC(),
{
"C":[0.1,1,10,100],
"kernel":["rbf","linear","poly"],
"gamma":["scale","auto"]
},
cv=cv,
n_jobs=-1
)
model.fit(X_train, y_train)
pred = model.predict(X_test)
acc = accuracy_score(y_test, pred)*100
prec = precision_score(y_test, pred, average="weighted", zero_division=0)*100
rec = recall_score(y_test, pred, average="weighted", zero_division=0)*100
f1 = f1_score(y_test, pred, average="weighted", zero_division=0)*100
st.success(f"Accuracy: {acc:.2f}%")
st.info(f"Precision: {prec:.2f}% | Recall: {rec:.2f}% | F1: {f1:.2f}%")
show_confusion(y_test, pred, "SVM Matrix")
st.write("**Classification Report:**")
st.text(classification_report(y_test, pred, zero_division=0))
st.markdown('
', unsafe_allow_html=True)
joblib.dump(model.best_estimator_, "svm.pkl")
save_result("SVM", acc, target, ", ".join(features_used), {
"Precision": prec,
"Recall": rec,
"F1Score": f1,
"BestParams": str(model.best_params_),
"OutliersClipped": len(pp_info["outliers_clipped"]),
"LowVarRemoved": str(pp_info["low_var_removed"]),
"HighCorrRemoved": str(pp_info["high_corr_removed"]),
"ClassBalanced": pp_info["class_balanced"],
})
# LOGISTIC
elif model_choice == "Logistic Regression":
X, y, transformed, pp_info = preprocess_for_model(df, target)
features_used = pp_info["features_used"]
result_box = st.container()
with result_box:
st.markdown('', unsafe_allow_html=True)
st.markdown(f"### Logistic Regression Results (Target: {target})")
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42, stratify=y
)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
model = GridSearchCV(
LogisticRegression(max_iter=5000, solver="liblinear"),
{
"C":[0.01,0.1,1,10,100],
"penalty":["l1","l2"]
},
cv=cv,
n_jobs=-1
)
model.fit(X_train, y_train)
pred = model.predict(X_test)
acc = accuracy_score(y_test, pred)*100
prec = precision_score(y_test, pred, average="weighted", zero_division=0)*100
rec = recall_score(y_test, pred, average="weighted", zero_division=0)*100
f1 = f1_score(y_test, pred, average="weighted", zero_division=0)*100
st.success(f"Accuracy: {acc:.2f}%")
st.info(f"Precision: {prec:.2f}% | Recall: {rec:.2f}% | F1: {f1:.2f}%")
show_confusion(y_test, pred, "Logistic Regression Matrix")
# Show coefficient magnitudes for logistic regression
if hasattr(model.best_estimator_, "coef_"):
coef = pd.Series(
np.abs(model.best_estimator_.coef_[0]),
index=X.columns
).sort_values(ascending=False).head(8)
compact_bar(coef.index, coef.values, "Feature Coefficients (Absolute)")
st.write("**Classification Report:**")
st.text(classification_report(y_test, pred, zero_division=0))
st.markdown('
', unsafe_allow_html=True)
joblib.dump(model.best_estimator_, "logistic.pkl")
save_result("Logistic Regression", acc, target, ", ".join(features_used), {
"Precision": prec,
"Recall": rec,
"F1Score": f1,
"BestParams": str(model.best_params_),
"OutliersClipped": len(pp_info["outliers_clipped"]),
"LowVarRemoved": str(pp_info["low_var_removed"]),
"HighCorrRemoved": str(pp_info["high_corr_removed"]),
"ClassBalanced": pp_info["class_balanced"],
})
# DECISION TREE
elif model_choice == "Decision Tree":
X, y, transformed, pp_info = preprocess_for_model(df, target)
features_used = pp_info["features_used"]
result_box = st.container()
with result_box:
st.markdown('', unsafe_allow_html=True)
st.markdown(f"### Decision Tree Results (Target: {target})")
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42, stratify=y
)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
model = GridSearchCV(
DecisionTreeClassifier(),
{
"max_depth":[3,5,10,15,None],
"min_samples_split":[2,5,10],
"min_samples_leaf":[1,2,4],
"criterion":["gini","entropy"]
},
cv=cv,
n_jobs=-1
)
model.fit(X_train, y_train)
pred = model.predict(X_test)
acc = accuracy_score(y_test, pred)*100
prec = precision_score(y_test, pred, average="weighted", zero_division=0)*100
rec = recall_score(y_test, pred, average="weighted", zero_division=0)*100
f1 = f1_score(y_test, pred, average="weighted", zero_division=0)*100
st.success(f"Accuracy: {acc:.2f}%")
st.info(f"Precision: {prec:.2f}% | Recall: {rec:.2f}% | F1: {f1:.2f}%")
show_confusion(y_test, pred, "Decision Tree Matrix")
# Feature importance for decision tree
imp = pd.Series(
model.best_estimator_.feature_importances_,
index=X.columns
).sort_values(ascending=False).head(8)
compact_bar(imp.index, imp.values, "Feature Importance")
st.write("**Classification Report:**")
st.text(classification_report(y_test, pred, zero_division=0))
st.markdown('
', unsafe_allow_html=True)
joblib.dump(model.best_estimator_, "decision_tree.pkl")
save_result("Decision Tree", acc, target, ", ".join(features_used), {
"Precision": prec,
"Recall": rec,
"F1Score": f1,
"BestParams": str(model.best_params_),
"OutliersClipped": len(pp_info["outliers_clipped"]),
"LowVarRemoved": str(pp_info["low_var_removed"]),
"HighCorrRemoved": str(pp_info["high_corr_removed"]),
"ClassBalanced": pp_info["class_balanced"],
})
# KMEANS
elif model_choice == "KMeans Clustering":
temp = df.copy()
for col in temp.columns:
if temp[col].dtype == "object":
le = LabelEncoder()
temp[col] = le.fit_transform(temp[col].astype(str))
X = temp.drop(columns=[target])
# Clip outliers for clustering too
temp_clipped, outlier_info = clip_outliers_iqr(temp)
X_clipped = temp_clipped.drop(columns=[target])
sc = StandardScaler()
Xs = sc.fit_transform(X_clipped)
# Find optimal k using elbow method
inertias = []
K_range = range(2, min(11, len(df) // 10 + 1))
for k in K_range:
km = KMeans(n_clusters=k, random_state=42, n_init=10)
km.fit(Xs)
inertias.append(km.inertia_)
best_k = 3
if len(inertias) >= 3:
diffs = [inertias[i] - inertias[i+1] for i in range(len(inertias)-1)]
if diffs:
elbow_idx = np.argmax(diffs) + 1
best_k = list(K_range)[elbow_idx] if elbow_idx < len(list(K_range)) else 3
best_k = max(2, min(best_k, 10))
result_box = st.container()
with result_box:
st.markdown('', unsafe_allow_html=True)
st.markdown(f"### KMeans Clustering Results (Target: {target})")
model = KMeans(n_clusters=best_k, random_state=42, n_init=10)
cluster = model.fit_predict(Xs)
score = silhouette_score(Xs, cluster)*100
st.success(f"Cluster Quality Score: {score:.2f}% (k={best_k})")
fig, ax = plt.subplots(figsize=(6,4))
plt.scatter(Xs[:,0], Xs[:,1], c=cluster, cmap="viridis")
plt.title(f"Clusters (k={best_k})")
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
st.pyplot(fig)
# Elbow plot
fig2, ax2 = plt.subplots(figsize=(6,3))
plt.plot(list(K_range), inertias, "bo-")
plt.xlabel("Number of Clusters (k)")
plt.ylabel("Inertia")
plt.title("Elbow Method")
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
st.pyplot(fig2)
# Cluster distribution
cluster_counts = pd.Series(cluster).value_counts().sort_index()
fig3, ax3 = plt.subplots(figsize=(6,3))
sns.barplot(x=cluster_counts.index, y=cluster_counts.values)
plt.xlabel("Cluster")
plt.ylabel("Count")
plt.title("Cluster Distribution")
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
st.pyplot(fig3)
st.markdown('
', unsafe_allow_html=True)
joblib.dump(model, "kmeans.pkl")
save_result("KMeans Clustering", score, target, ", ".join(X_clipped.columns), {
"BestK": best_k,
"OutliersClipped": len(outlier_info),
})
# ==========================================================
# DOWNLOAD SECTION
# ==========================================================
if st.session_state.last_model_name:
    # NOTE(review): the header string was an unterminated single-quoted
    # literal spanning two lines (a SyntaxError); repaired to one line.
    st.markdown("⬇ Downloads", unsafe_allow_html=True)
    # Map each model name to the pickle written when it was trained.
    file_map = {
        "Random Forest": "random_forest.pkl",
        "SVM": "svm.pkl",
        "Logistic Regression": "logistic.pkl",
        "Decision Tree": "decision_tree.pkl",
        "KMeans Clustering": "kmeans.pkl",
    }
    current = file_map[st.session_state.last_model_name]
    if os.path.exists(current):
        with open(current, "rb") as f:
            st.download_button(
                label=f"Download {st.session_state.last_model_name} (Deploy Ready)",
                data=f,
                file_name=current,
                mime="application/octet-stream",
            )
# ==========================================================
# HISTORY + REPORTS
# ==========================================================
if len(st.session_state.history) > 0:
    # NOTE(review): the header string was an unterminated single-quoted
    # literal spanning two lines (a SyntaxError); repaired to one line.
    st.markdown("📊 History", unsafe_allow_html=True)
    hist = pd.DataFrame(st.session_state.history)
    st.dataframe(hist, use_container_width=True)
    fig, ax = plt.subplots(figsize=(6, 3))
    sns.barplot(data=hist, x="Model", y="Score")
    plt.xticks(rotation=20)
    plt.title("All Applied Models")
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        st.pyplot(fig)
    # CSV download of the raw history table.
    csv_buffer = io.StringIO()
    hist.to_csv(csv_buffer, index=False)
    st.download_button(
        "Download Results CSV",
        csv_buffer.getvalue(),
        "results.csv"
    )
    # Detailed reports require the cleaned frame and at least one run.
    if st.session_state.cleaned_df is not None and len(st.session_state.model_results) > 0:
        # TXT report.
        report_text = generate_text_report(
            st.session_state.cleaned_df,
            st.session_state.selected_target or "unknown",
            st.session_state.model_results
        )
        st.download_button(
            "Download Full Report (TXT)",
            report_text,
            "full_report.txt",
            mime="text/plain"
        )
        # XLSX report — deliberately best-effort: the button simply does
        # not appear when generation fails (e.g. openpyxl not installed).
        try:
            xlsx_data = generate_xlsx_report(
                st.session_state.cleaned_df,
                st.session_state.selected_target or "unknown",
                st.session_state.model_results
            )
            st.download_button(
                "Download Full Report (XLSX)",
                data=xlsx_data.getvalue(),
                file_name="full_report.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )
        except Exception:
            pass
# ==========================================================
# RESET
# ==========================================================
# NOTE(review): the header string was an unterminated single-quoted
# literal spanning two lines (a SyntaxError); repaired to one line.
st.markdown("♻ Reset", unsafe_allow_html=True)
if st.button("Clear History"):
    # Restore every session-state key to its initial value.
    st.session_state.history = []
    st.session_state.last_model_name = None
    st.session_state.last_score = None
    st.session_state.model_results = []
    st.session_state.selected_target = None
    st.session_state.cleaned_df = None
    st.success("History Cleared")