Instructions to use GouravSinghThakur/AutoML with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Scikit-learn
How to use GouravSinghThakur/AutoML with Scikit-learn:
```python
from huggingface_hub import hf_hub_download
import joblib

model = joblib.load(
    hf_hub_download("GouravSinghThakur/AutoML", "sklearn_model.joblib")
)
# only load pickle files from sources you trust
# read more about it here https://skops.readthedocs.io/en/stable/persistence.html
```
- Notebooks
- Google Colab
- Kaggle
import io

import joblib
import matplotlib.pyplot as plt
import numpy as np
import openpyxl
import optuna
import pandas as pd
import plotly.express as px
import seaborn as sns
import streamlit as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, roc_curve, auc, classification_report
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from xgboost import XGBClassifier
# Page-level Streamlit settings: page title and the "wide" layout.
st.set_page_config(page_title="ML Model Deployment", layout="wide")
def load_data(file):
    """Read an uploaded CSV or Excel file into a DataFrame.

    Args:
        file: File-like object exposing a ``name`` attribute (for example the
            object returned by ``st.file_uploader``).

    Returns:
        The parsed ``DataFrame``, or ``None`` when the extension is not
        supported or parsing fails. In both cases the problem is reported
        through ``st.error``.
    """
    try:
        # Compare case-insensitively so "DATA.CSV" is handled like "data.csv".
        filename = file.name.lower()
        if filename.endswith('.csv'):
            return pd.read_csv(file)
        if filename.endswith(('.xls', '.xlsx')):
            return pd.read_excel(file)
    except Exception as e:  # UI boundary: show any reader failure to the user
        st.error(f"Error loading file: {e}")
        return None

    # Unsupported extensions are reported explicitly instead of relying on an
    # unbound local variable being caught by the handler above.
    st.error(f"Error loading file: unsupported file type '{file.name}'")
    return None
def auto_process_data(data):
    """Impute missing values and label-encode object columns.

    Works on a copy, so the caller's frame is left untouched.

    Args:
        data: Input ``DataFrame``.

    Returns:
        tuple: ``(processed_data, label_encoders)`` where ``label_encoders``
        maps each encoded column name to its fitted ``LabelEncoder``.
    """
    processed_data = data.copy()
    label_encoders = {}

    if processed_data.isnull().sum().sum() > 0:
        st.info("Automatically handling missing values...")

        # Numeric gaps are filled with the column median.
        num_cols = processed_data.select_dtypes(include=['int64', 'float64']).columns
        if len(num_cols) > 0:
            num_imputer = SimpleImputer(strategy='median')
            processed_data[num_cols] = num_imputer.fit_transform(processed_data[num_cols])

        # Categorical gaps are filled with the most frequent value.
        for col in processed_data.select_dtypes(include=['object']).columns:
            if not processed_data[col].isnull().any():
                continue
            modes = processed_data[col].mode()
            if modes.empty:
                # Column has no observed value at all, so there is nothing to
                # impute from; it is still encoded below after astype(str).
                continue
            # Assign the result back instead of using ``inplace=True`` on the
            # column selection, which is a chained write that may not reach
            # the parent frame.
            processed_data[col] = processed_data[col].fillna(modes.iloc[0])

    for column in processed_data.select_dtypes(include=['object']):
        encoder = LabelEncoder()
        processed_data[column] = encoder.fit_transform(processed_data[column].astype(str))
        label_encoders[column] = encoder

    return processed_data, label_encoders
def get_model_configs():
    """Return the registry of candidate classifiers.

    Returns:
        dict: Maps a display name to ``{'pipeline': Pipeline, 'params': dict}``.
        Each pipeline is a ``StandardScaler`` followed by a step named
        ``'classifier'``; ``'params'`` is the matching hyper-parameter grid.
        New estimator objects are built on every call.
    """
    def _entry(scaler_step, classifier, grid):
        # One registry record: scaler + classifier pipeline and its grid.
        steps = [(scaler_step, StandardScaler()), ('classifier', classifier)]
        return {'pipeline': Pipeline(steps), 'params': grid}

    logistic_grid = {
        'classifier__penalty': ['l1', 'l2'],
        'classifier__C': [0.01, 0.1, 1],
        'classifier__max_iter': [100, 200],
        'classifier__solver': ['liblinear', 'saga'],
    }
    svm_grid = {
        'classifier__C': [0.001, 0.1, 1],
        'classifier__kernel': ['linear', 'rbf', 'sigmoid'],
        'classifier__gamma': ['scale', 'auto', 0.01, 0.1, 1],
        'classifier__max_iter': [100, 200],
    }
    forest_grid = {
        'classifier__n_estimators': [100, 200],
        'classifier__max_depth': [None, 10, 20],
        'classifier__min_samples_split': [2, 5, 10],
        'classifier__min_samples_leaf': [1, 2, 4],
    }
    xgb_grid = {
        'classifier__n_estimators': [100, 200],
        'classifier__learning_rate': [0.01, 0.05, 0.1],
        'classifier__max_depth': [3, 5, 7],
        'classifier__min_child_weight': [1, 3, 5],
        'classifier__subsample': [0.8, 1.0],
    }

    return {
        'Logistic Regression': _entry('scaler', LogisticRegression(), logistic_grid),
        'Support Vector Machine': _entry('scaler', SVC(probability=True), svm_grid),
        'Random Forest': _entry('scaler', RandomForestClassifier(), forest_grid),
        'XgBoost': _entry(
            'scaled',
            XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
            xgb_grid,
        ),
    }
def train_model(X_train, y_train, selected_model, progress_bar=None):
    """Grid-search one registered model and return the best fit.

    Args:
        X_train: Training features.
        y_train: Training labels.
        selected_model: Key into the dict returned by ``get_model_configs``.
        progress_bar: Optional object with a ``progress`` method; it is set
            to 1.0 once the search has finished.

    Returns:
        tuple: ``(best_estimator, best_cv_accuracy)``.
    """
    config = get_model_configs()[selected_model]
    search = GridSearchCV(
        estimator=config['pipeline'],
        param_grid=config['params'],
        scoring="accuracy",
        cv=5,
        n_jobs=-1,
        verbose=0,
    )

    with st.spinner(f"Training {selected_model}..."):
        search.fit(X_train, y_train)
        if progress_bar:
            progress_bar.progress(1.0)

    return search.best_estimator_, search.best_score_
def objective(trial, X_train, y_train, model_name):
    """Optuna objective: mean cross-validated accuracy of one sampled setup.

    Args:
        trial: Optuna trial used to sample hyper-parameters.
        X_train: Training features.
        y_train: Training labels.
        model_name: Key into the dict returned by ``get_model_configs``.

    Returns:
        float: Mean accuracy over the cross-validation folds.

    Raises:
        KeyError: If ``model_name`` is not a registered model.
    """
    model_config = get_model_configs()[model_name]

    # Use fewer folds on small datasets.
    dataset_size = len(X_train)
    cv_folds = 5 if dataset_size > 1000 else (3 if dataset_size > 500 else min(2, dataset_size))

    params = {}
    if model_name == 'Logistic Regression':
        params = {
            'classifier__penalty': trial.suggest_categorical('classifier__penalty', ['l1', 'l2']),
            'classifier__C': trial.suggest_float('classifier__C', 0.01, 1.0, log=True),
            'classifier__solver': trial.suggest_categorical('classifier__solver', ['liblinear', 'saga']),
            'classifier__max_iter': trial.suggest_int('classifier__max_iter', 100, 200),
        }
    elif model_name == 'Support Vector Machine':
        params = {
            'classifier__C': trial.suggest_float('classifier__C', 0.001, 1.0, log=True),
            'classifier__kernel': trial.suggest_categorical('classifier__kernel', ['linear', 'rbf', 'sigmoid']),
            'classifier__gamma': trial.suggest_categorical('classifier__gamma', ['scale', 'auto', 0.01, 0.1, 1]),
            'classifier__max_iter': trial.suggest_int('classifier__max_iter', 100, 200),
        }
    elif model_name == 'Random Forest':
        params = {
            'classifier__n_estimators': trial.suggest_int('classifier__n_estimators', 100, 200),
            'classifier__max_depth': trial.suggest_categorical('classifier__max_depth', [None, 10, 20]),
            'classifier__min_samples_split': trial.suggest_int('classifier__min_samples_split', 2, 10),
            'classifier__min_samples_leaf': trial.suggest_int('classifier__min_samples_leaf', 1, 4),
        }
    elif model_name.lower() == 'xgboost':
        # Match case-insensitively: the registry key is spelled 'XgBoost', so
        # an exact comparison with 'XGBoost' never matched and the model was
        # evaluated with no sampled parameters at all.
        params = {
            'classifier__n_estimators': trial.suggest_int('classifier__n_estimators', 100, 300),
            'classifier__learning_rate': trial.suggest_float('classifier__learning_rate', 0.01, 0.2, log=True),
            'classifier__max_depth': trial.suggest_int('classifier__max_depth', 3, 10),
            'classifier__min_child_weight': trial.suggest_int('classifier__min_child_weight', 1, 6),
        }

    pipeline = model_config['pipeline'].set_params(**params)

    # No separate pipeline.fit() here: the fitted pipeline was never used, so
    # it only added one extra full training run per trial.
    # error_score="raise" keeps fit failures loud, as the explicit fit did.
    scores = cross_val_score(
        pipeline, X_train, y_train, cv=cv_folds, scoring="accuracy", error_score="raise"
    )
    return scores.mean()
def auto_train(X_train, y_train, X_test, y_test):
    """Tune every registered model with Optuna and pick the best one.

    Each model gets 20 Optuna trials, is refitted on the training data with
    the best parameters found, and is then scored on the hold-out set. A
    comparison table and the winner are written to the Streamlit page.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Hold-out features.
        y_test: Hold-out labels.

    Returns:
        tuple: ``(best_model, best_model_name)``, chosen by hold-out accuracy.
    """
    models = get_model_configs()
    results = {}
    best_score = 0
    best_model = None
    best_model_name = None

    st.write("🔄 Training models with Optuna hyperparameter tuning...")
    progress_cols = st.columns(len(models))
    progress_bars = {
        model_name: progress_cols[i].progress(0.0)
        for i, model_name in enumerate(models)
    }

    for model_name, model_config in models.items():
        st.write(f"🛠 Training {model_name}...")

        # Run Optuna optimization
        study = optuna.create_study(direction='maximize')
        study.optimize(lambda trial: objective(trial, X_train, y_train, model_name), n_trials=20)

        # Refit on the full training split with the best parameters found
        pipeline = model_config['pipeline'].set_params(**study.best_params)
        pipeline.fit(X_train, y_train)

        # Evaluate on the hold-out split
        test_accuracy = accuracy_score(y_test, pipeline.predict(X_test))
        results[model_name] = {
            'model': pipeline,
            'cv_score': study.best_value,
            'test_accuracy': test_accuracy,
        }
        progress_bars[model_name].progress(1.0)

        # Always accept the first model so a run where every accuracy is 0
        # still returns a usable model instead of None.
        if best_model is None or test_accuracy > best_score:
            best_score = test_accuracy
            best_model = pipeline
            best_model_name = model_name

    # Display results
    results_df = pd.DataFrame({
        'Model': list(results),
        'Cross-Validation Score': [entry['cv_score'] for entry in results.values()],
        'Test Accuracy': [entry['test_accuracy'] for entry in results.values()],
    }).sort_values('Test Accuracy', ascending=False)

    st.subheader("📊 Model Performance Comparison")
    st.dataframe(results_df)
    st.success(f"🏆 Best model: **{best_model_name}** with accuracy: **{best_score:.2%}**")

    return best_model, best_model_name
def get_classification_report(y_true, y_pred):
    """Return the classification report as a DataFrame, one row per report entry."""
    as_dict = classification_report(y_true, y_pred, output_dict=True)
    return pd.DataFrame(as_dict).T
def evaluate_models(X_train, X_test, y_train, y_test):
    """Fit every registered model and plot ROC, confusion and metric charts.

    The metrics use ``average='binary'`` and column 1 of ``predict_proba``,
    so this function is written for two-class targets.

    Args:
        X_train: Training features.
        X_test: Hold-out features.
        y_train: Training labels.
        y_test: Hold-out labels.

    Returns:
        DataFrame: One row per model with Accuracy, Precision, Recall,
        F1-score and ROC-AUC columns.
    """
    models = get_model_configs()
    results = {}
    fitted = {}

    plt.figure(figsize=(10, 6))
    for name, config in models.items():
        # Each registry value is a config dict; the estimator lives under
        # 'pipeline'. Calling .fit on the dict itself raises AttributeError.
        pipeline = config['pipeline']
        pipeline.fit(X_train, y_train)
        fitted[name] = pipeline

        y_pred = pipeline.predict(X_test)
        y_prob = pipeline.predict_proba(X_test)[:, 1] if hasattr(pipeline, "predict_proba") else None
        roc_auc = roc_auc_score(y_test, y_prob) if y_prob is not None else None

        results[name] = {
            "Accuracy": accuracy_score(y_test, y_pred),
            "Precision": precision_score(y_test, y_pred, average='binary'),
            "Recall": recall_score(y_test, y_pred, average='binary'),
            "F1-score": f1_score(y_test, y_pred, average='binary'),
            "ROC-AUC": roc_auc,
        }

        if y_prob is not None:
            fpr, tpr, _ = roc_curve(y_test, y_prob)
            plt.plot(fpr, tpr, label=f"{name} (AUC = {roc_auc:.2f})")

    plt.plot([0, 1], [0, 1], linestyle="--", color="gray")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curves")
    plt.legend()
    plt.show()

    # The 2x2 grid shows at most four models; zip() stops at the shorter input.
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    for ax, (name, pipeline) in zip(axes.ravel(), fitted.items()):
        cm = confusion_matrix(y_test, pipeline.predict(X_test))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_title(f"{name} - Confusion Matrix")
        ax.set_xlabel("Predicted Label")
        ax.set_ylabel("True Label")
    plt.tight_layout()
    plt.show()

    results_df = pd.DataFrame(results).T
    results_df.plot(kind="bar", figsize=(10, 6))
    plt.title("Model Comparison")
    plt.ylabel("Score")
    plt.xticks(rotation=45)
    plt.legend(title="Metrics")
    plt.show()

    return results_df
def _score_binary_classifier(pipeline, X_test, y_test):
    """Return hold-out metrics for a fitted two-class pipeline as a dict."""
    y_pred = pipeline.predict(X_test)
    y_prob = pipeline.predict_proba(X_test)[:, 1] if hasattr(pipeline, "predict_proba") else None
    return {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='binary'),
        "Recall": recall_score(y_test, y_pred, average='binary'),
        "F1-score": f1_score(y_test, y_pred, average='binary'),
        "ROC-AUC": roc_auc_score(y_test, y_prob) if y_prob is not None else None,
    }


def _render_home():
    """Render the static landing page."""
    st.title("🚀 AutoML: Effortless Machine Learning")
    st.markdown(
        """
        Welcome to **AutoML**, a powerful yet easy-to-use tool that automates the process of building and evaluating
        machine learning models. Whether you're a beginner exploring data or an expert looking for quick model deployment,
        AutoML simplifies the entire workflow.
        """
    )
    st.header("🔹 Features")
    st.markdown(
        """
        - **Automated Model Selection** – Let AutoML pick the best algorithm for your data.
        - **Hyperparameter Tuning** – Optimize model performance without manual tweaking.
        - **Data Preprocessing** – Handle missing values, scaling, encoding, and feature engineering.
        - **Performance Evaluation** – Compare models with key metrics and visualizations.
        - **Model Export** – Save trained models for deployment.
        """
    )
    st.header("🚀 Get Started")
    st.markdown(
        """
        1. **Upload your dataset** – Provide a CSV or Excel file with your data.
        2. **Select your target variable** – Choose the column to predict.
        3. **Let AutoML do the magic!** – Sit back and watch the automation work.
        """
    )
    st.header("📊 Visual Insights")
    st.markdown(
        """
        Explore interactive charts and performance metrics to make informed decisions.
        Use visualizations to compare model accuracy, precision, recall, and other key statistics.
        """
    )
    st.success("Start automating your ML workflows now! 🎯")
    st.write('''Developed By Gourav Singh,Ankit Yadav,Pushpansh''')


def _render_data_upload():
    """Render the upload page: load, auto-process and summarise the dataset."""
    st.header("📊 Data Upload & Analysis")
    uploaded_file = st.file_uploader("Upload your dataset (CSV or Excel)", type=['csv', 'xlsx', 'xls'])
    if uploaded_file is None:
        return

    st.session_state.data = load_data(uploaded_file)
    if st.session_state.data is None:
        return

    st.session_state.processed_data, st.session_state.label_encoders = auto_process_data(st.session_state.data)
    st.success("Data loaded and automatically processed!")

    st.subheader("Dataset Overview")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.info(f"Number of rows: {st.session_state.data.shape[0]}")
    with col2:
        st.info(f"Number of columns: {st.session_state.data.shape[1]}")
    with col3:
        missing_values = st.session_state.data.isnull().sum().sum()
        st.info(f"Missing values: {missing_values} (Automatically handled)")

    st.subheader("Original Data Preview")
    st.dataframe(st.session_state.data.head())
    st.subheader("Processed Data Preview")
    st.dataframe(st.session_state.processed_data.head())
    st.subheader("Statistical Description")
    st.dataframe(st.session_state.processed_data.describe())

    st.subheader("Correlation Heatmap")
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(st.session_state.processed_data.corr(), annot=True, cmap='coolwarm', ax=ax)
    st.pyplot(fig)
    plt.close(fig)  # release the figure; the script is re-run on every interaction


def _render_training():
    """Render the training page: pick columns, auto-train, report and export."""
    st.header("🎯 Auto Model Training")
    if st.session_state.processed_data is None:
        st.warning("Please upload and process your data first!")
        return

    st.subheader("Select Features and Target")
    columns = st.session_state.processed_data.columns.tolist()
    st.session_state.features = st.multiselect("Select features", columns, default=columns[:-1])
    # Default the target to the last column, the one left out of the default
    # features, so the default selection does not predict a column from itself.
    st.session_state.target = st.selectbox(
        "Select target variable", columns, index=max(len(columns) - 1, 0)
    )

    if not st.button("Auto Train Models"):
        return

    target = st.session_state.target
    features = [column for column in st.session_state.features if column != target]
    if len(features) != len(st.session_state.features):
        st.warning("The target column was removed from the selected features.")
        st.session_state.features = features
    # ``is None`` rather than truthiness so a column literally named 0 works.
    if not features or target is None:
        return

    X = st.session_state.processed_data[features]
    y = st.session_state.processed_data[target]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    st.session_state.model, st.session_state.model_name = auto_train(X_train, y_train, X_test, y_test)
    if st.session_state.model is None:
        st.error("Training did not produce a model.")
        return

    y_pred = st.session_state.model.predict(X_test)
    st.subheader("Best Model Performance")
    st.metric("Accuracy", f"{accuracy_score(y_test, y_pred):.2%}")
    st.text("Classification Report:")
    st.dataframe(get_classification_report(y_test, y_pred))

    if st.session_state.model_name == "Random Forest":
        st.subheader("Feature Importance")
        importance_df = pd.DataFrame({
            'Feature': features,
            'Importance': st.session_state.model.named_steps['classifier'].feature_importances_,
        }).sort_values('Importance', ascending=False)
        fig = px.bar(importance_df, x='Feature', y='Importance', title='Feature Importance Plot')
        st.plotly_chart(fig)

    model_data = {
        'model': st.session_state.model,
        'model_name': st.session_state.model_name,
        'label_encoders': st.session_state.label_encoders,
        'features': features,
        'target': target,
    }
    # Serialise in memory: avoids leaving an open file handle and avoids
    # sessions overwriting one shared 'model_data.joblib' on disk.
    buffer = io.BytesIO()
    joblib.dump(model_data, buffer)
    st.download_button(
        label="Download trained model",
        data=buffer.getvalue(),
        file_name='model_data.joblib',
        mime='application/octet-stream',
    )


def _show_model_comparison(X_train, X_test, y_train, y_test):
    """Fit every registered model and chart its hold-out metrics."""
    st.subheader("Model Performance Metrics")
    models = get_model_configs()
    results = {}
    progress_bar = st.progress(0)
    progress_text = st.empty()
    for i, (name, model_config) in enumerate(models.items()):
        progress_text.text(f"Training {name}...")
        pipeline = model_config['pipeline']
        pipeline.fit(X_train, y_train)
        results[name] = _score_binary_classifier(pipeline, X_test, y_test)
        progress_bar.progress((i + 1) / len(models))
    progress_text.empty()

    results_df = pd.DataFrame(results).T
    st.dataframe(results_df)
    fig = px.bar(
        results_df.reset_index().melt(id_vars='index', var_name='Metric', value_name='Score'),
        x='index', y='Score', color='Metric',
        barmode='group',
        title='Model Comparison',
        labels={'index': 'Model'},
    )
    st.plotly_chart(fig)


def _show_roc_curves(X_train, X_test, y_train, y_test):
    """Fit every registered model and draw its ROC curve on one chart."""
    st.subheader("ROC Curves")
    fig, ax = plt.subplots(figsize=(10, 6))
    for name, model_config in get_model_configs().items():
        pipeline = model_config['pipeline']
        pipeline.fit(X_train, y_train)
        if hasattr(pipeline, "predict_proba"):
            y_prob = pipeline.predict_proba(X_test)[:, 1]
            fpr, tpr, _ = roc_curve(y_test, y_prob)
            ax.plot(fpr, tpr, lw=2, label=f'{name} (AUC = {auc(fpr, tpr):.2f})')
    ax.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver Operating Characteristic (ROC) Curves')
    ax.legend(loc="lower right")
    st.pyplot(fig)
    plt.close(fig)


def _show_confusion_matrices(X_train, X_test, y_train, y_test):
    """Fit up to four registered models and draw their confusion matrices."""
    st.subheader("Confusion Matrices")
    model_items = list(get_model_configs().items())
    if len(model_items) > 4:
        st.warning("Showing confusion matrices for the first 4 models")
        model_items = model_items[:4]

    num_models = len(model_items)
    rows = (num_models + 1) // 2
    # squeeze=False always yields a 2-D array, so flattening is safe even
    # when there is only a single row of axes.
    fig, axes = plt.subplots(rows, 2, figsize=(12, 10), squeeze=False)
    axes = axes.ravel()
    for ax, (name, model_config) in zip(axes, model_items):
        pipeline = model_config['pipeline']
        pipeline.fit(X_train, y_train)
        cm = confusion_matrix(y_test, pipeline.predict(X_test))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_title(f"{name} - Confusion Matrix")
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
    # Drop the unused axis when the model count is odd.
    for spare in axes[num_models:]:
        fig.delaxes(spare)
    plt.tight_layout()
    st.pyplot(fig)
    plt.close(fig)


def _render_visualisation():
    """Render the visualisation page for the trained model and its rivals."""
    st.header("Model Visualisation")
    if st.session_state.model is None:
        st.warning("Please train a model first!")
        return
    if (
        st.session_state.processed_data is None
        or not st.session_state.features
        or st.session_state.target is None
    ):
        st.warning("Please load and preprocess your dataset before running evaluation.")
        return

    X = st.session_state.processed_data[st.session_state.features]
    y = st.session_state.processed_data[st.session_state.target]
    # Same test_size and random_state as the training page's split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Precision/recall/F1 with average='binary' and the [:, 1] probability
    # column are only defined for two-class targets.
    binary_target = y.nunique() == 2

    viz_option = st.selectbox(
        "Select visualization type",
        ["Model Comparison", "ROC Curves", "Confusion Matrix"]
    )
    if viz_option != "Confusion Matrix" and not binary_target:
        st.warning("This visualisation requires a binary target variable.")
    elif viz_option == "Model Comparison":
        _show_model_comparison(X_train, X_test, y_train, y_test)
    elif viz_option == "ROC Curves":
        _show_roc_curves(X_train, X_test, y_train, y_test)
    elif viz_option == "Confusion Matrix":
        _show_confusion_matrices(X_train, X_test, y_train, y_test)

    st.subheader("Current Model Performance")
    best_model_pred = st.session_state.model.predict(X_test)
    st.metric("Accuracy", f"{accuracy_score(y_test, best_model_pred):.2%}")
    if not binary_target:
        st.info("Precision, recall, F1 and AUC are only reported for binary targets.")
        return

    col1, col2 = st.columns(2)
    with col1:
        st.metric("Precision", f"{precision_score(y_test, best_model_pred):.2%}")
        st.metric("F1 Score", f"{f1_score(y_test, best_model_pred):.2%}")
    with col2:
        st.metric("Recall", f"{recall_score(y_test, best_model_pred):.2%}")
        if hasattr(st.session_state.model, "predict_proba"):
            best_proba = st.session_state.model.predict_proba(X_test)[:, 1]
            st.metric("AUC", f"{roc_auc_score(y_test, best_proba):.2%}")


def _render_prediction():
    """Render the prediction page: collect one row of inputs and score it."""
    st.header("🎲 Make Predictions")
    if st.session_state.model is None:
        st.warning("Please train a model first!")
        return

    st.subheader("Enter Feature Values")
    st.info(f"Using best model: {st.session_state.model_name}")

    encoders = st.session_state.label_encoders
    input_data = {}
    for feature in st.session_state.features:
        if feature in encoders:
            # Encoded column: offer the original labels, feed the model the code.
            value = st.selectbox(f"Select {feature}", encoders[feature].classes_)
            input_data[feature] = encoders[feature].transform([value])[0]
        else:
            input_data[feature] = st.number_input(f"Enter value for {feature}", value=0.0)

    if not st.button("Predict"):
        return

    target = st.session_state.target
    input_df = pd.DataFrame([input_data])
    prediction = st.session_state.model.predict(input_df)
    if target in encoders:
        original_prediction = encoders[target].inverse_transform(prediction)
        st.success(f"Predicted {target}: {original_prediction[0]}")
    else:
        st.success(f"Predicted {target}: {prediction[0]}")

    proba = st.session_state.model.predict_proba(input_df)
    st.subheader("Prediction Probability")
    if target in encoders:
        classes = encoders[target].classes_
    else:
        classes = st.session_state.model.classes_
    st.dataframe(pd.DataFrame(proba, columns=classes))


def main():
    """Entry point: draw the sidebar navigation and render the chosen page.

    Session-state slots shared by the pages (``data``, ``processed_data``,
    ``label_encoders``, ``model``, ``features``, ``target``, ``model_name``)
    are created as ``None`` on first run.
    """
    st.title("🤖 Machine Learning Model Deployment")
    st.sidebar.header("Navigation")
    page = st.sidebar.radio("Go to", ["Home","Data Upload & Analysis", "Model Training","Visualisation", "Prediction"])

    for key in ('data', 'processed_data', 'label_encoders', 'model',
                'features', 'target', 'model_name'):
        if key not in st.session_state:
            st.session_state[key] = None

    pages = {
        "Home": _render_home,
        "Data Upload & Analysis": _render_data_upload,
        "Model Training": _render_training,
        "Visualisation": _render_visualisation,
        "Prediction": _render_prediction,
    }
    pages[page]()
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()