Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import numpy as np | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix, classification_report | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier | |
| from sklearn.datasets import make_classification | |
| import joblib | |
# Generate sample data
def load_data(n_samples=1000, n_features=20, random_state=42):
    """Generate a synthetic binary-classification dataset.

    Defaults reproduce the original fixed dataset (1000 samples,
    20 features, seed 42); the parameters are exposed so callers can
    size the problem without editing this function.

    Parameters
    ----------
    n_samples : int, default 1000
        Number of samples to generate.
    n_features : int, default 20
        Number of features per sample.
    random_state : int, default 42
        Seed for reproducible generation.

    Returns
    -------
    tuple[numpy.ndarray, numpy.ndarray]
        Feature matrix ``X`` of shape (n_samples, n_features) and
        binary label vector ``y`` of shape (n_samples,).
    """
    X, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        random_state=random_state,
    )
    return X, y
# Train models
def train_models(X_train, y_train):
    """Fit a fixed roster of classifiers on the training data.

    Parameters
    ----------
    X_train : array-like
        Training feature matrix (expected pre-scaled by the caller).
    y_train : array-like
        Training labels.

    Returns
    -------
    dict[str, object]
        Mapping of display name -> fitted estimator.
    """
    candidates = {
        'Logistic Regression': LogisticRegression(),
        'Random Forest': RandomForestClassifier(),
        'Gradient Boosting': GradientBoostingClassifier(),
    }
    # sklearn's fit() returns the estimator itself, so fitting and
    # collecting can happen in a single comprehension.
    return {name: clf.fit(X_train, y_train) for name, clf in candidates.items()}
# Predict and evaluate
def evaluate_models(models, X_test, y_test):
    """Compute held-out evaluation metrics for every fitted model.

    Parameters
    ----------
    models : dict[str, object]
        Mapping of display name -> fitted estimator (as produced by
        ``train_models``).
    X_test : array-like
        Test feature matrix.
    y_test : array-like
        Test labels.

    Returns
    -------
    dict[str, dict]
        Per-model dict with keys 'Accuracy', 'ROC AUC',
        'Confusion Matrix' and 'Classification Report'.
    """
    results = {}
    for name, estimator in models.items():
        predictions = estimator.predict(X_test)          # hard class labels
        positive_scores = estimator.predict_proba(X_test)[:, 1]  # P(class == 1) for AUC
        results[name] = {
            'Accuracy': estimator.score(X_test, y_test),
            'ROC AUC': roc_auc_score(y_test, positive_scores),
            'Confusion Matrix': confusion_matrix(y_test, predictions),
            'Classification Report': classification_report(y_test, predictions),
        }
    return results
# Streamlit app
def _show_metrics(metrics):
    """Render accuracy, ROC AUC, confusion matrix and report for one model."""
    st.subheader("Metrics")
    st.write(f"**Accuracy:** {metrics['Accuracy']:.4f}")
    st.write(f"**ROC AUC:** {metrics['ROC AUC']:.4f}")
    st.write("**Confusion Matrix:**")
    st.write(metrics['Confusion Matrix'])
    st.write("**Classification Report:**")
    st.text(metrics['Classification Report'])


def _plot_roc(model, model_name, X_test, y_test, roc_auc):
    """Plot the ROC curve for *model* on the held-out test data."""
    st.subheader("ROC Curve")
    # Build an explicit figure: st.pyplot() with the bare pyplot module
    # is deprecated (and removed in recent Streamlit versions).
    fig, ax = plt.subplots(figsize=(10, 7))
    y_prob = model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    ax.plot(fpr, tpr, label=f'{model_name} (AUC = {roc_auc:.2f})')
    ax.plot([0, 1], [0, 1], 'k--')  # Diagonal line for random guessing
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver Operating Characteristic (ROC) Curve')
    ax.legend(loc='lower right')
    st.pyplot(fig)
    plt.close(fig)  # avoid leaking figures across Streamlit reruns


def _show_feature_importance(model, model_name, n_features):
    """Show a feature-importance bar chart for tree-based models only."""
    st.subheader("Feature Importance")
    if model_name not in ('Random Forest', 'Gradient Boosting'):
        return  # e.g. LogisticRegression has no feature_importances_
    importance_df = pd.DataFrame({
        'Feature': [f'Feature {i}' for i in range(n_features)],
        'Importance': model.feature_importances_,
    }).sort_values(by='Importance', ascending=False)
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.barplot(x='Importance', y='Feature', data=importance_df, ax=ax)
    ax.set_title(f'Feature Importance - {model_name}')
    st.pyplot(fig)
    plt.close(fig)  # avoid leaking figures across Streamlit reruns


def _prediction_form(model, scaler, n_features):
    """Accept comma-separated features from the user and show the prediction."""
    st.subheader("Make Predictions")
    input_data = st.text_input("Enter features separated by commas (e.g., 0.1, 0.2, ..., 0.5)")
    if not input_data:
        return
    try:
        # Convert input data to numpy array and reshape to one sample
        input_features = np.array([float(i) for i in input_data.split(',')]).reshape(1, -1)
        # Check if the number of features matches the model's input
        if input_features.shape[1] != n_features:
            st.error(f"Number of features should be {n_features}.")
        else:
            # Transform input features using the same scaler fitted on train data
            input_features_scaled = scaler.transform(input_features)
            prediction = model.predict(input_features_scaled)
            prediction_proba = model.predict_proba(input_features_scaled)[:, 1]
            st.write(f"Prediction: {'Positive' if prediction[0] == 1 else 'Negative'}")
            st.write(f"Probability of Positive: {prediction_proba[0]:.4f}")
    except ValueError:
        st.error("Please enter valid numerical values separated by commas.")
    except Exception as e:
        st.error(f"An error occurred: {e}")


def main():
    """Streamlit entry point: train, evaluate and interactively query models."""
    st.title("Model Performance and Predictions")
    # Load and split data into training and test sets
    X, y = load_data()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)  # fit on train only
    X_test_scaled = scaler.transform(X_test)        # reuse train statistics
    # Train models using scaled training data
    models = train_models(X_train_scaled, y_train)

    # Sidebar for model selection
    st.sidebar.header("Model Selection")
    selected_model_name = st.sidebar.selectbox("Select Model", list(models.keys()))
    selected_model = models[selected_model_name]

    # Evaluate all models, then display the selected one's metrics
    results = evaluate_models(models, X_test_scaled, y_test)
    metrics = results[selected_model_name]

    st.header(f"Model: {selected_model_name}")
    _show_metrics(metrics)
    _plot_roc(selected_model, selected_model_name, X_test_scaled, y_test, metrics['ROC AUC'])
    _show_feature_importance(selected_model, selected_model_name, X_test_scaled.shape[1])
    _prediction_form(selected_model, scaler, X_train_scaled.shape[1])


if __name__ == "__main__":
    main()