Spaces:
Sleeping
Sleeping
File size: 5,964 Bytes
f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 f982348 d99ae65 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.datasets import make_classification
import joblib
# Generate sample data
def load_data(n_samples=1000, n_features=20, random_state=42):
    """Generate a synthetic binary-classification dataset.

    Parameters
    ----------
    n_samples : int, default 1000
        Number of rows to generate.
    n_features : int, default 20
        Number of feature columns.
    random_state : int, default 42
        Seed so the same dataset is produced on every Streamlit rerun.

    Returns
    -------
    tuple of (ndarray, ndarray)
        Feature matrix ``X`` of shape (n_samples, n_features) and
        binary label vector ``y``.
    """
    X, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        random_state=random_state,
    )
    return X, y
# Train models
def train_models(X_train, y_train, random_state=42):
    """Fit one instance of each candidate classifier on the training data.

    Parameters
    ----------
    X_train : ndarray
        Scaled training feature matrix.
    y_train : ndarray
        Training labels.
    random_state : int, default 42
        Seed for the stochastic estimators. Without it, RandomForest and
        GradientBoosting would be re-fit with a fresh seed on every
        Streamlit rerun, so the displayed metrics would drift between
        widget interactions.

    Returns
    -------
    dict
        Mapping of display name -> fitted estimator.
    """
    models = {
        'Logistic Regression': LogisticRegression(),
        'Random Forest': RandomForestClassifier(random_state=random_state),
        'Gradient Boosting': GradientBoostingClassifier(random_state=random_state)
    }
    trained_models = {}
    # Train each model using the training data
    for name, model in models.items():
        model.fit(X_train, y_train)
        trained_models[name] = model  # Store trained models in a dictionary
    return trained_models
# Predict and evaluate
def evaluate_models(models, X_test, y_test):
    """Score every fitted model on the held-out test set.

    For each estimator this computes accuracy, ROC AUC (from positive-class
    probabilities), the confusion matrix, and a text classification report.

    Parameters
    ----------
    models : dict
        Mapping of display name -> fitted estimator.
    X_test : ndarray
        Scaled test feature matrix.
    y_test : ndarray
        Test labels.

    Returns
    -------
    dict
        Mapping of display name -> dict of metric name -> value.
    """
    metrics_by_name = {}
    for model_name, estimator in models.items():
        predictions = estimator.predict(X_test)
        # Positive-class probabilities drive the ROC AUC score.
        positive_scores = estimator.predict_proba(X_test)[:, 1]
        metrics_by_name[model_name] = {
            'Accuracy': estimator.score(X_test, y_test),
            'ROC AUC': roc_auc_score(y_test, positive_scores),
            'Confusion Matrix': confusion_matrix(y_test, predictions),
            'Classification Report': classification_report(y_test, predictions),
        }
    return metrics_by_name
# Streamlit app
def _render_metrics(metrics):
    """Show the selected model's scalar metrics, confusion matrix, and report."""
    st.subheader("Metrics")
    st.write(f"**Accuracy:** {metrics['Accuracy']:.4f}")
    st.write(f"**ROC AUC:** {metrics['ROC AUC']:.4f}")
    st.write("**Confusion Matrix:**")
    st.write(metrics['Confusion Matrix'])
    st.write("**Classification Report:**")
    st.text(metrics['Classification Report'])

def _render_roc_curve(model_name, model, X_test_scaled, y_test, roc_auc):
    """Plot the model's ROC curve against the random-guessing diagonal."""
    st.subheader("ROC Curve")
    # Build an explicit Figure: st.pyplot(plt) (passing the module) is
    # deprecated and fails on newer Streamlit versions.
    fig, ax = plt.subplots(figsize=(10, 7))
    y_prob = model.predict_proba(X_test_scaled)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    ax.plot(fpr, tpr, label=f'{model_name} (AUC = {roc_auc:.2f})')
    ax.plot([0, 1], [0, 1], 'k--')  # Diagonal line for random guessing
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver Operating Characteristic (ROC) Curve')
    ax.legend(loc='lower right')
    st.pyplot(fig)
    plt.close(fig)  # Free the figure; Streamlit reruns would otherwise leak them

def _render_feature_importance(model_name, model, n_features):
    """Bar-plot feature importances for tree-based models only."""
    st.subheader("Feature Importance")
    # Only the ensemble models expose feature_importances_.
    if model_name in ['Random Forest', 'Gradient Boosting']:
        feature_importances = model.feature_importances_
        feature_names = [f'Feature {i}' for i in range(n_features)]
        importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importances})
        importance_df = importance_df.sort_values(by='Importance', ascending=False)
        fig, ax = plt.subplots(figsize=(10, 7))
        sns.barplot(x='Importance', y='Feature', data=importance_df, ax=ax)
        ax.set_title(f'Feature Importance - {model_name}')
        st.pyplot(fig)
        plt.close(fig)  # Release the figure after rendering

def _render_prediction_ui(model, scaler, n_features):
    """Let the user type a comma-separated feature vector and get a prediction."""
    st.subheader("Make Predictions")
    input_data = st.text_input("Enter features separated by commas (e.g., 0.1, 0.2, ..., 0.5)")
    if input_data:
        try:
            # Convert input data to numpy array and reshape to a single row
            input_features = np.array([float(i) for i in input_data.split(',')]).reshape(1, -1)
            # Check if the number of features matches the model's input
            if input_features.shape[1] != n_features:
                st.error(f"Number of features should be {n_features}.")
            else:
                # Transform input features using the same scaler fit on training data
                input_features_scaled = scaler.transform(input_features)
                # Predict using the selected model
                prediction = model.predict(input_features_scaled)
                prediction_proba = model.predict_proba(input_features_scaled)[:, 1]
                st.write(f"Prediction: {'Positive' if prediction[0] == 1 else 'Negative'}")
                st.write(f"Probability of Positive: {prediction_proba[0]:.4f}")
        except ValueError:
            st.error("Please enter valid numerical values separated by commas.")
        except Exception as e:
            st.error(f"An error occurred: {e}")

def main():
    """Streamlit entry point: train, evaluate, and explore the classifiers."""
    st.title("Model Performance and Predictions")
    # Load and split data into training and test sets
    X, y = load_data()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)  # Scale training data
    X_test_scaled = scaler.transform(X_test)  # Scale test data
    # Train models using scaled training data
    models = train_models(X_train_scaled, y_train)
    # Sidebar for model selection
    st.sidebar.header("Model Selection")
    model_names = list(models.keys())
    selected_model_name = st.sidebar.selectbox("Select Model", model_names)
    selected_model = models[selected_model_name]
    # Evaluate selected model using test data
    results = evaluate_models(models, X_test_scaled, y_test)
    metrics = results[selected_model_name]
    st.header(f"Model: {selected_model_name}")
    _render_metrics(metrics)
    _render_roc_curve(selected_model_name, selected_model, X_test_scaled, y_test, metrics['ROC AUC'])
    _render_feature_importance(selected_model_name, selected_model, X_test_scaled.shape[1])
    _render_prediction_ui(selected_model, scaler, X_train_scaled.shape[1])

if __name__ == "__main__":
    main()
|