marianeft committed on
Commit f982348 · 1 Parent(s): e3c1e47

Initial commit

Files changed (2)
  1. app.py +139 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,139 @@
+ import streamlit as st
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix, classification_report
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import StandardScaler
+ from sklearn.linear_model import LogisticRegression
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
+ from sklearn.datasets import make_classification
+ import joblib
+
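+ # Streamlit demo: trains three scikit-learn classifiers on a synthetic
+ # binary-classification dataset and compares their test-set performance.
+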
+ # Generate sample data
+ def load_data():
+     X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
+     return X, y
+
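+ # make_classification yields a binary target by default; the fixed
+ # random_state keeps the demo reproducible across reruns.
+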
+ # Train models
+ def train_models(X_train, y_train):
+     models = {
+         'Logistic Regression': LogisticRegression(),
+         'Random Forest': RandomForestClassifier(),
+         'Gradient Boosting': GradientBoostingClassifier()
+     }
+
+     trained_models = {}
+     for name, model in models.items():
+         model.fit(X_train, y_train)
+         trained_models[name] = model
+     return trained_models
+
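+ # Sketch (not part of the original commit): Streamlit reruns the whole script
+ # on every widget interaction, so main() below retrains all three models each
+ # time. Wrapping training in a cached helper would avoid that, e.g.:
+ #
+ #     @st.cache_resource
+ #     def get_models(X_train, y_train):
+ #         return train_models(X_train, y_train)
+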
+ # Predict and evaluate
+ def evaluate_models(models, X_test, y_test):
+     results = {}
+     for name, model in models.items():
+         y_pred = model.predict(X_test)
+         y_prob = model.predict_proba(X_test)[:, 1]  # Probability estimates for ROC
+
+         accuracy = model.score(X_test, y_test)
+         roc_auc = roc_auc_score(y_test, y_prob)
+         conf_matrix = confusion_matrix(y_test, y_pred)
+         class_report = classification_report(y_test, y_pred)
+
+         results[name] = {
+             'Accuracy': accuracy,
+             'ROC AUC': roc_auc,
+             'Confusion Matrix': conf_matrix,
+             'Classification Report': class_report
+         }
+     return results
+
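+ # Optional sketch: the joblib import above is otherwise unused; it could
+ # persist the fitted models so a restart can skip retraining (the file name
+ # is illustrative):
+ #
+ #     joblib.dump(trained_models, 'models.joblib')
+ #     trained_models = joblib.load('models.joblib')
+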
+ # Streamlit app
+ def main():
+     st.title("Model Performance and Predictions")
+
+     # Load and split data
+     X, y = load_data()
+     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+     scaler = StandardScaler()
+     X_train_scaled = scaler.fit_transform(X_train)
+     X_test_scaled = scaler.transform(X_test)
+
+     # Train models
+     models = train_models(X_train_scaled, y_train)
+
+     # Model selection
+     st.sidebar.header("Model Selection")
+     model_names = list(models.keys())
+     selected_model_name = st.sidebar.selectbox("Select Model", model_names)
+     selected_model = models[selected_model_name]
+
+     # Evaluate selected model
+     results = evaluate_models(models, X_test_scaled, y_test)
+     metrics = results[selected_model_name]
+
+     st.header(f"Model: {selected_model_name}")
+
+     st.subheader("Metrics")
+     st.write(f"**Accuracy:** {metrics['Accuracy']:.4f}")
+     st.write(f"**ROC AUC:** {metrics['ROC AUC']:.4f}")
+
+     st.write("**Confusion Matrix:**")
+     st.write(metrics['Confusion Matrix'])
+
+     st.write("**Classification Report:**")
+     st.text(metrics['Classification Report'])
+
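+     # Sketch (not in the original commit): the raw array above could instead
+     # be drawn as a labelled heatmap using the seaborn import, e.g.:
+     #
+     #     fig_cm, ax_cm = plt.subplots()
+     #     sns.heatmap(metrics['Confusion Matrix'], annot=True, fmt='d', ax=ax_cm)
+     #     st.pyplot(fig_cm)
+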
+     st.subheader("ROC Curve")
+     fig_roc = plt.figure(figsize=(10, 7))
+     y_prob = selected_model.predict_proba(X_test_scaled)[:, 1]
+     fpr, tpr, _ = roc_curve(y_test, y_prob)
+     plt.plot(fpr, tpr, label=f'{selected_model_name} (AUC = {metrics["ROC AUC"]:.2f})')
+     plt.plot([0, 1], [0, 1], 'k--')
+     plt.xlabel('False Positive Rate')
+     plt.ylabel('True Positive Rate')
+     plt.title('Receiver Operating Characteristic (ROC) Curve')
+     plt.legend(loc='lower right')
+     st.pyplot(fig_roc)  # pass the figure explicitly; calling st.pyplot on the global state is deprecated
+
+     st.subheader("Feature Importance")
+     if selected_model_name in ['Random Forest', 'Gradient Boosting']:
+         feature_importances = selected_model.feature_importances_
+         feature_names = [f'Feature {i}' for i in range(X_test_scaled.shape[1])]
+         importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importances})
+         importance_df = importance_df.sort_values(by='Importance', ascending=False)
+
+         fig, ax = plt.subplots(figsize=(10, 7))
+         sns.barplot(x='Importance', y='Feature', data=importance_df, ax=ax)
+         ax.set_title(f'Feature Importance - {selected_model_name}')
+         st.pyplot(fig)
+
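+     # Sketch: LogisticRegression exposes coef_ rather than feature_importances_,
+     # so an else-branch could plot np.abs(selected_model.coef_[0]) the same way.
+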
+     st.subheader("Make Predictions")
+     input_data = st.text_input("Enter features separated by commas (e.g., 0.1, 0.2, ..., 0.5)")
+     if input_data:
+         try:
+             # Convert input data to numpy array and reshape
+             input_features = np.array([float(i) for i in input_data.split(',')]).reshape(1, -1)
+
+             # Check if the number of features matches the model's input
+             if input_features.shape[1] != X_train_scaled.shape[1]:
+                 st.error(f"Number of features should be {X_train_scaled.shape[1]}.")
+             else:
+                 # Transform input features using the same scaler
+                 input_features_scaled = scaler.transform(input_features)
+
+                 # Predict using the selected model
+                 prediction = selected_model.predict(input_features_scaled)
+                 prediction_proba = selected_model.predict_proba(input_features_scaled)[:, 1]
+                 st.write(f"Prediction: {'Positive' if prediction[0] == 1 else 'Negative'}")
+                 st.write(f"Probability of Positive: {prediction_proba[0]:.4f}")
+
+         except ValueError:
+             st.error("Please enter valid numerical values separated by commas.")
+         except Exception as e:
+             st.error(f"An error occurred: {e}")
+
+ if __name__ == "__main__":
+     main()
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ matplotlib
+ numpy==1.23.5
+ scikit-learn
+ joblib==1.3.0
+ pandas
+ seaborn
+ streamlit