marianeft committed on
Commit
d99ae65
·
verified ·
1 Parent(s): 8bc7c7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -36
app.py CHANGED
@@ -13,11 +13,13 @@ import joblib
13
 
14
# Generate sample data
def load_data():
    """Return a reproducible synthetic dataset for binary classification.

    Returns:
        tuple: (features, labels) — a (1000, 20) feature matrix and the
        matching class labels; random_state=42 makes the data deterministic.
    """
    features, labels = make_classification(
        n_samples=1000,
        n_features=20,
        random_state=42,
    )
    return features, labels
18
 
19
  # Train models
20
  def train_models(X_train, y_train):
 
21
  models = {
22
  'Logistic Regression': LogisticRegression(),
23
  'Random Forest': RandomForestClassifier(),
@@ -25,20 +27,24 @@ def train_models(X_train, y_train):
25
  }
26
 
27
  trained_models = {}
 
28
  for name, model in models.items():
29
  model.fit(X_train, y_train)
30
- trained_models[name] = model
31
  return trained_models
32
 
33
  # Predict and evaluate
34
  def evaluate_models(models, X_test, y_test):
35
  results = {}
 
36
  for name, model in models.items():
37
- y_pred = model.predict(X_test)
38
  y_prob = model.predict_proba(X_test)[:, 1] # Probability estimates for ROC
39
 
 
40
  accuracy = model.score(X_test, y_test)
41
  roc_auc = roc_auc_score(y_test, y_prob)
 
42
  conf_matrix = confusion_matrix(y_test, y_pred)
43
  class_report = classification_report(y_test, y_pred)
44
 
@@ -52,50 +58,25 @@ def evaluate_models(models, X_test, y_test):
52
 
53
  # Streamlit app
54
  def main():
55
- st.title("Model Performance and Predictions")
56
 
57
- st.subheader("Make Predictions")
58
- input_data = st.text_input("Enter features separated by commas (e.g., 0.1, 0.2, ..., 0.5)")
59
- if input_data:
60
- try:
61
- # Convert input data to numpy array and reshape
62
- input_features = np.array([float(i) for i in input_data.split(',')]).reshape(1, -1)
63
-
64
- # Check if the number of features matches the model's input
65
- if input_features.shape[1] != X_train_scaled.shape[1]:
66
- st.error(f"Number of features should be {X_train_scaled.shape[1]}.")
67
- else:
68
- # Transform input features using the same scaler
69
- input_features_scaled = scaler.transform(input_features)
70
-
71
- # Predict using the selected model
72
- prediction = selected_model.predict(input_features_scaled)
73
- prediction_proba = selected_model.predict_proba(input_features_scaled)[:, 1]
74
- st.write(f"Prediction: {'Positive' if prediction[0] == 1 else 'Negative'}")
75
- st.write(f"Probability of Positive: {prediction_proba[0]:.4f}")
76
-
77
- except ValueError:
78
- st.error("Please enter valid numerical values separated by commas.")
79
- except Exception as e:
80
- st.error(f"An error occurred: {e}")
81
-
82
- # Load and split data
83
  X, y = load_data()
84
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
85
  scaler = StandardScaler()
86
- X_train_scaled = scaler.fit_transform(X_train)
87
- X_test_scaled = scaler.transform(X_test)
88
 
89
- # Train models
90
  models = train_models(X_train_scaled, y_train)
91
 
92
- # Model selection
93
  st.sidebar.header("Model Selection")
94
  model_names = list(models.keys())
95
  selected_model_name = st.sidebar.selectbox("Select Model", model_names)
96
  selected_model = models[selected_model_name]
97
 
98
- # Evaluate selected model
99
  results = evaluate_models(models, X_test_scaled, y_test)
100
  metrics = results[selected_model_name]
101
 
@@ -116,7 +97,7 @@ if input_data:
116
  y_prob = selected_model.predict_proba(X_test_scaled)[:, 1]
117
  fpr, tpr, _ = roc_curve(y_test, y_prob)
118
  plt.plot(fpr, tpr, label=f'{selected_model_name} (AUC = {metrics["ROC AUC"]:.2f})')
119
- plt.plot([0, 1], [0, 1], 'k--')
120
  plt.xlabel('False Positive Rate')
121
  plt.ylabel('True Positive Rate')
122
  plt.title('Receiver Operating Characteristic (ROC) Curve')
@@ -135,5 +116,30 @@ if input_data:
135
  ax.set_title(f'Feature Importance - {selected_model_name}')
136
  st.pyplot(fig)
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  if __name__ == "__main__":
139
- main()
 
13
 
14
# Generate sample data
def load_data():
    """Create a synthetic binary-classification dataset.

    Returns:
        tuple: (X, y) where X is a (1000, 20) feature matrix and y the
        class labels, generated deterministically (random_state=42).
    """
    # make_classification already returns the (X, y) pair, so forward it
    # directly; the fixed seed keeps results reproducible across runs.
    return make_classification(n_samples=1000, n_features=20, random_state=42)
19
 
20
  # Train models
21
  def train_models(X_train, y_train):
22
+ # Dictionary of models to train
23
  models = {
24
  'Logistic Regression': LogisticRegression(),
25
  'Random Forest': RandomForestClassifier(),
 
27
  }
28
 
29
  trained_models = {}
30
+ # Train each model using the training data
31
  for name, model in models.items():
32
  model.fit(X_train, y_train)
33
+ trained_models[name] = model # Store trained models in a dictionary
34
  return trained_models
35
 
36
  # Predict and evaluate
37
  def evaluate_models(models, X_test, y_test):
38
  results = {}
39
+ # Evaluate each model using the test data
40
  for name, model in models.items():
41
+ y_pred = model.predict(X_test) # Predict class labels
42
  y_prob = model.predict_proba(X_test)[:, 1] # Probability estimates for ROC
43
 
44
+ # Calculate accuracy and ROC AUC score
45
  accuracy = model.score(X_test, y_test)
46
  roc_auc = roc_auc_score(y_test, y_prob)
47
+ # Compute confusion matrix and classification report
48
  conf_matrix = confusion_matrix(y_test, y_pred)
49
  class_report = classification_report(y_test, y_pred)
50
 
 
58
 
59
  # Streamlit app
60
  def main():
61
+ st.title("Model Performance and Predictions")
62
 
63
+ # Load and split data into training and test sets
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  X, y = load_data()
65
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
66
  scaler = StandardScaler()
67
+ X_train_scaled = scaler.fit_transform(X_train) # Scale training data
68
+ X_test_scaled = scaler.transform(X_test) # Scale test data
69
 
70
+ # Train models using scaled training data
71
  models = train_models(X_train_scaled, y_train)
72
 
73
+ # Sidebar for model selection
74
  st.sidebar.header("Model Selection")
75
  model_names = list(models.keys())
76
  selected_model_name = st.sidebar.selectbox("Select Model", model_names)
77
  selected_model = models[selected_model_name]
78
 
79
+ # Evaluate selected model using test data
80
  results = evaluate_models(models, X_test_scaled, y_test)
81
  metrics = results[selected_model_name]
82
 
 
97
  y_prob = selected_model.predict_proba(X_test_scaled)[:, 1]
98
  fpr, tpr, _ = roc_curve(y_test, y_prob)
99
  plt.plot(fpr, tpr, label=f'{selected_model_name} (AUC = {metrics["ROC AUC"]:.2f})')
100
+ plt.plot([0, 1], [0, 1], 'k--') # Diagonal line for random guessing
101
  plt.xlabel('False Positive Rate')
102
  plt.ylabel('True Positive Rate')
103
  plt.title('Receiver Operating Characteristic (ROC) Curve')
 
116
  ax.set_title(f'Feature Importance - {selected_model_name}')
117
  st.pyplot(fig)
118
 
119
+ st.subheader("Make Predictions")
120
+ input_data = st.text_input("Enter features separated by commas (e.g., 0.1, 0.2, ..., 0.5)")
121
+ if input_data:
122
+ try:
123
+ # Convert input data to numpy array and reshape
124
+ input_features = np.array([float(i) for i in input_data.split(',')]).reshape(1, -1)
125
+
126
+ # Check if the number of features matches the model's input
127
+ if input_features.shape[1] != X_train_scaled.shape[1]:
128
+ st.error(f"Number of features should be {X_train_scaled.shape[1]}.")
129
+ else:
130
+ # Transform input features using the same scaler
131
+ input_features_scaled = scaler.transform(input_features)
132
+
133
+ # Predict using the selected model
134
+ prediction = selected_model.predict(input_features_scaled)
135
+ prediction_proba = selected_model.predict_proba(input_features_scaled)[:, 1]
136
+ st.write(f"Prediction: {'Positive' if prediction[0] == 1 else 'Negative'}")
137
+ st.write(f"Probability of Positive: {prediction_proba[0]:.4f}")
138
+
139
+ except ValueError:
140
+ st.error("Please enter valid numerical values separated by commas.")
141
+ except Exception as e:
142
+ st.error(f"An error occurred: {e}")
143
+
144
  if __name__ == "__main__":
145
+ main()