jaker86 committed on
Commit
755fb3a
·
verified ·
1 Parent(s): d1cd9a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -27
app.py CHANGED
@@ -93,43 +93,59 @@ def analyze_file(file, label_col, n_clusters):
93
  mse = mean_squared_error(y_test, y_pred)
94
  r2 = r2_score(y_test, y_pred)
95
  results_text += (
96
- "Regression Results (predicting numeric values):\n"
97
- f"- Mean Squared Error (MSE): {mse:.3f} (lower is better)\n"
98
- f"- R² Score: {r2:.3f} (0 to 1, higher is better)\n"
99
  )
100
- plt.figure(figsize=(8, 6))
101
- plt.scatter(y_test, y_pred, alpha=0.7)
102
- plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
103
- plt.xlabel("True Values")
104
- plt.ylabel("Predicted Values")
105
- plt.title("Regression: True vs Predicted")
106
- buf = io.BytesIO()
107
- plt.savefig(buf, format="png", bbox_inches="tight")
108
- plt.close()
109
- buf.seek(0)
110
- model_img = Image.open(buf) # Convert to PIL Image
 
 
 
 
 
 
 
 
 
111
  else:
112
  # Classification
113
  if len(y.unique()) < 2:
114
- return ("Label column must have at least 2 unique values for classification.", None, None, None, None, None)
115
  y_encoded, uniques = pd.factorize(y)
116
  X_train, X_test, y_train, y_test = train_test_split(X_processed, y_encoded, test_size=0.3, random_state=RANDOM_STATE)
117
  model = RandomForestClassifier(random_state=RANDOM_STATE)
118
  model.fit(X_train, y_train)
119
  y_pred = model.predict(X_test)
120
- cm = confusion_matrix(y_test, y_pred)
121
  cr = classification_report(y_test, y_pred, target_names=[str(u) for u in uniques])
122
- results_text += "Classification Results (predicting categories):\n" + cr + "\n"
123
- plt.figure(figsize=(8, 6))
124
- sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=uniques, yticklabels=uniques)
125
- plt.xlabel("Predicted")
126
- plt.ylabel("True")
127
- plt.title("Confusion Matrix")
128
- buf = io.BytesIO()
129
- plt.savefig(buf, format="png", bbox_inches="tight")
130
- plt.close()
131
- buf.seek(0)
132
- model_img = Image.open(buf) # Convert to PIL Image
 
 
 
 
 
 
 
 
133
  except Exception as e:
134
  results_text += f"\nError during model training: {e}"
135
 
 
93
  mse = mean_squared_error(y_test, y_pred)
94
  r2 = r2_score(y_test, y_pred)
95
  results_text += (
96
+ "Regression Results:\n"
97
+ f"- MSE: {mse:.3f}\n"
98
+ f"- R²: {r2:.3f}\n"
99
  )
100
+ # 3D Plot with next two most important features
101
+ fi = pd.Series(model.feature_importances_, index=X_processed.columns).sort_values(ascending=False)
102
+ if len(fi) < 3:
103
+ results_text += "\nNot enough features for a 3D plot with the next two most important features."
104
+ else:
105
+ next_two_features = fi.index[1:3] # Second and third most important features
106
+ fig = plt.figure(figsize=(10, 8))
107
+ ax = fig.add_subplot(111, projection='3d')
108
+ ax.scatter(X_test[next_two_features[0]], X_test[next_two_features[1]], y_test, c='blue', marker='o', label='True Values')
109
+ ax.scatter(X_test[next_two_features[0]], X_test[next_two_features[1]], y_pred, c='red', marker='^', label='Predicted Values')
110
+ ax.set_xlabel(next_two_features[0])
111
+ ax.set_ylabel(next_two_features[1])
112
+ ax.set_zlabel(label_col)
113
+ ax.set_title("3D Plot: Label vs Next Two Most Important Features")
114
+ ax.legend()
115
+ buf = io.BytesIO()
116
+ plt.savefig(buf, format="png", bbox_inches="tight")
117
+ plt.close()
118
+ buf.seek(0)
119
+ model_img = Image.open(buf)
120
  else:
121
  # Classification
122
  if len(y.unique()) < 2:
123
+ return ("Label must have at least 2 unique values.", None, None, None, None, None)
124
  y_encoded, uniques = pd.factorize(y)
125
  X_train, X_test, y_train, y_test = train_test_split(X_processed, y_encoded, test_size=0.3, random_state=RANDOM_STATE)
126
  model = RandomForestClassifier(random_state=RANDOM_STATE)
127
  model.fit(X_train, y_train)
128
  y_pred = model.predict(X_test)
 
129
  cr = classification_report(y_test, y_pred, target_names=[str(u) for u in uniques])
130
+ results_text += "Classification Results:\n" + cr + "\n"
131
+ # 3D Plot with next two most important features
132
+ fi = pd.Series(model.feature_importances_, index=X_processed.columns).sort_values(ascending=False)
133
+ if len(fi) < 3:
134
+ results_text += "\nNot enough features for a 3D plot with the next two most important features."
135
+ else:
136
+ next_two_features = fi.index[1:3] # Second and third most important features
137
+ fig = plt.figure(figsize=(10, 8))
138
+ ax = fig.add_subplot(111, projection='3d')
139
+ scatter = ax.scatter(X_test[next_two_features[0]], X_test[next_two_features[1]], y_test, c=y_test, cmap='viridis', marker='o')
140
+ ax.set_xlabel(next_two_features[0])
141
+ ax.set_ylabel(next_two_features[1])
142
+ ax.set_zlabel(label_col + " (encoded)")
143
+ ax.set_title("3D Plot: Label vs Next Two Most Important Features")
144
+ buf = io.BytesIO()
145
+ plt.savefig(buf, format="png", bbox_inches="tight")
146
+ plt.close()
147
+ buf.seek(0)
148
+ model_img = Image.open(buf)
149
  except Exception as e:
150
  results_text += f"\nError during model training: {e}"
151