Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -93,43 +93,59 @@ def analyze_file(file, label_col, n_clusters):
|
|
| 93 |
mse = mean_squared_error(y_test, y_pred)
|
| 94 |
r2 = r2_score(y_test, y_pred)
|
| 95 |
results_text += (
|
| 96 |
-
"Regression Results
|
| 97 |
-
f"-
|
| 98 |
-
f"- R
|
| 99 |
)
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
else:
|
| 112 |
# Classification
|
| 113 |
if len(y.unique()) < 2:
|
| 114 |
-
return ("Label
|
| 115 |
y_encoded, uniques = pd.factorize(y)
|
| 116 |
X_train, X_test, y_train, y_test = train_test_split(X_processed, y_encoded, test_size=0.3, random_state=RANDOM_STATE)
|
| 117 |
model = RandomForestClassifier(random_state=RANDOM_STATE)
|
| 118 |
model.fit(X_train, y_train)
|
| 119 |
y_pred = model.predict(X_test)
|
| 120 |
-
cm = confusion_matrix(y_test, y_pred)
|
| 121 |
cr = classification_report(y_test, y_pred, target_names=[str(u) for u in uniques])
|
| 122 |
-
results_text += "Classification Results
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
except Exception as e:
|
| 134 |
results_text += f"\nError during model training: {e}"
|
| 135 |
|
|
|
|
| 93 |
mse = mean_squared_error(y_test, y_pred)
|
| 94 |
r2 = r2_score(y_test, y_pred)
|
| 95 |
results_text += (
|
| 96 |
+
"Regression Results:\n"
|
| 97 |
+
f"- MSE: {mse:.3f}\n"
|
| 98 |
+
f"- R²: {r2:.3f}\n"
|
| 99 |
)
|
| 100 |
+
# 3D Plot with next two most important features
|
| 101 |
+
fi = pd.Series(model.feature_importances_, index=X_processed.columns).sort_values(ascending=False)
|
| 102 |
+
if len(fi) < 3:
|
| 103 |
+
results_text += "\nNot enough features for a 3D plot with the next two most important features."
|
| 104 |
+
else:
|
| 105 |
+
next_two_features = fi.index[1:3] # Second and third most important features
|
| 106 |
+
fig = plt.figure(figsize=(10, 8))
|
| 107 |
+
ax = fig.add_subplot(111, projection='3d')
|
| 108 |
+
ax.scatter(X_test[next_two_features[0]], X_test[next_two_features[1]], y_test, c='blue', marker='o', label='True Values')
|
| 109 |
+
ax.scatter(X_test[next_two_features[0]], X_test[next_two_features[1]], y_pred, c='red', marker='^', label='Predicted Values')
|
| 110 |
+
ax.set_xlabel(next_two_features[0])
|
| 111 |
+
ax.set_ylabel(next_two_features[1])
|
| 112 |
+
ax.set_zlabel(label_col)
|
| 113 |
+
ax.set_title("3D Plot: Label vs Next Two Most Important Features")
|
| 114 |
+
ax.legend()
|
| 115 |
+
buf = io.BytesIO()
|
| 116 |
+
plt.savefig(buf, format="png", bbox_inches="tight")
|
| 117 |
+
plt.close()
|
| 118 |
+
buf.seek(0)
|
| 119 |
+
model_img = Image.open(buf)
|
| 120 |
else:
|
| 121 |
# Classification
|
| 122 |
if len(y.unique()) < 2:
|
| 123 |
+
return ("Label must have at least 2 unique values.", None, None, None, None, None)
|
| 124 |
y_encoded, uniques = pd.factorize(y)
|
| 125 |
X_train, X_test, y_train, y_test = train_test_split(X_processed, y_encoded, test_size=0.3, random_state=RANDOM_STATE)
|
| 126 |
model = RandomForestClassifier(random_state=RANDOM_STATE)
|
| 127 |
model.fit(X_train, y_train)
|
| 128 |
y_pred = model.predict(X_test)
|
|
|
|
| 129 |
cr = classification_report(y_test, y_pred, target_names=[str(u) for u in uniques])
|
| 130 |
+
results_text += "Classification Results:\n" + cr + "\n"
|
| 131 |
+
# 3D Plot with next two most important features
|
| 132 |
+
fi = pd.Series(model.feature_importances_, index=X_processed.columns).sort_values(ascending=False)
|
| 133 |
+
if len(fi) < 3:
|
| 134 |
+
results_text += "\nNot enough features for a 3D plot with the next two most important features."
|
| 135 |
+
else:
|
| 136 |
+
next_two_features = fi.index[1:3] # Second and third most important features
|
| 137 |
+
fig = plt.figure(figsize=(10, 8))
|
| 138 |
+
ax = fig.add_subplot(111, projection='3d')
|
| 139 |
+
scatter = ax.scatter(X_test[next_two_features[0]], X_test[next_two_features[1]], y_test, c=y_test, cmap='viridis', marker='o')
|
| 140 |
+
ax.set_xlabel(next_two_features[0])
|
| 141 |
+
ax.set_ylabel(next_two_features[1])
|
| 142 |
+
ax.set_zlabel(label_col + " (encoded)")
|
| 143 |
+
ax.set_title("3D Plot: Label vs Next Two Most Important Features")
|
| 144 |
+
buf = io.BytesIO()
|
| 145 |
+
plt.savefig(buf, format="png", bbox_inches="tight")
|
| 146 |
+
plt.close()
|
| 147 |
+
buf.seek(0)
|
| 148 |
+
model_img = Image.open(buf)
|
| 149 |
except Exception as e:
|
| 150 |
results_text += f"\nError during model training: {e}"
|
| 151 |
|