Spaces:
Sleeping
Sleeping
Commit
·
b245512
1
Parent(s):
cbe4d30
Lighter App
Browse files
app.py
CHANGED
|
@@ -487,8 +487,6 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
|
|
| 487 |
perplexity=tsne_params["perplexity"],
|
| 488 |
learning_rate=tsne_params["learning_rate"])
|
| 489 |
|
| 490 |
-
nan_rows = df_combined[df_combined[embedding_cols].isnull().any(axis=1)]
|
| 491 |
-
print(nan_rows)
|
| 492 |
reduced = reducer.fit_transform(df_combined[embedding_cols].values)
|
| 493 |
# Guardamos el embedding completo (por ejemplo, 4 dimensiones en PCA)
|
| 494 |
df_combined['embedding'] = list(reduced)
|
|
@@ -1165,83 +1163,83 @@ def run_model(model_name):
|
|
| 1165 |
|
| 1166 |
|
| 1167 |
|
| 1168 |
-
real_labels_new = sorted(df_all["real"]['label'].unique().tolist())
|
| 1169 |
-
df_distances_new = compute_cluster_distances_synthetic_individual(
|
| 1170 |
-
|
| 1171 |
-
|
| 1172 |
-
|
| 1173 |
-
|
| 1174 |
-
|
| 1175 |
-
)
|
| 1176 |
|
| 1177 |
-
global_distances_new = {}
|
| 1178 |
-
for idx in df_distances_new.index:
|
| 1179 |
-
|
| 1180 |
-
|
| 1181 |
-
|
| 1182 |
|
| 1183 |
-
all_x_new = []
|
| 1184 |
-
all_y_new = []
|
| 1185 |
-
for source in df_f1.columns:
|
| 1186 |
-
|
| 1187 |
-
|
| 1188 |
-
|
| 1189 |
-
|
| 1190 |
-
|
| 1191 |
-
all_x_arr_new = np.array(all_x_new).reshape(-1, 1)
|
| 1192 |
-
all_y_arr_new = np.array(all_y_new)
|
| 1193 |
|
| 1194 |
-
model_global_new = LinearRegression().fit(all_x_arr_new, all_y_arr_new)
|
| 1195 |
-
r2_new = model_global_new.score(all_x_arr_new, all_y_arr_new)
|
| 1196 |
-
slope_new = model_global_new.coef_[0]
|
| 1197 |
-
intercept_new = model_global_new.intercept_
|
| 1198 |
|
| 1199 |
-
scatter_fig_new = figure(
|
| 1200 |
-
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
|
| 1204 |
-
|
| 1205 |
-
|
| 1206 |
-
|
| 1207 |
-
)
|
| 1208 |
-
scatter_fig_new.xgrid.grid_line_color = None
|
| 1209 |
-
scatter_fig_new.ygrid.grid_line_color = "gray"
|
| 1210 |
-
scatter_fig_new.match_aspect = True
|
| 1211 |
|
| 1212 |
-
source_colors = {
|
| 1213 |
-
|
| 1214 |
-
|
| 1215 |
-
|
| 1216 |
-
|
| 1217 |
-
|
| 1218 |
-
|
| 1219 |
-
|
| 1220 |
-
}
|
| 1221 |
|
| 1222 |
-
for source in df_f1.columns:
|
| 1223 |
-
|
| 1224 |
-
|
| 1225 |
-
|
| 1226 |
-
|
| 1227 |
-
|
| 1228 |
-
|
| 1229 |
-
|
| 1230 |
-
|
| 1231 |
-
|
| 1232 |
-
|
| 1233 |
-
|
| 1234 |
|
| 1235 |
-
scatter_fig_new.xaxis.axis_label = "Distance (Global, por Colegio) - Nueva PCA"
|
| 1236 |
-
scatter_fig_new.yaxis.axis_label = "F1 Score"
|
| 1237 |
-
scatter_fig_new.legend.location = "top_right"
|
| 1238 |
-
hover_tool_new = scatter_fig_new.select_one(HoverTool)
|
| 1239 |
-
hover_tool_new.tooltips = [("Distance", "@x"), ("F1", "@y"), ("Subset", "@Fuente")]
|
| 1240 |
-
x_line_new = np.linspace(all_x_arr_new.min(), all_x_arr_new.max(), 100)
|
| 1241 |
-
y_line_new = model_global_new.predict(x_line_new.reshape(-1,1))
|
| 1242 |
-
scatter_fig_new.line(x_line_new, y_line_new, line_width=2, line_color="black", legend_label="Global Regression")
|
| 1243 |
-
st.bokeh_chart(scatter_fig_new)
|
| 1244 |
-
st.write(f"Regresión global (Nueva PCA): R² = {r2_new:.4f}, Slope = {slope_new:.4f}, Intercept = {intercept_new:.4f}")
|
| 1245 |
|
| 1246 |
# -------------------------------------------------------------------------
|
| 1247 |
# 5. BLOQUE: Heatmap de Características
|
|
|
|
| 487 |
perplexity=tsne_params["perplexity"],
|
| 488 |
learning_rate=tsne_params["learning_rate"])
|
| 489 |
|
|
|
|
|
|
|
| 490 |
reduced = reducer.fit_transform(df_combined[embedding_cols].values)
|
| 491 |
# Guardamos el embedding completo (por ejemplo, 4 dimensiones en PCA)
|
| 492 |
df_combined['embedding'] = list(reduced)
|
|
|
|
| 1163 |
|
| 1164 |
|
| 1165 |
|
| 1166 |
+
# real_labels_new = sorted(df_all["real"]['label'].unique().tolist())
|
| 1167 |
+
# df_distances_new = compute_cluster_distances_synthetic_individual(
|
| 1168 |
+
# df_all["synthetic"],
|
| 1169 |
+
# df_all["real"],
|
| 1170 |
+
# real_labels_new,
|
| 1171 |
+
# metric="wasserstein", # O la métrica que prefieras
|
| 1172 |
+
# bins=20
|
| 1173 |
+
# )
|
| 1174 |
|
| 1175 |
+
# global_distances_new = {}
|
| 1176 |
+
# for idx in df_distances_new.index:
|
| 1177 |
+
# if idx.startswith("Global"):
|
| 1178 |
+
# source_name = idx.split("(")[1].rstrip(")")
|
| 1179 |
+
# global_distances_new[source_name] = df_distances_new.loc[idx].values
|
| 1180 |
|
| 1181 |
+
# all_x_new = []
|
| 1182 |
+
# all_y_new = []
|
| 1183 |
+
# for source in df_f1.columns:
|
| 1184 |
+
# if source in global_distances_new:
|
| 1185 |
+
# x_vals = global_distances_new[source]
|
| 1186 |
+
# y_vals = df_f1[source].values
|
| 1187 |
+
# all_x_new.extend(x_vals)
|
| 1188 |
+
# all_y_new.extend(y_vals)
|
| 1189 |
+
# all_x_arr_new = np.array(all_x_new).reshape(-1, 1)
|
| 1190 |
+
# all_y_arr_new = np.array(all_y_new)
|
| 1191 |
|
| 1192 |
+
# model_global_new = LinearRegression().fit(all_x_arr_new, all_y_arr_new)
|
| 1193 |
+
# r2_new = model_global_new.score(all_x_arr_new, all_y_arr_new)
|
| 1194 |
+
# slope_new = model_global_new.coef_[0]
|
| 1195 |
+
# intercept_new = model_global_new.intercept_
|
| 1196 |
|
| 1197 |
+
# scatter_fig_new = figure(
|
| 1198 |
+
# width=600,
|
| 1199 |
+
# height=600,
|
| 1200 |
+
# tools="pan,wheel_zoom,reset,save,hover",
|
| 1201 |
+
# active_scroll="wheel_zoom",
|
| 1202 |
+
# title="Scatter Plot: Distance vs F1 (Nueva PCA)",
|
| 1203 |
+
# background_fill_color="white",
|
| 1204 |
+
# y_range=(0, 1)
|
| 1205 |
+
# )
|
| 1206 |
+
# scatter_fig_new.xgrid.grid_line_color = None
|
| 1207 |
+
# scatter_fig_new.ygrid.grid_line_color = "gray"
|
| 1208 |
+
# scatter_fig_new.match_aspect = True
|
| 1209 |
|
| 1210 |
+
# source_colors = {
|
| 1211 |
+
# "es-digital-paragraph-degradation-seq": "blue",
|
| 1212 |
+
# "es-digital-line-degradation-seq": "green",
|
| 1213 |
+
# "es-digital-seq": "red",
|
| 1214 |
+
# "es-digital-zoom-degradation-seq": "orange",
|
| 1215 |
+
# "es-digital-rotation-degradation-seq": "purple",
|
| 1216 |
+
# "es-digital-rotation-zoom-degradation-seq": "brown",
|
| 1217 |
+
# "es-render-seq": "cyan"
|
| 1218 |
+
# }
|
| 1219 |
|
| 1220 |
+
# for source in df_f1.columns:
|
| 1221 |
+
# if source in global_distances_new:
|
| 1222 |
+
# x_vals = global_distances_new[source]
|
| 1223 |
+
# y_vals = df_f1[source].values
|
| 1224 |
+
# data = {"x": x_vals, "y": y_vals, "Fuente": [source]*len(x_vals)}
|
| 1225 |
+
# cds = ColumnDataSource(data=data)
|
| 1226 |
+
# scatter_fig_new.circle(
|
| 1227 |
+
# 'x', 'y', size=8, alpha=0.7, source=cds,
|
| 1228 |
+
# fill_color=source_colors.get(source, "gray"),
|
| 1229 |
+
# line_color=source_colors.get(source, "gray"),
|
| 1230 |
+
# legend_label=source
|
| 1231 |
+
# )
|
| 1232 |
|
| 1233 |
+
# scatter_fig_new.xaxis.axis_label = "Distance (Global, por Colegio) - Nueva PCA"
|
| 1234 |
+
# scatter_fig_new.yaxis.axis_label = "F1 Score"
|
| 1235 |
+
# scatter_fig_new.legend.location = "top_right"
|
| 1236 |
+
# hover_tool_new = scatter_fig_new.select_one(HoverTool)
|
| 1237 |
+
# hover_tool_new.tooltips = [("Distance", "@x"), ("F1", "@y"), ("Subset", "@Fuente")]
|
| 1238 |
+
# x_line_new = np.linspace(all_x_arr_new.min(), all_x_arr_new.max(), 100)
|
| 1239 |
+
# y_line_new = model_global_new.predict(x_line_new.reshape(-1,1))
|
| 1240 |
+
# scatter_fig_new.line(x_line_new, y_line_new, line_width=2, line_color="black", legend_label="Global Regression")
|
| 1241 |
+
# st.bokeh_chart(scatter_fig_new)
|
| 1242 |
+
# st.write(f"Regresión global (Nueva PCA): R² = {r2_new:.4f}, Slope = {slope_new:.4f}, Intercept = {intercept_new:.4f}")
|
| 1243 |
|
| 1244 |
# -------------------------------------------------------------------------
|
| 1245 |
# 5. BLOQUE: Heatmap de Características
|