de-Rodrigo committed on
Commit
b245512
1 Parent(s): cbe4d30

Lighter App

Browse files
Files changed (1) hide show
  1. app.py +70 -72
app.py CHANGED
@@ -487,8 +487,6 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
487
  perplexity=tsne_params["perplexity"],
488
  learning_rate=tsne_params["learning_rate"])
489
 
490
- nan_rows = df_combined[df_combined[embedding_cols].isnull().any(axis=1)]
491
- print(nan_rows)
492
  reduced = reducer.fit_transform(df_combined[embedding_cols].values)
493
  # Guardamos el embedding completo (por ejemplo, 4 dimensiones en PCA)
494
  df_combined['embedding'] = list(reduced)
@@ -1165,83 +1163,83 @@ def run_model(model_name):
1165
 
1166
 
1167
 
1168
- real_labels_new = sorted(df_all["real"]['label'].unique().tolist())
1169
- df_distances_new = compute_cluster_distances_synthetic_individual(
1170
- df_all["synthetic"],
1171
- df_all["real"],
1172
- real_labels_new,
1173
- metric="wasserstein", # O la métrica que prefieras
1174
- bins=20
1175
- )
1176
 
1177
- global_distances_new = {}
1178
- for idx in df_distances_new.index:
1179
- if idx.startswith("Global"):
1180
- source_name = idx.split("(")[1].rstrip(")")
1181
- global_distances_new[source_name] = df_distances_new.loc[idx].values
1182
 
1183
- all_x_new = []
1184
- all_y_new = []
1185
- for source in df_f1.columns:
1186
- if source in global_distances_new:
1187
- x_vals = global_distances_new[source]
1188
- y_vals = df_f1[source].values
1189
- all_x_new.extend(x_vals)
1190
- all_y_new.extend(y_vals)
1191
- all_x_arr_new = np.array(all_x_new).reshape(-1, 1)
1192
- all_y_arr_new = np.array(all_y_new)
1193
 
1194
- model_global_new = LinearRegression().fit(all_x_arr_new, all_y_arr_new)
1195
- r2_new = model_global_new.score(all_x_arr_new, all_y_arr_new)
1196
- slope_new = model_global_new.coef_[0]
1197
- intercept_new = model_global_new.intercept_
1198
 
1199
- scatter_fig_new = figure(
1200
- width=600,
1201
- height=600,
1202
- tools="pan,wheel_zoom,reset,save,hover",
1203
- active_scroll="wheel_zoom",
1204
- title="Scatter Plot: Distance vs F1 (Nueva PCA)",
1205
- background_fill_color="white",
1206
- y_range=(0, 1)
1207
- )
1208
- scatter_fig_new.xgrid.grid_line_color = None
1209
- scatter_fig_new.ygrid.grid_line_color = "gray"
1210
- scatter_fig_new.match_aspect = True
1211
 
1212
- source_colors = {
1213
- "es-digital-paragraph-degradation-seq": "blue",
1214
- "es-digital-line-degradation-seq": "green",
1215
- "es-digital-seq": "red",
1216
- "es-digital-zoom-degradation-seq": "orange",
1217
- "es-digital-rotation-degradation-seq": "purple",
1218
- "es-digital-rotation-zoom-degradation-seq": "brown",
1219
- "es-render-seq": "cyan"
1220
- }
1221
 
1222
- for source in df_f1.columns:
1223
- if source in global_distances_new:
1224
- x_vals = global_distances_new[source]
1225
- y_vals = df_f1[source].values
1226
- data = {"x": x_vals, "y": y_vals, "Fuente": [source]*len(x_vals)}
1227
- cds = ColumnDataSource(data=data)
1228
- scatter_fig_new.circle(
1229
- 'x', 'y', size=8, alpha=0.7, source=cds,
1230
- fill_color=source_colors.get(source, "gray"),
1231
- line_color=source_colors.get(source, "gray"),
1232
- legend_label=source
1233
- )
1234
 
1235
- scatter_fig_new.xaxis.axis_label = "Distance (Global, por Colegio) - Nueva PCA"
1236
- scatter_fig_new.yaxis.axis_label = "F1 Score"
1237
- scatter_fig_new.legend.location = "top_right"
1238
- hover_tool_new = scatter_fig_new.select_one(HoverTool)
1239
- hover_tool_new.tooltips = [("Distance", "@x"), ("F1", "@y"), ("Subset", "@Fuente")]
1240
- x_line_new = np.linspace(all_x_arr_new.min(), all_x_arr_new.max(), 100)
1241
- y_line_new = model_global_new.predict(x_line_new.reshape(-1,1))
1242
- scatter_fig_new.line(x_line_new, y_line_new, line_width=2, line_color="black", legend_label="Global Regression")
1243
- st.bokeh_chart(scatter_fig_new)
1244
- st.write(f"Regresión global (Nueva PCA): R² = {r2_new:.4f}, Slope = {slope_new:.4f}, Intercept = {intercept_new:.4f}")
1245
 
1246
  # -------------------------------------------------------------------------
1247
  # 5. BLOQUE: Heatmap de Características
 
487
  perplexity=tsne_params["perplexity"],
488
  learning_rate=tsne_params["learning_rate"])
489
 
 
 
490
  reduced = reducer.fit_transform(df_combined[embedding_cols].values)
491
  # Guardamos el embedding completo (por ejemplo, 4 dimensiones en PCA)
492
  df_combined['embedding'] = list(reduced)
 
1163
 
1164
 
1165
 
1166
+ # real_labels_new = sorted(df_all["real"]['label'].unique().tolist())
1167
+ # df_distances_new = compute_cluster_distances_synthetic_individual(
1168
+ # df_all["synthetic"],
1169
+ # df_all["real"],
1170
+ # real_labels_new,
1171
+ # metric="wasserstein", # O la métrica que prefieras
1172
+ # bins=20
1173
+ # )
1174
 
1175
+ # global_distances_new = {}
1176
+ # for idx in df_distances_new.index:
1177
+ # if idx.startswith("Global"):
1178
+ # source_name = idx.split("(")[1].rstrip(")")
1179
+ # global_distances_new[source_name] = df_distances_new.loc[idx].values
1180
 
1181
+ # all_x_new = []
1182
+ # all_y_new = []
1183
+ # for source in df_f1.columns:
1184
+ # if source in global_distances_new:
1185
+ # x_vals = global_distances_new[source]
1186
+ # y_vals = df_f1[source].values
1187
+ # all_x_new.extend(x_vals)
1188
+ # all_y_new.extend(y_vals)
1189
+ # all_x_arr_new = np.array(all_x_new).reshape(-1, 1)
1190
+ # all_y_arr_new = np.array(all_y_new)
1191
 
1192
+ # model_global_new = LinearRegression().fit(all_x_arr_new, all_y_arr_new)
1193
+ # r2_new = model_global_new.score(all_x_arr_new, all_y_arr_new)
1194
+ # slope_new = model_global_new.coef_[0]
1195
+ # intercept_new = model_global_new.intercept_
1196
 
1197
+ # scatter_fig_new = figure(
1198
+ # width=600,
1199
+ # height=600,
1200
+ # tools="pan,wheel_zoom,reset,save,hover",
1201
+ # active_scroll="wheel_zoom",
1202
+ # title="Scatter Plot: Distance vs F1 (Nueva PCA)",
1203
+ # background_fill_color="white",
1204
+ # y_range=(0, 1)
1205
+ # )
1206
+ # scatter_fig_new.xgrid.grid_line_color = None
1207
+ # scatter_fig_new.ygrid.grid_line_color = "gray"
1208
+ # scatter_fig_new.match_aspect = True
1209
 
1210
+ # source_colors = {
1211
+ # "es-digital-paragraph-degradation-seq": "blue",
1212
+ # "es-digital-line-degradation-seq": "green",
1213
+ # "es-digital-seq": "red",
1214
+ # "es-digital-zoom-degradation-seq": "orange",
1215
+ # "es-digital-rotation-degradation-seq": "purple",
1216
+ # "es-digital-rotation-zoom-degradation-seq": "brown",
1217
+ # "es-render-seq": "cyan"
1218
+ # }
1219
 
1220
+ # for source in df_f1.columns:
1221
+ # if source in global_distances_new:
1222
+ # x_vals = global_distances_new[source]
1223
+ # y_vals = df_f1[source].values
1224
+ # data = {"x": x_vals, "y": y_vals, "Fuente": [source]*len(x_vals)}
1225
+ # cds = ColumnDataSource(data=data)
1226
+ # scatter_fig_new.circle(
1227
+ # 'x', 'y', size=8, alpha=0.7, source=cds,
1228
+ # fill_color=source_colors.get(source, "gray"),
1229
+ # line_color=source_colors.get(source, "gray"),
1230
+ # legend_label=source
1231
+ # )
1232
 
1233
+ # scatter_fig_new.xaxis.axis_label = "Distance (Global, por Colegio) - Nueva PCA"
1234
+ # scatter_fig_new.yaxis.axis_label = "F1 Score"
1235
+ # scatter_fig_new.legend.location = "top_right"
1236
+ # hover_tool_new = scatter_fig_new.select_one(HoverTool)
1237
+ # hover_tool_new.tooltips = [("Distance", "@x"), ("F1", "@y"), ("Subset", "@Fuente")]
1238
+ # x_line_new = np.linspace(all_x_arr_new.min(), all_x_arr_new.max(), 100)
1239
+ # y_line_new = model_global_new.predict(x_line_new.reshape(-1,1))
1240
+ # scatter_fig_new.line(x_line_new, y_line_new, line_width=2, line_color="black", legend_label="Global Regression")
1241
+ # st.bokeh_chart(scatter_fig_new)
1242
+ # st.write(f"Regresión global (Nueva PCA): R² = {r2_new:.4f}, Slope = {slope_new:.4f}, Intercept = {intercept_new:.4f}")
1243
 
1244
  # -------------------------------------------------------------------------
1245
  # 5. BLOQUE: Heatmap de Características