de-Rodrigo committed on
Commit
1904d0c
1 Parent(s): 8a63ffd

Indentation Issue

Browse files
Files changed (1) hide show
  1. app.py +211 -211
app.py CHANGED
@@ -1436,227 +1436,227 @@ def run_model(model_name):
1436
  )
1437
 
1438
  elif reduction_method == "t-nse":
1439
- st.markdown("## t-SNE - Solo Muestras Reales")
1440
- # -------------------------------------------------------------------------
1441
- # 1. t-SNE sobre las muestras reales
1442
- df_real_only = embeddings["real"].copy()
1443
-
1444
- reducer_real = TSNE(n_components=2, perplexity=30, random_state=42)
1445
- reduced_real = reducer_real.fit_transform(df_real_only[embedding_cols].values)
1446
-
1447
- # Agregar columnas TSNE1, TSNE2
1448
- df_real_only['TSNE1'] = reduced_real[:, 0]
1449
- df_real_only['TSNE2'] = reduced_real[:, 1]
1450
 
1451
- unique_labels_real = sorted(df_real_only['label'].unique().tolist())
1452
-
1453
- # Mapeo de colores para las muestras reales usando la paleta Reds9
1454
- num_labels = len(unique_labels_real)
1455
- if num_labels <= 9:
1456
- red_palette = Reds9[:num_labels]
1457
- else:
1458
- red_palette = (Reds9 * ((num_labels // 9) + 1))[:num_labels]
1459
- real_color_mapping = {label: red_palette[i] for i, label in enumerate(unique_labels_real)}
1460
-
1461
- # -------------------------------------------------------------------------
1462
- # Crear plot interactivo con Bokeh
1463
- st.subheader("t-SNE - Real: Visualizaci贸n Interactiva")
1464
-
1465
- source = ColumnDataSource(df_real_only)
1466
-
1467
- hover = HoverTool(tooltips=[
1468
- ("Index", "$index"),
1469
- ("Label", "@label"),
1470
- ("TSNE1", "@TSNE1"),
1471
- ("TSNE2", "@TSNE2")
1472
- ])
1473
-
1474
- p = figure(
1475
- width=800,
1476
- height=600,
1477
- title="t-SNE sobre muestras reales",
1478
- tools=["pan", "wheel_zoom", "box_zoom", "reset", hover]
1479
- )
1480
 
1481
- for label in unique_labels_real:
1482
- subset = df_real_only[df_real_only['label'] == label]
1483
- p.scatter(
1484
- x=subset["TSNE1"],
1485
- y=subset["TSNE2"],
1486
- size=8,
1487
- color=real_color_mapping[label],
1488
- alpha=0.7,
1489
- legend_label=str(label)
1490
- )
1491
 
1492
- p.legend.title = "Label"
1493
- p.legend.location = "top_right"
1494
- p.xaxis.axis_label = "t-SNE 1"
1495
- p.yaxis.axis_label = "t-SNE 2"
1496
 
1497
- st.bokeh_chart(p, use_container_width=True)
 
 
 
 
 
1498
 
1499
- # -------------------------------------------------------------------------
1500
- # BLOQUE: Heatmap de Características (adaptado a TSNE1 y TSNE2)
1501
- st.markdown("## Heatmap de Caracter铆sticas")
1502
- try:
1503
- df_heat = pd.read_csv(f"data/heatmaps_{model_name.lower()}.csv")
1504
- except Exception as e:
1505
- st.error(f"Error al cargar heatmaps.csv: {e}")
1506
- df_heat = None
1507
-
1508
- if df_heat is not None:
1509
- if 'img' not in df_all["real"].columns:
1510
- st.error("La columna 'img' no se encuentra en las muestras reales para hacer el merge con heatmaps.csv.")
1511
- else:
1512
- # Crear columna 'name' en las muestras reales (si aún no existe)
1513
- df_all["real"]["name"] = df_all["real"]["img"].apply(
1514
- lambda x: x.split("/")[-1].replace(".png", "") if isinstance(x, str) else x
1515
- )
1516
- # Merge con heatmaps.csv
1517
- df_heatmap_base = pd.merge(df_all["real"], df_heat, on="name", how="inner")
1518
-
1519
- # Opciones de features
1520
- feature_options = [col for col in df_heat.columns if col != "name"]
1521
- selected_feature = st.selectbox("Select heatmap feature:",
1522
- options=feature_options, key=f"heatmap_{model_name}")
1523
- select_extra_dataset_hm = st.selectbox("Select a dataset:",
1524
- options=model_options_with_default, key=f"heatmap_extra_dataset_{model_name}")
1525
-
1526
- # Definir un rango fijo y rejilla
1527
- x_min, x_max = -1, 1
1528
- y_min, y_max = -1, 1
1529
- grid_size = 50
1530
- x_bins = np.linspace(x_min, x_max, grid_size + 1)
1531
- y_bins = np.linspace(y_min, y_max, grid_size + 1)
1532
-
1533
- # Solo una combinación: TSNE1 vs TSNE2
1534
- x_comp, y_comp = "TSNE1", "TSNE2"
1535
- st.markdown(f"### Heatmap: {x_comp} vs {y_comp}")
1536
-
1537
- df_heatmap = df_heatmap_base.copy()
1538
- df_heatmap["x"] = df_heatmap[x_comp]
1539
- df_heatmap["y"] = df_heatmap[y_comp]
1540
-
1541
- # Si la feature seleccionada no es numérica, convertir
1542
- cat_mapping = None
1543
- if df_heatmap[selected_feature].dtype == bool or not pd.api.types.is_numeric_dtype(df_heatmap[selected_feature]):
1544
- cat = df_heatmap[selected_feature].astype('category')
1545
- cat_mapping = list(cat.cat.categories)
1546
- df_heatmap[selected_feature] = cat.cat.codes
1547
-
1548
- # Calcular estadística binned
1549
- heat_stat, x_edges, y_edges, binnumber = binned_statistic_2d(
1550
- df_heatmap['x'], df_heatmap['y'], df_heatmap[selected_feature],
1551
- statistic='mean', bins=[x_bins, y_bins]
1552
  )
1553
- heatmap_data = heat_stat.T
1554
-
1555
- # Color mapper
1556
- if selected_feature in model_options:
1557
- color_mapper = LinearColorMapper(
1558
- palette=red_green_palette,
1559
- low=0,
1560
- high=1,
1561
- nan_color='rgba(0, 0, 0, 0)'
1562
- )
 
 
 
 
 
 
 
 
 
 
1563
  else:
1564
- color_mapper = LinearColorMapper(
1565
- palette="Viridis256",
1566
- low=np.nanmin(heatmap_data),
1567
- high=np.nanmax(heatmap_data),
1568
- nan_color='rgba(0, 0, 0, 0)'
1569
  )
1570
-
1571
- # Figura heatmap
1572
- heatmap_fig = figure(title=f"Heatmap de '{selected_feature}' ({x_comp} vs {y_comp})",
1573
- x_range=(x_min, x_max), y_range=(y_min, y_max),
1574
- width=600, height=600,
1575
- tools="pan,wheel_zoom,reset,save", active_scroll="wheel_zoom", tooltips=TOOLTIPS,
1576
- sizing_mode="fixed")
1577
- heatmap_fig.match_aspect = True
1578
- heatmap_fig.xaxis.axis_label = x_comp
1579
- heatmap_fig.yaxis.axis_label = y_comp
1580
-
1581
- heatmap_fig.image(image=[heatmap_data], x=x_min, y=y_min,
1582
- dw=x_max - x_min, dh=y_max - y_min,
1583
- color_mapper=color_mapper)
1584
-
1585
- # Barra de color
1586
- color_bar = ColorBar(color_mapper=color_mapper, location=(0, 0))
1587
- if cat_mapping is not None:
1588
- ticks = list(range(len(cat_mapping)))
1589
- color_bar.ticker = FixedTicker(ticks=ticks)
1590
- categories_json = json.dumps(cat_mapping)
1591
- color_bar.formatter = FuncTickFormatter(code=f"""
1592
- var categories = {categories_json};
1593
- var index = Math.round(tick);
1594
- if(index >= 0 && index < categories.length) {{
1595
- return categories[index];
1596
- }} else {{
1597
- return "";
1598
- }}
1599
- """)
1600
- heatmap_fig.add_layout(color_bar, 'right')
1601
-
1602
- # Tooltips
1603
- source_points = ColumnDataSource(data={
1604
- 'x': df_heatmap['x'],
1605
- 'y': df_heatmap['y'],
1606
- 'img': df_heatmap['img'],
1607
- 'label': df_heatmap['name']
1608
- })
1609
- invisible_renderer = heatmap_fig.circle('x', 'y', size=10, source=source_points, fill_alpha=0, line_alpha=0.5)
1610
-
1611
- if select_extra_dataset_hm != "-":
1612
- df_extra = df_all["synthetic"][df_all["synthetic"]["source"] == select_extra_dataset_hm].copy()
1613
- df_extra["x"] = df_extra[x_comp]
1614
- df_extra["y"] = df_extra[y_comp]
1615
- if 'name' not in df_extra.columns:
1616
- df_extra["name"] = df_extra["img"].apply(lambda x: x.split("/")[-1].replace(".png", "") if isinstance(x, str) else x)
1617
- source_extra_points = ColumnDataSource(data={
1618
- 'x': df_extra['x'],
1619
- 'y': df_extra['y'],
1620
- 'img': df_extra['img'],
1621
- 'label': df_extra['name']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1622
  })
1623
- heatmap_fig.circle('x', 'y', size=5, source=source_extra_points, fill_alpha=0, line_alpha=0.5, color="purple")
1624
-
1625
- hover_tool_points = HoverTool(renderers=[invisible_renderer], tooltips=TOOLTIPS)
1626
- heatmap_fig.add_tools(hover_tool_points)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1627
 
1628
- st.bokeh_chart(heatmap_fig)
1629
-
1630
- # -------------------------------------------------------------------------
1631
- # Botón para descargar df_all (Embeddings en t-SNE Space)
1632
- if st.button("Download Embeddings in t-SNE Space", key=f"click_download_tsne_coordinates_{model_name}"):
1633
- df_all_tsne = {}
1634
- for key, df in df_all.items():
1635
- if key == "synthetic":
1636
- for source in df["source"].unique():
1637
- df_subset = df[df["source"] == source].copy()
1638
- if "img" in df_subset.columns and "name" not in df_subset.columns:
1639
- df_subset["name"] = df_subset["img"].apply(lambda x: x.split("/")[-1].replace(".png", "") if isinstance(x, str) else x)
1640
- tsne_cols = [col for col in df_subset.columns if col.startswith("TSNE") or col == "name"]
1641
- sheet_name = f"synthetic_{source}"
1642
- df_all_tsne[sheet_name] = df_subset[tsne_cols].copy()
1643
- else:
1644
- tsne_cols = [col for col in df.columns if col.startswith("TSNE") or col == "name"]
1645
- df_all_tsne[key] = df[tsne_cols].copy()
1646
-
1647
- excel_buffer = io.BytesIO()
1648
- with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
1649
- for key_name, df in df_all_tsne.items():
1650
- df.to_excel(writer, sheet_name=key_name, index=False)
1651
- excel_buffer.seek(0)
1652
-
1653
- st.download_button(
1654
- label="Download Embeddings in t-SNE Space",
1655
- data=excel_buffer,
1656
- file_name=f"df_all_tsne_{model_name.lower()}.xlsx",
1657
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1658
- key=f"download_tsne_coordinates_{model_name}"
1659
- )
1660
 
1661
 
1662
  def main():
 
1436
  )
1437
 
1438
  elif reduction_method == "t-nse":
1439
+ st.markdown("## t-SNE - Solo Muestras Reales")
1440
+ # -------------------------------------------------------------------------
1441
+ # 1. t-SNE sobre las muestras reales
1442
+ df_real_only = embeddings["real"].copy()
1443
+
1444
+ reducer_real = TSNE(n_components=2, perplexity=30, random_state=42)
1445
+ reduced_real = reducer_real.fit_transform(df_real_only[embedding_cols].values)
1446
+
1447
+ # Agregar columnas TSNE1, TSNE2
1448
+ df_real_only['TSNE1'] = reduced_real[:, 0]
1449
+ df_real_only['TSNE2'] = reduced_real[:, 1]
1450
 
1451
+ unique_labels_real = sorted(df_real_only['label'].unique().tolist())
1452
+
1453
+ # Mapeo de colores para las muestras reales usando la paleta Reds9
1454
+ num_labels = len(unique_labels_real)
1455
+ if num_labels <= 9:
1456
+ red_palette = Reds9[:num_labels]
1457
+ else:
1458
+ red_palette = (Reds9 * ((num_labels // 9) + 1))[:num_labels]
1459
+ real_color_mapping = {label: red_palette[i] for i, label in enumerate(unique_labels_real)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1460
 
1461
+ # -------------------------------------------------------------------------
1462
+ # Crear plot interactivo con Bokeh
1463
+ st.subheader("t-SNE - Real: Visualizaci贸n Interactiva")
 
 
 
 
 
 
 
1464
 
1465
+ source = ColumnDataSource(df_real_only)
 
 
 
1466
 
1467
+ hover = HoverTool(tooltips=[
1468
+ ("Index", "$index"),
1469
+ ("Label", "@label"),
1470
+ ("TSNE1", "@TSNE1"),
1471
+ ("TSNE2", "@TSNE2")
1472
+ ])
1473
 
1474
+ p = figure(
1475
+ width=800,
1476
+ height=600,
1477
+ title="t-SNE sobre muestras reales",
1478
+ tools=["pan", "wheel_zoom", "box_zoom", "reset", hover]
1479
+ )
1480
+
1481
+ for label in unique_labels_real:
1482
+ subset = df_real_only[df_real_only['label'] == label]
1483
+ p.scatter(
1484
+ x=subset["TSNE1"],
1485
+ y=subset["TSNE2"],
1486
+ size=8,
1487
+ color=real_color_mapping[label],
1488
+ alpha=0.7,
1489
+ legend_label=str(label)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1490
  )
1491
+
1492
+ p.legend.title = "Label"
1493
+ p.legend.location = "top_right"
1494
+ p.xaxis.axis_label = "t-SNE 1"
1495
+ p.yaxis.axis_label = "t-SNE 2"
1496
+
1497
+ st.bokeh_chart(p, use_container_width=True)
1498
+
1499
+ # -------------------------------------------------------------------------
1500
+ # BLOQUE: Heatmap de Características (adaptado a TSNE1 y TSNE2)
1501
+ st.markdown("## Heatmap de Caracter铆sticas")
1502
+ try:
1503
+ df_heat = pd.read_csv(f"data/heatmaps_{model_name.lower()}.csv")
1504
+ except Exception as e:
1505
+ st.error(f"Error al cargar heatmaps.csv: {e}")
1506
+ df_heat = None
1507
+
1508
+ if df_heat is not None:
1509
+ if 'img' not in df_all["real"].columns:
1510
+ st.error("La columna 'img' no se encuentra en las muestras reales para hacer el merge con heatmaps.csv.")
1511
  else:
1512
+ # Crear columna 'name' en las muestras reales (si aún no existe)
1513
+ df_all["real"]["name"] = df_all["real"]["img"].apply(
1514
+ lambda x: x.split("/")[-1].replace(".png", "") if isinstance(x, str) else x
 
 
1515
  )
1516
+ # Merge con heatmaps.csv
1517
+ df_heatmap_base = pd.merge(df_all["real"], df_heat, on="name", how="inner")
1518
+
1519
+ # Opciones de features
1520
+ feature_options = [col for col in df_heat.columns if col != "name"]
1521
+ selected_feature = st.selectbox("Select heatmap feature:",
1522
+ options=feature_options, key=f"heatmap_{model_name}")
1523
+ select_extra_dataset_hm = st.selectbox("Select a dataset:",
1524
+ options=model_options_with_default, key=f"heatmap_extra_dataset_{model_name}")
1525
+
1526
+ # Definir un rango fijo y rejilla
1527
+ x_min, x_max = -1, 1
1528
+ y_min, y_max = -1, 1
1529
+ grid_size = 50
1530
+ x_bins = np.linspace(x_min, x_max, grid_size + 1)
1531
+ y_bins = np.linspace(y_min, y_max, grid_size + 1)
1532
+
1533
+ # Solo una combinación: TSNE1 vs TSNE2
1534
+ x_comp, y_comp = "TSNE1", "TSNE2"
1535
+ st.markdown(f"### Heatmap: {x_comp} vs {y_comp}")
1536
+
1537
+ df_heatmap = df_heatmap_base.copy()
1538
+ df_heatmap["x"] = df_heatmap[x_comp]
1539
+ df_heatmap["y"] = df_heatmap[y_comp]
1540
+
1541
+ # Si la feature seleccionada no es numérica, convertir
1542
+ cat_mapping = None
1543
+ if df_heatmap[selected_feature].dtype == bool or not pd.api.types.is_numeric_dtype(df_heatmap[selected_feature]):
1544
+ cat = df_heatmap[selected_feature].astype('category')
1545
+ cat_mapping = list(cat.cat.categories)
1546
+ df_heatmap[selected_feature] = cat.cat.codes
1547
+
1548
+ # Calcular estadística binned
1549
+ heat_stat, x_edges, y_edges, binnumber = binned_statistic_2d(
1550
+ df_heatmap['x'], df_heatmap['y'], df_heatmap[selected_feature],
1551
+ statistic='mean', bins=[x_bins, y_bins]
1552
+ )
1553
+ heatmap_data = heat_stat.T
1554
+
1555
+ # Color mapper
1556
+ if selected_feature in model_options:
1557
+ color_mapper = LinearColorMapper(
1558
+ palette=red_green_palette,
1559
+ low=0,
1560
+ high=1,
1561
+ nan_color='rgba(0, 0, 0, 0)'
1562
+ )
1563
+ else:
1564
+ color_mapper = LinearColorMapper(
1565
+ palette="Viridis256",
1566
+ low=np.nanmin(heatmap_data),
1567
+ high=np.nanmax(heatmap_data),
1568
+ nan_color='rgba(0, 0, 0, 0)'
1569
+ )
1570
+
1571
+ # Figura heatmap
1572
+ heatmap_fig = figure(title=f"Heatmap de '{selected_feature}' ({x_comp} vs {y_comp})",
1573
+ x_range=(x_min, x_max), y_range=(y_min, y_max),
1574
+ width=600, height=600,
1575
+ tools="pan,wheel_zoom,reset,save", active_scroll="wheel_zoom", tooltips=TOOLTIPS,
1576
+ sizing_mode="fixed")
1577
+ heatmap_fig.match_aspect = True
1578
+ heatmap_fig.xaxis.axis_label = x_comp
1579
+ heatmap_fig.yaxis.axis_label = y_comp
1580
+
1581
+ heatmap_fig.image(image=[heatmap_data], x=x_min, y=y_min,
1582
+ dw=x_max - x_min, dh=y_max - y_min,
1583
+ color_mapper=color_mapper)
1584
+
1585
+ # Barra de color
1586
+ color_bar = ColorBar(color_mapper=color_mapper, location=(0, 0))
1587
+ if cat_mapping is not None:
1588
+ ticks = list(range(len(cat_mapping)))
1589
+ color_bar.ticker = FixedTicker(ticks=ticks)
1590
+ categories_json = json.dumps(cat_mapping)
1591
+ color_bar.formatter = FuncTickFormatter(code=f"""
1592
+ var categories = {categories_json};
1593
+ var index = Math.round(tick);
1594
+ if(index >= 0 && index < categories.length) {{
1595
+ return categories[index];
1596
+ }} else {{
1597
+ return "";
1598
+ }}
1599
+ """)
1600
+ heatmap_fig.add_layout(color_bar, 'right')
1601
+
1602
+ # Tooltips
1603
+ source_points = ColumnDataSource(data={
1604
+ 'x': df_heatmap['x'],
1605
+ 'y': df_heatmap['y'],
1606
+ 'img': df_heatmap['img'],
1607
+ 'label': df_heatmap['name']
1608
  })
1609
+ invisible_renderer = heatmap_fig.circle('x', 'y', size=10, source=source_points, fill_alpha=0, line_alpha=0.5)
1610
+
1611
+ if select_extra_dataset_hm != "-":
1612
+ df_extra = df_all["synthetic"][df_all["synthetic"]["source"] == select_extra_dataset_hm].copy()
1613
+ df_extra["x"] = df_extra[x_comp]
1614
+ df_extra["y"] = df_extra[y_comp]
1615
+ if 'name' not in df_extra.columns:
1616
+ df_extra["name"] = df_extra["img"].apply(lambda x: x.split("/")[-1].replace(".png", "") if isinstance(x, str) else x)
1617
+ source_extra_points = ColumnDataSource(data={
1618
+ 'x': df_extra['x'],
1619
+ 'y': df_extra['y'],
1620
+ 'img': df_extra['img'],
1621
+ 'label': df_extra['name']
1622
+ })
1623
+ heatmap_fig.circle('x', 'y', size=5, source=source_extra_points, fill_alpha=0, line_alpha=0.5, color="purple")
1624
+
1625
+ hover_tool_points = HoverTool(renderers=[invisible_renderer], tooltips=TOOLTIPS)
1626
+ heatmap_fig.add_tools(hover_tool_points)
1627
+
1628
+ st.bokeh_chart(heatmap_fig)
1629
+
1630
+ # -------------------------------------------------------------------------
1631
+ # Botón para descargar df_all (Embeddings en t-SNE Space)
1632
+ if st.button("Download Embeddings in t-SNE Space", key=f"click_download_tsne_coordinates_{model_name}"):
1633
+ df_all_tsne = {}
1634
+ for key, df in df_all.items():
1635
+ if key == "synthetic":
1636
+ for source in df["source"].unique():
1637
+ df_subset = df[df["source"] == source].copy()
1638
+ if "img" in df_subset.columns and "name" not in df_subset.columns:
1639
+ df_subset["name"] = df_subset["img"].apply(lambda x: x.split("/")[-1].replace(".png", "") if isinstance(x, str) else x)
1640
+ tsne_cols = [col for col in df_subset.columns if col.startswith("TSNE") or col == "name"]
1641
+ sheet_name = f"synthetic_{source}"
1642
+ df_all_tsne[sheet_name] = df_subset[tsne_cols].copy()
1643
+ else:
1644
+ tsne_cols = [col for col in df.columns if col.startswith("TSNE") or col == "name"]
1645
+ df_all_tsne[key] = df[tsne_cols].copy()
1646
+
1647
+ excel_buffer = io.BytesIO()
1648
+ with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
1649
+ for key_name, df in df_all_tsne.items():
1650
+ df.to_excel(writer, sheet_name=key_name, index=False)
1651
+ excel_buffer.seek(0)
1652
 
1653
+ st.download_button(
1654
+ label="Download Embeddings in t-SNE Space",
1655
+ data=excel_buffer,
1656
+ file_name=f"df_all_tsne_{model_name.lower()}.xlsx",
1657
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1658
+ key=f"download_tsne_coordinates_{model_name}"
1659
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1660
 
1661
 
1662
  def main():