Spaces:
Sleeping
Sleeping
Commit
·
a685ec6
1
Parent(s):
76f3be3
Selector to Include Pretrained Datasets
Browse files
app.py
CHANGED
|
@@ -537,7 +537,7 @@ def optimize_tsne_params(df_combined, embedding_cols, df_f1, distance_metric):
|
|
| 537 |
|
| 538 |
def run_model(model_name):
|
| 539 |
version = st.selectbox("Select Model Version:", options=["vanilla", "finetuned_real"], key=f"version_{model_name}")
|
| 540 |
-
#
|
| 541 |
embedding_computation = st.selectbox("¿Cómo se computa el embedding?", options=["weighted", "averaged"], key=f"embedding_method_{model_name}")
|
| 542 |
# Se asigna el prefijo correspondiente
|
| 543 |
prefijo_embedding = "weighted_" if embedding_computation == "weighted" else "averaged_"
|
|
@@ -545,7 +545,16 @@ def run_model(model_name):
|
|
| 545 |
embeddings = load_embeddings(model_name, version, prefijo_embedding)
|
| 546 |
if embeddings is None:
|
| 547 |
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 548 |
embedding_cols = [col for col in embeddings["real"].columns if col.startswith("dim_")]
|
|
|
|
| 549 |
df_combined = pd.concat(list(embeddings.values()), ignore_index=True)
|
| 550 |
|
| 551 |
try:
|
|
@@ -611,36 +620,30 @@ def run_model(model_name):
|
|
| 611 |
st.write(f"Trustworthiness: {result['trustworthiness']:.4f}")
|
| 612 |
st.write(f"Continuity: {result['continuity']:.4f}")
|
| 613 |
|
|
|
|
| 614 |
if reduction_method == "PCA" and result.get("pca_model") is not None:
|
| 615 |
pca_model = result["pca_model"]
|
| 616 |
components = pca_model.components_ # Shape: (n_components, n_features)
|
| 617 |
|
| 618 |
st.subheader("Pesos de las Componentes Principales (Loadings)")
|
| 619 |
-
#
|
| 620 |
for i, comp in enumerate(components):
|
| 621 |
-
# Fuente de datos con nombres de dimensiones y pesos
|
| 622 |
source = ColumnDataSource(data=dict(
|
| 623 |
-
dimensions=embedding_cols,
|
| 624 |
weight=comp
|
| 625 |
))
|
| 626 |
-
# Definir la figura usando el rango en x, pero ocultamos las etiquetas del eje
|
| 627 |
p = figure(x_range=embedding_cols, title=f"Componente Principal {i+1}",
|
| 628 |
-
|
| 629 |
-
|
| 630 |
p.vbar(x='dimensions', top='weight', width=0.8, source=source)
|
| 631 |
-
# Ocultar
|
| 632 |
p.xaxis.major_label_text_font_size = '0pt'
|
| 633 |
-
|
| 634 |
-
# Agregar HoverTool para que al pasar el mouse se muestren los datos
|
| 635 |
hover = HoverTool(tooltips=[("Dimensión", "@dimensions"), ("Peso", "@weight")])
|
| 636 |
p.add_tools(hover)
|
| 637 |
-
|
| 638 |
-
# Opcionalmente, puedes seguir definiendo las etiquetas de los ejes (aunque en x no se mostrarán)
|
| 639 |
p.xaxis.axis_label = "Dimensiones originales"
|
| 640 |
p.yaxis.axis_label = "Peso"
|
| 641 |
-
|
| 642 |
st.bokeh_chart(p)
|
| 643 |
-
|
| 644 |
|
| 645 |
data_table, df_table, source_table = create_table(result["df_distances"])
|
| 646 |
real_subset_names = list(df_table.columns[1:])
|
|
|
|
| 537 |
|
| 538 |
def run_model(model_name):
|
| 539 |
version = st.selectbox("Select Model Version:", options=["vanilla", "finetuned_real"], key=f"version_{model_name}")
|
| 540 |
+
# Selector para el método de cómputo del embedding
|
| 541 |
embedding_computation = st.selectbox("¿Cómo se computa el embedding?", options=["weighted", "averaged"], key=f"embedding_method_{model_name}")
|
| 542 |
# Se asigna el prefijo correspondiente
|
| 543 |
prefijo_embedding = "weighted_" if embedding_computation == "weighted" else "averaged_"
|
|
|
|
| 545 |
embeddings = load_embeddings(model_name, version, prefijo_embedding)
|
| 546 |
if embeddings is None:
|
| 547 |
return
|
| 548 |
+
|
| 549 |
+
# Nuevo selector para incluir o excluir el dataset pretrained
|
| 550 |
+
include_pretrained = st.checkbox("Incluir dataset pretrained", value=True)
|
| 551 |
+
if not include_pretrained:
|
| 552 |
+
# Removemos la entrada pretrained del diccionario, si existe.
|
| 553 |
+
embeddings.pop("pretrained", None)
|
| 554 |
+
|
| 555 |
+
# Extraer columnas de embedding de los datos "real"
|
| 556 |
embedding_cols = [col for col in embeddings["real"].columns if col.startswith("dim_")]
|
| 557 |
+
# Concatenamos los datasets disponibles (ahora, sin pretrained si se deseleccionó)
|
| 558 |
df_combined = pd.concat(list(embeddings.values()), ignore_index=True)
|
| 559 |
|
| 560 |
try:
|
|
|
|
| 620 |
st.write(f"Trustworthiness: {result['trustworthiness']:.4f}")
|
| 621 |
st.write(f"Continuity: {result['continuity']:.4f}")
|
| 622 |
|
| 623 |
+
# Si se usó PCA, se muestran los plots de loadings con Bokeh (con hover para ver la etiqueta)
|
| 624 |
if reduction_method == "PCA" and result.get("pca_model") is not None:
|
| 625 |
pca_model = result["pca_model"]
|
| 626 |
components = pca_model.components_ # Shape: (n_components, n_features)
|
| 627 |
|
| 628 |
st.subheader("Pesos de las Componentes Principales (Loadings)")
|
| 629 |
+
# Se crea un plot de barras por cada componente
|
| 630 |
for i, comp in enumerate(components):
|
|
|
|
| 631 |
source = ColumnDataSource(data=dict(
|
| 632 |
+
dimensions=embedding_cols,
|
| 633 |
weight=comp
|
| 634 |
))
|
|
|
|
| 635 |
p = figure(x_range=embedding_cols, title=f"Componente Principal {i+1}",
|
| 636 |
+
plot_height=400, plot_width=600,
|
| 637 |
+
toolbar_location=None, tools="")
|
| 638 |
p.vbar(x='dimensions', top='weight', width=0.8, source=source)
|
| 639 |
+
# Ocultar etiquetas del eje x para un aspecto más limpio
|
| 640 |
p.xaxis.major_label_text_font_size = '0pt'
|
| 641 |
+
# Agregar HoverTool para mostrar la dimensión y su peso
|
|
|
|
| 642 |
hover = HoverTool(tooltips=[("Dimensión", "@dimensions"), ("Peso", "@weight")])
|
| 643 |
p.add_tools(hover)
|
|
|
|
|
|
|
| 644 |
p.xaxis.axis_label = "Dimensiones originales"
|
| 645 |
p.yaxis.axis_label = "Peso"
|
|
|
|
| 646 |
st.bokeh_chart(p)
|
|
|
|
| 647 |
|
| 648 |
data_table, df_table, source_table = create_table(result["df_distances"])
|
| 649 |
real_subset_names = list(df_table.columns[1:])
|