Spaces:
Running
Running
Commit
·
0ac9498
1
Parent(s):
5b26920
Include Paligemma Embeddings Visualization
Browse files
app.py
CHANGED
|
@@ -124,6 +124,45 @@ def load_embeddings(model, version, embedding_prefix, weight_factor):
|
|
| 124 |
"synthetic": pd.concat([df_seq, df_line, df_par, df_rot, df_zoom, df_render], ignore_index=True),
|
| 125 |
"pretrained": df_pretratrained}
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
else:
|
| 128 |
st.error("Modelo no reconocido")
|
| 129 |
return None
|
|
@@ -1357,13 +1396,16 @@ def run_model(model_name):
|
|
| 1357 |
|
| 1358 |
def main():
    """Entry point: call ``config_style()``, then build one tab per model.

    Inside each tab a ``sub-title`` heading with the model name is emitted
    as raw HTML, after which ``run_model`` is called with that model name.
    """
    config_style()
    model_names = ("Donut", "Idefics2")
    # Pair every tab container with the label it was created from so the
    # heading and the run_model() call always agree with the tab title.
    for tab, model_name in zip(st.tabs(list(model_names)), model_names):
        with tab:
            st.markdown(
                f'<h2 class="sub-title">{model_name} 🤗</h2>',
                unsafe_allow_html=True,
            )
            run_model(model_name)
|
|
|
|
|
|
|
|
|
|
| 1367 |
|
| 1368 |
if __name__ == "__main__":
|
| 1369 |
main()
|
|
|
|
| 124 |
"synthetic": pd.concat([df_seq, df_line, df_par, df_rot, df_zoom, df_render], ignore_index=True),
|
| 125 |
"pretrained": df_pretratrained}
|
| 126 |
|
| 127 |
+
elif model == "Paligemma":
|
| 128 |
+
df_real = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_secret_britanico_{weight_factor}embeddings.csv")
|
| 129 |
+
df_par = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-paragraph-degradation-seq_{weight_factor}embeddings.csv")
|
| 130 |
+
df_line = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-line-degradation-seq_{weight_factor}embeddings.csv")
|
| 131 |
+
df_seq = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-seq_{weight_factor}embeddings.csv")
|
| 132 |
+
df_rot = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-rotation-degradation-seq_{weight_factor}embeddings.csv")
|
| 133 |
+
df_zoom = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-zoom-degradation-seq_{weight_factor}embeddings.csv")
|
| 134 |
+
df_render = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-render-seq_{weight_factor}embeddings.csv")
|
| 135 |
+
|
| 136 |
+
# Cargar ambos subconjuntos pretrained y combinarlos
|
| 137 |
+
# TODO Pretrained de idefics2, se mantienen para evitar error, pero se debe meter los de paligemma
|
| 138 |
+
df_pretratrained_PDFA = pd.read_csv(f"data/idefics2/{version}/{embedding_prefix}/de_Rodrigo_merit_aux_PDFA_{weight_factor}embeddings.csv")
|
| 139 |
+
df_pretratrained_IDL = pd.read_csv(f"data/idefics2/{version}/{embedding_prefix}/de_Rodrigo_merit_aux_IDL_{weight_factor}embeddings.csv")
|
| 140 |
+
df_pretratrained = pd.concat([df_pretratrained_PDFA, df_pretratrained_IDL], ignore_index=True)
|
| 141 |
+
|
| 142 |
+
# Asignar etiquetas de versión
|
| 143 |
+
df_real["version"] = "real"
|
| 144 |
+
df_par["version"] = "synthetic"
|
| 145 |
+
df_line["version"] = "synthetic"
|
| 146 |
+
df_seq["version"] = "synthetic"
|
| 147 |
+
df_rot["version"] = "synthetic"
|
| 148 |
+
df_zoom["version"] = "synthetic"
|
| 149 |
+
df_render["version"] = "synthetic"
|
| 150 |
+
df_pretratrained["version"] = "pretrained"
|
| 151 |
+
|
| 152 |
+
# Asignar fuente (source)
|
| 153 |
+
df_par["source"] = "es-digital-paragraph-degradation-seq"
|
| 154 |
+
df_line["source"] = "es-digital-line-degradation-seq"
|
| 155 |
+
df_seq["source"] = "es-digital-seq"
|
| 156 |
+
df_rot["source"] = "es-digital-rotation-degradation-seq"
|
| 157 |
+
df_zoom["source"] = "es-digital-zoom-degradation-seq"
|
| 158 |
+
df_render["source"] = "es-render-seq"
|
| 159 |
+
df_pretratrained["source"] = "pretrained"
|
| 160 |
+
|
| 161 |
+
return {"real": df_real,
|
| 162 |
+
"synthetic": pd.concat([df_seq, df_line, df_par, df_rot, df_zoom, df_render], ignore_index=True),
|
| 163 |
+
"pretrained": df_pretratrained}
|
| 164 |
+
|
| 165 |
+
|
| 166 |
else:
|
| 167 |
st.error("Modelo no reconocido")
|
| 168 |
return None
|
|
|
|
| 1396 |
|
| 1397 |
def main():
    """Entry point: call ``config_style()``, then build one tab per model.

    Inside each tab a ``sub-title`` heading with the model name is emitted
    as raw HTML, after which ``run_model`` is called with that model name.
    """
    config_style()
    model_names = ("Donut", "Idefics2", "Paligemma")
    # Pair every tab container with the label it was created from so the
    # heading and the run_model() call always agree with the tab title.
    for tab, model_name in zip(st.tabs(list(model_names)), model_names):
        with tab:
            st.markdown(
                f'<h2 class="sub-title">{model_name} 🤗</h2>',
                unsafe_allow_html=True,
            )
            run_model(model_name)
|
| 1409 |
|
| 1410 |
if __name__ == "__main__":
|
| 1411 |
main()
|