de-Rodrigo committed on
Commit
0ac9498
·
1 Parent(s): 5b26920

Include Paligemma Embeddings Visualization

Browse files
Files changed (1) hide show
  1. app.py +43 -1
app.py CHANGED
@@ -124,6 +124,45 @@ def load_embeddings(model, version, embedding_prefix, weight_factor):
124
  "synthetic": pd.concat([df_seq, df_line, df_par, df_rot, df_zoom, df_render], ignore_index=True),
125
  "pretrained": df_pretratrained}
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  else:
128
  st.error("Modelo no reconocido")
129
  return None
@@ -1357,13 +1396,16 @@ def run_model(model_name):
1357
 
1358
  def main():
1359
  config_style()
1360
- tabs = st.tabs(["Donut", "Idefics2"])
1361
  with tabs[0]:
1362
  st.markdown('<h2 class="sub-title">Donut 🤗</h2>', unsafe_allow_html=True)
1363
  run_model("Donut")
1364
  with tabs[1]:
1365
  st.markdown('<h2 class="sub-title">Idefics2 🤗</h2>', unsafe_allow_html=True)
1366
  run_model("Idefics2")
 
 
 
1367
 
1368
  if __name__ == "__main__":
1369
  main()
 
124
  "synthetic": pd.concat([df_seq, df_line, df_par, df_rot, df_zoom, df_render], ignore_index=True),
125
  "pretrained": df_pretratrained}
126
 
127
+ elif model == "Paligemma":
128
+ df_real = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_secret_britanico_{weight_factor}embeddings.csv")
129
+ df_par = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-paragraph-degradation-seq_{weight_factor}embeddings.csv")
130
+ df_line = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-line-degradation-seq_{weight_factor}embeddings.csv")
131
+ df_seq = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-seq_{weight_factor}embeddings.csv")
132
+ df_rot = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-rotation-degradation-seq_{weight_factor}embeddings.csv")
133
+ df_zoom = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-digital-zoom-degradation-seq_{weight_factor}embeddings.csv")
134
+ df_render = pd.read_csv(f"data/paligemma/{version}/{embedding_prefix}/de_Rodrigo_merit_es-render-seq_{weight_factor}embeddings.csv")
135
+
136
+ # Cargar ambos subconjuntos pretrained y combinarlos
137
+ # TODO Pretrained de idefics2, se mantienen para evitar error, pero se debe meter los de paligemma
138
+ df_pretratrained_PDFA = pd.read_csv(f"data/idefics2/{version}/{embedding_prefix}/de_Rodrigo_merit_aux_PDFA_{weight_factor}embeddings.csv")
139
+ df_pretratrained_IDL = pd.read_csv(f"data/idefics2/{version}/{embedding_prefix}/de_Rodrigo_merit_aux_IDL_{weight_factor}embeddings.csv")
140
+ df_pretratrained = pd.concat([df_pretratrained_PDFA, df_pretratrained_IDL], ignore_index=True)
141
+
142
+ # Asignar etiquetas de versión
143
+ df_real["version"] = "real"
144
+ df_par["version"] = "synthetic"
145
+ df_line["version"] = "synthetic"
146
+ df_seq["version"] = "synthetic"
147
+ df_rot["version"] = "synthetic"
148
+ df_zoom["version"] = "synthetic"
149
+ df_render["version"] = "synthetic"
150
+ df_pretratrained["version"] = "pretrained"
151
+
152
+ # Asignar fuente (source)
153
+ df_par["source"] = "es-digital-paragraph-degradation-seq"
154
+ df_line["source"] = "es-digital-line-degradation-seq"
155
+ df_seq["source"] = "es-digital-seq"
156
+ df_rot["source"] = "es-digital-rotation-degradation-seq"
157
+ df_zoom["source"] = "es-digital-zoom-degradation-seq"
158
+ df_render["source"] = "es-render-seq"
159
+ df_pretratrained["source"] = "pretrained"
160
+
161
+ return {"real": df_real,
162
+ "synthetic": pd.concat([df_seq, df_line, df_par, df_rot, df_zoom, df_render], ignore_index=True),
163
+ "pretrained": df_pretratrained}
164
+
165
+
166
  else:
167
  st.error("Modelo no reconocido")
168
  return None
 
1396
 
1397
  def main():
1398
  config_style()
1399
+ tabs = st.tabs(["Donut", "Idefics2" , "Paligemma"])
1400
  with tabs[0]:
1401
  st.markdown('<h2 class="sub-title">Donut 🤗</h2>', unsafe_allow_html=True)
1402
  run_model("Donut")
1403
  with tabs[1]:
1404
  st.markdown('<h2 class="sub-title">Idefics2 🤗</h2>', unsafe_allow_html=True)
1405
  run_model("Idefics2")
1406
+ with tabs[2]:
1407
+ st.markdown('<h2 class="sub-title">Paligemma 🤗</h2>', unsafe_allow_html=True)
1408
+ run_model("Paligemma")
1409
 
1410
  if __name__ == "__main__":
1411
  main()