Spaces:
Sleeping
Sleeping
Commit
·
1af1b88
1
Parent(s):
fa618ec
Upload Two Pretrained Datasets (IDL and PDFA)
Browse files
app.py
CHANGED
|
@@ -68,7 +68,6 @@ def load_embeddings(model, version, embedding_prefix):
|
|
| 68 |
df_rot["source"] = "es-digital-rotation-degradation-seq"
|
| 69 |
df_zoom["source"] = "es-digital-zoom-degradation-seq"
|
| 70 |
df_render["source"] = "es-render-seq"
|
| 71 |
-
# Si lo requieres, puedes asignar también una fuente para pretrained
|
| 72 |
df_pretratrained["source"] = "pretrained"
|
| 73 |
|
| 74 |
return {"real": df_real,
|
|
@@ -83,8 +82,13 @@ def load_embeddings(model, version, embedding_prefix):
|
|
| 83 |
df_rot = pd.read_csv(f"data/idefics2_{version}_de_Rodrigo_merit_es-digital-rotation-degradation-seq_{embedding_prefix}embeddings.csv")
|
| 84 |
df_zoom = pd.read_csv(f"data/idefics2_{version}_de_Rodrigo_merit_es-digital-zoom-degradation-seq_{embedding_prefix}embeddings.csv")
|
| 85 |
df_render = pd.read_csv(f"data/idefics2_{version}_de_Rodrigo_merit_es-render-seq_{embedding_prefix}embeddings.csv")
|
| 86 |
-
df_pretratrained = pd.read_csv(f"data/idefics2_{version}_de_Rodrigo_merit_pretrained_{embedding_prefix}embeddings.csv")
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
df_real["version"] = "real"
|
| 89 |
df_par["version"] = "synthetic"
|
| 90 |
df_line["version"] = "synthetic"
|
|
@@ -94,6 +98,7 @@ def load_embeddings(model, version, embedding_prefix):
|
|
| 94 |
df_render["version"] = "synthetic"
|
| 95 |
df_pretratrained["version"] = "pretrained"
|
| 96 |
|
|
|
|
| 97 |
df_par["source"] = "es-digital-paragraph-degradation-seq"
|
| 98 |
df_line["source"] = "es-digital-line-degradation-seq"
|
| 99 |
df_seq["source"] = "es-digital-seq"
|
|
@@ -112,6 +117,7 @@ def load_embeddings(model, version, embedding_prefix):
|
|
| 112 |
|
| 113 |
|
| 114 |
|
|
|
|
| 115 |
def split_versions(df_combined, reduced):
|
| 116 |
# Asignar las coordenadas si la reducción es 2D
|
| 117 |
if reduced.shape[1] == 2:
|
|
|
|
| 68 |
df_rot["source"] = "es-digital-rotation-degradation-seq"
|
| 69 |
df_zoom["source"] = "es-digital-zoom-degradation-seq"
|
| 70 |
df_render["source"] = "es-render-seq"
|
|
|
|
| 71 |
df_pretratrained["source"] = "pretrained"
|
| 72 |
|
| 73 |
return {"real": df_real,
|
|
|
|
| 82 |
df_rot = pd.read_csv(f"data/idefics2_{version}_de_Rodrigo_merit_es-digital-rotation-degradation-seq_{embedding_prefix}embeddings.csv")
|
| 83 |
df_zoom = pd.read_csv(f"data/idefics2_{version}_de_Rodrigo_merit_es-digital-zoom-degradation-seq_{embedding_prefix}embeddings.csv")
|
| 84 |
df_render = pd.read_csv(f"data/idefics2_{version}_de_Rodrigo_merit_es-render-seq_{embedding_prefix}embeddings.csv")
|
|
|
|
| 85 |
|
| 86 |
+
# Cargar ambos subconjuntos pretrained y combinarlos
|
| 87 |
+
df_pretratrained_PDFA = pd.read_csv(f"data/idefics2_{version}_de_Rodrigo_merit_aux_PDFA_{embedding_prefix}embeddings.csv")
|
| 88 |
+
df_pretratrained_IDL = pd.read_csv(f"data/idefics2_{version}_de_Rodrigo_merit_aux_IDL_{embedding_prefix}embeddings.csv")
|
| 89 |
+
df_pretratrained = pd.concat([df_pretratrained_PDFA, df_pretratrained_IDL], ignore_index=True)
|
| 90 |
+
|
| 91 |
+
# Asignar etiquetas de versión
|
| 92 |
df_real["version"] = "real"
|
| 93 |
df_par["version"] = "synthetic"
|
| 94 |
df_line["version"] = "synthetic"
|
|
|
|
| 98 |
df_render["version"] = "synthetic"
|
| 99 |
df_pretratrained["version"] = "pretrained"
|
| 100 |
|
| 101 |
+
# Asignar fuente (source)
|
| 102 |
df_par["source"] = "es-digital-paragraph-degradation-seq"
|
| 103 |
df_line["source"] = "es-digital-line-degradation-seq"
|
| 104 |
df_seq["source"] = "es-digital-seq"
|
|
|
|
| 117 |
|
| 118 |
|
| 119 |
|
| 120 |
+
|
| 121 |
def split_versions(df_combined, reduced):
|
| 122 |
# Asignar las coordenadas si la reducción es 2D
|
| 123 |
if reduced.shape[1] == 2:
|