Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -65,6 +65,43 @@ def parse_code_name(codes_raw: str, text_original: str) -> Tuple[str, str]:
|
|
| 65 |
name = (re.search(r"NOMBRE\s*:\s*([^|]+)", codes_raw, flags=re.I) or re.search(r"NOMBRE\s*:\s*([^|]+)", text_original, flags=re.I))
|
| 66 |
return (code.group(1).strip() if code else ""), (name.group(1).strip() if name else "")
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
# =========================
|
| 69 |
# Reglas
|
| 70 |
# =========================
|
|
@@ -713,12 +750,11 @@ def recomendar(query: str):
|
|
| 713 |
parsed = df.apply(lambda r: parse_code_name(r.get("codes_raw",""), r.get("text_original","")), axis=1)
|
| 714 |
df["Código"] = [c for c,_ in parsed]; df["Nombre"] = [n for _,n in parsed]
|
| 715 |
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
return out, "OK"
|
| 722 |
|
| 723 |
# =========================
|
| 724 |
# Exportar (xlsx con fallback a csv)
|
|
|
|
| 65 |
name = (re.search(r"NOMBRE\s*:\s*([^|]+)", codes_raw, flags=re.I) or re.search(r"NOMBRE\s*:\s*([^|]+)", text_original, flags=re.I))
|
| 66 |
return (code.group(1).strip() if code else ""), (name.group(1).strip() if name else "")
|
| 67 |
|
| 68 |
+
# --- añade esto cerca de tus utilidades, debajo de parse_code_name ---
|
| 69 |
+
# Catalogs kept in the final result, listed in the order their rows are emitted.
ORDER_CATS = ["CICP", "CPC", "UNSPSC"]
|
| 70 |
+
|
| 71 |
+
def normalize_unspsc_if_cpc_901(rows):
    """Force the UNSPSC entry to "N/A" when a CPC entry carries code 901.

    The CPC row itself is kept as-is.  Every UNSPSC row is collapsed into a
    single "N/A" row placed where the first UNSPSC row was; if there was no
    UNSPSC row, the "N/A" row is appended.  When no CPC row has code 901 the
    rows are returned unchanged.

    Args:
        rows: Iterable of dicts with the keys "Catálogo", "Código", "Nombre"
            and "Similaridad".

    Returns:
        A new list of row dicts; neither the input nor its dicts are mutated.
    """
    rows = list(rows)

    # The code is compared as stripped text so 901, "901" and " 901 " all match.
    cpc_is_901 = any(
        r["Catálogo"] == "CPC" and str(r["Código"]).strip() == "901"
        for r in rows
    )
    if not cpc_is_901:
        return rows

    out = []
    unspsc_written = False
    for r in rows:
        if r["Catálogo"] != "UNSPSC":
            # Keep every other catalog (including the CPC 901 row) untouched.
            out.append(r)
        elif not unspsc_written:
            out.append({"Catálogo": "UNSPSC", "Código": "N/A", "Nombre": "N/A", "Similaridad": 1.0})
            unspsc_written = True
    if not unspsc_written:
        out.append({"Catálogo": "UNSPSC", "Código": "N/A", "Nombre": "N/A", "Similaridad": 1.0})
    return out
|
| 80 |
+
|
| 81 |
+
def order_and_fill_one_per_catalog(df):
    """Return the top-scoring row of each catalog in the fixed ORDER_CATS order.

    The row with the highest "Similaridad" is taken for every "Catálogo", the
    selection is passed through normalize_unspsc_if_cpc_901, and the rows whose
    catalog is listed in ORDER_CATS are emitted in that order.  A catalog with
    no row gets a blank placeholder (empty code and name, similarity 0.0).

    Args:
        df: DataFrame with the columns "Catálogo", "Código", "Nombre" and
            "Similaridad".

    Returns:
        A new DataFrame with exactly those four columns.
    """
    columns = ["Catálogo", "Código", "Nombre", "Similaridad"]

    # Sorting by descending similarity first lets head(1) pick each catalog's best match.
    ranked = df.sort_values("Similaridad", ascending=False)
    top_per_catalog = ranked.groupby("Catálogo", as_index=False).head(1)

    # normalize_unspsc_if_cpc_901 works on plain dicts, so convert row by row.
    records = []
    for _, row in top_per_catalog.iterrows():
        records.append({col: row[col] for col in columns})
    records = normalize_unspsc_if_cpc_901(records)

    # Walk the catalogs in their fixed order; unknown catalogs are dropped
    # and missing ones are filled with a placeholder.
    result = []
    for cat in ORDER_CATS:
        matches = [rec for rec in records if rec["Catálogo"] == cat]
        if matches:
            result.extend(matches)
        else:
            result.append({"Catálogo": cat, "Código": "", "Nombre": "", "Similaridad": 0.0})
    return pd.DataFrame(result, columns=columns)
|
| 104 |
+
|
| 105 |
# =========================
|
| 106 |
# Reglas
|
| 107 |
# =========================
|
|
|
|
| 750 |
parsed = df.apply(lambda r: parse_code_name(r.get("codes_raw",""), r.get("text_original","")), axis=1)
|
| 751 |
df["Código"] = [c for c,_ in parsed]; df["Nombre"] = [n for _,n in parsed]
|
| 752 |
|
| 753 |
+
# ⬇️ NUEVO: 1 por catálogo, orden CICP→CPC→UNSPSC y normalización CPC=901
|
| 754 |
+
df = df[["Catálogo","Código","Nombre","Similaridad"]]
|
| 755 |
+
df_out = order_and_fill_one_per_catalog(df)
|
| 756 |
+
|
| 757 |
+
return df_out, "OK"
|
|
|
|
| 758 |
|
| 759 |
# =========================
|
| 760 |
# Exportar (xlsx con fallback a csv)
|