Spaces:

TuttiQuantis
/

PPWR_APP

Sleeping

App Files Files Community

martinofumagalli committed on Nov 10, 2025

Commit

7e449bd

verified ·

1 Parent(s): f35ad91

Update app.py

Browse files

Files changed (1) hide show

app.py +270 -125

app.py CHANGED Viewed

@@ -13,206 +13,351 @@ import pytesseract
 from PIL import Image
 # ======================================================================
-# SCHEMA TABELLA
 # ======================================================================
 SCHEMA = [
     "Piece","SKU","Title","Capacity","% Recycled","Weight","Color","Material / Resin","Class","Source File",
     "Component","Function","General description of the packaging","Material Ref GCAS","Material Family"
 ]
 # ======================================================================
-# LETTURA PDF E OCR
 # ======================================================================
 def extract_text_pages(pdf_bytes: bytes) -> List[str]:
     try:
         with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
-            return [p.extract_text() or "" for p in pdf.pages]
     except Exception:
-        pass
-    try:
-        reader = PdfReader(io.BytesIO(pdf_bytes))
-        return [(p.extract_text() or "") for p in reader.pages]
-    except Exception:
-        return []
 def run_ocr(pdf_bytes: bytes, lang: str, dpi: int, tesseract_cmd: str | None) -> List[str]:
     if tesseract_cmd:
         pytesseract.pytesseract.tesseract_cmd = tesseract_cmd
-    imgs = convert_from_bytes(pdf_bytes, dpi=dpi)
     config = "--psm 6 -c preserve_interword_spaces=1"
-    return [pytesseract.image_to_string(im, lang=lang, config=config) or "" for im in imgs]
 # ======================================================================
-# REGEX E PARSER
 # ======================================================================
-SKU_RE = re.compile(r"\b(?:Name|SKU|Part(?:\s*No\.?)?)\s*[:#]?\s*([A-Z0-9\-_/\.]{5,})", re.I)
-TITLE_RE = re.compile(r"\bTitle\s*[:\-]\s*(.+)", re.I)
-CLASS_RE = re.compile(r"\bClass\s*([A-Za-z ]+)", re.I)
-def _first(text, pat):
-    m = pat.search(text or ""); return m.group(1).strip() if m else ""
-def capacity_from(t):
-    m=re.search(r"([0-9]+(?:[.,][0-9]+)?)\s*(L|Liter|ml|mL)\b",t or "",re.I)
     if not m: return ""
-    return f"{m.group(1).replace(',','.')} {m.group(2).upper().replace('LITER','L').replace('ML','ml')}"
-def color_from(t):
-    m=re.search(r"(?:Part\s*Color|Color)\s*[:\-]?\s*([A-Z ]{3,})",t,re.I)
     if m: return m.group(1).strip()
-    m=re.search(r"\b([A-Z ]{4,}(?:GREEN|WHITE|BLACK|BLUE|RED|CLEAR)[A-Z ]*)\b",t)
-    return m.group(1).strip() if m else ""
-def material_from(t):
-    for l in (t or "").splitlines():
-        if re.search(r"\bRESIN\b",l,re.I): return l.strip()
-    m=re.search(r"(SERIOPLAST.*?RESIN)",t,re.I)
     return m.group(1).strip() if m else ""
-# ======================================================================
-# WEIGHT PARSER → restituisce solo "94±3g"
-# ======================================================================
-WEIGHT_VALUE_RE = re.compile(r"\bweight\b[^\n\r]{0,80}?([0-9]+(?:[.,][0-9]+)?\s*(?:±|\+/?-|\+-)?\s*[0-9]*\s*(?:mg|g|kg))\b", re.I)
-def weight_from(t: str) -> str:
-    if not t: return ""
-    m = WEIGHT_VALUE_RE.search(t)
-    if m: return re.sub(r"\s+", "", m.group(1)).replace(",", ".")
-    for line in t.splitlines():
         if "weight" in line.lower():
-            m2 = re.search(r"([0-9]+(?:[.,][0-9]+)?\s*(?:±|\+/?-|\+-)?\s*[0-9]*\s*(?:mg|g|kg))", line, re.I)
-            if m2: return re.sub(r"\s+", "", m2.group(1)).replace(",", ".")
     return ""
-# ======================================================================
-# ALTRE FUNZIONI (uguali a prima)
-# ======================================================================
-_ALLOWED_PIECES={"ribbon":"ribbon","bottle":"bottle","film bundle":"film bundle","container":"container",
-"label - adhesive":"LABEL - ADHESIVE","label adhesive":"LABEL - ADHESIVE","label-adhesive":"LABEL - ADHESIVE",
-"label - back":"LABEL - BACK","back label":"LABEL - BACK","label back":"LABEL - BACK","closure":"CLOSURE"}
-_PACK_COMP_TYPE_RE=re.compile(r"Packaging\s+Component\s+Type\s*[:\-]?\s*([^\n\r]+)",re.I)
-def _normalize_piece(s):
-    s2=re.sub(r"\s+"," ",(s or "").strip().lower())
-    for k,v in _ALLOWED_PIECES.items():
-        if k in s2: return v
     return ""
-def piece_from(t,cls):
-    m=_PACK_COMP_TYPE_RE.search(t or "")
-    if m: val=_normalize_piece(m.group(1));
-    if m and val: return val
     if cls:
-        if "bottle" in cls.lower(): return "bottle"
-        if "cap" in cls.lower(): return "CLOSURE"
-        if "corrugated" in cls.lower(): return "container"
-        if "label" in cls.lower(): return "LABEL - BACK"
     return ""
-FUNCTION_RE=re.compile(r"\b(Primary|Secondary(?:\s*or\s*Tertiary)?|Tertiary)\b",re.I)
-def component_from(t,piece,cls):
-    txt=t.lower()
-    if "label" in txt: return "Labels"
     if piece: return piece
-    if cls and "bottle" in cls.lower(): return "Bottle"
     return ""
-def function_from(t):
-    m=FUNCTION_RE.search(t or ""); return m.group(1).title() if m else ""
-def material_ref_gcas_from(t):
-    m=re.findall(r"\b(\d{7,9})\b",t or ""); return ", ".join(sorted(set(m))) if m else ""
-def material_family_from(t):
-    fams=["Monolayer HDPE","Polypropylene (PP)","Paper","Rigid Paper – Corrugated Case"]
-    for f in fams:
-        if f.lower() in (t or "").lower(): return f
-    if re.search(r"\bHDPE\b",t): return "Monolayer HDPE"
-    if re.search(r"\bPP\b",t,re.I): return "Polypropylene (PP)"
     return ""
-# ======================================================================
-# PARSER PRINCIPALE
-# ======================================================================
-def parse_record(pages: List[str], source_name: str) -> Dict[str,str]:
-    full="\n".join(pages or [""])
-    sku=_first(full,SKU_RE)
-    title=_first(full,TITLE_RE)
-    cls=_first(full,CLASS_RE)
-    cap=capacity_from(title) or capacity_from(full)
-    color=color_from(full)
-    material=material_from(full)
-    piece=piece_from(full,cls)
-    comp=component_from(full,piece,cls)
-    func=function_from(full)
-    gcas=material_ref_gcas_from(full)
-    mfam=material_family_from(full)
-    wght=weight_from(full)
     return {
-        "Piece":piece or "","SKU":sku or "","Title":title or "","Capacity":cap or "",
-        "% Recycled":"–","Weight":wght or "–","Color":color or "","Material / Resin":material or "",
-        "Class":cls or "","Source File":source_name,"Component":comp or "","Function":func or "",
-        "General description of the packaging":"","Material Ref GCAS":gcas or "","Material Family":mfam or ""
     }
 # ======================================================================
-# STREAMLIT UI
 # ======================================================================
 st.set_page_config(page_title="PDF → Table (OCR-ready)", layout="wide")
 st.title("📄→📊 PDF → Table (OCR-ready)")
-st.caption("Estrae automaticamente i campi, incluso il peso dalle immagini OCR.")
 with st.sidebar:
-    files=st.file_uploader("Seleziona PDF",type=["pdf"],accept_multiple_files=True)
     st.markdown("---")
     st.subheader("OCR")
-    ocr_fallback=st.checkbox("Usa OCR se non c'è testo",value=True)
-    ocr_lang=st.text_input("Lingue OCR (comma)",value="eng,ita")
-    ocr_dpi=st.number_input("DPI OCR",200,600,300,50)
-    tess_path=st.text_input("Percorso Tesseract (se non nel PATH)",value="")
-    run_btn=st.button("▶️ Estrai")
 if not run_btn:
     st.info("Carica i PDF e premi **Estrai**.")
     st.stop()
 if not files:
     st.warning("Nessun PDF caricato.")
     st.stop()
-lang="+".join([p.strip() for p in ocr_lang.split(",") if p.strip()]) or "eng"
-tess_cmd=tess_path.strip() or None
-rows,errors=[],[]
 for up in files:
     try:
-        raw=up.read()
-        pages=extract_text_pages(raw)
         if ocr_fallback and not any((p or "").strip() for p in pages):
-            pages=run_ocr(raw,lang=lang,dpi=int(ocr_dpi),tesseract_cmd=tess_cmd)
-        rec=parse_record(pages,up.name)
-        # se Weight vuoto, prova OCR
-        if (not rec.get("Weight") or rec["Weight"]=="–") and ocr_fallback:
-            ocr_pages=run_ocr(raw,lang=lang,dpi=int(ocr_dpi),tesseract_cmd=tess_cmd)
-            w_ocr=weight_from("\n".join(ocr_pages))
-            if w_ocr: rec["Weight"]=w_ocr
         rows.append(rec)
     except Exception as e:
-        errors.append((up.name,str(e)))
 if errors:
     with st.expander("Errori"):
-        for n,e in errors: st.error(f"{n}: {e}")
-df=pd.DataFrame(rows,columns=SCHEMA)
 st.success(f"Creat{ 'e' if len(df)!=1 else 'a' } {len(df)} riga/e.")
-st.dataframe(df,use_container_width=True)
-c1,c2=st.columns(2)
 with c1:
-    st.download_button("⬇️ CSV",df.to_csv(index=False).encode("utf-8"),"table.csv","text/csv")
 with c2:
-    bio=io.BytesIO()
-    with pd.ExcelWriter(bio,engine="openpyxl") as xw:
-        df.to_excel(xw,index=False,sheet_name="data")
-    st.download_button("⬇️ Excel",bio.getvalue(),"table.xlsx","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")

 from PIL import Image
 # ======================================================================
# TABLE SCHEMA (fixed columns)
# ======================================================================
# Column order of the output table; the DataFrame is built with exactly
# these columns.
SCHEMA = [
    "Piece",
    "SKU",
    "Title",
    "Capacity",
    "% Recycled",
    "Weight",
    "Color",
    "Material / Resin",
    "Class",
    "Source File",
    # newer columns
    "Component",
    "Function",
    "General description of the packaging",
    "Material Ref GCAS",
    "Material Family",
]
 # ======================================================================
+# ESTRATTORI LOW-LEVEL
 # ======================================================================
 def extract_text_pages(pdf_bytes: bytes) -> List[str]:
+    pages = []
+    # 1) pdfplumber
     try:
         with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
+            for p in pdf.pages:
+                pages.append(p.extract_text() or "")
     except Exception:
+        pages = []
+    # 2) pypdf fallback
+    if not pages or all(not (t or "").strip() for t in pages):
+        try:
+            reader = PdfReader(io.BytesIO(pdf_bytes))
+            pages = [(p.extract_text() or "") for p in reader.pages]
+        except Exception:
+            pages = []
+    return pages
 def run_ocr(pdf_bytes: bytes, lang: str, dpi: int, tesseract_cmd: str | None) -> List[str]:
     if tesseract_cmd:
         pytesseract.pytesseract.tesseract_cmd = tesseract_cmd
+    images = convert_from_bytes(pdf_bytes, dpi=dpi)
+    texts = []
     config = "--psm 6 -c preserve_interword_spaces=1"
+    for img in images:
+        if not isinstance(img, Image.Image):
+            img = img.convert("RGB")
+        texts.append(pytesseract.image_to_string(img, lang=lang, config=config) or "")
+    return texts
 # ======================================================================
+# PARSING DOMINIO (euristiche/regex leggere)
 # ======================================================================
+SKU_RE    = re.compile(r"\b(?:Name|SKU|Part(?:\s*No\.?)?)\s*[:#]?\s*([A-Z0-9\-_/\.]{5,})", re.I)
+TITLE_RE  = re.compile(r"\bTitle\s*[:\-]\s*(.+)", re.I)
+CLASS_RE  = re.compile(r"\bClass\s*([A-Za-z ]+)", re.I)
+def _first(text: str, pattern: re.Pattern, group: int = 1) -> str:
+    m = pattern.search(text or "")
+    return m.group(group).strip() if m else ""
def capacity_from(text: str) -> str:
    """Return the first volume found in *text* as ``"<number> <unit>"``.

    The number uses a dot as decimal separator and the unit is rendered as
    ``L`` or ``ml``. Returns ``""`` when no volume is present.
    """
    found = re.search(r"([0-9]+(?:[.,][0-9]+)?)\s*(L|Liter|ml|mL)\b", text or "", re.I)
    if found is None:
        return ""
    number, raw_unit = found.groups()
    unit = raw_unit.upper().replace("LITER", "L").replace("ML", "ml")
    return f"{number.replace(',', '.')} {unit}"
def color_from(text: str) -> str:
    """Return the colour description found in *text*, or ``""``.

    A value following a "Part Color"/"Color" label is preferred; otherwise an
    upper-case phrase ending in a known colour word is used. ``None`` is
    treated as empty text, as in the other ``*_from`` helpers (it used to
    raise ``TypeError`` here).
    """
    text = text or ""
    labelled = re.search(r"(?:Part\s*Color|Color)\s*[:\-]?\s*([A-Z ]{3,})", text, re.I)
    if labelled:
        return labelled.group(1).strip()
    # Case-sensitive on purpose: only all-caps phrases count as a bare colour.
    bare = re.search(
        r"\b([A-Z ]{4,}(?:GREEN|TRANSPARENT|WHITE|BLACK|BLUE|RED|CLEAR)[A-Z ]*)\b", text
    )
    return bare.group(1).strip() if bare else ""
def material_from(text: str) -> str:
    """Return the material/resin description found in *text*, or ``""``.

    The first line containing the word "RESIN" (any case) is returned
    stripped. Failing that, the shortest span from "SERIOPLAST" to "RESIN"
    on one line is used. ``None`` is treated as empty text (the fallback
    search used to raise ``TypeError`` on it).
    """
    text = text or ""
    for line in text.splitlines():
        if re.search(r"\bRESIN\b", line, re.I):
            return line.strip()
    # Fallback also catches "RESIN" glued to a preceding word.
    span = re.search(r"(SERIOPLAST.*?RESIN)", text, re.I)
    return span.group(1).strip() if span else ""
+# --- (AGGIUNTA) WEIGHT PARSER ----------------------------------------------
+WEIGHT_TOL_RE = re.compile(
+    r"\bWeight\b[^\n\r]{0,15}?([0-9]+(?:[.,][0-9]+)?)\s*(?:±|\+/?-|\+-)\s*([0-9]+(?:[.,][0-9]+)?)\s*(mg|g|kg)?",
+    re.I,
+)
+WEIGHT_SIMPLE_RE = re.compile(
+    r"\bWeight\b[^\n\r]{0,15}?([0-9]+(?:[.,][0-9]+)?)\s*(mg|g|kg)\b",
+    re.I,
+)
+WEIGHT_INLINE_RE = re.compile(
+    r"\b([0-9]+(?:[.,][0-9]+)?)\s*(?:±|\+/?-|\+-)\s*([0-9]+(?:[.,][0-9]+)?)\s*(mg|g|kg)\b",
+    re.I,
+)
+def _norm_num(s: str) -> str:
+    return (s or "").replace(",", ".").strip().rstrip(".")
+def weight_from(text: str) -> str:
+    # 1) match con tolleranza
+    m = WEIGHT_TOL_RE.search(text or "")
+    if m:
+        val = _norm_num(m.group(1))
+        tol = _norm_num(m.group(2))
+        unit = (m.group(3) or "g").lower()
+        return f"{val} ± {tol} {unit}"
+    # 2) match semplice con unità
+    m = WEIGHT_SIMPLE_RE.search(text or "")
+    if m:
+        val = _norm_num(m.group(1))
+        unit = (m.group(2) or "g").lower()
+        return f"{val} {unit}"
+    # 3) riga per riga per casi OCR
+    for line in (text or "").splitlines():
         if "weight" in line.lower():
+            m2 = WEIGHT_INLINE_RE.search(line)
+            if m2:
+                val = _norm_num(m2.group(1))
+                tol = _norm_num(m2.group(2))
+                unit = (m2.group(3) or "g").lower()
+                return f"{val} ± {tol} {unit}"
     return ""
+# ---------------------------------------------------------------------------
+# ---------------------  AGGIUNTA RICHIESTA: PIECE da "Packaging Component Type"  ---------------------
+_ALLOWED_PIECES = {
+    "ribbon": "ribbon",
+    "bottle": "bottle",
+    "film bundle": "film bundle",
+    "container": "container",
+    "label - adhesive": "LABEL - ADHESIVE",
+    "label adhesive": "LABEL - ADHESIVE",
+    "label-adhesive": "LABEL - ADHESIVE",
+    "label - back": "LABEL - BACK",
+    "back label": "LABEL - BACK",
+    "label back": "LABEL - BACK",
+    "closure": "CLOSURE",
+}
+_PACK_COMP_TYPE_RE = re.compile(
+    r"Packaging\s+Component\s+Type\s*[:\-]?\s*([^\n\r]+)", re.I
+)
+def _normalize_piece(s: str) -> str:
+    s0 = (s or "").strip()
+    s1 = re.sub(r"\s+", " ", s0)
+    s2 = s1.lower()
+    s2 = s2.replace("–", "-").replace("—", "-")
+    s2 = s2.replace("label- ", "label ").replace(" -", " - ").strip()
+    # prova match diretto
+    if s2 in _ALLOWED_PIECES:
+        return _ALLOWED_PIECES[s2]
+    # prova alcune normalizzazioni
+    s2 = s2.replace("  ", " ")
+    if s2 in _ALLOWED_PIECES:
+        return _ALLOWED_PIECES[s2]
+    # fallback per frasi lunghe: cerca la keyword migliore
+    for key, canon in _ALLOWED_PIECES.items():
+        if key in s2:
+            return canon
     return ""
def piece_from(text: str, cls: str) -> str:
    """Determine the "Piece" value for a document.

    Sources are tried in order and the first one that normalises to a known
    piece wins:

    1. the "Packaging Component Type" line,
    2. the legacy "Packaging Material Type" line,
    3. the class string *cls*, first via ``_normalize_piece`` and then via a
       few keyword checks.

    Returns ``""`` when nothing matches.
    """
    haystack = text or ""
    labelled_hits = (
        _PACK_COMP_TYPE_RE.search(haystack),
        re.search(r"Packaging\s*Material\s*Type\s*([^\n]+)", haystack, re.I),
    )
    for hit in labelled_hits:
        if hit:
            piece = _normalize_piece(hit.group(1))
            if piece:
                return piece

    if not cls:
        return ""
    piece = _normalize_piece(cls)
    if piece:
        return piece

    lowered = cls.lower()
    keyword_table = (
        (("bottle",), "bottle"),
        (("cap", "closure"), "CLOSURE"),
        (("corrugated",), "container"),
        (("label",), "LABEL - BACK"),  # neutral choice when not specified
    )
    for needles, fallback_piece in keyword_table:
        if any(needle in lowered for needle in needles):
            return fallback_piece
    return ""
+# ----------------------------------------------------------------------------------------------------
# --- Newer columns: basic heuristics (can be refined with real examples)
FUNCTION_RE = re.compile(r"\b(Primary|Secondary(?:\s*or\s*Tertiary)?|Tertiary)\b", re.I)


def component_from(text: str, piece: str, cls: str) -> str:
    """Guess the "Component" column.

    Explicit keywords in *text* take priority, checked in the order below;
    otherwise *piece* is returned, and failing that a value derived from
    *cls*. ``None`` text is treated as empty (it used to raise
    ``AttributeError``). Returns ``""`` when nothing applies.
    """
    txt = (text or "").lower()
    # explicit keywords first
    if "ink" in txt and "cartridge" in txt:
        return "Ink cartridge"
    if "ink foil" in txt:
        return "Ink foil"
    if "tape" in txt:
        return "Tape"
    if "label" in txt and any(kind in txt for kind in ("psl", "wet glue", "iml", "htl")):
        return "Labels"
    if "adhesive" in txt or "hot melt" in txt:
        return "Adhesive"
    if "cartonboard" in txt or "sheet" in txt:
        return "Cartonboard / Sheet"
    if "corrugated" in txt or "case" in txt or "outercase" in txt:
        return "Corrugated box"
    if "bundle" in txt:
        return "Bundle"
    # fall back to piece, then class
    if piece:
        return piece
    if cls:
        lowered = cls.lower()
        if "bottle" in lowered:
            return "Bottle"
        if "cap" in lowered:
            return "Closure"
        if "corrugated" in lowered:
            return "Corrugated box"
        if "label" in lowered:
            return "Labels"
    return ""


def function_from(text: str) -> str:
    """Return the first packaging level named in *text*, title-cased, or ``""``."""
    m = FUNCTION_RE.search(text or "")
    return m.group(1).title() if m else ""
def material_ref_gcas_from(text: str) -> str:
    """Return up to three distinct material reference codes, comma-separated.

    Stand-alone 7-9 digit numbers (e.g. ``90082546``) are preferred. Only
    when none exist are codes of the form ``(<digits> kg pack)`` used.
    Codes keep their order of first appearance. Returns ``""`` when no code
    is found.
    """
    source = text or ""
    patterns = (
        (r"\b(\d{7,9})\b", 0),
        (r"\((\d{5,})\s*kg\s*pack\)", re.I),
    )
    for pattern, flags in patterns:
        codes = re.findall(pattern, source, flags)
        if codes:
            # dict.fromkeys drops duplicates while keeping first-seen order
            return ", ".join(list(dict.fromkeys(codes))[:3])
    return ""
def material_family_from(text: str) -> str:
    """Map free text to a known material family name, or ``""``.

    A family name found verbatim (case-insensitively) in *text* wins, in the
    order listed below. When that first hit is itself part of a longer
    family name that also occurs in the text, the longer name is returned.
    Otherwise a few material keywords are tried.
    """
    families = [
        "Monolayer HDPE", "Polypropylene (PP)", "Paper", "Flexible Film – Mono non Metallized",
        "Flexible - Label PSL WGL IML HTL", "Rigid Paper – Corrugated Case",
        "Inks and solvents", "Hot melt adhesive", "Wet Glue Label",
        "Coated paper", "Wood", "Ink foil", "Fasson PE 85 TOP White",
    ]
    t = text or ""
    lowered = t.lower()
    hits = [fam for fam in families if fam.lower() in lowered]
    if hits:
        first = hits[0].lower()
        # "Paper" is contained in "Coated paper" and "Rigid Paper – Corrugated
        # Case"; without this step the generic entry always shadowed them.
        return max((fam for fam in hits if first in fam.lower()), key=len)
    if re.search(r"\bHDPE\b", t):
        return "Monolayer HDPE"
    if re.search(r"\bPP\b|\bPolypropylene\b", t, re.I):
        return "Polypropylene (PP)"
    if "corrugated" in lowered:
        return "Rigid Paper – Corrugated Case"
    # A bare "paper" keyword needs no extra check: it is already covered by
    # the "Paper" family in the list scan above.
    return ""
def parse_record(pages: List[str], source_name: str) -> Dict[str, str]:
    """Build one table row from the page texts of a single document.

    All pages are joined with newlines and each field is extracted by its
    dedicated helper. Capacity is taken from the title when possible and
    from the whole text otherwise. "% Recycled" is always "–", the general
    description is always empty, and a missing weight is rendered as "–".
    """
    document = "\n".join(pages or [""])
    title = _first(document, TITLE_RE)
    cls = _first(document, CLASS_RE)
    piece = piece_from(document, cls)
    capacity = capacity_from(title) or capacity_from(document)

    record = {
        "Piece": piece or "",
        "SKU": _first(document, SKU_RE) or "",
        "Title": title or "",
        "Capacity": capacity or "",
        "% Recycled": "–",
        "Weight": weight_from(document) or "–",
        "Color": color_from(document) or "",
        "Material / Resin": material_from(document) or "",
        "Class": cls or "",
        "Source File": source_name,
        "Component": component_from(document, piece, cls) or "",
        "Function": function_from(document) or "",
        "General description of the packaging": "",
        "Material Ref GCAS": material_ref_gcas_from(document) or "",
        "Material Family": material_family_from(document) or "",
    }
    return record
 # ======================================================================
# STREAMLIT UI
# ======================================================================
st.set_page_config(page_title="PDF → Table (OCR-ready)", layout="wide")
st.title("📄→📊 PDF → Table (OCR-ready)")
st.caption("Carica PDF (anche scansioni). Compilo la tabella con i campi richiesti; OCR come fallback.")

# Sidebar: file upload, OCR settings and the run button.
with st.sidebar:
    files = st.file_uploader("Seleziona PDF", type=["pdf"], accept_multiple_files=True)
    st.markdown("---")
    st.subheader("OCR")
    ocr_fallback = st.checkbox("Usa OCR se non c'è testo", value=True)
    ocr_lang = st.text_input("Lingue OCR (comma)", value="eng,ita")
    ocr_dpi = st.number_input("DPI OCR", 200, 600, 300, 50)
    tess_path = st.text_input("Percorso Tesseract (se non nel PATH)", value="")
    run_btn = st.button("▶️ Estrai")

# Nothing below runs until the button is pressed and files are present.
if not run_btn:
    st.info("Carica i PDF e premi **Estrai**.")
    st.stop()
if not files:
    st.warning("Nessun PDF caricato.")
    st.stop()

# "eng,ita" -> "eng+ita"; falls back to "eng" when the field is blank.
lang_codes = (code.strip() for code in ocr_lang.split(","))
lang = "+".join(code for code in lang_codes if code) or "eng"
tess_cmd = tess_path.strip() or None

rows, errors = [], []
for up in files:
    # One failing file must not abort the batch: collect the error and go on.
    try:
        raw = up.read()
        pages = extract_text_pages(raw)
        has_text = any((page or "").strip() for page in pages)
        if ocr_fallback and not has_text:
            pages = run_ocr(raw, lang=lang, dpi=int(ocr_dpi), tesseract_cmd=tess_cmd)
        rows.append(parse_record(pages, up.name))
    except Exception as exc:
        errors.append((up.name, str(exc)))

if errors:
    with st.expander("Errori"):
        for name, err in errors:
            st.error(f"{name}: {err}")

df = pd.DataFrame(rows, columns=SCHEMA)
plural_suffix = "e" if len(df) != 1 else "a"
st.success(f"Creat{plural_suffix} {len(df)} riga/e.")
st.dataframe(df, use_container_width=True)

# Downloads: CSV on the left, Excel on the right.
c1, c2 = st.columns(2)
with c1:
    csv_bytes = df.to_csv(index=False).encode("utf-8")
    st.download_button("⬇️ CSV", csv_bytes, "table.csv", "text/csv")
with c2:
    bio = io.BytesIO()
    with pd.ExcelWriter(bio, engine="openpyxl") as xw:
        df.to_excel(xw, index=False, sheet_name="data")
    st.download_button("⬇️ Excel", bio.getvalue(), "table.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")