martinofumagalli committed on
Commit
eb5053b
·
verified ·
1 Parent(s): 3e0713b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -2
app.py CHANGED
@@ -85,6 +85,49 @@ def material_from(text: str) -> str:
85
  m = re.search(r"(SERIOPLAST.*?RESIN)", text, re.I)
86
  return m.group(1).strip() if m else ""
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  # --------------------- AGGIUNTA RICHIESTA: PIECE da "Packaging Component Type" ---------------------
89
  _ALLOWED_PIECES = {
90
  "ribbon": "ribbon",
@@ -239,13 +282,16 @@ def parse_record(pages: List[str], source_name: str) -> Dict[str, str]:
239
  gcas = material_ref_gcas_from(full)
240
  mfam = material_family_from(full)
241
 
 
 
 
242
  return {
243
  "Piece": piece or "",
244
  "SKU": sku or "",
245
  "Title": title or "",
246
  "Capacity": cap or "",
247
  "% Recycled": "–",
248
- "Weight": "–",
249
  "Color": color or "",
250
  "Material / Resin": material or "",
251
  "Class": cls or "",
@@ -313,5 +359,5 @@ with c2:
313
  bio = io.BytesIO()
314
  with pd.ExcelWriter(bio, engine="openpyxl") as xw:
315
  df.to_excel(xw, index=False, sheet_name="data")
316
- st.download_button("⬇️ Excel", bio.getvalue(), "table.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
317
 
 
85
  m = re.search(r"(SERIOPLAST.*?RESIN)", text, re.I)
86
  return m.group(1).strip() if m else ""
87
 
88
+ # --- (AGGIUNTA) WEIGHT PARSER ----------------------------------------------
89
+ WEIGHT_TOL_RE = re.compile(
90
+ r"\bWeight\b[^\n\r]{0,15}?([0-9]+(?:[.,][0-9]+)?)\s*(?:±|\+/?-|\+-)\s*([0-9]+(?:[.,][0-9]+)?)\s*(mg|g|kg)?",
91
+ re.I,
92
+ )
93
+ WEIGHT_SIMPLE_RE = re.compile(
94
+ r"\bWeight\b[^\n\r]{0,15}?([0-9]+(?:[.,][0-9]+)?)\s*(mg|g|kg)\b",
95
+ re.I,
96
+ )
97
+ WEIGHT_INLINE_RE = re.compile(
98
+ r"\b([0-9]+(?:[.,][0-9]+)?)\s*(?:±|\+/?-|\+-)\s*([0-9]+(?:[.,][0-9]+)?)\s*(mg|g|kg)\b",
99
+ re.I,
100
+ )
101
+
102
+ def _norm_num(s: str) -> str:
103
+ return (s or "").replace(",", ".").strip().rstrip(".")
104
+
105
+ def weight_from(text: str) -> str:
106
+ # 1) match con tolleranza
107
+ m = WEIGHT_TOL_RE.search(text or "")
108
+ if m:
109
+ val = _norm_num(m.group(1))
110
+ tol = _norm_num(m.group(2))
111
+ unit = (m.group(3) or "g").lower()
112
+ return f"{val} ± {tol} {unit}"
113
+ # 2) match semplice con unità
114
+ m = WEIGHT_SIMPLE_RE.search(text or "")
115
+ if m:
116
+ val = _norm_num(m.group(1))
117
+ unit = (m.group(2) or "g").lower()
118
+ return f"{val} {unit}"
119
+ # 3) riga per riga per casi OCR
120
+ for line in (text or "").splitlines():
121
+ if "weight" in line.lower():
122
+ m2 = WEIGHT_INLINE_RE.search(line)
123
+ if m2:
124
+ val = _norm_num(m2.group(1))
125
+ tol = _norm_num(m2.group(2))
126
+ unit = (m2.group(3) or "g").lower()
127
+ return f"{val} ± {tol} {unit}"
128
+ return ""
129
+ # ---------------------------------------------------------------------------
130
+
131
  # --------------------- AGGIUNTA RICHIESTA: PIECE da "Packaging Component Type" ---------------------
132
  _ALLOWED_PIECES = {
133
  "ribbon": "ribbon",
 
282
  gcas = material_ref_gcas_from(full)
283
  mfam = material_family_from(full)
284
 
285
+ # (AGGIUNTA) estrai peso
286
+ wght = weight_from(full)
287
+
288
  return {
289
  "Piece": piece or "",
290
  "SKU": sku or "",
291
  "Title": title or "",
292
  "Capacity": cap or "",
293
  "% Recycled": "–",
294
+ "Weight": wght or "–",
295
  "Color": color or "",
296
  "Material / Resin": material or "",
297
  "Class": cls or "",
 
359
  bio = io.BytesIO()
360
  with pd.ExcelWriter(bio, engine="openpyxl") as xw:
361
  df.to_excel(xw, index=False, sheet_name="data")
362
+ st.download_button("⬇️ Excel", bio.getvalue(), "table.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
363