Spaces:

viswanani
/

Zaravya

Sleeping

App Files Files Community

viswanani committed on Oct 10, 2025

Commit

325160c

verified ·

1 Parent(s): e2b566d

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -31

app.py CHANGED Viewed

@@ -8,14 +8,10 @@ import zipfile
 import tempfile
 import uuid
-# -----------------------------
-# Basic parsing helpers
-# -----------------------------
 PRICE_PATTERN = re.compile(r'(?<!\d)(?:₹\s*|Rs\.?\s*|INR\s*)?\d+(?:\.\d{1,2})?(?!\d)')
 CLEAN_PRICE = re.compile(r'[^0-9.]')
 def preprocess_image(img: Image.Image) -> Image.Image:
-    # Convert to grayscale, increase contrast, denoise lightly, sharpen
     gray = ImageOps.grayscale(img)
     enhanced = ImageOps.autocontrast(gray)
     denoised = enhanced.filter(ImageFilter.MedianFilter(size=3))
@@ -23,30 +19,18 @@ def preprocess_image(img: Image.Image) -> Image.Image:
     return sharpened
 def simple_parse_lines(text: str):
-    """
-    Heuristic parser:
-    - Splits text into lines
-    - Tries to extract Item and Price from each line
-    - Category guessed from headings (lines in ALL CAPS or ending with ':')
-    """
     rows = []
     current_category = None
     lines = [l.strip() for l in text.splitlines() if l.strip()]
     for line in lines:
-        # Category guess
         if (line.isupper() and len(line.split()) <= 6) or line.endswith(':'):
             current_category = line.rstrip(':').strip()
             continue
-        # Find price
         price_match = PRICE_PATTERN.search(line)
         if price_match:
             price_text = price_match.group(0)
             price_value = CLEAN_PRICE.sub('', price_text)
-            # Item is everything before price
             item = line[:price_match.start()].strip(" -:•\t")
-            # Cleanup item
             item = re.sub(r'\s{2,}', ' ', item)
             if item:
                 rows.append({
@@ -57,39 +41,26 @@ def simple_parse_lines(text: str):
     return rows
 def process_images_to_zip(files):
-    # Create temp workspace
     work_dir = tempfile.mkdtemp(prefix="menu_excel_")
     output_files = []
     for idx, file_path in enumerate(files, start=1):
-        # Load image
         image = Image.open(file_path).convert("RGB")
         image = preprocess_image(image)
-        # OCR
         text = pytesseract.image_to_string(image, lang="eng")
-        # Parse
         rows = simple_parse_lines(text)
         if not rows:
-            # Fallback: dump raw text if parsing failed
             df = pd.DataFrame([{"Extracted Text": text}])
         else:
             df = pd.DataFrame(rows, columns=["Item", "Price", "Category"])
-        # Save Excel
         excel_name = f"menu_{idx:03d}.xlsx"
         excel_path = os.path.join(work_dir, excel_name)
         df.to_excel(excel_path, index=False)
         output_files.append(excel_path)
-    # Bundle ZIP
     zip_name = f"menus_output_{uuid.uuid4().hex[:8]}.zip"
     zip_path = os.path.join(work_dir, zip_name)
     with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
         for path in output_files:
             zipf.write(path, arcname=os.path.basename(path))
     return zip_path
 with gr.Blocks(title="Menu to Excel (one file per image)") as demo:
@@ -98,12 +69,11 @@ with gr.Blocks(title="Menu to Excel (one file per image)") as demo:
         input_files = gr.File(
             label="Upload menu images",
             file_count="multiple",
-            type="filepath",   # ✅ fixed here
             file_types=[".png", ".jpg", ".jpeg"]
         )
     run_btn = gr.Button("Process")
     output_zip = gr.File(label="Download ZIP")
     run_btn.click(fn=process_images_to_zip, inputs=[input_files], outputs=[output_zip])
 if __name__ == "__main__":

 import tempfile
 import uuid
 PRICE_PATTERN = re.compile(r'(?<!\d)(?:₹\s*|Rs\.?\s*|INR\s*)?\d+(?:\.\d{1,2})?(?!\d)')
 CLEAN_PRICE = re.compile(r'[^0-9.]')
 def preprocess_image(img: Image.Image) -> Image.Image:
     gray = ImageOps.grayscale(img)
     enhanced = ImageOps.autocontrast(gray)
     denoised = enhanced.filter(ImageFilter.MedianFilter(size=3))
     return sharpened
 def simple_parse_lines(text: str):
     rows = []
     current_category = None
     lines = [l.strip() for l in text.splitlines() if l.strip()]
     for line in lines:
         if (line.isupper() and len(line.split()) <= 6) or line.endswith(':'):
             current_category = line.rstrip(':').strip()
             continue
         price_match = PRICE_PATTERN.search(line)
         if price_match:
             price_text = price_match.group(0)
             price_value = CLEAN_PRICE.sub('', price_text)
             item = line[:price_match.start()].strip(" -:•\t")
             item = re.sub(r'\s{2,}', ' ', item)
             if item:
                 rows.append({
     return rows
 def process_images_to_zip(files):
     """OCR each uploaded image into its own Excel file and bundle them in a ZIP.

     For every path in ``files`` the image is preprocessed, passed through
     Tesseract (English), and parsed into Item/Price/Category rows. When the
     parser finds no rows, the raw OCR text is written instead so the user
     still gets something to work with.

     Args:
         files: Iterable of image file paths. ``None`` or an empty value is
             treated as "no images" (presumably what the upload widget passes
             when nothing was selected -- verify against the Gradio version
             in use).

     Returns:
         Path of the ZIP archive. It lives in a fresh temporary directory
         that this function does not remove.
     """
     # Normalise a missing upload so we return an empty archive rather than
     # failing with a TypeError inside enumerate().
     files = files or []
     work_dir = tempfile.mkdtemp(prefix="menu_excel_")
     output_files = []
     for idx, file_path in enumerate(files, start=1):
         # convert() returns an independent in-memory copy, so the source file
         # handle can be released immediately instead of lingering until GC.
         with Image.open(file_path) as source:
             image = source.convert("RGB")
         image = preprocess_image(image)
         text = pytesseract.image_to_string(image, lang="eng")
         rows = simple_parse_lines(text)
         if not rows:
             # Fallback: keep the raw OCR output when nothing could be parsed.
             df = pd.DataFrame([{"Extracted Text": text}])
         else:
             df = pd.DataFrame(rows, columns=["Item", "Price", "Category"])
         excel_name = f"menu_{idx:03d}.xlsx"
         excel_path = os.path.join(work_dir, excel_name)
         df.to_excel(excel_path, index=False)
         output_files.append(excel_path)
     # Random suffix keeps archive names distinct across runs.
     zip_name = f"menus_output_{uuid.uuid4().hex[:8]}.zip"
     zip_path = os.path.join(work_dir, zip_name)
     with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
         for path in output_files:
             # Store flat file names, not the temp-directory layout.
             zipf.write(path, arcname=os.path.basename(path))
     return zip_path
 with gr.Blocks(title="Menu to Excel (one file per image)") as demo:
         input_files = gr.File(
             label="Upload menu images",
             file_count="multiple",
+            type="filepath",   # ✅ correct
             file_types=[".png", ".jpg", ".jpeg"]
         )
     run_btn = gr.Button("Process")
     output_zip = gr.File(label="Download ZIP")
     run_btn.click(fn=process_images_to_zip, inputs=[input_files], outputs=[output_zip])
 if __name__ == "__main__":