Spaces:

rasmodev
/

Handwriting_Recognition_Model

Sleeping

App Files Community

rasmodev committed on May 17

Commit

d41270e

verified ·

1 Parent(s): 2d0dda1

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -55

app.py CHANGED Viewed

@@ -1,19 +1,6 @@
 """
 ValuationAI — Nairobi Valuation Sheet OCR
 Model: rasmodev/Handwriting_trocr_model
-PDF processing matches notebook exactly:
-- fitz opened via temp file (not stream) matching how training data was built
-- Matrix(200/72, 200/72) — same DPI as training
-- get_pixmap(matrix=mat, alpha=False) — same as training
-- Image.open(...).convert('RGB') — same as training
-Inference matches notebook exactly:
-- processor(images=img.convert('RGB'), return_tensors='pt').pixel_values
-- model.generate(pixel_values=pv, max_new_tokens=64, num_beams=1)
-Label format from training:
-- PLOT: LR 209/617 | LOC: STATE HOUSE AVENUE | AREA: 0.06 | AMT: 52000000 | DATE: 2008-06-17 | VOS: 3872
 """
 import io, time, logging, tempfile, os
 import streamlit as st
@@ -73,7 +60,6 @@ html, body, [class*="css"], .stApp {
     border-color: #2563EB !important;
     box-shadow: 0 0 0 4px rgba(37,99,235,0.06) !important;
 }
-[data-testid="stFileUploader"] label { color: #6B7280 !important; font-size: 0.9rem !important; }
 .fchip { display: inline-flex; align-items: center; gap: 5px; background: #EFF6FF; border: 1px solid #BFDBFE; color: #1D4ED8; padding: 0.25rem 0.7rem; border-radius: 6px; font-size: 0.73rem; font-weight: 500; margin: 2px; }
@@ -98,7 +84,7 @@ html, body, [class*="css"], .stApp {
 .stat-l { font-size: 0.68rem; font-weight: 500; letter-spacing: 0.12em; text-transform: uppercase; color: #6B7280; }
 .section-head { display: flex; align-items: center; justify-content: space-between; margin-bottom: 1rem; padding-bottom: 0.75rem; border-bottom: 1px solid #E5E7EB; }
-.section-title { font-family: 'Cormorant Garamond', serif; font-size: 1.5rem; font-weight: 600; color: #1A1A2E; letter-spacing: -0.01em; }
 div[data-testid="stDownloadButton"] > button {
     background: #fff !important; border: 1.5px solid #1A1A2E !important; color: #1A1A2E !important;
@@ -115,7 +101,7 @@ div[data-testid="stDownloadButton"] > button:hover { background: #1A1A2E !import
 # ═══════════════════════════════════════════════════════════
-# MODEL — matches notebook Cell 13 + Cell 28
 # ═══════════════════════════════════════════════════════════
 @st.cache_resource(show_spinner="Loading recognition model…")
 def load_model():
@@ -130,56 +116,26 @@ def load_model():
 # ═══════════════════════════════════════════════════════════
-# PDF → IMAGES — matches notebook Cell 10 exactly
-# Uses temp file not stream — same as training
-# Matrix(200/72, 200/72), get_pixmap(alpha=False), convert('RGB')
-# ═══════════════════════════════════════════════════════════
-def pdf_to_images(file_bytes: bytes) -> list:
-    import fitz
-    images = []
-    # Write to temp file — same as training which used file paths
-    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
-        tmp.write(file_bytes)
-        tmp_path = tmp.name
-    try:
-        doc = fitz.open(tmp_path)                      # open from path like training
-        mat = fitz.Matrix(200/72, 200/72)              # same DPI as training
-        for page in doc:
-            pix = page.get_pixmap(matrix=mat, alpha=False)  # same as training
-            img = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")  # same as training
-            images.append(img)
-            pix = None                                 # free memory immediately like training
-        doc.close()
-    finally:
-        os.unlink(tmp_path)
-    return images
-# ═══════════════════════════════════════════════════════════
-# OCR — matches notebook Cell 18 + Cell 20 inference
-# processor(images=img.convert('RGB')) then model.generate
-# max_new_tokens=64, num_beams=1 (greedy — fast)
 # ═══════════════════════════════════════════════════════════
 def ocr_page(img: Image.Image) -> str:
     import torch
     processor, model, device = load_model()
-    # Exactly as in ValuationDataset.__getitem__
     pixel_values = processor(
         images=img.convert("RGB"),
         return_tensors="pt"
     ).pixel_values.to(device)
     with torch.no_grad():
         generated = model.generate(
             pixel_values=pixel_values,
             max_new_tokens=64,
-            num_beams=1,            # greedy — fast, matches validation in notebook
         )
     return processor.batch_decode(generated, skip_special_tokens=True)[0].strip()
 # ═══════════════════════════════════════════════════════════
-# PARSE LABEL — matches row_to_label() from notebook Cell 10
 # Format: PLOT: ... | LOC: ... | AREA: ... | AMT: ... | DATE: ... | VOS: ...
 # ═══════════════════════════════════════════════════════════
 def parse_label(raw_text: str, filename: str) -> dict:
@@ -314,6 +270,8 @@ run = st.button(
 # PROCESSING
 # ═══════════════════════════════════════════════════════════
 if run and uploaded:
     st.session_state.records = []
     st.session_state.errors  = []
     st.session_state.done    = False
@@ -327,27 +285,50 @@ if run and uploaded:
         raw   = uf.read()
         bar.progress(fi / len(uploaded), text=f"Reading {fname}…")
         try:
             ext = fname.lower().rsplit(".", 1)[-1]
             if ext == "pdf":
-                imgs = pdf_to_images(raw)
             else:
                 imgs = [Image.open(io.BytesIO(raw)).convert("RGB")]
             if not imgs:
-                st.session_state.errors.append(f"{fname}: no pages could be extracted")
                 continue
             for pi, img in enumerate(imgs, 1):
-                status.caption(
-                    f"Processing **{fname}** — page {pi} of {len(imgs)}"
-                )
                 raw_text = ocr_page(img)
-                record   = parse_label(raw_text, fname)
                 st.session_state.records.append(record)
         except Exception as e:
             st.session_state.errors.append(f"{fname}: {e}")
         bar.progress((fi + 1) / len(uploaded))

 """
 ValuationAI — Nairobi Valuation Sheet OCR
 Model: rasmodev/Handwriting_trocr_model
 """
 import io, time, logging, tempfile, os
 import streamlit as st
     border-color: #2563EB !important;
     box-shadow: 0 0 0 4px rgba(37,99,235,0.06) !important;
 }
 .fchip { display: inline-flex; align-items: center; gap: 5px; background: #EFF6FF; border: 1px solid #BFDBFE; color: #1D4ED8; padding: 0.25rem 0.7rem; border-radius: 6px; font-size: 0.73rem; font-weight: 500; margin: 2px; }
 .stat-l { font-size: 0.68rem; font-weight: 500; letter-spacing: 0.12em; text-transform: uppercase; color: #6B7280; }
 .section-head { display: flex; align-items: center; justify-content: space-between; margin-bottom: 1rem; padding-bottom: 0.75rem; border-bottom: 1px solid #E5E7EB; }
+.section-title { font-family: 'Cormorant Garamond', serif; font-size: 1.5rem; font-weight: 600; color: #1A1A2E; }
 div[data-testid="stDownloadButton"] > button {
     background: #fff !important; border: 1.5px solid #1A1A2E !important; color: #1A1A2E !important;
 # ═══════════════════════════════════════════════════════════
+# MODEL
 # ═══════════════════════════════════════════════════════════
 @st.cache_resource(show_spinner="Loading recognition model…")
 def load_model():
 # ═══════════════════════════════════════════════════════════
+# OCR
 # ═══════════════════════════════════════════════════════════
 def ocr_page(img: Image.Image) -> str:
     import torch
     processor, model, device = load_model()
     pixel_values = processor(
         images=img.convert("RGB"),
         return_tensors="pt"
     ).pixel_values.to(device)
     with torch.no_grad():
         generated = model.generate(
             pixel_values=pixel_values,
             max_new_tokens=64,
+            num_beams=1,
         )
     return processor.batch_decode(generated, skip_special_tokens=True)[0].strip()
 # ═══════════════════════════════════════════════════════════
+# PARSE LABEL
 # Format: PLOT: ... | LOC: ... | AREA: ... | AMT: ... | DATE: ... | VOS: ...
 # ═══════════════════════════════════════════════════════════
 def parse_label(raw_text: str, filename: str) -> dict:
 # PROCESSING
 # ═══════════════════════════════════════════════════════════
 if run and uploaded:
+    import fitz, traceback
     st.session_state.records = []
     st.session_state.errors  = []
     st.session_state.done    = False
         raw   = uf.read()
         bar.progress(fi / len(uploaded), text=f"Reading {fname}…")
+        st.write(f"📄 **{fname}** — {len(raw):,} bytes")
         try:
             ext = fname.lower().rsplit(".", 1)[-1]
             if ext == "pdf":
+                # Write to temp file — same as training
+                with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
+                    tmp.write(raw)
+                    tmp_path = tmp.name
+                doc = fitz.open(tmp_path)
+                st.write(f"  ✅ PDF opened — {len(doc)} page(s) found")
+                imgs = []
+                mat  = fitz.Matrix(200/72, 200/72)
+                for page in doc:
+                    pix = page.get_pixmap(matrix=mat, alpha=False)
+                    img = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
+                    imgs.append(img)
+                    pix = None
+                doc.close()
+                os.unlink(tmp_path)
+                st.write(f"  ✅ Rasterized {len(imgs)} page image(s)")
             else:
                 imgs = [Image.open(io.BytesIO(raw)).convert("RGB")]
+                st.write(f"  ✅ Loaded image")
             if not imgs:
+                st.error(f"  ❌ No pages extracted from {fname}")
+                st.session_state.errors.append(f"{fname}: no pages extracted")
                 continue
             for pi, img in enumerate(imgs, 1):
+                status.caption(f"Running OCR on **{fname}** — page {pi} of {len(imgs)}")
                 raw_text = ocr_page(img)
+                st.write(f"  📝 Page {pi} OCR output: `{raw_text}`")
+                record = parse_label(raw_text, fname)
                 st.session_state.records.append(record)
         except Exception as e:
+            st.error(f"❌ Error on {fname}: {e}")
+            st.code(traceback.format_exc())
             st.session_state.errors.append(f"{fname}: {e}")
         bar.progress((fi + 1) / len(uploaded))