Spaces:

shayansjm
/

ocr2

Sleeping

App Files Community

shayansjm committed on Jan 7

Commit

6274ffa

verified ·

1 Parent(s): 8915ba7

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -34

app.py CHANGED Viewed

@@ -3,64 +3,67 @@ from paddleocr import PaddleOCR
 from PIL import Image, ImageOps
 import numpy as np
-# We leave ocr as None. It will only load when someone clicks "Extract"
-ocr_model = None
 def process_bank_form(image):
-    global ocr_model
     if image is None:
         return "Please upload an image."
     try:
-        # 1. LAZY LOAD: This prevents the 'name ocr is not defined' error
-        if ocr_model is None:
-            print("First run: Loading PaddleOCR model into memory...")
-            ocr_model = PaddleOCR(
                 lang='en',
-                ocr_version='PP-OCRv4',
-                use_angle_cls=True,
-                use_gpu=False,
-                enable_mkldnn=True # Speeds up CPU processing
             )
-        # 2. Image Pre-processing
         img = image.convert("RGB")
-        img = ImageOps.exif_transpose(img) # Fixes phone photo rotation
         img_array = np.array(img)
-        # 3. Run Inference
-        # In 3.0+, we use the instance we just loaded
-        result = ocr_model.ocr(img_array)
-        # 4. Parsing with Safety Checks
         if not result or not isinstance(result, list) or result[0] is None:
-            return "No text detected. Try a closer photo."
         extracted_text = []
         for line in result[0]:
-            if isinstance(line, list) and len(line) >= 2:
-                text_info = line[1]
-                if text_info and len(text_info) >= 1:
-                    raw_string = str(text_info[0]).strip()
-                    conf = float(text_info[1])
-                    if raw_string and conf > 0.35:
-                        extracted_text.append(raw_string)
-        return "\n".join(extracted_text) if extracted_text else "Could not read text."
     except Exception as e:
-        return f"Error: {str(e)}\n\nTry: Cropping the image to just the text area."
-# UI with simplified components
-with gr.Blocks() as demo:
-    gr.Markdown("### 🏦 English Bank Form OCR (Stable Edition)")
     with gr.Row():
-        input_img = gr.Image(type="pil")
-        output_text = gr.Textbox(label="Extracted Text", lines=15)
     btn = gr.Button("Extract Text", variant="primary")
     btn.click(fn=process_bank_form, inputs=input_img, outputs=output_text)
 if __name__ == "__main__":
-    demo.launch()

 from PIL import Image, ImageOps
 import numpy as np
+# Global variable to hold the model once loaded
+ocr_instance = None
 def process_bank_form(image):
+    global ocr_instance
     if image is None:
         return "Please upload an image."
     try:
+        # 1. CLEAN INITIALIZATION (2026 Standard)
+        # We removed use_gpu, show_log, and rec_algorithm
+        if ocr_instance is None:
+            print("Initializing PaddleOCR 3.0 on CPU...")
+            ocr_instance = PaddleOCR(
                 lang='en',
+                ocr_version='PP-OCRv4', # Best for handwriting
+                use_angle_cls=True
             )
+        # 2. IMAGE PREPARATION
+        # Standardization prevents the model from hitting 'ghost' boxes
         img = image.convert("RGB")
+        img = ImageOps.exif_transpose(img) # Corrects phone camera rotation
         img_array = np.array(img)
+        # 3. RUN INFERENCE
+        result = ocr_instance.ocr(img_array)
+        # 4. DEFENSIVE PARSING
         if not result or not isinstance(result, list) or result[0] is None:
+            return "No text detected. Try a closer photo or darker ink."
         extracted_text = []
         for line in result[0]:
+            # Standard PaddleOCR structure: [[box], [text, confidence]]
+            if len(line) >= 2 and len(line[1]) >= 1:
+                text_string = str(line[1][0]).strip()
+                confidence = float(line[1][1])
+                # Keep only text with reasonable confidence
+                if text_string and confidence > 0.35:
+                    extracted_text.append(text_string)
+        if not extracted_text:
+            return "The AI saw the form but couldn't read the words. Please crop the photo."
+        return "\n".join(extracted_text)
     except Exception as e:
+        return f"System Error: {str(e)}\n\nTip: Go to Settings and click 'Factory Reboot' to clear the memory."
+# Build the Gradio App
+with gr.Blocks(theme=gr.themes.Base()) as demo:
+    gr.Markdown("## 🏦 English Bank Form OCR")
     with gr.Row():
+        input_img = gr.Image(type="pil", label="Bank Form Photo")
+        output_text = gr.Textbox(label="Result", lines=15)
     btn = gr.Button("Extract Text", variant="primary")
     btn.click(fn=process_bank_form, inputs=input_img, outputs=output_text)
 if __name__ == "__main__":
+    demo.launch(max_threads=1)