mrrtmob committed on
Commit
5789876
·
verified ·
1 Parent(s): 0f5e761

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -40
app.py CHANGED
@@ -1,45 +1,19 @@
1
  import gradio as gr
2
- try:
3
- import spaces
4
- except ImportError:
5
- class spaces:
6
- @staticmethod
7
- def GPU(func):
8
- return func
9
-
10
  from kiri_ocr import OCR
11
  from PIL import Image, ImageDraw
12
  import numpy as np
13
- import torch
14
 
15
- # Initialize OCR and force FP32
16
  try:
17
  print("Loading Kiri OCR model...")
18
- print(f"PyTorch version: {torch.__version__}")
19
- print(f"CUDA available: {torch.cuda.is_available()}")
20
-
21
- # Load model normally
22
- ocr = OCR(verbose=True, device="cuda")
23
-
24
- # FORCE FP32 after loading
25
- if hasattr(ocr, 'model') and ocr.model is not None:
26
- print("Converting model to FP32...")
27
- ocr.model = ocr.model.float()
28
-
29
- # Also update the config to prevent future conversions
30
- if hasattr(ocr, 'transformer_cfg'):
31
- ocr.transformer_cfg.USE_FP16 = False
32
- print("✓ Disabled FP16 in config")
33
-
34
- print("✓ Model loaded successfully on GPU with FP32")
35
-
36
  except Exception as e:
37
  print(f"Error loading model: {e}")
38
- import traceback
39
- traceback.print_exc()
40
  ocr = None
41
 
42
- @spaces.GPU
43
  def process_image(image_path):
44
  if ocr is None:
45
  return None, "Error: OCR model failed to load."
@@ -49,14 +23,8 @@ def process_image(image_path):
49
 
50
  try:
51
  print(f"Processing image: {image_path}")
52
-
53
- # Ensure model is in FP32 mode before processing
54
- if hasattr(ocr, 'model'):
55
- ocr.model = ocr.model.float()
56
-
57
- # Disable autocast to prevent automatic FP16
58
- with torch.cuda.amp.autocast(enabled=False):
59
- text, results = ocr.extract_text(image_path, verbose=True)
60
 
61
  print(f"Extracted {len(results)} regions.")
62
 
@@ -71,6 +39,7 @@ def process_image(image_path):
71
  for item in results:
72
  if 'box' in item:
73
  x, y, w, h = item['box']
 
74
  x, y, w, h = int(x), int(y), int(w), int(h)
75
  draw.rectangle([x, y, x + w, y + h], outline="red", width=3)
76
 
@@ -95,4 +64,4 @@ demo = gr.Interface(
95
  )
96
 
97
  if __name__ == "__main__":
98
- demo.launch()
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
2
  from kiri_ocr import OCR
3
  from PIL import Image, ImageDraw
4
  import numpy as np
5
+ import os
6
 
7
+ # Initialize OCR
8
  try:
9
  print("Loading Kiri OCR model...")
10
+ # Use verbose=True to see what's happening
11
+ ocr = OCR(verbose=True)
12
+ print("Model loaded successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  except Exception as e:
14
  print(f"Error loading model: {e}")
 
 
15
  ocr = None
16
 
 
17
  def process_image(image_path):
18
  if ocr is None:
19
  return None, "Error: OCR model failed to load."
 
23
 
24
  try:
25
  print(f"Processing image: {image_path}")
26
+ # extract_text returns (text, results)
27
+ text, results = ocr.extract_text(image_path, verbose=True)
 
 
 
 
 
 
28
 
29
  print(f"Extracted {len(results)} regions.")
30
 
 
39
  for item in results:
40
  if 'box' in item:
41
  x, y, w, h = item['box']
42
+ # Ensure coordinates are ints
43
  x, y, w, h = int(x), int(y), int(w), int(h)
44
  draw.rectangle([x, y, x + w, y + h], outline="red", width=3)
45
 
 
64
  )
65
 
66
  if __name__ == "__main__":
67
+ demo.launch(server_name="0.0.0.0", server_port=7860, share=False)