Spaces:

tachiwin
/

multilingual_ocr

Running

App Files Community

Luis J Camargo committed 25 days ago

Commit

cbab00e

1 Parent(s): db7023c

fix3

Browse files

Files changed (1) hide show

app.py +54 -17

app.py CHANGED Viewed

@@ -1,15 +1,30 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoProcessor
 from PIL import Image
 import gradio as gr
-from queue import Queue
 from threading import Event, Thread
 import atexit
 CONCURRENCY_LIMIT = 1
 DEVICE = "cpu"
 PROMPTS = {
     "ocr": "OCR:",
     "table": "Table Recognition:",
@@ -34,11 +49,21 @@ class OCRModelManager(object):
     def infer(self, *args, **kwargs):
         result_queue = Queue(maxsize=1)
         self._queue.put((args, kwargs, result_queue))
-        success, payload = result_queue.get()
-        if success:
-            return payload
-        else:
-            raise payload
     def close(self):
         for _ in self._workers:
@@ -58,9 +83,12 @@ class OCRModelManager(object):
                 img_path = args[0]
                 task = kwargs.get("task", "ocr")
                 min_new_tokens = kwargs.get("min_new_tokens", 3)
-                #max_new_tokens = kwargs.get("max_new_tokens", 2048)
                 temperature = kwargs.get("temperature", 0.2)
                 image = Image.open(img_path).convert("RGB")
                 messages = [
@@ -80,18 +108,23 @@ class OCRModelManager(object):
                     return_tensors="pt"
                 ).to(DEVICE)
-                with torch.no_grad():
                     outputs = model.generate(
                         **inputs,
-                        #max_new_tokens=max_new_tokens,
                         min_new_tokens=min_new_tokens,
-                        use_cache=False,
-                        do_sample=False,
                         min_p=0.1,
-                        temperature=temperature if temperature > 0 else 1.0,
                     )
                 decoded_outputs = processor.batch_decode(outputs, skip_special_tokens=True)[0]
                 result_queue.put((True, decoded_outputs))
             except Exception as e:
@@ -103,7 +136,7 @@ class OCRModelManager(object):
 def create_model():
     """Initialize PaddleOCR-VL with the fine-tuned Tachiwin model using transformers"""
     model_path = "tachiwin/PaddleOCR-VL-Tachiwin-BF16"
-    print(f"Loading model and processor from {model_path}...")
     # Use bfloat16 for CPU if supported, else float32
     # Hugging Face spaces CPUs often support bfloat16
@@ -113,22 +146,25 @@ def create_model():
             trust_remote_code=True,
             torch_dtype=torch.bfloat16
         ).to(DEVICE).eval()
     except Exception as e:
-        print(f"Failed to load in bfloat16, falling back to float32: {e}")
         model = AutoModelForCausalLM.from_pretrained(
             model_path,
             trust_remote_code=True,
             torch_dtype=torch.float32
         ).to(DEVICE).eval()
     processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
     return model, processor
 # Initialize model manager with 1 worker to save memory on CPU space
-print("Initializing Tachiwin Indigenous Languages OCR...")
 model_manager = OCRModelManager(1, create_model)
-print("Model ready!")
 def close_model_manager():
@@ -143,12 +179,13 @@ def inference(img):
     if img is None:
         return "Please upload an image."
     try:
         return model_manager.infer(
             img,
             task="ocr",
             min_new_tokens=3,
-            temperature=1.5,
         )
         # # Now extract text from the serialized structure

+import os
 import torch
 from transformers import AutoModelForCausalLM, AutoProcessor
 from PIL import Image
 import gradio as gr
+from queue import Queue, Empty
 from threading import Event, Thread
 import atexit
 CONCURRENCY_LIMIT = 1
+import logging
+import sys
+# Configure logging to sys.stderr, which is often more reliable in containerized environments
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[logging.StreamHandler(sys.stderr)]
+)
+logger = logging.getLogger("TachiwinOCR")
 DEVICE = "cpu"
+# Speed up CPU inference
+torch.set_num_threads(os.cpu_count() or 4)
 PROMPTS = {
     "ocr": "OCR:",
     "table": "Table Recognition:",
     def infer(self, *args, **kwargs):
         result_queue = Queue(maxsize=1)
         self._queue.put((args, kwargs, result_queue))
+        # Increased timeout to 20 minutes for CPU inference
+        timeout = 1200
+        try:
+            success, payload = result_queue.get(timeout=timeout)
+            if success:
+                return payload
+            else:
+                raise payload
+        except Empty:
+            # Check if workers are still alive
+            alive = any(w.is_alive() for w in self._workers)
+            if not alive:
+                raise RuntimeError("OCR workers have crashed.")
+            raise RuntimeError(f"OCR inference timed out after {timeout} seconds.")
     def close(self):
         for _ in self._workers:
                 img_path = args[0]
                 task = kwargs.get("task", "ocr")
                 min_new_tokens = kwargs.get("min_new_tokens", 3)
+                max_new_tokens = kwargs.get("max_new_tokens", 1024)
                 temperature = kwargs.get("temperature", 0.2)
+                logger.info(f"--- Starting inference process ---")
+                logger.info(f"Task: {task}, Min New Tokens: {min_new_tokens}, Temperature: {temperature}")
                 image = Image.open(img_path).convert("RGB")
                 messages = [
                     return_tensors="pt"
                 ).to(DEVICE)
+                logger.info(f"Inputs prepared (shape: {inputs['input_ids'].shape}). Running model.generate...")
+                with torch.inference_mode():
+                    # Restoring sampling params as requested
+                    # use_cache=False as requested because it's known to be unstable on some setups
                     outputs = model.generate(
                         **inputs,
+                        max_new_tokens=max_new_tokens,
                         min_new_tokens=min_new_tokens,
+                        use_cache=False,
+                        do_sample=True,
+                        temperature=max(temperature, 0.01),
                         min_p=0.1,
                     )
+                logger.info("Generation complete. Decoding results...")
                 decoded_outputs = processor.batch_decode(outputs, skip_special_tokens=True)[0]
+                logger.info(f"Inference finished successfully.")
                 result_queue.put((True, decoded_outputs))
             except Exception as e:
 def create_model():
     """Initialize PaddleOCR-VL with the fine-tuned Tachiwin model using transformers"""
     model_path = "tachiwin/PaddleOCR-VL-Tachiwin-BF16"
+    logger.info(f"Loading model and processor from {model_path}...")
     # Use bfloat16 for CPU if supported, else float32
     # Hugging Face spaces CPUs often support bfloat16
             trust_remote_code=True,
             torch_dtype=torch.bfloat16
         ).to(DEVICE).eval()
+        logger.info(f"Model loaded on {DEVICE} with bfloat16")
     except Exception as e:
+        logger.warning(f"Failed to load in bfloat16, falling back to float32: {e}")
         model = AutoModelForCausalLM.from_pretrained(
             model_path,
             trust_remote_code=True,
             torch_dtype=torch.float32
         ).to(DEVICE).eval()
+        logger.info(f"Model loaded on {DEVICE} with float32")
     processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
+    logger.info(f"Processor loaded successfully.")
     return model, processor
 # Initialize model manager with 1 worker to save memory on CPU space
+logger.info("Initializing Tachiwin Indigenous Languages OCR model manager...")
 model_manager = OCRModelManager(1, create_model)
+logger.info("Model manager is ready and listening for tasks!")
 def close_model_manager():
     if img is None:
         return "Please upload an image."
+    gr.Info("Inference started. On CPU, this may take 2-10 minutes depending on image complexity.")
     try:
         return model_manager.infer(
             img,
             task="ocr",
             min_new_tokens=3,
+            max_new_tokens=1024,
         )
         # # Now extract text from the serialized structure