Spaces:

ZienabM
/

ocr

Sleeping

App Files Community

ZienabM committed 16 days ago

Commit

0dc3fe9

verified ·

1 Parent(s): 7e88a4e

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -24

app.py CHANGED Viewed

@@ -38,7 +38,6 @@ async def lifespan(app: FastAPI):
         _attn_implementation="eager",
         trust_remote_code=True,
         torch_dtype=torch.float32,
-        use_safetensors=True,
     )
     model.eval()
     log.info("Model ready (cpu)")
@@ -55,24 +54,25 @@ from contextlib import contextmanager
 @contextmanager
 def force_cpu():
     """
-    DeepSeek-OCR-2's model.infer() hardcodes .cuda() and torch.autocast("cuda", bfloat16).
-    This context manager:
-      1. Patches .cuda() / .to("cuda") → no-op
-      2. Completely DISABLES torch.autocast so bfloat16 is never applied
     """
     # Save originals
     _tensor_cuda   = torch.Tensor.cuda
     _module_cuda   = torch.nn.Module.cuda
     _tensor_to     = torch.Tensor.to
     _module_to     = torch.nn.Module.to
-    _autocast      = torch.autocast
     def _noop_tensor_cuda(self, device=None, *args, **kwargs):
         return self
     def _noop_module_cuda(self, device=None):
         return self
     def _safe_tensor_to(self, *args, **kwargs):
         filtered = [
             a for a in args
@@ -86,6 +86,7 @@ def force_cpu():
                 return self
         return self
     def _safe_module_to(self, *args, **kwargs):
         filtered = [
             a for a in args
@@ -99,38 +100,25 @@ def force_cpu():
                 return self
         return self
-    # تعطيل autocast بالكامل — لا نريده يحوّل أي شيء إلى bfloat16
-    @contextmanager
-    def _disabled_autocast(device_type=None, dtype=None, enabled=True, cache_enabled=None):
-        """
-        Completely disables autocast.
-        The model code calls:  with torch.autocast("cuda", dtype=torch.bfloat16):
-        We replace it with a no-op context manager that does nothing.
-        """
-        yield  # لا تفعل شيئاً — لا تحويل أنواع
-    # Apply patches
     torch.Tensor.cuda     = _noop_tensor_cuda
     torch.nn.Module.cuda  = _noop_module_cuda
     torch.Tensor.to       = _safe_tensor_to
     torch.nn.Module.to    = _safe_module_to
-    torch.autocast        = _disabled_autocast
     try:
         yield
     finally:
-        # Restore originals
         torch.Tensor.cuda    = _tensor_cuda
         torch.nn.Module.cuda = _module_cuda
         torch.Tensor.to      = _tensor_to
         torch.nn.Module.to   = _module_to
-        torch.autocast       = _autocast
 # ─── Core OCR inference ───────────────────────────────────────────────────────
 def run_ocr(pil_image: Image.Image, mode: str = "free") -> str:
     """
     Run DeepSeek-OCR-2 on a PIL image and return extracted text.
-    Forces full float32 conversion to avoid bfloat16 errors on CPU.
     """
     prompt_text = (
         "Convert the document to markdown."
@@ -144,9 +132,8 @@ def run_ocr(pil_image: Image.Image, mode: str = "free") -> str:
     try:
         if hasattr(model, "infer"):
-            # ⚠️ إجبار النموذج بالكامل على float32 قبل الاستخدام
-            model.to(torch.float32)
             with tempfile.TemporaryDirectory() as out_dir:
                 with force_cpu():
                     result = model.infer(
                         tokenizer,
@@ -162,7 +149,7 @@ def run_ocr(pil_image: Image.Image, mode: str = "free") -> str:
                 return result.get("text", str(result))
             return str(result) if result else ""
-        # Fallback — نادراً ما يُستخدم
         messages = [{"role": "user", "content": [
             {"type": "image", "image": tmp_path},
             {"type": "text",  "text": prompt_text},

         _attn_implementation="eager",
         trust_remote_code=True,
         torch_dtype=torch.float32,
     )
     model.eval()
     log.info("Model ready (cpu)")
 @contextmanager
 def force_cpu():
     """
+    DeepSeek-OCR-2's model.infer() hardcodes .cuda() even when no GPU is present.
+    This context manager temporarily replaces all CUDA-moving calls with no-ops
+    so the model runs on CPU without modification.
     """
     # Save originals
     _tensor_cuda   = torch.Tensor.cuda
     _module_cuda   = torch.nn.Module.cuda
     _tensor_to     = torch.Tensor.to
     _module_to     = torch.nn.Module.to
+    # Tensor.cuda() → return self (stay on CPU)
     def _noop_tensor_cuda(self, device=None, *args, **kwargs):
         return self
+    # Module.cuda() → return self
     def _noop_module_cuda(self, device=None):
         return self
+    # Tensor.to("cuda") / to(device) → stay on CPU; allow dtype casts
     def _safe_tensor_to(self, *args, **kwargs):
         filtered = [
             a for a in args
                 return self
         return self
+    # Module.to("cuda") → stay on CPU; allow dtype casts
     def _safe_module_to(self, *args, **kwargs):
         filtered = [
             a for a in args
                 return self
         return self
     torch.Tensor.cuda     = _noop_tensor_cuda
     torch.nn.Module.cuda  = _noop_module_cuda
     torch.Tensor.to       = _safe_tensor_to
     torch.nn.Module.to    = _safe_module_to
     try:
         yield
     finally:
         torch.Tensor.cuda    = _tensor_cuda
         torch.nn.Module.cuda = _module_cuda
         torch.Tensor.to      = _tensor_to
         torch.nn.Module.to   = _module_to
 # ─── Core OCR inference ───────────────────────────────────────────────────────
 def run_ocr(pil_image: Image.Image, mode: str = "free") -> str:
     """
     Run DeepSeek-OCR-2 on a PIL image and return extracted text.
+    Works on both CPU (HF free tier) and GPU.
     """
     prompt_text = (
         "Convert the document to markdown."
     try:
         if hasattr(model, "infer"):
             with tempfile.TemporaryDirectory() as out_dir:
+                # force_cpu() patches .cuda() → no-op so model.infer() works on CPU
                 with force_cpu():
                     result = model.infer(
                         tokenizer,
                 return result.get("text", str(result))
             return str(result) if result else ""
+        # ── Fallback: standard generate() if model.infer() is not available ──
         messages = [{"role": "user", "content": [
             {"type": "image", "image": tmp_path},
             {"type": "text",  "text": prompt_text},