Spaces:

ZienabM
/

ocr

Running

App Files Files Community

ZienabM committed 20 days ago

Commit

a9ceaac

verified ·

1 Parent(s): 4e4f5cb

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -7

app.py CHANGED Viewed

@@ -52,28 +52,30 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], all
 # ─── CPU monkey-patch context manager ────────────────────────────────────────
 from contextlib import contextmanager
 @contextmanager
 def force_cpu():
     """
-    DeepSeek-OCR-2's model.infer() hardcodes .cuda() even when no GPU is present.
-    This context manager temporarily replaces all CUDA-moving calls with no-ops
-    so the model runs on CPU without modification.
     """
     # Save originals
     _tensor_cuda   = torch.Tensor.cuda
     _module_cuda   = torch.nn.Module.cuda
     _tensor_to     = torch.Tensor.to
     _module_to     = torch.nn.Module.to
-    # Tensor.cuda() → return self (stay on CPU)
     def _noop_tensor_cuda(self, device=None, *args, **kwargs):
         return self
-    # Module.cuda() → return self
     def _noop_module_cuda(self, device=None):
         return self
-    # Tensor.to("cuda") / to(device) → stay on CPU; allow dtype casts
     def _safe_tensor_to(self, *args, **kwargs):
         filtered = [
             a for a in args
@@ -87,7 +89,6 @@ def force_cpu():
                 return self
         return self
-    # Module.to("cuda") → stay on CPU; allow dtype casts
     def _safe_module_to(self, *args, **kwargs):
         filtered = [
             a for a in args
@@ -101,18 +102,37 @@ def force_cpu():
                 return self
         return self
     torch.Tensor.cuda     = _noop_tensor_cuda
     torch.nn.Module.cuda  = _noop_module_cuda
     torch.Tensor.to       = _safe_tensor_to
     torch.nn.Module.to    = _safe_module_to
     try:
         yield
     finally:
         torch.Tensor.cuda    = _tensor_cuda
         torch.nn.Module.cuda = _module_cuda
         torch.Tensor.to      = _tensor_to
         torch.nn.Module.to   = _module_to
 # ─── Core OCR inference ───────────────────────────────────────────────────────

 # ─── CPU monkey-patch context manager ────────────────────────────────────────
 from contextlib import contextmanager
+@contextmanager
+from contextlib import contextmanager
 @contextmanager
 def force_cpu():
     """
+    DeepSeek-OCR-2's model.infer() hardcodes .cuda() and torch.autocast("cuda", bfloat16).
+    This context manager patches:
+      1. .cuda() / .to("cuda") → no-op
+      2. torch.autocast("cuda", ...) → runs on CPU in float32
     """
     # Save originals
     _tensor_cuda   = torch.Tensor.cuda
     _module_cuda   = torch.nn.Module.cuda
     _tensor_to     = torch.Tensor.to
     _module_to     = torch.nn.Module.to
+    _autocast      = torch.autocast
     def _noop_tensor_cuda(self, device=None, *args, **kwargs):
         return self
     def _noop_module_cuda(self, device=None):
         return self
     def _safe_tensor_to(self, *args, **kwargs):
         filtered = [
             a for a in args
                 return self
         return self
     def _safe_module_to(self, *args, **kwargs):
         filtered = [
             a for a in args
                 return self
         return self
+    # ⚡ الباتش الجديد: استبدال torch.autocast
+    @contextmanager
+    def _cpu_autocast(device_type=None, dtype=None, enabled=True, cache_enabled=None):
+        """
+        If device_type is "cuda", run as CPU autocast with float32.
+        This prevents bfloat16 conversion on CPU.
+        """
+        if device_type == "cuda":
+            # ⇒ شغّل كـ CPU autocast مع float32 بدلاً من bfloat16
+            with _autocast("cpu", dtype=torch.float32, enabled=enabled):
+                yield
+        else:
+            with _autocast(device_type, dtype=dtype, enabled=enabled):
+                yield
+    # Apply patches
     torch.Tensor.cuda     = _noop_tensor_cuda
     torch.nn.Module.cuda  = _noop_module_cuda
     torch.Tensor.to       = _safe_tensor_to
     torch.nn.Module.to    = _safe_module_to
+    torch.autocast        = _cpu_autocast
     try:
         yield
     finally:
+        # Restore originals
         torch.Tensor.cuda    = _tensor_cuda
         torch.nn.Module.cuda = _module_cuda
         torch.Tensor.to      = _tensor_to
         torch.nn.Module.to   = _module_to
+        torch.autocast       = _autocast
 # ─── Core OCR inference ───────────────────────────────────────────────────────