Update app.py
Browse files
app.py
CHANGED
|
@@ -52,28 +52,30 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], all
|
|
| 52 |
# ─── CPU monkey-patch context manager ────────────────────────────────────────
|
| 53 |
from contextlib import contextmanager
|
| 54 |
|
|
|
|
|
|
|
|
|
|
| 55 |
@contextmanager
|
| 56 |
def force_cpu():
|
| 57 |
"""
|
| 58 |
-
DeepSeek-OCR-2's model.infer() hardcodes .cuda()
|
| 59 |
-
This context manager
|
| 60 |
-
|
|
|
|
| 61 |
"""
|
| 62 |
# Save originals
|
| 63 |
_tensor_cuda = torch.Tensor.cuda
|
| 64 |
_module_cuda = torch.nn.Module.cuda
|
| 65 |
_tensor_to = torch.Tensor.to
|
| 66 |
_module_to = torch.nn.Module.to
|
|
|
|
| 67 |
|
| 68 |
-
# Tensor.cuda() → return self (stay on CPU)
|
| 69 |
def _noop_tensor_cuda(self, device=None, *args, **kwargs):
|
| 70 |
return self
|
| 71 |
|
| 72 |
-
# Module.cuda() → return self
|
| 73 |
def _noop_module_cuda(self, device=None):
|
| 74 |
return self
|
| 75 |
|
| 76 |
-
# Tensor.to("cuda") / to(device) → stay on CPU; allow dtype casts
|
| 77 |
def _safe_tensor_to(self, *args, **kwargs):
|
| 78 |
filtered = [
|
| 79 |
a for a in args
|
|
@@ -87,7 +89,6 @@ def force_cpu():
|
|
| 87 |
return self
|
| 88 |
return self
|
| 89 |
|
| 90 |
-
# Module.to("cuda") → stay on CPU; allow dtype casts
|
| 91 |
def _safe_module_to(self, *args, **kwargs):
|
| 92 |
filtered = [
|
| 93 |
a for a in args
|
|
@@ -101,18 +102,37 @@ def force_cpu():
|
|
| 101 |
return self
|
| 102 |
return self
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
torch.Tensor.cuda = _noop_tensor_cuda
|
| 105 |
torch.nn.Module.cuda = _noop_module_cuda
|
| 106 |
torch.Tensor.to = _safe_tensor_to
|
| 107 |
torch.nn.Module.to = _safe_module_to
|
|
|
|
| 108 |
|
| 109 |
try:
|
| 110 |
yield
|
| 111 |
finally:
|
|
|
|
| 112 |
torch.Tensor.cuda = _tensor_cuda
|
| 113 |
torch.nn.Module.cuda = _module_cuda
|
| 114 |
torch.Tensor.to = _tensor_to
|
| 115 |
torch.nn.Module.to = _module_to
|
|
|
|
| 116 |
|
| 117 |
|
| 118 |
# ─── Core OCR inference ───────────────────────────────────────────────────────
|
|
|
|
| 52 |
# ─── CPU monkey-patch context manager ────────────────────────────────────────
|
| 53 |
# Used to build the force_cpu() context manager below. Imported exactly once:
# a decorator line may only be followed by a def/class, never by an import.
from contextlib import contextmanager
|
| 57 |
+
|
| 58 |
@contextmanager
|
| 59 |
def force_cpu():
|
| 60 |
"""
|
| 61 |
+
DeepSeek-OCR-2's model.infer() hardcodes .cuda() and torch.autocast("cuda", bfloat16).
|
| 62 |
+
This context manager patches:
|
| 63 |
+
1. .cuda() / .to("cuda") → no-op
|
| 64 |
+
2. torch.autocast("cuda", ...) → runs on CPU in float32
|
| 65 |
"""
|
| 66 |
# Save originals
|
| 67 |
_tensor_cuda = torch.Tensor.cuda
|
| 68 |
_module_cuda = torch.nn.Module.cuda
|
| 69 |
_tensor_to = torch.Tensor.to
|
| 70 |
_module_to = torch.nn.Module.to
|
| 71 |
+
_autocast = torch.autocast
|
| 72 |
|
|
|
|
| 73 |
def _noop_tensor_cuda(self, device=None, *args, **kwargs):
|
| 74 |
return self
|
| 75 |
|
|
|
|
| 76 |
def _noop_module_cuda(self, device=None):
|
| 77 |
return self
|
| 78 |
|
|
|
|
| 79 |
def _safe_tensor_to(self, *args, **kwargs):
|
| 80 |
filtered = [
|
| 81 |
a for a in args
|
|
|
|
| 89 |
return self
|
| 90 |
return self
|
| 91 |
|
|
|
|
| 92 |
def _safe_module_to(self, *args, **kwargs):
|
| 93 |
filtered = [
|
| 94 |
a for a in args
|
|
|
|
| 102 |
return self
|
| 103 |
return self
|
| 104 |
|
| 105 |
+
# ⚡ New patch: replace torch.autocast
|
| 106 |
+
@contextmanager
def _cpu_autocast(device_type=None, dtype=None, enabled=True, cache_enabled=None):
    """
    Stand-in for torch.autocast that keeps CUDA autocast requests on the CPU.

    A request for device_type "cuda" is redirected to the original autocast
    with device_type "cpu" and dtype torch.float32, which prevents bfloat16
    conversion on CPU. Any other device_type is passed through unchanged.

    Args:
        device_type: Device string the caller asked for (e.g. "cuda", "cpu").
        dtype: Requested autocast dtype; ignored when device_type is "cuda".
        enabled: Forwarded to the original autocast.
        cache_enabled: Forwarded to the original autocast.
    """
    if device_type == "cuda":
        # Run as CPU autocast with float32 instead of bfloat16.
        device_type, dtype = "cpu", torch.float32

    # _autocast is the original torch.autocast captured by the enclosing
    # force_cpu() before torch.autocast is patched.
    with _autocast(
        device_type,
        dtype=dtype,
        enabled=enabled,
        cache_enabled=cache_enabled,
    ):
        yield
|
| 119 |
+
|
| 120 |
+
# Apply patches
|
| 121 |
torch.Tensor.cuda = _noop_tensor_cuda
|
| 122 |
torch.nn.Module.cuda = _noop_module_cuda
|
| 123 |
torch.Tensor.to = _safe_tensor_to
|
| 124 |
torch.nn.Module.to = _safe_module_to
|
| 125 |
+
torch.autocast = _cpu_autocast
|
| 126 |
|
| 127 |
try:
|
| 128 |
yield
|
| 129 |
finally:
|
| 130 |
+
# Restore originals
|
| 131 |
torch.Tensor.cuda = _tensor_cuda
|
| 132 |
torch.nn.Module.cuda = _module_cuda
|
| 133 |
torch.Tensor.to = _tensor_to
|
| 134 |
torch.nn.Module.to = _module_to
|
| 135 |
+
torch.autocast = _autocast
|
| 136 |
|
| 137 |
|
| 138 |
# ─── Core OCR inference ───────────────────────────────────────────────────────
|