ZienabM committed on
Commit
a9ceaac
·
verified ·
1 Parent(s): 4e4f5cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -7
app.py CHANGED
@@ -52,28 +52,30 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], all
52
  # ─── CPU monkey-patch context manager ────────────────────────────────────────
53
  from contextlib import contextmanager
54
 
 
 
 
55
  @contextmanager
56
  def force_cpu():
57
  """
58
- DeepSeek-OCR-2's model.infer() hardcodes .cuda() even when no GPU is present.
59
- This context manager temporarily replaces all CUDA-moving calls with no-ops
60
- so the model runs on CPU without modification.
 
61
  """
62
  # Save originals
63
  _tensor_cuda = torch.Tensor.cuda
64
  _module_cuda = torch.nn.Module.cuda
65
  _tensor_to = torch.Tensor.to
66
  _module_to = torch.nn.Module.to
 
67
 
68
- # Tensor.cuda() → return self (stay on CPU)
69
  def _noop_tensor_cuda(self, device=None, *args, **kwargs):
70
  return self
71
 
72
- # Module.cuda() → return self
73
  def _noop_module_cuda(self, device=None):
74
  return self
75
 
76
- # Tensor.to("cuda") / to(device) → stay on CPU; allow dtype casts
77
  def _safe_tensor_to(self, *args, **kwargs):
78
  filtered = [
79
  a for a in args
@@ -87,7 +89,6 @@ def force_cpu():
87
  return self
88
  return self
89
 
90
- # Module.to("cuda") → stay on CPU; allow dtype casts
91
  def _safe_module_to(self, *args, **kwargs):
92
  filtered = [
93
  a for a in args
@@ -101,18 +102,37 @@ def force_cpu():
101
  return self
102
  return self
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  torch.Tensor.cuda = _noop_tensor_cuda
105
  torch.nn.Module.cuda = _noop_module_cuda
106
  torch.Tensor.to = _safe_tensor_to
107
  torch.nn.Module.to = _safe_module_to
 
108
 
109
  try:
110
  yield
111
  finally:
 
112
  torch.Tensor.cuda = _tensor_cuda
113
  torch.nn.Module.cuda = _module_cuda
114
  torch.Tensor.to = _tensor_to
115
  torch.nn.Module.to = _module_to
 
116
 
117
 
118
  # ─── Core OCR inference ───────────────────────────────────────────────────────
 
52
  # ─── CPU monkey-patch context manager ────────────────────────────────────────
53
  from contextlib import contextmanager
54
 
55
+ @contextmanager
56
+ from contextlib import contextmanager
57
+
58
  @contextmanager
59
  def force_cpu():
60
  """
61
+ DeepSeek-OCR-2's model.infer() hardcodes .cuda() and torch.autocast("cuda", bfloat16).
62
+ This context manager patches:
63
+ 1. .cuda() / .to("cuda") → no-op
64
+ 2. torch.autocast("cuda", ...) → runs on CPU in float32
65
  """
66
  # Save originals
67
  _tensor_cuda = torch.Tensor.cuda
68
  _module_cuda = torch.nn.Module.cuda
69
  _tensor_to = torch.Tensor.to
70
  _module_to = torch.nn.Module.to
71
+ _autocast = torch.autocast
72
 
 
73
  def _noop_tensor_cuda(self, device=None, *args, **kwargs):
74
  return self
75
 
 
76
  def _noop_module_cuda(self, device=None):
77
  return self
78
 
 
79
  def _safe_tensor_to(self, *args, **kwargs):
80
  filtered = [
81
  a for a in args
 
89
  return self
90
  return self
91
 
 
92
  def _safe_module_to(self, *args, **kwargs):
93
  filtered = [
94
  a for a in args
 
102
  return self
103
  return self
104
 
105
+ # ⚡ الباتش الجديد: استبدال torch.autocast
106
+ @contextmanager
107
+ def _cpu_autocast(device_type=None, dtype=None, enabled=True, cache_enabled=None):
108
+ """
109
+ If device_type is "cuda", run as CPU autocast with float32.
110
+ This prevents bfloat16 conversion on CPU.
111
+ """
112
+ if device_type == "cuda":
113
+ # ⇒ شغّل كـ CPU autocast مع float32 بدلاً من bfloat16
114
+ with _autocast("cpu", dtype=torch.float32, enabled=enabled):
115
+ yield
116
+ else:
117
+ with _autocast(device_type, dtype=dtype, enabled=enabled):
118
+ yield
119
+
120
+ # Apply patches
121
  torch.Tensor.cuda = _noop_tensor_cuda
122
  torch.nn.Module.cuda = _noop_module_cuda
123
  torch.Tensor.to = _safe_tensor_to
124
  torch.nn.Module.to = _safe_module_to
125
+ torch.autocast = _cpu_autocast
126
 
127
  try:
128
  yield
129
  finally:
130
+ # Restore originals
131
  torch.Tensor.cuda = _tensor_cuda
132
  torch.nn.Module.cuda = _module_cuda
133
  torch.Tensor.to = _tensor_to
134
  torch.nn.Module.to = _module_to
135
+ torch.autocast = _autocast
136
 
137
 
138
  # ─── Core OCR inference ───────────────────────────────────────────────────────