ZienabM committed on
Commit
0dc3fe9
·
verified ·
1 Parent(s): 7e88a4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -24
app.py CHANGED
@@ -38,7 +38,6 @@ async def lifespan(app: FastAPI):
38
  _attn_implementation="eager",
39
  trust_remote_code=True,
40
  torch_dtype=torch.float32,
41
- use_safetensors=True,
42
  )
43
  model.eval()
44
  log.info("Model ready (cpu)")
@@ -55,24 +54,25 @@ from contextlib import contextmanager
55
  @contextmanager
56
  def force_cpu():
57
  """
58
- DeepSeek-OCR-2's model.infer() hardcodes .cuda() and torch.autocast("cuda", bfloat16).
59
- This context manager:
60
- 1. Patches .cuda() / .to("cuda") → no-op
61
- 2. Completely DISABLES torch.autocast so bfloat16 is never applied
62
  """
63
  # Save originals
64
  _tensor_cuda = torch.Tensor.cuda
65
  _module_cuda = torch.nn.Module.cuda
66
  _tensor_to = torch.Tensor.to
67
  _module_to = torch.nn.Module.to
68
- _autocast = torch.autocast
69
 
 
70
  def _noop_tensor_cuda(self, device=None, *args, **kwargs):
71
  return self
72
 
 
73
  def _noop_module_cuda(self, device=None):
74
  return self
75
 
 
76
  def _safe_tensor_to(self, *args, **kwargs):
77
  filtered = [
78
  a for a in args
@@ -86,6 +86,7 @@ def force_cpu():
86
  return self
87
  return self
88
 
 
89
  def _safe_module_to(self, *args, **kwargs):
90
  filtered = [
91
  a for a in args
@@ -99,38 +100,25 @@ def force_cpu():
99
  return self
100
  return self
101
 
102
- # تعطيل autocast بالكامل — لا نريده يحوّل أي شيء إلى bfloat16
103
- @contextmanager
104
- def _disabled_autocast(device_type=None, dtype=None, enabled=True, cache_enabled=None):
105
- """
106
- Completely disables autocast.
107
- The model code calls: with torch.autocast("cuda", dtype=torch.bfloat16):
108
- We replace it with a no-op context manager that does nothing.
109
- """
110
- yield # لا تفعل شيئاً — لا تحويل أنواع
111
-
112
- # Apply patches
113
  torch.Tensor.cuda = _noop_tensor_cuda
114
  torch.nn.Module.cuda = _noop_module_cuda
115
  torch.Tensor.to = _safe_tensor_to
116
  torch.nn.Module.to = _safe_module_to
117
- torch.autocast = _disabled_autocast
118
 
119
  try:
120
  yield
121
  finally:
122
- # Restore originals
123
  torch.Tensor.cuda = _tensor_cuda
124
  torch.nn.Module.cuda = _module_cuda
125
  torch.Tensor.to = _tensor_to
126
  torch.nn.Module.to = _module_to
127
- torch.autocast = _autocast
128
 
129
  # ─── Core OCR inference ───────────────────────────────────────────────────────
130
  def run_ocr(pil_image: Image.Image, mode: str = "free") -> str:
131
  """
132
  Run DeepSeek-OCR-2 on a PIL image and return extracted text.
133
- Forces full float32 conversion to avoid bfloat16 errors on CPU.
134
  """
135
  prompt_text = (
136
  "Convert the document to markdown."
@@ -144,9 +132,8 @@ def run_ocr(pil_image: Image.Image, mode: str = "free") -> str:
144
 
145
  try:
146
  if hasattr(model, "infer"):
147
- # ⚠️ إجبار النموذج بالكامل على float32 قبل الاستخدام
148
- model.to(torch.float32)
149
  with tempfile.TemporaryDirectory() as out_dir:
 
150
  with force_cpu():
151
  result = model.infer(
152
  tokenizer,
@@ -162,7 +149,7 @@ def run_ocr(pil_image: Image.Image, mode: str = "free") -> str:
162
  return result.get("text", str(result))
163
  return str(result) if result else ""
164
 
165
- # Fallback — نادراً ما يُستخدم
166
  messages = [{"role": "user", "content": [
167
  {"type": "image", "image": tmp_path},
168
  {"type": "text", "text": prompt_text},
 
38
  _attn_implementation="eager",
39
  trust_remote_code=True,
40
  torch_dtype=torch.float32,
 
41
  )
42
  model.eval()
43
  log.info("Model ready (cpu)")
 
54
  @contextmanager
55
  def force_cpu():
56
  """
57
+ DeepSeek-OCR-2's model.infer() hardcodes .cuda() even when no GPU is present.
58
+ This context manager temporarily replaces all CUDA-moving calls with no-ops
59
+ so the model runs on CPU without modification.
 
60
  """
61
  # Save originals
62
  _tensor_cuda = torch.Tensor.cuda
63
  _module_cuda = torch.nn.Module.cuda
64
  _tensor_to = torch.Tensor.to
65
  _module_to = torch.nn.Module.to
 
66
 
67
+ # Tensor.cuda() → return self (stay on CPU)
68
  def _noop_tensor_cuda(self, device=None, *args, **kwargs):
69
  return self
70
 
71
+ # Module.cuda() → return self
72
  def _noop_module_cuda(self, device=None):
73
  return self
74
 
75
+ # Tensor.to("cuda") / to(device) → stay on CPU; allow dtype casts
76
  def _safe_tensor_to(self, *args, **kwargs):
77
  filtered = [
78
  a for a in args
 
86
  return self
87
  return self
88
 
89
+ # Module.to("cuda") → stay on CPU; allow dtype casts
90
  def _safe_module_to(self, *args, **kwargs):
91
  filtered = [
92
  a for a in args
 
100
  return self
101
  return self
102
 
 
 
 
 
 
 
 
 
 
 
 
103
  torch.Tensor.cuda = _noop_tensor_cuda
104
  torch.nn.Module.cuda = _noop_module_cuda
105
  torch.Tensor.to = _safe_tensor_to
106
  torch.nn.Module.to = _safe_module_to
 
107
 
108
  try:
109
  yield
110
  finally:
 
111
  torch.Tensor.cuda = _tensor_cuda
112
  torch.nn.Module.cuda = _module_cuda
113
  torch.Tensor.to = _tensor_to
114
  torch.nn.Module.to = _module_to
115
+
116
 
117
  # ─── Core OCR inference ───────────────────────────────────────────────────────
118
  def run_ocr(pil_image: Image.Image, mode: str = "free") -> str:
119
  """
120
  Run DeepSeek-OCR-2 on a PIL image and return extracted text.
121
+ Works on both CPU (HF free tier) and GPU.
122
  """
123
  prompt_text = (
124
  "Convert the document to markdown."
 
132
 
133
  try:
134
  if hasattr(model, "infer"):
 
 
135
  with tempfile.TemporaryDirectory() as out_dir:
136
+ # force_cpu() patches .cuda() → no-op so model.infer() works on CPU
137
  with force_cpu():
138
  result = model.infer(
139
  tokenizer,
 
149
  return result.get("text", str(result))
150
  return str(result) if result else ""
151
 
152
+ # ── Fallback: standard generate() if model.infer() is not available ──
153
  messages = [{"role": "user", "content": [
154
  {"type": "image", "image": tmp_path},
155
  {"type": "text", "text": prompt_text},