DocUA committed on
Commit
092c902
·
1 Parent(s): 512f03f

fix: Address MPS compatibility issues, ensure explicit model dtype, and improve Gradio file input handling.

Browse files
Files changed (3) hide show
  1. app.py +15 -6
  2. app_hf.py +6 -3
  3. requirements.txt +2 -2
app.py CHANGED
@@ -29,6 +29,8 @@ if torch.backends.mps.is_available():
29
  torch.Tensor.cuda = lambda self, *args, **kwargs: self.to("mps")
30
  torch.nn.Module.cuda = lambda self, *args, **kwargs: self.to("mps")
31
  dtype = torch.float16
 
 
32
  else:
33
  device = "cpu"
34
  dtype = torch.float32
@@ -66,7 +68,8 @@ class ModelManager:
66
  self.model = AutoModel.from_pretrained(
67
  model_name,
68
  trust_remote_code=True,
69
- use_safetensors=True
 
70
  )
71
  self.model = self.model.to(device=device, dtype=dtype)
72
  self.model.eval()
@@ -109,14 +112,17 @@ def run_ocr(input_image, input_file, model_choice, custom_prompt):
109
  images_to_process = []
110
 
111
  if input_file is not None:
112
- if input_file.name.lower().endswith(".pdf"):
 
 
 
113
  try:
114
- images_to_process = pdf_to_images(input_file.name)
115
  except Exception as e:
116
  return f"Помилка читання PDF: {str(e)}"
117
  else:
118
  try:
119
- images_to_process = [Image.open(input_file.name)]
120
  except Exception as e:
121
  return f"Помилка завантаження файлу: {str(e)}"
122
  elif input_image is not None:
@@ -187,6 +193,9 @@ def run_ocr(input_image, input_file, model_choice, custom_prompt):
187
  except Exception as e:
188
  all_results.append(f"--- Page/Image {i+1} ---\nПомилка: {str(e)}")
189
 
 
 
 
190
  return "\n\n".join(all_results)
191
 
192
  def save_result_to_file(text):
@@ -206,7 +215,7 @@ custom_css = """
206
  .footer { text-align: center; margin-top: 50px; font-size: 0.9rem; color: #718096; }
207
  """
208
 
209
- with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma") as demo:
210
  with gr.Column():
211
  gr.Markdown("# 🔍 OCR & Medical Document Analysis", elem_classes="header")
212
  gr.Markdown("Порівняння DeepSeek-OCR-2 та MedGemma-1.5-4B", elem_classes="header")
@@ -274,4 +283,4 @@ with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma") as demo:
274
  )
275
 
276
  if __name__ == "__main__":
277
- demo.launch(server_name="0.0.0.0", share=False, css=custom_css)
 
29
  torch.Tensor.cuda = lambda self, *args, **kwargs: self.to("mps")
30
  torch.nn.Module.cuda = lambda self, *args, **kwargs: self.to("mps")
31
  dtype = torch.float16
32
+ # Patch to avoid BFloat16 vs Float16 mismatch in custom modeling code on MPS
33
+ torch.bfloat16 = torch.float16
34
  else:
35
  device = "cpu"
36
  dtype = torch.float32
 
68
  self.model = AutoModel.from_pretrained(
69
  model_name,
70
  trust_remote_code=True,
71
+ use_safetensors=True,
72
+ torch_dtype=dtype
73
  )
74
  self.model = self.model.to(device=device, dtype=dtype)
75
  self.model.eval()
 
112
  images_to_process = []
113
 
114
  if input_file is not None:
115
+ # Compatibility with different Gradio versions (object with .name vs string path)
116
+ file_path = input_file.name if hasattr(input_file, 'name') else input_file
117
+
118
+ if file_path.lower().endswith(".pdf"):
119
  try:
120
+ images_to_process = pdf_to_images(file_path)
121
  except Exception as e:
122
  return f"Помилка читання PDF: {str(e)}"
123
  else:
124
  try:
125
+ images_to_process = [Image.open(file_path)]
126
  except Exception as e:
127
  return f"Помилка завантаження файлу: {str(e)}"
128
  elif input_image is not None:
 
193
  except Exception as e:
194
  all_results.append(f"--- Page/Image {i+1} ---\nПомилка: {str(e)}")
195
 
196
+ if torch.backends.mps.is_available():
197
+ torch.mps.empty_cache()
198
+
199
  return "\n\n".join(all_results)
200
 
201
  def save_result_to_file(text):
 
215
  .footer { text-align: center; margin-top: 50px; font-size: 0.9rem; color: #718096; }
216
  """
217
 
218
+ with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma", css=custom_css) as demo:
219
  with gr.Column():
220
  gr.Markdown("# 🔍 OCR & Medical Document Analysis", elem_classes="header")
221
  gr.Markdown("Порівняння DeepSeek-OCR-2 та MedGemma-1.5-4B", elem_classes="header")
 
283
  )
284
 
285
  if __name__ == "__main__":
286
+ demo.launch(server_name="0.0.0.0", share=False)
app_hf.py CHANGED
@@ -89,14 +89,17 @@ def run_ocr(input_image, input_file, model_choice, custom_prompt):
89
  images_to_process = []
90
 
91
  if input_file is not None:
92
- if input_file.name.lower().endswith(".pdf"):
 
 
 
93
  try:
94
- images_to_process = pdf_to_images(input_file.name)
95
  except Exception as e:
96
  return f"Помилка читання PDF: {str(e)}"
97
  else:
98
  try:
99
- images_to_process = [Image.open(input_file.name)]
100
  except Exception as e:
101
  return f"Помилка завантаження файлу: {str(e)}"
102
  elif input_image is not None:
 
89
  images_to_process = []
90
 
91
  if input_file is not None:
92
+ # Compatibility with different Gradio versions (object with .name vs string path)
93
+ file_path = input_file.name if hasattr(input_file, 'name') else input_file
94
+
95
+ if file_path.lower().endswith(".pdf"):
96
  try:
97
+ images_to_process = pdf_to_images(file_path)
98
  except Exception as e:
99
  return f"Помилка читання PDF: {str(e)}"
100
  else:
101
  try:
102
+ images_to_process = [Image.open(file_path)]
103
  except Exception as e:
104
  return f"Помилка завантаження файлу: {str(e)}"
105
  elif input_image is not None:
requirements.txt CHANGED
@@ -10,7 +10,7 @@ pillow
10
  matplotlib
11
  requests
12
  torchvision
13
- gradio==4.44.1
 
14
  pymupdf
15
  spaces
16
- huggingface-hub<0.25.0
 
10
  matplotlib
11
  requests
12
  torchvision
13
+ gradio
14
+ huggingface-hub
15
  pymupdf
16
  spaces