DocUA committed on
Commit
092c902
·
1 Parent(s): 512f03f

fix: Address MPS compatibility issues, ensure explicit model dtype, and improve Gradio file input handling.

Browse files
Files changed (3) hide show
  1. app.py +15 -6
  2. app_hf.py +6 -3
  3. requirements.txt +2 -2
app.py CHANGED
@@ -29,6 +29,8 @@ if torch.backends.mps.is_available():
29
  torch.Tensor.cuda = lambda self, *args, **kwargs: self.to("mps")
30
  torch.nn.Module.cuda = lambda self, *args, **kwargs: self.to("mps")
31
  dtype = torch.float16
 
 
32
  else:
33
  device = "cpu"
34
  dtype = torch.float32
@@ -66,7 +68,8 @@ class ModelManager:
66
  self.model = AutoModel.from_pretrained(
67
  model_name,
68
  trust_remote_code=True,
69
- use_safetensors=True
 
70
  )
71
  self.model = self.model.to(device=device, dtype=dtype)
72
  self.model.eval()
@@ -109,14 +112,17 @@ def run_ocr(input_image, input_file, model_choice, custom_prompt):
109
  images_to_process = []
110
 
111
  if input_file is not None:
112
- if input_file.name.lower().endswith(".pdf"):
 
 
 
113
  try:
114
- images_to_process = pdf_to_images(input_file.name)
115
  except Exception as e:
116
  return f"Помилка читання PDF: {str(e)}"
117
  else:
118
  try:
119
- images_to_process = [Image.open(input_file.name)]
120
  except Exception as e:
121
  return f"Помилка завантаження файлу: {str(e)}"
122
  elif input_image is not None:
@@ -187,6 +193,9 @@ def run_ocr(input_image, input_file, model_choice, custom_prompt):
187
  except Exception as e:
188
  all_results.append(f"--- Page/Image {i+1} ---\nПомилка: {str(e)}")
189
 
 
 
 
190
  return "\n\n".join(all_results)
191
 
192
  def save_result_to_file(text):
@@ -206,7 +215,7 @@ custom_css = """
206
  .footer { text-align: center; margin-top: 50px; font-size: 0.9rem; color: #718096; }
207
  """
208
 
209
- with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma") as demo:
210
  with gr.Column():
211
  gr.Markdown("# 🔍 OCR & Medical Document Analysis", elem_classes="header")
212
  gr.Markdown("Порівняння DeepSeek-OCR-2 та MedGemma-1.5-4B", elem_classes="header")
@@ -274,4 +283,4 @@ with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma") as demo:
274
  )
275
 
276
  if __name__ == "__main__":
277
- demo.launch(server_name="0.0.0.0", share=False, css=custom_css)
 
29
  torch.Tensor.cuda = lambda self, *args, **kwargs: self.to("mps")
30
  torch.nn.Module.cuda = lambda self, *args, **kwargs: self.to("mps")
31
  dtype = torch.float16
32
+ # Patch to avoid BFloat16 vs Float16 mismatch in custom modeling code on MPS
33
+ torch.bfloat16 = torch.float16
34
  else:
35
  device = "cpu"
36
  dtype = torch.float32
 
68
  self.model = AutoModel.from_pretrained(
69
  model_name,
70
  trust_remote_code=True,
71
+ use_safetensors=True,
72
+ torch_dtype=dtype
73
  )
74
  self.model = self.model.to(device=device, dtype=dtype)
75
  self.model.eval()
 
112
  images_to_process = []
113
 
114
  if input_file is not None:
115
+ # Compatibility with different Gradio versions (object with .name vs string path)
116
+ file_path = input_file.name if hasattr(input_file, 'name') else input_file
117
+
118
+ if file_path.lower().endswith(".pdf"):
119
  try:
120
+ images_to_process = pdf_to_images(file_path)
121
  except Exception as e:
122
  return f"Помилка читання PDF: {str(e)}"
123
  else:
124
  try:
125
+ images_to_process = [Image.open(file_path)]
126
  except Exception as e:
127
  return f"Помилка завантаження файлу: {str(e)}"
128
  elif input_image is not None:
 
193
  except Exception as e:
194
  all_results.append(f"--- Page/Image {i+1} ---\nПомилка: {str(e)}")
195
 
196
+ if torch.backends.mps.is_available():
197
+ torch.mps.empty_cache()
198
+
199
  return "\n\n".join(all_results)
200
 
201
  def save_result_to_file(text):
 
215
  .footer { text-align: center; margin-top: 50px; font-size: 0.9rem; color: #718096; }
216
  """
217
 
218
+ with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma", css=custom_css) as demo:
219
  with gr.Column():
220
  gr.Markdown("# 🔍 OCR & Medical Document Analysis", elem_classes="header")
221
  gr.Markdown("Порівняння DeepSeek-OCR-2 та MedGemma-1.5-4B", elem_classes="header")
 
283
  )
284
 
285
  if __name__ == "__main__":
286
+ demo.launch(server_name="0.0.0.0", share=False)
app_hf.py CHANGED
@@ -89,14 +89,17 @@ def run_ocr(input_image, input_file, model_choice, custom_prompt):
89
  images_to_process = []
90
 
91
  if input_file is not None:
92
- if input_file.name.lower().endswith(".pdf"):
 
 
 
93
  try:
94
- images_to_process = pdf_to_images(input_file.name)
95
  except Exception as e:
96
  return f"Помилка читання PDF: {str(e)}"
97
  else:
98
  try:
99
- images_to_process = [Image.open(input_file.name)]
100
  except Exception as e:
101
  return f"Помилка завантаження файлу: {str(e)}"
102
  elif input_image is not None:
 
89
  images_to_process = []
90
 
91
  if input_file is not None:
92
+ # Compatibility with different Gradio versions (object with .name vs string path)
93
+ file_path = input_file.name if hasattr(input_file, 'name') else input_file
94
+
95
+ if file_path.lower().endswith(".pdf"):
96
  try:
97
+ images_to_process = pdf_to_images(file_path)
98
  except Exception as e:
99
  return f"Помилка читання PDF: {str(e)}"
100
  else:
101
  try:
102
+ images_to_process = [Image.open(file_path)]
103
  except Exception as e:
104
  return f"Помилка завантаження файлу: {str(e)}"
105
  elif input_image is not None:
requirements.txt CHANGED
@@ -10,7 +10,7 @@ pillow
10
  matplotlib
11
  requests
12
  torchvision
13
- gradio==4.44.1
 
14
  pymupdf
15
  spaces
16
- huggingface-hub<0.25.0
 
10
  matplotlib
11
  requests
12
  torchvision
13
+ gradio
14
+ huggingface-hub
15
  pymupdf
16
  spaces