DocUA committed on
Commit
c3371d2
·
1 Parent(s): 185ef35

Implement warning suppression, ensure pad token ID for generation, enable deterministic sampling, refine Gradio UI CSS and clear functionality, and add `.env` to .gitignore.

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +18 -5
  3. app_hf.py +16 -8
  4. requirements.txt +2 -2
.gitignore CHANGED
@@ -2,6 +2,7 @@
2
  venv/
3
  .venv/
4
  env/
 
5
 
6
  # Data and Results
7
  doc_for_testing/
 
2
  venv/
3
  .venv/
4
  env/
5
+ .env
6
 
7
  # Data and Results
8
  doc_for_testing/
app.py CHANGED
@@ -8,6 +8,14 @@ import datetime
8
  import fitz # PyMuPDF
9
  import io
10
  import gc
 
 
 
 
 
 
 
 
11
 
12
  # --- Configuration ---
13
  DEEPSEEK_MODEL = 'deepseek-ai/DeepSeek-OCR-2'
@@ -76,6 +84,11 @@ class ModelManager:
76
  if device == "mps":
77
  self.model = self.model.to("mps")
78
  self.model.eval()
 
 
 
 
 
79
  self.current_model_name = model_name
80
  return self.model, self.processor
81
 
@@ -165,7 +178,7 @@ def run_ocr(input_image, input_file, model_choice, custom_prompt):
165
  ).to(model.device)
166
 
167
  with torch.no_grad():
168
- output = model.generate(**inputs, max_new_tokens=4096)
169
 
170
  input_len = inputs["input_ids"].shape[-1]
171
  res = processor_or_tokenizer.decode(output[0][input_len:], skip_special_tokens=True)
@@ -193,7 +206,7 @@ custom_css = """
193
  .footer { text-align: center; margin-top: 50px; font-size: 0.9rem; color: #718096; }
194
  """
195
 
196
- with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma", css=custom_css) as demo:
197
  with gr.Column():
198
  gr.Markdown("# 🔍 OCR & Medical Document Analysis", elem_classes="header")
199
  gr.Markdown("Порівняння DeepSeek-OCR-2 та MedGemma-1.5-4B", elem_classes="header")
@@ -252,13 +265,13 @@ with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma", css=custom_css) as
252
  )
253
 
254
  def clear_all():
255
- return None, None, ""
256
 
257
  clear_btn.click(
258
  fn=clear_all,
259
  inputs=None,
260
- outputs=[input_img, input_file, output_text]
261
  )
262
 
263
  if __name__ == "__main__":
264
- demo.launch(server_name="0.0.0.0", share=False)
 
8
  import fitz # PyMuPDF
9
  import io
10
  import gc
11
+ import warnings
12
+
13
+ # Suppress annoying warnings
14
+ warnings.filterwarnings("ignore", message="The parameters have been moved from the Blocks constructor to the launch()")
15
+ warnings.filterwarnings("ignore", message="CUDA is not available or torch_xla is imported")
16
+ warnings.filterwarnings("ignore", message="The following generation flags are not valid and may be ignored")
17
+ warnings.filterwarnings("ignore", message="The attention mask and the pad token id were not set")
18
+ warnings.filterwarnings("ignore", message="You are using a model of type .* to instantiate a model of type .*")
19
 
20
  # --- Configuration ---
21
  DEEPSEEK_MODEL = 'deepseek-ai/DeepSeek-OCR-2'
 
84
  if device == "mps":
85
  self.model = self.model.to("mps")
86
  self.model.eval()
87
+
88
+ # Ensure pad_token_id is set
89
+ if self.processor.tokenizer.pad_token_id is None:
90
+ self.processor.tokenizer.pad_token_id = self.processor.tokenizer.eos_token_id
91
+
92
  self.current_model_name = model_name
93
  return self.model, self.processor
94
 
 
178
  ).to(model.device)
179
 
180
  with torch.no_grad():
181
+ output = model.generate(**inputs, max_new_tokens=4096, do_sample=False)
182
 
183
  input_len = inputs["input_ids"].shape[-1]
184
  res = processor_or_tokenizer.decode(output[0][input_len:], skip_special_tokens=True)
 
206
  .footer { text-align: center; margin-top: 50px; font-size: 0.9rem; color: #718096; }
207
  """
208
 
209
+ with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma") as demo:
210
  with gr.Column():
211
  gr.Markdown("# 🔍 OCR & Medical Document Analysis", elem_classes="header")
212
  gr.Markdown("Порівняння DeepSeek-OCR-2 та MedGemma-1.5-4B", elem_classes="header")
 
265
  )
266
 
267
  def clear_all():
268
+ return None, None, "", ""
269
 
270
  clear_btn.click(
271
  fn=clear_all,
272
  inputs=None,
273
+ outputs=[input_img, input_file, output_text, prompt_input]
274
  )
275
 
276
  if __name__ == "__main__":
277
+ demo.launch(server_name="0.0.0.0", share=False, css=custom_css)
app_hf.py CHANGED
@@ -8,6 +8,14 @@ import datetime
8
  import fitz # PyMuPDF
9
  import io
10
  import gc
 
 
 
 
 
 
 
 
11
 
12
  # Try to import spaces, if not available (local run), create a dummy decorator
13
  try:
@@ -55,6 +63,9 @@ class ModelManager:
55
  torch_dtype=dtype
56
  )
57
  model.eval()
 
 
 
58
  self.models[model_name] = model
59
  self.processors[model_name] = processor
60
 
@@ -154,7 +165,7 @@ def run_ocr(input_image, input_file, model_choice, custom_prompt):
154
  ).to("cuda") # Ensure inputs are on cuda
155
 
156
  with torch.no_grad():
157
- output = model.generate(**inputs, max_new_tokens=4096)
158
 
159
  input_len = inputs["input_ids"].shape[-1]
160
  res = processor_or_tokenizer.decode(output[0][input_len:], skip_special_tokens=True)
@@ -172,9 +183,6 @@ def run_ocr(input_image, input_file, model_choice, custom_prompt):
172
 
173
  return "\n\n".join(all_results)
174
 
175
-
176
- return "\n\n".join(all_results)
177
-
178
  def save_result_to_file(text):
179
  if not text or text.startswith("Будь ласка") or text.startswith("Помилка"):
180
  return None
@@ -192,7 +200,7 @@ custom_css = """
192
  .footer { text-align: center; margin-top: 50px; font-size: 0.9rem; color: #718096; }
193
  """
194
 
195
- with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma", css=custom_css) as demo:
196
  with gr.Column():
197
  gr.Markdown("# 🔍 OCR & Medical Document Analysis", elem_classes="header")
198
  gr.Markdown("Порівняння DeepSeek-OCR-2 та MedGemma-1.5-4B (HuggingFace ZeroGPU Edition)", elem_classes="header")
@@ -248,13 +256,13 @@ with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma", css=custom_css) as
248
  )
249
 
250
  def clear_all():
251
- return None, None, ""
252
 
253
  clear_btn.click(
254
  fn=clear_all,
255
  inputs=None,
256
- outputs=[input_img, input_file, output_text]
257
  )
258
 
259
  if __name__ == "__main__":
260
- demo.queue().launch()
 
8
  import fitz # PyMuPDF
9
  import io
10
  import gc
11
+ import warnings
12
+
13
+ # Suppress annoying warnings
14
+ warnings.filterwarnings("ignore", message="The parameters have been moved from the Blocks constructor to the launch()")
15
+ warnings.filterwarnings("ignore", message="CUDA is not available or torch_xla is imported")
16
+ warnings.filterwarnings("ignore", message="The following generation flags are not valid and may be ignored")
17
+ warnings.filterwarnings("ignore", message="The attention mask and the pad token id were not set")
18
+ warnings.filterwarnings("ignore", message="You are using a model of type .* to instantiate a model of type .*")
19
 
20
  # Try to import spaces, if not available (local run), create a dummy decorator
21
  try:
 
63
  torch_dtype=dtype
64
  )
65
  model.eval()
66
+ # Ensure pad_token_id is set
67
+ if processor.tokenizer.pad_token_id is None:
68
+ processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id
69
  self.models[model_name] = model
70
  self.processors[model_name] = processor
71
 
 
165
  ).to("cuda") # Ensure inputs are on cuda
166
 
167
  with torch.no_grad():
168
+ output = model.generate(**inputs, max_new_tokens=4096, do_sample=False)
169
 
170
  input_len = inputs["input_ids"].shape[-1]
171
  res = processor_or_tokenizer.decode(output[0][input_len:], skip_special_tokens=True)
 
183
 
184
  return "\n\n".join(all_results)
185
 
 
 
 
186
  def save_result_to_file(text):
187
  if not text or text.startswith("Будь ласка") or text.startswith("Помилка"):
188
  return None
 
200
  .footer { text-align: center; margin-top: 50px; font-size: 0.9rem; color: #718096; }
201
  """
202
 
203
+ with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma") as demo:
204
  with gr.Column():
205
  gr.Markdown("# 🔍 OCR & Medical Document Analysis", elem_classes="header")
206
  gr.Markdown("Порівняння DeepSeek-OCR-2 та MedGemma-1.5-4B (HuggingFace ZeroGPU Edition)", elem_classes="header")
 
256
  )
257
 
258
  def clear_all():
259
+ return None, None, "", ""
260
 
261
  clear_btn.click(
262
  fn=clear_all,
263
  inputs=None,
264
+ outputs=[input_img, input_file, output_text, prompt_input]
265
  )
266
 
267
  if __name__ == "__main__":
268
+ demo.queue().launch(css=custom_css)
requirements.txt CHANGED
@@ -10,7 +10,7 @@ pillow
10
  matplotlib
11
  requests
12
  torchvision
13
- gradio
14
  pymupdf
15
  spaces
16
- huggingface-hub
 
10
  matplotlib
11
  requests
12
  torchvision
13
+ gradio==4.44.1
14
  pymupdf
15
  spaces
16
+ huggingface-hub<0.25.0