DocUA committed on
Commit
c3371d2
·
1 Parent(s): 185ef35

Implement warning suppression, ensure pad token ID for generation, enable deterministic sampling, refine Gradio UI CSS and clear functionality, and add `.env` to .gitignore.

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +18 -5
  3. app_hf.py +16 -8
  4. requirements.txt +2 -2
.gitignore CHANGED
@@ -2,6 +2,7 @@
2
  venv/
3
  .venv/
4
  env/
 
5
 
6
  # Data and Results
7
  doc_for_testing/
 
2
  venv/
3
  .venv/
4
  env/
5
+ .env
6
 
7
  # Data and Results
8
  doc_for_testing/
app.py CHANGED
@@ -8,6 +8,14 @@ import datetime
8
  import fitz # PyMuPDF
9
  import io
10
  import gc
 
 
 
 
 
 
 
 
11
 
12
  # --- Configuration ---
13
  DEEPSEEK_MODEL = 'deepseek-ai/DeepSeek-OCR-2'
@@ -76,6 +84,11 @@ class ModelManager:
76
  if device == "mps":
77
  self.model = self.model.to("mps")
78
  self.model.eval()
 
 
 
 
 
79
  self.current_model_name = model_name
80
  return self.model, self.processor
81
 
@@ -165,7 +178,7 @@ def run_ocr(input_image, input_file, model_choice, custom_prompt):
165
  ).to(model.device)
166
 
167
  with torch.no_grad():
168
- output = model.generate(**inputs, max_new_tokens=4096)
169
 
170
  input_len = inputs["input_ids"].shape[-1]
171
  res = processor_or_tokenizer.decode(output[0][input_len:], skip_special_tokens=True)
@@ -193,7 +206,7 @@ custom_css = """
193
  .footer { text-align: center; margin-top: 50px; font-size: 0.9rem; color: #718096; }
194
  """
195
 
196
- with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma", css=custom_css) as demo:
197
  with gr.Column():
198
  gr.Markdown("# 🔍 OCR & Medical Document Analysis", elem_classes="header")
199
  gr.Markdown("Порівняння DeepSeek-OCR-2 та MedGemma-1.5-4B", elem_classes="header")
@@ -252,13 +265,13 @@ with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma", css=custom_css) as
252
  )
253
 
254
  def clear_all():
255
- return None, None, ""
256
 
257
  clear_btn.click(
258
  fn=clear_all,
259
  inputs=None,
260
- outputs=[input_img, input_file, output_text]
261
  )
262
 
263
  if __name__ == "__main__":
264
- demo.launch(server_name="0.0.0.0", share=False)
 
8
  import fitz # PyMuPDF
9
  import io
10
  import gc
11
+ import warnings
12
+
13
+ # Suppress annoying warnings
14
+ warnings.filterwarnings("ignore", message="The parameters have been moved from the Blocks constructor to the launch()")
15
+ warnings.filterwarnings("ignore", message="CUDA is not available or torch_xla is imported")
16
+ warnings.filterwarnings("ignore", message="The following generation flags are not valid and may be ignored")
17
+ warnings.filterwarnings("ignore", message="The attention mask and the pad token id were not set")
18
+ warnings.filterwarnings("ignore", message="You are using a model of type .* to instantiate a model of type .*")
19
 
20
  # --- Configuration ---
21
  DEEPSEEK_MODEL = 'deepseek-ai/DeepSeek-OCR-2'
 
84
  if device == "mps":
85
  self.model = self.model.to("mps")
86
  self.model.eval()
87
+
88
+ # Ensure pad_token_id is set
89
+ if self.processor.tokenizer.pad_token_id is None:
90
+ self.processor.tokenizer.pad_token_id = self.processor.tokenizer.eos_token_id
91
+
92
  self.current_model_name = model_name
93
  return self.model, self.processor
94
 
 
178
  ).to(model.device)
179
 
180
  with torch.no_grad():
181
+ output = model.generate(**inputs, max_new_tokens=4096, do_sample=False)
182
 
183
  input_len = inputs["input_ids"].shape[-1]
184
  res = processor_or_tokenizer.decode(output[0][input_len:], skip_special_tokens=True)
 
206
  .footer { text-align: center; margin-top: 50px; font-size: 0.9rem; color: #718096; }
207
  """
208
 
209
+ with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma") as demo:
210
  with gr.Column():
211
  gr.Markdown("# 🔍 OCR & Medical Document Analysis", elem_classes="header")
212
  gr.Markdown("Порівняння DeepSeek-OCR-2 та MedGemma-1.5-4B", elem_classes="header")
 
265
  )
266
 
267
  def clear_all():
268
+ return None, None, "", ""
269
 
270
  clear_btn.click(
271
  fn=clear_all,
272
  inputs=None,
273
+ outputs=[input_img, input_file, output_text, prompt_input]
274
  )
275
 
276
  if __name__ == "__main__":
277
+ demo.launch(server_name="0.0.0.0", share=False, css=custom_css)
app_hf.py CHANGED
@@ -8,6 +8,14 @@ import datetime
8
  import fitz # PyMuPDF
9
  import io
10
  import gc
 
 
 
 
 
 
 
 
11
 
12
  # Try to import spaces, if not available (local run), create a dummy decorator
13
  try:
@@ -55,6 +63,9 @@ class ModelManager:
55
  torch_dtype=dtype
56
  )
57
  model.eval()
 
 
 
58
  self.models[model_name] = model
59
  self.processors[model_name] = processor
60
 
@@ -154,7 +165,7 @@ def run_ocr(input_image, input_file, model_choice, custom_prompt):
154
  ).to("cuda") # Ensure inputs are on cuda
155
 
156
  with torch.no_grad():
157
- output = model.generate(**inputs, max_new_tokens=4096)
158
 
159
  input_len = inputs["input_ids"].shape[-1]
160
  res = processor_or_tokenizer.decode(output[0][input_len:], skip_special_tokens=True)
@@ -172,9 +183,6 @@ def run_ocr(input_image, input_file, model_choice, custom_prompt):
172
 
173
  return "\n\n".join(all_results)
174
 
175
-
176
- return "\n\n".join(all_results)
177
-
178
  def save_result_to_file(text):
179
  if not text or text.startswith("Будь ласка") or text.startswith("Помилка"):
180
  return None
@@ -192,7 +200,7 @@ custom_css = """
192
  .footer { text-align: center; margin-top: 50px; font-size: 0.9rem; color: #718096; }
193
  """
194
 
195
- with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma", css=custom_css) as demo:
196
  with gr.Column():
197
  gr.Markdown("# 🔍 OCR & Medical Document Analysis", elem_classes="header")
198
  gr.Markdown("Порівняння DeepSeek-OCR-2 та MedGemma-1.5-4B (HuggingFace ZeroGPU Edition)", elem_classes="header")
@@ -248,13 +256,13 @@ with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma", css=custom_css) as
248
  )
249
 
250
  def clear_all():
251
- return None, None, ""
252
 
253
  clear_btn.click(
254
  fn=clear_all,
255
  inputs=None,
256
- outputs=[input_img, input_file, output_text]
257
  )
258
 
259
  if __name__ == "__main__":
260
- demo.queue().launch()
 
8
  import fitz # PyMuPDF
9
  import io
10
  import gc
11
+ import warnings
12
+
13
+ # Suppress annoying warnings
14
+ warnings.filterwarnings("ignore", message="The parameters have been moved from the Blocks constructor to the launch()")
15
+ warnings.filterwarnings("ignore", message="CUDA is not available or torch_xla is imported")
16
+ warnings.filterwarnings("ignore", message="The following generation flags are not valid and may be ignored")
17
+ warnings.filterwarnings("ignore", message="The attention mask and the pad token id were not set")
18
+ warnings.filterwarnings("ignore", message="You are using a model of type .* to instantiate a model of type .*")
19
 
20
  # Try to import spaces, if not available (local run), create a dummy decorator
21
  try:
 
63
  torch_dtype=dtype
64
  )
65
  model.eval()
66
+ # Ensure pad_token_id is set
67
+ if processor.tokenizer.pad_token_id is None:
68
+ processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id
69
  self.models[model_name] = model
70
  self.processors[model_name] = processor
71
 
 
165
  ).to("cuda") # Ensure inputs are on cuda
166
 
167
  with torch.no_grad():
168
+ output = model.generate(**inputs, max_new_tokens=4096, do_sample=False)
169
 
170
  input_len = inputs["input_ids"].shape[-1]
171
  res = processor_or_tokenizer.decode(output[0][input_len:], skip_special_tokens=True)
 
183
 
184
  return "\n\n".join(all_results)
185
 
 
 
 
186
  def save_result_to_file(text):
187
  if not text or text.startswith("Будь ласка") or text.startswith("Помилка"):
188
  return None
 
200
  .footer { text-align: center; margin-top: 50px; font-size: 0.9rem; color: #718096; }
201
  """
202
 
203
+ with gr.Blocks(title="OCR Comparison: DeepSeek vs MedGemma") as demo:
204
  with gr.Column():
205
  gr.Markdown("# 🔍 OCR & Medical Document Analysis", elem_classes="header")
206
  gr.Markdown("Порівняння DeepSeek-OCR-2 та MedGemma-1.5-4B (HuggingFace ZeroGPU Edition)", elem_classes="header")
 
256
  )
257
 
258
  def clear_all():
259
+ return None, None, "", ""
260
 
261
  clear_btn.click(
262
  fn=clear_all,
263
  inputs=None,
264
+ outputs=[input_img, input_file, output_text, prompt_input]
265
  )
266
 
267
  if __name__ == "__main__":
268
+ demo.queue().launch(css=custom_css)
requirements.txt CHANGED
@@ -10,7 +10,7 @@ pillow
10
  matplotlib
11
  requests
12
  torchvision
13
- gradio
14
  pymupdf
15
  spaces
16
- huggingface-hub
 
10
  matplotlib
11
  requests
12
  torchvision
13
+ gradio==4.44.1
14
  pymupdf
15
  spaces
16
+ huggingface-hub<0.25.0