prithivMLmods committed
Commit 16e37bd · verified · 1 Parent(s): 81d2b64

Update app.py

Files changed (1): app.py (+13 −11)
app.py CHANGED
@@ -87,7 +87,8 @@ model_md3 = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.bfloat16,
     device_map={"": "cuda"},
 )
-tokenizer_md3 = AutoTokenizer.from_pretrained(MODEL_ID_MD3)
+# FIXED: Added trust_remote_code=True to the tokenizer loading
+tokenizer_md3 = AutoTokenizer.from_pretrained(MODEL_ID_MD3, trust_remote_code=True)
 
 
 # --- PDF Generation and Preview Utility Function ---
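The fix is a single keyword: Moondream3 ships its modeling and tokenizer code inside the model repository rather than in the transformers package, so the tokenizer, like the model, has to be loaded with trust_remote_code=True for AutoTokenizer to resolve the custom class. A minimal sketch of the loading pattern; the concrete repo id is an assumption for illustration, since MODEL_ID_MD3 is defined outside this hunk:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Assumed repo id for illustration; app.py defines MODEL_ID_MD3 elsewhere.
    MODEL_ID_MD3 = "moondream/moondream3-preview"

    # trust_remote_code=True lets transformers import the custom model/tokenizer
    # classes bundled in the repository instead of looking for built-in ones.
    model_md3 = AutoModelForCausalLM.from_pretrained(
        MODEL_ID_MD3,
        torch_dtype=torch.bfloat16,
        device_map={"": "cuda"},
        trust_remote_code=True,
    )
    tokenizer_md3 = AutoTokenizer.from_pretrained(MODEL_ID_MD3, trust_remote_code=True)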
@@ -182,10 +183,11 @@ def process_document_stream(
 
     # --- Special Handling for Moondream3 ---
     if model_name == "Moondream3":
-        prompt_full = f"<image>\n{prompt_input}"
+        # Moondream3 has a different inference method
+        enc_image = model_md3.encode_image(image)
         answer = model_md3.answer_question(
-            model_md3.encode_image(image),
-            prompt_full,
+            enc_image,
+            prompt_input,
             tokenizer=tokenizer_md3
         )
         yield answer, answer
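The rewritten branch also drops the hand-built "<image>" placeholder and passes the user prompt through unchanged, since the image now reaches answer_question() as the embedding returned by encode_image() rather than via a prompt token. A short sketch of the branch in isolation, reusing the names from the diff (encode_image and answer_question come from the model's remote code, not from the standard transformers API; the helper name is hypothetical):

    def answer_with_moondream(image, prompt_input):
        # Encode the PIL image once, then query the embedding with the raw prompt.
        enc_image = model_md3.encode_image(image)
        return model_md3.answer_question(
            enc_image,
            prompt_input,
            tokenizer=tokenizer_md3,
        )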
@@ -255,14 +257,14 @@ def create_gradio_interface():
         # Left Column (Inputs)
         with gr.Column(scale=1):
             model_choice = gr.Dropdown(
-                choices=["Moondream3", "Camel-Doc-OCR-062825", "MinerU2.5-2509-1.2B", "Video-MTR"],
-                label="Select Model", value= "Moondream3"
+                choices=["Camel-Doc-OCR-062825", "MinerU2.5-2509-1.2B", "Video-MTR", "Moondream3"],
+                label="Select Model", value= "Camel-Doc-OCR-062825"
             )
 
             prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter the prompt")
             image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])
 
-            with gr.Accordion("Advanced Settings (PDF)", open=False):
+            with gr.Accordion("Advanced Settings", open=False):
                 max_new_tokens = gr.Slider(minimum=512, maximum=8192, value=2048, step=256, label="Max New Tokens")
                 temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
                 top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
@@ -285,11 +287,11 @@ def create_gradio_interface():
                 raw_output_stream = gr.Textbox(label="Raw Model Output Stream", interactive=False, lines=15, show_copy_button=True)
                 with gr.Row():
                     examples = gr.Examples(
-                        examples=["examples/1.png", "examples/2.png", "examples/3.png",
-                                  "examples/4.png", "examples/5.png"],
+                        examples=[["examples/1.png"], ["examples/2.png"], ["examples/3.png"],
+                                  ["examples/4.png"], ["examples/5.png"], ["examples/6.png"]],
                         inputs=image_input, label="Examples"
                     )
-                gr.Markdown("[Report-Bug💻](https://huggingface.co/spaces/prithivMLmods/Tiny-VLMs-Lab/discussions) | [prithivMLmods🤗](https://huggingface.co/prithivMLmods)")
+                gr.Markdown("[Report-Bug💻](https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-v1.0/discussions) | [prithivMLmods🤗](https://huggingface.co/prithivMLmods)")
 
             with gr.Tab("📰 README.md"):
                 with gr.Accordion("(Result.md)", open=True):
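Beyond adding examples/6.png, the examples list switches from flat paths to one nested list per example. gr.Examples treats each inner list as a single example row whose length must match the number of components in inputs; a flat list of values also works when there is exactly one input, but the nested form is explicit and keeps working if more inputs are wired in later. A minimal, self-contained sketch (the file paths are placeholders):

    import gradio as gr

    with gr.Blocks() as demo:
        image_input = gr.Image(label="Upload Image", type="pil", sources=["upload"])
        # Each inner list is one example row; its length matches len(inputs) (here 1).
        gr.Examples(
            examples=[["examples/1.png"], ["examples/2.png"], ["examples/3.png"]],
            inputs=[image_input],
            label="Examples",
        )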
@@ -324,4 +326,4 @@ def create_gradio_interface():
 
 if __name__ == "__main__":
     demo = create_gradio_interface()
-    demo.queue(max_size=50).launch(share=True, mcp_server=True, ssr_mode=False, show_error=True)
+    demo.queue(max_size=50).launch(share=True, ssr_mode=False, show_error=True)
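The only change to the entry point is dropping mcp_server=True, the flag that exposes the app as an MCP server in Gradio releases that support it; queueing and the other launch flags are untouched. A lightly annotated version of the resulting block (the comments describe standard Gradio behavior and are not part of the commit):

    if __name__ == "__main__":
        demo = create_gradio_interface()
        # queue(max_size=50): limit how many requests may wait in the queue at once.
        # share=True requests a public share link, ssr_mode=False disables server-side
        # rendering, and show_error=True surfaces exceptions in the UI.
        demo.queue(max_size=50).launch(share=True, ssr_mode=False, show_error=True)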
 