Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -87,7 +87,8 @@ model_md3 = AutoModelForCausalLM.from_pretrained(
|
|
| 87 |
torch_dtype=torch.bfloat16,
|
| 88 |
device_map={"": "cuda"},
|
| 89 |
)
|
| 90 |
-
|
|
|
|
| 91 |
|
| 92 |
|
| 93 |
# --- PDF Generation and Preview Utility Function ---
|
|
@@ -182,10 +183,11 @@ def process_document_stream(
|
|
| 182 |
|
| 183 |
# --- Special Handling for Moondream3 ---
|
| 184 |
if model_name == "Moondream3":
|
| 185 |
-
|
|
|
|
| 186 |
answer = model_md3.answer_question(
|
| 187 |
-
|
| 188 |
-
|
| 189 |
tokenizer=tokenizer_md3
|
| 190 |
)
|
| 191 |
yield answer, answer
|
|
@@ -255,14 +257,14 @@ def create_gradio_interface():
|
|
| 255 |
# Left Column (Inputs)
|
| 256 |
with gr.Column(scale=1):
|
| 257 |
model_choice = gr.Dropdown(
|
| 258 |
-
choices=["
|
| 259 |
-
label="Select Model", value= "
|
| 260 |
)
|
| 261 |
|
| 262 |
prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter the prompt")
|
| 263 |
image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])
|
| 264 |
|
| 265 |
-
with gr.Accordion("Advanced Settings
|
| 266 |
max_new_tokens = gr.Slider(minimum=512, maximum=8192, value=2048, step=256, label="Max New Tokens")
|
| 267 |
temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
|
| 268 |
top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
|
|
@@ -285,11 +287,11 @@ def create_gradio_interface():
|
|
| 285 |
raw_output_stream = gr.Textbox(label="Raw Model Output Stream", interactive=False, lines=15, show_copy_button=True)
|
| 286 |
with gr.Row():
|
| 287 |
examples = gr.Examples(
|
| 288 |
-
examples=["examples/1.png", "examples/2.png", "examples/3.png",
|
| 289 |
-
"examples/4.png", "examples/5.png"],
|
| 290 |
inputs=image_input, label="Examples"
|
| 291 |
)
|
| 292 |
-
gr.Markdown("[Report-Bug💻](https://huggingface.co/spaces/prithivMLmods/
|
| 293 |
|
| 294 |
with gr.Tab("📰 README.md"):
|
| 295 |
with gr.Accordion("(Result.md)", open=True):
|
|
@@ -324,4 +326,4 @@ def create_gradio_interface():
|
|
| 324 |
|
| 325 |
if __name__ == "__main__":
|
| 326 |
demo = create_gradio_interface()
|
| 327 |
-
demo.queue(max_size=50).launch(share=True,
|
|
|
|
| 87 |
torch_dtype=torch.bfloat16,
|
| 88 |
device_map={"": "cuda"},
|
| 89 |
)
|
| 90 |
+
# FIXED: Added trust_remote_code=True to the tokenizer loading
|
| 91 |
+
tokenizer_md3 = AutoTokenizer.from_pretrained(MODEL_ID_MD3, trust_remote_code=True)
|
| 92 |
|
| 93 |
|
| 94 |
# --- PDF Generation and Preview Utility Function ---
|
|
|
|
| 183 |
|
| 184 |
# --- Special Handling for Moondream3 ---
|
| 185 |
if model_name == "Moondream3":
|
| 186 |
+
# Moondream3 has a different inference method
|
| 187 |
+
enc_image = model_md3.encode_image(image)
|
| 188 |
answer = model_md3.answer_question(
|
| 189 |
+
enc_image,
|
| 190 |
+
prompt_input,
|
| 191 |
tokenizer=tokenizer_md3
|
| 192 |
)
|
| 193 |
yield answer, answer
|
|
|
|
| 257 |
# Left Column (Inputs)
|
| 258 |
with gr.Column(scale=1):
|
| 259 |
model_choice = gr.Dropdown(
|
| 260 |
+
choices=["Camel-Doc-OCR-062825", "MinerU2.5-2509-1.2B", "Video-MTR", "Moondream3"],
|
| 261 |
+
label="Select Model", value= "Camel-Doc-OCR-062825"
|
| 262 |
)
|
| 263 |
|
| 264 |
prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter the prompt")
|
| 265 |
image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])
|
| 266 |
|
| 267 |
+
with gr.Accordion("Advanced Settings", open=False):
|
| 268 |
max_new_tokens = gr.Slider(minimum=512, maximum=8192, value=2048, step=256, label="Max New Tokens")
|
| 269 |
temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
|
| 270 |
top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
|
|
|
|
| 287 |
raw_output_stream = gr.Textbox(label="Raw Model Output Stream", interactive=False, lines=15, show_copy_button=True)
|
| 288 |
with gr.Row():
|
| 289 |
examples = gr.Examples(
|
| 290 |
+
examples=[["examples/1.png"], ["examples/2.png"], ["examples/3.png"],
|
| 291 |
+
["examples/4.png"], ["examples/5.png"], ["examples/6.png"]],
|
| 292 |
inputs=image_input, label="Examples"
|
| 293 |
)
|
| 294 |
+
gr.Markdown("[Report-Bug💻](https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-v1.0/discussions) | [prithivMLmods🤗](https://huggingface.co/prithivMLmods)")
|
| 295 |
|
| 296 |
with gr.Tab("📰 README.md"):
|
| 297 |
with gr.Accordion("(Result.md)", open=True):
|
|
|
|
| 326 |
|
| 327 |
if __name__ == "__main__":
|
| 328 |
demo = create_gradio_interface()
|
| 329 |
+
demo.queue(max_size=50).launch(share=True, ssr_mode=False, show_error=True)
|