prithivMLmods committed on
Commit
198a838
·
verified ·
1 Parent(s): d6f9fb3

update app

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -305,13 +305,16 @@ def process_pipeline(
305
  import traceback
306
  return extracted_text, None, f"Error during TTS: {str(e)}"
307
 
 
 
 
308
  with gr.Blocks() as demo:
309
  gr.Markdown("# **Vision-to-VibeVoice-en**", elem_id="main-title")
310
  gr.Markdown("Perform vision-to-audio inference with [Qwen2.5VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) + [VibeVoice-Realtime-0.5B](https://huggingface.co/microsoft/VibeVoice-Realtime-0.5B).")
311
  with gr.Row():
312
  with gr.Column(scale=1):
313
  gr.Markdown("### 1. Vision Input")
314
- image_upload = gr.Image(type="pil", label="Upload Image", height=300)
315
  image_query = gr.Textbox(label="Enter the prompt", value="Give a short description indicating whether the image is safe or unsafe.", placeholder="E.g., Read this page...")
316
 
317
  gr.Markdown("### 2. Voice Settings")
@@ -349,11 +352,11 @@ with gr.Blocks() as demo:
349
 
350
  status_output = gr.Textbox(label="Status Log", lines=2)
351
 
352
- gr.Examples(
353
- examples=[["Perform OCR on the image.", "examples/1.jpg"]],
354
- inputs=[image_query, image_upload],
355
- label="Example"
356
- )
357
 
358
  submit_btn.click(
359
  fn=process_pipeline,
 
305
  import traceback
306
  return extracted_text, None, f"Error during TTS: {str(e)}"
307
 
308
+ url = "https://huggingface.co/datasets/strangervisionhf/image-examples/resolve/main/2.jpg?download=true"
309
+ example_image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
310
+
311
  with gr.Blocks() as demo:
312
  gr.Markdown("# **Vision-to-VibeVoice-en**", elem_id="main-title")
313
  gr.Markdown("Perform vision-to-audio inference with [Qwen2.5VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) + [VibeVoice-Realtime-0.5B](https://huggingface.co/microsoft/VibeVoice-Realtime-0.5B).")
314
  with gr.Row():
315
  with gr.Column(scale=1):
316
  gr.Markdown("### 1. Vision Input")
317
+ image_upload = gr.Image(type="pil", label="Upload Image", value=example_image, height=300)
318
  image_query = gr.Textbox(label="Enter the prompt", value="Give a short description indicating whether the image is safe or unsafe.", placeholder="E.g., Read this page...")
319
 
320
  gr.Markdown("### 2. Voice Settings")
 
352
 
353
  status_output = gr.Textbox(label="Status Log", lines=2)
354
 
355
+ gr.Examples(
356
+ examples=[["Perform OCR on the image.", "examples/1.jpg"]],
357
+ inputs=[image_query, image_upload],
358
+ label="Example"
359
+ )
360
 
361
  submit_btn.click(
362
  fn=process_pipeline,