Spaces:
Running
on
Zero
Running
on
Zero
update app
Browse files
app.py
CHANGED
|
@@ -305,13 +305,16 @@ def process_pipeline(
|
|
| 305 |
import traceback
|
| 306 |
return extracted_text, None, f"Error during TTS: {str(e)}"
|
| 307 |
|
|
|
|
|
|
|
|
|
|
| 308 |
with gr.Blocks() as demo:
|
| 309 |
gr.Markdown("# **Vision-to-VibeVoice-en**", elem_id="main-title")
|
| 310 |
gr.Markdown("Perform vision-to-audio inference with [Qwen2.5VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) + [VibeVoice-Realtime-0.5B](https://huggingface.co/microsoft/VibeVoice-Realtime-0.5B).")
|
| 311 |
with gr.Row():
|
| 312 |
with gr.Column(scale=1):
|
| 313 |
gr.Markdown("### 1. Vision Input")
|
| 314 |
-
image_upload = gr.Image(type="pil", label="Upload Image", height=300)
|
| 315 |
image_query = gr.Textbox(label="Enter the prompt", value="Give a short description indicating whether the image is safe or unsafe.", placeholder="E.g., Read this page...")
|
| 316 |
|
| 317 |
gr.Markdown("### 2. Voice Settings")
|
|
@@ -349,11 +352,11 @@ with gr.Blocks() as demo:
|
|
| 349 |
|
| 350 |
status_output = gr.Textbox(label="Status Log", lines=2)
|
| 351 |
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
|
| 358 |
submit_btn.click(
|
| 359 |
fn=process_pipeline,
|
|
|
|
| 305 |
import traceback
|
| 306 |
return extracted_text, None, f"Error during TTS: {str(e)}"
|
| 307 |
|
| 308 |
+
url = "https://huggingface.co/datasets/strangervisionhf/image-examples/resolve/main/2.jpg?download=true"
|
| 309 |
+
example_image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
|
| 310 |
+
|
| 311 |
with gr.Blocks() as demo:
|
| 312 |
gr.Markdown("# **Vision-to-VibeVoice-en**", elem_id="main-title")
|
| 313 |
gr.Markdown("Perform vision-to-audio inference with [Qwen2.5VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) + [VibeVoice-Realtime-0.5B](https://huggingface.co/microsoft/VibeVoice-Realtime-0.5B).")
|
| 314 |
with gr.Row():
|
| 315 |
with gr.Column(scale=1):
|
| 316 |
gr.Markdown("### 1. Vision Input")
|
| 317 |
+
image_upload = gr.Image(type="pil", label="Upload Image", value=example_image, height=300)
|
| 318 |
image_query = gr.Textbox(label="Enter the prompt", value="Give a short description indicating whether the image is safe or unsafe.", placeholder="E.g., Read this page...")
|
| 319 |
|
| 320 |
gr.Markdown("### 2. Voice Settings")
|
|
|
|
| 352 |
|
| 353 |
status_output = gr.Textbox(label="Status Log", lines=2)
|
| 354 |
|
| 355 |
+
gr.Examples(
|
| 356 |
+
examples=[["Perform OCR on the image.", "examples/1.jpg"]],
|
| 357 |
+
inputs=[image_query, image_upload],
|
| 358 |
+
label="Example"
|
| 359 |
+
)
|
| 360 |
|
| 361 |
submit_btn.click(
|
| 362 |
fn=process_pipeline,
|