|
|
import gradio as gr |
|
|
from openvino_genai import VLMPipeline |
|
|
import openvino as ov |
|
|
from huggingface_hub import snapshot_download |
|
|
from PIL import Image |
|
|
import numpy as np |
|
|
import threading |
|
|
from queue import Queue |
|
|
import time |
|
|
|
|
|
|
|
|
# Hugging Face repo id of the INT4-quantized OpenVINO export of Phi-3.5-vision.
MODEL_ID = "OpenVINO/Phi-3.5-vision-instruct-int4-ov"


# Download the model repository (or reuse the local HF cache) and get its local path.
model_dir = snapshot_download(repo_id=MODEL_ID)


# Load the vision-language pipeline on CPU.
# NOTE(review): device is hard-coded, and this runs at import time — importing this
# module triggers the (potentially large) download and model load as a side effect.
pipe = VLMPipeline(model_dir, "CPU")
|
|
|
|
|
|
|
|
def stream_generation(text_prompt, input_image=None):
    """Stream a model response for *text_prompt*, optionally conditioned on an image.

    Runs ``pipe.generate`` on a background thread and yields the accumulated
    generated text after each new subword, which lets Gradio update the output
    textbox incrementally.

    Args:
        text_prompt: User prompt string passed to the pipeline.
        input_image: Optional PIL image; converted to RGB and wrapped in an
            ``ov.Tensor`` with a leading batch dimension when provided.

    Yields:
        The full text generated so far (grows by one subword per yield).
    """
    streamer_queue = Queue()

    def streamer_callback(subword):
        # Hand each subword to the consumer loop; returning False tells the
        # pipeline to keep generating (True would request early stop).
        streamer_queue.put(subword)
        return False

    def run_generation():
        # Worker: drives the (blocking) generate call off the request thread.
        config = {
            "max_new_tokens": 512,
            "do_sample": True,
            "temperature": 0.7,
        }
        try:
            if input_image is not None:
                # [None] adds the batch dimension the pipeline expects.
                image_data = np.array(input_image.convert("RGB"))[None]
                ov_image = ov.Tensor(image_data)
                pipe.generate(text_prompt, images=ov_image, streamer=streamer_callback, **config)
            else:
                pipe.generate(text_prompt, streamer=streamer_callback, **config)
        finally:
            # Always enqueue the end-of-stream sentinel — even if generate()
            # raised — otherwise the consumer below would block forever.
            streamer_queue.put(None)

    thread = threading.Thread(target=run_generation)
    thread.start()

    generated_text = ""
    while True:
        token = streamer_queue.get()
        if token is None:
            # Sentinel: generation finished (or failed); stop streaming.
            break
        generated_text += token
        yield generated_text

    # The sentinel is the worker's last action, so this join is brief; it
    # guarantees no background thread outlives this request.
    thread.join()
|
|
|
|
|
|
|
|
# Build the Gradio front-end: prompt + optional image in, streamed text out.
with gr.Blocks() as demo:
    gr.Markdown("# Phi-3.5 Streaming Backend")
    with gr.Row():
        prompt_box = gr.Textbox()
        image_box = gr.Image(type="pil")
    response_box = gr.Textbox()
    generate_button = gr.Button("Generate")

    # stream_generation is a generator, so the output box updates per subword.
    generate_button.click(stream_generation, inputs=[prompt_box, image_box], outputs=response_box)

# .queue() is required for streaming (generator) handlers; .launch() blocks.
demo.queue().launch()