import gradio as gr
from openvino_genai import VLMPipeline
import openvino as ov
from huggingface_hub import snapshot_download
from PIL import Image
import numpy as np
import threading
from queue import Queue
import time
# 1. Setup Model
# INT4-quantized Phi-3.5 vision-instruct model published on the HF Hub by OpenVINO.
MODEL_ID = "OpenVINO/Phi-3.5-vision-instruct-int4-ov"
# Download (or reuse the cached) model snapshot; returns the local directory path.
model_dir = snapshot_download(repo_id=MODEL_ID)
# Build the OpenVINO GenAI vision-language pipeline targeting the CPU device.
pipe = VLMPipeline(model_dir, "CPU")
# 2. Streaming Generator
def stream_generation(text_prompt, input_image=None):
    """Run VLM generation in a background thread and yield partial text.

    Args:
        text_prompt: User prompt string passed to the pipeline.
        input_image: Optional PIL image; converted to an OpenVINO tensor
            (with a leading batch dimension) when provided.

    Yields:
        The accumulated generated text after each new token, suitable for
        streaming into a Gradio output component.
    """
    # Queue hands tokens from the model thread to this (Gradio) thread.
    streamer_queue = Queue()

    def streamer_callback(subword):
        # Called by OpenVINO GenAI for every newly generated token.
        streamer_queue.put(subword)
        return False  # Return False to continue generation

    def run_generation():
        # Runs in a worker thread so the caller can consume tokens as they arrive.
        config = {
            "max_new_tokens": 512,
            "do_sample": True,
            "temperature": 0.7,
        }
        try:
            if input_image is not None:
                # [None] adds the batch dimension the pipeline expects.
                image_data = np.array(input_image.convert("RGB"))[None]
                ov_image = ov.Tensor(image_data)
                pipe.generate(text_prompt, images=ov_image, streamer=streamer_callback, **config)
            else:
                pipe.generate(text_prompt, streamer=streamer_callback, **config)
        finally:
            # Always enqueue the end-of-generation sentinel, even if
            # generate() raised — otherwise the consumer loop below would
            # block forever on streamer_queue.get().
            streamer_queue.put(None)

    # daemon=True so a stuck generation cannot keep the process alive on exit.
    thread = threading.Thread(target=run_generation, daemon=True)
    thread.start()

    # Yield the growing transcript as tokens arrive.
    generated_text = ""
    while True:
        token = streamer_queue.get()
        if token is None:  # End-of-generation sentinel
            break
        generated_text += token
        yield generated_text
    thread.join()
# 3. Gradio Interface
# 3. Gradio Interface
# Prompt + optional image in; generated text streamed into the output box.
with gr.Blocks() as demo:
    gr.Markdown("# Phi-3.5 Streaming Backend")
    with gr.Row():
        txt = gr.Textbox()
        img = gr.Image(type="pil")
    out = gr.Textbox()
    btn = gr.Button("Generate")
    # stream_generation is a generator, so each yield updates `out` live.
    # Note: No separate queue=True needed in recent Gradio, but good practice
    btn.click(stream_generation, inputs=[txt, img], outputs=out)

# queue() enables streaming/generator outputs; launch() starts the server.
demo.queue().launch()