"""Gradio UI for Cohere Aya Vision models served via the Hugging Face Inference API."""

import base64
import io
import os

import gradio as gr
from huggingface_hub import InferenceClient
from PIL import Image

# Read the token from the environment rather than passing the literal
# placeholder string "HF_TOKEN", which the API would reject as an invalid key.
client = InferenceClient(
    provider="cohere",
    api_key=os.environ.get("HF_TOKEN"),
)


def image_to_data_url(image_path):
    """Convert a local image file into a base64 ``data:`` URL.

    Args:
        image_path: Filesystem path to the image, or ``None``.

    Returns:
        A ``data:image/<fmt>;base64,...`` string, or ``None`` when no path
        is given.
    """
    if image_path is None:
        return None
    with Image.open(image_path) as img:
        # PIL reports format=None for images without a recognized container
        # (e.g. in-memory images); fall back to PNG so save() cannot fail.
        fmt = img.format or "PNG"
        buffered = io.BytesIO()
        img.save(buffered, format=fmt)
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/{fmt.lower()};base64,{img_str}"


def process_input(image, image_url, prompt, model):
    """Stream a vision-chat completion for an uploaded image or an image URL.

    Yields the accumulated response text after each chunk so Gradio can
    live-update the output textbox.

    Args:
        image: Filepath of an uploaded image, or ``None``.
        image_url: Remote image URL (used only when no upload is given).
        prompt: Text prompt to send alongside the image.
        model: Model id to query.

    Raises:
        gr.Error: when neither an upload nor a URL is provided.
    """
    image_data = None
    if image is not None:
        image_data = image_to_data_url(image)
    elif image_url:
        image_data = image_url
    if not image_data:
        raise gr.Error("Please provide either an image upload or image URL")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": image_data}},
            ],
        }
    ]

    stream = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=512,
        stream=True,
    )

    full_response = ""
    for chunk in stream:
        # Some stream chunks (e.g. the final usage/keep-alive chunk) can
        # carry an empty choices list — guard before indexing.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content or ""
        full_response += content
        yield full_response


# Model ids offered in the dropdown.
models = [
    "CohereLabs/aya-vision-32b",
    "CohereLabs/aya-vision-8b",
]

with gr.Blocks() as demo:
    gr.Markdown("# Cohere Aya Vision model UI")
    with gr.Row():
        with gr.Column():
            model_choice = gr.Dropdown(
                label="Select Model",
                choices=models,
                value=models[0],
                interactive=True,
            )
            with gr.Tab("Upload Image"):
                image_input = gr.Image(
                    label="Upload Image",
                    type="filepath",
                    sources=["upload"],
                )
            with gr.Tab("Image URL"):
                image_url = gr.Textbox(
                    label="Image URL",
                    placeholder="Paste image URL here...",
                    value="",
                )
            prompt = gr.Textbox(
                label="Prompt",
                value="Describe this image in one sentence.",
                interactive=True,
            )
            submit_btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            output = gr.Textbox(
                label="Model Response",
                interactive=False,
                lines=10,
                autoscroll=True,
            )

    submit_btn.click(
        fn=process_input,
        inputs=[image_input, image_url, prompt, model_choice],
        outputs=output,
        concurrency_limit=None,
    )

    gr.Examples(
        examples=[
            [
                None,
                "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
                "Describe this image in one sentence.",
                models[0],
            ],
            [
                None,
                "https://upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Cat_November_2010-1a.jpg/1200px-Cat_November_2010-1a.jpg",
                "What is the main subject of this image?",
                models[1],
            ],
        ],
        inputs=[image_input, image_url, prompt, model_choice],
        label="Example Inputs",
    )

if __name__ == "__main__":
    # queue() enables streaming/generator outputs in Gradio.
    demo.queue().launch()