Spaces:
Sleeping
Sleeping
File size: 4,371 Bytes
a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import base64
import io
import mimetypes
from pathlib import Path

import gradio as gr
from huggingface_hub import InferenceClient
from PIL import Image
def image_to_data_url(image_path):
    """Convert an image file on disk to a base64 ``data:`` URL.

    Args:
        image_path: Filesystem path to the image, or None.

    Returns:
        A ``data:image/<subtype>;base64,...`` string, or None when no
        path is given.
    """
    if image_path is None:
        return None
    # Read the raw bytes instead of round-tripping through PIL: the old
    # re-encode crashed when PIL could not infer the format (img.format
    # is None for some inputs) and needlessly re-compressed the file.
    raw = Path(image_path).read_bytes()
    mime, _ = mimetypes.guess_type(str(image_path))
    if not mime or not mime.startswith("image/"):
        # Unknown or non-image extension: fall back to a harmless default
        # so the data URL is still well-formed.
        mime = "image/png"
    encoded = base64.b64encode(raw).decode()
    return f"data:{mime};base64,{encoded}"
def process_input(image, image_url, prompt, model, hf_token):
    """Stream a vision-language chat completion for an image + prompt.

    Args:
        image: Filepath of an uploaded image, or None.
        image_url: URL of an image; used only when no upload is given.
        prompt: Text prompt sent alongside the image.
        model: Model id to query (one of ``models``).
        hf_token: Hugging Face API token (must start with ``hf_``).

    Yields:
        The accumulated response text after each streamed chunk, so the
        UI textbox updates incrementally.

    Raises:
        gr.Error: On a missing/invalid token, missing image, or API failure.
    """
    # Guard against None as well as malformed tokens: gr.Textbox normally
    # yields "", but examples/programmatic calls may pass None, which would
    # previously crash with AttributeError before the friendly error fired.
    token = (hf_token or "").strip()
    if not token.startswith("hf_"):
        raise gr.Error("Invalid Hugging Face token. It should start with 'hf_'")
    client = InferenceClient(provider="cohere", api_key=token)
    # Prefer the uploaded file; fall back to the URL field.
    image_data = None
    if image is not None:
        image_data = image_to_data_url(image)
    elif image_url:
        image_data = image_url
    if not image_data:
        raise gr.Error("Please provide either an image upload or image URL")
    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": image_data}},
        ],
    }]
    try:
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=512,
            stream=True,
        )
        full_response = ""
        for chunk in stream:
            # delta.content is None on role/terminator chunks; coalesce to "".
            content = chunk.choices[0].delta.content or ""
            full_response += content
            yield full_response
    except Exception as e:
        # Surface provider errors in the UI instead of a raw traceback,
        # keeping the original cause chained for server-side logs.
        raise gr.Error(f"API Error: {str(e)}") from e
# Aya-Vision checkpoints selectable in the UI; the first entry is the
# dropdown's default.
models = [f"CohereLabs/aya-vision-{size}" for size in ("32b", "8b")]
# --- Gradio UI -------------------------------------------------------------
# Left column: token, model choice, image source (upload tab or URL tab) and
# prompt. Right column: the streamed model response. `process_input` is a
# generator, so the output textbox fills in incrementally.
with gr.Blocks() as demo:
    gr.Markdown("""
# π Aya-Vision Model Interface
*Explore state-of-the-art vision-language models by Cohere through this interface.
Supports image inputs via upload or URL, with streaming responses.*
Read more about Aya Vision [here](https://cohere.com/research/aya)
**Get your HF token:** [Hugging Face Settings](https://huggingface.co/settings/tokens)
""")
    with gr.Row():
        with gr.Column():
            # Credentials: token is only forwarded to InferenceClient per call.
            hf_token = gr.Textbox(
                label="Hugging Face Token",
                type="password",
                placeholder="hf_XXXXXXXXXXXXXX",
                info="Token is used temporarily for the request"
            )
            model_choice = gr.Dropdown(
                label="Model Selection",
                choices=models,
                value=models[0]
            )
            # Two mutually-exclusive image sources; the upload wins when both
            # are set (see process_input).
            with gr.Tab("Upload Image"):
                image_input = gr.Image(
                    label="Upload Image",
                    type="filepath",
                    sources=["upload"]
                )
            with gr.Tab("Image URL"):
                image_url = gr.Textbox(
                    label="Image URL",
                    placeholder="https://example.com/image.jpg",
                )
            prompt = gr.Textbox(
                label="Prompt",
                value="Describe this image in one sentence.",
                lines=3
            )
            submit_btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            output = gr.Textbox(
                label="Model Response",
                interactive=False,
                lines=10,
                autoscroll=True
            )
    # Wire the button to the streaming handler; no concurrency cap.
    submit_btn.click(
        fn=process_input,
        inputs=[image_input, image_url, prompt, model_choice, hf_token],
        outputs=output,
        concurrency_limit=None
    )
    # Clickable example rows (URL-based, token left blank on purpose).
    gr.Examples(
        examples=[
            [
                None,
                "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
                "Describe this image in one sentence.",
                models[0],
                ""
            ],
            [
                None,
                "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png",
                "What is unique about this image format?",
                models[1],
                ""
            ]
        ],
        inputs=[image_input, image_url, prompt, model_choice, hf_token],
        label="Try these examples:"
    )
if __name__ == "__main__":
    # Queuing is required for generator (streaming) event handlers.
    app = demo.queue()
    app.launch()