Spaces:
Sleeping
Sleeping
File size: 4,720 Bytes
a05fede b236948 a05fede b236948 a05fede 2763883 b236948 7af49e8 b236948 2763883 a05fede 2763883 a05fede 2763883 7af49e8 2763883 7af49e8 2763883 b236948 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede 2763883 a05fede b236948 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
from huggingface_hub import InferenceClient
import gradio as gr
import base64
from PIL import Image
import io
def image_to_data_url(image_path):
    """Convert a local image file into a base64 ``data:`` URL.

    Parameters
    ----------
    image_path : str | None
        Filesystem path to the image, or None when no image was supplied.

    Returns
    -------
    str | None
        A ``data:image/<fmt>;base64,...`` URL, or None if *image_path* is None.
    """
    if image_path is None:
        return None
    # Use PIL only to sniff the format (Image.open is lazy, so the pixels
    # are not decoded here). Fall back to JPEG when PIL cannot tell.
    with Image.open(image_path) as img:
        img_format = (img.format or "JPEG").lower()
    # Embed the ORIGINAL file bytes rather than re-saving through PIL:
    # re-encoding would recompress the image (lossy for JPEG) and strip
    # metadata for no benefit.
    with open(image_path, "rb") as fh:
        img_str = base64.b64encode(fh.read()).decode("ascii")
    return f"data:image/{img_format};base64,{img_str}"
def process_input(image, image_url, prompt, model, hf_token):
    """Stream a vision-chat completion for an image plus a text prompt.

    Generator: yields the progressively accumulated response text so the
    Gradio output textbox renders a live stream.

    Parameters
    ----------
    image : str | None
        Filepath of an uploaded image (takes priority over *image_url*).
    image_url : str
        Direct URL to an image, used when no upload is present.
    prompt : str
        The user's text prompt.
    model : str
        Model id passed to the inference provider.
    hf_token : str | None
        Hugging Face access token ("hf_..."); whitespace is tolerated.

    Raises
    ------
    gr.Error
        For invalid tokens, missing image input, or any provider/API failure.
    """
    # Guard against None and accidental whitespace from copy/paste;
    # the original `hf_token.startswith` would raise AttributeError on None.
    token = (hf_token or "").strip()
    if not token.startswith("hf_"):
        raise gr.Error("Invalid Hugging Face token. It should start with 'hf_'")

    # Uploaded file wins over URL; URL is used as-is.
    image_data = None
    if image is not None:
        image_data = image_to_data_url(image)
    elif image_url:
        image_data = image_url
    if not image_data:
        raise gr.Error("Please provide either an image upload or image URL")

    # Construct the client only after validation passed.
    client = InferenceClient(
        api_key=token,
        provider="cohere"
    )

    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": image_data}}
        ]
    }]

    try:
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=8000,
            stream=True,
        )
        full_response = ""
        for chunk in stream:
            # OpenAI-style delta chunks. Guard against an empty `choices`
            # list, which some providers emit on keep-alive/final chunks.
            if chunk.choices and hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'):
                full_response += chunk.choices[0].delta.content or ""
                yield full_response
            # Fallback: providers that emit flat content chunks.
            elif hasattr(chunk, 'content'):
                full_response += chunk.content or ""
                yield full_response
    except Exception as e:
        # Surface any provider failure as a user-visible Gradio error,
        # preserving the original cause for server-side debugging.
        raise gr.Error(f"API Error: {str(e)}") from e
# Vision-language models served via the Cohere provider on Hugging Face.
# Order matters: models[0] is the default selection in the dropdown and
# the model used by the first example.
models = [
    "CohereLabs/aya-vision-32b",
    "CohereLabs/aya-vision-8b",
]
# --- Gradio UI definition -------------------------------------------------
with gr.Blocks() as demo:
    # Header / usage instructions (markdown kept left-aligned so it renders
    # without accidental code-block indentation).
    gr.Markdown("""
# π Aya-Vision Model Interface
*Explore state-of-the-art vision-language models by Cohere through this interface.
Supports image inputs via upload or URL, with streaming responses.*
Read more about Aya Vision [here](https://cohere.com/research/aya)
**Get your HF token:** [Hugging Face Settings](https://huggingface.co/settings/tokens)
""")
    with gr.Row():
        with gr.Column():
            # Credentials are entered per-request and never stored.
            hf_token = gr.Textbox(
                label="Hugging Face Token",
                type="password",
                placeholder="hf_XXXXXXXXXXXXXX",
                info="Token is used temporarily for the request"
            )
            model_choice = gr.Dropdown(
                label="Model Selection",
                choices=models,
                value=models[0]
            )
            # Two mutually exclusive image sources; process_input prefers
            # the uploaded file over the URL.
            with gr.Tab("Upload Image"):
                image_input = gr.Image(
                    label="Upload Image",
                    type="filepath",
                    sources=["upload"]
                )
            with gr.Tab("Image URL"):
                image_url = gr.Textbox(
                    label="Image URL",
                    placeholder="https://example.com/image.jpg",
                )
            prompt = gr.Textbox(
                label="Prompt",
                value="Describe this image in one sentence.",
                lines=3
            )
            submit_btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            output = gr.Textbox(
                label="Model Response",
                interactive=False,
                lines=10,
                autoscroll=True
            )
    # process_input is a generator, so the output textbox streams updates.
    submit_btn.click(
        fn=process_input,
        inputs=[image_input, image_url, prompt, model_choice, hf_token],
        outputs=output,
        concurrency_limit=None
    )
    # Pre-filled examples; the token field is intentionally blank and must
    # be supplied by the user before submitting.
    gr.Examples(
        examples=[
            [
                None,
                "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
                "Describe this image in one sentence.",
                models[0],
                ""
            ],
            [
                None,
                "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png",
                "What is unique about this image format?",
                models[1],
                ""
            ]
        ],
        inputs=[image_input, image_url, prompt, model_choice, hf_token],
        label="Try these examples:"
    )
# Launch the app only when run as a script (removed a stray trailing "|"
# artifact left by the page extraction, which made this line invalid Python).
if __name__ == "__main__":
    demo.launch()