"""Gradio UI for Cohere Aya Vision models served via the Hugging Face Inference API."""

import base64
import io
import os

import gradio as gr
from huggingface_hub import InferenceClient
from PIL import Image

# Read the token from the environment rather than passing the literal
# placeholder string "HF_TOKEN", which the API would reject as an invalid key.
client = InferenceClient(
    provider="cohere",
    api_key=os.environ.get("HF_TOKEN"),
)


def image_to_data_url(image_path):
    """Convert a local image file into a base64 ``data:`` URL.

    Args:
        image_path: Filesystem path to the image, or ``None``.

    Returns:
        A ``data:image/<fmt>;base64,...`` string, or ``None`` when no path
        is given.
    """
    if image_path is None:
        return None
    with Image.open(image_path) as img:
        # PIL reports format=None for images without a recognized container
        # (e.g. in-memory images); fall back to PNG so save() cannot fail.
        fmt = img.format or "PNG"
        buffered = io.BytesIO()
        img.save(buffered, format=fmt)
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/{fmt.lower()};base64,{img_str}"


def process_input(image, image_url, prompt, model):
    """Stream a vision-chat completion for an uploaded image or an image URL.

    Yields the accumulated response text after each chunk so Gradio can
    live-update the output textbox.

    Args:
        image: Filepath of an uploaded image, or ``None``.
        image_url: Remote image URL (used only when no upload is given).
        prompt: Text prompt to send alongside the image.
        model: Model id to query.

    Raises:
        gr.Error: when neither an upload nor a URL is provided.
    """
    image_data = None
    if image is not None:
        image_data = image_to_data_url(image)
    elif image_url:
        image_data = image_url
    if not image_data:
        raise gr.Error("Please provide either an image upload or image URL")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": image_data}},
            ],
        }
    ]

    stream = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=512,
        stream=True,
    )

    full_response = ""
    for chunk in stream:
        # Some stream chunks (e.g. the final usage/keep-alive chunk) can
        # carry an empty choices list — guard before indexing.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content or ""
        full_response += content
        yield full_response


# Model ids offered in the dropdown.
models = [
    "CohereLabs/aya-vision-32b",
    "CohereLabs/aya-vision-8b",
]

with gr.Blocks() as demo:
    gr.Markdown("# Cohere Aya Vision model UI")
    with gr.Row():
        with gr.Column():
            model_choice = gr.Dropdown(
                label="Select Model",
                choices=models,
                value=models[0],
                interactive=True,
            )
            with gr.Tab("Upload Image"):
                image_input = gr.Image(
                    label="Upload Image",
                    type="filepath",
                    sources=["upload"],
                )
            with gr.Tab("Image URL"):
                image_url = gr.Textbox(
                    label="Image URL",
                    placeholder="Paste image URL here...",
                    value="",
                )
            prompt = gr.Textbox(
                label="Prompt",
                value="Describe this image in one sentence.",
                interactive=True,
            )
            submit_btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            output = gr.Textbox(
                label="Model Response",
                interactive=False,
                lines=10,
                autoscroll=True,
            )

    submit_btn.click(
        fn=process_input,
        inputs=[image_input, image_url, prompt, model_choice],
        outputs=output,
        concurrency_limit=None,
    )

    gr.Examples(
        examples=[
            [
                None,
                "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
                "Describe this image in one sentence.",
                models[0],
            ],
            [
                None,
                "https://upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Cat_November_2010-1a.jpg/1200px-Cat_November_2010-1a.jpg",
                "What is the main subject of this image?",
                models[1],
            ],
        ],
        inputs=[image_input, image_url, prompt, model_choice],
        label="Example Inputs",
    )

if __name__ == "__main__":
    # queue() enables streaming/generator outputs in Gradio.
    demo.queue().launch()