import gradio as gr
from huggingface_hub import InferenceClient
import os
import base64
from io import BytesIO
from PIL import Image

# Hugging Face client configuration.
# Heads up: HF_TOKEN must be defined in the Space's "Secrets".
# If you do not have one, create it in your HF profile -> Settings -> Access Tokens.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Vision model used for every request: Llama-3.2-11B-Vision-Instruct
MODEL_ID = "meta-llama/Llama-3.2-11B-Vision-Instruct"

# Single module-level client shared by all requests.
client = InferenceClient(token=HF_TOKEN, model=MODEL_ID)

def image_to_base64(image):
    """Encode an image as a base64 JPEG data URL.

    JPEG cannot store an alpha channel or a colour palette, so any image
    whose mode is not already plain RGB or grayscale is converted to RGB
    before saving. Without this, saving e.g. an RGBA or palette image as
    JPEG raises ``OSError``.

    Args:
        image: Object exposing ``mode``, ``convert(mode)`` and
            ``save(fp, format=...)`` (a PIL image in this app).

    Returns:
        A string of the form ``data:image/jpeg;base64,<payload>``.
    """
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/jpeg;base64,{img_str}"

def chismear_con_la_foto(imagen, pregunta):
    """Ask the vision model a question about an uploaded image.

    Args:
        imagen: Image supplied by the UI, or ``None`` when nothing was
            uploaded.
        pregunta: The user's question. ``None``, empty, or whitespace-only
            input falls back to a default "describe the image" prompt.

    Returns:
        The model's reply text, or a user-facing message when no image was
        provided or the request failed.
    """
    if imagen is None:
        return "¡No inventes, BATUTO! Sin foto no hay paraíso. Sube una."

    # Treat whitespace-only input like an empty question so the model never
    # receives a blank prompt.
    pregunta = (pregunta or "").strip()
    if not pregunta:
        pregunta = "Describe esta imagen detalladamente."

    try:
        # Encode the image as a data URL so it can travel inside the request.
        img_url = image_to_base64(imagen)

        # Chat-style payload: a single user turn carrying text plus image.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": pregunta},
                    {"type": "image_url", "image_url": {"url": img_url}},
                ],
            }
        ]

        completion = client.chat.completions.create(
            model=MODEL_ID,
            messages=messages,
            max_tokens=500,
        )

        # Guard against a missing content field so callers always get a string.
        return completion.choices[0].message.content or ""

    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing the app.
        return f"¡Chale, se nos cayó el sistema! Error: {str(e)}"

# BATUTO-ART styled user interface.
with gr.Blocks() as demo:
    gr.Markdown(value="# 🎨 BATUTO-ART Vision (Hugging Face Edition)")
    gr.Markdown(value="### Motor: Llama 3.2 Vision | Pura calidad, papá.")

    with gr.Row():
        # Left column: image upload, question box, and submit button.
        with gr.Column():
            img_input = gr.Image(label="La evidencia (Foto)", type="pil")
            # Free-text field so the user can ask something specific.
            txt_input = gr.Textbox(
                value="Describe esta imagen con estilo.",
                label="¿Qué quieres saber?",
                placeholder="Ej: ¿Qué ingredientes se ven ahí? o Describe la imagen.",
            )
            btn = gr.Button(value="¡Pregúntale al modelo!", variant="primary")

        # Right column: read-only box showing the model's answer.
        with gr.Column():
            output = gr.Textbox(
                label="Respuesta del Modelo",
                lines=12,
                interactive=False,
            )

    gr.Markdown(value="--- \n**Desarrollado con todo el flow para BATUTO-ART**")

    # Wire the button: (image, question) in, answer text out.
    btn.click(chismear_con_la_foto, [img_input, txt_input], output)

if __name__ == "__main__":
    tema = gr.themes.Soft(primary_hue="amber")
    demo.launch(theme=tema)