import gradio as gr
from huggingface_hub import InferenceClient
import base64
from io import BytesIO

def encode_image(image) -> str:
    """Return *image* as a base64 JPEG ``data:`` URL.

    Args:
        image: A PIL-style image object exposing ``mode``, ``convert()`` and
            ``save()``.

    Returns:
        A string of the form ``data:image/jpeg;base64,<payload>``.
    """
    # JPEG has no alpha channel or palette support; saving e.g. an RGBA or
    # P-mode image directly raises OSError in Pillow, so normalise to RGB
    # first. RGB and greyscale ("L") images are written unchanged.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    buffer = BytesIO()
    image.save(buffer, format="JPEG")
    payload = base64.b64encode(buffer.getvalue()).decode("ascii")
    return f"data:image/jpeg;base64,{payload}"

def caption_image(image, oauth_token: gr.OAuthToken | None):
    """Ask the hosted vision model for a one-sentence description of *image*.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
            It is passed to ``encode_image`` to build the data URL.
        oauth_token: The signed-in user's OAuth token, or ``None`` when the
            user is not logged in. It is not wired as an explicit event input;
            NOTE(review): Gradio is expected to inject it from the type
            annotation - confirm against the Gradio OAuth documentation.

    Returns:
        The model's message content on success; otherwise a human-readable
        message (login prompt, upload prompt, or ``"Error: ..."``) meant to be
        shown in the output textbox.
    """
    # Guard clauses: both preconditions are reported as plain text to the UI.
    if oauth_token is None:
        return "Please log in using the button on the sidebar to use the model."

    if image is None:
        return "Please upload an image."

    try:
        # Encoding and client construction live inside the try so that any
        # failure here is reported the same way as a failed request, instead
        # of escaping as an unhandled exception.
        base64_image = encode_image(image)

        # The request is made with the user's own token.
        client = InferenceClient(token=oauth_token.token)

        completion = client.chat.completions.create(
            model="zai-org/GLM-4.6V-Flash:novita",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Describe this image in one sentence.",
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": base64_image},
                        },
                    ],
                }
            ],
        )
        return completion.choices[0].message.content

    except Exception as e:
        # UI boundary: surface the failure as text rather than crash the handler.
        return f"Error: {e}"

# Build the UI: a login sidebar plus a two-column upload / result layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:

    # Sidebar: sign-in control and a short note on why to log in.
    with gr.Sidebar():
        gr.Markdown(value="### Authentication")
        gr.LoginButton()
        gr.Markdown(
            value="Logging in allows you to use your own HF quota for the Inference API."
        )

    gr.Markdown(value="# 📸 GLM Image Captioner (OAuth)")

    with gr.Row():
        # Left column: image upload and the trigger button.
        with gr.Column():
            input_img = gr.Image(label="Upload Image", type="pil")
            submit_btn = gr.Button(value="Generate Caption", variant="primary")

        # Right column: where the caption (or an error message) is shown.
        with gr.Column():
            output_text = gr.Textbox(lines=10, label="Caption")

    # Only the image is listed as an explicit input; the OAuth token parameter
    # of caption_image is not wired here.
    submit_btn.click(caption_image, [input_img], [output_text])


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()