#!/usr/bin/env python3
import os
import json
import base64
import requests
import gradio as gr
from PIL import Image
from io import BytesIO

# Connection settings are injected via HF Spaces secrets (environment variables).
ENDPOINT = os.getenv("VLLM_ENDPOINT")
MODEL = os.getenv("VLLM_MODEL")

# Fail fast at import time when either setting is missing or empty.
if not (ENDPOINT and MODEL):
    raise ValueError("VLLM_ENDPOINT and VLLM_MODEL environment variables must be set. Please add them as secrets in your Space settings.")


def image_to_base64(image):
    """Serialize an image as PNG and return the bytes base64-encoded as text.

    Args:
        image: Object exposing a PIL-style ``save(fp, format=...)`` method.

    Returns:
        The base64 encoding of the PNG bytes, as a ``str``.
    """
    with BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode("utf-8")


def process_image(image, temperature):
    """
    Send an image to the vLLM endpoint and stream the response.

    This is a generator function: every message intended for the UI must be
    yielded, because a value passed to ``return`` inside a generator is never
    delivered to the consumer.

    Args:
        image: PIL image to process, or None when nothing has been uploaded.
        temperature: Sampling temperature forwarded in the request payload.

    Yields:
        str: The text accumulated so far, once per received content delta.
        When ``image`` is None, a single prompt asking for an upload. If the
        request or stream handling raises, a single ``"Error: ..."`` message.
    """
    if image is None:
        # A plain `return "..."` here would be swallowed by StopIteration and
        # the user would see nothing, so yield the message and then stop.
        yield "Please upload an image first."
        return

    # Convert image to base64
    b64_image = image_to_base64(image)

    # Build the payload with only image input (no text prompt)
    payload = {
        "model": MODEL,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": ""},
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_image}"}}
                ]
            }
        ],
        "temperature": temperature,
        "stream": True
    }

    # (connect, read) timeouts in seconds. Without a timeout an unresponsive
    # endpoint would block this handler indefinitely. The read timeout applies
    # to each wait for data, not to the whole stream.
    request_timeout = (10, 300)

    try:
        # The context manager releases the connection even when we stop
        # reading early ([DONE], an exception, or the consumer closing us).
        with requests.post(
            ENDPOINT,
            headers={"Content-Type": "application/json"},
            data=json.dumps(payload),
            stream=True,
            timeout=request_timeout,
        ) as response:
            response.raise_for_status()

            accumulated_response = ""

            for raw_line in response.iter_lines():
                if not raw_line:
                    continue  # blank separator between SSE events

                line = raw_line.decode("utf-8")
                # In SSE the space after "data:" is optional, so accept both.
                if line.startswith("data:"):
                    line = line[5:]
                line = line.strip()

                if line == "[DONE]":
                    break

                try:
                    chunk = json.loads(line)
                except json.JSONDecodeError:
                    # Non-JSON lines (comments, keep-alives, other SSE
                    # fields) carry no content; skip them.
                    continue

                if not isinstance(chunk, dict):
                    continue
                choices = chunk.get("choices") or []
                if not choices:
                    continue

                # `delta` may be absent or null on some chunks.
                delta = choices[0].get("delta") or {}
                content = delta.get("content")
                if content:
                    accumulated_response += content
                    yield accumulated_response

    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing.
        yield f"Error: {e}"


def _stream_to_outputs(image, temperature):
    """Stream one model response into both output components.

    The click handler must itself be a generator function for Gradio to
    stream updates. Returning generator objects from a lambda would hand the
    generator objects themselves to the components instead of text. The
    request is made once and each partial result is fanned out to both the
    rendered view and the raw textbox.

    Args:
        image: Value of the image input (PIL image or None).
        temperature: Value of the temperature slider.

    Yields:
        tuple[str, str]: The same partial text for (output_text, raw_output).
    """
    for partial_text in process_image(image, temperature):
        yield partial_text, partial_text


# Build the Gradio Interface
with gr.Blocks(title="📖 Image OCR", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 📖 Image to Text Extraction
        **💡 How to use:**
        1. Upload an image using the upload box
        2. Adjust temperature if needed
        3. Click "Extract Text" to process
        
        The model will extract and format text from your image.
        """
    )
    
    with gr.Row():
        # Left column: inputs and action buttons.
        with gr.Column(scale=1):
            image_input = gr.Image(
                type="pil",
                label="🖼️ Upload Image",
                sources=["upload", "clipboard"]
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.15,
                step=0.05,
                label="Temperature"
            )
            submit_btn = gr.Button("Extract Text", variant="primary")
            clear_btn = gr.Button("Clear", variant="secondary")
        
        # Right column: model output rendered as Markdown.
        with gr.Column(scale=2):
            output_text = gr.Markdown(
                label="📄 Extracted Text",
                value="<div style='min-height: 400px; padding: 10px; border: 1px solid #e0e0e0; border-radius: 4px; background-color: #f9f9f9;'><em>Extracted text will appear here...</em></div>"
            )
    
    # Same text as above, unrendered, with a copy button.
    with gr.Row():
        raw_output = gr.Textbox(
            label="Raw Output",
            placeholder="Raw text will appear here...",
            lines=10,
            show_copy_button=True
        )
    
    # Event handlers
    # A generator handler streams each partial result to both outputs.
    submit_btn.click(
        fn=_stream_to_outputs,
        inputs=[image_input, temperature],
        outputs=[output_text, raw_output]
    )
    
    # Reset the image input and both output components.
    clear_btn.click(
        fn=lambda: (None, "", ""),
        outputs=[image_input, output_text, raw_output]
    )
    
    gr.Markdown("""
    ---
    **Note:** Configure endpoint via `VLLM_ENDPOINT` and `VLLM_MODEL` environment variables.
    """)


# Launch the demo only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()