import gradio as gr
import os
import requests
import json
from PIL import Image
from io import BytesIO
import base64
from mistralai import Mistral


def ocr_with_pixtral(image):
    """Send an uploaded image to the Pixtral vision model and return its reply.

    The image is re-encoded as PNG, wrapped in a base64 data URL and sent to
    the Mistral chat-completion endpoint together with a text prompt.

    Args:
        image: A PIL image (the Gradio input is declared with ``type="pil"``),
            or ``None`` when the button is clicked before anything is uploaded.

    Returns:
        A 2-tuple containing the same text twice, one entry for each Gradio
        output component wired to this callback (textbox and markdown).

    Raises:
        RuntimeError: If the ``mistral_api_key`` environment variable is unset
            or empty.
    """
    # Gradio hands over None when the image input is empty; answer with a hint
    # in both outputs instead of crashing on ``None.save``.
    if image is None:
        hint = "Please upload an image first."
        return hint, hint

    # Fail early with a clear message rather than letting the request be
    # rejected later with an opaque authentication error.
    api_key = os.getenv('mistral_api_key')
    if not api_key:
        raise RuntimeError(
            "The 'mistral_api_key' environment variable is not set."
        )

    # Serialise the image to PNG in memory and base64-encode it for the
    # data URL below.
    buffer = BytesIO()
    image.save(buffer, format='PNG')
    img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

    model = "pixtral-12b-2409"
    client = Mistral(api_key=api_key)

    # NOTE(review): the prompt is generic although the UI promises extracted
    # receipt text -- confirm whether an OCR-specific prompt is intended.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What's in this image?"
                },
                {
                    "type": "image_url",
                    # The MIME type must match the PNG bytes produced above.
                    "image_url": f"data:image/png;base64,{img_base64}"
                }
            ]
        }
    ]

    chat_response = client.chat.complete(
        model=model,
        messages=messages
    )

    # Both output components display the same model reply.
    text = chat_response.choices[0].message.content
    return text, text


# Assemble the Gradio UI: a header, a two-column row (image input and button
# on the left, textbox and markdown outputs on the right), then the usage
# instructions.
with gr.Blocks(title="Receipt Scanner") as app:
    # Page header.
    gr.Markdown("# Receipt Scanner")
    gr.Markdown("Upload your receipt and get the extracted text in structured form.")

    with gr.Row():
        # Left column: image upload and the trigger button.
        with gr.Column():
            input_image = gr.Image(label="Upload Image", type="pil")
            submit_button = gr.Button("Extract Text")

        # Right column: the same result shown as plain text and as markdown.
        with gr.Column():
            output_textbox = gr.Textbox(max_lines=5, label="Extracted Text")
            output_markdown = gr.Markdown(label="Extracted Text")

    # Clicking the button runs the callback on the uploaded image and fills
    # both output components with its two return values.
    submit_button.click(
        fn=ocr_with_pixtral,
        inputs=input_image,
        outputs=[output_textbox, output_markdown],
    )

    # Usage instructions, one Markdown component per line.
    for instruction in (
        "## Instructions",
        "1. Upload an image containing text",
        "2. Click 'Extract Text' to process the image",
        "3. View the extracted text in the output box",
    ):
        gr.Markdown(instruction)

# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    app.launch(debug=True)