receipt_scanner / app.py
drlau's picture
Update app.py
1446f02 verified
import gradio as gr
import os
import requests
import json
from PIL import Image
from io import BytesIO
import base64
from mistralai import Mistral
def ocr_with_pixtral(image):
    """Send an uploaded image to the Pixtral model and return its reply.

    The image is serialized to PNG, base64-encoded, and embedded in a
    data URL that is sent alongside a text prompt in a single user message.

    Args:
        image: The uploaded picture as a PIL image (anything exposing
            ``save(fp, format=...)``), or ``None`` when nothing was uploaded.

    Returns:
        A 2-tuple containing the same text twice, one copy per output
        component wired to this handler. When ``image`` is ``None`` the
        text is a short hint asking the user to upload an image.
    """
    print(image)

    # Clicking the button without an upload passes None; answer with a hint
    # instead of failing on ``None.save``.
    if image is None:
        hint = "Please upload an image first."
        return hint, hint

    # Serialize the image to PNG bytes and base64-encode them for a data URL.
    img_byte_arr = BytesIO()
    image.save(img_byte_arr, format='PNG')
    img_base64 = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')

    # Retrieve the API key from environment variables
    api_key = os.getenv('mistral_api_key')

    # Specify model
    model = "pixtral-12b-2409"

    # Initialize the Mistral client
    client = Mistral(api_key=api_key)

    # Define the messages for the chat
    # NOTE(review): the prompt is a generic image question; confirm whether a
    # receipt-specific extraction prompt is intended.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What's in this image?",
                },
                {
                    "type": "image_url",
                    # The payload was saved as PNG above, so declare the
                    # matching MIME type (it previously claimed image/jpeg).
                    "image_url": f"data:image/png;base64,{img_base64}",
                },
            ],
        }
    ]

    # Get the chat response
    chat_response = client.chat.complete(
        model=model,
        messages=messages,
    )

    # Both output components display the same text.
    content = chat_response.choices[0].message.content
    return content, content
# Build the Gradio UI: a title, a two-column row (upload + button on the
# left, results on the right), the click wiring, and usage instructions.
with gr.Blocks(title="Receipt Scanner") as app:
    gr.Markdown("# Receipt Scanner")
    gr.Markdown("Upload your receipt and get the extracted text in structured form.")

    with gr.Row():
        # Input side: the image is handed to the handler as a PIL object.
        with gr.Column():
            input_image = gr.Image(
                type="pil",
                label="Upload Image",
            )
            submit_button = gr.Button("Extract Text")

        # Output side: the same reply is shown as plain text and as Markdown.
        with gr.Column():
            output_textbox = gr.Textbox(
                label="Extracted Text",
                max_lines=5,
            )
            output_markdown = gr.Markdown(label="Extracted Text")

    # The handler returns two values, one per output component, in this order.
    submit_button.click(
        fn=ocr_with_pixtral,
        inputs=input_image,
        outputs=[output_textbox, output_markdown],
    )

    gr.Markdown("## Instructions")
    gr.Markdown("1. Upload an image containing text")
    gr.Markdown("2. Click 'Extract Text' to process the image")
    gr.Markdown("3. View the extracted text in the output box")

# Start the server only when this file is run as a script.
if __name__ == "__main__":
    app.launch(debug=True)