# Spaces: valenynl / ReceiptSplitAI (Running)
# File size: 7,998 Bytes

import json
import os
import time

from loguru import logger
from openai import OpenAI

from common.enum.ai_service_error import AiServiceError
from common.exceptions import AiServiceException
from common.utils import encode_image_to_webp_base64
from image_processing_interface import ImageProcessingInterface


class OpenAIService(ImageProcessingInterface):
    """Receipt-image recognizer backed by the OpenAI chat completions API.

    The class behaves as a singleton: ``__new__`` always hands back the same
    instance, and ``__init__`` builds the API client only until the first
    successful initialization has been recorded.
    """

    # Shared singleton instance, created lazily by __new__.
    _instance = None

    # Function-calling schema passed as ``tools`` to the chat completion call.
    # It declares the ``parse_image`` function whose arguments carry the
    # structured receipt data. Property names (including the spelling of
    # "taxs_items") are runtime keys and are intentionally left untouched.
    TOOLS = [
        {
            "type": "function",
            "function": {
                "name": "parse_image",
                "description": "Parses receipt data from image into a structured JSON format.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "store_name": {"type": "string"},
                        "country": {"type": "string"},
                        "receipt_type": {"type": "string"},
                        "address": {"type": "string"},
                        "datetime": {"type": "string"},
                        "currency": {"type": "string"},
                        "sub_total_amount": {"type": "number"},
                        "total_price": {"type": "number"},
                        "total_discount": {"type": "number"},
                        "all_items_price_with_tax": {"type": "boolean"},
                        "payment_method": {
                            "type": "string",
                            "enum": ["card", "cash", "unknown"]
                        },
                        "rounding": {"type": "number"},
                        "tax": {"type": "number"},
                        "taxes_not_included_sum": {"type": "number"},
                        "tips": {"type": "number"},
                        "items": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "name": {"type": "string"},
                                    "unit_price": {"type": "number"},
                                    "quantity": {"type": "number"},
                                    "measurement_unit": {"type": "string"},
                                    "total_price_without_discount": {"type": "number"},
                                    "total_price_with_discount": {"type": "number"},
                                    "discount": {"type": "number"},
                                    "category": {"type": "string"},
                                    "item_price_with_tax": {"type": "boolean"},
                                },
                                "required": ["name", "unit_price", "quantity", "total_price_without_discount"]
                            }
                        },
                        "taxs_items": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "tax_name": {"type": "string"},
                                    "percentage": {"type": "number"},
                                    "tax_from_amount": {"type": "number"},
                                    "tax": {"type": "number"},
                                    "total": {"type": "number"},
                                    "tax_included": {"type": "boolean"},
                                },
                            }
                        }

                    },
                    "required": ["total_price", "items"]
                }
            }
        }
    ]

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, api_key=None):
        """Configure the OpenAI client once for the shared instance.

        Args:
            api_key: API key to use. When omitted or empty, the
                ``OPENAI_API_KEY`` environment variable is read instead.

        Raises:
            ValueError: If no key is given and ``OPENAI_API_KEY`` is unset
                or empty.
        """
        # __init__ runs on every OpenAIService(...) call even though __new__
        # returns the same object, so skip the setup once it has succeeded.
        if hasattr(self, "_initialized"):
            return

        self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError("OPENAI_API_KEY not found.")
        logger.info("OPENAI_API_KEY was found.")

        self.client = OpenAI(api_key=self.api_key)
        self._initialized = True

    def process_image(self, input_image64, model_name, prompt, system="You are a receipt recognizer!", temperature=0.0):
        """Send a base64 image to the model and return the parsed receipt.

        Args:
            input_image64: Base64 payload of the image; it is embedded in a
                ``data:image/webp;base64,`` URL.
            model_name: Model identifier forwarded to the chat completions API.
            prompt: Text of the user message sent alongside the image.
            system: Text of the system message.
            temperature: Sampling temperature forwarded to the API.

        Returns:
            A ``(json_text, model_input)`` tuple. ``json_text`` is the receipt
            data serialized with ``json.dumps(..., indent=4)``, extended with
            ``input_tokens``, ``output_tokens``, ``total_tokens`` and ``time``
            (elapsed seconds of the API call). ``model_input`` is a dict with
            the ``system`` and ``prompt`` texts that were sent.

        Raises:
            ValueError: If ``input_image64`` is empty.
            RuntimeError: For any failure during the API call or while
                handling the response. The original exception (including
                ``AiServiceException``) is attached as ``__cause__``.
        """
        if not input_image64:
            raise ValueError("No image provided.")

        try:
            # perf_counter is monotonic, so the measured duration cannot be
            # skewed by system clock adjustments.
            start_time = time.perf_counter()

            response = self.client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {
                            "url": f"data:image/webp;base64,{input_image64}"}
                         }
                    ]}
                ],
                tools=self.TOOLS,
                temperature=temperature,
                response_format={"type": "json_object"}
            )

            elapsed = time.perf_counter() - start_time
            logger.info(f"Recognition spent {elapsed:.2f} seconds.")

            # Extract the function call result
            if not response.choices:
                raise ValueError("The API response does not contain valid choices.")

            choice = response.choices[0]
            if not choice.message.tool_calls:
                # Surface the model's plain-text reply, if any, as the reason.
                raise ValueError(choice.message.content or "No tool calls found in the API response.")

            function_call = choice.message.tool_calls[0]

            if not (function_call.function and function_call.function.arguments):
                raise ValueError("No valid function call data found in the API response.")

            logger.debug(f"Raw API Response: {function_call.function.arguments}")

            try:
                json_content = json.loads(function_call.function.arguments)
            except json.JSONDecodeError:
                error_message = "The receipt could not be recognized. Please retake the photo."
                logger.error(error_message)
                raise AiServiceException(AiServiceError.RETAKE_PHOTO, error_message)

            # NOTE(review): the helpers below are not defined in this class;
            # presumably they are inherited from ImageProcessingInterface.
            if not self._validate_receipt_data(json_content):
                error_message = "The receipt is empty or contains no valid items. Please ensure the receipt is correctly scanned and try again"
                logger.error(error_message)
                raise AiServiceException(AiServiceError.RETAKE_PHOTO, error_message)

            json_content = self._add_total_price(json_content)
            json_content = self._add_discount_item(json_content)
            json_content = self._add_rounding_item(json_content)

            # Add token usage information
            json_content['input_tokens'] = response.usage.prompt_tokens
            json_content['output_tokens'] = response.usage.completion_tokens
            json_content['total_tokens'] = response.usage.total_tokens
            json_content['time'] = elapsed

            model_input = {
                "system": system,
                "prompt": prompt
            }

            return json.dumps(json_content, indent=4), model_input

        except Exception as e:
            # Every failure is reported as RuntimeError (existing contract);
            # chaining keeps the original exception reachable via __cause__.
            raise RuntimeError(f"Failed to process the image: {e}") from e

if __name__ == "__main__":
    # Manual smoke run: recognize one sample receipt and print the outcome.
    # Any failure is reported on stdout instead of producing a traceback.
    try:
        service = OpenAIService()

        # Encode the sample receipt image as base64 WebP.
        sample_path = "./examples/fatlouis.webp"
        encoded_image = encode_image_to_webp_base64(sample_path)

        # Load the user prompt from disk; the system message is fixed.
        with open('common/prompt_v1.txt', 'r', encoding='utf-8') as prompt_file:
            user_prompt = prompt_file.read()
        system_message = "You are a receipt recognizer."

        result = service.process_image(
            encoded_image,
            "gpt-4o-mini",
            user_prompt,
            system_message,
            0.0,
        )

        print(f'Image processing result: {result}')

    except Exception as error:
        print(f"Error: {error}")