File size: 4,457 Bytes
51db8d1
 
 
 
 
 
 
 
547b0f2
51db8d1
 
 
 
 
efbe6dc
 
 
547b0f2
efbe6dc
51db8d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
from dotenv import load_dotenv
import os
import pandas as pd
import json
from google.cloud import vision
import google.generativeai as genai
from google.oauth2 import service_account
import re
from pathlib import Path

# Initialized Modules
from modules.mapping import mapping_employee, mapping_merchant, mapping_product, mapping_unit
from modules.formatting import format_date

# # Load the .env from the parent directory of this file
# env_path = Path(__file__).resolve().parent.parent / ".env"
# load_dotenv(dotenv_path=env_path)

load_dotenv()
# Load the credential for Cloud-Vision-API model.
# GOOGLE_APPLICATION_CREDENTIALS_JSON is expected to hold the full
# service-account JSON document as a string; json.loads raises TypeError
# if the variable is unset, so a missing .env fails fast at import time.
service_account_info_str = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
service_account_info = json.loads(service_account_info_str)
CREDENTIALS = service_account.Credentials.from_service_account_info(service_account_info)
# Load the Gemini model configuration from the environment.
# NOTE(review): neither variable is validated here — if GEMINI_API_KEY or
# MODEL_NAME is unset, failure surfaces later at the first API call.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
MODEL_NAME = os.getenv("MODEL_NAME")
genai.configure(api_key=GEMINI_API_KEY)

# Gemini Model (module-level singleton reused by image_process)
LLM_model = genai.GenerativeModel(MODEL_NAME)

# Console divider helper
def line():
    """Print a 30-character separator line to stdout."""
    separator = "=" * 30
    print(separator)

# Image to raw text
def process_ocr(image_path):
    """Run Google Cloud Vision document OCR on an image file.

    Parameters
    ----------
    image_path : str | os.PathLike
        Path to the image file to transcribe.

    Returns
    -------
    str
        The full detected text (first annotation is the whole-page text),
        or "" when nothing was detected or any step failed.
    """
    try:
        client = vision.ImageAnnotatorClient(credentials=CREDENTIALS)

        with open(image_path, "rb") as image_file:
            content = image_file.read()

        image = vision.Image(content=content)
        response = client.document_text_detection(image=image)

        # The Vision API reports request failures inside the response object
        # instead of raising; surface them so an API error is not silently
        # mistaken for "image contains no text".
        if response.error.message:
            raise RuntimeError(f"Vision API error: {response.error.message}")

        # text_annotations[0] is the full-page text; later entries are
        # per-word annotations we don't need here.
        texts = response.text_annotations
        return texts[0].description if texts else ""
    except Exception as e:
        # Best-effort contract: log the failure and return an empty transcript
        # so the caller's pipeline can continue.
        print(f"OCR failed: {e}")
        return ""
    
# Parsing image-text
def parse_image_text(text, extract_model):
    """Extract structured order data from Vietnamese invoice text via an LLM.

    Parameters
    ----------
    text : str
        Raw OCR text of the invoice.
    extract_model :
        Model exposing ``generate_content(prompt)`` returning an object
        with a ``.text`` attribute (e.g. a Gemini GenerativeModel).

    Returns
    -------
    list[dict]
        One dict per order with seller/buyer/product/unit/quantity/date
        fields; [] when no valid JSON could be recovered from the reply.
    """
    prompt = f"""
    Dưới đây là nội dung hóa đơn bằng tiếng Việt. Hãy trích xuất tên đại lý mua (seller), tên đại lý bán (buyer), tên sản phẩm (product_name), đơn vị tính (unit), số lượng theo từng đơn hàng (quantity), ngày đặt hàng (order_date).

    Văn bản:
    {text}

    Trả về kết quả dạng JSON:
    {{
    "order_1": {{
        "seller": "...",
        "buyer": "...",
        "product_name": "...",
        "unit": "...",
        "quantity": "...",
        "order_date": "..."
    }},
    ...
    }}
    """
    response = extract_model.generate_content(prompt)

    try:
        # Isolate the JSON object from any prose the model wrapped around it.
        match = re.search(r"\{[\s\S]*\}", response.text)
        if not match:
            raise ValueError("No valid JSON found in Gemini output")

        orders = json.loads(match.group(0))

        # Normalize each order's date string in place before returning.
        for entry in orders.values():
            if "order_date" in entry:
                entry["order_date"] = format_date(date_str=entry["order_date"])

        return list(orders.values())

    except Exception as e:
        # Best-effort: any parse/format failure degrades to an empty result.
        print("Failed to parse JSON from LLM response:", e)
        return []
    
# Image Handling Function
def image_process(image_path, order_id):
    """End-to-end pipeline for one invoice image.

    OCRs the image, extracts order dicts via the LLM, maps merchant, unit and
    product names against reference dictionaries, then stamps each order with
    ``order_id``.

    Parameters
    ----------
    image_path : str
        Path to the invoice image file.
    order_id :
        Identifier assigned to every extracted order (stored under
        the "order_id" key).

    Returns
    -------
    list[dict]
        Mapped order dicts; empty when OCR or extraction failed.
    """
    print(f"Start process image file: {os.path.basename(image_path)}")
    line()

    # Image to Text
    raw_text = process_ocr(image_path=image_path)
    print(f"Successfully extract raw text. Text: {raw_text}")
    line()

    # Text to JSON
    extracted_information = parse_image_text(
        text=raw_text,
        extract_model=LLM_model
    )
    print("Extracted Information.")
    line()

    # Mapping: normalize merchant, then unit, then product fields against
    # the reference JSON files (paths supplied via environment variables).
    merchant_mapped_data = mapping_merchant(
        information=extracted_information,
        json_path=os.getenv("MERCHANT_JSON_PATH"),
        normalization_rule=os.getenv("NORMALIZATION_RULE_PATH")
    )

    unit_merchant_mapped_data = mapping_unit(
        information=merchant_mapped_data,
        json_path=os.getenv("UNIT_JSON_PATH"),
        normalization_rule=os.getenv("NORMALIZATION_RULE_PATH")
    )

    product_unit_merchant_mapped_data = mapping_product(
        information=unit_merchant_mapped_data,
        json_path=os.getenv("PRODUCT_JSON_PATH"),
        normalization_rule=os.getenv("NORMALIZATION_RULE_PATH")
    )

    # Employee mapping intentionally skipped for now.
    processed_data = product_unit_merchant_mapped_data

    # Assign order id to every extracted order.
    for item in processed_data:
        item["order_id"] = order_id

    # Fixed message: product mapping is applied too, not just merchant + unit.
    print("Successfully mapped data (merchant + unit + product).")
    line()

    return processed_data