invoice-ocr-api / model_utils.py
Namra-Satva's picture
Update model_utils.py
c998c72 verified
import cv2
import pytesseract
import re
from PIL import Image
from ultralytics import YOLO
# Weights file of the trained YOLO invoice-field detector.
MODEL_PATH = "yolov8m_invoiceOCR.pt"

# Labels predicted by the detector. A detection's class id is used as an index
# into this list, so the order must not change. Spellings (including
# "Tax_Precentage") are kept exactly as-is because they are the keys of the
# result dictionary returned to callers.
class_names = [
    "Discount_Percentage",
    "Due_Date",
    "Email_Client",
    "Name_Client",
    "Products",
    "Remise",
    "Subtotal",
    "Tax",
    "Tax_Precentage",
    "Tel_Client",
    "billing address",
    "header",
    "invoice date",
    "invoice number",
    "shipping address",
    "total",
]

# Instantiate the YOLOv8 model once, at import time, and share it module-wide.
model = YOLO(MODEL_PATH)
def initialize_data_dict():
    """Return a fresh, empty result record with one entry per detector label.

    The "Products" entry starts as an empty list (line items are appended to
    it); every other label starts as an empty string.
    """
    record = {}
    for field in class_names:
        if field == "Products":
            record[field] = []
        else:
            record[field] = ""
    return record
# Matches "<qty> <description> <unit price> <amount>" at the start of a line.
# Both prices need exactly two decimals and may contain comma separators
# (e.g. "1,250.00"). Compiled once instead of on every line.
_PRODUCT_LINE_RE = re.compile(
    r"(\d+)\s+(.*)\s+([\d,]+\.\d{2})\s+([\d,]+\.\d{2})"
)


def parse_products(raw_text):
    """Turn the OCR text of a products table into a list of line-item dicts.

    Each non-blank line yields one dict with the keys ``qty``,
    ``description``, ``unit_price`` and ``amount``:

    * Lines shaped like ``"2 Widget 10.00 20.00"`` are split into their four
      parts; ``qty``, ``unit_price`` and ``amount`` are kept as the matched
      strings (no numeric conversion is done).
    * Any other non-blank line is kept as a description-only entry whose
      ``qty``, ``unit_price`` and ``amount`` are the integer ``0``.

    Args:
        raw_text: Newline-separated text. ``None`` or an empty string yields
            an empty list.

    Returns:
        A list of dicts, in the order the lines appear in ``raw_text``.
    """
    if not raw_text:
        return []

    structured = []
    for raw_line in raw_text.split('\n'):
        # Strip before matching: the pattern is anchored at the start of the
        # string, so leading whitespace would otherwise push a valid product
        # row into the description-only fallback.
        line = raw_line.strip()
        if not line:
            continue

        match = _PRODUCT_LINE_RE.match(line)
        if match:
            qty, desc, unit_price, amount = match.groups()
            structured.append({
                "qty": qty,
                "description": desc.strip(),
                "unit_price": unit_price,
                "amount": amount,
            })
        else:
            structured.append({
                "qty": 0,
                "description": line,
                "unit_price": 0,
                "amount": 0,
            })
    return structured
def extract_invoice_data_from_image(image_path: str) -> dict:
    """Detect invoice fields in an image and OCR each detected region.

    The module-level YOLO model is run on ``image_path``; every detected box
    is cropped out of the image and passed to Tesseract. The recognized text
    is stored under the label of the detected class.

    Args:
        image_path: Path of the invoice image to process.

    Returns:
        A dict with one key per entry of ``class_names``. ``"Products"`` holds
        a list of line-item dicts produced by ``parse_products`` (accumulated
        over all "Products" boxes). Every other key holds the OCR text of the
        last processed box of that class that yielded non-empty text, or
        ``""`` if there was none.

    Raises:
        ValueError: If the image cannot be read from ``image_path``.
    """
    image_bgr = cv2.imread(image_path)
    if image_bgr is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; fail here with a clear message instead of an
        # opaque assertion inside cvtColor.
        raise ValueError(f"Could not read image: {image_path!r}")

    # Convert OpenCV's BGR channel order to RGB before handing over to PIL.
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(image_rgb)

    # Only the first result is used (a single image path is passed in).
    results = model(image_path)[0]
    data = initialize_data_dict()

    for box in results.boxes:
        # Corner coordinates of the box, truncated to whole pixels.
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        cls_id = int(box.cls[0])
        label = class_names[cls_id]

        cropped_img = pil_img.crop((x1, y1, x2, y2))
        # "--psm 6": Tesseract treats the crop as a single uniform text block.
        extracted_text = pytesseract.image_to_string(
            cropped_img, config='--psm 6'
        ).strip()

        # Boxes with no recognized text leave the current value untouched.
        if not extracted_text:
            continue

        if label == "Products":
            data["Products"].extend(parse_products(extracted_text))
        else:
            data[label] = extracted_text

    return data