Spaces:

bakhil-aissa
/

invoice_parser_yolo_paddleocr

Build error

App Files Files Community

invoice_parser_yolo_paddleocr / app.py

bakhil-aissa

Upload 4 files

35311e9 verified 6 months ago

raw

history blame contribute delete

2.17 kB

	import gradio as gr
	from torch import device
	from ultralytics import YOLO
	import os
	from PIL import Image
	from ocr import get_text_from_bbox


	# Load the detector weights from "best.pt" and move the model to the CPU in
	# a single expression; `model` is the module-level handle used by predict().
	model = YOLO("best.pt").to('cpu')


	# Class-index -> label lookup handed to get_text_from_bbox together with the
	# predicted class indices. A label's position in the tuple is its class
	# index (0-15). The strings are runtime values and are kept exactly as
	# written, spelling included.
	names = dict(enumerate((
	    'Discount_Percentage',
	    'Due_Date',
	    'Email_Client',
	    'Name_Client',
	    'Products',
	    'Remise',
	    'Subtotal',
	    'Tax',
	    'Tax_Precentage',
	    'Tel_Client',
	    'billing address',
	    'header',
	    'invoice date',
	    'invoice number',
	    'shipping address',
	    'total',
	)))


	def predict(image, img_size=640):
	    """Detect invoice fields in *image* and run OCR on the detected regions.

	    The image is stretched (aspect ratio is not preserved) to
	    ``img_size`` x ``img_size`` before inference, and the detections are
	    drawn on that resized copy. The untouched original is passed to
	    ``get_text_from_bbox`` along with the per-axis factors that relate the
	    resized image back to the original.

	    Args:
	        image: PIL image from the Gradio input component, or ``None`` when
	            nothing has been uploaded.
	        img_size: Side length, in pixels, of the square model input.

	    Returns:
	        A ``(annotated_image, extracted_text)`` tuple: the resized image
	        with detections drawn on it as an RGB PIL image, and whatever
	        ``get_text_from_bbox`` returns for the detected regions.

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    if image is None:
	        # Clicking "Detect" without an upload passes None; surface a readable
	        # message in the UI instead of an AttributeError traceback.
	        raise gr.Error("Please upload an invoice image first.")

	    image = image.convert("RGB")
	    w, h = image.size
	    org_image = image.copy()  # Keep the original-resolution image for OCR

	    # Ratio of original size to model-input size on each axis, i.e. the
	    # factor that takes a coordinate on the resized image back to the
	    # original (e.g. w=1600, img_size=640 -> 2.5; h=1131 -> ~1.77).
	    # NOTE(review): presumably get_text_from_bbox multiplies the boxes by
	    # these factors before cropping -- confirm against ocr.py.
	    scale_x = w / img_size
	    scale_y = h / img_size
	    scale = (scale_x, scale_y)

	    image = image.resize((img_size, img_size))
	    results = model.predict(
	        image, conf=0.4, imgsz=img_size, device='cpu', verbose=False
	    )
	    result = results[0]

	    # Results.plot() returns the annotated frame in BGR channel order
	    # (OpenCV convention). PIL assumes RGB, so reverse the channel axis;
	    # otherwise red and blue are swapped in the displayed image.
	    annotated_bgr = result.plot()
	    annotated_rgb = annotated_bgr[..., ::-1]

	    # Bounding boxes (x1, y1, x2, y2) and class indices as NumPy arrays.
	    # .cpu() is a no-op here but keeps the conversion valid on any device.
	    bboxes = result.boxes.xyxy.cpu().numpy()
	    cls_indices = result.boxes.cls.cpu().numpy()

	    # Extract text from the detected regions using the original image.
	    extracted_text = get_text_from_bbox(
	        org_image, bboxes, cls_indices, names, scale
	    )

	    return Image.fromarray(annotated_rgb), extracted_text

	# Gradio front end: an upload/annotated-image pair side by side, the OCR
	# result as JSON underneath, and a button that runs predict().
	# NOTE(review): the nesting below is reconstructed from the order of the
	# `with` statements (button assumed to sit in the left column) -- confirm
	# against the intended layout.
	with gr.Blocks() as demo:
	    gr.Markdown("# Invoice Detection with YOLOv11 + paddleOCR")
	    gr.Markdown("Upload an invoice image to detect and annotate bounding boxes.")

	    with gr.Row():
	        with gr.Column():
	            image_input = gr.Image(type="pil", label="Upload Invoice Image")
	            predict_button = gr.Button(value="Detect")
	        with gr.Column():
	            output_image = gr.Image(type="pil", label="Annotated Image")

	    with gr.Row():
	        json_output = gr.JSON(label="Extracted Text (JSON)", visible=True)

	    # The event listener must be registered inside the Blocks context.
	    predict_button.click(
	        fn=predict,
	        inputs=[image_input],
	        outputs=[output_image, json_output],
	    )

	# Start the app as soon as the module runs.
	demo.launch()