Spaces:

Phong1
/

Multimodal-StreamLit-Demo

Build error

App Files Files Community

Multimodal-StreamLit-Demo / src /modules /image_process.py

Phong1

Update src/modules/image_process.py

efbe6dc verified 9 months ago

raw

history blame contribute delete

4.46 kB

	from dotenv import load_dotenv
	import os
	import pandas as pd
	import json
	from google.cloud import vision
	import google.generativeai as genai
	from google.oauth2 import service_account
	import re
	from pathlib import Path

	# Project-local modules
	from modules.mapping import mapping_employee, mapping_merchant, mapping_product, mapping_unit
	from modules.formatting import format_date

	# # Load the .env from the parent directory of this file
	# env_path = Path(__file__).resolve().parent.parent / ".env"
	# load_dotenv(dotenv_path=env_path)

	# Read variables from a .env file (if one is found) into the process
	# environment before any os.getenv() call below.
	load_dotenv()

	# Load the credential for the Cloud Vision API. The service-account key is
	# expected as a JSON document stored directly in an environment variable.
	service_account_info_str = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
	if not service_account_info_str:
	    # json.loads(None) would fail with an opaque TypeError at import time;
	    # fail with a message that names the missing setting instead.
	    raise RuntimeError(
	        "GOOGLE_APPLICATION_CREDENTIALS_JSON is not set; it must contain the "
	        "service-account key as a JSON string."
	    )
	service_account_info = json.loads(service_account_info_str)
	CREDENTIALS = service_account.Credentials.from_service_account_info(service_account_info)

	# Configure the Gemini client from the environment.
	GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
	MODEL_NAME = os.getenv("MODEL_NAME")
	genai.configure(api_key=GEMINI_API_KEY)

	# Shared Gemini model instance used by image_process().
	LLM_model = genai.GenerativeModel(MODEL_NAME)

	# Console separator helper
	def line():
	    """Print a horizontal rule made of 30 '=' characters to stdout."""
	    separator = "=" * 30
	    print(separator)

	# Image to raw text
	def process_ocr(image_path):
	    """Run Cloud Vision document text detection on an image file.

	    Args:
	        image_path: Path of the image file; it is opened in binary mode.

	    Returns:
	        The ``description`` of the first text annotation in the response, or
	        an empty string when no annotation was returned or when any step
	        failed. Failures are printed, not raised.
	    """
	    try:
	        client = vision.ImageAnnotatorClient(credentials=CREDENTIALS)

	        with open(image_path, "rb") as image_file:
	            content = image_file.read()

	        image = vision.Image(content=content)
	        response = client.document_text_detection(image=image)

	        # The Vision API can report a request failure inside the response
	        # instead of raising. Without this check such a failure is
	        # indistinguishable from "no text found"; route it through the same
	        # "OCR failed" path as every other error.
	        if response.error.message:
	            raise RuntimeError(response.error.message)

	        # Extract detected text
	        texts = response.text_annotations
	        return texts[0].description if texts else ""
	    except Exception as e:
	        # Deliberate best-effort boundary: callers receive "" on any failure.
	        print(f"OCR failed: {e}")
	        return ""

	# Parsing image-text
	def parse_image_text(text, extract_model):
	    """Ask the LLM to turn raw invoice text into a list of order dicts.

	    Args:
	        text: Raw invoice text; it is interpolated into the prompt.
	        extract_model: Object exposing ``generate_content(prompt)`` whose
	            result has a ``text`` attribute.

	    Returns:
	        A list with one dict per order found in the model's JSON answer. When
	        an order has an "order_date" key, its value is replaced by the result
	        of ``format_date``. Top-level entries that are not dicts are skipped.
	        Returns an empty list when no JSON object can be found or parsed.
	        Exceptions raised by ``generate_content`` itself are not caught here.
	    """
	    # NOTE(review): in the prompt "đại lý mua" (buying agent) is labelled
	    # "seller" and "đại lý bán" (selling agent) is labelled "buyer". This looks
	    # swapped; confirm against the downstream mapping before changing it.
	    prompt = f"""
	Dưới đây là nội dung hóa đơn bằng tiếng Việt. Hãy trích xuất tên đại lý mua (seller), tên đại lý bán (buyer), tên sản phẩm (product_name), đơn vị tính (unit), số lượng theo từng đơn hàng (quantity), ngày đặt hàng (order_date).

	Văn bản:
	{text}

	Trả về kết quả dạng JSON:
	{{
	"order_1": {{
	"seller": "...",
	"buyer": "...",
	"product_name": "...",
	"unit": "...",
	"quantity": "...",
	"order_date": "..."
	}},
	...
	}}
	"""
	    response = extract_model.generate_content(prompt)

	    try:
	        content = response.text
	        # The model may wrap the JSON in prose or a code fence, so take the
	        # span from the first "{" to the last "}".
	        match = re.search(r"\{[\s\S]*\}", content)
	        if not match:
	            raise ValueError("No valid JSON found in Gemini output")
	        extracted_json = json.loads(match.group(0))

	        orders = []
	        for key, order in extracted_json.items():
	            # A stray scalar or list at the top level must not discard the
	            # valid orders, nor be handed to callers that expect dicts.
	            if not isinstance(order, dict):
	                print(f"Skipping non-order entry in LLM response: {key!r}")
	                continue
	            # Format the date string
	            if "order_date" in order:
	                order["order_date"] = format_date(date_str=order["order_date"])
	            orders.append(order)

	        return orders  # List of orders
	    except Exception as e:
	        # Deliberate best-effort boundary: any parsing or formatting problem
	        # yields an empty result instead of propagating.
	        print("Failed to parse JSON from LLM response:", e)
	        return []

	# Image Handling Function
	def image_process(image_path, order_id):
	    """Extract order records from an invoice image and tag them with an id.

	    Steps: OCR the image, have the LLM structure the text, then pass the
	    result through the merchant, unit and product mappers in that order.
	    Progress is printed to stdout.

	    Args:
	        image_path: Path of the image file to process.
	        order_id: Value stored under "order_id" on every returned item.

	    Returns:
	        The output of the product mapping with "order_id" set on each item, or
	        an empty list when OCR produced no text.
	    """
	    print(f"Start process image file: {os.path.basename(image_path)}")
	    line()

	    # Image to Text
	    raw_text = process_ocr(image_path=image_path)
	    if not raw_text.strip():
	        # process_ocr returns "" on failure or when nothing was detected.
	        # Do not report success or send an empty invoice to the LLM, which
	        # could answer with made-up orders.
	        print("No text extracted from image; skipping extraction and mapping.")
	        line()
	        return []
	    print(f"Successfully extract raw text. Text: {raw_text}")
	    line()

	    # Text to JSON
	    extracted_information = parse_image_text(
	        text=raw_text,
	        extract_model=LLM_model,
	    )
	    print("Extracted Information.")
	    line()

	    # Mapping: every mapper uses the same normalization rule file.
	    normalization_rule = os.getenv("NORMALIZATION_RULE_PATH")

	    merchant_mapped_data = mapping_merchant(
	        information=extracted_information,
	        json_path=os.getenv("MERCHANT_JSON_PATH"),
	        normalization_rule=normalization_rule,
	    )

	    unit_merchant_mapped_data = mapping_unit(
	        information=merchant_mapped_data,
	        json_path=os.getenv("UNIT_JSON_PATH"),
	        normalization_rule=normalization_rule,
	    )

	    product_unit_merchant_mapped_data = mapping_product(
	        information=unit_merchant_mapped_data,
	        json_path=os.getenv("PRODUCT_JSON_PATH"),
	        normalization_rule=normalization_rule,
	    )

	    # Skipping employee
	    processed_data = product_unit_merchant_mapped_data

	    # Assign order id
	    for item in processed_data:
	        item["order_id"] = order_id

	    print("Successfully mapped data (merchant + unit).")
	    line()

	    return processed_data