"""Audio order pipeline: transcribe audio with Whisper, extract orders with Gemini, map fields."""

import json
import os
import re
from pathlib import Path

import google.generativeai as genai
import pandas as pd
import whisper
from dotenv import load_dotenv
from rapidfuzz import process, fuzz

# Project modules
from modules.mapping import mapping_employee, mapping_merchant, mapping_product, mapping_unit
from modules.formatting import format_date

# Alternative kept for reference: load the .env from the parent directory of this file.
# env_path = Path(__file__).resolve().parent.parent / ".env"
# load_dotenv(dotenv_path=env_path)
load_dotenv()

# Transcription model: Whisper. Loaded once at import time; weights are cached
# under XDG_CACHE_HOME (falls back to /app/.cache).
transcribe_model = whisper.load_model(
    "turbo",
    download_root=os.getenv("XDG_CACHE_HOME", "/app/.cache"),
)

# Gemini configuration, read from the environment.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
MODEL_NAME = os.getenv("MODEL_NAME")
genai.configure(api_key=GEMINI_API_KEY)

# Gemini model used for information extraction.
LLM_model = genai.GenerativeModel(MODEL_NAME)


def line():
    """Print a 30-character separator line to stdout."""
    print("=" * 30)


def process_audio(audio_path, transcribe_model):
    """Transcribe an audio file to raw text.

    Args:
        audio_path: Path of the audio file passed to ``transcribe_model.transcribe``.
        transcribe_model: Object exposing ``transcribe(path)`` that returns a
            mapping containing a ``"text"`` entry.

    Returns:
        The transcribed text, or ``""`` if transcription raised (best-effort:
        the error is printed, not propagated).
    """
    try:
        transcript = transcribe_model.transcribe(audio_path)
        return transcript["text"]
    except Exception as e:
        print(f"Trancribe failed: {e}")
        return ""


def parse_audio_text(text, extract_model):
    """Ask the LLM to extract order records from transcribed invoice text.

    Args:
        text: Transcribed invoice content that is embedded in the prompt.
        extract_model: Object exposing ``generate_content(prompt)`` whose result
            has a ``text`` attribute.

    Returns:
        A list of order dicts (the values of the JSON object returned by the
        model). Any ``order_date`` value is passed through ``format_date``.
        Returns ``[]`` if the request fails or no parsable JSON object is found.
    """
    # NOTE(review): the prompt pairs "đại lý mua" (buying agent) with "seller" and
    # "đại lý bán" (selling agent) with "buyer", which looks swapped. Left unchanged
    # because downstream mapping may rely on the current behaviour -- confirm.
    prompt = f"""
    Dưới đây là nội dung hóa đơn bằng tiếng Việt. Hãy trích xuất tên đại lý mua (seller), tên đại lý bán (buyer), tên sản phẩm (product_name), đơn vị tính (unit), số lượng theo từng đơn hàng (quantity), ngày đặt hàng (order_date).

    Văn bản:
    {text}

    Trả về kết quả dạng JSON:
    {{
        "order_1": {{
            "seller": "...",
            "buyer": "...",
            "product_name": "...",
            "unit": "...",
            "quantity": "...",
            "order_date": "..."
        }},
        ...
    }}
    """

    # The request itself can fail (network, quota, invalid key); keep the same
    # best-effort contract as the parsing step instead of crashing the caller.
    try:
        response = extract_model.generate_content(prompt)
    except Exception as e:
        print("LLM request failed:", e)
        return []

    try:
        content = response.text

        # The model may wrap the JSON in prose or code fences; grab the outermost {...}.
        match = re.search(r"\{[\s\S]*\}", content)
        if not match:
            raise ValueError("No valid JSON found in Gemini output")

        extracted_json = json.loads(match.group(0))

        orders = []
        for key, order in extracted_json.items():
            # A malformed entry must not discard the valid orders around it.
            if not isinstance(order, dict):
                print(f"Skipping non-object order entry: {key}")
                continue
            if "order_date" in order:
                order["order_date"] = format_date(date_str=order["order_date"])
            orders.append(order)

        return orders  # List of orders
    except Exception as e:
        print("Failed to parse JSON from LLM response:", e)
        return []


def audio_process(audio_path, order_id):
    """Run the full pipeline for one audio file: transcribe, extract, map, tag.

    Args:
        audio_path: Path of the audio file to process.
        order_id: Identifier written to the ``"order_id"`` key of every
            resulting item.

    Returns:
        The output of the merchant -> unit -> product mapping chain, with
        ``order_id`` assigned on each item.
    """
    print(f"Start process audio file: {os.path.basename(audio_path)}")
    line()

    # Audio to text
    raw_text = process_audio(
        audio_path=audio_path,
        transcribe_model=transcribe_model,
    )
    print(f"Transcript is done. Transcription: {raw_text}")
    line()

    # Text to JSON. An empty transcript means transcription failed or the audio
    # had no speech; skip the LLM call rather than let it invent orders.
    if raw_text and raw_text.strip():
        extracted_information = parse_audio_text(
            text=raw_text,
            extract_model=LLM_model,
        )
    else:
        print("Empty transcript, skipping extraction.")
        extracted_information = []
    print("Extracted Information.")
    line()

    # Mapping: each step consumes the previous step's output. All three share
    # the same normalization rule file.
    normalization_rule_path = os.getenv("NORMALIZATION_RULE_PATH")

    merchant_mapped_data = mapping_merchant(
        information=extracted_information,
        json_path=os.getenv("MERCHANT_JSON_PATH"),
        normalization_rule=normalization_rule_path,
    )

    unit_merchant_mapped_data = mapping_unit(
        information=merchant_mapped_data,
        json_path=os.getenv("UNIT_JSON_PATH"),
        normalization_rule=normalization_rule_path,
    )

    product_unit_merchant_mapped_data = mapping_product(
        information=unit_merchant_mapped_data,
        json_path=os.getenv("PRODUCT_JSON_PATH"),
        normalization_rule=normalization_rule_path,
    )

    # Employee mapping is intentionally skipped.
    processed_data = product_unit_merchant_mapped_data

    # Assign order id
    for item in processed_data:
        item["order_id"] = order_id

    print("Successfully mapped data (merchant + unit).")
    line()

    return processed_data