# Phong1's picture
# Update src/modules/audio_process.py
# 4e733b3 verified
import whisper
from dotenv import load_dotenv
import os
from rapidfuzz import process, fuzz
import pandas as pd
import json
import google.generativeai as genai
import re
from pathlib import Path
# Initialized Modules
from modules.mapping import mapping_employee, mapping_merchant, mapping_product, mapping_unit
from modules.formatting import format_date
# # Load the .env from the parent directory of this file
# env_path = Path(__file__).resolve().parent.parent / ".env"
# load_dotenv(dotenv_path=env_path)
# Load environment variables from a .env file (python-dotenv default lookup).
load_dotenv()
# Transcription model: Whisper "turbo". Model files are stored under
# $XDG_CACHE_HOME, falling back to /app/.cache when that variable is unset.
# NOTE: the model is loaded at import time of this module.
transcribe_model = whisper.load_model("turbo", download_root= os.getenv("XDG_CACHE_HOME", "/app/.cache"))
# Gemini credentials and model name are read from the environment.
# os.getenv returns None for either one when it is unset; nothing here
# validates the values before they are handed to the SDK.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
MODEL_NAME = os.getenv("MODEL_NAME")
genai.configure(api_key=GEMINI_API_KEY)
# Shared Gemini model instance; audio_process passes it to parse_audio_text.
LLM_model = genai.GenerativeModel(MODEL_NAME)
# Log separator helper
def line():
    """Print a row of 30 ``=`` characters to stdout as a visual separator."""
    separator = "=" * 30
    print(separator)
# Speech-to-text step
def process_audio(audio_path, transcribe_model):
    """Transcribe an audio file and return the recognised text.

    Args:
        audio_path: Path of the audio file, passed straight to the model.
        transcribe_model: Object exposing ``transcribe(audio_path)`` that
            returns a mapping with a ``"text"`` entry.

    Returns:
        The value stored under ``"text"`` in the transcription result, or an
        empty string when transcription (or the lookup) raises.
    """
    text = ""
    try:
        # Both the model call and the key lookup stay inside the guard so
        # that any failure degrades to an empty transcript.
        text = transcribe_model.transcribe(audio_path)["text"]
    except Exception as e:
        print(f"Trancribe failed: {e}")
    return text
# Parsing audio-text
def parse_audio_text(text, extract_model):
    """Extract structured orders from a Vietnamese invoice transcript via an LLM.

    The transcript is embedded in a prompt asking for a JSON object whose
    values are orders with the fields ``seller``, ``buyer``, ``product_name``,
    ``unit``, ``quantity`` and ``order_date``. The first ``{`` through the last
    ``}`` of the reply is parsed as JSON, and each order's ``order_date`` (when
    present) is normalised with ``format_date``.

    Args:
        text: Transcript of the audio invoice.
        extract_model: Object exposing ``generate_content(prompt)`` whose
            result has a ``text`` attribute (the Gemini model in this module).

    Returns:
        A list of order dicts. Empty when the transcript is blank, when the
        reply contains no parsable JSON object, or when post-processing fails.

    Raises:
        Whatever ``extract_model.generate_content`` raises; the request itself
        is deliberately left outside the parsing guard, as before.
    """
    # A blank transcript (process_audio returns "" on failure) holds no order
    # information; skip the request instead of asking the LLM to extract
    # orders from nothing.
    if not text or not str(text).strip():
        return []

    # NOTE(review): the prompt labels "đại lý mua" (the buying agent) as
    # `seller` and "đại lý bán" (the selling agent) as `buyer`, which looks
    # swapped. Left untouched because downstream mapping may rely on it --
    # confirm with the owner before changing.
    prompt = f"""
Dưới đây là nội dung hóa đơn bằng tiếng Việt. Hãy trích xuất tên đại lý mua (seller), tên đại lý bán (buyer), tên sản phẩm (product_name), đơn vị tính (unit), số lượng theo từng đơn hàng (quantity), ngày đặt hàng (order_date).
Văn bản:
{text}
Trả về kết quả dạng JSON:
{{
"order_1": {{
"seller": "...",
"buyer": "...",
"product_name": "...",
"unit": "...",
"quantity": "...",
"order_date": "..."
}},
...
}}
"""
    response = extract_model.generate_content(prompt)
    try:
        content = response.text
        # The reply may wrap the JSON in prose or code fences; keep only the
        # span from the first "{" to the last "}".
        match = re.search(r"\{[\s\S]*\}", content)
        if match is None:
            raise ValueError("No valid JSON found in Gemini output")
        extracted_json = json.loads(match.group(0))

        orders = []
        for key, order in extracted_json.items():
            # Callers index into every returned item, so anything that is not
            # an order object is dropped here rather than aborting the whole
            # reply or leaking a non-dict downstream.
            if not isinstance(order, dict):
                print(f"Skipping non-order entry {key!r} in LLM response")
                continue
            if "order_date" in order:
                order["order_date"] = format_date(date_str=order["order_date"])
            orders.append(order)
        return orders
    except Exception as e:
        print("Failed to parse JSON from LLM response:", e)
        return []
# End-to-end handler for one audio file
def audio_process(audio_path, order_id):
    """Run the full pipeline for one audio file and return the mapped orders.

    Steps: transcribe the audio with the module-level Whisper model, extract
    orders from the transcript with the module-level Gemini model, run the
    merchant, unit and product mappers in that order (employee mapping is
    skipped), then stamp every resulting item with ``order_id``.

    Args:
        audio_path: Path of the audio file to process.
        order_id: Identifier written to the ``"order_id"`` key of every item.

    Returns:
        The output of the last mapper, with ``"order_id"`` set on each item.
    """
    print(f"Start process audio file: {os.path.basename(audio_path)}")
    line()

    # Audio -> text
    raw_text = process_audio(audio_path=audio_path, transcribe_model=transcribe_model)
    print(f"Transcript is done. Transcription: {raw_text}")
    line()

    # Text -> list of order dicts
    orders = parse_audio_text(text=raw_text, extract_model=LLM_model)
    print("Extracted Information.")
    line()

    # Each mapper consumes the previous mapper's output; the lookup-table path
    # comes from its own environment variable, the normalization rules from a
    # shared one (read again before every call).
    mapping_steps = (
        (mapping_merchant, "MERCHANT_JSON_PATH"),
        (mapping_unit, "UNIT_JSON_PATH"),
        (mapping_product, "PRODUCT_JSON_PATH"),
    )
    processed_data = orders
    for mapper, path_variable in mapping_steps:
        processed_data = mapper(
            information=processed_data,
            json_path=os.getenv(path_variable),
            normalization_rule=os.getenv("NORMALIZATION_RULE_PATH"),
        )

    # Tag every mapped item with the caller-supplied order id
    for entry in processed_data:
        entry["order_id"] = order_id
    print("Successfully mapped data (merchant + unit).")
    line()
    return processed_data