# NOTE: the lines that stood here were residue from the web file viewer this
# source was copied from (a "Build error" status banner, "File size: 4,068 Bytes",
# a row of commit hashes and the line-number gutter). They are not part of the
# program and are kept only as this comment.
import whisper
from dotenv import load_dotenv
import os
from rapidfuzz import process, fuzz
import pandas as pd
import json
import google.generativeai as genai
import re
from pathlib import Path
# Initialized Modules
from modules.mapping import mapping_employee, mapping_merchant, mapping_product, mapping_unit
from modules.formatting import format_date
# # Load the .env from the parent directory of this file
# env_path = Path(__file__).resolve().parent.parent / ".env"
# load_dotenv(dotenv_path=env_path)
# Load environment variables from a .env file. No explicit path is passed, so
# python-dotenv's default lookup is used; every os.getenv() call below depends
# on this having run first.
load_dotenv()
# Transcription model: Whisper ("turbo" checkpoint).
# download_root is taken from XDG_CACHE_HOME and falls back to "/app/.cache"
# when that variable is not set.
transcribe_model = whisper.load_model("turbo", download_root= os.getenv("XDG_CACHE_HOME", "/app/.cache"))
# Gemini configuration. os.getenv() returns None for a missing variable, so
# both values below are None when the environment does not define them.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
MODEL_NAME = os.getenv("MODEL_NAME")
genai.configure(api_key=GEMINI_API_KEY)
# Gemini model instance shared by the functions in this module.
LLM_model = genai.GenerativeModel(MODEL_NAME)
# Line Split Function
def line():
    """Print a row of 30 '=' characters to separate sections of console output."""
    separator = "=" * 30
    print(separator)
# Audio to raw text
def process_audio(audio_path, transcribe_model):
    """Transcribe an audio file and return the recognised text.

    Args:
        audio_path: Path of the audio file, passed straight to
            ``transcribe_model.transcribe``.
        transcribe_model: Object exposing ``transcribe(audio_path)`` whose
            result is indexed with ``"text"``.

    Returns:
        The value stored under ``"text"`` in the transcription result, or an
        empty string when anything goes wrong (the error is printed, not raised).
    """
    try:
        return transcribe_model.transcribe(audio_path)["text"]
    except Exception as err:
        # Best effort: report the failure and let the caller continue with "".
        print(f"Trancribe failed: {err}")
    return ""
# Parsing audio-text
def parse_audio_text(text, extract_model):
    """Extract structured orders from a Vietnamese invoice transcript via an LLM.

    The transcript is embedded in a prompt asking for a JSON object of the
    form ``{"order_1": {...}, "order_2": {...}, ...}``. The first ``{`` to the
    last ``}`` of the model's reply is parsed as JSON, and every order that
    carries an ``"order_date"`` has it passed through ``format_date``.

    Args:
        text: Transcript text. ``None``, empty or whitespace-only input yields
            ``[]`` without calling the model, because there is nothing to
            extract and the model would only be able to invent orders.
        extract_model: Object exposing ``generate_content(prompt)`` whose
            result has a ``text`` attribute.

    Returns:
        A list of order dicts (the values of the parsed JSON object). Entries
        that are not dicts are skipped. ``[]`` is returned when the reply
        contains no parsable JSON or post-processing fails; the error is
        printed rather than raised.

    Raises:
        Whatever ``extract_model.generate_content`` raises; that call is
        intentionally left outside the ``try`` so request failures stay visible.
    """
    if not text or not text.strip():
        print("Skipping extraction: transcript is empty")
        return []

    # NOTE(review): the prompt pairs "đại lý mua" (buying dealer) with
    # `seller` and "đại lý bán" (selling dealer) with `buyer`, which looks
    # swapped. Left untouched because downstream mapping may rely on the
    # current output - confirm before changing.
    prompt = f"""
Dưới đây là nội dung hóa đơn bằng tiếng Việt. Hãy trích xuất tên đại lý mua (seller), tên đại lý bán (buyer), tên sản phẩm (product_name), đơn vị tính (unit), số lượng theo từng đơn hàng (quantity), ngày đặt hàng (order_date).
Văn bản:
{text}
Trả về kết quả dạng JSON:
{{
"order_1": {{
"seller": "...",
"buyer": "...",
"product_name": "...",
"unit": "...",
"quantity": "...",
"order_date": "..."
}},
...
}}
"""
    response = extract_model.generate_content(prompt)
    try:
        # The reply may wrap the JSON in prose or code fences, so grab the
        # outermost {...} span instead of parsing the whole text.
        match = re.search(r"\{[\s\S]*\}", response.text)
        if match is None:
            raise ValueError("No valid JSON found in Gemini output")
        extracted_json = json.loads(match.group(0))

        # Keep only well-formed orders: one stray scalar in the reply must
        # not discard the valid orders next to it.
        orders = [
            order for order in extracted_json.values() if isinstance(order, dict)
        ]
        for order in orders:
            if "order_date" in order:
                order["order_date"] = format_date(date_str=order["order_date"])
        return orders
    except Exception as e:
        # Best-effort boundary: malformed LLM output must not crash the pipeline.
        print("Failed to parse JSON from LLM response:", e)
        return []
# Audio Handling Function
def audio_process(audio_path, order_id):
    """Run the full pipeline for one audio file: transcribe, extract, map, tag.

    Steps, in order:
      1. Transcribe the audio with the module-level ``transcribe_model``.
      2. Extract orders from the transcript with the module-level ``LLM_model``.
      3. Pass the orders through ``mapping_merchant``, ``mapping_unit`` and
         ``mapping_product``, each configured from environment variables.
         Employee mapping is skipped.
      4. Set ``"order_id"`` on every resulting item.

    Progress messages are printed between the steps.

    Args:
        audio_path: Path of the audio file to process.
        order_id: Identifier written to the ``"order_id"`` key of every item.

    Returns:
        The output of the last mapping step, with ``"order_id"`` set on each item.
    """
    print(f"Start process audio file: {os.path.basename(audio_path)}")
    line()

    # Audio -> text
    transcript_text = process_audio(
        audio_path=audio_path,
        transcribe_model=transcribe_model,
    )
    print(f"Transcript is done. Transcription: {transcript_text}")
    line()

    # Text -> list of orders
    orders = parse_audio_text(text=transcript_text, extract_model=LLM_model)
    print("Extracted Information.")
    line()

    # Mapping chain: each step consumes the previous step's output.
    mapping_steps = (
        (mapping_merchant, "MERCHANT_JSON_PATH"),
        (mapping_unit, "UNIT_JSON_PATH"),
        (mapping_product, "PRODUCT_JSON_PATH"),
    )
    processed_data = orders
    for mapper, json_path_var in mapping_steps:
        processed_data = mapper(
            information=processed_data,
            json_path=os.getenv(json_path_var),
            normalization_rule=os.getenv("NORMALIZATION_RULE_PATH"),
        )

    # Tag every mapped item with the caller-supplied order id.
    for entry in processed_data:
        entry["order_id"] = order_id
    print("Successfully mapped data (merchant + unit).")
    line()
    return processed_data