import json

from rapidfuzz import process, fuzz

# Initialized Modules
from modules.normalization import normalization

# Minimum fuzzy score (0-100 scale used by rapidfuzz) for a candidate to be
# accepted as a match.
_MATCH_THRESHOLD = 80


def _best_match(text, choices, normalization_rule):
    """Return the entry of *choices* that best matches *text*, or None.

    *text* is normalized with the project's ``normalization`` helper and
    compared against *choices* (expected to be normalized already) using
    ``fuzz.token_sort_ratio``.

    Returns:
        The matched choice when its score is at least ``_MATCH_THRESHOLD``;
        ``None`` when *choices* is empty or no candidate scores high enough.
    """
    query = normalization(text=text, normalization_rule=normalization_rule)
    # extractOne returns None (not a tuple) when there is nothing to match
    # against, so it must be checked before unpacking.
    result = process.extractOne(query, choices, scorer=fuzz.token_sort_ratio)
    if result is None:
        return None
    match, score, _ = result
    return match if score >= _MATCH_THRESHOLD else None


# Mapping merchant
def mapping_merchant(information, json_path, normalization_rule):
    """Attach ``seller_id`` and ``buyer_id`` to every item of *information*.

    Args:
        information: Iterable of dicts, each with ``"seller"`` and ``"buyer"``
            entries. The dicts are modified in place.
        json_path: Path to a UTF-8 JSON object mapping merchant name -> ID.
        normalization_rule: Passed through to ``normalization``.

    Returns:
        The same *information* object. Each item gains ``"seller_id"`` and
        ``"buyer_id"``, set to the matched ID or ``None`` when no merchant
        name is similar enough.
    """
    # Load from merchant JSON file
    with open(json_path, "r", encoding="utf-8") as f:
        name_id_dict = json.load(f)

    # Create cached normalized dictionary
    normalized_cached_map = {
        normalization(text=k, normalization_rule=normalization_rule): v
        for k, v in name_id_dict.items()
    }
    name_list = list(normalized_cached_map)

    for item in information:
        # Match seller
        seller_match = _best_match(item["seller"], name_list, normalization_rule)
        item["seller_id"] = (
            None if seller_match is None else normalized_cached_map[seller_match]
        )

        # Match buyer
        buyer_match = _best_match(item["buyer"], name_list, normalization_rule)
        item["buyer_id"] = (
            None if buyer_match is None else normalized_cached_map[buyer_match]
        )

    return information


# Mapping unit
def mapping_unit(information, json_path, normalization_rule):
    """Attach ``unit_id`` to every item of *information*.

    Args:
        information: Iterable of dicts, each with a ``"unit"`` entry. The
            dicts are modified in place.
        json_path: Path to a UTF-8 JSON object mapping unit name -> ID.
        normalization_rule: Passed through to ``normalization``.

    Returns:
        The same *information* object. Each item gains ``"unit_id"``: the
        matched ID, or the item's original ``"unit"`` value when no unit name
        is similar enough.
    """
    # Load from unit JSON file
    with open(json_path, "r", encoding="utf-8") as f:
        unit_id_dict = json.load(f)

    # Create cached normalized dictionary
    normalized_cached_map = {
        normalization(text=k, normalization_rule=normalization_rule): v
        for k, v in unit_id_dict.items()
    }
    unit_list = list(normalized_cached_map)

    for item in information:
        # Match unit
        unit_match = _best_match(item["unit"], unit_list, normalization_rule)
        if unit_match is None:
            # Unknown unit: keep the raw value rather than dropping it.
            item["unit_id"] = item["unit"]
        else:
            item["unit_id"] = normalized_cached_map[unit_match]

    return information


# Mapping employee
def mapping_employee(information, json_path, normalization_rule):
    """Placeholder for employee mapping; currently maps nothing.

    The employee JSON file is opened and parsed (so a missing or malformed
    file still raises), but its content is not used yet.

    Args:
        information: Iterable of dicts; left untouched.
        json_path: Path to a UTF-8 JSON file with employee data.
        normalization_rule: Unused for now.

    Returns:
        *information* unchanged, so the function can be chained like the
        other ``mapping_*`` helpers.
    """
    # Load from employee JSON file
    with open(json_path, "r", encoding="utf-8") as f:
        json.load(f)

    # TODO: implement the employee matching; no fields are added yet.
    return information


# Mapping product
def mapping_product(information, json_path, normalization_rule):
    """Attach ``product_id`` to every item of *information*.

    A product is identified by its (name, unit ID) pair. Every known pair is
    scored as a whole: the average of the name score and the unit score, both
    computed with ``fuzz.token_sort_ratio``. Scoring the pair jointly
    guarantees the winner is a real catalogue entry; picking the best name and
    the best unit independently can combine two different products into a
    pair that does not exist.

    Args:
        information: Iterable of dicts, each with ``"product_name"`` and
            ``"unit_id"`` entries. The dicts are modified in place.
        json_path: Path to a UTF-8 JSON object mapping
            product ID -> [product name, unit ID].
        normalization_rule: Passed through to ``normalization``.

    Returns:
        The same *information* object. Each item gains ``"product_id"``: the
        ID of the best-scoring pair when its average score is at least
        ``_MATCH_THRESHOLD``, otherwise ``None``.
    """
    # Load from product JSON file
    with open(json_path, "r", encoding="utf-8") as f:
        product_id_dict = json.load(f)

    # Create cached normalized dictionary
    normalized_cached_map = {
        (
            normalization(text=product_name, normalization_rule=normalization_rule),
            normalization(text=unit_id, normalization_rule=normalization_rule),
        ): product_id
        for product_id, (product_name, unit_id) in product_id_dict.items()
    }
    product_keys = list(normalized_cached_map)
    # Distinct names/units, so each one is scored only once per item.
    known_names = {name for name, _ in product_keys}
    known_units = {unit for _, unit in product_keys}

    for item in information:
        # Normalize product_name and unit_id
        normalized_product_name = normalization(
            text=item["product_name"], normalization_rule=normalization_rule
        )
        normalized_unit_id = normalization(
            text=item["unit_id"], normalization_rule=normalization_rule
        )

        if not product_keys:
            # Empty catalogue: nothing can match.
            item["product_id"] = None
            continue

        name_scores = {
            name: fuzz.token_sort_ratio(normalized_product_name, name)
            for name in known_names
        }
        unit_scores = {
            unit: fuzz.token_sort_ratio(normalized_unit_id, unit)
            for unit in known_units
        }

        # Best existing (name, unit) pair by combined score.
        matched_key = max(
            product_keys,
            key=lambda key: name_scores[key[0]] + unit_scores[key[1]],
        )

        # Calculate the average matching score
        average_score = (
            name_scores[matched_key[0]] + unit_scores[matched_key[1]]
        ) / 2
        if average_score >= _MATCH_THRESHOLD:
            item["product_id"] = normalized_cached_map[matched_key]
        else:
            item["product_id"] = None

    return information