Spaces:
Build error
Build error
| import json | |
| from rapidfuzz import process, fuzz | |
| # Initialized Modules | |
| from modules.normalization import normalization | |
| # Mapping merchant | |
def mapping_merchant(information, json_path, normalization_rule, score_threshold=80):
    """Attach ``seller_id`` and ``buyer_id`` to each record via fuzzy name lookup.

    The JSON file at ``json_path`` is loaded as a ``{name: id}`` object. Every
    name is passed through ``normalization`` once up front, and each record's
    ``"seller"`` and ``"buyer"`` values are normalized the same way and matched
    against those names with ``fuzz.token_sort_ratio``.

    Args:
        information: Iterable of dict-like records exposing ``"seller"`` and
            ``"buyer"`` keys. Records are updated in place.
        json_path: Path of the UTF-8 JSON file holding the name-to-id object.
        normalization_rule: Forwarded unchanged to ``normalization``.
        score_threshold: Minimum score (inclusive) for a match to be accepted.
            Defaults to 80, the value that used to be hard-coded.

    Returns:
        The ``information`` object that was passed in. Each record gains
        ``"seller_id"`` and ``"buyer_id"``, set to the matched id or ``None``
        when no candidate reaches ``score_threshold``.
    """
    with open(json_path, "r", encoding="utf-8") as f:
        name_id_dict = json.load(f)

    # Normalize the reference names once. Names that normalize to the same
    # string collapse into one entry; the later one in the file wins.
    normalized_cached_map = {
        normalization(text=name, normalization_rule=normalization_rule): merchant_id
        for name, merchant_id in name_id_dict.items()
    }
    name_list = list(normalized_cached_map)

    for item in information:
        # Seller and buyer are resolved against the same merchant list, so
        # one loop covers both roles (seller first, as before).
        for role in ("seller", "buyer"):
            matched_id = None
            # With no candidates extractOne returns None instead of a tuple;
            # skip matching entirely and leave the id as None.
            if name_list:
                query = normalization(
                    text=item[role], normalization_rule=normalization_rule
                )
                match = process.extractOne(
                    query, name_list, scorer=fuzz.token_sort_ratio
                )
                if match is not None and match[1] >= score_threshold:
                    matched_id = normalized_cached_map[match[0]]
            item[role + "_id"] = matched_id
    return information
| # Mapping unit | |
def mapping_unit(information, json_path, normalization_rule, score_threshold=80):
    """Attach ``unit_id`` to each record via fuzzy lookup of its ``unit``.

    The JSON file at ``json_path`` is loaded as a ``{unit_name: id}`` object.
    Unit names are passed through ``normalization`` once up front, and each
    record's ``"unit"`` value is normalized the same way and matched against
    them with ``fuzz.token_sort_ratio``.

    Args:
        information: Iterable of dict-like records exposing a ``"unit"`` key.
            Records are updated in place.
        json_path: Path of the UTF-8 JSON file holding the unit-to-id object.
        normalization_rule: Forwarded unchanged to ``normalization``.
        score_threshold: Minimum score (inclusive) for a match to be accepted.
            Defaults to 80, the value that used to be hard-coded.

    Returns:
        The ``information`` object that was passed in. Each record gains
        ``"unit_id"``: the matched id, or the record's original ``"unit"``
        value when nothing reaches ``score_threshold``.
    """
    with open(json_path, "r", encoding="utf-8") as f:
        unit_id_dict = json.load(f)

    # Normalize the reference units once. Units that normalize to the same
    # string collapse into one entry; the later one in the file wins.
    normalized_cached_map = {
        normalization(text=name, normalization_rule=normalization_rule): unit_id
        for name, unit_id in unit_id_dict.items()
    }
    unit_list = list(normalized_cached_map)

    for item in information:
        raw_unit = item["unit"]
        # Unmatched units keep their raw text as the id.
        resolved = raw_unit
        # With no candidates extractOne returns None instead of a tuple, so
        # only attempt a match when there is something to match against.
        if unit_list:
            query = normalization(text=raw_unit, normalization_rule=normalization_rule)
            match = process.extractOne(query, unit_list, scorer=fuzz.token_sort_ratio)
            if match is not None and match[1] >= score_threshold:
                resolved = normalized_cached_map[match[0]]
        item["unit_id"] = resolved
    return information
| # Mapping employee | |
def mapping_employee(information, json_path, normalization_rule):
    """Parse the employee JSON file; no employee mapping is applied.

    Args:
        information: Not used by the current implementation.
        json_path: Path of the UTF-8 JSON file to parse.
        normalization_rule: Not used by the current implementation.

    Returns:
        None. The records in ``information`` are left untouched.
    """
    # NOTE(review): this looks like an unfinished stub. The sibling mapping_*
    # functions return ``information``; this one only validates that the file
    # exists and contains JSON, then discards the parsed content.
    with open(json_path, mode="r", encoding="utf-8") as employee_file:
        json.load(employee_file)
| # Mapping product | |
def mapping_product(information, json_path, normalization_rule, score_threshold=80):
    """Attach ``product_id`` to each record by fuzzy-matching name and unit.

    The JSON file at ``json_path`` is loaded as a
    ``{product_id: [product_name, unit_id]}`` object. Both components are
    passed through ``normalization`` and indexed as ``(name, unit)`` pairs.
    Each record's ``"product_name"`` and ``"unit_id"`` are normalized the same
    way and scored with ``fuzz.token_sort_ratio``; the score of a pair is the
    mean of its name score and unit score.

    Args:
        information: Iterable of dict-like records exposing ``"product_name"``
            and ``"unit_id"`` keys. Records are updated in place.
        json_path: Path of the UTF-8 JSON file holding the product catalogue.
        normalization_rule: Forwarded unchanged to ``normalization``.
        score_threshold: Minimum mean score (inclusive) for a match to be
            accepted. Defaults to 80, the value that used to be hard-coded.

    Returns:
        The ``information`` object that was passed in. Each record gains
        ``"product_id"``: the id of the best-scoring catalogue pair, or
        ``None`` when no pair reaches ``score_threshold``.
    """
    with open(json_path, "r", encoding="utf-8") as f:
        product_id_dict = json.load(f)

    # Index the catalogue by its normalized (name, unit) pair. Products whose
    # pairs normalize identically collapse; the later one in the file wins.
    normalized_cached_map = {
        (
            normalization(text=product_name, normalization_rule=normalization_rule),
            normalization(text=unit_id, normalization_rule=normalization_rule),
        ): product_id
        for product_id, (product_name, unit_id) in product_id_dict.items()
    }
    product_keys = list(normalized_cached_map)
    # The choice lists do not depend on the record, so build them once.
    name_choices = [key[0] for key in product_keys]
    unit_choices = [key[1] for key in product_keys]

    for item in information:
        product_id = None
        # With an empty catalogue extractOne returns None instead of a tuple,
        # so skip matching and leave product_id as None.
        if product_keys:
            query_name = normalization(
                text=item["product_name"], normalization_rule=normalization_rule
            )
            query_unit = normalization(
                text=item["unit_id"], normalization_rule=normalization_rule
            )
            name_hit = process.extractOne(
                query_name, name_choices, scorer=fuzz.token_sort_ratio
            )
            unit_hit = process.extractOne(
                query_unit, unit_choices, scorer=fuzz.token_sort_ratio
            )
            if name_hit is not None and unit_hit is not None:
                best_key = (name_hit[0], unit_hit[0])
                best_score = (name_hit[1] + unit_hit[1]) / 2
                # The best name and the best unit are found independently and
                # may belong to different products, in which case the combined
                # key is not in the catalogue. Re-score real pairs jointly.
                # The independent mean is an upper bound on any pair's mean,
                # so the rescan is pointless when it is already below the
                # threshold.
                if (
                    best_score >= score_threshold
                    and best_key not in normalized_cached_map
                ):
                    best_key, best_score = max(
                        (
                            (
                                key,
                                (
                                    fuzz.token_sort_ratio(query_name, key[0])
                                    + fuzz.token_sort_ratio(query_unit, key[1])
                                )
                                / 2,
                            )
                            for key in product_keys
                        ),
                        key=lambda scored: scored[1],
                    )
                if best_score >= score_threshold:
                    product_id = normalized_cached_map[best_key]
        item["product_id"] = product_id
    return information