import json
from rapidfuzz import process, fuzz
# Initialized Modules
from modules.normalization import normalization
# Mapping merchant
def mapping_merchant(information, json_path, normalization_rule, score_threshold=80):
    """Attach "seller_id" and "buyer_id" to each item via fuzzy name matching.

    Args:
        information: list of dicts, each with "seller" and "buyer" keys;
            mutated in place.
        json_path: path to a JSON file mapping merchant name -> merchant ID.
        normalization_rule: rule object forwarded to ``normalization``.
        score_threshold: minimum token_sort_ratio score (0-100) required to
            accept a match; below it the ID is set to None. Defaults to 80,
            the original hard-coded cutoff.

    Returns:
        The same ``information`` list, mutated in place.
    """
    # Load from merchant JSON file
    with open(json_path, "r", encoding="utf-8") as f:
        name_id_dict = json.load(f)
    # Normalize every merchant name once up front so the matcher always
    # compares normalized text against normalized text.
    normalized_cached_map = {
        normalization(text=k, normalization_rule=normalization_rule): v
        for k, v in name_id_dict.items()
    }
    name_list = list(normalized_cached_map.keys())

    def _match_id(raw_name):
        # Return the mapped ID for raw_name, or None when no candidate
        # reaches score_threshold (or the candidate list is empty, in
        # which case extractOne returns None rather than a 3-tuple).
        normalized = normalization(text=raw_name, normalization_rule=normalization_rule)
        best = process.extractOne(normalized, name_list, scorer=fuzz.token_sort_ratio)
        if best is None:
            return None
        match, score, _ = best
        return normalized_cached_map[match] if score >= score_threshold else None

    for item in information:
        # Match seller
        item["seller_id"] = _match_id(item["seller"])
        # Match buyer
        item["buyer_id"] = _match_id(item["buyer"])
    return information
# Mapping unit
def mapping_unit(information, json_path, normalization_rule, score_threshold=80):
    """Attach "unit_id" to each item via fuzzy matching of its "unit" field.

    Args:
        information: list of dicts, each with a "unit" key; mutated in place.
        json_path: path to a JSON file mapping unit name -> unit ID.
        normalization_rule: rule object forwarded to ``normalization``.
        score_threshold: minimum token_sort_ratio score (0-100) required to
            accept a match. Defaults to 80, the original hard-coded cutoff.

    Returns:
        The same ``information`` list, mutated in place. When no candidate
        reaches the threshold, "unit_id" falls back to the raw "unit" value
        (preserving the original best-effort behavior).
    """
    # Load from unit JSON file
    with open(json_path, "r", encoding="utf-8") as f:
        unit_id_dict = json.load(f)
    # Create cached normalized dictionary (normalize each name only once).
    normalized_cached_map = {
        normalization(text=k, normalization_rule=normalization_rule): v
        for k, v in unit_id_dict.items()
    }
    unit_list = list(normalized_cached_map.keys())
    for item in information:
        # Match unit; extractOne returns None on an empty candidate list,
        # so guard before unpacking to avoid a TypeError.
        unit = normalization(text=item["unit"], normalization_rule=normalization_rule)
        best = process.extractOne(unit, unit_list, scorer=fuzz.token_sort_ratio)
        if best is not None and best[1] >= score_threshold:
            item["unit_id"] = normalized_cached_map[best[0]]
        else:
            # No confident match: keep the raw unit string as the ID.
            item["unit_id"] = item["unit"]
    return information
# Mapping employee
def mapping_employee(information, json_path, normalization_rule):
    """Load the employee name -> ID dictionary from *json_path*.

    NOTE(review): this function looks unfinished — it loads the JSON
    mapping but never matches it against *information* and implicitly
    returns None, unlike the sibling mapping_* functions which mutate
    and return *information*. The *information* and *normalization_rule*
    parameters are currently unused. TODO: confirm intended behavior.
    """
    # Load from employee JSON file
    with open(json_path, "r", encoding="utf-8") as f:
        employee_id_dict = json.load(f)
# Mapping product
def mapping_product(information, json_path, normalization_rule, score_threshold=80):
    """Attach "product_id" to each item by fuzzy-matching (name, unit) pairs.

    The JSON file maps product_id -> [product_name, unit_id]. Each item's
    normalized ("product_name", "unit_id") pair is scored against every
    known pair and the best average score wins if it reaches the threshold.

    Args:
        information: list of dicts with "product_name" and "unit_id" keys;
            mutated in place.
        json_path: path to the product JSON file described above.
        normalization_rule: rule object forwarded to ``normalization``.
        score_threshold: minimum average score (0-100) required to accept a
            match; below it "product_id" is set to None. Defaults to 80.

    Returns:
        The same ``information`` list, mutated in place.
    """
    # Load from product JSON file
    with open(json_path, "r", encoding="utf-8") as f:
        product_id_dict = json.load(f)
    # Create cached normalized dictionary keyed by (name, unit) tuples.
    normalized_cached_map = {
        (normalization(text=product_name, normalization_rule=normalization_rule),
         normalization(text=unit_id, normalization_rule=normalization_rule)): product_id
        for product_id, (product_name, unit_id) in product_id_dict.items()
    }
    product_keys = list(normalized_cached_map.keys())
    for item in information:
        # Normalize product_name and unit_id for this item.
        normalized_name = normalization(text=item["product_name"], normalization_rule=normalization_rule)
        normalized_unit = normalization(text=item["unit_id"], normalization_rule=normalization_rule)
        # Score name and unit JOINTLY per candidate key. The previous
        # approach matched the two fields independently and recombined the
        # winners, which could produce a (name, unit) pair absent from the
        # dictionary and raise KeyError; joint scoring guarantees the
        # chosen key exists in normalized_cached_map.
        best_key = None
        best_score = -1.0
        for name_key, unit_key in product_keys:
            average_score = (fuzz.token_sort_ratio(normalized_name, name_key)
                             + fuzz.token_sort_ratio(normalized_unit, unit_key)) / 2
            if average_score > best_score:
                best_key = (name_key, unit_key)
                best_score = average_score
        if best_key is not None and best_score >= score_threshold:
            item["product_id"] = normalized_cached_map[best_key]
        else:
            item["product_id"] = None
    return information