File size: 3,978 Bytes
51db8d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import json
from rapidfuzz import process, fuzz

# Initialized Modules
from modules.normalization import normalization

# Mapping merchant
def mapping_merchant(information, json_path, normalization_rule):
    """Attach ``seller_id`` and ``buyer_id`` to each record by fuzzy-matching
    normalized seller/buyer names against a merchant name->id JSON map.

    Args:
        information: list of dicts, each containing "seller" and "buyer" keys;
            mutated in place (adds "seller_id" / "buyer_id").
        json_path: path to a JSON file mapping merchant name -> merchant id.
        normalization_rule: rule object forwarded to ``normalization``.

    Returns:
        The same ``information`` list, mutated in place.
    """
    # Load from merchant JSON file
    with open(json_path, "r", encoding="utf-8") as f:
        name_id_dict = json.load(f)

    # Normalize every dictionary key once, up front, so the per-record loop
    # only normalizes the record's own fields.
    normalized_cached_map = {
        normalization(text=k, normalization_rule=normalization_rule): v
        for k, v in name_id_dict.items()
    }
    name_list = list(normalized_cached_map.keys())

    def _match_id(raw_name):
        # Return the mapped id when the best fuzzy match scores >= 80,
        # otherwise None. Also guards the empty-dictionary case: with no
        # choices, process.extractOne returns None and the 3-way unpack
        # in the original code would raise TypeError.
        if not name_list:
            return None
        normalized = normalization(text=raw_name, normalization_rule=normalization_rule)
        match, score, _ = process.extractOne(normalized, name_list, scorer=fuzz.token_sort_ratio)
        return normalized_cached_map[match] if score >= 80 else None

    for item in information:
        # Seller and buyer use the identical matching rule — one helper,
        # instead of the original duplicated seller/buyer branches.
        item["seller_id"] = _match_id(item["seller"])
        item["buyer_id"] = _match_id(item["buyer"])

    return information

# Mapping unit
def mapping_unit(information, json_path, normalization_rule):
    """Resolve each record's "unit" field to a unit id via fuzzy matching.

    Args:
        information: list of dicts, each containing a "unit" key; mutated in
            place (adds "unit_id").
        json_path: path to a JSON file mapping unit name -> unit id.
        normalization_rule: rule object forwarded to ``normalization``.

    Returns:
        The same ``information`` list, mutated in place. When no candidate
        scores >= 80, the raw "unit" value itself is stored as "unit_id".
    """
    # Load the unit name -> id dictionary.
    with open(json_path, "r", encoding="utf-8") as f:
        unit_id_dict = json.load(f)

    # Pre-normalize every dictionary key so the record loop reuses them.
    lookup = {}
    for raw_unit, unit_id in unit_id_dict.items():
        normalized_key = normalization(text=raw_unit, normalization_rule=normalization_rule)
        lookup[normalized_key] = unit_id
    candidates = list(lookup)

    for item in information:
        normalized = normalization(text=item["unit"], normalization_rule=normalization_rule)
        best, score, _ = process.extractOne(normalized, candidates, scorer=fuzz.token_sort_ratio)
        # Fall back to the raw unit string when the match is too weak.
        item["unit_id"] = lookup[best] if score >= 80 else item["unit"]

    return information

# Mapping employee
def mapping_employee(information, json_path, normalization_rule):
    """Load the employee name -> id map from *json_path*.

    NOTE(review): this function appears unfinished — it loads the JSON
    dictionary but performs no matching, never touches *information*, and
    implicitly returns None, unlike the sibling mapping_* functions which
    mutate and return *information*. Confirm the intended behavior before
    calling this.
    """
    # Load from employee JSON file
    with open(json_path, "r", encoding="utf-8") as f:
        employee_id_dict = json.load(f)

# Mapping product
def mapping_product(information, json_path, normalization_rule):
    """Resolve each record's (product_name, unit_id) pair to a product id.

    Args:
        information: list of dicts, each containing "product_name" and
            "unit_id" keys; mutated in place (adds "product_id").
        json_path: path to a JSON file mapping
            product_id -> [product_name, unit_id].
        normalization_rule: rule object forwarded to ``normalization``.

    Returns:
        The same ``information`` list, mutated in place. "product_id" is
        None when the average match score is below 80 or the independently
        matched (name, unit) pair does not exist in the map.
    """
    # Load from product JSON file
    with open(json_path, "r", encoding="utf-8") as f:
        product_id_dict = json.load(f)

    # Invert product_id -> [name, unit] into a
    # (normalized_name, normalized_unit) -> product_id lookup, normalizing once.
    normalized_cached_map = {
        (normalization(text=product_name, normalization_rule=normalization_rule),
         normalization(text=unit_id, normalization_rule=normalization_rule)): product_id
        for product_id, (product_name, unit_id) in product_id_dict.items()
    }
    product_list = list(normalized_cached_map.keys())
    # Hoisted out of the loop: these candidate lists are loop-invariant,
    # but the original rebuilt them for every record.
    name_candidates = [key[0] for key in product_list]
    unit_candidates = [key[1] for key in product_list]

    for item in information:
        # Guard the empty-map case: process.extractOne returns None when
        # given no choices, which would crash the 3-way unpack below.
        if not product_list:
            item["product_id"] = None
            continue

        # Normalize product_name and unit_id:
        normalized_product_name = normalization(text=item["product_name"], normalization_rule=normalization_rule)
        normalized_unit_id = normalization(text=item["unit_id"], normalization_rule=normalization_rule)

        product_name_match, product_name_score, _ = process.extractOne(
            normalized_product_name, name_candidates, scorer=fuzz.token_sort_ratio)
        unit_id_match, unit_id_score, _ = process.extractOne(
            normalized_unit_id, unit_candidates, scorer=fuzz.token_sort_ratio)

        # Calculate the average matching score
        average_score = (product_name_score + unit_id_score) / 2

        if average_score >= 80:
            # Name and unit are matched independently, so the best name and
            # the best unit may come from DIFFERENT products — the recombined
            # key may be absent. The original indexed directly and could
            # raise KeyError; .get() yields None for such mismatched pairs.
            item["product_id"] = normalized_cached_map.get((product_name_match, unit_id_match))
        else:
            item["product_id"] = None

    return information