vinhngba2704 committed on
Commit
d68958b
·
1 Parent(s): 2944c1e

Adding product id map function

Browse files
Files changed (1) hide show
  1. modules/mapping.py +28 -1
modules/mapping.py CHANGED
@@ -64,4 +64,31 @@ def mapping_employee(information, json_path, normalization_rule):
64
  def mapping_product(information, json_path, normalization_rule):
65
  # Load from product JSON file
66
  with open(json_path, "r", encoding="utf-8") as f:
67
- product_id_dict = json.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def mapping_product(information, json_path, normalization_rule, *, min_score=80):
    """Attach a ``product_id`` to every item by fuzzy-matching it against a catalog.

    The catalog is read from the JSON file at ``json_path``; each entry is
    unpacked as ``product_id -> (product_name, unit_id)``. Catalog names/units
    and the item's ``"product_name"`` / ``"unit_id"`` values are all passed
    through ``normalization`` with ``normalization_rule`` before comparison.

    For each item, every catalog (name, unit) pair is scored as the average of
    the ``fuzz.token_sort_ratio`` of the names and of the units. The best pair
    wins if its average is at least ``min_score``; otherwise ``product_id`` is
    set to ``None``.

    Args:
        information: Iterable of dicts, each with ``"product_name"`` and
            ``"unit_id"`` keys. The dicts are modified in place.
        json_path: Path to the product catalog JSON file (read as UTF-8).
        normalization_rule: Forwarded unchanged to ``normalization``.
        min_score: Minimum average score (same scale as the scorer) required
            to accept a match. Defaults to 80.

    Returns:
        The same ``information`` object, with ``"product_id"`` set on each item.
    """
    # Load from product JSON file
    with open(json_path, "r", encoding="utf-8") as f:
        product_id_dict = json.load(f)

    # Create cached normalized dictionary keyed by (name, unit).
    # Entries that normalize to the same key overwrite earlier ones.
    normalized_cached_map = {
        (
            normalization(text=product_name, normalization_rule=normalization_rule),
            normalization(text=unit_id, normalization_rule=normalization_rule),
        ): product_id
        for product_id, (product_name, unit_id) in product_id_dict.items()
    }

    # Loop-invariant candidate sets; de-duplicated so each distinct string is
    # scored once per item (units in particular tend to repeat).
    candidate_names = {name for name, _ in normalized_cached_map}
    candidate_units = {unit for _, unit in normalized_cached_map}

    for item in information:
        # Normalize product_name and unit_id:
        normalized_product_name = normalization(
            text=item["product_name"], normalization_rule=normalization_rule
        )
        normalized_unit_id = normalization(
            text=item["unit_id"], normalization_rule=normalization_rule
        )

        name_scores = {
            name: fuzz.token_sort_ratio(normalized_product_name, name)
            for name in candidate_names
        }
        unit_scores = {
            unit: fuzz.token_sort_ratio(normalized_unit_id, unit)
            for unit in candidate_units
        }

        def average_score(key):
            return (name_scores[key[0]] + unit_scores[key[1]]) / 2

        # Only pairs that really exist in the catalog are considered. Matching
        # the name and the unit independently and then gluing the two winners
        # together can yield a (name, unit) key that is not in the map, which
        # previously raised KeyError.
        best_key = max(normalized_cached_map, key=average_score, default=None)

        # best_key is None only when the catalog is empty.
        if best_key is not None and average_score(best_key) >= min_score:
            item["product_id"] = normalized_cached_map[best_key]
        else:
            item["product_id"] = None

    return information