Spaces:

Phong1
/

Multimodal-StreamLit-Demo

Build error

App Files Files Community

Multimodal-StreamLit-Demo / src /modules /mapping.py

vinhngba2704

First commit to this repo

51db8d1 9 months ago

raw

history blame contribute delete

3.98 kB

	import json
	from rapidfuzz import process, fuzz

	# Initialized Modules
	from modules.normalization import normalization

	# Mapping merchant
	def mapping_merchant(information, json_path, normalization_rule, score_threshold=80):
	    """Attach ``seller_id`` and ``buyer_id`` to each item via fuzzy name matching.

	    The merchant file is read as a JSON object whose keys are merchant names
	    and whose values are the ids to assign. Both the file's names and each
	    item's ``"seller"`` / ``"buyer"`` value are passed through
	    ``normalization`` before being compared with ``fuzz.token_sort_ratio``.

	    Args:
	        information: Iterable of dict-like items; each must provide the keys
	            ``"seller"`` and ``"buyer"``. Items are modified in place.
	        json_path: Path of the UTF-8 encoded merchant JSON file.
	        normalization_rule: Forwarded unchanged to ``normalization``.
	        score_threshold: Minimum score a match must reach to be accepted.
	            Defaults to 80, the value that was previously hard-coded.

	    Returns:
	        The same ``information`` object, with ``"seller_id"`` and
	        ``"buyer_id"`` set on every item (``None`` when no merchant name
	        reaches the threshold or the merchant file is empty).
	    """
	    # Load from merchant JSON file
	    with open(json_path, "r", encoding="utf-8") as f:
	        name_id_dict = json.load(f)

	    # Create cached normalized dictionary
	    normalized_cached_map = {
	        normalization(text=name, normalization_rule=normalization_rule): merchant_id
	        for name, merchant_id in name_id_dict.items()
	    }
	    name_list = list(normalized_cached_map)

	    def lookup_merchant_id(raw_name):
	        """Return the id of the best-matching merchant, or None if none qualifies."""
	        normalized_name = normalization(text=raw_name, normalization_rule=normalization_rule)
	        # extractOne returns None when name_list is empty or when no choice
	        # scores at least score_cutoff, so the result must not be unpacked
	        # unconditionally.
	        best = process.extractOne(
	            normalized_name,
	            name_list,
	            scorer=fuzz.token_sort_ratio,
	            score_cutoff=score_threshold,
	        )
	        if best is None:
	            return None
	        return normalized_cached_map[best[0]]

	    for item in information:
	        item["seller_id"] = lookup_merchant_id(item["seller"])
	        item["buyer_id"] = lookup_merchant_id(item["buyer"])

	    return information

	# Mapping unit
	def mapping_unit(information, json_path, normalization_rule, score_threshold=80):
	    """Attach ``unit_id`` to each item via fuzzy matching of its ``"unit"`` value.

	    The unit file is read as a JSON object whose keys are unit names and
	    whose values are the ids to assign. File keys and each item's ``"unit"``
	    are passed through ``normalization`` and compared with
	    ``fuzz.token_sort_ratio``.

	    Args:
	        information: Iterable of dict-like items; each must provide the key
	            ``"unit"``. Items are modified in place.
	        json_path: Path of the UTF-8 encoded unit JSON file.
	        normalization_rule: Forwarded unchanged to ``normalization``.
	        score_threshold: Minimum score a match must reach to be accepted.
	            Defaults to 80, the value that was previously hard-coded.

	    Returns:
	        The same ``information`` object. ``"unit_id"`` holds the mapped id
	        when a match reaches the threshold; otherwise it falls back to the
	        item's original, un-normalized ``"unit"`` value.
	    """
	    # Load from unit JSON file
	    with open(json_path, "r", encoding="utf-8") as f:
	        unit_id_dict = json.load(f)

	    # Create cached normalized dictionary
	    normalized_cached_map = {
	        normalization(text=name, normalization_rule=normalization_rule): unit_id
	        for name, unit_id in unit_id_dict.items()
	    }
	    unit_list = list(normalized_cached_map)

	    for item in information:
	        unit = normalization(text=item["unit"], normalization_rule=normalization_rule)
	        # extractOne returns None for an empty unit_list or when nothing
	        # scores at least score_cutoff; treat both as "no match".
	        best = process.extractOne(
	            unit,
	            unit_list,
	            scorer=fuzz.token_sort_ratio,
	            score_cutoff=score_threshold,
	        )
	        if best is None:
	            # Keep the raw unit text so the information is not lost.
	            item["unit_id"] = item["unit"]
	        else:
	            item["unit_id"] = normalized_cached_map[best[0]]

	    return information

	# Mapping employee
	def mapping_employee(information, json_path, normalization_rule):
	    """Placeholder for employee mapping; returns ``information`` unchanged.

	    No employee matching is implemented yet. The employee file is still
	    opened and parsed, so a missing or malformed file raises exactly as
	    before, but its content is not used.

	    Args:
	        information: Object handed back to the caller as-is.
	        json_path: Path of the UTF-8 encoded employee JSON file.
	        normalization_rule: Currently unused; kept for a signature parallel
	            to the other ``mapping_*`` functions.

	    Returns:
	        The same ``information`` object, matching the other ``mapping_*``
	        functions so that ``info = mapping_employee(info, ...)`` does not
	        replace the data with ``None``.
	    """
	    # Load from employee JSON file
	    with open(json_path, "r", encoding="utf-8") as f:
	        json.load(f)

	    # TODO: implement employee matching once the expected item keys are defined.
	    return information

	# Mapping product
	def mapping_product(information, json_path, normalization_rule, score_threshold=80):
	    """Attach ``product_id`` to each item by fuzzy-matching name and unit together.

	    The product file is read as a JSON object mapping each product id to a
	    two-element ``[product_name, unit_id]`` value. Every catalogue entry is
	    scored as the average of ``fuzz.token_sort_ratio`` on the normalized
	    product name and on the normalized unit id, and the entry with the
	    highest average wins.

	    Name and unit are scored against the *same* catalogue entry. Choosing
	    the best name and the best unit independently can combine fields of two
	    different products into a key that does not exist in the catalogue.

	    Args:
	        information: Iterable of dict-like items; each must provide the keys
	            ``"product_name"`` and ``"unit_id"``. Items are modified in place.
	        json_path: Path of the UTF-8 encoded product JSON file.
	        normalization_rule: Forwarded unchanged to ``normalization``.
	        score_threshold: Minimum average score a catalogue entry must reach
	            to be accepted. Defaults to 80, the value that was previously
	            hard-coded.

	    Returns:
	        The same ``information`` object, with ``"product_id"`` set on every
	        item (``None`` when no entry reaches the threshold or the catalogue
	        is empty).
	    """
	    # Load from product JSON file
	    with open(json_path, "r", encoding="utf-8") as f:
	        product_id_dict = json.load(f)

	    # Create cached normalized dictionary keyed by (name, unit)
	    normalized_cached_map = {
	        (
	            normalization(text=product_name, normalization_rule=normalization_rule),
	            normalization(text=unit_id, normalization_rule=normalization_rule),
	        ): product_id
	        for product_id, (product_name, unit_id) in product_id_dict.items()
	    }
	    # Loop-invariant: the distinct units in the catalogue, so each item's
	    # unit is scored once per distinct unit rather than once per product.
	    catalogue_units = {unit for _, unit in normalized_cached_map}

	    for item in information:
	        # Normalize product_name and unit_id
	        normalized_product_name = normalization(text=item["product_name"], normalization_rule=normalization_rule)
	        normalized_unit_id = normalization(text=item["unit_id"], normalization_rule=normalization_rule)

	        unit_scores = {
	            unit: fuzz.token_sort_ratio(normalized_unit_id, unit)
	            for unit in catalogue_units
	        }

	        best_product_id = None
	        best_score = None
	        for (candidate_name, candidate_unit), product_id in normalized_cached_map.items():
	            name_score = fuzz.token_sort_ratio(normalized_product_name, candidate_name)
	            # Calculate the average matching score for this catalogue entry
	            average_score = (name_score + unit_scores[candidate_unit]) / 2
	            # Strict ">" keeps the first entry on ties.
	            if best_score is None or average_score > best_score:
	                best_product_id = product_id
	                best_score = average_score

	        # best_score stays None only when the catalogue is empty.
	        if best_score is not None and best_score >= score_threshold:
	            item["product_id"] = best_product_id
	        else:
	            item["product_id"] = None

	    return information