Spaces:

j-s-v
/

WineMatching

Build error

App Files Files Community

WineMatching / processor /matching.py

j-s-v

2025-07-28

d4bade4 10 months ago

raw

history blame contribute delete

32 kB

	import json
	import datetime
	import settings

	from preprocess.utils.common.extracters import *
	from multiprocessing import Process, Queue

	import pandas as pd
	from rapidfuzz import fuzz, process
	from math import isnan
	from preprocess.utils.common.utils import *
	from time import perf_counter

	# Position of each score component inside a ``score_ex`` list.
	(
	    SCORE_EX_BRAND_INDEX,
	    SCORE_EX_NAME_INDEX,
	    SCORE_EX_SIMILARITY_INDEX,
	    SCORE_EX_TYPE_INDEX,
	    SCORE_EX_COLORSOUR_INDEX,
	    SCORE_EX_VOLUME_INDEX,
	    SCORE_EX_YEAR_INDEX,
	    SCORE_EX_GB_INDEX,
	) = range(8)

	# All-zero ``score_ex`` template with one slot per component listed above.
	SCORE_EX_EMPTY = [0] * (SCORE_EX_GB_INDEX + 1)

	'''def compare_names(name1, name2, scorer=fuzz.ratio, score_cutoff=50):
	print("Scoring: " + name1 + " vs " + name2)
	words1 = name1.split(" ")
	words2 = name2.split(" ")

	score = 0
	for w1 in words1:
	for w2 in words2:
	r = scorer(w1, w2)
	print("\t " + w1 + " - " + w2 + " ; " + str(r))
	if r >= score_cutoff:
	score = score + r

	print("Score result: " + str(score / (100*len(words1))))
	return score / (100*len(words1))

	def compare_name_with_list(name, names_list, scorer=fuzz.ratio, score_cutoff=50):
	result = []
	index = 0
	for name2 in names_list:
	result.append((name2, compare_names(name, name2, scorer, score_cutoff), index))
	index = index + 1
	return result'''


	'''def prepare_groups_with_ids(items_df, brand_col_name = "new_brand"):
	"""
	Предварительная группировка данных из items по (new_brand, type, volume, new_type_wine, sour)
	с учетом нормализованного названия.

	Добавляем столбец 'norm_name', чтобы нормализовать значение name один раз заранее.

	:param items_df: DataFrame с колонками 'new_brand', 'type', 'name', 'id', 'volume', 'new_type_wine', 'sour'.
	:return: Словарь {(new_brand, type, volume, new_type_wine, sour): [(id, name, norm_name, volume, new_type_wine, sour)]}.
	"""
	#items_df = items_df.copy()
	#items_df['norm_name'] = items_df['name'].apply(normalize_name_ex)

	#grouped = items_df.groupby([brand_col_name, 'type', 'volume', 'new_type_wine', 'sour']).apply(
	grouped = items_df.groupby([brand_col_name, 'type', 'volume', 'new_type_wine']).apply(
	lambda x: list(zip(x['id'], x[brand_col_name], x['name'], x['orig_name'], x['norm_name'], x['volume'], x['new_type_wine'], x['sour'], x['year']))
	).to_dict()

	#print(grouped)

	return grouped'''

	def split_name(name):
	    """Split *name* on single space characters and return the pieces as a list.

	    An explicit ``" "`` separator is used, so consecutive spaces produce
	    empty-string pieces and an empty *name* yields ``[""]``.
	    """
	    separator = " "
	    pieces = name.split(separator)
	    return pieces

	def prepare_groups_with_ids_ex(items_df, key_cols, name_col="name"):
	    """Group the rows of *items_df* by *key_cols* and collect parallel lists per group.

	    Side effect: two helper columns are written into *items_df* itself,
	    ``<name_col>_splitted`` and ``name_2_splitted``, holding the result of
	    ``split_name`` for the ``name_col`` and ``name_2`` columns.

	    :param items_df: DataFrame that provides the columns ``index``, ``id``,
	        ``name_2``, *name_col* and every column named in *key_cols*.
	    :param key_cols: column name(s) handed to ``DataFrame.groupby``.
	    :param name_col: column holding the primary name to split.
	    :return: dict mapping each group key to a list of six lists, in this
	        order: ``index`` values, ``id`` values, *name_col* values, split
	        *name_col* values, ``name_2`` values, split ``name_2`` values.
	    """
	    splitted_col = name_col + "_splitted"
	    items_df[splitted_col] = items_df[name_col].apply(split_name)
	    items_df["name_2_splitted"] = items_df["name_2"].apply(split_name)

	    # Order matters: consumers read the six lists by position.
	    collected_cols = ("index", "id", name_col, splitted_col, "name_2", "name_2_splitted")

	    def collect(group):
	        return [list(group[col]) for col in collected_cols]

	    per_group = items_df.groupby(key_cols).apply(collect)
	    return per_group.to_dict()


	def parse_year(year):
	    """Convert a raw year value to ``int``; return ``False`` when no usable year exists.

	    Accepted inputs are ints, floats and strings. Strings may carry
	    surrounding whitespace or a float-style suffix (``"2015.0"``).

	    :param year: raw value (``None``, str, int or float).
	    :return: the year as ``int``, or ``False`` for empty, NaN, infinite,
	        non-numeric or unsupported values. The function never raises for
	        malformed input.
	    """
	    if not year:
	        return False

	    if isinstance(year, str):
	        text = year.strip()
	        if not text:
	            return False
	        try:
	            return int(text)
	        except ValueError:
	            pass
	        # Fall back to float parsing so values such as "2015.0" are accepted.
	        try:
	            year = float(text)
	        except ValueError:
	            return False
	    elif not isinstance(year, (int, float)):
	        return False

	    try:
	        return int(year)
	    except (ValueError, OverflowError):
	        # int(nan) raises ValueError, int(+/-inf) raises OverflowError.
	        return False

	def order_by_best_year(matched_items, year):
	    """Assign ``score_year`` to every entry of *matched_items*, in place.

	    Despite the name, the list is not reordered; only the ``score_year``
	    key of each dict is written.

	    Scoring rules:

	    * *year* missing or unparsable: every entry gets ``3``;
	    * entry year equals *year*: ``3``;
	    * entry has the highest year among the entries that do not equal
	      *year*: ``2``;
	    * anything else, including entries without a usable year: ``0``.

	    :param matched_items: list of dicts with at least ``id`` and ``year``.
	    :param year: requested year as accepted by ``parse_year``.
	    :return: the same *matched_items* list.
	    """
	    target_year = parse_year(year)

	    if not target_year:
	        # Nothing to compare against, so no entry is penalised for its year.
	        for entry in matched_items:
	            entry['score_year'] = 3
	        return matched_items

	    exact_ids = set()
	    newest_ids = set()
	    newest_year = 0

	    for entry in matched_items:
	        try:
	            entry_year = parse_year(entry['year'])
	        except (TypeError, ValueError, OverflowError) as ex:
	            print("Error processing best year for product " + str(entry["id"]) + " value " + str(entry['year']) + ": " + str(ex))
	            continue

	        if not entry_year:
	            continue

	        if entry_year == target_year:
	            exact_ids.add(entry['id'])
	        elif entry_year > newest_year:
	            newest_year = entry_year
	            newest_ids = {entry['id']}
	        elif entry_year == newest_year:
	            newest_ids.add(entry['id'])

	    # Ids are compared (not dicts) so every entry sharing a product id gets
	    # the same score.
	    for entry in matched_items:
	        entry_id = entry['id']
	        if entry_id in exact_ids:
	            entry['score_year'] = 3
	        elif entry_id in newest_ids:
	            entry['score_year'] = 2
	        else:
	            entry['score_year'] = 0

	    return matched_items

	# Module-level profiling counters. Where they are used, the ``s``-suffixed
	# name holds a ``perf_counter()`` start stamp and the unsuffixed name the
	# accumulated elapsed seconds.
	time_11 = time_12 = time_13 = time_14 = 0
	time_11s = time_12s = time_13s = time_14s = 0

	time_20 = time_21 = time_22 = time_23 = time_24 = time_25 = 0
	time_20s = time_21s = time_22s = time_23s = time_24s = time_25s = 0


	def compare_names_for_same_brand(name, name_candidates, name_candidates_splitted, scorer, score_cutoff, limit):
	    """Match *name* against candidates by shared whole words.

	    A candidate that shares at least one space-separated word with *name*
	    scores ``100``; candidates sharing no word are skipped. ``similarity``
	    is ``3`` when every word of *name* occurs in the candidate and ``2``
	    otherwise.

	    :param name: query name; a falsy value yields ``[]``.
	    :param name_candidates: list of candidate names.
	    :param name_candidates_splitted: list parallel to *name_candidates*
	        holding each candidate already split into words.
	    :param scorer: unused; kept for signature compatibility.
	    :param score_cutoff: minimum score a candidate needs to be returned.
	    :param limit: unused; kept for signature compatibility.
	    :return: list of ``(candidate, score, similarity, candidate_index)``.
	    """
	    if not name:
	        return []

	    words = name.split(" ")
	    result = []

	    for idx_candidate, candidate in enumerate(name_candidates):
	        candidate_words = name_candidates_splitted[idx_candidate]
	        shared = sum(1 for word in words if word in candidate_words)
	        if not shared:
	            # No common word: there is no score to report for this candidate.
	            continue

	        score = 100
	        similarity = 3 if shared == len(words) else 2

	        if score >= score_cutoff:
	            result.append((candidate, score, similarity, idx_candidate))

	    return result


	def compare_names_invariant_order(name, name_candidates, name_candidates_splitted, scorer, score_cutoff, limit):
	    """Score candidates by the share of *name* words they contain, ignoring word order.

	    The score is ``100 * (words of name found in the candidate) / (words in
	    name)``. ``similarity`` is ``3`` when the query and the candidate have
	    the same number of words and ``2`` otherwise.

	    :param name: query name; a falsy value yields ``[]``.
	    :param name_candidates: list of candidate names.
	    :param name_candidates_splitted: list parallel to *name_candidates*
	        holding each candidate already split into words.
	    :param scorer: unused by this function.
	    :param score_cutoff: minimum score a candidate needs to be returned.
	    :param limit: unused by this function.
	    :return: list of ``(candidate, score, similarity, candidate_index)``.
	    """
	    if not name:
	        return []

	    words = name.split(" ")
	    word_count = len(words)
	    hits = []

	    for position, candidate in enumerate(name_candidates):
	        candidate_words = name_candidates_splitted[position]
	        found = sum(1 for word in words if word in candidate_words)
	        score = (100 * found) / word_count
	        similarity = 3 if word_count == len(candidate_words) else 2

	        if score >= score_cutoff:
	            hits.append((candidate, score, similarity, position))

	    return hits


	def show_stat():
	    """Print the module-level ``time_20`` .. ``time_25`` counters, one ``NN : value`` line each."""
	    counters = (
	        ("20", time_20),
	        ("21", time_21),
	        ("22", time_22),
	        ("23", time_23),
	        ("24", time_24),
	        ("25", time_25),
	    )
	    report = "".join(label + " : " + str(elapsed) + "\n" for label, elapsed in counters)
	    print(report)


	def find_matches_from_candidates(item_name, candidates, order_invariant_names_matching, name_threshold, limit, brand_score):
	    """Fuzzy-match *item_name* against one candidate group.

	    :param item_name: query string.
	    :param candidates: six parallel lists in this order: product indexes,
	        product ids, names, split names, secondary names (``name_2``),
	        split secondary names. A falsy value yields ``[]``.
	    :param order_invariant_names_matching: when true, results of
	        ``compare_names_invariant_order`` are appended as well.
	    :param name_threshold: score cutoff; replaced by ``0`` when
	        *brand_score* is positive.
	    :param limit: maximum hits per ``process.extract`` call; replaced by
	        ``100`` when *brand_score* is positive.
	    :param brand_score: value stored in the brand slot of every ``score_ex``.
	    :return: list of ``(product_index, product_id, score, matched_name,
	        score_ex)`` tuples.
	    """
	    global time_11s, time_11, time_12s, time_12, time_14s, time_14

	    if not candidates:
	        return []

	    time_11s = perf_counter()
	    products_indexes, products_ids = candidates[0], candidates[1]
	    primary_names, primary_names_splitted = list(candidates[2]), list(candidates[3])
	    secondary_names, secondary_names_splitted = list(candidates[4]), list(candidates[5])
	    time_11 += perf_counter() - time_11s

	    time_12s = perf_counter()
	    if brand_score > 0:
	        # Candidates of a known brand are all kept; ranking happens later.
	        name_threshold = 0
	        limit = 100

	    matches = process.extract(item_name, primary_names, scorer=fuzz.ratio, score_cutoff=name_threshold, limit=limit)
	    matches.extend(
	        process.extract(item_name, secondary_names, scorer=fuzz.ratio, score_cutoff=name_threshold, limit=limit)
	    )

	    alt_matches = []
	    if order_invariant_names_matching:
	        alt_matches = compare_names_invariant_order(item_name, primary_names, primary_names_splitted,
	                                                    scorer=fuzz.ratio, score_cutoff=name_threshold, limit=limit)
	        alt_matches.extend(
	            compare_names_invariant_order(item_name, secondary_names, secondary_names_splitted,
	                                          scorer=fuzz.ratio, score_cutoff=name_threshold, limit=limit)
	        )
	    time_12 += perf_counter() - time_12s

	    time_14s = perf_counter()

	    def build_entry(match, score, similarity, idx_candidate):
	        score_ex = SCORE_EX_EMPTY.copy()
	        score_ex[SCORE_EX_BRAND_INDEX] = brand_score
	        score_ex[SCORE_EX_NAME_INDEX] = score
	        score_ex[SCORE_EX_SIMILARITY_INDEX] = similarity
	        return (products_indexes[idx_candidate], products_ids[idx_candidate], score, match, score_ex)

	    # Fuzzy hits carry no similarity of their own and are recorded with 3.
	    matched_products = [build_entry(match, score, 3, idx) for match, score, idx in matches]
	    matched_products.extend(
	        build_entry(match, score, similarity, idx) for match, score, similarity, idx in alt_matches
	    )
	    time_14 += perf_counter() - time_14s

	    return matched_products


	def _optional_attribute_match(item_value, product_value):
	    """Grade two optional attributes: 2 = equal or both empty, 1 = exactly one empty, 0 = both set but different."""
	    if item_value and product_value:
	        return 2 if item_value == product_value else 0
	    if not item_value and not product_value:
	        return 2
	    return 1


	def score_and_filter_matched_items_by_attributes(matched_items, item):
	    """Score each matched product against *item* on volume, type, colour/sweetness and ``gb``.

	    Writes ``score_volume``, ``score_colorsour`` and ``score_gb`` into every
	    product dict, and ``score_type`` when one of the type levels agrees.
	    Sets ``alternative`` to ``1`` when the volume, the colour/sweetness pair
	    or the ``gb`` flag disagrees.

	    :param matched_items: list of product dicts; each must already contain
	        an ``alternative`` key.
	    :param item: mapping with ``volume``, ``type``, ``type_l1``, ``type_l0``,
	        ``type_wine``, ``sour`` and ``gb``.
	    :return: the scored products; those flagged ``alternative`` are dropped
	        unless ``SETTINGS_MATCHING_INCLUDE_ALTERNATIVES`` is truthy.
	    """
	    kept = []

	    for product in matched_items:
	        # Volume: 3 when both are empty or within 15 % of each other,
	        # 2 when only one side is known, 0 otherwise.
	        item_volume = item['volume']
	        product_volume = product['volume']
	        if not item_volume and not product_volume:
	            product['score_volume'] = 3
	        elif not (item_volume and product_volume):
	            product['score_volume'] = 2
	        else:
	            product_vol = float(product_volume)
	            item_vol = float(item_volume)
	            relative_gap = abs(product_vol - item_vol) / max(product_vol, item_vol)
	            if relative_gap < 0.15:
	                product['score_volume'] = 3
	            else:
	                product['score_volume'] = 0
	                product['alternative'] = 1

	        # Type: the most specific agreeing level wins; with no agreement the
	        # existing ``score_type`` value is left as it is.
	        if item['type'] == product['type']:
	            product['score_type'] = 3
	        elif item['type_l1'] == product['type_l1'] or item['type_l0'] == "unmatched":
	            product['score_type'] = 2
	        elif item['type_l0'] == product['type_l0']:
	            product['score_type'] = 1

	        # Colour and sweetness are folded into one combined score.
	        color_match = _optional_attribute_match(item['type_wine'], product['color'])
	        sour_match = _optional_attribute_match(item['sour'], product['sour'])
	        if color_match:
	            product['score_colorsour'] = 3 if sour_match else 2
	        elif sour_match:
	            product['score_colorsour'] = 1
	        else:
	            product['score_colorsour'] = 0
	            product['alternative'] = 1

	        # ``gb`` agrees when both flags have the same truthiness.
	        if bool(item['gb']) == bool(product['gb']):
	            product['score_gb'] = 3
	        else:
	            product['score_gb'] = 0
	            product['alternative'] = 1

	        if product['alternative'] and not SETTINGS_MATCHING_INCLUDE_ALTERNATIVES:
	            continue

	        kept.append(product)

	    return kept


	def _strip_brand(name, brand):
	    """Return *name* without *brand* when more than two characters remain, else *name* unchanged."""
	    # Non-string values (None, NaN) cannot contain a brand; pass them through
	    # instead of raising TypeError on the ``in`` test.
	    if not brand or not isinstance(brand, str) or not isinstance(name, str):
	        return name
	    if brand not in name:
	        return name
	    remainder = name.replace(brand, '').strip()
	    return remainder if len(remainder) > 2 else name


	def find_matches_for_brand(brand, item,
	                           products_groups_brand_type_vol,
	                           products_groups_brand_typel1_vol,
	                           products_groups_brand_typel0_vol,
	                           order_invariant_names_matching,
	                           name_threshold,
	                           brand_score):
	    """Collect name matches for *item* among the products of one brand.

	    Three candidate groups are consulted in turn, keyed by
	    ``(brand, type, volume)``, ``(brand, type_l1, volume)`` and
	    ``(brand, type_l0, volume)``. Each group is searched with the item
	    name, and with ``name_2`` when it is non-empty. The brand is removed
	    from both names first when more than two characters remain.

	    :param brand: brand used for the lookup keys and removed from the names.
	    :param item: mapping with ``name``, ``name_2``, ``volume``, ``type``,
	        ``type_l1`` and ``type_l0``.
	    :param products_groups_brand_type_vol: dict of candidates keyed by
	        ``(brand, type, volume)``.
	    :param products_groups_brand_typel1_vol: dict keyed by
	        ``(brand, type_l1, volume)``.
	    :param products_groups_brand_typel0_vol: dict keyed by
	        ``(brand, type_l0, volume)``.
	    :param order_invariant_names_matching: forwarded to
	        ``find_matches_from_candidates``.
	    :param name_threshold: forwarded to ``find_matches_from_candidates``.
	    :param brand_score: forwarded to ``find_matches_from_candidates``.
	    :return: concatenated results of all ``find_matches_from_candidates``
	        calls, in lookup order.
	    """
	    item_volume = item['volume']
	    item_name_wo_brand = _strip_brand(item['name'], brand)
	    item_name_2_wo_brand = _strip_brand(item['name_2'], brand)

	    lookups = (
	        (products_groups_brand_type_vol, item['type']),
	        (products_groups_brand_typel1_vol, item['type_l1']),
	        (products_groups_brand_typel0_vol, item['type_l0']),
	    )

	    matches = []
	    for groups, type_value in lookups:
	        if not groups:
	            # A missing or empty group dict has no candidates to offer.
	            continue
	        products_candidates = groups.get((brand, type_value, item_volume), [])

	        matches.extend(find_matches_from_candidates(item_name_wo_brand, products_candidates,
	                                                    order_invariant_names_matching, name_threshold, 100, brand_score))
	        if item_name_2_wo_brand:
	            matches.extend(find_matches_from_candidates(item_name_2_wo_brand, products_candidates,
	                                                        order_invariant_names_matching, name_threshold, 100, brand_score))

	    return matches


	# (score key, weight, divisor) per component; the weights add up to 100.
	_TOTAL_SCORE_COMPONENTS = (
	    ('score_brand', 28.0, 3),
	    ('score_name', 45.0, 100),
	    ('score_similarity', 0.0, 3),
	    ('score_year', 10.0, 3),
	    ('score_volume', 4.0, 3),
	    ('score_type', 4.0, 3),
	    ('score_colorsour', 5.0, 3),
	    ('score_gb', 4.0, 3),
	)


	def calculate_total_score(all_matched_items):
	    """Store the weighted sum of all partial scores under ``score`` in every item.

	    Each component contributes ``weight * value / divisor``. Items are
	    modified in place and nothing is returned.
	    """
	    for entry in all_matched_items:
	        running = None
	        # Plain left-to-right accumulation keeps the float result stable.
	        for key, weight, divisor in _TOTAL_SCORE_COMPONENTS:
	            contribution = weight * entry[key] / divisor
	            running = contribution if running is None else running + contribution
	        entry['score'] = running


	def _prepend_brand(brand, name):
	    """Return ``brand + " " + name`` unless *brand* is empty or already part of *name*."""
	    if brand and brand not in name:
	        return brand + " " + name
	    return name


	def _format_score_ex(product):
	    """Render the partial scores of *product* as ``B..,N..,S..,T..,C..,V..,Y..,G..``."""
	    labelled_keys = (
	        ('B', 'score_brand'),
	        ('N', 'score_name'),
	        ('S', 'score_similarity'),
	        ('T', 'score_type'),
	        ('C', 'score_colorsour'),
	        ('V', 'score_volume'),
	        ('Y', 'score_year'),
	        ('G', 'score_gb'),
	    )
	    return ','.join(label + str(product[key]) for label, key in labelled_keys)


	def new_find_matches_with_ids_func(items_df, products_df, name_threshold=85,
	                                   products_groups_brand_type_vol=None,
	                                   products_groups_brand_typel1_vol=None,
	                                   products_groups_brand_typel0_vol=None,
	                                   products_groups_typewine_type_vol=None,
	                                   order_invariant_names_matching=False,
	                                   index=None,
	                                   qresult=None):
	    """Find, score and rank product matches for every row of *items_df*.

	    For each item, candidates are gathered in this order:

	    1. brand-specific groups for ``brand`` and ``brand_2`` (brand score 3),
	       then ``new_brand`` and each entry of ``alt_brands`` (brand score 2);
	       a brand that was already searched is not searched again;
	    2. the ``(new_type_wine, type, volume)`` group, using the name with
	       the brand prepended (brand score 0);
	    3. all products, compared by their ``name_with_brand``;
	    4. only when the item has no ``brand``: all products, compared by
	       their ``name_wo_brand``.

	    Candidates are then scored on attributes and year, given a weighted
	    total score, sorted by it in descending order and de-duplicated by
	    product id. The ten best products are kept per item.

	    :param items_df: DataFrame of items with columns ``id``, ``brand``,
	        ``brand_2``, ``new_brand``, ``alt_brands``, ``name``, ``name_2``,
	        ``type``, ``type_l1``, ``type_l0``, ``volume``, ``new_type_wine``,
	        ``year``, plus whatever
	        ``score_and_filter_matched_items_by_attributes`` reads.
	    :param products_df: DataFrame of products with columns ``index``,
	        ``id``, ``new_brand``, ``name_wo_brand``, ``name_with_brand``,
	        ``name_2``, ``orig_name``, ``volume``, ``type``, ``type_l1``,
	        ``type_l0``, ``new_type_wine``, ``sour``, ``year``, ``gb``.
	        NOTE(review): values of ``index`` are used as positions into the
	        column lists built below, so they are presumably a 0..n-1 row
	        counter; confirm against the caller.
	    :param name_threshold: fuzzy-match cutoff; values below 50 are raised
	        to 50.
	    :param products_groups_brand_type_vol: candidate groups keyed by
	        ``(brand, type, volume)``; ``None`` is treated as empty.
	    :param products_groups_brand_typel1_vol: candidate groups keyed by
	        ``(brand, type_l1, volume)``; ``None`` is treated as empty.
	    :param products_groups_brand_typel0_vol: candidate groups keyed by
	        ``(brand, type_l0, volume)``; ``None`` is treated as empty.
	    :param products_groups_typewine_type_vol: candidate groups keyed by
	        ``(new_type_wine, type, volume)``; ``None`` is treated as empty.
	    :param order_invariant_names_matching: also match names by word
	        overlap, ignoring word order.
	    :param index: worker label; only used in the progress messages.
	    :param qresult: optional queue; when given, the result list is also
	        ``put`` on it.
	    :return: list of dicts with ``item_id``, ``best_score_ex`` and
	        ``matched_items`` (at most ten product dicts).
	    """
	    results = []

	    print("starting [" + str(index) + "]")

	    name_threshold = max(name_threshold, 50)

	    # The defaults are None; treat them as "no groups" instead of crashing
	    # on ``None.get``.
	    products_groups_brand_type_vol = products_groups_brand_type_vol or {}
	    products_groups_brand_typel1_vol = products_groups_brand_typel1_vol or {}
	    products_groups_brand_typel0_vol = products_groups_brand_typel0_vol or {}
	    products_groups_typewine_type_vol = products_groups_typewine_type_vol or {}

	    def column(col_name):
	        return list(products_df[col_name])

	    all_products_indexes = column("index")
	    all_products_ids = column("id")
	    all_products_brands = column("new_brand")
	    all_products_names = column("name_wo_brand")
	    all_products_names_splitted = list(products_df["name_wo_brand"].apply(split_name))
	    all_products_names_with_brand = column("name_with_brand")
	    all_products_names_with_brand_splitted = list(products_df["name_with_brand"].apply(split_name))
	    all_products_names_2 = column("name_2")
	    all_products_names_2_splitted = list(products_df["name_2"].apply(split_name))

	    all_products_orig_names = column("orig_name")
	    all_products_volumes = column("volume")
	    all_products_types = column("type")
	    all_products_types_l1 = column("type_l1")
	    all_products_types_l0 = column("type_l0")
	    all_products_type_wine = column("new_type_wine")
	    all_products_sour = column("sour")
	    all_products_year = column("year")
	    all_products_gbs = column("gb")

	    # Same six-list layout that find_matches_from_candidates expects.
	    all_products = [all_products_indexes, all_products_ids,
	                    all_products_names, all_products_names_splitted,
	                    all_products_names_2, all_products_names_2_splitted]
	    all_products_with_brands = [all_products_indexes, all_products_ids,
	                                all_products_names_with_brand, all_products_names_with_brand_splitted,
	                                all_products_names_2, all_products_names_2_splitted]

	    for _, item in items_df.iterrows():
	        item_brand = item['brand']
	        item_name = item['name']
	        item_name_2 = item['name_2']

	        all_matches = []
	        used_brands = []

	        def search_brand(brand, brand_score):
	            all_matches.extend(
	                find_matches_for_brand(brand, item,
	                                       products_groups_brand_type_vol,
	                                       products_groups_brand_typel1_vol,
	                                       products_groups_brand_typel0_vol,
	                                       order_invariant_names_matching,
	                                       name_threshold, brand_score))
	            used_brands.append(brand)

	        # First search within every brand known for the item so far.
	        # Searching a brand twice would only add duplicates that the id
	        # de-duplication below removes again.
	        for brand, brand_score in ((item_brand, 3), (item['brand_2'], 3), (item['new_brand'], 2)):
	            if brand and brand not in used_brands:
	                search_brand(brand, brand_score)

	        alt_brands = item['alt_brands']
	        if alt_brands is None or isinstance(alt_brands, float):
	            # None or NaN means "no alternative brands".
	            alt_brands = []
	        for alt_brand in alt_brands:
	            if alt_brand not in used_brands:
	                search_brand(alt_brand, 2)

	        # All further searches use the full name including the brand.
	        item_name_with_brand = _prepend_brand(item_brand, item_name)
	        item_name_2_with_brand = _prepend_brand(item_brand, item_name_2) if item_name_2 else item_name_2

	        queries = [item_name_with_brand]
	        if item_name_2_with_brand:
	            queries.append(item_name_2_with_brand)

	        alt_key = (item['new_type_wine'], item['type'], item['volume'])
	        candidate_pools = [
	            products_groups_typewine_type_vol.get(alt_key, []),
	            all_products_with_brands,
	        ]
	        if not item_brand:
	            candidate_pools.append(all_products)

	        for pool in candidate_pools:
	            for query in queries:
	                all_matches.extend(
	                    find_matches_from_candidates(query, pool, order_invariant_names_matching,
	                                                 name_threshold, 30, 0))

	        all_matched_items = [
	            {
	                'id': all_products_ids[product_index],
	                'brand': all_products_brands[product_index],
	                'item_name': all_products_names[product_index],
	                'score': 0,
	                'alternative': 0,
	                'score_ex': score_ex,
	                'score_brand': score_ex[SCORE_EX_BRAND_INDEX],
	                'score_name': int(score_ex[SCORE_EX_NAME_INDEX]),
	                'score_similarity': score_ex[SCORE_EX_SIMILARITY_INDEX],
	                'score_type': score_ex[SCORE_EX_TYPE_INDEX],
	                'score_colorsour': score_ex[SCORE_EX_COLORSOUR_INDEX],
	                'score_volume': score_ex[SCORE_EX_VOLUME_INDEX],
	                'score_year': score_ex[SCORE_EX_YEAR_INDEX],
	                'score_gb': score_ex[SCORE_EX_GB_INDEX],
	                'item_orig_name': all_products_orig_names[product_index],
	                'volume': all_products_volumes[product_index],
	                'type': all_products_types[product_index],
	                'type_l1': all_products_types_l1[product_index],
	                'type_l0': all_products_types_l0[product_index],
	                'color': all_products_type_wine[product_index],
	                'sour': all_products_sour[product_index],
	                'year': all_products_year[product_index],
	                'gb': all_products_gbs[product_index],
	            }
	            for product_index, product_id, score, match, score_ex in all_matches
	        ]

	        all_matched_items = score_and_filter_matched_items_by_attributes(all_matched_items, item)
	        all_matched_items = order_by_best_year(all_matched_items, item['year'])
	        calculate_total_score(all_matched_items)

	        # Best total score first; sorted() is stable, so ties keep search order.
	        all_matched_items = sorted(all_matched_items, key=lambda d: d['score'], reverse=True)

	        # Keep only the best-scored entry of every product id.
	        seen_ids = set()
	        best_score_ex = ''
	        unique_items = []
	        for product in all_matched_items:
	            if product['id'] in seen_ids:
	                continue
	            seen_ids.add(product['id'])

	            product['score_ex'] = _format_score_ex(product)
	            if not best_score_ex:
	                best_score_ex = product['score_ex']
	            unique_items.append(product)

	        results.append({
	            'item_id': item['id'],
	            'best_score_ex': best_score_ex,
	            'matched_items': unique_items[:10],
	        })

	    print("finished [" + str(index) + "]")
	    if qresult is not None:
	        qresult.put(results)

	    return results


	def new_find_matches_with_ids(items_df, products_df, name_threshold=85,
	                              products_groups_brand_type_vol=None,
	                              products_groups_brand_typel1_vol=None,
	                              products_groups_brand_typel0_vol=None,
	                              products_groups_typewine_type_vol=None,
	                              order_invariant_names_matching = False,
	                              thread_count = 8):
	    """Match all items against the products and merge the result into *items_df*.

	    Fewer than 1000 items are processed in the calling process, followed
	    by ``show_stat()``. Larger inputs are cut into row chunks, and each
	    chunk is handled by its own ``multiprocessing.Process`` that reports
	    back through a dedicated ``Queue``.

	    :param items_df: items to match; must contain an ``id`` column.
	    :param products_df: products to match against.
	    :param name_threshold: forwarded to ``new_find_matches_with_ids_func``.
	    :param products_groups_brand_type_vol: forwarded unchanged.
	    :param products_groups_brand_typel1_vol: forwarded unchanged.
	    :param products_groups_brand_typel0_vol: forwarded unchanged.
	    :param products_groups_typewine_type_vol: forwarded unchanged.
	    :param order_invariant_names_matching: forwarded unchanged.
	    :param thread_count: number of worker processes to aim for; values
	        below 1 are treated as 1.
	    :return: *items_df* merged on ``id`` with the columns ``best_score_ex``
	        and ``matched_items`` (a JSON string).
	    """
	    print("Started matching at " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n")

	    if len(items_df) < 1000:
	        results = new_find_matches_with_ids_func(items_df, products_df, name_threshold,
	                                                 products_groups_brand_type_vol,
	                                                 products_groups_brand_typel1_vol,
	                                                 products_groups_brand_typel0_vol,
	                                                 products_groups_typewine_type_vol,
	                                                 order_invariant_names_matching,
	                                                 0)

	        show_stat()

	    else:
	        worker_count = max(1, thread_count)
	        chunk_size = len(items_df) // worker_count + 1
	        # Ceiling division: no empty trailing chunk, hence no idle process.
	        num_chunks = (len(items_df) + chunk_size - 1) // chunk_size

	        workers = []
	        for i in range(num_chunks):
	            chunk = items_df[i * chunk_size:(i + 1) * chunk_size]

	            q = Queue()
	            p = Process(target=new_find_matches_with_ids_func, args=(chunk, products_df, name_threshold,
	                                                                     products_groups_brand_type_vol,
	                                                                     products_groups_brand_typel1_vol,
	                                                                     products_groups_brand_typel0_vol,
	                                                                     products_groups_typewine_type_vol,
	                                                                     order_invariant_names_matching,
	                                                                     i, q))
	            p.start()
	            workers.append((p, q))

	        # Drain every queue BEFORE joining: a child that still has queued
	        # data does not terminate until that data has been consumed.
	        # NOTE(review): q.get() blocks forever if a worker dies before it
	        # puts its result.
	        results = []
	        for _, q in workers:
	            results.extend(q.get())

	        for p, _ in workers:
	            p.join()

	    for r in results:
	        r['matched_items'] = json.dumps(r['matched_items'], ensure_ascii=False)

	    results_df = pd.DataFrame(results)
	    merged_df = items_df.merge(results_df, left_on='id', right_on='item_id').drop(columns=['item_id'])

	    print("Finished matching at " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n")
	    return merged_df