Spaces:

j-s-v
/

WineMatching

Build error

App Files Files Community

WineMatching / preprocess /preprocess.py

j-s-v

Fixed isnan missing issue

fe2d51b 8 months ago

raw

history blame contribute delete

31.2 kB

	import os.path
	from preprocess.utils.common.utils import normalize_name
	from math import isnan

	from preprocess.utils.items.attrs import *
	from preprocess.utils.common.brand_matching import *
	from preprocess.utils.common.top_inserts import *
	from preprocess.utils.products.products import *
	import pandas as pd
	from processor.matching import prepare_groups_with_ids_ex

	class Preprocessor():

	def __init__(self, long_types_list, short_types_list, sour_list,
			type_wine, gbs, grapes, other_words,
			#sour_merge_dict,
			type_merge_dict, color_merge_dict,
			country_list):
		"""Store the vocabularies used while parsing product and item names.

		Args:
			long_types_list: type names; stored lower-cased and, in their
				original case, added to the trimming stop-list.
			short_types_list: stored as-is; passed to process_unbrended_names().
			sour_list: stored as ``self.sour`` and added to the stop-list.
			type_wine: passed to merge_wine_type() as ``colors``.
			gbs: words looked up with find_full_word() in prcess_text().
			grapes: stored as-is; passed to process_unbrended_names().
			other_words: stored as-is and added to the stop-list.
			type_merge_dict: mapping applied with ``Series.replace`` to the
				``type`` / ``new_type`` columns.
			color_merge_dict: passed to merge_wine_type().
			country_list: stored as-is and added to the stop-list.

		Side effects:
			Adds a ``normalize_name``-d alias for every key of the module
			level ``TYPES_FROM_BRAND_DICT``.
		"""
		self.long_types_list = [element.lower() for element in long_types_list]
		self.short_types_list = short_types_list
		self.sour = sour_list
		self.type_wine = type_wine
		self.gbs = gbs
		self.grapes = grapes
		self.other_words = other_words

		# Stop-list handed to trim_name(). "+" builds a new list, so the
		# caller's lists are never mutated by the removal below.
		self.types_n_others = long_types_list + other_words + sour_list + country_list
		# "Шерри" is excluded from the stop-list. Guarded, because
		# list.remove() raises ValueError when the entry is absent.
		if "Шерри" in self.types_n_others:
			self.types_n_others.remove("Шерри")

		self.type_dict = type_merge_dict
		self.color_merge_dict = color_merge_dict
		self.country_list = country_list

		# Build the aliased mapping in the same key order as before
		# (key, normalized key, next key, ...).
		aliased = {}
		for key, value in TYPES_FROM_BRAND_DICT.items():
			aliased[key] = value
			aliased[normalize_name(key)] = value

		# Update the existing dict object instead of rebinding the name:
		# rebinding a wildcard-imported global only changes this module's
		# reference, so other modules holding the same dict would never see
		# the aliases.
		# NOTE(review): assumes the helper modules read this same dict
		# object - confirm where TYPES_FROM_BRAND_DICT is defined and used.
		TYPES_FROM_BRAND_DICT.clear()
		TYPES_FROM_BRAND_DICT.update(aliased)



	def write_log(self, logfn, s):
		"""Echo a message to stdout and append it, timestamped, to a log file.

		Args:
			logfn: path of the log file; opened in append mode.
			s: message text (without trailing newline).
		"""
		# Imported here so the method does not depend on a wildcard import
		# happening to export ``datetime``.
		from datetime import datetime

		print(s + "\n")
		# Explicit UTF-8: messages embed exception text and product data that
		# may contain non-ASCII characters, which a locale default encoding
		# may be unable to encode.
		with open(logfn, 'a', encoding='utf-8') as logf:
			logf.write(datetime.now().strftime('[%Y-%m-%d %H:%M:%S]: ') + s + "\n")



	def process_products(self, products):
	# Parse the raw products catalogue into a DataFrame of extracted attributes.
	#
	# `products` is iterated with iterrows(); the columns read here are 'id',
	# 'brand', 'name', 'name_translit', 'product_type', 'category',
	# 'type_prefix', 'volume' and 'name_postfix'.
	# Returns pd.DataFrame(result) with one entry per row that was processed.
	#
	# NOTE(review): values are appended to the `result` lists at two separate
	# points inside the try block (right after the NaN check and again near the
	# end). If a helper raises in between, the lists end up with different
	# lengths and pd.DataFrame(result) will presumably fail - confirm.
	# NOTE(review): `tqdm` is not imported explicitly in this file; presumably it
	# comes from one of the wildcard imports - confirm.
	result={'index':[], 'id':[], 'orig_brand':[], 'brand':[], 'brand_unwrap':[],
	'orig_name':[], 'name':[], 'name_wo_brand':[], 'name_with_brand':[],
	'orig_name_2':[], 'name_2': [],
	'orig_type':[], 'type':[], 'type_l1':[], 'type_l0':[],
	'orig_type_wine':[], "type_wine":[], 'sour':[],
	"volume":[], "gb":[], "year":[], 'alco':[], 'other': []}#, 'embeddings':[]}

	# Running position of accepted rows (skipped rows do not advance it).
	index = 0
	for idx, row in tqdm(products.iterrows()):
	try:
	#if not row['id'] == 1115:
	# continue

	#if not isinstance(row['brand'], str):
	# continue

	#if (row['brand'].lower() == 'Villa Raiano'.lower()) or (row['brand'].lower() == 'bosco'.lower()):
	# row = row
	#else:
	# continue
	# A numeric NaN product_type (empty CSV cell) means the row cannot be classified.
	if isinstance(row['product_type'], (int, float)) and isnan(row['product_type']):
	print("Product type is not specified or incorrect for product id=[" + str(row['id']) + "]. Product is ignored")
	continue

	# Keep the untouched source values next to the parsed ones.
	result['index'].append(index)
	result['id'].append(row['id'])

	result['orig_brand'].append(row['brand'])
	#result['orig_name'].append(row['name_long'])
	result['orig_name'].append(row['name'])
	result['orig_name_2'].append(row['name_translit'])
	result['orig_type'].append(row['product_type'])
	result['orig_type_wine'].append(row['category'])

	brand = preprocess_product_brand(row['brand'])
	#name = preprocess_product_name(row['name_long'])
	name = preprocess_product_name(row['name'])
	name_translit = preprocess_product_name(row['name_translit'])


	# First of all let's check if it is sparkling wine
	# (drink_type_n is assigned here and below but never read in this method.)
	drink_type, _ = extract_spark(row['product_type'], False)
	drink_type_n, name = extract_spark(name, True)

	if not drink_type:
	drink_type, _ = extract_type(row['product_type'], False)
	drink_type_n, name = extract_type(name, True)

	# Last resort: use the raw product_type text, lower-cased.
	if not drink_type:
	drink_type = row['product_type'].lower()


	type_wine = None
	sour_wine = ''
	if isinstance(row['type_prefix'], str) and row['type_prefix']:
	type_wine, sour_wine, _ = extract_color_and_sour(row['type_prefix'], remove=False)
	# NOTE(review): after the fallback above drink_type is a string, so this
	# `is None` test looks like it can never be true - confirm.
	if drink_type is None and (type_wine or sour_wine):
	drink_type='вино'

	# Attributes taken from dedicated columns; name-derived values below only fill gaps.
	volume = is_volume(row['volume'])
	year, _ = extract_production_year(str(row['name_postfix']))
	gb, _ = extract_gb(row['name_postfix'], False)
	alco, _ = extract_alcohol_content(name)



	# Strip attributes out of both names; from the first call only year_n, color_n,
	# sour_wine_n, volume_n and other_n are used later, the *_n2 values are discarded.
	name, alcohol_n, volume_n, aging_n, year_n, gb_n, color_n, sour_wine_n, other_n = extract_attributes_from_name(name)
	name = trim_name(name, self.types_n_others).replace(',', ' ').replace('.', ' ')
	name = normalize_and_clean_name(name)

	name_translit, alcohol_n2, volume_n2, aging_n2, year_n2, gb_n2, color_n2, sour_wine_n2, other_n2 = extract_attributes_from_name(name_translit)
	name_translit = trim_name(name_translit, self.types_n_others).replace(',', ' ').replace('.', ' ')
	name_translit = normalize_and_clean_name(name_translit)


	# Column values win; name-derived values are used only when the column gave nothing.
	if not year:
	year = year_n
	#elif year and year_n and (year != year_n):
	# print("Product year conflict detected for product id=[" + str(row['id']) + "]: " + str(year) + " vs " + str(year_n))


	if not type_wine:
	type_wine = color_n
	#elif color_n and type_wine and (color_n != type_wine):
	# print("Product type_wine conflict detected for product id=[" + str(row['id']) + "]: " + str(type_wine) + " vs " + str(color_n))


	if not sour_wine:
	sour_wine = sour_wine_n
	#if sour_wine_n and sour_wine and (sour_wine != sour_wine_n):
	# print("Product sour_wine conflict detected for product id=[" + str(row['id']) + "]: " + str(sour_wine) + " vs " + str(sour_wine_n))


	# Volume is the only attribute whose conflict is still reported.
	if not volume:
	volume = volume_n
	elif volume_n and volume and (volume_n != volume):
	print("Product volume conflict detected for product id=[" + str(row['id']) + "]: " + str(volume) + " vs " + str(volume_n))



	# Empty-string columns below are placeholders; process_products_full() fills
	# brand_unwrap, name_wo_brand, name_with_brand, type_l1 and type_l0.
	result['brand'].append(brand)
	result['brand_unwrap'].append('')

	result['name'].append(name)
	result['name_2'].append(name_translit)
	result['name_wo_brand'].append('')
	result['name_with_brand'].append('')

	if not type_wine:
	type_wine = ''


	result['type'].append(drink_type.lower())
	result['type_wine'].append(type_wine.lower())
	result['type_l1'].append('')
	result['type_l0'].append('')

	if not sour_wine:
	sour_wine = ''

	result['sour'].append(sour_wine)
	result['volume'].append(volume)
	result['year'].append(year)
	result['gb'].append(gb)
	result['alco'].append(alco)
	result['other'].append(other_n)

	index += 1
	except Exception as ex:
	# The message prints `idx` (the DataFrame index label), not row['id'].
	print("Error processing product id=" + str(idx) + ": " + str(ex))
	return pd.DataFrame(result)


	def process_products_full(self, products_data):
	# Load the products CSV, parse it and build every lookup structure stored
	# in `products_data`.
	#
	# Reads products_data['dir'] (log location) and products_data['path'] (CSV).
	# Adds the keys "dict_types", "unwrap_brands_1", "unwrap_brands_2",
	# "groups_brand_type_vol", "groups_brand_typel1_vol",
	# "groups_brand_typel0_vol", "groups_typewine_type_vol", "groups_typel0"
	# and "df_products", then calls save_products_data() and
	# remove_old_products().
	# Returns the same `products_data` dict, or None if any step raised (the
	# error is written to the log, not re-raised).
	logfn = os.path.join(products_data['dir'], "update_log.txt")
	try:
	self.write_log(logfn, "Products processing started")

	prods_file = products_data['path']
	products_delimiter = get_delimiter(prods_file)
	# row_products=pd.read_csv(prods_file, sep=products_delimiter, on_bad_lines='skip')
	products = pd.read_csv(prods_file, sep=products_delimiter)

	# 1)
	self.write_log(logfn, '-----------Prepare products catalogue----------')
	products = self.process_products(products.copy())

	# Distinct parsed types; process_new() passes them to merge_types() as product_types.
	products_data["dict_types"] = products['type'].unique().tolist()

	# 2)
	#products['brand'] = products['brand'].apply(lambda x: str(x).strip().lower())

	# 3)
	#products_data["brand_3"] = products['brand'].unique()

	self.write_log(logfn, '-----------Unwrapping brands----------')
	products["brand_unwrap"] = products["brand"]
	# 4)
	# Brand unwrapping is disabled: both mappings are empty dicts, so the two
	# replace() calls below leave brand_unwrap equal to brand.
	##products_data["unwrap_brands_1"] = unwrap_brands(products)
	products_data["unwrap_brands_1"] = {}

	# 5)
	products["brand_unwrap"] = products["brand"].replace(products_data["unwrap_brands_1"])

	# 6)
	#products_data["unwrap_brand_2"] = unwrap_brands(products)

	# 7)
	##products_data["unwrap_brands_2"] = unwrap_brands(products, products['brand_unwrap'].unique())
	products_data["unwrap_brands_2"] = {}

	# 8)
	products["brand_unwrap"] = products["brand_unwrap"].replace(products_data["unwrap_brands_2"])
	# Keep brand_unwrap only where it differs from brand; otherwise blank it.
	products["brand_unwrap"] = products.apply(lambda row: row["brand_unwrap"] if row["brand_unwrap"] != row["brand"] else '', axis=1)

	# 9)
	self.write_log(logfn, '----------Adding service categories----------')
	# NOTE(review): the 'new_type_wine' and 'new_type' columns used for grouping
	# further down are not created in this method; presumably these two helpers
	# add them in place - confirm.
	merge_wine_type(products, colors=self.type_wine, color_merge_dict=self.color_merge_dict)
	merge_types(products, products, type_merge_dict=self.type_dict)

	# Now we can normalize and clean brands and names (only after trimming)
	products['brand'] = products['brand'].apply(normalize_and_clean_brand)
	products['norm_name'] = products['name']

	# 11)
	self.write_log(logfn, '----------Replacing product types----------')
	products['type']=products['type'].replace(self.type_dict)

	# For products new_brand is simply a copy of the cleaned brand.
	products['new_brand']=products['brand']
	#products["name_with_brand"] = products["name"]

	products["name_wo_brand"] = products.apply(lambda row: remove_brand_from_name(row['name'], row['brand']), axis=1)
	products["name_with_brand"] = products.apply(lambda row: insert_brand_in_name(row['name'], row['brand']), axis=1)
	#products["name_wo_brand_len"] = products['name_wo_brand'].apply(lambda x: len(x))



	# Group products by progressively coarser keys; the third argument names the
	# column handed to prepare_groups_with_ids_ex() alongside the grouping keys.
	#products_data["dict_groups_brand_type_vol_typewine"] = prepare_groups_with_ids_ex(products, ["new_brand", 'type', 'volume', 'new_type_wine'])
	products_data["groups_brand_type_vol"] = prepare_groups_with_ids_ex(products, ["new_brand", 'type', 'volume'], "name_wo_brand")

	# Change it from type_wine to type
	# type_l1 is derived from type, and type_l0 from type_l1, via the two level dicts.
	products['type_l1'] = products['type'].replace(TYPES_LEVEL_1_DICT)
	products['type_l0'] = products['type_l1'].replace(TYPES_LEVEL_0_DICT)

	products_data["groups_brand_typel1_vol"] = prepare_groups_with_ids_ex(products, ['new_brand', 'type_l1', 'volume'], "name_wo_brand")
	products_data["groups_brand_typel0_vol"] = prepare_groups_with_ids_ex(products, ['new_brand', 'type_l0', 'volume'], "name_wo_brand")

	products_data["groups_typewine_type_vol"] = prepare_groups_with_ids_ex(products, ['new_type_wine', 'new_type', 'volume'], "name_with_brand")

	products_data["groups_typel0"] = prepare_groups_with_ids_ex(products, ['type_l0'], "name_with_brand")

	#products_data["dict_groups_typel1_vol"] = prepare_groups_with_ids_ex(products, ['type_l1','volume'])
	#products_data["dict_groups_typel0_vol"] = prepare_groups_with_ids_ex(products, ['type_l0','volume'])
	#products_data["dict_groups_vol"] = prepare_groups_with_ids_ex(products, ['volume'])

	products_data["df_products"] = products
	save_products_data(products_data)

	remove_old_products(products_data)

	self.write_log(logfn, "Products processing finished")
	except Exception as ex:
	# Any failure is logged and reported to the caller as None.
	self.write_log(logfn, "An error occurred: " + str(ex))
	return None

	return products_data


	def preprocess_item_brand(self, brand):
		"""Split a raw item brand written as "main / alternative" in two.

		Args:
			brand: raw brand value; anything that is not a ``str`` is
				converted with ``str()`` and returned as the main brand.

		Returns:
			``(main, alternative)``, both stripped; ``alternative`` is ``''``
			when the value contains no slash.
		"""
		if not isinstance(brand, str):
			return str(brand), ''

		primary, slash, remainder = brand.partition('/')
		if not slash:
			return brand.strip(), ''

		# Only the segment between the first and the second slash is used as
		# the alternative brand; text after a second slash is ignored.
		secondary = remainder.partition('/')[0]
		return primary.strip(), secondary.strip()



	def detect_language_simple_2(self, name, reverse=False):
		"""Guess whether a string is Russian or English by counting letters.

		Args:
			name: text to classify.
			reverse: kept for backward compatibility only. Letters are
				counted over the whole string, so the order in which they are
				visited cannot change the result.

		Returns:
			``'xx'`` when fewer than two Cyrillic and fewer than two Latin
			letters are present, ``'ru'`` when Cyrillic letters strictly
			outnumber Latin ones, otherwise ``'en'`` (ties included).
		"""
		ru_count = 0
		en_count = 0

		for ch in name:
			# 'Ё' and 'ё' sit outside the contiguous А..я code-point block,
			# so they have to be listed explicitly to count as Cyrillic.
			if 'А' <= ch <= 'Я' or 'а' <= ch <= 'я' or ch in 'Ёё':
				ru_count += 1
			elif 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
				en_count += 1

		if ru_count < 2 and en_count < 2:
			return 'xx'

		return 'ru' if ru_count > en_count else 'en'


	def check_alternative_name(self, name, check_len = True, simple_lang_check=True):
	startpos = 0
	while True:
	pos = name.find("/", startpos)
	if pos == -1:
	return name, ''

	parts = [name[:pos], name[pos+1:]]
	startpos = pos + 1

	if check_len:
	if float(min(len(parts[0]), len(parts[1]))) / max(len(parts[0]), len(parts[1])) < 0.5:
	continue

	if len(parts[1]) < 3:
	return name, ''

	lang1 = self.detect_language_simple_2(parts[0], True)
	lang2 = self.detect_language_simple_2(parts[1])
	if (lang1 == 'ru' and lang2=='en') or (lang1 == 'en' and lang2=='ru'):
	return parts[0], parts[1]

	return name, ''


	def merge_multiline_name(self, name_parts):
		"""Regroup the lines of a multi-line name by detected language.

		Lines whose detect_language() result equals that of the first line are
		joined to it with single spaces; all other lines are collected into
		the second name, each preceded by a space.

		Args:
			name_parts: the lines of the name; the first one sets the
				reference language.

		Returns:
			``(name, name_2)``; ``name_2`` is ``""`` when every line matched.
		"""
		head = name_parts[0]
		base_lang = detect_language(head)

		same_lang = [head]
		other_lang = []
		for part in name_parts[1:]:
			bucket = same_lang if detect_language(part) == base_lang else other_lang
			bucket.append(part)

		# The second name keeps its leading space, exactly as when it was
		# built by repeated `+= " " + part`.
		return " ".join(same_lang), "".join(" " + part for part in other_lang)


	def process_multiline_name(self, name, check_len = True, simple_lane_check=True):
	if not name:
	return name, ''

	pos = name.find(" ##### ")
	if pos >= 0:
	parts = name.split(" ##### ")
	# Special processing for complex multiline names like;
	# "Луи Мемори До\nВыдержка: от 30 до 50 лет\nLouis Memory Deau\nAgeing: from 30 to 50 years"
	if len(parts) > 2:
	return self.merge_multiline_name(parts)

	return parts[0], parts[1]

	return name, ''



	def process_items(self, df):
	# Parse feed items into a DataFrame of extracted attributes.
	#
	# `df` must provide the columns 'id' and 'attrs'; each 'attrs' value is a
	# JSON object serialized as a string. Keys read here: 'brand', 'name',
	# 'type', 'type_wine', 'category', 'color', 'sugar', 'volume', 'year'.
	# Returns pd.DataFrame(result) with one entry per item that was processed.
	#
	# NOTE(review): result['id'] and result['orig_brand'] are appended before the
	# remaining columns, all inside one try block. If anything raises in between
	# (e.g. a missing 'name' key), the lists end up with different lengths and
	# pd.DataFrame(result) will presumably fail - confirm.
	# NOTE(review): `tqdm` and `json` are not imported explicitly in this file;
	# presumably they come from the wildcard imports - confirm.
	result={'id':[], 'orig_brand':[], 'brand':[], 'brand_short':[], 'brand_2':[], 'brand_2_short':[], 'alt_brands': [],
	'orig_name':[], 'name':[], 'name_wo_brand':[], 'name_with_brand':[],
	'name_2':[], 'name_2_wo_brand':[], 'name_2_with_brand':[],
	'names_wo_alt_brands': [], 'names_with_alt_brands': [], 'names_2_wo_alt_brands': [], 'names_2_with_alt_brands': [],
	'type':[], 'new_type':[], 'type_n':[],
	"type_wine":[], "new_type_wine":[], "type_wine_n":[],
	"sour":[], "volume":[], 'gb':[], "year":[], 'aging':[], 'alco':[]} #, 'orig_attrs':[],}#, 'embeddings':[]}

	# Per-item conflict flags; only the commented-out debug export at the end of
	# this method reads them.
	volume_issues = []
	year_issues = []

	for idf, i in tqdm(zip(df['id'].values, df['attrs'].values)):

	try:
	# Items whose attrs are missing or not a string are skipped entirely.
	if not isinstance(i, str) or not i:
	#print("Skipping item with id=" + str(idf) + " because of incorrect format\n")
	volume_issues.append(0)
	year_issues.append(0)
	continue

	#if not (idf == 2008546 or idf == 2007114 or idf == 2008080) :
	# continue
	#if not idf == 275213:
	# continue
	#if not idf == 173796:
	# continue

	#if idf > 1000:
	# continue

	# The whole JSON text is lower-cased (keys and values alike) and both escaped
	# and literal newlines become the " ##### " marker that
	# process_multiline_name() splits on.
	i = json.loads(i.lower().replace("\\n", " ##### ").replace("\n", " ##### "))

	result['id'].append(idf)

	if 'brand' in i.keys():
	result['orig_brand'].append(i['brand'])
	brand, brand_2 = self.preprocess_item_brand(i['brand'])

	brand = normalize_and_clean_brand(brand)
	brand_2 = normalize_and_clean_brand(brand_2)
	else:
	result['orig_brand'].append(None)
	brand = brand_2 = None


	name = i['name']
	result['orig_name'].append(name)

	# First of all remove from name specific brands that makes collisions while name parsing and trimming
	name, specific_brand, specific_name = replace_specific_brand_and_name(name)
	if specific_brand:
	if brand and specific_brand and (brand != specific_brand):
	print("Conflict between brand and specific brand for item id=[" + str(idf) + "]")
	else:
	brand = specific_brand = normalize_and_clean_brand(specific_brand)
	brand_2 = None

	if specific_name:
	specific_name = normalize_and_clean_name(specific_name)


	# Some items contain many lines separated with a new line. We can easily process them because a new line is a universal separator.
	# Names that carry an alternative name after a slash are handled later by check_alternative_name(), once all attributes have been extracted.
	name, name_2 = self.process_multiline_name(name)


	type_wine = None
	sour_wine = None
	volume = None
	# `alcohol` is never filled from the item attributes, so it always falls back
	# to the value parsed from the name further down.
	alcohol = None
	year = None

	# First of all let's check if it is sparkling wine
	drink_type, name = extract_spark(name, False)

	if not drink_type and ('type_wine' in i.keys()):
	drink_type, _ = extract_spark(i['type_wine'], False)

	# Next let's check any other known type
	if not drink_type and ('type' in i.keys()):
	drink_type, _ = extract_type(i['type'], False)


	if not drink_type and ('type_wine' in i.keys()):
	drink_type, _ = extract_type(i['type_wine'], False)

	# Next let's check any other known type
	if not drink_type and ('category' in i.keys()):
	drink_type, _ = extract_type(i['category'], False)

	# Special case for some brands like 'jaegermeister' which sometimes the only thing specified in name
	# so we try to detect drink type using only brand / name if it is possible
	if not drink_type and brand:
	drink_type = extract_type_by_brand_name(brand)


	# A colour or a "sour" value found while the type is still None defaults the type to 'вино'.
	if 'type_wine' in i.keys():
	type_wine, sour_wine, _ = extract_color_and_sour(i['type_wine'], remove=False)
	if drink_type is None and (type_wine or sour_wine):
	drink_type='вино'


	# Try to extract type_wine and sour from "color" attribute if exists
	if 'color' in i.keys():
	if not type_wine:
	type_wine, _ = extract_color(i['color'])
	if type_wine and drink_type is None:
	drink_type='вино'

	if not sour_wine:
	sour_wine, _ = extract_sour(i['color'])
	if sour_wine and drink_type is None:
	drink_type='вино'


	# Try to extract sour from "sugar" attribute if exists
	# NOTE(review): this branch tests `is None` while the "color" branch above
	# tests `not sour_wine`, so an empty-string sour_wine skips the sugar lookup -
	# confirm that is intended.
	if 'sugar' in i.keys():
	if sour_wine is None:
	sour_wine, _ = extract_sour(i['sugar'])
	if sour_wine and drink_type is None:
	drink_type='вино'


	if 'volume' in i.keys():
	volume = i['volume']


	if 'year' in i.keys():
	year = i['year']


	#alco, _ =extract_alcohol_content(i['name'])
	#result['alco'].append(alco)
	drink_type_n, name = extract_type(name, True)

	# Pull the remaining attributes out of the name; other_n is not used afterwards.
	name, alcohol_n, volume_n, aging, year_n, gb, color_n, sour_wine_n, other_n = extract_attributes_from_name(name)
	name = trim_name(name, self.types_n_others).replace(',', ' ').replace('.', ' ')

	# If alternative name is not specified, then it is time to check it
	# (after we removed all attributes that could break the logic, but before normalization in order to save language difference)
	if not name_2:
	name, name_2 = self.check_alternative_name(name)

	name = normalize_and_clean_name(name)


	# The alternative name gets the same cleaning; attributes found in it are discarded.
	if name_2:
	name_2, _, _, _, _, _, _, _, _ = extract_attributes_from_name(name_2)
	name_2 = trim_name(name_2, self.types_n_others).replace(',', ' ').replace('.', ' ')
	name_2 = normalize_and_clean_name(name_2)


	if specific_brand or specific_name:
	name = restore_specific_brand_and_name(name, specific_brand, specific_name)

	# Check that there is no conflict between values extracted from name and from item attributes
	# In every case below the attribute value wins and the name-derived value only fills a gap.

	if not drink_type:
	drink_type = drink_type_n
	#elif drink_type and drink_type_n and (drink_type != drink_type_n):
	# print("Item drink_type conflict detected for item id=[" + str(idf) + "]: " + str(drink_type) + " vs " + str(drink_type_n))

	if not alcohol:
	alcohol = alcohol_n
	#elif alcohol and alcohol_n and (alcohol != alcohol_n):
	# print("Item alcohol conflict detected for item id=[" + str(idf) + "]: " + str(alcohol) + " vs " + str(alcohol_n))

	vol_issue = 0
	if not volume:
	volume = volume_n
	elif volume and volume_n and (volume != volume_n):
	vol_issue = 1
	#print("Item volume conflict detected for item id=[" + str(idf) + "]: " + str(volume) + " vs " + str(volume_n))

	volume_issues.append(vol_issue)


	# Years are compared as stripped strings, so 2015 and "2015 " do not count as a conflict.
	year_issue = 0
	if not year:
	year = year_n
	elif year and year_n and (str(year).strip() != str(year_n).strip()):
	#print("Item year conflict detected for item id=[" + str(idf) + "]: " + str(year) + " vs " + str(year_n))
	year_issue = 1

	year_issues.append(year_issue)


	if not type_wine:
	type_wine = color_n
	#elif type_wine and color_n and (type_wine != color_n):
	# print("Item type_wine conflict detected for item id=[" + str(idf) + "]: " + str(type_wine) + " vs " + str(color_n))


	if not sour_wine:
	sour_wine = sour_wine_n
	#elif sour_wine and sour_wine_n and (sour_wine != sour_wine_n):
	# print("Item sour_wine conflict detected for item id=[" + str(idf) + "]: " + str(sour_wine) + " vs " + str(sour_wine_n))


	# Finally fill in the data
	# Columns filled with '' or [] are placeholders that this method never populates.
	result['brand'].append(brand)
	result['brand_short'].append('')
	result['brand_2'].append(brand_2)
	result['brand_2_short'].append('')
	result['alt_brands'].append([])

	# The two self-assignments below have no effect.
	if name is None:
	name = name

	if name_2 is None:
	name_2 = name_2

	result['name'].append(name)
	result['name_wo_brand'].append('')
	result['name_with_brand'].append('')
	result['names_wo_alt_brands'].append([])
	result['names_with_alt_brands'].append([])


	result['name_2'].append(name_2)
	result['name_2_wo_brand'].append('')
	result['name_2_with_brand'].append('')
	result['names_2_wo_alt_brands'].append([])
	result['names_2_with_alt_brands'].append([])

	result['new_type'].append('')
	result['type_n'].append('')
	result['new_type_wine'].append('')
	result['type_wine_n'].append('')

	result['type'].append(drink_type)
	result['type_wine'].append(type_wine)
	result['sour'].append(sour_wine)

	result['aging'].append(aging)
	result['alco'].append(alcohol)
	result['gb'].append(gb)
	result['volume'].append(volume)
	result['year'].append(year)

	except Exception as ex:
	# The failure is only printed; processing continues with the next item.
	print("Error occurred while processing item id=" + str(idf), ex)

	#df = df.assign(volume_issues=volume_issues)
	#df = df.assign(year_issues=year_issues)
	#df.to_csv("c:\\!\\feed_items_issues.csv")
	#exit(0)

	return pd.DataFrame(result)


	def prcess_text(self, text):
		"""Strip known attributes out of a free-text name, one kind at a time.

		The extraction order is fixed: word from ``self.gbs``, alcohol
		content, aging years, production year, volume/number, then colour and
		"sour". Each step works on the text left over by the previous one.

		Args:
			text: the name to clean.

		Returns:
			``(text, alcohol, volume_or_number, years, production_year, gb,
			color, sour)`` where ``text`` is what remains after all removals.
		"""
		gb_word = find_full_word(text, self.gbs)
		if gb_word is not None:
			text = text.replace(str(gb_word), ' ')

		alcohol_value, text = extract_alcohol_content(text, True)

		aging_years, text = extract_years(text, True)
		if aging_years is not None:
			# With the number gone, drop the aging keyword that accompanied it.
			for aging_word in ('выдержка', 'aging', 'ageing'):
				text = text.replace(aging_word, ' ')

		vintage, text = extract_production_year(text, True)
		volume_value, text = extract_volume_or_number(text, True)
		wine_color, wine_sour, text = extract_color_and_sour(text, True)

		return (text, alcohol_value, volume_value, aging_years,
				vintage, gb_word, wine_color, wine_sour)


	def process_new(self, products_data, items):
	# Parse `items` and attach product-catalogue brands and types to them.
	#
	# `products_data` is the dict built by process_products_full(); when it has
	# no "df_products" key that method is run first.
	# Returns (items, products): the processed items DataFrame and
	# products_data["df_products"].

	# NOTE(review): process_products_full() returns None on failure, in which
	# case the products_data["df_products"] lookup below raises - confirm that
	# callers expect this.
	if not "df_products" in products_data.keys():
	products_data = self.process_products_full(products_data)

	print('-----------Prepare items catalogue----------')
	items=self.process_items(items.copy())

	products = products_data["df_products"]
	products_brands = products['brand'].unique()

	items['type']=items['type'].replace(self.type_dict)

	print('----------Adding service categories----------')
	merge_wine_type(items, colors=self.type_wine, color_merge_dict=self.color_merge_dict)
	merge_types(items, products, type_merge_dict=self.type_dict, product_types=products_data["dict_types"])


	# Missing brands become the string 'none' here; it is turned back into None
	# further down.
	items['brand']=items['brand'].apply(lambda x: str(x).strip().lower())

	print('----------Fill brands in items----------')
	# NOTE(review): process_items() does not create a 'new_brand' column;
	# presumably these helpers add it to `items` in place - confirm.
	fill_brands_in_dataframe(products_brands, items)
	fill_brands_in_dataframe_2(products_brands, items)

	print('----------Brand matching----------')
	# NOTE(review): get_same_brands() is called twice with identical arguments and
	# `out_prods` is never read; the second call looks redundant unless the helper
	# has side effects - confirm.
	comp_list, prod_brand_list, items_brand_list=get_same_brands(products, items)
	comp_list, prod_brand_list, items_brand_list=get_same_brands(products, items)
	out_prods=list(set(prod_brand_list)-set(comp_list))
	out_items=list(set(items_brand_list)-set(comp_list))
	# Only item brands outside comp_list are sent to the improved matcher.
	brand_map_improved=match_brands_improved(out_items, list(products_brands))
	items["new_brand"] = items["new_brand"].replace(brand_map_improved)


	print('----------Finding brands in names----------')
	items['new_brand']=items['new_brand'].replace('none', None)
	#i_brands=items[items['new_brand'].isna()]['name'].values
	i_brands = items['name'].values
	# Product brands of three characters or fewer are not searched for in names.
	p_brands=[i for i in products_brands if i is not None and len(i)>3]
	#new_found_brands=check_brands_in_strings_pqdm(i_brands, p_brands, threshold=30)
	new_found_brands = check_brands_in_strings_pqdm(i_brands, p_brands)
	# A brand found in the name overwrites new_brand for that item, even if one was already set.
	items.loc[items['name'].isin(new_found_brands.keys()), 'new_brand'] = items['name'].map(new_found_brands)

	print('----------Top inserts----------')
	process_unbrended_names(items, p_brands, self.prcess_text, self.short_types_list, self.grapes, self.other_words)

	items['brand']=items['brand'].replace('none', None)

	#print('----------Replacing product types----------')
	# 11)
	items['new_type'] = items['new_type'].replace(self.type_dict)

	# Same two-level type derivation as in process_products_full().
	items['type_l1'] = items['type'].replace(TYPES_LEVEL_1_DICT)
	items['type_l0'] = items['type_l1'].replace(TYPES_LEVEL_0_DICT)

	#fullpath = os.path.join("c:\\!!\\_items_with_types.pkl")
	#save_df_to_file(items, fullpath, True)
	#exit(1)

	return items, products