Upload 22 files
- .gitignore +3 -0
- api.py +29 -14
- app.py +1 -0
- preprocess/preprocess.py +4 -2
- processor/matching.py +101 -48
- processor/processor.py +3 -3
- search/matching_judge.py +133 -0
- ui/gradio_ui.py +174 -169
.gitignore
ADDED
@@ -0,0 +1,3 @@
+*.pyc
+.idea/*
+_data/*
api.py
CHANGED
@@ -10,6 +10,13 @@ import uvicorn
 from pydantic import BaseModel
 import pandas as pd
 from tmp.utils import update_products_csv
+from search.matching_judge import compare_matching_with_manual
+
+'''compare_matching_with_manual("C:\\Projects (Mediterra)\\!TechLead\\WineMatching\\Data (New4)\\products.csv",
+                                "C:\\Projects (Mediterra)\\!TechLead\\WineMatching\\Data (New4)\\ws-items-for-test.csv",
+                                "C:\\Projects (Mediterra)\\!TechLead\\WineMatching\m1-50-250325-133739.csv",
+                                "C:\\Projects (Mediterra)\\!TechLead\\WineMatching\\Data (New4)\\matching-20250318.csv")'''
+
 
 processor=Processor(LONG_TYPES_LIST,
                     SHORT_TYPES_LIST,
@@ -38,6 +45,7 @@ class match_request(BaseModel):
 
 def get_data_dir():
     return "/home/user/app/_data/"
+    #return "_data"
 
 def get_products_dir():
     return os.path.join(get_data_dir(), "products")
@@ -94,6 +102,8 @@ async def upload_products_csv(file: UploadFile, overwrite_existing: int):
         fullfn = os.path.join(datadir, "products.csv")
         update_products_csv(tempfile, fullfn, overwrite_existing)
 
+        os.remove(tempfile)
+
     except Exception:
         raise HTTPException(status_code=500, detail='Something went wrong')
     finally:
@@ -102,8 +112,8 @@ async def upload_products_csv(file: UploadFile, overwrite_existing: int):
     return {"message": f"Successfully uploaded {file.filename}"}
 
 
-@app.post("/api/upload_items_csv")
-async def upload_items_csv(file: UploadFile = File(...)):
+#@app.post("/api/upload_items_csv")
+def upload_items_csv(file: UploadFile):
     try:
         itemsdir = get_items_dir()
 
@@ -112,14 +122,16 @@ async def upload_items_csv(file: UploadFile = File(...)):
 
         contents = file.file.read()
 
+        fullfn = os.path.join(itemsdir, file.filename)
+        with open(fullfn, 'wb') as f:
             f.write(contents)
     except Exception:
         raise HTTPException(status_code=500, detail='Something went wrong')
     finally:
         file.file.close()
 
-    return {"message": f"Successfully uploaded {file.filename}"}
+    #return {"message": f"Successfully uploaded {file.filename}"}
+    return fullfn
 
 
 @app.get("/api/get_items_csv")
@@ -136,32 +148,35 @@ async def get_items_csv():
 
 
 @app.post("/api/match")
-async def match(
+async def match(items_file: UploadFile, threshold: int, items_first: int):
     prods_file = os.path.join(get_products_dir(), "products.csv")
     if not os.path.isfile(prods_file):
         return {"Status": "Error", "ErrorDesc": "File 'Products.csv' not found"}
 
+    items_fn = upload_items_csv(items_file)
+    #if len(r.items) == 0:
+    #    return {"Status": "Error", "ErrorDesc": "Items file not specified"}
 
-    if not
+    if not threshold:
+        threshold = 50
 
-    items_fn = os.path.join(get_items_dir(), r.items)
-    if not os.path.isfile(items_fn):
+    #items_fn = os.path.join(get_items_dir(), r.items)
+    #if not os.path.isfile(items_fn):
+    #    return {"Status": "Error", "ErrorDesc": "Items file not found"}
 
     row_items = pd.read_csv(items_fn, sep='\t')
+    os.remove(items_fn)
+
    row_products = pd.read_csv(prods_file, sep='\t', on_bad_lines='skip')
 
 
-    df, items, products = processor.process(row_products, row_items,
+    df, items, products = processor.process(row_products, row_items, items_first, threshold)
 
    results_dir = get_results_dir()
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
 
-    output_csv = "m1-" + str(
+    output_csv = "m1-" + str(threshold) + "-" + datetime.datetime.now().strftime('%y%m%d-%H%M%S') + ".csv"
    df.to_csv(os.path.join(results_dir, output_csv), sep='\t', index=False)
 
    return {"Status": "Success", "result_file" : output_csv}
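Client-side usage sketch (not part of the commit): with the new signature async def match(items_file: UploadFile, threshold: int, items_first: int), FastAPI reads items_file from the multipart form body and threshold / items_first from the query string. The host, port and file name below are assumptions.

import requests

# Upload a tab-separated items CSV and trigger a matching run.
with open("items.csv", "rb") as fh:
    resp = requests.post(
        "http://localhost:8000/api/match",           # assumed host/port
        params={"threshold": 50, "items_first": 0},  # query parameters
        files={"items_file": ("items.csv", fh, "text/csv")},
    )

# On success the endpoint returns the name of the result file it wrote, e.g.
# {"Status": "Success", "result_file": "m1-50-<timestamp>.csv"}
print(resp.json())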
app.py
CHANGED
@@ -18,6 +18,7 @@ processor=Processor(LONG_TYPES_LIST,
 searcher=Searcher()
 
 ui=GradioUI(processor, searcher, "/home/user/app/_data/")
+#ui=GradioUI(processor, searcher, "_data")
 ui.run_ui()
 
 
preprocess/preprocess.py
CHANGED
@@ -31,7 +31,7 @@ class Preprocessor():
 
 
     def process_items(self, df):
-        result={'id':[], 'brand':[], 'name':[], 'type':[], "type_wine":[], "volume":[], "year":[], 'alco':[]}#, 'embeddings':[]}
+        result={'id':[], 'brand':[], 'name':[], 'fullname':[], 'type':[], "type_wine":[], "volume":[], "year":[], 'alco':[]}#, 'embeddings':[]}
        #counter=0
        for idf, i in tqdm(zip(df['id'].values, df['attrs'].values)):
 
@@ -42,6 +42,7 @@ class Preprocessor():
                result['brand'].append(i['brand'])
            else: result['brand'].append(None)
            result['name'].append(i['name'])
+            result['fullname'].append(i['name'])
            drink_type=get_type(i, self.long_types_list)
            if drink_type is None:
                drink_type=check_spark(i)
@@ -77,7 +78,7 @@ class Preprocessor():
 
 
     def process_products(self, products):
-        result={'id':[], 'brand':[], 'name':[], 'type':[], "type_wine":[], "volume":[], "year":[], 'alco':[]}#, 'embeddings':[]}
+        result={'id':[], 'brand':[], 'name':[], 'fullname':[], 'type':[], "type_wine":[], "volume":[], "year":[], 'alco':[]}#, 'embeddings':[]}
        for idx, row in tqdm(products.iterrows()):
            try:
                result['id'].append(row['id'])
@@ -85,6 +86,7 @@ class Preprocessor():
                result['type_wine'].append(row['category'])
                result['type'].append(row['product_type'])
                result['name'].append(row['name_long'])
+                result['fullname'].append(row['name_long'])
                vol=extract_volume_or_number(row['name'])
                result['volume'].append(vol)
                #year=extract_production_year(row['name'])
processor/matching.py
CHANGED
@@ -30,7 +30,7 @@ def prepare_groups_with_ids(items_df):
     items_df['norm_name'] = items_df['name'].apply(normalize_name)
 
     grouped = items_df.groupby(['new_brand', 'type', 'volume', 'new_type_wine', 'sour']).apply(
-        lambda x: list(zip(x['id'], x['name'], x['norm_name'], x['volume'], x['new_type_wine'], x['sour'], x['year']))
+        lambda x: list(zip(x['id'], x['name'], x['fullname'], x['norm_name'], x['volume'], x['new_type_wine'], x['sour'], x['year']))
     ).to_dict()
     return grouped
 
@@ -46,11 +46,38 @@ def prepare_groups_by_alternative_keys(items_df):
     items_df['norm_name'] = items_df['name'].apply(normalize_name)
 
     grouped = items_df.groupby(['new_type_wine', 'new_type', 'volume', 'sour']).apply(
-        lambda x: list(zip(x['id'], x['new_brand'], x['name'], x['norm_name'], x['volume'], x['new_type_wine'], x['sour'], x['year']))
+        lambda x: list(zip(x['id'], x['new_brand'], x['name'], x['fullname'], x['norm_name'], x['volume'], x['new_type_wine'], x['sour'], x['year']))
     ).to_dict()
     return grouped
 
+
+def order_by_best_year(matched_items, year):
+    best_matched_items = []
+    max_year_matched_items = []
+    other_matched_items = []
+    max_year = 0
+
+    for mi in matched_items:
+        # If the original specifies a year, look for an exact match; otherwise sort by year in descending order
+        if year and (int(year) != 0) and (mi['year'] == year):
+            best_matched_items.append(mi)
+        elif mi['year'] and int(mi['year']) != 0:
+            if int(mi['year']) > max_year:
+                max_year_matched_items = [mi]
+                max_year = int(mi['year'])
+            elif int(mi['year']) > max_year:
+                max_year_matched_items.append(mi)
+            else:
+                other_matched_items.append(mi)
+        else:
+            other_matched_items.append(mi)
+
+    best_matched_items.extend(max_year_matched_items)
+    best_matched_items.extend(other_matched_items)
+    return best_matched_items
+
+
+def new_find_matches_with_ids(products_df, items_groups, items_df, name_threshold=85, include_alternatives=True):
     """
     Search for matches, keeping the ids of the matched items, using pre-built
     normalized groups.
@@ -71,6 +98,9 @@ def new_find_matches_with_ids(products_df, items_groups, items_df, name_threshol
     results = []
     no_match_products = []  # list of products with no match in the original group
 
+    if name_threshold < 50:
+        name_threshold = 50
+
     # First pass: search within the (brand, type, volume, new_type_wine, sour) groups
     for idx, product in tqdm(products_df.iterrows(), total=len(products_df)):
         product_brand = product['brand']
@@ -84,18 +114,21 @@ def new_find_matches_with_ids(products_df, items_groups, items_df, name_threshol
         items_data = items_groups.get(key, [])
         if items_data:
             # Unpack: id, original name, normalized name, volume, new_type_wine, sour
-            items_ids, items_names, items_norm_names, items_volumes, item_type_wine, items_sour, items_year = zip(*items_data)
+            items_ids, items_names, items_full_names, items_norm_names, items_volumes, item_type_wine, items_sour, items_year = zip(*items_data)
         else:
-            items_ids, items_names, items_norm_names, items_volumes, item_type_wine, items_sour, items_year = ([], [], [], [], [], [],[])
+            items_ids, items_names, items_full_names, items_norm_names, items_volumes, item_type_wine, items_sour, items_year = ([], [], [], [], [],[], [], [])
 
         norm_product_name = normalize_name(product_name)
         matches = process.extract(
-            norm_product_name, list(items_norm_names), scorer=fuzz.ratio, score_cutoff=name_threshold
+            norm_product_name, list(items_norm_names), scorer=fuzz.ratio, score_cutoff=name_threshold, limit=20
         )
+
         matched_items = [
             {
                 'item_id': items_ids[idx_candidate],
-                'item_name': items_names[idx_candidate],
+                'brand': product_brand,
+                'item_name': items_full_names[idx_candidate],
+                #'item_name': items_names[idx_candidate],
                 'score': score,
                 'volume': items_volumes[idx_candidate],
                 'color': item_type_wine[idx_candidate],
@@ -105,54 +138,74 @@ def new_find_matches_with_ids(products_df, items_groups, items_df, name_threshol
             for match, score, idx_candidate in matches
         ]
 
+
+        if matched_items:
+            matched_items = order_by_best_year(matched_items, product['year'])
+            matched_items = matched_items[:5]
+        else:
             no_match_products.append((idx, product))
 
+
+
         results.append({
             'product_id': product['id'],
+            #"matched_top_id": top_matched_id,
             'matched_items': matched_items,
+            #"alternative_top_id": "",
+            #'alternative': []  # filled in during the second pass
         })
 
+    if include_alternatives:
+        # Build the alternative grouping by (new_type_wine, new_type, volume, sour)
+        groups_by_alternative_keys = prepare_groups_by_alternative_keys(items_df)
+
+        # Second pass: for products without matches, search the alternative groups
+        for idx, product in tqdm(no_match_products):
+            product_brand = product['brand']
+            product_type_wine = product['new_type_wine']
+            product_type = product['new_type']
+            product_volume = product['volume']
+            product_name = product['name']
+            product_sour = product['sour']
+
+            alt_key = (product_type_wine, product_type, product_volume, product_sour)
+            type_items = groups_by_alternative_keys.get(alt_key, [])
+            # Filter out items that carry the original brand
+            filtered_items = [item for item in type_items if item[1] != product_brand]
+            if filtered_items:
+                alt_ids, alt_brands, alt_names, alt_full_names, alt_norm_names, alt_volumes, alt_type_wine, alt_sour, alt_year = zip(*filtered_items)
+            else:
+                alt_ids, alt_brands, alt_names, alt_full_names, alt_norm_names, alt_volumes, alt_type_wine, alt_sour, alt_year = ([], [], [], [], [], [], [],[], [])
+
+            norm_product_name = normalize_name(product_name)
+            alt_matches = process.extract(
+                norm_product_name, list(alt_norm_names), scorer=fuzz.ratio, score_cutoff=name_threshold
+            )
+            alt_matched_items = [
+                {
+                    'item_id': alt_ids[idx_candidate],
+                    'brand': alt_brands[idx_candidate],
+                    #'item_name': alt_names[idx_candidate],
+                    'item_name': alt_full_names[idx_candidate],
+                    'score': score / 2,
+                    'volume': alt_volumes[idx_candidate],
+                    'color': alt_type_wine[idx_candidate],
+                    'sour': alt_sour[idx_candidate],
+                    'year': alt_year[idx_candidate],
+                }
+                for match, score, idx_candidate in alt_matches
+            ]
+
+            alt_matched_items = order_by_best_year(alt_matched_items, product['year'])
+            alt_matched_items = alt_matched_items[:5]
+
+            results[idx]['matched_items'].extend(alt_matched_items)
+
+
+            #if alt_matched_items:
+            #    results[idx]['alternative_top_id'] = alt_matched_items[0]["item_id"]
+
+            #results[idx]['alternative'] = alt_matched_items
 
     results_df = pd.DataFrame(results)
     merged_df = products_df.merge(results_df, left_on='id', right_on='product_id').drop(columns=['product_id'])
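A small, self-contained illustration of the new order_by_best_year ordering (assuming the processor package is importable): candidates whose year equals the product's year come first, then the candidate holding the highest year, then everything else. Note that the second elif in the committed code repeats the condition of the if above it and can never run, so candidates displaced from the running-maximum bucket are dropped from the returned list rather than demoted; it was presumably meant to be ==.

from processor.matching import order_by_best_year

candidates = [
    {"item_id": 1, "score": 90, "year": 2018},
    {"item_id": 2, "score": 88, "year": 2021},
    {"item_id": 3, "score": 95, "year": None},
]

# Product vintage 2018: the exact-vintage candidate is promoted to the front,
# then the newest remaining vintage, then candidates without a usable year.
ordered = order_by_best_year(candidates, 2018)
print([m["item_id"] for m in ordered])  # [1, 2, 3]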
processor/processor.py
CHANGED
@@ -11,7 +11,7 @@ class Processor():
                                        type_wine, gbs, colors_for_trim, grapes, other_words,
                                        sour_merge_dict, type_merge_dict, color_merge_dict)
 
-    def process(self, products, items, is_items_first=False, th=65):
+    def process(self, products, items, is_items_first=False, th=65, include_alternatives=True):
        items, products=self.preprocessor.process(products, items)
 
        print('-----*-----Matching-----*-----')
@@ -20,9 +20,9 @@ class Processor():
            products['new_brand']=products['brand']
            items['brand']=items['new_brand']
            products_groups = prepare_groups_with_ids(products)
-            res=new_find_matches_with_ids(items, products_groups, products, name_threshold=th)
+            res=new_find_matches_with_ids(items, products_groups, products, name_threshold=th, include_alternatives=include_alternatives)
        else:
            items_groups = prepare_groups_with_ids(items)
-            res=new_find_matches_with_ids(products, items_groups, items, name_threshold=th)
+            res=new_find_matches_with_ids(products, items_groups, items, name_threshold=th, include_alternatives=include_alternatives)
 
        return res.drop(['type','type_wine','alco','gb'], axis=1), items, products
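Illustrative only: how the new keyword arguments thread through Processor.process, assuming proc is an already-configured Processor (constructed with the word lists and merge dictionaries as in api.py / app.py) and that both CSVs are tab-separated, as the API endpoint expects.

import pandas as pd

def run_match(proc, products_csv, items_csv, threshold=65):
    row_products = pd.read_csv(products_csv, sep="\t", on_bad_lines="skip")
    row_items = pd.read_csv(items_csv, sep="\t")
    # include_alternatives=True enables the second, brand-agnostic pass added in
    # processor/matching.py; th is the fuzzy-name cutoff (clamped to >= 50 there).
    df, items, products = proc.process(
        row_products, row_items,
        is_items_first=False,
        th=threshold,
        include_alternatives=True,
    )
    return df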
search/matching_judge.py
ADDED
@@ -0,0 +1,133 @@
+import json
+import pandas as pd
+import ast
+import csv
+
+
+
+def compare_matching_with_manual(products_file, items_file, match_result_file, manual_result_file):
+    '''with open(products_file, mode="r", encoding="utf-8", newline='') as csvfile:
+        csvreader = csv.reader(csvfile, dialect="excel-tab")
+        for row in csvreader:
+            print(', '.join(row))'''
+
+    products_df = pd.read_csv(products_file, sep="\t")
+    items_df = pd.read_csv(items_file, sep=";")
+    match_df = pd.read_csv(match_result_file, sep="\t")
+    manual_df = pd.read_csv(manual_result_file, sep="\t")
+
+    results = {
+        "item_count" : int(items_df.count()[0]),
+        "product_count" : int(products_df.count()[0]),
+        "match_count" : int(match_df.count()[0]),
+        "manual_count" : int(manual_df.count()[0]),
+    }
+
+
+    items_to_manual = {}
+    for index, row in items_df.iterrows():
+        x = manual_df[manual_df['item_id'] == row["id"]]['state']
+        if (len(x) > 0) and (x.values[0] == 1):
+            p = products_df[products_df["id"] == manual_df.iloc[int(x.index[0])]["product_id"]]
+            items_to_manual[row["id"]] = int(manual_df.iloc[int(x.index[0])]["product_id"])
+
+
+    '''items_to_auto = {}
+    for index, row in match_df.iterrows():
+        if row["matched_top_id"] > 0:
+            p = products_df[products_df["id"] == int(row["matched_top_id"])]
+            items_to_auto[row["id"]] = int(row["matched_top_id"])
+
+    results["items_to_manual_count"] = len(items_to_manual)
+    results["items_to_auto_count"] = len(items_to_auto)'''
+
+
+    result_list = []
+
+    for index, row in items_df.iterrows():
+        result_data = {}
+
+        result_data["id"] = row["id"]
+        result_data["match_side"] = "no_match"
+        result_data["auto_score"] = ""
+        result_data["manual_score"] = ""
+        result_data["discuss"] = ""
+
+
+        auto_match = match_df[match_df['id'] == row["id"]]["matched_items"].values[0]
+        '''if len(auto_match) > 2:
+            if auto_match.find("\\'") >= 0:
+                auto_match = auto_match
+
+            auto_match = auto_match.replace("\\'", "$$$$$$").replace(": None}", ": \"\"}").replace("'", '"').replace("$$$$$$", "\\'")
+
+            auto_match = json.loads(auto_match)'''
+
+        manual_match = None
+        manual = manual_df[manual_df['item_id'] == row["id"]]['state']
+        if (len(manual) > 0) and (manual.values[0] == 1):
+            p = products_df[products_df["id"] == manual_df.iloc[int(manual.index[0])]["product_id"]]
+
+            if len(p.values) > 0:
+                manual_match = p
+            else:
+                print("Manually matched product id=" + str(manual_df.iloc[int(manual.index[0])]["product_id"]) + " for item=" + str(row["id"]) + " not found")
+
+        if (auto_match is not None) and len(auto_match) > 2 and (manual_match is not None):
+            result_data["match_side"] = "both"
+
+            manual_id = int(manual_match["id"].values[0])
+            auto_match_ns = auto_match.replace(" ", "")
+            i1 = auto_match_ns.find("'item_id':")
+            i2 = auto_match_ns.find("'item_id':" + str(manual_id) + ",")
+
+            if i1 == i2:
+                result_data["auto_score"] = 1
+                result_data["manual_score"] = 1
+            elif i2 >= 0:
+                result_data["auto_score"] = 0.5
+                result_data["manual_score"] = 0.5
+        elif (auto_match is not None) and len(auto_match) > 2:
+            result_data["match_side"] = "only_auto"
+        elif manual_match is not None:
+            result_data["match_side"] = "only_manual"
+
+        result_data["discuss"] = ""
+        result_data["item"] = row["attrs"]
+
+
+        result_data["auto_match"] = auto_match
+
+        manual_string = ""
+        if (manual_match is not None):
+            manual_string = '{' + \
+                '"id": ' + str(manual_match["id"].values[0]) + ',' + \
+                '"brand": "' + str(manual_match["brand"].values[0]) + '",' + \
+                '"name": "' + str(manual_match["name_long"].values[0]) + '",' + \
+                '"volume": ' + str(manual_match["volume"].values[0]) + '",' + \
+                '"year": ' + str(manual_match["year"].values[0]) + '"}'
+
+        result_data["manual_match"] = manual_string
+        result_list.append(result_data)
+
+
+    results_df = pd.DataFrame(result_list)
+    results_df.to_csv("C:\\Projects (Mediterra)\\!TechLead\\WineMatching\\Data (New4)\\mjudge_new.csv")
+
+
+
+    '''common_match = {}
+    full_match = {}
+    for a_match in items_to_auto:
+        if a_match in items_to_manual:
+            common_match[a_match] = [items_to_auto[a_match], items_to_manual[a_match]]
+            if items_to_auto[a_match] == items_to_manual[a_match]:
+                full_match[a_match] = items_to_auto[a_match]'''
+
+
+    #results["items_to_manual"] = len(items_to_manual)
+    #results["items_to_auto"] = len(items_to_auto
+    print(results)
+
+    return results
+
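Usage sketch with hypothetical local paths (the commented-out call in api.py uses absolute Windows paths): compare_matching_with_manual expects tab-separated products, match-result and manual-result files and a semicolon-separated items file, and currently writes its per-item report to a hard-coded Windows path before returning the summary counts, so the call only completes where that path is writable.

from search.matching_judge import compare_matching_with_manual

summary = compare_matching_with_manual(
    "_data/products/products.csv",             # products catalogue, tab-separated
    "_data/items/ws-items-for-test.csv",       # raw items export, semicolon-separated
    "_data/results/m1-50-250325-133739.csv",   # output of a matching run
    "_data/results/matching-20250318.csv",     # manually confirmed matches
)
# summary holds row counts: item_count, product_count, match_count, manual_count
print(summary)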
ui/gradio_ui.py
CHANGED
@@ -1,170 +1,175 @@
 from argparse import ArgumentError
 
 import gradio as gr
 import pandas as pd
 from preprocess.utils.common.utils import get_delimiter
 from tmp.utils import update_products_csv #remover,
 import os
 import datetime, time
 
 
 class GradioUI():
 
     def __init__(self, processor, searcher, data_path):
         self.processor=processor
         self.searcher=searcher
         self.data_path = data_path
 
     def get_data_dir(self):
         return self.data_path
 
     def get_products_dir(self):
         return os.path.join(self.get_data_dir(), "products")
 
     def get_items_dir(self):
         return os.path.join(self.get_data_dir(), "items")
 
     def get_results_dir(self):
         return os.path.join(self.get_data_dir(), "results")
 
     def get_products_file_date(self):
         fullfn = os.path.join(self.data_path, "products", "products.csv")
         if not os.path.isfile(fullfn):
             return "Файл Products не найден"
 
         stinfo = os.stat(fullfn)
         return time.ctime(stinfo.st_mtime)
 
 
     def upload_products_file(self, prods_file, overwrite_existing):
         try:
             if not os.path.exists(self.get_products_dir()):
                 os.makedirs(self.get_products_dir())
 
             fullfn = os.path.join(self.get_products_dir(), "products.csv")
 
             if prods_file != None:
                 update_products_csv(prods_file, fullfn, overwrite_existing)
 
             gr.Info("Файл Products успешно загружен")
         except Exception as ex:
             raise gr.Error("An error occurred 💥!" + "\n\n" + str(ex), duration=5)
 
 
-    def process_items(self, items_file, is_items_first, threshold): #, q_id):
+    def process_items(self, items_file, is_items_first, threshold, include_alternatives): #, q_id):
         try:
             prods_file = os.path.join(self.get_products_dir(), "products.csv")
             if not os.path.isfile(prods_file):
                 raise Exception("Файл Products не найден")
 
             if items_file != None:
                 items_delimiter=get_delimiter(items_file)
                 print('items delimiter: '+items_delimiter)
-                row_items=pd.read_csv(items_file, sep=items_delimiter, on_bad_lines='skip')
+                #row_items=pd.read_csv(items_file, sep=items_delimiter, on_bad_lines='skip')
+                row_items = pd.read_csv(items_file, sep=items_delimiter)
+                if not 'attrs' in row_items.columns.values:
+                    raise Exception("Uploaded Items CSV does not seem to be valid")
+
+            products_delimiter=get_delimiter(prods_file)
+            print('products delimiter: '+products_delimiter)
+            #row_products=pd.read_csv(prods_file, sep=products_delimiter, on_bad_lines='skip')
+            row_products = pd.read_csv(prods_file, sep=products_delimiter)
+
+            # if q_id in row_products['id'].unique():
+            #     row_products=row_products[row_products['id']==q_id]
+
+            #print("product id: " + str(q_id))
+
+            df, items, products = self.processor.process(row_products, row_items, is_items_first, threshold, include_alternatives)
+
+            self.searcher.set_df(df.copy())
+            #with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
+            #    output_csv = tmp.name
+            results_path = self.get_results_dir()
+            if not os.path.exists(results_path):
+                os.makedirs(results_path)
+
+            output_csv = "m1-" + str(threshold) + "-" + datetime.datetime.now().strftime('%y%m%d-%H%M%S') + ".csv"
+            output_csv = os.path.join(results_path, output_csv)
+            df.to_csv(output_csv, sep='\t', index=False)
+            return output_csv
+        except Exception as ex:
+            raise gr.Error("An error occurred 💥!"+"\n\n"+str(ex), duration=5)
+
+    def on_page_load(self, r: gr.Request):
+        m_time = self.get_products_file_date()
+        return [f"Дата последнего обновления файла Products: {m_time}", f"Дата последнего обновления файла Products: {m_time}"]
+
+
+    def run_ui(self):
+        with gr.Blocks() as demo:
+            tabs = gr.Tabs()
+            with tabs:
+
+                # with gr.Row():
+                #     file_input1 = gr.File(label="Products", type="filepath", file_types=[".csv"])
+                #     process_button = gr.Button("Обновить")
+
+                # Tab for processing CSV files
+                with gr.TabItem("Обработка каталога поставщика"):
+                    gr.Markdown("## Обработка каталога поставщика")
+
+                    m_time = self.get_products_file_date()
+                    prod_file_info2 = gr.Markdown(f"Дата последнего обновления файла Products: {m_time}")
+                    with gr.Row():
+                        #file_input1 = gr.File(label="Products", type="filepath", file_types=[".csv"])
+                        file_items = gr.File(label="Items", type="filepath", file_types=[".csv"])
+                        #search_number = gr.Number(label="Введите ID продукта", value=0, precision=0)
+                    with gr.Row():
+                        toggle_input = gr.Checkbox(label="Инвертировать поиск", value=True)
+                        toggle_alternative = gr.Checkbox(label="Включать в результаты альтернативные варианты", value=True)
+
+                    threshold_input = gr.Slider(minimum=0, maximum=100, step=1, label="Threshold", value=50)
+                    process_button = gr.Button("Загрузить файл с каталогом и сравнить")
+                    output_file = gr.File(label="Скачать результат (CSV)")
+                    process_button.click(
+                        fn=self.process_items,
+                        inputs=[file_items, toggle_input, threshold_input, toggle_alternative], #, search_number],
+                        outputs=output_file
+                    )
+
+                with gr.TabItem("Загрузка файла Products"):
+                    prod_file_info1 = gr.Markdown("## Загрузка файла Products")
+                    with gr.Row():
+                        file_input1 = gr.File(label="Products", type="filepath", file_types=[".csv"])
+                    with gr.Row():
+                        toggle_input = gr.Checkbox(label="Перезаписать существующий файл Product", value=False)
+                    upload_button = gr.Button("Загрузить файл")
+                    upload_button.click(
+                        fn=self.upload_products_file,
+                        inputs=[file_input1, toggle_input],
+                        #outputs=output_file
+                    )
+
+
+                # Search tab
+                with gr.TabItem("Поиск в обработанном csv"):
+                    gr.Markdown("## Поиск")
+                    search_number = gr.Number(label="Введите ID продукта", value=0, precision=0)
+                    search_button = gr.Button("Найти")
+                    search_table = gr.Dataframe(label="Результаты поиска")
+                    search_button.click(
+                        fn=self.searcher.search,
+                        inputs=[search_number],
+                        outputs=search_table
+                    )
+
+                with gr.TabItem("Загрузка результат и поиск в нем"):
+                    gr.Markdown("## Поиск")
+                    with gr.Row():
+                        input_path = gr.File(label="Matching result", type="filepath", file_types=[".csv"])
+                        search_number = gr.Number(label="Введите ID продукта", value=0, precision=0)
+                    search_button = gr.Button("Найти")
+                    search_table = gr.Dataframe(label="Результаты поиска")
+                    search_button.click(
+                        fn=self.searcher.search_in_uploaded_file,
+                        inputs=[input_path, search_number],
+                        outputs=search_table
+                    )
+
+                #with gr.TabItem("Удалить сохраненные продукты"):
+                #    del_button = gr.Button("Удалить")
+                #    process_button.click(fn=remover)
+
+            demo.load(fn=self.on_page_load, inputs=None, outputs=[prod_file_info1, prod_file_info2])
         demo.launch()