victor7246 committed on
Commit
0ad0d08
·
verified ·
1 Parent(s): 6ee26e1

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +7 -1
utils.py CHANGED
@@ -39,7 +39,7 @@ class EmbeddingsSearch:
39
 
40
  self.model = emb_model
41
  self.metadata_df = metadata_df
42
- self.embeddings = self.model.encode(self.metadata_df['desc'].tolist())
43
 
44
  def __call__(self, text: str, topk: int = 5):
45
  q_emb = self.model.encode([text])
@@ -101,6 +101,12 @@ metadata_df2 = xls.parse('Table explanations',header=1).dropna(axis=0,how='all')
101
  metadata_df2.columns = ['table','nickname','metadata']
102
  metadata_df2.table = metadata_df2.table.apply(lambda x: x.strip())
103
  metadata_df = pd.merge(metadata_df, metadata_df2, how='inner')
 
 
 
 
 
 
104
  #metadata_df.loc[metadata_df.table == 'History_All_Skus_Availability', 'table'] = 'TBL_History_All_Skus_Availability'
105
  #metadata_df.loc[metadata_df.table == 'daily_inventory', 'table'] = 'TBL_DAILY_INVENTORY'
106
  #metadata_df.loc[metadata_df.table == 'HISTORY_OpenOrderShortage', 'table'] = 'TBL_HISTORY_OpenOrderShortage'
 
39
 
40
  self.model = emb_model
41
  self.metadata_df = metadata_df
42
+ self.embeddings = self.model.encode(self.metadata_df['final_metadata'].tolist())
43
 
44
  def __call__(self, text: str, topk: int = 5):
45
  q_emb = self.model.encode([text])
 
101
  metadata_df2.columns = ['table','nickname','metadata']
102
  metadata_df2.table = metadata_df2.table.apply(lambda x: x.strip())
103
  metadata_df = pd.merge(metadata_df, metadata_df2, how='inner')
104
+ table_desc = pd.read_csv("table_desc.csv", lineterminator='\n')
105
+ table_desc.columns = ['table','desc']
106
+ metadata_df = metadata_df.drop(['desc'], axis=1)
107
+ metadata_df = pd.merge(metadata_df, table_desc, how='inner')
108
+ metadata_df['final_metadata'] = metadata_df.apply(lambda x: x["desc"] + "\n" + x['metadata'], axis=1)
109
+
110
  #metadata_df.loc[metadata_df.table == 'History_All_Skus_Availability', 'table'] = 'TBL_History_All_Skus_Availability'
111
  #metadata_df.loc[metadata_df.table == 'daily_inventory', 'table'] = 'TBL_DAILY_INVENTORY'
112
  #metadata_df.loc[metadata_df.table == 'HISTORY_OpenOrderShortage', 'table'] = 'TBL_HISTORY_OpenOrderShortage'