Spaces:
Sleeping
Sleeping
Commit ·
12a1907
1
Parent(s): 217fa17
Update func.py
Browse files
func.py
CHANGED
|
@@ -12,7 +12,8 @@ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cp
|
|
| 12 |
|
| 13 |
def filter_by_ganre(df: pd.DataFrame, ganre_list: list):
|
| 14 |
filtered_df = df[df['ganres'].apply(lambda x: any(g in ganre_list for g in(x)))]
|
| 15 |
-
|
|
|
|
| 16 |
|
| 17 |
def mean_pooling(model_output, attention_mask):
|
| 18 |
token_embeddings = model_output['last_hidden_state']
|
|
@@ -21,7 +22,7 @@ def mean_pooling(model_output, attention_mask):
|
|
| 21 |
sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
|
| 22 |
return sum_embeddings / sum_mask
|
| 23 |
|
| 24 |
-
def recommendation(df: pd.DataFrame, embeddings:np.array, user_text: str, n=10):
|
| 25 |
token_user_text = tokenizer(user_text, return_tensors='pt', padding='max_length', truncation=True, max_length=512)
|
| 26 |
user_embeddings = torch.Tensor().to(device)
|
| 27 |
model.to(device)
|
|
@@ -31,7 +32,7 @@ def recommendation(df: pd.DataFrame, embeddings:np.array, user_text: str, n=10):
|
|
| 31 |
outputs = model(**batch)
|
| 32 |
user_embeddings = torch.cat([user_embeddings, mean_pooling(outputs, batch['attention_mask'])])
|
| 33 |
user_embeddings = user_embeddings.cpu().numpy()
|
| 34 |
-
cosine_similarities = cosine_similarity(embeddings, user_embeddings.reshape(1, -1))
|
| 35 |
df_res = pd.DataFrame(cosine_similarities.ravel(), columns=['cos_sim']).sort_values('cos_sim', ascending=False)
|
| 36 |
dict_topn = df_res.iloc[:n, :].cos_sim.to_dict()
|
| 37 |
return dict_topn
|
|
|
|
| 12 |
|
| 13 |
def filter_by_ganre(df: pd.DataFrame, ganre_list: list):
|
| 14 |
filtered_df = df[df['ganres'].apply(lambda x: any(g in ganre_list for g in(x)))]
|
| 15 |
+
filt_ind = filtered_df.index.to_list()
|
| 16 |
+
return filt_ind
|
| 17 |
|
| 18 |
def mean_pooling(model_output, attention_mask):
|
| 19 |
token_embeddings = model_output['last_hidden_state']
|
|
|
|
| 22 |
sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
|
| 23 |
return sum_embeddings / sum_mask
|
| 24 |
|
| 25 |
+
def recommendation(filt_ind: list, embeddings: np.array, user_text: str, n=10):
|
| 26 |
token_user_text = tokenizer(user_text, return_tensors='pt', padding='max_length', truncation=True, max_length=512)
|
| 27 |
user_embeddings = torch.Tensor().to(device)
|
| 28 |
model.to(device)
|
|
|
|
| 32 |
outputs = model(**batch)
|
| 33 |
user_embeddings = torch.cat([user_embeddings, mean_pooling(outputs, batch['attention_mask'])])
|
| 34 |
user_embeddings = user_embeddings.cpu().numpy()
|
| 35 |
+
cosine_similarities = cosine_similarity(embeddings[filt_ind], user_embeddings.reshape(1, -1))
|
| 36 |
df_res = pd.DataFrame(cosine_similarities.ravel(), columns=['cos_sim']).sort_values('cos_sim', ascending=False)
|
| 37 |
dict_topn = df_res.iloc[:n, :].cos_sim.to_dict()
|
| 38 |
return dict_topn
|