Spaces:

knguyen471
/

team-149-project

Sleeping

knguyen471 committed 19 days ago

Commit

812c65f

verified ·

1 Parent(s): 65c08a9

Upload main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import torch
 import nltk
 import benepar
@@ -24,9 +25,9 @@ benepar.download('benepar_en3_large')
 # Load dataset
 data = pd.read_csv("data/toy_data_aggregated_embeddings.csv")
-# Group by source
-restaurant_by_source = {k: v["id"].tolist() for k, v in data.groupby("source")}
-restaurant_by_source
 # Load precomputed TF-IDF features
 restaurant_tfidf_features = np.load("data/toy_data_tfidf_features.npz")
@@ -77,7 +78,7 @@ def retrieve_candidates(query: str, n_candidates: int):
     return candidates_idx
-def rerank(candidates_idx: np.ndarray, n_rec: int = 10, ) -> list:
     # Get popularity scores for stage 1 candidates
     rerank_scores = data.loc[candidates_idx, "pop_score"].values
@@ -91,8 +92,8 @@ def rerank(candidates_idx: np.ndarray, n_rec: int = 10, ) -> list:
     return restaurant_ids
-def get_recommendations(query: str, n_candidates: int = 100, n_rec: int = 30):
     query_clean = clean_text(query)
-    candidates_idx = retrieve_candidates(query_clean, n_candidates)
-    restaurant_ids = rerank(candidates_idx, n_rec)
     return restaurant_ids

+import json
 import torch
 import nltk
 import benepar
 # Load dataset
 data = pd.read_csv("data/toy_data_aggregated_embeddings.csv")
+# Load restaurant_by_source
+with open("data/restaurant_by_source.json", "r") as f:
+    restaurant_by_source = json.load(f)
 # Load precomputed TF-IDF features
 restaurant_tfidf_features = np.load("data/toy_data_tfidf_features.npz")
     return candidates_idx
+def rerank(candidates_idx: np.ndarray, n_rec: int = 10, data_source: str) -> list:
     # Get popularity scores for stage 1 candidates
     rerank_scores = data.loc[candidates_idx, "pop_score"].values
     return restaurant_ids
+def get_recommendations(query: str, n_candidates: int = 100, n_rec: int = 30, data_source: str = None):
     query_clean = clean_text(query)
+    candidates_idx = retrieve_candidates(query_clean, n_candidates)
+    restaurant_ids = rerank(candidates_idx, n_rec, data_source)
     return restaurant_ids