Spaces:

knguyen471
/

team-149-project

Running

App Files Files Community

knguyen471 committed 18 days ago

Commit

ce42873

verified ·

1 Parent(s): 78f14ab

Upload 2 files

Browse files

Files changed (2) hide show

app.py +6 -5
main.py +11 -14

app.py CHANGED Viewed

@@ -108,6 +108,7 @@ with gr.Blocks(
             data_source = gr.Dropdown(
                 choices=["Michelin Guide", "Google", "Yelp"],
                 value="Yelp",
                 label="Data Source",
                 info="Select restaurant data source"
             )
@@ -142,10 +143,10 @@ with gr.Blocks(
     examples = [
         ["Italian pasta", "Yelp", 10],
-        ["sushi", "Michelin", 10],
         ["romantic dinner", "Google", 8],
         ["family-friendly pizza", "Yelp", 10],
-        ["best seafood", "Michelin", 10],
         ["cheap burger", "Google", 10]
     ]
@@ -172,7 +173,7 @@ if __name__ == "__main__":
     print("Opening at http://127.0.0.1:7860\n")
     # if run locally
-    # app.launch(share=False, server_name="127.0.0.1", server_port=7860, inbrowser=True)
-    # if run on HF Space
-    app.launch(ssr_mode=False)

             data_source = gr.Dropdown(
                 choices=["Michelin Guide", "Google", "Yelp"],
                 value="Yelp",
+                multiselect=True,
                 label="Data Source",
                 info="Select restaurant data source"
             )
     examples = [
         ["Italian pasta", "Yelp", 10],
+        ["sushi", "Michelin Guide", 10],
         ["romantic dinner", "Google", 8],
         ["family-friendly pizza", "Yelp", 10],
+        ["best seafood", "Michelin Guide", 10],
         ["cheap burger", "Google", 10]
     ]
     print("Opening at http://127.0.0.1:7860\n")
     # if run locally
+    app.launch(share=False, server_name="127.0.0.1", server_port=7860, inbrowser=True)
+    # # if run on HF Space
+    # app.launch(ssr_mode=False)

main.py CHANGED Viewed

@@ -12,11 +12,7 @@ from utils.semantic_similarity import Encoder
 from utils.syntactic_similarity import Parser
 from utils.tfidf_similarity import TFIDF_Vectorizer
-# Set default device to CUDA if available, otherwise CPU
-if torch.cuda.is_available():
-    torch.set_default_device("cuda")
-else:
-    torch.set_default_device("cpu")
 # Download models/data
 nltk.download('punkt')
@@ -30,9 +26,7 @@ data = pd.read_csv("data/toy_data_aggregated_embeddings.csv")
 with open("data/restaurant_by_source.json", "r") as f:
     restaurant_by_source = json.load(f)
-# Load precomputed TF-IDF features
-# restaurant_tfidf_features = np.load("data/toy_data_tfidf_features.npz")
 print("Computing TFIDF")
 tfidf_vectorizer = TFIDF_Vectorizer(load_vectorizer=False)
 restaurant_tfidf_features = tfidf_vectorizer.compute_tfidf_matrix(data["review_text_clean"])
@@ -91,7 +85,7 @@ def retrieve_candidates(query: str, n_candidates: int):
     return candidates_idx
-def rerank(candidates_idx: np.ndarray, n_rec: int = 10, data_source: str = None) -> list:
     print("Reranking...")
     # Get popularity scores for stage 1 candidates
@@ -105,15 +99,18 @@ def rerank(candidates_idx: np.ndarray, n_rec: int = 10, data_source: str = None)
     restaurant_ids = data.loc[topN_reranked_global_idx, "id"].tolist()
     # Filter to only data_source
-    print(f"[RERANK] Filtering to only source - {data_source}")
-    restaurant_by_source_set = set(restaurant_by_source[data_source])
-    restaurant_ids = [x for x in restaurant_ids if x in restaurant_by_source_set]
     print(f"[RERANK] Final recommendations: {restaurant_ids}")
     return restaurant_ids
-def get_recommendations(query: str, n_candidates: int = 100, n_rec: int = 30, data_source: str = None):
     query_clean = clean_text(query)
     candidates_idx = retrieve_candidates(query_clean, n_candidates)
-    restaurant_ids = rerank(candidates_idx, n_rec, data_source)
     return restaurant_ids

 from utils.syntactic_similarity import Parser
 from utils.tfidf_similarity import TFIDF_Vectorizer
+torch.set_default_device("cpu")
 # Download models/data
 nltk.download('punkt')
 with open("data/restaurant_by_source.json", "r") as f:
     restaurant_by_source = json.load(f)
+# Compute TFIDF features
 print("Computing TFIDF")
 tfidf_vectorizer = TFIDF_Vectorizer(load_vectorizer=False)
 restaurant_tfidf_features = tfidf_vectorizer.compute_tfidf_matrix(data["review_text_clean"])
     return candidates_idx
+def rerank(candidates_idx: np.ndarray, n_rec: int, data_sources: list = None) -> list:
     print("Reranking...")
     # Get popularity scores for stage 1 candidates
     restaurant_ids = data.loc[topN_reranked_global_idx, "id"].tolist()
     # Filter to only data_source
+    if data_sources is not None:
+        print(f"[RERANK] Filtering to only source - {data_sources}")
+        restaurant_by_source_set = set()
+        for src in data_sources:
+            restaurant_by_source_set.update(restaurant_by_source[src])
+        restaurant_ids = [x for x in restaurant_ids if x in restaurant_by_source_set]
     print(f"[RERANK] Final recommendations: {restaurant_ids}")
     return restaurant_ids
+def get_recommendations(query: str, n_candidates: int = 100, n_rec: int = 30, data_sources: list = None):
     query_clean = clean_text(query)
     candidates_idx = retrieve_candidates(query_clean, n_candidates)
+    restaurant_ids = rerank(candidates_idx, n_rec, data_sources)
     return restaurant_ids