# NOTE: "Spaces: Sleeping" — page-status text captured along with this file; it is not part of the program.
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer | |
| import gradio as gr | |
| import spacy | |
| import subprocess | |
import sys

# Load the spaCy pipeline used for place-name extraction. The model is only
# downloaded when loading fails, so a normal start-up does not re-run the
# download every time.
try:
    nlp = spacy.load("en_core_web_trf")
except OSError:
    # spacy.load raises OSError when the model package is not installed.
    # sys.executable guarantees the download goes into the interpreter that is
    # running this script; check=True surfaces a failed download immediately
    # instead of failing later with a less specific load error.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_trf"],
        check=True,
    )
    nlp = spacy.load("en_core_web_trf")

# Sentence-embedding model used to encode user queries.
# NOTE(review): trust_remote_code=True executes code shipped with the model
# repository — confirm this is intended for the deployment environment.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)

# Hotel table searched by the app; the country spelling is normalised so that
# 'Türkiye' rows are reported as 'Turkey'.
df_new = pd.read_csv('last_df.csv')
df_new['country'] = df_new['country'].replace('Türkiye', 'Turkey')
| # | |
| # | |
def get_city_name(query):
    """Return the first place name found in *query*, lowercased.

    The query is run through the module-level spaCy pipeline ``nlp`` and the
    text of the first entity labelled ``GPE`` is returned in lower case.
    ``GPE`` is spaCy's geopolitical-entity label, so the match may be a
    country or state rather than a city.

    Returns ``None`` when the query contains no ``GPE`` entity.
    """
    place_names = (
        entity.text.lower()
        for entity in nlp(query).ents
        if entity.label_ == "GPE"
    )
    return next(place_names, None)
def filter_by_loc(query):
    """Restrict the hotel table to the place mentioned in *query*.

    The place name is extracted with ``get_city_name`` and compared
    case-insensitively against the ``locality`` column of the module-level
    ``df_new`` frame.

    Returns the rows whose locality matches. When the query names no place,
    or the place matches no row, the full ``df_new`` frame is returned
    unchanged (the same object, not a copy), so callers must not modify the
    result in place.
    """
    city_name = get_city_name(query)
    if city_name is None:
        return df_new

    # get_city_name already returns lower case, so the column is lowercased
    # once and the same mask serves both the membership test and the filter.
    matches = df_new['locality'].str.lower() == city_name
    if matches.any():
        return df_new[matches]
    return df_new
| import torch.nn as nn | |
| import torch | |
| import ast | |
def get_similarity_score(row, query_embedding):
    """Return the summed cosine similarity between a row and the query.

    Three embeddings are read from *row* — ``rating_value_embedding``,
    ``hotel_combined_embedding`` and ``review_embedding`` — and each is
    compared with *query_embedding* using cosine similarity.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) holding the three
            embedding columns. Each value may be the string representation
            of a list of numbers, or an already parsed list, array or
            tensor.
        query_embedding: 1-D tensor with the same length as the row
            embeddings.

    Returns:
        The sum of the three similarities as a Python float.
    """
    embedding_columns = (
        'rating_value_embedding',
        'hotel_combined_embedding',
        'review_embedding',
    )
    similarity = nn.CosineSimilarity(dim=0)  # dim=0 because the inputs are 1-D

    total = 0.0
    for column in embedding_columns:
        raw = row[column]
        if isinstance(raw, str):
            # literal_eval only accepts Python literals, so a stored string
            # cannot execute arbitrary code while being parsed.
            embedding = torch.tensor(ast.literal_eval(raw))
        else:
            # Already parsed: callers may pre-convert the columns once
            # instead of re-parsing the strings for every query.
            embedding = torch.as_tensor(raw)
        total += similarity(embedding, query_embedding).item()
    return total
def process_query(query):
    """Find the hotel most similar to *query* and describe it as text.

    The query is embedded with the module-level ``model``, the hotel table is
    narrowed with ``filter_by_loc``, every remaining row is scored with
    ``get_similarity_score``, and the highest-scoring row is formatted.

    Args:
        query: Free-text search string entered by the user.

    Returns:
        A multi-line string with the hotel's name, city, country, star rating
        and price range, or a short message when the query is blank or there
        are no rows to search.
    """
    if not query or not query.strip():
        return "Please enter a query."

    candidates = filter_by_loc(query)
    if candidates.empty:
        # Without this guard the row lookup below would raise IndexError.
        return "No hotels are available to search."

    query_embedding_tensor = torch.tensor(model.encode(query))
    scores = candidates.apply(
        lambda row: get_similarity_score(row, query_embedding_tensor),
        axis=1,
    )

    # assign() returns a new frame, so the scores are never written onto the
    # shared module-level table (or onto a slice of it) between requests.
    ranked = candidates.assign(similarity_score=scores).sort_values(
        'similarity_score', ascending=False
    )
    best = ranked.iloc[0]

    lines = [
        "Here's the most similar hotel we found:",
        "-" * 30,
        f"Hotel Name: {best['hotel_name']}",
        f"City: {best['locality']}",
        f"Country: {best['country']}",
        f"Star Rating: {best['rate']}",
        f"Price Range: {best['price_range']}",
    ]
    # Trailing newline kept so the output text matches the previous format.
    return "\n".join(lines) + "\n"
# Build the single-textbox Gradio front end around process_query and start it.
interface_options = {
    "fn": process_query,
    "inputs": gr.Textbox(label="Query", placeholder="Enter your query"),
    "outputs": "text",
    "title": "Hotel Similarity Finder",
    "description": "Enter a query to find similar hotels.",
}
ui = gr.Interface(**interface_options)
ui.launch()