Spaces:

AzizTh
/

Hotel-Semantic-Search

Sleeping

App Files Files Community

Hotel-Semantic-Search / app.py

AzizTh

Update app.py

5aa6ce6 verified over 1 year ago

raw

history blame contribute delete

3.67 kB

	import pandas as pd
	from sentence_transformers import SentenceTransformer
	import gradio as gr
	import spacy

	import subprocess

	# Load the spaCy transformer pipeline used for named-entity recognition.
	# The model is downloaded only when loading it fails, so a restart with the
	# model already installed neither waits for nor depends on a network download.
	try:
	    nlp = spacy.load("en_core_web_trf")
	except OSError:
	    # spacy.load raises OSError when the model package cannot be found.
	    # check=True reports a failed download here rather than as a second,
	    # less informative OSError from the retry below.
	    subprocess.run(
	        ["python", "-m", "spacy", "download", "en_core_web_trf"],
	        check=True,
	    )
	    nlp = spacy.load("en_core_web_trf")

	# Sentence-embedding model used to encode the user's query.
	model = SentenceTransformer(
	    "nomic-ai/nomic-embed-text-v1",
	    trust_remote_code=True,
	)

	# Hotel table read from the CSV file next to this script.
	df_new = pd.read_csv('last_df.csv')

	# Use the spelling 'Turkey' wherever the country column holds 'Türkiye'.
	df_new['country'] = df_new['country'].replace({'Türkiye': 'Turkey'})

	# Function to extract city name from the query
	def get_city_name(query):
	    """Return the first geopolitical entity found in *query*, lower-cased.

	    The query is run through the module-level spaCy pipeline ``nlp`` and its
	    entities are scanned in order; the text of the first one labelled
	    ``"GPE"`` is returned in lower case. Returns ``None`` when no entity
	    carries that label.
	    """
	    gpe_names = (
	        entity.text.lower()
	        for entity in nlp(query).ents
	        if entity.label_ == "GPE"
	    )
	    return next(gpe_names, None)

	# Function to filter DataFrame by location
	def filter_by_loc(query):
	    """Return the hotels located in the city mentioned in *query*.

	    The city is extracted with ``get_city_name`` and compared
	    case-insensitively against the ``locality`` column of ``df_new``.
	    When the query names no city, or the named city does not occur in the
	    data, all hotels are returned.

	    The result is always an independent copy, so callers may add columns to
	    it without modifying the shared ``df_new`` and without triggering
	    pandas' SettingWithCopyWarning on a filtered slice.
	    """
	    city_name = get_city_name(query)
	    if city_name is not None:
	        # Lower-case the column once and reuse the mask for both the
	        # "is this city known?" check and the row selection. city_name is
	        # already lower-cased by get_city_name.
	        city_mask = df_new['locality'].str.lower() == city_name
	        if city_mask.any():
	            return df_new[city_mask].copy()

	    # No usable city in the query: fall back to searching every hotel.
	    return df_new.copy()



	import torch.nn as nn
	import torch
	import ast



	# Function to calculate similarity score
	def get_similarity_score(row, query_embedding):
	    """Return the summed cosine similarity between a hotel row and a query.

	    ``row`` must provide the keys ``rating_value_embedding``,
	    ``hotel_combined_embedding`` and ``review_embedding``, each holding the
	    string form of a Python list literal. Each value is parsed with
	    ``ast.literal_eval``, converted to a tensor and compared with
	    ``query_embedding`` by cosine similarity along dim 0. The three
	    similarities are added together and returned as a Python number.
	    """
	    cosine = nn.CosineSimilarity(dim=0)  # dim=0 because the vectors are 1D

	    def parse_embedding(column):
	        # literal_eval only accepts Python literals, so the stored text is
	        # parsed without being executed as code.
	        return torch.tensor(ast.literal_eval(row[column]))

	    rating_vec = parse_embedding('rating_value_embedding')
	    combined_vec = parse_embedding('hotel_combined_embedding')
	    review_vec = parse_embedding('review_embedding')

	    rating_sim = cosine(rating_vec, query_embedding).item()
	    combined_sim = cosine(combined_vec, query_embedding).item()
	    review_sim = cosine(review_vec, query_embedding).item()

	    return rating_sim + combined_sim + review_sim

	# Main function to process the query and return results
	def process_query(query):
	    """Find the hotel that best matches *query* and describe it as text.

	    The query is embedded with the module-level ``model``, candidate hotels
	    are selected with ``filter_by_loc``, every candidate is scored with
	    ``get_similarity_score``, and the highest-scoring hotel is formatted
	    into a multi-line summary (name, city, country, star rating, price
	    range).

	    Returns a short prompt instead when the query is blank, and a notice
	    when there are no candidate hotels to score.
	    """
	    if not query or not query.strip():
	        return "Please enter a query describing the hotel you are looking for."

	    # model.encode returns an array; the similarity function needs a tensor.
	    query_embedding_tensor = torch.tensor(model.encode(query))

	    # Restrict the candidates to the city named in the query, if any.
	    filtered_data = filter_by_loc(query)
	    if filtered_data.empty:
	        return "No hotels are available to search."

	    # Keep the scores in their own Series rather than writing a column into
	    # filtered_data: that frame may be the shared module-level table, and
	    # adding a column would modify it for every later request.
	    scores = filtered_data.apply(
	        lambda row: get_similarity_score(row, query_embedding_tensor),
	        axis=1,
	    )

	    # Only the single best match is reported, so locate the maximum by
	    # position instead of sorting every candidate.
	    best_hotel = filtered_data.iloc[scores.argmax()]

	    # Format the output
	    summary_lines = [
	        "Here's the most similar hotel we found:",
	        "-" * 30,
	        f"Hotel Name: {best_hotel['hotel_name']}",
	        f"City: {best_hotel['locality']}",
	        f"Country: {best_hotel['country']}",
	        f"Star Rating: {best_hotel['rate']}",
	        f"Price Range: {best_hotel['price_range']}",
	    ]
	    # Every line, including the last, ends with a newline.
	    return "\n".join(summary_lines) + "\n"





	# Single text box through which the user submits a free-text hotel query.
	_query_input = gr.Textbox(label="Query", placeholder="Enter your query")

	# Gradio front end: passes the text box contents to process_query and shows
	# the returned string as plain text.
	ui = gr.Interface(
	    fn=process_query,
	    inputs=_query_input,
	    outputs="text",
	    title="Hotel Similarity Finder",
	    description="Enter a query to find similar hotels.",
	)

	# Start serving the interface.
	ui.launch()