Spaces:
Runtime error
Runtime error
Upload 4 files
Browse files
- app.py +115 -0
- df_with_embeddings.pkl +3 -0
- journeygenius.py +125 -0
- requirements.txt +6 -0
app.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import torch
|
| 4 |
+
from sentence_transformers import SentenceTransformer, util
|
| 5 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
| 6 |
+
from sklearn.model_selection import train_test_split
|
| 7 |
+
|
| 8 |
+
# Load precomputed embeddings
# NOTE(review): unpickling executes code embedded in the file; only deploy a
# df_with_embeddings.pkl produced by a trusted pipeline.
df_with_embeddings = pd.read_pickle('df_with_embeddings.pkl')


@st.cache_resource
def _load_model():
    """Return the SentenceTransformer, created once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    the model as a resource avoids re-instantiating it on each rerun.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')


# Load the SentenceTransformer model
model = _load_model()
|
| 13 |
+
|
| 14 |
+
def get_user_input():
    """Render the preference widgets and collect the traveller's answers.

    Returns:
        A tuple ``(budget, num_people, companions, days_of_lodging,
        preferred_weather)`` built from the current widget values.
    """
    companions = st.selectbox(
        "Who are you traveling with?",
        options=["solo", "couple", "family"],
    )

    # Solo and couple trips have a fixed head count; only a family is asked
    # for its size.
    fixed_party_sizes = {"solo": 1, "couple": 2}
    if companions in fixed_party_sizes:
        num_people = fixed_party_sizes[companions]
    elif companions == "family":
        num_people = st.number_input("Enter the number of people:", min_value=1, step=1)

    budget = st.number_input("Enter your budget per person:", min_value=0.0, step=0.01)
    days_of_lodging = st.number_input("Enter the number of days of lodging:", min_value=1, step=1)
    preferred_weather = st.selectbox(
        "Enter preferred weather:",
        options=["Sunny", "Rainy", "Snowy"],
    )

    return budget, num_people, companions, days_of_lodging, preferred_weather
|
| 29 |
+
|
| 30 |
+
def encode_user_input(user_input):
    """Embed the user's preferences with the module-level sentence model.

    Args:
        user_input: Indexable of preferences. Positions 0, 2, 3 and 4 are
            read as budget, companions, days and weather; position 1 is not
            part of the encoded text.

    Returns:
        The value of ``model.encode(..., convert_to_tensor=True)`` for the
        sentence assembled from those fields.
    """
    budget = user_input[0]
    companions = user_input[2]
    days = user_input[3]
    weather = user_input[4]

    user_description = f"budget {budget} companions {companions} days {days} weather {weather}"
    return model.encode(user_description, convert_to_tensor=True)
|
| 35 |
+
|
| 36 |
+
def recommend_destinations(user_input, df):
    """Rank destinations by cosine similarity to the user's preferences.

    Args:
        user_input: Preference sequence accepted by ``encode_user_input``.
        df: DataFrame with an ``embedding`` column of tensors plus the
            destination columns returned below. It is not modified.

    Returns:
        A DataFrame of at most five rows, one per distinct ``Primary``
        value, ordered from most to least similar and restricted to the
        ``Primary``, ``per_person_price``, ``Topography``, ``Temprature``,
        ``Weather`` and ``Mood`` columns.
    """
    result_columns = ['Primary', 'per_person_price', 'Topography', 'Temprature', 'Weather', 'Mood']

    # Nothing to rank; this also avoids stacking an empty list of tensors.
    if df.empty:
        return df[result_columns]

    # Get device (use GPU if available, else fallback to CPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Encode user input and move to the correct device
    user_embedding = encode_user_input(user_input).to(device)

    # Score every row with one batched call instead of one call per row.
    corpus = torch.stack([embedding.to(device) for embedding in df['embedding']])
    scores = util.pytorch_cos_sim(user_embedding, corpus)[0].cpu().numpy()

    # assign() builds a new frame, so the caller's DataFrame does not gain a
    # 'similarity' column as a side effect of asking for recommendations.
    ranked = (
        df.assign(similarity=scores)
        .sort_values(by='similarity', ascending=False)
        .drop_duplicates(subset='Primary')
        .head(5)
    )

    return ranked[result_columns]
|
| 50 |
+
|
| 51 |
+
def display_package_details(selection, df):
    """Write the package details of the chosen destination to the page.

    Args:
        selection: Value to look up in the ``Primary`` column.
        df: DataFrame holding ``Primary``, ``package_name``, ``itinerary``
            and ``sightseeing_places_covered`` columns.

    Only the first matching row is shown. When nothing matches, a notice is
    written instead.
    """
    matches = df[df['Primary'] == selection]
    if matches.empty:
        st.write("Invalid selection. No package found.")
        return

    detail_fields = (
        ("Package Name", 'package_name'),
        ("Itinerary", 'itinerary'),
        ("Sightseeing Places Covered", 'sightseeing_places_covered'),
    )
    for label, column in detail_fields:
        st.write(f"*{label}:* {matches[column].values[0]}")
|
| 59 |
+
|
| 60 |
+
def evaluate_model(df, model):
    """Score a nearest-neighbour labelling built on the sentence embeddings.

    The frame is split 80/20 with a fixed seed, every ``description`` is
    encoded with ``model``, and each test row is given the ``Primary`` value
    of its most cosine-similar training row.

    Args:
        df: DataFrame with ``description`` and ``Primary`` columns.
        model: Object whose ``encode(list_of_str, convert_to_tensor=True)``
            returns a tensor of embeddings.

    Returns:
        ``(accuracy, precision, recall, f1)``; the last three are
        label-weighted averages.
    """
    # Get the correct device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Split the data into train and test sets
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

    # Encode the descriptions and move them to the appropriate device
    train_embeddings = model.encode(train_df['description'].tolist(), convert_to_tensor=True).to(device)
    test_embeddings = model.encode(test_df['description'].tolist(), convert_to_tensor=True).to(device)

    # Build the label lists once rather than once per test row.
    train_labels = train_df['Primary'].tolist()
    true_labels = test_df['Primary'].tolist()

    # One test-by-train similarity matrix; argmax along the training axis
    # picks each test row's nearest training row.
    similarities = util.pytorch_cos_sim(test_embeddings, train_embeddings)
    nearest_indices = similarities.argmax(dim=1).tolist()
    predicted_labels = [train_labels[index] for index in nearest_indices]

    # zero_division=0 reports the same 0 score scikit-learn falls back to for
    # labels with no predictions/samples, without emitting a warning per call.
    accuracy = accuracy_score(true_labels, predicted_labels)
    precision = precision_score(true_labels, predicted_labels, average='weighted', zero_division=0)
    recall = recall_score(true_labels, predicted_labels, average='weighted', zero_division=0)
    f1 = f1_score(true_labels, predicted_labels, average='weighted', zero_division=0)

    return accuracy, precision, recall, f1
|
| 87 |
+
|
| 88 |
+
# Streamlit app
st.title("Travel Recommendation System")

st.write("Please provide your travel preferences below:")

user_input = get_user_input()

if st.button("Get Recommendations"):
    # st.button is only True on the rerun triggered by the click, so the
    # result is kept in session state to survive later interactions.
    st.session_state.recommendations = recommend_destinations(user_input, df_with_embeddings)
    # A package picked from an earlier result may not be in the new one.
    if 'selected_package' in st.session_state:
        del st.session_state['selected_package']

if 'recommendations' in st.session_state:
    # Drawn from session state so the table stays visible after the user
    # touches another widget.
    st.write("Top recommended destinations for you:")
    st.dataframe(st.session_state.recommendations)

    primary_selection = st.selectbox(
        "Select a package to view details",
        options=st.session_state.recommendations['Primary'].tolist(),
    )
    if st.button("View Details"):
        st.session_state.selected_package = primary_selection

if 'selected_package' in st.session_state:
    st.write(f"Details for {st.session_state.selected_package}:")
    display_package_details(st.session_state.selected_package, df_with_embeddings)

if st.button("Evaluate Model Accuracy"):
    accuracy, precision, recall, f1 = evaluate_model(df_with_embeddings, model)
    st.write(f'Accuracy: {accuracy}')
    st.write(f'Precision: {precision}')
    st.write(f'Recall: {recall}')
    st.write(f'F1 Score: {f1}')
|
df_with_embeddings.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9b16bd4c44f08c5fe02dc8588f56b75cf9b020003faf1735093f80f49cf8f4b
|
| 3 |
+
size 30494159
|
journeygenius.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""JourneyGenius.ipynb
|
| 3 |
+
|
| 4 |
+
Automatically generated by Colab.
|
| 5 |
+
|
| 6 |
+
Original file is located at
|
| 7 |
+
https://colab.research.google.com/drive/1TX_o_0MEaHKPF8J0-L3FEqfqawGLP30J
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
!pip install sentence-transformers
|
| 11 |
+
from sentence_transformers import SentenceTransformer, util
|
| 12 |
+
|
| 13 |
+
import ast
|
| 14 |
+
import pandas as pd
|
| 15 |
+
import seaborn as sns
|
| 16 |
+
!pip install geopy
|
| 17 |
+
!pip install streamlit
|
| 18 |
+
|
| 19 |
+
import pandas as pd
|
| 20 |
+
from sentence_transformers import SentenceTransformer
|
| 21 |
+
|
| 22 |
+
# Read the raw package dataset from its CSV file.
file_path = '/content/ML_proj_dataset_updated (1).csv'
df = pd.read_csv(file_path)

# Columns used downstream, in the order to be returned: destination
# attributes first, then the package detail fields.
relevant_columns = [
    'Primary', 'per_person_price', 'Topography', 'Temprature', 'Weather', 'Mood',
    'package_name', 'itinerary', 'sightseeing_places_covered',
]

# Keep only those columns and drop every row that is missing any of them.
df_relevant = df.loc[:, relevant_columns].dropna(how='any')
|
| 39 |
+
|
| 40 |
+
# Preprocess data
|
| 41 |
+
def preprocess_data(df):
    """Return a copy of ``df`` with a text ``description`` column added.

    The description of a row is the space-separated string form of its
    ``Primary``, ``Topography``, ``Temprature``, ``Weather``, ``Mood`` and
    ``per_person_price`` values, in that order.

    Args:
        df: DataFrame containing the six columns listed above. It is left
            untouched; an empty frame is accepted.

    Returns:
        A new DataFrame with the same rows plus the ``description`` column.
    """
    description_columns = [
        'Primary', 'Topography', 'Temprature', 'Weather', 'Mood', 'per_person_price',
    ]
    # A plain list works for zero rows too, where a row-wise apply() does not
    # produce a Series that can be assigned to a single column.
    descriptions = [
        " ".join(str(value) for value in row)
        for row in df[description_columns].itertuples(index=False, name=None)
    ]
    return df.assign(description=descriptions)
|
| 44 |
+
|
| 45 |
+
df_relevant = preprocess_data(df_relevant)

# Encode data
model = SentenceTransformer('all-MiniLM-L6-v2')
# All descriptions are encoded in one batched call and moved to the CPU before
# being stored: tensors pickled on a CUDA device cannot be unpickled on a host
# where CUDA is unavailable, which is where the saved file is later loaded.
# clone() gives every row its own storage; a row that is only a view of the
# batch tensor would carry the whole batch along when serialized.
df_relevant['embedding'] = [
    row.clone()
    for row in model.encode(df_relevant['description'].tolist(), convert_to_tensor=True).cpu()
]

# Save embeddings to file
df_relevant.to_pickle('/content/df_with_embeddings.pkl')
|
| 53 |
+
|
| 54 |
+
import pandas as pd
|
| 55 |
+
from sentence_transformers import SentenceTransformer, util
|
| 56 |
+
|
| 57 |
+
# Load precomputed embeddings
|
| 58 |
+
df_with_embeddings = pd.read_pickle('/content/df_with_embeddings.pkl')
|
| 59 |
+
|
| 60 |
+
# User input function
|
| 61 |
+
def _prompt_number(prompt, cast, minimum):
    """Ask until the reply parses with ``cast`` and is at least ``minimum``."""
    while True:
        reply = input(prompt)
        try:
            value = cast(reply)
        except ValueError:
            print("Invalid number. Please try again.")
            continue
        if value < minimum:
            print(f"Please enter a value of at least {minimum}.")
            continue
        return value


def get_user_input():
    """Collect the traveller's preferences from the console.

    Invalid answers are asked again instead of recursing or raising: an
    unknown companion type repeats the first question, and a reply that is
    not a number (or is below the allowed minimum) repeats that question.

    Returns:
        A tuple ``(budget, num_people, companions, days_of_lodging,
        preferred_weather)`` where ``companions`` is lower-cased and
        ``preferred_weather`` is capitalized.
    """
    # Head counts that do not need to be asked for.
    fixed_party_sizes = {"solo": 1, "couple": 2}

    while True:
        companions = input("Who are you traveling with (solo, couple, family): ").strip().lower()
        if companions in fixed_party_sizes or companions == "family":
            break
        print("Invalid input for companions. Please enter 'solo', 'couple', or 'family'.")

    if companions == "family":
        num_people = _prompt_number("Enter the number of people: ", int, 1)
    else:
        num_people = fixed_party_sizes[companions]

    budget = _prompt_number("Enter your budget per person: ", float, 0)
    days_of_lodging = _prompt_number("Enter the number of days of lodging: ", int, 1)
    preferred_weather = input("Enter preferred weather (Sunny, Rainy, Snowy): ").strip().capitalize()

    return budget, num_people, companions, days_of_lodging, preferred_weather
|
| 79 |
+
|
| 80 |
+
# Encode user input
|
| 81 |
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 82 |
+
|
| 83 |
+
def encode_user_input(user_input):
    """Turn the preference sequence into a sentence embedding.

    Args:
        user_input: Indexable of preferences; positions 0, 2, 3 and 4 are
            read as budget, companions, days and weather. Position 1 is not
            included in the encoded text.

    Returns:
        The value of ``model.encode(..., convert_to_tensor=True)`` for the
        assembled sentence.
    """
    budget = user_input[0]
    companions = user_input[2]
    days = user_input[3]
    weather = user_input[4]

    user_description = f"budget {budget} companions {companions} days {days} weather {weather}"
    user_embedding = model.encode(user_description, convert_to_tensor=True)
    return user_embedding
|
| 86 |
+
|
| 87 |
+
# Recommend destinations
|
| 88 |
+
def recommend_destinations(user_input, df):
    """Return the five destinations most similar to the user's preferences.

    Args:
        user_input: Preference sequence accepted by ``encode_user_input``.
        df: DataFrame with an ``embedding`` column and the destination
            columns returned below. It is not modified.

    Returns:
        A DataFrame of at most five rows, one per distinct ``Primary``
        value, ordered from most to least similar and restricted to the
        ``Primary``, ``per_person_price``, ``Topography``, ``Temprature``,
        ``Weather`` and ``Mood`` columns.
    """
    user_embedding = encode_user_input(user_input)
    scores = df['embedding'].apply(
        lambda embedding: util.pytorch_cos_sim(user_embedding, embedding).item()
    )

    # assign() builds a new frame, so the caller's DataFrame does not gain a
    # 'similarity' column as a side effect of asking for recommendations.
    # Sort by similarity and drop duplicates based on 'Primary' column
    ranked = (
        df.assign(similarity=scores)
        .sort_values(by='similarity', ascending=False)
        .drop_duplicates(subset='Primary')
        .head(5)
    )

    return ranked[['Primary', 'per_person_price', 'Topography', 'Temprature', 'Weather', 'Mood']]
|
| 96 |
+
|
| 97 |
+
# Display selected package details
|
| 98 |
+
def display_package_details(selection, df):
    """Print the package details of the chosen destination.

    Args:
        selection: Value to look up in the ``Primary`` column.
        df: DataFrame holding ``Primary``, ``package_name``, ``itinerary``
            and ``sightseeing_places_covered`` columns.

    Only the first matching row is printed. When nothing matches, a notice
    is printed instead.
    """
    matches = df[df['Primary'] == selection]
    if matches.empty:
        print("Invalid selection. No package found.")
        return

    print("\nSelected Package Details:")
    detail_fields = (
        ("Package Name", 'package_name'),
        ("Itinerary", 'itinerary'),
        ("Sightseeing Places Covered", 'sightseeing_places_covered'),
    )
    for label, column in detail_fields:
        print(f"{label}: {matches[column].values[0]}")
|
| 107 |
+
|
| 108 |
+
# Main function to run the recommendation system
|
| 109 |
+
def main():
    """Run one interactive recommendation session on the console.

    Asks for the preferences, prints the recommended destinations, then
    prints the details of the destination the user names.
    """
    preferences = get_user_input()
    top_matches = recommend_destinations(preferences, df_with_embeddings)

    print("Top recommended destinations for you:")
    print(top_matches)

    # Let the user pick one of the listed destinations by its Primary name.
    prompt = "\nEnter the Primary name of the package you want to view details for: "
    chosen_primary = input(prompt).strip()
    display_package_details(chosen_primary, df_with_embeddings)
|
| 118 |
+
|
| 119 |
+
# Run the main function once when the file is executed as a script. A second
# identical guard would start another full interactive session right after the
# first one finished.
if __name__ == "__main__":
    main()
|
| 125 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
pandas
|
| 3 |
+
sentence-transformers
|
| 4 |
+
scikit-learn
|
| 5 |
+
gdown
|
| 6 |
+
torch
|