Harsh1306 committed on
Commit
5840396
·
verified ·
1 Parent(s): 9ca6525

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +115 -0
  2. df_with_embeddings.pkl +3 -0
  3. journeygenius.py +125 -0
  4. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import torch
4
+ from sentence_transformers import SentenceTransformer, util
5
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
6
+ from sklearn.model_selection import train_test_split
7
+
8
# Streamlit re-executes this script from the top on every widget interaction,
# so the expensive loads below go through Streamlit's caches instead of
# running again on each rerun.


@st.cache_data
def _load_embeddings(path='df_with_embeddings.pkl'):
    """Read the pickled DataFrame of packages and their embeddings.

    ``st.cache_data`` hands every rerun its own copy of the cached frame, so
    code that adds columns to the result cannot leak into other reruns.

    NOTE(review): ``pd.read_pickle`` can execute arbitrary code; only point
    ``path`` at a file that ships with the app.
    """
    return pd.read_pickle(path)


@st.cache_resource
def _load_model(name='all-MiniLM-L6-v2'):
    """Create the SentenceTransformer once and reuse it across reruns."""
    return SentenceTransformer(name)


# Load precomputed embeddings
df_with_embeddings = _load_embeddings()

# Load the SentenceTransformer model
model = _load_model()
13
+
14
def get_user_input():
    """Render the preference widgets and collect their current values.

    Returns:
        A tuple ``(budget, num_people, companions, days_of_lodging,
        preferred_weather)`` built from the Streamlit widgets.
    """
    companions = st.selectbox("Who are you traveling with?", options=["solo", "couple", "family"])

    # Party size is implied for solo/couple; only families are asked for it.
    fixed_party_sizes = {"solo": 1, "couple": 2}
    if companions in fixed_party_sizes:
        num_people = fixed_party_sizes[companions]
    elif companions == "family":
        num_people = st.number_input("Enter the number of people:", min_value=1, step=1)

    budget = st.number_input("Enter your budget per person:", min_value=0.0, step=0.01)
    days_of_lodging = st.number_input("Enter the number of days of lodging:", min_value=1, step=1)
    preferred_weather = st.selectbox("Enter preferred weather:", options=["Sunny", "Rainy", "Snowy"])

    return budget, num_people, companions, days_of_lodging, preferred_weather
29
+
30
def encode_user_input(user_input):
    """Turn the user's preferences into a sentence embedding.

    Args:
        user_input: Sequence laid out as ``(budget, num_people, companions,
            days_of_lodging, preferred_weather)``; the party size at index 1
            is not part of the encoded text.

    Returns:
        The tensor produced by ``model.encode`` for the preference sentence.
    """
    budget = user_input[0]
    companions = user_input[2]
    days = user_input[3]
    weather = user_input[4]
    user_description = f"budget {budget} companions {companions} days {days} weather {weather}"
    return model.encode(user_description, convert_to_tensor=True)
35
+
36
def recommend_destinations(user_input, df):
    """Rank packages by cosine similarity to the user's preferences.

    The caller's DataFrame is left untouched: similarity scores are attached
    to a copy instead of being written into ``df`` as a new column.

    Args:
        user_input: Preference tuple accepted by ``encode_user_input``.
        df: DataFrame with an ``embedding`` column holding one tensor per row,
            plus the descriptive columns returned below.

    Returns:
        Up to five rows, one per distinct ``Primary`` value, ordered from most
        to least similar, restricted to the descriptive columns.
    """
    output_columns = ['Primary', 'per_person_price', 'Topography', 'Temprature', 'Weather', 'Mood']

    # Nothing to rank; also avoids torch.stack() on an empty list.
    if df.empty:
        return df[output_columns]

    # Get device (use GPU if available, else fallback to CPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Encode user input and move to the correct device
    user_embedding = encode_user_input(user_input).to(device)

    # Score every row in one batched call instead of one call per row.
    corpus = torch.stack([embedding.to(device) for embedding in df['embedding']])
    scores = util.pytorch_cos_sim(user_embedding, corpus)[0].tolist()

    # Sort by similarity and keep the best row of each destination.
    ranked = df.assign(similarity=scores).sort_values(by='similarity', ascending=False)
    recommendations = ranked.drop_duplicates(subset='Primary').head(5)

    return recommendations[output_columns]
50
+
51
def display_package_details(selection, df):
    """Write the details of the first package whose ``Primary`` equals ``selection``.

    Args:
        selection: Value to look up in the ``Primary`` column.
        df: DataFrame providing ``Primary``, ``package_name``, ``itinerary``
            and ``sightseeing_places_covered`` columns.
    """
    matches = df.loc[df['Primary'] == selection]

    # Guard clause: report the miss and stop.
    if matches.empty:
        st.write("Invalid selection. No package found.")
        return

    package_name = matches['package_name'].values[0]
    itinerary = matches['itinerary'].values[0]
    sightseeing = matches['sightseeing_places_covered'].values[0]
    st.write(f"*Package Name:* {package_name}")
    st.write(f"*Itinerary:* {itinerary}")
    st.write(f"*Sightseeing Places Covered:* {sightseeing}")
59
+
60
def evaluate_model(df, model):
    """Score the embeddings with a nearest-neighbour label check.

    The data is split 80/20 with a fixed seed. Each test description is
    assigned the ``Primary`` label of its most similar training description
    (cosine similarity), and the predictions are compared with the true labels.

    Args:
        df: DataFrame with ``description`` and ``Primary`` columns.
        model: Object whose ``encode(list_of_str, convert_to_tensor=True)``
            returns a tensor of embeddings.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)``; the last three are
        weighted averages over the labels.
    """
    # Get the correct device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Split the data into train and test sets
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

    # Encode the descriptions and move them to the appropriate device
    train_embeddings = model.encode(train_df['description'].tolist(), convert_to_tensor=True).to(device)
    test_embeddings = model.encode(test_df['description'].tolist(), convert_to_tensor=True).to(device)

    # Build the label list once rather than once per test row.
    train_labels = train_df['Primary'].tolist()

    # One (n_test x n_train) similarity matrix; each row's argmax is the
    # nearest training example for that test row.
    similarities = util.pytorch_cos_sim(test_embeddings, train_embeddings)
    nearest_indices = similarities.argmax(dim=1).tolist()
    predicted_labels = [train_labels[idx] for idx in nearest_indices]

    # Calculate accuracy metrics. zero_division=0 yields the same scores as
    # the default while silencing the warning for labels never predicted.
    true_labels = test_df['Primary']
    accuracy = accuracy_score(true_labels, predicted_labels)
    precision = precision_score(true_labels, predicted_labels, average='weighted', zero_division=0)
    recall = recall_score(true_labels, predicted_labels, average='weighted', zero_division=0)
    f1 = f1_score(true_labels, predicted_labels, average='weighted', zero_division=0)

    return accuracy, precision, recall, f1
87
+
88
# Streamlit app
#
# Streamlit reruns this script on every interaction and a button is only True
# on the rerun triggered by its own click, so anything that must stay visible
# afterwards is kept in st.session_state and rendered from there.
st.title("Travel Recommendation System")

st.write("Please provide your travel preferences below:")

user_input = get_user_input()

if st.button("Get Recommendations"):
    st.session_state.recommendations = recommend_destinations(user_input, df_with_embeddings)
    # A package picked from an earlier result set may not be in the new one.
    if 'selected_package' in st.session_state:
        del st.session_state['selected_package']

if 'recommendations' in st.session_state:
    # Rendered from session state so the table survives later reruns
    # (e.g. after "View Details" is clicked).
    st.write("Top recommended destinations for you:")
    st.dataframe(st.session_state.recommendations)

    primary_selection = st.selectbox(
        "Select a package to view details",
        options=st.session_state.recommendations['Primary'].tolist(),
    )
    if st.button("View Details"):
        st.session_state.selected_package = primary_selection

if 'selected_package' in st.session_state:
    st.write(f"Details for {st.session_state.selected_package}:")
    display_package_details(st.session_state.selected_package, df_with_embeddings)

if st.button("Evaluate Model Accuracy"):
    accuracy, precision, recall, f1 = evaluate_model(df_with_embeddings, model)
    st.write(f'Accuracy: {accuracy}')
    st.write(f'Precision: {precision}')
    st.write(f'Recall: {recall}')
    st.write(f'F1 Score: {f1}')
df_with_embeddings.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9b16bd4c44f08c5fe02dc8588f56b75cf9b020003faf1735093f80f49cf8f4b
3
+ size 30494159
journeygenius.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """JourneyGenius.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1TX_o_0MEaHKPF8J0-L3FEqfqawGLP30J
8
+ """
9
+
10
# Dependencies. The notebook installed these with IPython "!pip" magics,
# which are a SyntaxError in a plain .py module; install them from a shell:
#   pip install sentence-transformers
#   pip install geopy
#   pip install streamlit
import ast

import pandas as pd
import seaborn as sns
from sentence_transformers import SentenceTransformer, util
21
+
22
# Load the dataset
file_path = '/content/ML_proj_dataset_updated (1).csv'
df = pd.read_csv(file_path)

# Columns kept for the recommender, in the order they are reported.
relevant_columns = [
    'Primary', 'per_person_price', 'Topography', 'Temprature', 'Weather', 'Mood',
    'package_name', 'itinerary', 'sightseeing_places_covered',
]

# Keep only those columns and drop every row with a missing value in any of them.
df_relevant = df.loc[:, relevant_columns].dropna()
39
+
40
# Preprocess data
def preprocess_data(df):
    """Return a copy of ``df`` with a ``description`` text column added.

    The description is the space-separated string form of ``Primary``,
    ``Topography``, ``Temprature``, ``Weather``, ``Mood`` and
    ``per_person_price``, in that order.

    The input frame is not modified. Descriptions are built from the columns
    directly rather than with a row-wise ``apply``, so an empty frame yields
    an empty ``description`` column.

    Args:
        df: DataFrame containing the six columns listed above.

    Returns:
        A new DataFrame with the extra ``description`` column.
    """
    description_columns = ['Primary', 'Topography', 'Temprature', 'Weather', 'Mood', 'per_person_price']
    result = df.copy()
    # Walk the six columns in lockstep; each tuple holds one row's values.
    row_values = zip(*(result[column] for column in description_columns))
    result['description'] = [" ".join(f"{value}" for value in values) for values in row_values]
    return result
44
+
45
df_relevant = preprocess_data(df_relevant)

# Encode data
model = SentenceTransformer('all-MiniLM-L6-v2')


def _embed(text):
    """Encode one description string into an embedding tensor."""
    return model.encode(text, convert_to_tensor=True)


# One embedding per row, computed from that row's description.
df_relevant['embedding'] = df_relevant['description'].apply(_embed)

# Save embeddings to file
df_relevant.to_pickle('/content/df_with_embeddings.pkl')

import pandas as pd
from sentence_transformers import SentenceTransformer, util

# Load precomputed embeddings
df_with_embeddings = pd.read_pickle('/content/df_with_embeddings.pkl')
59
+
60
# User input function
def _ask_number(prompt, cast, minimum):
    """Prompt until the reply converts with ``cast`` and is at least ``minimum``."""
    while True:
        reply = input(prompt).strip()
        try:
            value = cast(reply)
        except ValueError:
            print(f"Invalid number: {reply!r}. Please try again.")
            continue
        if value >= minimum:
            return value
        print(f"Please enter a value of at least {minimum}.")


def get_user_input():
    """Ask for the travel preferences on the console.

    Invalid answers are asked again in a loop instead of recursing or
    raising ``ValueError``. The numeric lower bounds match the Streamlit
    front end: at least 1 person, a non-negative budget, at least 1 day.

    Returns:
        A tuple ``(budget, num_people, companions, days_of_lodging,
        preferred_weather)`` where ``companions`` is lower-cased and
        ``preferred_weather`` is capitalised.
    """
    valid_companions = ("solo", "couple", "family")
    while True:
        companions = input("Who are you traveling with (solo, couple, family): ").strip().lower()
        if companions in valid_companions:
            break
        print("Invalid input for companions. Please enter 'solo', 'couple', or 'family'.")

    if companions == "solo":
        num_people = 1
    elif companions == "couple":
        num_people = 2
    else:
        num_people = _ask_number("Enter the number of people: ", int, 1)

    budget = _ask_number("Enter your budget per person: ", float, 0.0)
    days_of_lodging = _ask_number("Enter the number of days of lodging: ", int, 1)
    preferred_weather = input("Enter preferred weather (Sunny, Rainy, Snowy): ").strip().capitalize()

    return budget, num_people, companions, days_of_lodging, preferred_weather
79
+
80
# Encode user input
model = SentenceTransformer('all-MiniLM-L6-v2')


def encode_user_input(user_input):
    """Embed the user's preferences as a single sentence.

    Args:
        user_input: Sequence laid out as ``(budget, num_people, companions,
            days_of_lodging, preferred_weather)``; index 1 is not used.

    Returns:
        The tensor returned by ``model.encode`` for the preference sentence.
    """
    budget = user_input[0]
    companions = user_input[2]
    days = user_input[3]
    weather = user_input[4]
    user_description = f"budget {budget} companions {companions} days {days} weather {weather}"
    user_embedding = model.encode(user_description, convert_to_tensor=True)
    return user_embedding
86
+
87
# Recommend destinations
def recommend_destinations(user_input, df):
    """Rank packages by cosine similarity to the user's preferences.

    Scores are attached to a copy of ``df``; the caller's DataFrame does not
    gain a ``similarity`` column.

    Args:
        user_input: Preference tuple accepted by ``encode_user_input``.
        df: DataFrame with an ``embedding`` column and the descriptive
            columns returned below.

    Returns:
        Up to five rows, one per distinct ``Primary`` value, ordered from
        most to least similar, restricted to the descriptive columns.
    """
    user_embedding = encode_user_input(user_input)
    scores = [util.pytorch_cos_sim(user_embedding, embedding).item() for embedding in df['embedding']]

    # Sort by similarity and drop duplicates based on 'Primary' column
    ranked = df.assign(similarity=scores).sort_values(by='similarity', ascending=False)
    recommendations = ranked.drop_duplicates(subset='Primary').head(5)

    return recommendations[['Primary', 'per_person_price', 'Topography', 'Temprature', 'Weather', 'Mood']]
96
+
97
# Display selected package details
def display_package_details(selection, df):
    """Print the details of the package whose ``Primary`` matches ``selection``.

    An exact match is preferred. Because the name is typed by hand on the
    console, a match that ignores letter case and surrounding whitespace is
    tried when no exact match exists. The first matching row is printed.

    Args:
        selection: Destination name entered by the user.
        df: DataFrame providing ``Primary``, ``package_name``, ``itinerary``
            and ``sightseeing_places_covered`` columns.
    """
    selected_row = df.loc[df['Primary'] == selection]
    if selected_row.empty:
        # Fall back to a forgiving comparison for hand-typed names.
        wanted = str(selection).strip().casefold()
        normalized = df['Primary'].astype(str).str.strip().str.casefold()
        selected_row = df.loc[normalized == wanted]

    if selected_row.empty:
        print("Invalid selection. No package found.")
        return

    first = selected_row.iloc[0]
    print("\nSelected Package Details:")
    print(f"Package Name: {first['package_name']}")
    print(f"Itinerary: {first['itinerary']}")
    print(f"Sightseeing Places Covered: {first['sightseeing_places_covered']}")
107
+
108
# Main function to run the recommendation system
def main():
    """Run one console session: collect preferences, recommend, show details."""
    preferences = get_user_input()
    top_matches = recommend_destinations(preferences, df_with_embeddings)
    print("Top recommended destinations for you:")
    print(top_matches)

    # Let the user select a recommendation
    chosen_primary = input("\nEnter the Primary name of the package you want to view details for: ")
    chosen_primary = chosen_primary.strip()
    display_package_details(chosen_primary, df_with_embeddings)
118
+
119
# Run the main function exactly once when executed as a script. The notebook
# export contained this guard twice, which ran the whole interactive session
# a second time.
if __name__ == "__main__":
    main()
125
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ sentence-transformers
4
+ scikit-learn
5
+ gdown
6
+ torch