Tinder!
Browse files- app.py +256 -1
- tinder_data.csv +0 -0
app.py
CHANGED
|
@@ -15,8 +15,263 @@ from sklearn.preprocessing import OneHotEncoder
|
|
| 15 |
|
| 16 |
|
| 17 |
|
| 18 |
-
def
|
| 19 |
return "Hello " + name + "!!" + " str2=" + str2
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
iface = gr.Interface(fn=greet, inputs=["text", "text"], outputs="text")
|
| 22 |
iface.launch()
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
|
| 18 |
+
def greet_o(name, str2):
    """Return the original demo greeting built from the two text inputs.

    Args:
        name: Text inserted after "Hello ".
        str2: Text appended after " str2=".

    Returns:
        The string ``"Hello <name>!! str2=<str2>"``.
    """
    return f"Hello {name}!! str2={str2}"
|
| 20 |
|
| 21 |
+
def greet(name, str2):
    """Greet with the username of the top recommendation for a sample profile.

    The profile below is hard-coded; ``name`` is accepted for interface
    compatibility but is not used yet.

    Args:
        name: Unused.
        str2: Text echoed back after " str2=".

    Returns:
        ``"Hello <username>!! str2=<str2>"`` where ``<username>`` is the
        first entry returned by ``recommend``.
    """
    # Placeholder profile: numeric / label-encoded features first, then the
    # one-hot / binary-encoded categorical features.
    # NOTE(review): the column set and order presumably have to match the
    # matrix `svd` was fitted on (`tinder_dfs`) — confirm against the CSV.
    profile = {
        'age': 22.0,
        'status': 1.0,
        'sex': 0.0,
        'height': 60.0,
        'smokes': 1.0,
        'new_languages': 2.0,
        'body_profile': 0.0,
        'education_level': 4.0,
        'dropped_out': 0.0,
        'location_preference': 2.0,
        'num_languages': 2.0,
        'drinks_encoded': 0.0,
        'drugs_encoded': 0.0,
        'location_new_york': 0.0,
        'location_northern_california': 1.0,
        'location_southern_california': 0.0,
        'job_encoded': 4.0,
        # NOTE(review): the number of pets_* columns is whatever the binary
        # encoder produced at fit time — confirm it is four.
        'pets_0': 1.0,
        'pets_1': 1.0,
        'pets_2': 1.0,
        'pets_3': 1.0,
    }
    bio = ('I am a foodie and traveller. But sometimes like to sit alone '
           'in a corner and read a good fiction.')

    # Vectorize the bio with the already-fitted TF-IDF model.
    tfidf_df = pd.DataFrame(tfidf.transform([bio]).toarray(),
                            columns=feature_names)

    # One-row frame: structured features followed by the TF-IDF features.
    user_df = pd.DataFrame(profile, index=[0])
    user_df = pd.concat([user_df, tfidf_df], axis=1)

    suggested_arr = recommend(user_df)

    # `recommend` returns a Series indexed by the dataset's row labels, so
    # pick the first entry by position; `[0]` would be a label lookup.
    return f"Hello {suggested_arr.iloc[0]}!! str2={str2}"
|
| 61 |
+
|
| 62 |
+
# Load the profile dataset.
tinder_df = pd.read_csv("tinder_data.csv")

# Number of languages per row = number of commas in `language` + 1.
tinder_df['num_languages'] = tinder_df['language'].str.count(',') + 1
tinder_df.drop(["language"], axis=1, inplace=True)

# Map the location preference label to a numeric weight.
place_type_strength = {
    'anywhere': 1.0,
    'same state': 2.0,
    'same city': 2.5
}
# Dict indexing (rather than Series.map) so an unexpected label raises
# KeyError instead of silently becoming NaN.
tinder_df['location_preference'] = tinder_df['location_preference'].apply(
    lambda x: place_type_strength[x])

# Binary columns: replace the two labels with 0/1.
two_unique_values_column = {
    'sex': {'f': 1, 'm': 0},
    'dropped_out': {'no': 0, 'yes': 1}
}
tinder_df.replace(two_unique_values_column, inplace=True)

# Map the relationship status label to a numeric weight.
status_type_strength = {
    'single': 2.0,
    'available': 2.0,
    'seeing someone': 1.0,
    'married': 1.0
}
tinder_df['status'] = tinder_df['status'].apply(
    lambda x: status_type_strength[x])

# Fit an encoder on `orientation` so its classes stay available, then drop
# the column: it is not used as a feature, so encoding the column in place
# first would be wasted work.
orientation_encoder = LabelEncoder()
orientation_encoder.fit(tinder_df['orientation'])
tinder_df.drop("orientation", axis=1, inplace=True)

# Collapse the drinking labels into three buckets.
drinking_habit = {
    'socially': 'sometimes',
    'rarely': 'sometimes',
    'not at all': 'do not drink',
    'often': 'drinks often',
    'very often': 'drinks often',
    'desperately': 'drinks often'
}
tinder_df['drinks'] = tinder_df['drinks'].apply(
    lambda x: drinking_habit[x])

# One shared encoder for both habit columns, fitted on the union of their
# values so a given label gets the same code in either column.
habit_encoder = LabelEncoder()
habit_encoder.fit(tinder_df[['drinks', 'drugs']].values.reshape(-1))
tinder_df['drinks_encoded'] = habit_encoder.transform(tinder_df['drinks'])
tinder_df['drugs_encoded'] = habit_encoder.transform(tinder_df['drugs'])

# The raw habit columns are replaced by their encoded versions.
tinder_df.drop(["drinks", "drugs"], axis=1, inplace=True)
|
| 139 |
+
|
| 140 |
+
# Cities that belong to a named region; anything else falls back to
# "northern_california" in get_region().
region_dict = {
    'southern_california': ['los angeles', 'san diego', 'hacienda heights',
                            'north hollywood', 'phoenix'],
    'new_york': ['brooklyn', 'new york'],
}

# Reverse lookup (lower-cased city -> region), built once so get_region()
# does not rebuild the lower-cased city lists on every call. setdefault
# keeps the first region listed if a city ever appears twice, matching the
# first-match order of iterating region_dict.
_CITY_TO_REGION = {}
for _region, _cities in region_dict.items():
    for _city in _cities:
        _CITY_TO_REGION.setdefault(_city.lower(), _region)


def get_region(city):
    """Return the region name for ``city`` (case-insensitive).

    Args:
        city: City name as a string.

    Returns:
        The key of ``region_dict`` whose list contains ``city``, or
        ``"northern_california"`` when the city is not listed.
    """
    return _CITY_TO_REGION.get(city.lower(), "northern_california")
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
# Keep the text before the first ", " of `location` and bucket it into a
# region via get_region().
tinder_df['location'] = tinder_df['location'].str.split(', ').str[0].apply(
    get_region)

# One-hot encode the region.
location_encoder = OneHotEncoder()
location_encoded = location_encoder.fit_transform(tinder_df[['location']])

# Reuse tinder_df's index so the column-wise concat lines rows up by
# position even if the frame's index is not 0..n-1.
location_encoded_df = pd.DataFrame(
    location_encoded.toarray(),
    columns=location_encoder.get_feature_names_out(['location']),
    index=tinder_df.index,
)
tinder_df = pd.concat([tinder_df, location_encoded_df], axis=1)
tinder_df.drop(["location"], axis=1, inplace=True)

# Label-encode the job column into `job_encoded`, then drop the raw column.
job_encoder = LabelEncoder()
job_encoder.fit(tinder_df['job'])
tinder_df['job_encoded'] = job_encoder.transform(tinder_df['job'])
tinder_df.drop('job', axis=1, inplace=True)

# Collapse smoking labels to a flag: 1.0 for 'no', 0 for everything else.
smokes = {
    'no': 1.0,
    'sometimes': 0,
    'yes': 0,
    'when drinking': 0,
    'trying to quit': 0
}
tinder_df['smokes'] = tinder_df['smokes'].apply(lambda x: smokes[x])

# Binary-encode the pets column and swap it for the encoded columns.
bin_enc = ce.BinaryEncoder(cols=['pets'])
pet_enc = bin_enc.fit_transform(tinder_df['pets'])
tinder_df = pd.concat([tinder_df, pet_enc], axis=1)
tinder_df.drop("pets", axis=1, inplace=True)

# Label-encode `new_languages` in place. A dedicated encoder is used so the
# fitted one-hot `location_encoder` above is not overwritten.
language_encoder = LabelEncoder()
language_encoder.fit(tinder_df['new_languages'])
tinder_df['new_languages'] = language_encoder.transform(
    tinder_df['new_languages'])

# Label-encode `body_profile` in place.
le = LabelEncoder()
tinder_df["body_profile"] = le.fit_transform(tinder_df["body_profile"])
|
| 221 |
+
|
| 222 |
+
# Vectorize the free-text bios with TF-IDF, ignoring English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(tinder_df['bio'])

# Column labels for the TF-IDF matrix, in column order. `vocabulary_` is a
# term -> column-index dict whose key order is not the column order, so
# using it as the column list would mislabel the features.
feature_names = tfidf.get_feature_names_out()

# Dense TF-IDF frame, sharing tinder_df's index so the concat below lines
# rows up by position.
tfidf_df = pd.DataFrame(tfidf_matrix.toarray(),
                        columns=feature_names,
                        index=tinder_df.index)

# Feature matrix = structured features (without bio and identifiers)
# followed by the TF-IDF features.
tinder_dfs = tinder_df.drop(["bio", "user_id", "username"], axis=1)
tinder_dfs = pd.concat([tinder_dfs, tfidf_df], axis=1)

# Reduce the feature matrix to 100 components.
svd = TruncatedSVD(n_components=100)
svd_matrix = svd.fit_transform(tinder_dfs)

# Pairwise cosine similarity between all users in the reduced space.
# NOTE(review): this is an n_users x n_users matrix and nothing in the
# visible code reads it — confirm it is still needed.
cosine_sim = cosine_similarity(svd_matrix)
|
| 252 |
+
|
| 253 |
+
def recommend(user_df, num_recommendations=5):
    """Return usernames of the dataset users most similar to ``user_df``.

    Args:
        user_df: One-row feature frame for the querying user; it is passed
            to the fitted ``svd`` as-is.
        num_recommendations: Maximum number of usernames to return.

    Returns:
        A slice of ``tinder_df['username']`` for the best-matching rows,
        ordered from most to least similar and indexed by the dataset's
        original row labels.
    """
    # Project the query into the same reduced space as the dataset.
    user_vec = svd.transform(user_df)

    # Similarity of the (single) query row against every dataset user.
    scores = cosine_similarity(user_vec, svd_matrix)[0]

    # Row positions ordered by descending similarity; sorted() is stable,
    # so ties keep dataset order.
    ranked = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)

    # The query user is not a row of `svd_matrix`, so the best match is a
    # real candidate and must be kept: take the first N, not entries 1..N.
    top = ranked[:max(num_recommendations, 0)]

    return tinder_df['username'].iloc[top]
|
| 273 |
+
|
| 274 |
+
# Setup complete!
|
| 275 |
+
|
| 276 |
# Expose greet() through a Gradio UI with two text inputs and one text
# output, then start serving it.
iface = gr.Interface(fn=greet, inputs=["text", "text"], outputs="text")
iface.launch()
|
tinder_data.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|