Spaces:
Sleeping
Sleeping
Update columns as discussed in team meeting: rename the aggregated output columns to MentorID, Name, and Sentences
Browse files
app.py
CHANGED
|
@@ -62,11 +62,12 @@ def cosine_similarity_generator(master_exploded, embeddings, query, filename = t
|
|
| 62 |
top_k = master_exploded.sort_values(by=['cos_sim'], ascending=False).head(10)
|
| 63 |
print(" The top k=10 results have a min cosine similarity of: ", master_exploded.sort_values(by=['cos_sim'], ascending=False).head(10)['cos_sim'].min())
|
| 64 |
# print(master_exploded_top_k)
|
| 65 |
-
cosine_sum_by_name = master_exploded_top.groupby(["id", "name", "
|
| 66 |
print("Taking sum of cosine similarities above 0.6 threshold...")
|
| 67 |
cosine_sum_by_name.columns = cosine_sum_by_name.columns.map('_'.join)
|
| 68 |
|
| 69 |
ranked_mentors = cosine_sum_by_name.reset_index().sort_values(by ="cos_sim_sum", ascending =False)
|
|
|
|
| 70 |
# path = "./Ranked_Results_Gradio/"
|
| 71 |
# ranked_mentors_filename = path+'ranked_mentors_'+str(filename)+'.csv'
|
| 72 |
# cos_sum_filename = path+'cos_sum_'+str(filename)+'.csv'
|
|
|
|
| 62 |
top_k = master_exploded.sort_values(by=['cos_sim'], ascending=False).head(10)
|
| 63 |
print(" The top k=10 results have a min cosine similarity of: ", master_exploded.sort_values(by=['cos_sim'], ascending=False).head(10)['cos_sim'].min())
|
| 64 |
# print(master_exploded_top_k)
|
| 65 |
+
cosine_sum_by_name = master_exploded_top.groupby(["id", "name", "tokenized_sentences" ]).agg({"cos_sim": ["sum"]}).reset_index()
|
| 66 |
print("Taking sum of cosine similarities above 0.6 threshold...")
|
| 67 |
cosine_sum_by_name.columns = cosine_sum_by_name.columns.map('_'.join)
|
| 68 |
|
| 69 |
ranked_mentors = cosine_sum_by_name.reset_index().sort_values(by ="cos_sim_sum", ascending =False)
|
| 70 |
+
cosine_sum_by_name = cosine_sum_by_name.rename(columns={"id_": "MentorID", "name_": "Name", "tokenized_sentences_": "Sentences"}, errors="raise")
|
| 71 |
# path = "./Ranked_Results_Gradio/"
|
| 72 |
# ranked_mentors_filename = path+'ranked_mentors_'+str(filename)+'.csv'
|
| 73 |
# cos_sum_filename = path+'cos_sum_'+str(filename)+'.csv'
|