Spaces:
Sleeping
Sleeping
import os
import time

import numpy as np
import openai
import pandas as pd

from data import data as df
# The OpenAI key is read from the environment variable named "openai";
# if the variable is not set, the key is left as None.
openai.api_key = os.getenv("openai")
def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    Computed as the dot product of the two vectors divided by the product
    of their Euclidean norms. No special handling is done for zero-length
    vectors: the division is performed as-is.
    """
    dot_product = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    return dot_product / norm_product
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector of ``text`` from the OpenAI embeddings API.

    Args:
        text: The text to embed. When it is a string, newlines are replaced
            by spaces before the request; any other value is forwarded to
            the API unchanged.
        model: Name of the embedding model to query.

    Returns:
        The ``embedding`` field of the first item in the API response.

    Raises:
        Whatever ``openai.embeddings.create`` raises; errors are not caught
        here.
    """
    # Explicit type check instead of a bare ``except:`` around ``.replace``,
    # which also swallowed KeyboardInterrupt/SystemExit.
    if isinstance(text, str):
        text = text.replace("\n", " ")
    response = openai.embeddings.create(input=[text], model=model)
    return response.data[0].embedding
def get_embedding2(text, model="text-embedding-ada-002", retries=3, delay=2):
    """Return the embedding of ``text``, retrying when the API call fails.

    Best-effort variant of ``get_embedding``: API errors are retried a
    limited number of times and, if every attempt fails, ``None`` is
    returned instead of raising.

    Args:
        text: The text to embed. When it is a string, newlines are replaced
            by spaces before the request; any other value is forwarded
            unchanged.
        model: Name of the embedding model to query.
        retries: Maximum number of attempts (at least one is always made).
        delay: Seconds to sleep between two attempts.

    Returns:
        The ``embedding`` field of the first item in the API response, or
        ``None`` when all attempts failed with an OpenAI error.
    """
    if isinstance(text, str):
        text = text.replace("\n", " ")

    attempts = max(1, int(retries))
    for attempt in range(attempts):
        try:
            # Same client call as get_embedding; the previous
            # ``openai.Embedding.create`` form is the pre-1.0 interface.
            response = openai.embeddings.create(input=[text], model=model)
            return response.data[0].embedding
        except openai.OpenAIError:
            # Wait before the next attempt, but do not sleep after the last one.
            if attempt < attempts - 1:
                time.sleep(delay)
    return None
def search_cv(search, nb=3, pprint=True):
    """Rank the rows of the module-level ``df`` against a search query.

    The query is embedded with ``get_embedding`` and compared, by cosine
    similarity, to the value stored in each row's ``embedding`` column.

    Args:
        search: The query text.
        nb: Number of best-matching rows to return; converted with ``int``.
        pprint: Accepted for backward compatibility; not used.

    Returns:
        A copy of ``df`` with an added ``similarities`` column, sorted by
        decreasing similarity and truncated to the first ``nb`` rows.
    """
    query_embedding = get_embedding(search, model='text-embedding-ada-002')

    def safe_similarity(row_embedding):
        # A row whose embedding cannot be compared scores 0 rather than
        # aborting the whole search. ``Exception`` (not a bare ``except``)
        # so KeyboardInterrupt/SystemExit still propagate.
        try:
            return cosine_similarity(row_embedding, query_embedding)
        except Exception:
            return 0

    # Work on a copy so the shared DataFrame is never mutated.
    ranked = df.copy()
    ranked['similarities'] = ranked.embedding.apply(safe_similarity)
    return ranked.sort_values('similarities', ascending=False).head(int(nb))
def get_cv(text, nb):
    """Format the ``nb`` best matches for ``text`` as a Markdown string.

    Each record returned by ``search_cv`` contributes a heading with its
    similarity as a percentage (rounded to two decimals), its ``summary``
    with every ``#`` removed, a link built from its ``url``, and a dashed
    separator. Backticks are stripped from the final text.
    """
    separator = "\n\n-----------------------------------------------------------------------------------------------------\n\n"
    sections = []
    for candidate in search_cv(text, nb).to_dict(orient="records"):
        percentage = str(round(candidate["similarities"]*100, 2))
        summary = str(candidate["summary"]).replace("#", "")
        link = "\n\n[-> Lien vers le CV complet](" + str(candidate["url"]) + ')'
        sections.append(
            "#### Candidat avec " + percentage + "% de similarité :\n"
            + summary + link + separator
        )
    # Stripping backticks once at the end gives the same text as stripping
    # them after every candidate.
    return "".join(sections).replace("`", "")