"""Score how well a candidate CV matches a job posting.

The score is a weighted sum of sentence-embedding similarities (experience,
position, major, skills), a years-of-experience ratio and a geographic
proximity term derived from geocoding the CV and job locations.
"""

import logging
import pickle

import numpy as np
import pandas as pd
from geopy.distance import geodesic
from geopy.exc import GeopyError
from geopy.geocoders import Nominatim
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

logger = logging.getLogger(__name__)

st = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')

filename = 'svc.pkl'
# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file.
# Only ever load a 'svc.pkl' that comes from a trusted source.
# NOTE(review): `model` is only referenced by the commented-out classifier
# code inside predict(); it is still loaded so module behaviour is unchanged.
with open(filename, 'rb') as file:
    model = pickle.load(file)

# Figures recorded by the original author (their meaning is not documented
# in this file; presumably per-feature statistics from training -- confirm):
# role_req-exp        0.341522
# role_pos            0.350747
# major_similarity    0.846268
# skill_similarity    0.774542
# score               0.986356

# Payload shapes noted by the original author. predict() additionally reads
# cv['location'] and job['location'], which are not listed here.
# cv = {
#     "experiences": str(body.cv.experiences),
#     "positions": str(positions),
#     "userMajors": str(userMajors),
#     "skills": str(body.cv.skills),
#     "yoe": yoe
# }
# job = {
#     "jobDesc": body.job.jobDesc,
#     "role": body.job.role,
#     "majors": str(body.job.majors),
#     "skills": str(body.job.skills),
#     "minYoE": body.job.minYoE
# }

geolocator = Nominatim(user_agent="geo_distance_calculator", timeout=10)

# Distance (km) used when either location cannot be resolved; large enough
# that the proximity term 1 / (1 + distance) is effectively zero.
_UNKNOWN_DISTANCE_KM = 100000


def get_coordinates(city):
    """Geocode *city* and return ``(latitude, longitude)``.

    Returns ``None`` when the geocoder finds no match. Errors raised by the
    geocoder itself (timeouts, service failures) propagate to the caller.
    """
    location = geolocator.geocode(city)
    if location is None:
        return None
    return (location.latitude, location.longitude)


def _text_similarity(candidate_text, job_text):
    """Return the cosine similarity of the two texts' sentence embeddings.

    A candidate field equal to ``'[]'`` (a stringified empty list) scores 0
    without running the encoder.
    """
    if candidate_text == '[]':
        return 0
    candidate_vec = st.encode(candidate_text).reshape(1, -1)
    job_vec = st.encode(job_text).reshape(1, -1)
    return cosine_similarity(candidate_vec, job_vec)[0][0]


def _yoe_ratio(candidate_yoe, required_yoe):
    """Return candidate/required years of experience, clamped to [0, 1].

    A job with no (or a non-positive) minimum counts as fully satisfied,
    which also avoids dividing by zero.
    """
    if not required_yoe or required_yoe <= 0:
        return 1.0
    return max(0.0, min(candidate_yoe / required_yoe, 1.0))


def _distance_km(origin, destination):
    """Return the geodesic distance in km between two place names.

    Falls back to ``_UNKNOWN_DISTANCE_KM`` when a place cannot be geocoded,
    the geocoding service fails, or the coordinates are rejected.
    """
    try:
        start = get_coordinates(origin)
        end = get_coordinates(destination)
    except GeopyError:
        # A geocoder outage should degrade the location term, not the
        # whole prediction.
        logger.warning(
            "Geocoding failed for %r / %r; using fallback distance.",
            origin, destination, exc_info=True,
        )
        return _UNKNOWN_DISTANCE_KM

    if start is None or end is None:
        return _UNKNOWN_DISTANCE_KM

    try:
        return geodesic(start, end).kilometers
    except ValueError:
        # geopy raises ValueError for out-of-range coordinates.
        return _UNKNOWN_DISTANCE_KM


def predict(cv, job, weight):
    """Compute the weighted match score between a CV and a job.

    Args:
        cv: dict with keys 'experiences', 'positions', 'userMajors',
            'skills' (strings; ``'[]'`` means empty), 'yoe' (number) and
            'location' (place name passed to the geocoder).
        job: dict with keys 'jobDesc', 'role', 'majors', 'skills'
            (strings), 'minYoE' (number) and 'location'.
        weight: dict with numeric weights under keys 'exp', 'position',
            'major', 'diffYoe', 'skills' and 'location'.

    Returns:
        dict with a single key ``'score'`` holding the weighted sum.
    """
    results = {}

    role_req_exp = _text_similarity(
        cv['experiences'], job['role'] + '\n' + job['jobDesc'])
    role_pos = _text_similarity(cv['positions'], job['role'])
    major_similarity = _text_similarity(cv['userMajors'], job['majors'])
    skill_similarity = _text_similarity(cv['skills'], job['skills'])

    # Previously min(ratio, 0), which could never be positive and raised
    # ZeroDivisionError for minYoE == 0; the ratio is now capped at 1.
    # score_yoe = 0.5 if diffYoe == -1 else (0 if diffYoe < 0 else 1)
    diffYoe = _yoe_ratio(cv['yoe'], job['minYoE'])

    distance = _distance_km(cv['location'], job['location'])

    score = (
        weight['exp'] * role_req_exp
        + weight['position'] * role_pos
        + weight['major'] * major_similarity
        + weight['diffYoe'] * diffYoe
        + weight['skills'] * skill_similarity
        + weight['location'] * (1 / (1 + distance))
    )

    # Classifier-based scoring kept for reference (currently disabled):
    # data = [{
    #     'role_req-exp': role_req_exp,
    #     'role_pos': role_pos,
    #     'major_similarity': major_similarity,
    #     'skill_similarity': skill_similarity,
    #     'score': score
    # }]
    # X = pd.DataFrame.from_dict(data)
    # res = model.predict(X)
    results['score'] = score  # model.predict_proba(X)[:, 1]
    # results['is_accepted'] = res[0]
    return results