api-ta / classificator.py
Muhammad Risqi Firdaus
fix naming
7e096d6
import logging
import pickle

import numpy as np
import pandas as pd
from geopy.distance import geodesic
from geopy.exc import GeocoderServiceError
from geopy.geocoders import Nominatim
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Multilingual sentence-embedding model, loaded once at import time.
st = SentenceTransformer("paraphrase-multilingual-mpnet-base-v2")

# Pickled estimator, loaded once at import time (presumably a scikit-learn
# SVC, judging by the file name -- confirm against the training code).
# NOTE(review): pickle.load executes arbitrary code from the file, so
# svc.pkl must come from a trusted source. The path is relative to the
# process working directory.
filename = "svc.pkl"
with open(filename, mode="rb") as file:
    model = pickle.load(file)
# role_req-exp 0.341522
# role_pos 0.350747
# major_similarity 0.846268
# skill_similarity 0.774542
# score 0.986356
# cv = {
# "experiences": str(body.cv.experiences),
# "positions": str(positions),
# "userMajors": str(userMajors),
# "skills": str(body.cv.skills),
# "yoe": yoe
# }
# job = {
# "jobDesc": body.job.jobDesc,
# "role": body.job.role,
# "majors": str(body.job.majors),
# "skills": str(body.job.skills),
# "minYoE": body.job.minYoE
# }
# Shared Nominatim geocoder client used for every location lookup in this
# module; each request is given a timeout of 10.
geolocator = Nominatim(
    user_agent="geo_distance_calculator",
    timeout=10,
)
def get_coordinates(city):
    """Geocode *city* and return its ``(latitude, longitude)`` tuple.

    Args:
        city: Free-text place name passed to the module-level Nominatim
            geocoder.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when *city* is empty
        or blank, the geocoder finds no match, or the geocoding service
        fails (timeout, unavailable, rate-limited, ...).
    """
    # Nothing to look up: avoid a pointless network round-trip.
    if not city or (isinstance(city, str) and not city.strip()):
        return None

    try:
        location = geolocator.geocode(city)
    except GeocoderServiceError as err:
        # A service failure is reported the same way as "not found" so the
        # caller can fall back to its default distance instead of crashing.
        logging.getLogger(__name__).warning(
            "Geocoding failed for %r: %s", city, err
        )
        return None

    if location is None:
        return None
    return (location.latitude, location.longitude)
# Distance (in km) assumed when a location is missing or cannot be resolved.
_FALLBACK_DISTANCE_KM = 100000


def _text_similarity(cv_text, job_text):
    """Return the cosine similarity of the embeddings of two texts.

    A CV field equal to the literal string '[]' is treated as empty and
    scores 0 without being embedded.
    """
    if cv_text == '[]':
        return 0.0
    cv_vec = st.encode(cv_text).reshape(1, -1)
    job_vec = st.encode(job_text).reshape(1, -1)
    return cosine_similarity(cv_vec, job_vec)[0][0]


def _yoe_ratio(yoe, min_yoe):
    """Return how much of the required experience is met, clamped to [0, 1]."""
    # No (or a zero) requirement is always fully satisfied; this also avoids
    # dividing by zero.
    if not min_yoe or min_yoe <= 0:
        return 1.0
    return max(0.0, min((yoe or 0) / min_yoe, 1.0))


def _distance_km(cv_location, job_location):
    """Return the geodesic distance in km between two places, or the fallback."""
    # Skip geocoding entirely when either side has no location.
    if not cv_location or not job_location:
        return _FALLBACK_DISTANCE_KM
    cv_coords = get_coordinates(cv_location)
    if not cv_coords:
        return _FALLBACK_DISTANCE_KM
    job_coords = get_coordinates(job_location)
    if not job_coords:
        return _FALLBACK_DISTANCE_KM
    try:
        return geodesic(cv_coords, job_coords).kilometers
    except ValueError:
        # geodesic rejects invalid coordinates with ValueError.
        return _FALLBACK_DISTANCE_KM


def predict(cv, job, weight):
    """Score how well a CV matches a job as a weighted sum of features.

    Args:
        cv: Mapping with keys 'experiences', 'positions', 'userMajors',
            'skills' (text; the literal '[]' marks an empty field), 'yoe'
            (number) and 'location' (place name).
        job: Mapping with keys 'role', 'jobDesc', 'majors', 'skills' (text),
            'minYoE' (number) and 'location' (place name).
        weight: Mapping with the numeric weights 'exp', 'position', 'major',
            'diffYoe', 'skills' and 'location'.

    Returns:
        A dict with a single key 'score' holding the weighted sum as a plain
        Python float.

    The features are four embedding cosine similarities, the experience
    ratio clamped to [0, 1], and a proximity term ``1 / (1 + distance_km)``.
    The pickled ``model`` loaded at import time is not used here.
    """
    role_req_exp = _text_similarity(
        cv['experiences'], job['role'] + '\n' + job['jobDesc']
    )
    role_pos = _text_similarity(cv['positions'], job['role'])
    major_similarity = _text_similarity(cv['userMajors'], job['majors'])
    skill_similarity = _text_similarity(cv['skills'], job['skills'])

    yoe_ratio = _yoe_ratio(cv['yoe'], job['minYoE'])
    distance = _distance_km(cv['location'], job['location'])

    score = (
        weight['exp'] * role_req_exp
        + weight['position'] * role_pos
        + weight['major'] * major_similarity
        + weight['diffYoe'] * yoe_ratio
        + weight['skills'] * skill_similarity
        + weight['location'] * (1 / (1 + distance))
    )

    # The similarities are NumPy scalars; return a built-in float so the
    # result can be serialised (e.g. to JSON) without extra handling.
    return {'score': float(score)}