File size: 2,938 Bytes
d22d262
 
 
 
 
8e054a4
 
 
 
7e096d6
d22d262
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e054a4
f452678
8e054a4
 
 
 
 
 
 
 
 
d22d262
 
 
e0e8487
d22d262
8e054a4
ee1bfa2
8e054a4
 
 
2d91242
 
 
 
 
 
 
 
8e054a4
2d91242
8e054a4
 
 
 
 
 
 
 
 
 
 
9a6af66
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
import pickle 
from geopy.geocoders import Nominatim
from geopy.distance import geodesic


# Sentence-embedding model shared by every similarity computation in predict().
# Constructed once at import time so the checkpoint is not reloaded per request.
st = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
# Path of the pickled classifier; being relative, it is resolved against the
# process's current working directory, not this file's location.
filename = 'svc.pkl'

# SECURITY: pickle.load can execute arbitrary code while unpickling, so
# 'svc.pkl' must come from a trusted source only.
# NOTE(review): `model` is only referenced from commented-out code in
# predict() within this view -- confirm it is still needed before removing.
with open(filename, 'rb') as file:
  model = pickle.load(file)

# role_req-exp        0.341522
# role_pos            0.350747
# major_similarity    0.846268
# skill_similarity    0.774542
# score               0.986356
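# Shape of the `cv` and `job` dicts passed to predict(), as assembled by the
# caller. List-valued fields are stringified, so an empty list arrives as '[]'.
# NOTE: predict() additionally reads a 'location' key from both dicts, which
# is not shown in the snippets below.
# cv = {
#     "experiences": str(body.cv.experiences), 
#     "positions": str(positions), 
#     "userMajors": str(userMajors), 
#     "skills": str(body.cv.skills), 
#     "yoe": yoe
# }
# job = {
#     "jobDesc": body.job.jobDesc, 
#     "role": body.job.role, 
#     "majors": str(body.job.majors), 
#     "skills": str(body.job.skills), 
#     "minYoE": body.job.minYoE
# }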


# Module-level Nominatim geocoder client used by get_coordinates(); created
# once with a 10-second request timeout and a fixed user agent string.
geolocator = Nominatim(user_agent="geo_distance_calculator", timeout=10)

def get_coordinates(city):
    """Geocode *city* and return its ``(latitude, longitude)`` pair.

    Args:
        city: Free-form place query forwarded to the module-level
            ``geolocator``.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the query is
        empty, the geocoder finds no match, or the geocoding service fails
        (timeout, unavailable, rate-limited, ...).
    """
    # An empty query cannot be resolved; skip the network round-trip.
    if not city or (isinstance(city, str) and not city.strip()):
        return None

    # Function-scope imports keep this block self-contained.
    import logging
    from geopy.exc import GeocoderServiceError

    try:
        location = geolocator.geocode(city)
    except GeocoderServiceError as exc:
        # A transient service failure is reported the same way as "place not
        # found", so callers fall back instead of crashing the whole request.
        logging.getLogger(__name__).warning(
            "Geocoding failed for %r: %s", city, exc
        )
        return None

    if location is None:
        return None
    return (location.latitude, location.longitude)

# Distance (km) assumed when either location is missing or cannot be resolved;
# large enough that the proximity term 1 / (1 + distance) is effectively zero.
_UNKNOWN_DISTANCE_KM = 100000


def _text_similarity(left, right):
    """Return the cosine similarity between the embeddings of two texts."""
    left_vec = st.encode(left).reshape(1, -1)
    right_vec = st.encode(right).reshape(1, -1)
    return cosine_similarity(left_vec, right_vec)[0][0]


def _field_similarity(cv_text, job_text):
    """Return the similarity of a CV field to a job text, or 0 if the field is '[]'."""
    # CV fields arrive as stringified lists; '[]' means nothing was provided.
    if cv_text == '[]':
        return 0
    return _text_similarity(cv_text, job_text)


def _yoe_ratio(yoe, min_yoe):
    """Return experience as a fraction of the requirement, clamped to [0, 1]."""
    # No (or a non-positive) minimum means the requirement is trivially met;
    # this also avoids dividing by zero.
    if not min_yoe or min_yoe <= 0:
        return 1.0
    return max(0.0, min(yoe / min_yoe, 1.0))


def _distance_km(cv_location, job_location):
    """Return the geodesic distance in km, or the unknown-distance fallback."""
    if not cv_location or not job_location:
        return _UNKNOWN_DISTANCE_KM
    origin = get_coordinates(cv_location)
    if not origin:
        return _UNKNOWN_DISTANCE_KM
    destination = get_coordinates(job_location)
    if not destination:
        return _UNKNOWN_DISTANCE_KM
    try:
        return geodesic(origin, destination).kilometers
    except ValueError:
        # geodesic rejects malformed or out-of-range coordinates.
        return _UNKNOWN_DISTANCE_KM


def predict(cv, job, weight):
    """Score how well a CV matches a job as a weighted sum of six signals.

    Args:
        cv: Dict with string fields 'experiences', 'positions', 'userMajors'
            and 'skills' (the literal '[]' marks an empty field), a numeric
            'yoe', and an optional 'location'.
        job: Dict with 'role', 'jobDesc', 'majors', 'skills', a numeric
            'minYoE', and an optional 'location'.
        weight: Dict of multipliers keyed by 'exp', 'position', 'major',
            'diffYoe', 'skills' and 'location'.

    Returns:
        A dict with a single key, 'score', holding the weighted sum.
    """
    role_req_exp = _field_similarity(
        cv['experiences'], job['role'] + '\n' + job['jobDesc']
    )
    role_pos = _field_similarity(cv['positions'], job['role'])
    major_similarity = _field_similarity(cv['userMajors'], job['majors'])
    skill_similarity = _field_similarity(cv['skills'], job['skills'])

    # The previous min(ratio, 0) could never be positive, so experience never
    # raised the score; the ratio is now capped at 1 (requirement fully met).
    # NOTE(review): assumes a capped ratio was the intent -- confirm.
    diff_yoe = _yoe_ratio(cv['yoe'], job['minYoE'])

    location_weight = weight['location']
    if location_weight:
        distance = _distance_km(cv.get('location'), job.get('location'))
        proximity = 1 / (1 + distance)
    else:
        # A zero weight cancels the term, so skip the geocoding requests.
        proximity = 0

    score = (
        weight['exp'] * role_req_exp
        + weight['position'] * role_pos
        + weight['major'] * major_similarity
        + weight['diffYoe'] * diff_yoe
        + weight['skills'] * skill_similarity
        + location_weight * proximity
    )

    # The pickled classifier path (model.predict / predict_proba on these
    # features, which also produced 'is_accepted') is currently disabled; only
    # the weighted score is returned.
    return {'score': score}