m00913563 committed on
Commit
8e054a4
·
1 Parent(s): 2ca386f

fix calculation

Browse files
Files changed (4) hide show
  1. app.py +6 -3
  2. classificator.py +36 -14
  3. models.py +11 -1
  4. requirements.txt +2 -1
app.py CHANGED
@@ -54,16 +54,19 @@ async def classify(body:JobAndCV):
54
  "positions": str(positions),
55
  "userMajors": str(userMajors),
56
  "skills": str(body.cv.skills),
57
- "yoe": yoe
 
58
  }
59
  job = {
60
  "jobDesc": body.job.jobDesc,
61
  "role": body.job.role,
62
  "majors": str(body.job.majors),
63
  "skills": str(body.job.skills),
64
- "minYoE": body.job.minYoE
 
65
  }
66
- results = classificator.predict(cv, job)
 
67
  return ClassificationResult(**results)
68
 
69
  @app.post("/cv", response_model=CVExtracted)
 
54
  "positions": str(positions),
55
  "userMajors": str(userMajors),
56
  "skills": str(body.cv.skills),
57
+ "yoe": yoe,
58
+ "location": str(body.cv.location)
59
  }
60
  job = {
61
  "jobDesc": body.job.jobDesc,
62
  "role": body.job.role,
63
  "majors": str(body.job.majors),
64
  "skills": str(body.job.skills),
65
+ "minYoE": body.job.minYoE,
66
+ "location": str(body.job.location)
67
  }
68
+ weight = body.weight.dict()
69
+ results = classificator.predict(cv, job, weight)
70
  return ClassificationResult(**results)
71
 
72
  @app.post("/cv", response_model=CVExtracted)
classificator.py CHANGED
@@ -3,6 +3,10 @@ from sklearn.metrics.pairwise import cosine_similarity
3
  import numpy as np
4
  import pandas as pd
5
  import pickle
 
 
 
 
6
  st = SentenceTransformer('all-mpnet-base-v2')
7
  filename = 'svc.pkl'
8
 
@@ -29,24 +33,42 @@ with open(filename, 'rb') as file:
29
  # "minYoE": body.job.minYoE
30
  # }
31
 
32
- def predict(cv, job):
 
 
 
 
 
 
 
 
 
 
33
  diffYoe = cv['yoe'] - job['minYoE']
34
  results = {}
35
  role_req_exp = cosine_similarity(st.encode(cv['experiences']).reshape(1,-1), st.encode(job['role']+'\n'+job['jobDesc']).reshape(1,-1))[0][0] if cv['experiences'] != '[]' else 0
36
  role_pos = cosine_similarity(st.encode(cv['positions']).reshape(1,-1), st.encode(job['role']).reshape(1,-1))[0][0] if cv['positions'] != '[]' else 0
37
  major_similarity = cosine_similarity(st.encode(cv['userMajors']).reshape(1,-1), st.encode(job['majors']).reshape(1,-1))[0][0] if cv['userMajors'] != '[]' else 0
38
  skill_similarity = cosine_similarity(st.encode(cv['skills']).reshape(1,-1), st.encode(job['skills']).reshape(1,-1))[0][0] if cv['skills'] != '[]' else 0
39
- score_yoe = 0.5 if diffYoe == -1 else (0 if diffYoe < 0 else 1)
40
- score = 0.35 * role_req_exp + 0.1 * role_pos + 0.15 * major_similarity + 0.3* score_yoe + 0.1 * skill_similarity
41
- data = [{
42
- 'role_req-exp': role_req_exp,
43
- 'role_pos': role_pos,
44
- 'major_similarity': major_similarity,
45
- 'skill_similarity': skill_similarity,
46
- 'score': score
47
- }]
48
- X = pd.DataFrame.from_dict(data)
49
- res = model.predict(X)
50
- results['score'] = model.predict_proba(X)[:, 1]
51
- results['is_accepted'] = res[0]
 
 
 
 
 
 
 
 
52
  return results
 
3
  import numpy as np
4
  import pandas as pd
5
  import pickle
6
+ from geopy.geocoders import Nominatim
7
+ from geopy.distance import geodesic
8
+
9
+
10
  st = SentenceTransformer('all-mpnet-base-v2')
11
  filename = 'svc.pkl'
12
 
 
33
  # "minYoE": body.job.minYoE
34
  # }
35
 
36
+
37
+ geolocator = Nominatim(user_agent="geo_distance_calculator")
38
+
39
+ def get_coordinates(city):
40
+ location = geolocator.geocode(city)
41
+ if location:
42
+ return (location.latitude, location.longitude)
43
+ else:
44
+ return None
45
+
46
+ def predict(cv, job, weight):
47
  diffYoe = cv['yoe'] - job['minYoE']
48
  results = {}
49
  role_req_exp = cosine_similarity(st.encode(cv['experiences']).reshape(1,-1), st.encode(job['role']+'\n'+job['jobDesc']).reshape(1,-1))[0][0] if cv['experiences'] != '[]' else 0
50
  role_pos = cosine_similarity(st.encode(cv['positions']).reshape(1,-1), st.encode(job['role']).reshape(1,-1))[0][0] if cv['positions'] != '[]' else 0
51
  major_similarity = cosine_similarity(st.encode(cv['userMajors']).reshape(1,-1), st.encode(job['majors']).reshape(1,-1))[0][0] if cv['userMajors'] != '[]' else 0
52
  skill_similarity = cosine_similarity(st.encode(cv['skills']).reshape(1,-1), st.encode(job['skills']).reshape(1,-1))[0][0] if cv['skills'] != '[]' else 0
53
+ # score_yoe = 0.5 if diffYoe == -1 else (0 if diffYoe < 0 else 1)
54
+ coords_1 = get_coordinates(cv['location'])
55
+ coords_2 = get_coordinates(job['location'])
56
+ distance = 999999
57
+ if coords_1 and coords_2:
58
+ distance = geodesic(coords_1, coords_2).kilometers
59
+ else:
60
+ raise ValueError("Could not get coordinates for one or both cities.")
61
+
62
+ score = weight['exp'] * role_req_exp + weight['position'] * role_pos + weight['major'] * major_similarity + weight['diffYoe']* diffYoe + weight['skills'] * skill_similarity + weight['location'] * (1 / (1 + distance))
63
+ # data = [{
64
+ # 'role_req-exp': role_req_exp,
65
+ # 'role_pos': role_pos,
66
+ # 'major_similarity': major_similarity,
67
+ # 'skill_similarity': skill_similarity,
68
+ # 'score': score
69
+ # }]
70
+ # X = pd.DataFrame.from_dict(data)
71
+ # res = model.predict(X)
72
+ results['score'] = score #model.predict_proba(X)[:, 1]
73
+ # results['is_accepted'] = res[0]
74
  return results
models.py CHANGED
@@ -31,6 +31,7 @@ class CVToClassify(BaseModel):
31
  educations: List[dict[str, Any]]
32
  skills: List[str]
33
  experiences: List[dict[str, Any]]
 
34
 
35
  class JobToClassify(BaseModel):
36
  minYoE: int
@@ -38,14 +39,23 @@ class JobToClassify(BaseModel):
38
  skills: List[str]
39
  role: str
40
  majors: List[str]
 
41
 
 
 
 
 
 
 
 
42
 
43
  class JobAndCV(BaseModel):
44
  cv: CVToClassify
45
  job: JobToClassify
 
46
 
47
  class ClassificationResult(BaseModel):
48
  score: float
49
- is_accepted: bool
50
  class InsertedLink(BaseModel):
51
  link: str
 
31
  educations: List[dict[str, Any]]
32
  skills: List[str]
33
  experiences: List[dict[str, Any]]
34
+ location: str
35
 
36
  class JobToClassify(BaseModel):
37
  minYoE: int
 
39
  skills: List[str]
40
  role: str
41
  majors: List[str]
42
+ location: str
43
 
44
+ class Weight(BaseModel):
45
+ exp: float
46
+ position: float
47
+ major: float
48
+ skills: float
49
+ diffYoe: float
50
+ location: float
51
 
52
  class JobAndCV(BaseModel):
53
  cv: CVToClassify
54
  job: JobToClassify
55
+ weight: Weight
56
 
57
  class ClassificationResult(BaseModel):
58
  score: float
59
+
60
  class InsertedLink(BaseModel):
61
  link: str
requirements.txt CHANGED
@@ -7,4 +7,5 @@ sentence_transformers
7
  scikit-learn
8
  numpy
9
  pandas
10
- openai
 
 
7
  scikit-learn
8
  numpy
9
  pandas
10
+ openai
11
+ geopy