# Source: HR_Model_CV_Scoring / model_logic.py
# Hub page residue: mahmodGendy's picture · Upload 6 files · 903a1b0 verified
from sentence_transformers import SentenceTransformer, util
import re
# Shared sentence-embedding model, instantiated once when this module is
# imported. score_resume_by_title uses it to encode both the job title and
# the resume text.
model = SentenceTransformer("all-MiniLM-L6-v2")
# Matches "3 years", "1 year", "5+ years", "10yrs" and "2.5 years" (only the
# whole-number part is captured). The text is lower-cased before matching.
_YEARS_OF_EXPERIENCE_RE = re.compile(
    r"(\d+)(?:\.\d+)?\s*\+?\s*(?:years?|yrs?)\b"
)


def extract_years_of_experience(text):
    """Return the largest number of years mentioned in *text*.

    Recognises singular and plural forms ("1 year", "3 years"), the
    abbreviation "yr"/"yrs", an optional "+" suffix ("5+ years"), and a
    missing space ("10yrs"). Fractional values are truncated to their
    whole-number part ("2.5 years" -> 2).

    The function does not check that a mention refers to work experience,
    so a phrase such as "20 years old" is counted as 20.

    Args:
        text: Resume text. ``None`` or an empty string yields 0.

    Returns:
        The maximum year count found as an ``int``, or 0 when there is none.
    """
    if not text:
        return 0
    mentions = _YEARS_OF_EXPERIENCE_RE.findall(text.lower())
    # default=0 covers the "no mention at all" case without a separate branch.
    return max(map(int, mentions), default=0)
# Catalog of recognisable skills, all lower-case. The order here is the order
# in which detected skills are returned.
_KNOWN_SKILLS = (
    # programming
    "python", "java", "c++", "c#", "javascript", "typescript", "go", "rust",
    "scala",
    # data science
    "machine learning", "deep learning", "data science", "data analysis",
    "data mining", "statistical analysis", "statistics",
    "predictive modeling",
    # ml / ai frameworks
    "tensorflow", "pytorch", "keras", "scikit-learn", "xgboost", "lightgbm",
    # data tools
    "pandas", "numpy", "matplotlib", "seaborn", "plotly",
    # databases
    "sql", "postgresql", "mysql", "mongodb", "redis", "oracle",
    # cloud
    "aws", "azure", "gcp", "docker", "kubernetes",
    # backend
    "fastapi", "flask", "django", "spring", "node.js", "express",
    # frontend
    "react", "angular", "vue", "html", "css", "bootstrap", "tailwind",
    # nlp
    "nlp", "natural language processing", "transformers", "bert", "llm",
    # devops
    "ci/cd", "jenkins", "git", "github", "gitlab", "terraform",
    # data engineering
    "spark", "hadoop", "kafka", "airflow", "etl", "data pipelines",
    # analytics tools
    "power bi", "tableau", "excel",
    # skills required by get_job_requirements that were missing from the
    # catalog, which made those requirements impossible to satisfy
    "data visualization", "mlops", "api", "microservices", "algorithms",
    "data structures", "frontend", "linux",
)

# Skills that count as evidence of another skill. Substring matching used to
# report "sql" for "postgresql"/"mysql" by accident; this keeps that outcome
# explicit now that matching is boundary-aware.
_IMPLIED_SKILLS = {
    "postgresql": ("sql",),
    "mysql": ("sql",),
}


def _compile_skill_pattern(skill):
    """Build a regex that matches *skill* as a whole token, not a substring."""
    # A letter or digit directly before the skill means we are inside a longer
    # word ("mysql" must not count as "sql").
    leading = r"(?<![a-z0-9])"
    # A letter directly after means a longer word ("javascript" is not
    # "java"). Digits are allowed so version suffixes such as "html5" or
    # "python3" still match. Skills ending in punctuation ("c++", "c#") need
    # no trailing guard.
    trailing = r"(?![a-z])" if skill[-1].isalnum() else ""
    return re.compile(leading + re.escape(skill) + trailing)


_SKILL_PATTERNS = [(skill, _compile_skill_pattern(skill)) for skill in _KNOWN_SKILLS]


def skills_from_text(text):
    """Return the known skills mentioned in *text*.

    Matching is case-insensitive and token-based: a skill is reported only
    when it is not embedded in a longer alphanumeric word, so "javascript"
    does not yield "java" and "excellent" does not yield "excel". Short
    skills that are also ordinary English words (notably "go") can still
    produce false positives.

    Args:
        text: Resume text. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of lower-case skill names without duplicates, in catalog
        order (deterministic across runs).
    """
    if not text:
        return []
    lowered = text.lower()
    detected = {
        skill for skill, pattern in _SKILL_PATTERNS if pattern.search(lowered)
    }
    # Iterate over a snapshot because the set grows while implications are added.
    for skill in tuple(detected):
        detected.update(_IMPLIED_SKILLS.get(skill, ()))
    return [skill for skill in _KNOWN_SKILLS if skill in detected]
def get_job_requirements(title):
    """Look up the skills expected for a job title.

    The title is lower-cased and checked against a fixed table of role
    names. The first role (in table order) whose name occurs anywhere in
    the title wins.

    Args:
        title: Free-form job title, e.g. "Senior Data Scientist".

    Returns:
        The list of required skill names for the first matching role, or an
        empty list when no role name occurs in the title.
    """
    normalized_title = title.lower()

    # Ordered (role name, required skills) pairs; the order decides which
    # role wins when a title contains more than one role name.
    role_table = (
        ("data scientist", [
            "python", "machine learning", "statistics", "pandas",
            "numpy", "sql", "data visualization", "scikit-learn",
        ]),
        ("machine learning engineer", [
            "python", "machine learning", "deep learning",
            "pytorch", "tensorflow", "docker", "mlops",
        ]),
        ("ml engineer", [
            "python", "machine learning", "deep learning",
            "pytorch", "tensorflow", "docker",
        ]),
        ("data analyst", [
            "sql", "python", "excel", "tableau",
            "power bi", "data analysis", "statistics",
        ]),
        ("data engineer", [
            "python", "sql", "spark", "hadoop",
            "etl", "data pipelines", "airflow",
        ]),
        ("backend developer", [
            "python", "fastapi", "flask", "django",
            "sql", "api", "docker",
        ]),
        ("backend engineer", [
            "python", "fastapi", "django",
            "sql", "microservices", "docker",
        ]),
        ("software engineer", [
            "python", "java", "c++", "git",
            "algorithms", "data structures",
        ]),
        ("frontend developer", [
            "javascript", "react", "html",
            "css", "typescript", "frontend",
        ]),
        ("full stack developer", [
            "javascript", "react", "node.js",
            "sql", "html", "css", "api",
        ]),
        ("devops engineer", [
            "docker", "kubernetes", "aws",
            "ci/cd", "terraform", "linux",
        ]),
        ("ai engineer", [
            "python", "deep learning", "pytorch",
            "tensorflow", "transformers", "nlp",
        ]),
        ("nlp engineer", [
            "python", "nlp", "transformers",
            "bert", "machine learning",
        ]),
        ("cloud engineer", [
            "aws", "azure", "gcp",
            "docker", "kubernetes",
        ]),
    )

    return next(
        (required for role, required in role_table if role in normalized_title),
        [],
    )
def _normalized_skill_set(skills):
    """Return *skills* as a set of trimmed, lower-cased, non-empty names."""
    return {
        skill.strip().lower()
        for skill in (skills or ())
        if skill and skill.strip()
    }


def calculate_skills_score(resume_skills, job_skills):
    """Return the fraction of required job skills present in the resume.

    Both inputs are compared as sets of trimmed, lower-cased names, so
    duplicates, letter case, and surrounding whitespace do not affect the
    result.

    Args:
        resume_skills: Iterable of skill names found in the resume; ``None``
            is treated as empty.
        job_skills: Iterable of skill names required for the job; ``None``
            is treated as empty.

    Returns:
        A float in ``[0.0, 1.0]``. Returns ``0.0`` when there are no job
        skills to match against.
    """
    required = _normalized_skill_set(job_skills)
    if not required:
        return 0.0
    offered = _normalized_skill_set(resume_skills)
    # Divide by the number of distinct requirements so a duplicated
    # requirement cannot deflate the score.
    return len(required & offered) / len(required)
# Minimum years of experience per seniority level. Any level not listed here
# (for example "senior") falls back to _DEFAULT_REQUIRED_YEARS.
_REQUIRED_YEARS_BY_LEVEL = {"entry": 0, "junior": 1, "mid": 3}
_DEFAULT_REQUIRED_YEARS = 5

# Rejection thresholds for the skill-overlap score and the title/resume
# embedding similarity.
_MIN_SKILL_SCORE = 0.4
_MIN_SIMILARITY = 0.3

# Only this many leading characters of the resume are embedded.
_MAX_EMBEDDED_CHARS = 2000


def score_resume_by_title(text, title, level):
    """Score a resume against a job title and seniority level.

    Three independent checks are applied; failing any one of them turns the
    decision into "REJECT" and appends a reason:

    * skill overlap with the title's requirements below 0.4,
    * fewer years of experience than the level requires,
    * embedding similarity between the title and the first 2000 characters
      of the resume below 0.3.

    Args:
        text: Resume text.
        title: Job title; also used to look up the required skills.
        level: Seniority level. "entry", "junior" and "mid" require 0, 1 and
            3 years respectively (case-insensitive, surrounding whitespace
            ignored); every other value requires 5 years.

    Returns:
        A dict with the keys "decision" ("ACCEPT" or "REJECT"),
        "skill_score", "similarity", "experience_years", "resume_skills",
        "job_skills" and "reasons" (list of rejection reasons, empty on
        accept).
    """
    job_skills = get_job_requirements(title)
    resume_skills = skills_from_text(text)
    # NOTE(review): a title with no known requirements yields a skill score
    # of 0 and is therefore always rejected as "Low skill match".
    skill_score = calculate_skills_score(resume_skills, job_skills)
    exp = extract_years_of_experience(text)

    title_embedding = model.encode(title)
    resume_embedding = model.encode(text[:_MAX_EMBEDDED_CHARS])
    similarity = util.cos_sim(title_embedding, resume_embedding).item()

    # Trim before comparing so inputs like " mid" or "Junior " are not
    # silently treated as the strictest (default) level.
    required_exp = _REQUIRED_YEARS_BY_LEVEL.get(
        level.strip().lower(), _DEFAULT_REQUIRED_YEARS
    )

    reasons = []
    if skill_score < _MIN_SKILL_SCORE:
        reasons.append("Low skill match")
    if exp < required_exp:
        reasons.append("Insufficient experience")
    if similarity < _MIN_SIMILARITY:
        reasons.append("Low semantic match with job title")

    # Any recorded reason means at least one check failed.
    decision = "REJECT" if reasons else "ACCEPT"

    return {
        "decision": decision,
        "skill_score": round(skill_score, 3),
        "similarity": round(similarity, 3),
        "experience_years": exp,
        "resume_skills": resume_skills,
        "job_skills": job_skills,
        "reasons": reasons,
    }