"""Rule-based resume screening against per-title skill requirements.

A resume is scored on three signals: overlap between the skills found in the
resume text and the skills listed for the job title, the number of years of
experience mentioned in the text, and the cosine similarity between sentence
embeddings of the job title and the resume.
"""

import os
import re
from functools import lru_cache
from typing import Dict, List, Tuple

import pandas as pd

try:
    from sentence_transformers import SentenceTransformer, util
except ImportError:  # pragma: no cover - only hit when the package is absent
    # Keep the module importable so the text-only helpers and
    # create_default_csvs() remain usable; ResumeScorer() itself raises a
    # clear ImportError when the embedding backend is missing.
    SentenceTransformer = None
    util = None


@lru_cache(maxsize=4096)
def _term_pattern(term: str) -> "re.Pattern":
    """Compile a whole-term regex for a non-empty, already stripped *term*.

    Word-boundary guards are added only on the sides where the term starts or
    ends with an alphanumeric character, so terms such as ``c++`` or ``.net``
    still match. The trailing guard also rejects ``+`` and ``#`` so that ``c``
    is not found inside ``c++`` or ``c#``. Runs of whitespace inside the term
    match any whitespace in the text (e.g. a line break in the resume).
    """
    body = r"\s+".join(re.escape(piece) for piece in term.split())
    prefix = r"(?<!\w)" if term[0].isalnum() else ""
    suffix = r"(?![\w+#])" if term[-1].isalnum() else ""
    return re.compile(prefix + body + suffix)


def _contains_term(text: str, term: str) -> bool:
    """Return True when *term* occurs in *text* as a whole term."""
    term = term.strip()
    if not term:
        # An empty term is a substring of everything; never report it.
        return False
    return _term_pattern(term).search(text) is not None


class ResumeScorer:
    """Score resumes against job requirements loaded from CSV files.

    Attributes set by ``__init__``:
        model: the ``all-MiniLM-L6-v2`` sentence-transformer.
        base_dir: directory containing this module.
        skills_list: lower-cased skills read from the skills CSV.
        job_requirements: mapping of lower-cased job title to a dict with
            ``required``, ``preferred`` and ``all_skills`` lists.
        skill_synonyms: mapping of canonical skill to alternative spellings.
    """

    # Weights used to blend the required / preferred match ratios.
    REQUIRED_WEIGHT = 0.7
    PREFERRED_WEIGHT = 0.3

    # Decision thresholds used by score_resume_by_title().
    MIN_SKILL_SCORE = 0.4
    MIN_SIMILARITY = 0.3

    # Minimum years of experience per seniority level; unknown levels fall
    # back to DEFAULT_REQUIRED_YEARS.
    EXPERIENCE_BY_LEVEL = {
        "entry": 0,
        "junior": 1,
        "mid": 3,
        "senior": 5,
        "lead": 7,
        "principal": 8,
    }
    DEFAULT_REQUIRED_YEARS = 5

    # Accepted (case-insensitive) header names for the skills column, in
    # priority order.
    _SKILL_COLUMN_NAMES = ("skill", "skills")

    # Only 1-2 digit values are accepted so calendar years ("2019") are not
    # mistaken for a duration; a decimal part ("3.5 years") is truncated.
    _YEARS_PATTERNS = (
        # "4 years", "5+ yrs", "3.5 years of experience"
        re.compile(r"(?<![\d.])(\d{1,2})(?:\.\d+)?\+?\s*(?:years?|yrs?)\b"),
        # "experience: 6", "experience 10"
        re.compile(r"experience[:\s]+(\d{1,2})(?!\d|\.\d)"),
    )

    def __init__(self, skills_csv_path="skills.csv",
                 job_requirements_csv_path="job_requirements.csv"):
        """Load the skill / job CSVs and the embedding model.

        Relative paths are resolved against the directory of this module
        (``os.path.join`` leaves absolute paths untouched).

        Raises:
            ImportError: if ``sentence_transformers`` is not installed.
            FileNotFoundError: if either CSV file does not exist.
            ValueError: if a CSV lacks the expected columns.
        """
        if SentenceTransformer is None:
            raise ImportError(
                "sentence-transformers is required to create a ResumeScorer"
            )

        self.base_dir = os.path.dirname(os.path.abspath(__file__))
        self.skills_csv_path = os.path.join(self.base_dir, skills_csv_path)
        self.job_requirements_csv_path = os.path.join(
            self.base_dir, job_requirements_csv_path
        )

        # Read the cheap CSV inputs first so a missing or malformed file
        # fails before the embedding model is loaded.
        self.skills_list = self._load_skills_from_csv(self.skills_csv_path)
        self.job_requirements = self._load_job_requirements_from_csv(
            self.job_requirements_csv_path
        )
        self.skill_synonyms = self._create_skill_synonyms()

        self.model = SentenceTransformer("all-MiniLM-L6-v2")

    def _load_skills_from_csv(self, csv_path: str) -> List[str]:
        """Read the skills column of *csv_path* as lower-cased strings.

        The column header may be ``skill`` or ``skills`` in any letter case.
        Missing and blank cells are dropped and duplicates are removed while
        keeping first-seen order.

        Raises:
            FileNotFoundError: if *csv_path* does not exist.
            ValueError: if no skills column is present.
        """
        if not os.path.exists(csv_path):
            raise FileNotFoundError(f"Skills CSV file not found: {csv_path}")

        df = pd.read_csv(csv_path)

        skill_column = None
        for wanted in self._SKILL_COLUMN_NAMES:
            for col in df.columns:
                if str(col).strip().lower() == wanted:
                    skill_column = col
                    break
            if skill_column is not None:
                break
        if skill_column is None:
            raise ValueError("CSV must have a 'skill' column")

        # astype(str) keeps the .str accessor usable even when pandas parsed
        # the column as a non-string dtype.
        cleaned = (
            df[skill_column].dropna().astype(str).str.strip().str.lower()
        )
        skills = list(dict.fromkeys(s for s in cleaned if s))

        print(f"Loaded {len(skills)} skills from {csv_path}")
        return skills

    @staticmethod
    def _split_skills(cell) -> List[str]:
        """Split a comma-separated CSV cell into lower-cased skill names.

        Missing cells yield an empty list; blank items (for example from a
        trailing comma) are discarded.
        """
        if pd.isna(cell):
            return []
        pieces = (part.strip().lower() for part in str(cell).split(","))
        return [piece for piece in pieces if piece]

    def _load_job_requirements_from_csv(self, csv_path: str) -> Dict[str, Dict]:
        """Read per-title required / preferred skills from *csv_path*.

        Columns are detected by substring of the lower-cased header:
        ``required`` -> required skills, ``preferred`` -> preferred skills,
        ``title`` or ``job`` -> job title. The first matching column wins for
        each role, and ``required`` / ``preferred`` take precedence so that a
        header like ``job_required_skills`` is not mistaken for the title.
        Rows without a title are skipped.

        Returns:
            Mapping of lower-cased, stripped job title to a dict with keys
            ``required``, ``preferred`` and ``all_skills``.

        Raises:
            FileNotFoundError: if *csv_path* does not exist.
            ValueError: if the title or required-skills column is missing.
        """
        if not os.path.exists(csv_path):
            raise FileNotFoundError(
                f"Job requirements CSV file not found: {csv_path}"
            )

        df = pd.read_csv(csv_path)

        title_col = None
        required_col = None
        preferred_col = None
        for col in df.columns:
            col_lower = str(col).lower()
            if "required" in col_lower:
                if required_col is None:
                    required_col = col
            elif "preferred" in col_lower:
                if preferred_col is None:
                    preferred_col = col
            elif "title" in col_lower or "job" in col_lower:
                if title_col is None:
                    title_col = col

        if title_col is None or required_col is None:
            raise ValueError("CSV must have job_title and required_skills columns")

        job_requirements = {}
        for _, row in df.iterrows():
            if pd.isna(row[title_col]):
                continue
            title = str(row[title_col]).strip().lower()
            if not title:
                continue

            required_skills = self._split_skills(row[required_col])
            preferred_skills = (
                self._split_skills(row[preferred_col])
                if preferred_col is not None
                else []
            )

            job_requirements[title] = {
                'required': required_skills,
                'preferred': preferred_skills,
                'all_skills': required_skills + preferred_skills,
            }

        print(f"Loaded {len(job_requirements)} job titles from {csv_path}")
        return job_requirements

    def _create_skill_synonyms(self) -> Dict[str, List[str]]:
        """Return the built-in map of canonical skill -> alternative names."""
        return {
            'scikit-learn': ['sklearn', 'scikit learn'],
            'javascript': ['js', 'ecmascript'],
            'typescript': ['ts'],
            'react': ['react.js', 'reactjs'],
            'node.js': ['node', 'nodejs'],
            'python': ['python3', 'py'],
            'sql': ['postgresql', 'mysql', 'sqlite'],
            'docker': ['container', 'docker container'],
            'kubernetes': ['k8s'],
            'aws': ['amazon web services'],
            'gcp': ['google cloud platform'],
            'azure': ['microsoft azure'],
            'nlp': ['natural language processing'],
            'mlops': ['ml ops', 'machine learning operations'],
        }

    def extract_years_of_experience(self, text: str) -> int:
        """Return the largest number of years of experience stated in *text*.

        Every pattern in ``_YEARS_PATTERNS`` is consulted and the maximum over
        all matches is returned, so "experience: 10" is honoured even when a
        smaller "2 years" also appears. Returns 0 when nothing matches.
        """
        text = text.lower()
        years = [
            int(value)
            for pattern in self._YEARS_PATTERNS
            for value in pattern.findall(text)
        ]
        return max(years, default=0)

    def skills_from_text(self, text: str, use_synonyms: bool = True) -> List[str]:
        """Return the known skills mentioned in *text*.

        Skills are matched as whole terms rather than raw substrings, so
        ``java`` is not reported for "javascript" and the synonym ``ts`` is
        not reported for "students". With *use_synonyms*, a canonical skill
        that is in ``skills_list`` is also reported when one of its synonyms
        appears. The result has no duplicates and is ordered by discovery
        (direct matches in ``skills_list`` order, then synonym matches).
        """
        text = text.lower()
        found = [skill for skill in self.skills_list if _contains_term(text, skill)]

        if use_synonyms:
            known = set(self.skills_list)
            already_found = set(found)
            for main_skill, synonyms in self.skill_synonyms.items():
                if main_skill not in known or main_skill in already_found:
                    continue
                if any(_contains_term(text, synonym) for synonym in synonyms):
                    found.append(main_skill)
                    already_found.add(main_skill)

        return list(dict.fromkeys(found))

    def get_job_requirements(self, title: str) -> Dict:
        """Look up the requirements for *title* (case-insensitive).

        An exact match is tried first; otherwise the first stored job title
        that contains, or is contained in, *title* is used. When nothing
        matches (or *title* is blank) a warning is printed and a requirement
        set with empty lists is returned.
        """
        title = title.strip().lower()

        if title in self.job_requirements:
            return self.job_requirements[title]

        # A blank title is a substring of every key, so skip partial matching.
        if title:
            for job_title in self.job_requirements:
                if job_title in title or title in job_title:
                    print(f"Matched '{title}' to '{job_title}'")
                    return self.job_requirements[job_title]

        print(f"Warning: No requirements found for job title '{title}'")
        return {'required': [], 'preferred': [], 'all_skills': []}

    def calculate_skills_score(self, resume_skills: List[str],
                               job_skills: Dict) -> Tuple[float, float]:
        """Return ``(total_score, required_score)`` for a resume.

        ``required_score`` is the fraction of required skills present in
        *resume_skills*. ``total_score`` blends it with the preferred-skill
        fraction using ``REQUIRED_WEIGHT`` / ``PREFERRED_WEIGHT``. When the
        job lists no preferred skills the total equals ``required_score``, so
        such jobs are not capped at ``REQUIRED_WEIGHT``. Both values are 0.0
        when the job has no required skills.
        """
        required_set = set(job_skills.get('required') or [])
        if not required_set:
            return 0.0, 0.0

        resume_set = set(resume_skills)
        preferred_set = set(job_skills.get('preferred') or [])

        required_score = len(resume_set & required_set) / len(required_set)
        if not preferred_set:
            return required_score, required_score

        preferred_score = len(resume_set & preferred_set) / len(preferred_set)
        total_score = (
            required_score * self.REQUIRED_WEIGHT
            + preferred_score * self.PREFERRED_WEIGHT
        )
        return total_score, required_score

    def score_resume_by_title(self, text: str, title: str, level: str) -> Dict:
        """Score resume *text* for job *title* at seniority *level*.

        The decision is ``"REJECT"`` when the skill score is below
        ``MIN_SKILL_SCORE``, the extracted years of experience are below the
        level's minimum, or the title/resume embedding similarity is below
        ``MIN_SIMILARITY``; otherwise ``"ACCEPT"``. Only the first 2000
        characters of the resume are embedded.

        Returns:
            Dict with keys ``decision``, ``skill_score``, ``similarity``,
            ``experience_years``, ``resume_skills``, ``job_skills`` and
            ``reasons``.
        """
        job_skills = self.get_job_requirements(title)
        # NOTE(review): resume skills are drawn only from skills_list, so a
        # job skill absent from the skills CSV can never count as matched.
        resume_skills = self.skills_from_text(text)
        total_skill_score, _ = self.calculate_skills_score(resume_skills, job_skills)
        exp = self.extract_years_of_experience(text)

        title_embedding = self.model.encode(title)
        resume_embedding = self.model.encode(text[:2000])
        similarity = util.cos_sim(title_embedding, resume_embedding).item()

        level_key = (level or "").strip().lower()
        required_exp = self.EXPERIENCE_BY_LEVEL.get(
            level_key, self.DEFAULT_REQUIRED_YEARS
        )

        reasons = []
        if total_skill_score < self.MIN_SKILL_SCORE:
            reasons.append("Low skill match")
        if exp < required_exp:
            reasons.append("Insufficient experience")
        if similarity < self.MIN_SIMILARITY:
            reasons.append("Low semantic match with job title")

        return {
            "decision": "REJECT" if reasons else "ACCEPT",
            "skill_score": round(total_skill_score, 3),
            "similarity": round(similarity, 3),
            "experience_years": exp,
            "resume_skills": resume_skills,
            "job_skills": job_skills['all_skills'],
            "reasons": reasons,
        }


def create_default_csvs():
    """Write sample ``skills.csv`` / ``job_requirements.csv`` next to this module.

    A file is only written when it does not already exist.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    skills_path = os.path.join(base_dir, "skills.csv")
    jobs_path = os.path.join(base_dir, "job_requirements.csv")

    if not os.path.exists(skills_path):
        pd.DataFrame({
            'skill': [
                'python', 'java', 'sql', 'machine learning', 'deep learning',
                'pandas', 'numpy', 'react', 'angular', 'docker', 'kubernetes',
                'aws', 'git', 'nlp', 'tensorflow', 'pytorch',
            ]
        }).to_csv(skills_path, index=False)

    if not os.path.exists(jobs_path):
        pd.DataFrame({
            'job_title': [
                'data scientist',
                'machine learning engineer',
                'software engineer',
            ],
            'required_skills': [
                'python,machine learning,statistics,sql',
                'python,machine learning,deep learning,pytorch',
                'python,java,git,algorithms,data structures',
            ],
            'preferred_skills': [
                'pandas,numpy,scikit-learn',
                'tensorflow,docker,kubernetes',
                'sql,spring,react',
            ],
        }).to_csv(jobs_path, index=False)


def main():
    """Create the sample CSVs if needed and print the score of a demo resume."""
    create_default_csvs()
    scorer = ResumeScorer()

    resume_text = """
    I am a data scientist with 4 years of experience in Python, machine learning, and SQL.
    I have worked with pandas, numpy, and scikit-learn for data analysis.
    I also have experience with deep learning using PyTorch and TensorFlow.
    """

    result = scorer.score_resume_by_title(resume_text, "data scientist", "mid")

    print("\n" + "="*50)
    print("RESUME SCORING RESULT")
    print("="*50)
    for key, value in result.items():
        print(f"{key}: {value}")


if __name__ == "__main__":
    main()