Spaces:
Sleeping
Sleeping
| from sklearn.feature_extraction.text import TfidfTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from typing import List | |
| import os | |
def pdf_text_extractor(file_path: str = "") -> str:
    """Extract the text of a PDF file as a single string.

    Args:
        file_path: Path to the PDF file on disk.

    Returns:
        The text of every loaded document, joined with newlines in the
        order the loader returned them.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"file not found at location {file_path}")

    loader = PyPDFLoader(file_path=file_path)
    # load() returns a *list* of Document objects (typically one per page),
    # so the text must be gathered from each document's ``page_content``;
    # the list itself has no ``content`` attribute.
    documents = loader.load()
    return "\n".join(doc.page_content for doc in documents)
def extract_skills(text, SKILLS):
    """Return the skills from ``SKILLS`` that are mentioned in ``text``.

    Matching is case-insensitive and requires the skill to appear as a
    whole token: it must not be directly preceded or followed by a word
    character. This prevents false positives such as "ml" inside "html"
    or "java" inside "javascript", while still allowing skills that
    contain punctuation (e.g. "c++") or spaces (e.g. "machine learning").

    Args:
        text: Free text to search (e.g. the extracted resume text).
        SKILLS: Iterable of skill names to look for.

    Returns:
        A list of the matching skills, spelled as given in ``SKILLS`` and
        in the same order. Blank skill entries are ignored.
    """
    import re  # kept local so the function does not rely on a module-level import

    haystack = text.lower()
    found = []
    for skill in SKILLS:
        needle = skill.lower().strip()
        if not needle:
            # An empty needle would match everywhere; it is not a real skill.
            continue
        # Lookarounds instead of \b so skills ending in non-word characters
        # (like "c++") are still delimited correctly.
        pattern = rf"(?<!\w){re.escape(needle)}(?!\w)"
        if re.search(pattern, haystack):
            found.append(skill)
    return found
def match_resume_jd(resume_txt: str, jd: str) -> float:
    """Score how similar a resume is to a job description.

    Both texts are turned into TF-IDF vectors over a shared vocabulary and
    compared with cosine similarity.

    Args:
        resume_txt: Plain text of the resume.
        jd: Plain text of the job description.

    Returns:
        The cosine similarity expressed as a percentage, rounded to two
        decimals. Returns 0.0 when either text is empty/blank or when no
        usable tokens can be extracted from the texts.
    """
    # With no text on one side there is nothing to compare; bail out before
    # vectorizing, which would otherwise fail on an empty vocabulary.
    if not (resume_txt and resume_txt.strip() and jd and jd.strip()):
        return 0.0

    # TfidfVectorizer tokenizes raw strings. TfidfTransformer only re-weights
    # an existing term-count matrix and cannot be fitted on raw text.
    from sklearn.feature_extraction.text import TfidfVectorizer

    vectorizer = TfidfVectorizer()
    try:
        vectors = vectorizer.fit_transform([resume_txt, jd])
    except ValueError:
        # Raised when the vocabulary is empty (e.g. only punctuation or
        # single-character tokens); treat that as "no similarity".
        return 0.0

    score = cosine_similarity(vectors[0], vectors[1])[0][0]
    # Convert the NumPy scalar to a built-in float to match the annotation.
    return round(float(score) * 100, 2)
# Skills looked for when the caller does not supply its own list. Kept as an
# immutable tuple so the default can never be altered between calls.
_DEFAULT_SKILLS = ("python", "java", "ml", "react", "docker", "aws")


def final_score(resume_file_path: str, jd_text: str, SKILLS: "List[str] | None" = None) -> float:
    """Compute an overall resume score out of 100.

    The score is the sum of a skill component (5 marks per skill found in
    the resume) and the resume/job-description similarity percentage,
    capped at 100.

    Args:
        resume_file_path: Path to the resume PDF.
        jd_text: Plain text of the job description.
        SKILLS: Skill names to look for in the resume. Defaults to
            "python", "java", "ml", "react", "docker" and "aws".

    Returns:
        The combined score as a float in the range 0-100, rounded to two
        decimals.

    Raises:
        FileNotFoundError: If ``resume_file_path`` does not exist
            (propagated from ``pdf_text_extractor``).
    """
    if SKILLS is None:
        SKILLS = list(_DEFAULT_SKILLS)

    resume_text = pdf_text_extractor(file_path=resume_file_path)
    skills = extract_skills(resume_text, SKILLS=SKILLS)
    skill_score = len(skills) * 5  # each skill = 5 marks
    jd_score = match_resume_jd(resume_text, jd_text)
    # Cap at 100 and normalise to a built-in float so the return type does
    # not vary between int (when capped) and float.
    total = min(skill_score + jd_score, 100)
    return round(float(total), 2)