Smart_CV / utils.py
Danial7's picture
Update utils.py
15d319d verified
import docx2txt
import PyPDF2
import spacy
from transformers import pipeline
import re
def parse_resume(file):
    """Extract plain text from an uploaded resume file.

    The handler is chosen from ``file.type`` (a MIME-type string):

    * ``application/pdf`` -- read with ``PyPDF2.PdfReader`` and the text of
      every page is concatenated in page order.
    * the ``.docx`` MIME type -- passed to ``docx2txt.process``.
    * anything else -- an empty string is returned.

    Args:
        file: Object exposing a ``type`` attribute and usable as input to
            ``PyPDF2.PdfReader`` / ``docx2txt.process``.
            NOTE(review): presumably a Streamlit ``UploadedFile`` -- confirm
            against the caller.

    Returns:
        str: The extracted text, or ``""`` for an unsupported MIME type.
    """
    if file.type == "application/pdf":
        pdf_reader = PyPDF2.PdfReader(file)
        # A page with no extractable text (e.g. a scanned image) can give an
        # empty or, on some PyPDF2 releases, None result; ``or ""`` keeps the
        # join from failing. join also avoids repeated ``+=`` concatenation.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

    if file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return docx2txt.process(file)

    return ""
def load_models():
    """Load the NLP and text-generation models used by this module.

    Returns:
        tuple: ``(nlp_model, llm_model)`` where ``nlp_model`` is the result of
        ``spacy.load("en_core_web_sm")`` and ``llm_model`` is a
        ``transformers`` ``text-generation`` pipeline for
        ``microsoft/phi-2``.
    """
    # Tuple elements are evaluated left to right, so spaCy is loaded first
    # and the generation pipeline second.
    return (
        spacy.load("en_core_web_sm"),
        pipeline("text-generation", model="microsoft/phi-2"),
    )
def get_recommendations(text, nlp_model, llm_model):
    """Derive a quality score, job domain, skills and education from CV text.

    Named entities labelled ``SKILL``, ``WORK_OF_ART`` or ``ORG`` are taken
    as skills, fed to the language model in a prompt, and the model's answer
    is parsed for a domain keyword and a 0-100 score.

    Args:
        text: Raw CV text.
        nlp_model: Callable returning a document whose ``ents`` expose
            ``text`` and ``label_``.
            NOTE(review): the stock ``en_core_web_sm`` pipeline may not emit
            a ``SKILL`` label -- confirm the label set.
        llm_model: Callable invoked as ``llm_model(prompt, max_new_tokens=100)``
            and returning a list whose first item has a ``'generated_text'``
            key.

    Returns:
        tuple: ``(score, domain, skills, education)`` where ``score`` is an
        ``int`` clamped to 0-100 (``0`` when the model gives no number),
        ``domain`` is ``"Engineering"`` or ``"General"``, ``skills`` is a
        de-duplicated list of lower-cased entity strings in first-seen order,
        and ``education`` is the string from ``extract_education``.
    """
    doc = nlp_model(text)
    skill_labels = {"SKILL", "WORK_OF_ART", "ORG"}
    # dict.fromkeys de-duplicates while keeping first-seen order, so both the
    # prompt and the returned list are deterministic (a bare set() is not).
    skills = list(dict.fromkeys(
        ent.text.lower() for ent in doc.ents if ent.label_ in skill_labels
    ))
    education = extract_education(text)

    prompt = f"Given these skills: {', '.join(skills)}, classify the most likely job field and rate CV quality (0-100):"
    response = llm_model(prompt, max_new_tokens=100)[0]['generated_text']

    # The generated text may start with an echo of the prompt. Drop it so the
    # "(0-100)" digits and the skill names in the prompt cannot leak into the
    # parsed score or the domain keyword check.
    completion = response[len(prompt):] if response.startswith(prompt) else response

    domain = "Engineering" if "engineer" in completion.lower() else "General"

    # Take the first standalone 1-3 digit number as the score instead of
    # gluing every digit in the text together; clamp to the advertised range.
    score_match = re.search(r"\b\d{1,3}\b", completion)
    score = min(int(score_match.group()), 100) if score_match else 0

    return score, domain, skills, education
# Degree keyword at a word boundary (optionally pluralised/possessive),
# followed by the rest of that line. The group is non-capturing so that
# findall() yields the whole matched span rather than just the keyword.
_EDUCATION_RE = re.compile(
    r"\b(?:Bachelor|Master|PhD|MBA|BSc|MSc|B\.Tech|M\.Tech)(?:'s|s)?\b[^\n\r]*",
    re.IGNORECASE,
)


def extract_education(text):
    """Return the education-related lines found in ``text``.

    A line fragment is reported when it contains one of the degree keywords
    (Bachelor, Master, PhD, MBA, BSc, MSc, B.Tech, M.Tech; case-insensitive)
    as a standalone word. The fragment runs from the keyword to the end of
    the line.

    Args:
        text: Text to scan.

    Returns:
        str: Unique fragments joined with ``", "`` in order of first
        appearance, or ``"Not detected"`` when nothing matches.
    """
    fragments = (found.strip() for found in _EDUCATION_RE.findall(text))
    # dict.fromkeys removes duplicates but, unlike set(), keeps the order in
    # which the fragments appeared, so the output is stable between runs.
    unique = list(dict.fromkeys(fragments))
    return ", ".join(unique) if unique else "Not detected"