| import docx2txt | |
| import PyPDF2 | |
| import spacy | |
| from transformers import pipeline | |
| import re | |
def parse_resume(file):
    """Extract plain text from an uploaded resume file.

    Args:
        file: File-like object exposing a ``type`` attribute that holds the
            MIME type of the upload (presumably a web-framework upload
            object; verify against the caller).

    Returns:
        The extracted text for PDF and DOCX uploads, or an empty string for
        any other MIME type.
    """
    mime_type = file.type

    if mime_type == "application/pdf":
        pdf_reader = PyPDF2.PdfReader(file)
        # Image-only pages can yield no text; treat a falsy result as empty
        # so a single such page cannot break the whole extraction. Joining
        # once also avoids repeated string concatenation on long documents.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

    if mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return docx2txt.process(file)

    # Unsupported format: callers receive an empty string rather than an error.
    return ""
def load_models():
    """Load the two models used for resume analysis.

    Returns:
        A ``(nlp_model, llm_model)`` tuple: the spaCy ``en_core_web_sm``
        pipeline and a transformers text-generation pipeline backed by
        ``microsoft/phi-2``.
    """
    return (
        spacy.load("en_core_web_sm"),
        pipeline("text-generation", model="microsoft/phi-2"),
    )
def get_recommendations(text, nlp_model, llm_model):
    """Derive a CV score, job domain, skills and education from resume text.

    Args:
        text: Plain text of the resume.
        nlp_model: Callable returning a document whose ``ents`` expose
            ``text`` and ``label_`` (e.g. a spaCy pipeline).
        llm_model: Callable taking a prompt and ``max_new_tokens`` and
            returning a list of dicts with a ``'generated_text'`` key
            (e.g. a transformers text-generation pipeline).

    Returns:
        A ``(score, domain, skills, education)`` tuple where ``score`` is an
        int in 0-100 (0 when the model output contains no usable number),
        ``domain`` is ``"Engineering"`` or ``"General"``, ``skills`` is a
        sorted list of unique lower-cased entity texts, and ``education`` is
        the string produced by ``extract_education``.
    """
    doc = nlp_model(text)
    skill_labels = {"SKILL", "WORK_OF_ART", "ORG"}
    # Deduplicate once and sort so both the prompt and the returned list are
    # deterministic across runs.
    skills = sorted({ent.text.lower() for ent in doc.ents if ent.label_ in skill_labels})
    education = extract_education(text)

    prompt = f"Given these skills: {', '.join(skills)}, classify the most likely job field and rate CV quality (0-100):"
    response = llm_model(prompt, max_new_tokens=100)[0]['generated_text']

    # Text-generation pipelines echo the prompt by default. Drop that echo so
    # the "0-100" hint and the skill names in the prompt are not mistaken for
    # the model's answer.
    completion = response[len(prompt):] if response.startswith(prompt) else response

    domain = "Engineering" if "engineer" in completion.lower() else "General"

    # Heuristic: the first standalone integer within 0-100 is taken as the
    # score. Longer digit runs (years, phone numbers) are skipped.
    score = 0
    for digits in re.findall(r"\b\d{1,3}\b", completion):
        value = int(digits)
        if value <= 100:
            score = value
            break

    return score, domain, skills, education
def extract_education(text):
    """Return the degree keywords mentioned in ``text``.

    Matching is case-insensitive and restricted to whole words, so that
    substrings such as the "mba" in "Mumbai" or the "bsc" in "subscribe" are
    not reported. A plural or possessive suffix ("Masters", "Bachelor's") is
    accepted but not included in the result.

    Args:
        text: Resume text to scan; may be empty.

    Returns:
        The distinct degree keywords joined by ", " in order of first
        appearance (keeping the casing of the first occurrence), or
        ``"Not detected"`` when none is found.
    """
    if not text:
        return "Not detected"

    pattern = r"\b(Bachelor|Master|PhD|MBA|BSc|MSc|B\.Tech|M\.Tech)(?:['\u2019]?s)?\b"

    # A dict keyed by the case-folded keyword removes duplicates regardless of
    # casing while preserving first-seen order.
    degrees = {}
    for found in re.finditer(pattern, text, re.IGNORECASE):
        keyword = found.group(1)
        degrees.setdefault(keyword.casefold(), keyword)

    return ", ".join(degrees.values()) if degrees else "Not detected"