Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# utils.py
|
| 2 |
import spacy
|
| 3 |
from spacy.cli import download
|
| 4 |
import fitz # PyMuPDF
|
|
@@ -6,65 +5,38 @@ import re
|
|
| 6 |
from transformers import pipeline
|
| 7 |
|
| 8 |
def load_models():
|
|
|
|
| 9 |
try:
|
| 10 |
nlp = spacy.load("en_core_web_sm")
|
| 11 |
except OSError:
|
| 12 |
download("en_core_web_sm")
|
| 13 |
nlp = spacy.load("en_core_web_sm")
|
| 14 |
-
|
| 15 |
-
# You can replace this with a better model if needed
|
| 16 |
-
llm = pipeline("text-generation", model="openai-community/gpt2")
|
| 17 |
-
return nlp, llm
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
text = re.sub(r"[^\x00-\x7F]+", " ", text) # Remove non-ASCII
|
| 23 |
-
return text.strip()
|
| 24 |
|
| 25 |
def parse_resume(uploaded_file, nlp):
|
| 26 |
doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
|
| 27 |
text = "\n".join(page.get_text() for page in doc)
|
| 28 |
-
|
| 29 |
-
spacy_doc = nlp(text)
|
| 30 |
-
|
| 31 |
-
# Extract name
|
| 32 |
-
name = next((ent.text for ent in spacy_doc.ents if ent.label_ == "PERSON"), "N/A")
|
| 33 |
-
|
| 34 |
-
# Extract email
|
| 35 |
-
email_match = re.search(r"[\w\.-]+@[\w\.-]+", text)
|
| 36 |
-
email = email_match.group(0) if email_match else "N/A"
|
| 37 |
-
|
| 38 |
-
# Extract skills using noun chunks (filtered)
|
| 39 |
-
noun_phrases = [
|
| 40 |
-
chunk.text.lower().strip()
|
| 41 |
-
for chunk in spacy_doc.noun_chunks
|
| 42 |
-
if 2 <= len(chunk.text.strip()) <= 30
|
| 43 |
-
]
|
| 44 |
-
skills = list(set(noun_phrases))
|
| 45 |
|
| 46 |
-
|
|
|
|
|
|
|
| 47 |
edu_keywords = ["bachelor", "master", "phd", "degree", "certification", "diploma"]
|
| 48 |
-
|
| 49 |
-
sent.text.strip()
|
| 50 |
-
for sent in spacy_doc.sents
|
| 51 |
-
if any(k in sent.text.lower() for k in edu_keywords)
|
| 52 |
-
]
|
| 53 |
|
| 54 |
return text, {
|
| 55 |
-
"name": name,
|
| 56 |
-
"email": email,
|
| 57 |
-
"skills": skills,
|
| 58 |
-
"education":
|
| 59 |
}
|
| 60 |
|
| 61 |
def get_recommendations(parsed):
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
feedback = (
|
| 65 |
-
"Your CV contains a good number of skills, but try to focus on more specific, "
|
| 66 |
-
"in-demand technical and soft skills. Tailor it to your target job role."
|
| 67 |
-
)
|
| 68 |
return score, feedback
|
| 69 |
|
| 70 |
def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
|
|
@@ -72,24 +44,24 @@ def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
|
|
| 72 |
skills = ", ".join(parsed["skills"][:10]) if parsed["skills"] else "unspecified"
|
| 73 |
education = "; ".join(parsed["education"][:3]) if parsed["education"] else "not mentioned"
|
| 74 |
|
| 75 |
-
|
| 76 |
"certifications": (
|
| 77 |
-
f"
|
| 78 |
-
|
| 79 |
),
|
| 80 |
"degrees": (
|
| 81 |
-
f"
|
| 82 |
-
|
| 83 |
),
|
| 84 |
"roadmap": (
|
| 85 |
-
f"Create a
|
| 86 |
-
f"and education: {education}.
|
| 87 |
),
|
| 88 |
"counselor": (
|
| 89 |
-
f"
|
| 90 |
-
f"
|
| 91 |
)
|
| 92 |
}
|
| 93 |
|
| 94 |
-
|
| 95 |
-
return
|
|
|
|
|
|
|
| 1 |
import spacy
|
| 2 |
from spacy.cli import download
|
| 3 |
import fitz # PyMuPDF
|
|
|
|
| 5 |
from transformers import pipeline
|
| 6 |
|
| 7 |
def load_models():
    """Load the NLP models used across the app.

    Returns:
        tuple: (nlp, summarizer) where ``nlp`` is the spaCy
        ``en_core_web_sm`` pipeline and ``summarizer`` is a transformers
        summarization pipeline (distilbart-cnn-12-6).
    """
    # spaCy raises OSError when the model package is not installed;
    # download it once, then retry the load.
    try:
        nlp = spacy.load("en_core_web_sm")
    except OSError:
        download("en_core_web_sm")
        nlp = spacy.load("en_core_web_sm")

    # Lightweight summarization model (faster than full-size BART).
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
    return nlp, summarizer
|
|
|
|
|
|
|
| 18 |
|
| 19 |
def parse_resume(uploaded_file, nlp):
    """Extract raw text and structured fields from an uploaded PDF resume.

    Args:
        uploaded_file: A file-like object (e.g. a Streamlit upload) whose
            ``read()`` returns the PDF bytes.
        nlp: A loaded spaCy pipeline (see ``load_models``).

    Returns:
        tuple: (text, fields) where ``text`` is the full extracted text and
        ``fields`` is a dict with keys "name", "email", "skills",
        "education". "name"/"email" fall back to "N/A" when not found.
    """
    # BUG FIX: the fitz document was never closed (leaked file resources),
    # and the local name `doc` was reused for both the PDF document and the
    # spaCy doc. Close the PDF explicitly and use distinct names.
    pdf = fitz.open(stream=uploaded_file.read(), filetype="pdf")
    try:
        text = "\n".join(page.get_text() for page in pdf)
    finally:
        pdf.close()

    spacy_doc = nlp(text)

    # First PERSON entity is taken as the candidate's name.
    names = [ent.text for ent in spacy_doc.ents if ent.label_ == "PERSON"]
    # Simple pattern; matches the first email-looking token in the text.
    emails = re.findall(r"[\w\.-]+@[\w\.-]+", text)
    # Crude skill extraction: every noun token, lowercased and de-duplicated.
    skills = [token.text.lower() for token in spacy_doc if token.pos_ == "NOUN"]

    # Sentences mentioning any education-related keyword.
    edu_keywords = ["bachelor", "master", "phd", "degree", "certification", "diploma"]
    education = [
        sent.text
        for sent in spacy_doc.sents
        if any(k in sent.text.lower() for k in edu_keywords)
    ]

    return text, {
        "name": names[0] if names else "N/A",
        "email": emails[0] if emails else "N/A",
        "skills": list(set(skills)),
        "education": education,
    }
|
| 36 |
|
| 37 |
def get_recommendations(parsed):
    """Score a parsed resume and return generic improvement feedback.

    Args:
        parsed: Dict produced by ``parse_resume``; only the "skills" list
            is consulted (missing/empty is treated as zero skills).

    Returns:
        tuple: (score, feedback). ``score`` is an int in [50, 100]: a base
        of 50 plus one point per distinct skill, capped at 100.
    """
    # BUG FIX: the original computed `50 + len(skills) % 50`, so a resume
    # with exactly 50 skills wrapped back down to a score of 50. Cap at 100
    # instead so the score grows monotonically with the skill count.
    score = min(100, 50 + len(parsed.get("skills", [])))
    feedback = "Try adding more specific technical skills and quantifiable achievements."
    return score, feedback
|
| 41 |
|
| 42 |
def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
    """Produce a career suggestion by summarizing a prompt built from the resume.

    Args:
        parsed: Dict from ``parse_resume`` with "skills" and "education" lists.
        llm: A transformers summarization pipeline (or compatible callable).
        suggestion_type: One of "certifications", "degrees", "roadmap",
            "counselor". Any other value raises ``KeyError``.

    Returns:
        str: The model's summary text for the selected prompt.
    """
    skill_list = parsed["skills"]
    edu_list = parsed["education"]
    # Only the first few items feed the prompt to keep it short.
    skills = ", ".join(skill_list[:10]) if skill_list else "unspecified"
    education = "; ".join(edu_list[:3]) if edu_list else "not mentioned"

    prompts = {
        "certifications": (
            f"Candidate has skills in: {skills}. With education: {education}. "
            "Summarize relevant certifications they can pursue."
        ),
        "degrees": (
            f"Given the education background: {education}, summarize higher education degrees "
            "that can help in career advancement."
        ),
        "roadmap": (
            f"Create a short 1-year career roadmap for someone with skills: {skills} "
            f"and education: {education}. Suggest goals."
        ),
        "counselor": (
            f"As a career advisor, suggest top 3 career moves for a person skilled in {skills} "
            f"with education in {education}."
        )
    }

    selected_prompt = prompts[suggestion_type]
    # Deterministic decoding; length bounds keep the answer a short paragraph.
    summaries = llm(selected_prompt, max_length=150, min_length=50, do_sample=False)
    return summaries[0]["summary_text"]
|