Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -12,36 +12,84 @@ def load_models():
|
|
| 12 |
download("en_core_web_sm")
|
| 13 |
nlp = spacy.load("en_core_web_sm")
|
| 14 |
|
|
|
|
| 15 |
llm = pipeline("text-generation", model="openai-community/gpt2")
|
| 16 |
return nlp, llm
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def parse_resume(uploaded_file, nlp):
|
| 19 |
doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
|
| 20 |
text = "\n".join(page.get_text() for page in doc)
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
edu_keywords = ["bachelor", "master", "phd", "degree", "certification", "diploma"]
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
return text, {
|
| 28 |
-
"name": name
|
| 29 |
-
"email": email
|
| 30 |
-
"skills":
|
| 31 |
-
"education":
|
| 32 |
}
|
| 33 |
|
| 34 |
def get_recommendations(parsed):
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
return score, feedback
|
| 38 |
|
| 39 |
def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
}
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
| 12 |
download("en_core_web_sm")
|
| 13 |
nlp = spacy.load("en_core_web_sm")
|
| 14 |
|
| 15 |
+
# You can replace this with a better model if needed
|
| 16 |
llm = pipeline("text-generation", model="openai-community/gpt2")
|
| 17 |
return nlp, llm
|
| 18 |
|
| 19 |
+
def clean_text(text):
    """Normalize raw extracted text to single-spaced printable ASCII.

    Every run of characters outside printable ASCII (control characters,
    tabs, newlines and all non-ASCII symbols) is replaced by one space, then
    runs of spaces are collapsed and both ends are trimmed.

    Args:
        text: Raw text to normalize.

    Returns:
        The cleaned single-line string; ``""`` if nothing printable remains.
    """
    # Drop unwanted characters *before* collapsing whitespace. Doing it the
    # other way round leaves runs of spaces wherever a symbol sat between two
    # spaces ("a é b" -> "a   b").
    text = re.sub(r"[^\x20-\x7E]+", " ", text)
    # Only plain spaces can remain at this point, so collapsing them is enough.
    text = re.sub(r" {2,}", " ", text)
    return text.strip()
|
| 24 |
+
|
| 25 |
def parse_resume(uploaded_file, nlp):
    """Extract cleaned text and basic structured fields from a PDF resume.

    Args:
        uploaded_file: Binary file-like object; ``read()`` must return the
            PDF bytes.
        nlp: Callable applied to the cleaned text. The result must expose
            ``ents`` (items with ``text`` and ``label_``), ``noun_chunks``
            and ``sents`` (items with ``text``).

    Returns:
        A ``(text, parsed)`` tuple. ``text`` is the cleaned resume text and
        ``parsed`` is a dict with:
            "name": first entity labelled ``PERSON``, or ``"N/A"``.
            "email": first email-like token in the text, or ``"N/A"``.
            "skills": unique lower-cased noun chunks of 2-30 characters, in
                order of first appearance.
            "education": sentences mentioning any education keyword.
    """
    # The context manager closes the PDF document once the text is extracted.
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
        text = "\n".join(page.get_text() for page in doc)
    text = clean_text(text)
    spacy_doc = nlp(text)

    # Extract name
    name = next(
        (ent.text for ent in spacy_doc.ents if ent.label_ == "PERSON"),
        "N/A",
    )

    # Extract email. The domain must be dot-separated labels, so sentence
    # punctuation after the address ("john@x.com.") is not captured; "+" is
    # allowed in the local part so tagged addresses are not cut short.
    email_match = re.search(r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+", text)
    email = email_match.group(0) if email_match else "N/A"

    # Extract skills using noun chunks (filtered by length)
    stripped_chunks = (chunk.text.strip() for chunk in spacy_doc.noun_chunks)
    noun_phrases = [
        phrase.lower()
        for phrase in stripped_chunks
        if 2 <= len(phrase) <= 30
    ]
    # dict.fromkeys de-duplicates while preserving document order. A set
    # would reorder the phrases from run to run, so any consumer slicing the
    # list would see different skills each time.
    skills = list(dict.fromkeys(noun_phrases))

    # Extract education lines
    edu_keywords = ["bachelor", "master", "phd", "degree", "certification", "diploma"]
    education = [
        sent.text.strip()
        for sent in spacy_doc.sents
        if any(k in sent.text.lower() for k in edu_keywords)
    ]

    return text, {
        "name": name,
        "email": email,
        "skills": skills,
        "education": education,
    }
|
| 60 |
|
| 61 |
def get_recommendations(parsed):
    """Score a parsed resume by the number of extracted skills.

    Args:
        parsed: Mapping of parsed resume fields. Only ``"skills"`` is read;
            a missing or ``None`` value is treated as no skills.

    Returns:
        A ``(score, feedback)`` tuple. ``score`` starts at 50, gains one
        point per two skills and is capped at 100. ``feedback`` is a fixed
        advisory message.
    """
    # Tolerate a missing/None "skills" entry instead of raising.
    skills = parsed.get("skills") or []
    score = min(100, 50 + len(skills) // 2)
    feedback = (
        "Your CV contains a good number of skills, but try to focus on more specific, "
        "in-demand technical and soft skills. Tailor it to your target job role."
    )
    return score, feedback
|
| 69 |
|
| 70 |
def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
    """Build a prompt from parsed resume fields and return the LLM's text.

    Args:
        parsed: Mapping with optional ``"name"``, ``"skills"`` (list of str)
            and ``"education"`` (list of str) entries.
        llm: Callable invoked as ``llm(prompt, **generation_kwargs)``; must
            return a sequence whose first item has a ``"generated_text"``
            key.
        suggestion_type: One of ``"certifications"``, ``"degrees"``,
            ``"roadmap"`` or ``"counselor"``.

    Returns:
        The ``"generated_text"`` of the first result.
        NOTE(review): with default text-generation settings this probably
        starts with the prompt itself -- confirm whether callers want that.

    Raises:
        KeyError: If ``suggestion_type`` is not one of the known prompt keys.
    """
    # parse_resume stores the literal "N/A" when no name is found, so a plain
    # .get() default would never trigger; treat the placeholder as missing.
    name = parsed.get("name")
    if not name or name == "N/A":
        name = "Candidate"

    skill_items = parsed.get("skills") or []
    education_items = parsed.get("education") or []
    # Cap how much resume content goes into the prompt.
    skills = ", ".join(skill_items[:10]) if skill_items else "unspecified"
    education = "; ".join(education_items[:3]) if education_items else "not mentioned"

    prompts = {
        "certifications": (
            f"The candidate has skills in: {skills}. Education background: {education}.\n"
            f"List relevant industry-recognized certifications they should pursue."
        ),
        "degrees": (
            f"Based on this background: {education}, what higher education degrees (e.g., Master's, diploma) "
            f"would help improve their career prospects?"
        ),
        "roadmap": (
            f"Create a detailed 1-year career roadmap for {name}, who has the following skills: {skills}, "
            f"and education: {education}. Include quarterly goals."
        ),
        "counselor": (
            f"Act as a career counselor for {name}. Their main skills are: {skills}. Education includes: {education}.\n"
            f"Give them 3 personalized suggestions to grow their career internationally."
        ),
    }
    prompt = prompts[suggestion_type]

    # max_new_tokens limits only the generated continuation. max_length also
    # counts the prompt tokens, so a long prompt could leave little or no
    # room for output.
    response = llm(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
    return response[0]["generated_text"]
|