Danial7 committed on
Commit
b5b5de2
·
verified ·
1 Parent(s): ce78876

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +26 -54
utils.py CHANGED
@@ -1,4 +1,3 @@
1
- # utils.py
2
  import spacy
3
  from spacy.cli import download
4
  import fitz # PyMuPDF
@@ -6,65 +5,38 @@ import re
6
  from transformers import pipeline
7
 
def load_models():
    """Load the spaCy pipeline and the text-generation model.

    Returns:
        A ``(nlp, llm)`` tuple: the ``en_core_web_sm`` spaCy pipeline and a
        ``transformers`` text-generation pipeline using
        ``openai-community/gpt2``.
    """
    spacy_model = "en_core_web_sm"
    try:
        nlp = spacy.load(spacy_model)
    except OSError:
        # Loading raised OSError: download the package, then retry once.
        download(spacy_model)
        nlp = spacy.load(spacy_model)

    # GPT-2 is only a baseline; a better model can be substituted here.
    generator = pipeline("text-generation", model="openai-community/gpt2")
    return nlp, generator
18
 
def clean_text(text):
    """Normalize extracted text to single-spaced ASCII.

    Every run of non-ASCII characters is replaced by a space, then every run
    of whitespace (newlines and tabs included) is collapsed to one space, and
    leading/trailing whitespace is stripped.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string; empty if nothing but whitespace or non-ASCII
        characters was present.
    """
    # Replace non-ASCII first: doing it after the whitespace collapse would
    # reintroduce runs of spaces wherever a non-ASCII character was removed.
    ascii_only = re.sub(r"[^\x00-\x7F]+", " ", text)
    collapsed = re.sub(r"\s+", " ", ascii_only)
    return collapsed.strip()
24
 
def parse_resume(uploaded_file, nlp):
    """Extract the text and basic profile fields from an uploaded PDF resume.

    Args:
        uploaded_file: Object whose ``read()`` returns the PDF content.
        nlp: Callable applied to the cleaned text; the object it returns must
            expose ``ents``, ``noun_chunks`` and ``sents``.

    Returns:
        A ``(text, parsed)`` tuple. ``text`` is the cleaned resume text and
        ``parsed`` is a dict with keys ``"name"`` and ``"email"`` (``"N/A"``
        when nothing is found), ``"skills"`` (unique lower-cased noun chunks
        in first-seen order) and ``"education"`` (sentences containing an
        education keyword).
    """
    # Use the document as a context manager so the PDF is closed as soon as
    # its text has been read, instead of staying open until garbage collection.
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as pdf:
        raw_text = "\n".join(page.get_text() for page in pdf)
    text = clean_text(raw_text)
    spacy_doc = nlp(text)

    # Name: first entity labelled PERSON.
    name = next(
        (ent.text for ent in spacy_doc.ents if ent.label_ == "PERSON"), "N/A"
    )

    # Email: first address-like token in the text.
    email_match = re.search(r"[\w\.-]+@[\w\.-]+", text)
    email = email_match.group(0) if email_match else "N/A"

    # Skills: noun chunks of 2-30 characters. dict.fromkeys de-duplicates
    # while keeping first-seen order, so the result is the same on every run
    # (a set would return the skills in arbitrary order).
    skills = list(
        dict.fromkeys(
            chunk.text.lower().strip()
            for chunk in spacy_doc.noun_chunks
            if 2 <= len(chunk.text.strip()) <= 30
        )
    )

    # Education: sentences mentioning any of these keywords.
    edu_keywords = ["bachelor", "master", "phd", "degree", "certification", "diploma"]
    education = [
        sent.text.strip()
        for sent in spacy_doc.sents
        if any(k in sent.text.lower() for k in edu_keywords)
    ]

    return text, {
        "name": name,
        "email": email,
        "skills": skills,
        "education": education,
    }
60
 
def get_recommendations(parsed):
    """Score a parsed resume and return generic improvement feedback.

    Args:
        parsed: Dict with a ``"skills"`` sequence.

    Returns:
        A ``(score, feedback)`` tuple. The score starts at 50, gains one
        point for every two skills, and is capped at 100.
    """
    bonus = len(parsed["skills"]) // 2
    score = 50 + bonus
    if score > 100:
        score = 100

    feedback = (
        "Your CV contains a good number of skills, "
        "but try to focus on more specific, "
        "in-demand technical and soft skills. "
        "Tailor it to your target job role."
    )
    return score, feedback
69
 
70
  def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
@@ -72,24 +44,24 @@ def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
72
  skills = ", ".join(parsed["skills"][:10]) if parsed["skills"] else "unspecified"
73
  education = "; ".join(parsed["education"][:3]) if parsed["education"] else "not mentioned"
74
 
75
- prompts = {
76
  "certifications": (
77
- f"The candidate has skills in: {skills}. Education background: {education}.\n"
78
- f"List relevant industry-recognized certifications they should pursue."
79
  ),
80
  "degrees": (
81
- f"Based on this background: {education}, what higher education degrees (e.g., Master's, diploma) "
82
- f"would help improve their career prospects?"
83
  ),
84
  "roadmap": (
85
- f"Create a detailed 1-year career roadmap for {name}, who has the following skills: {skills}, "
86
- f"and education: {education}. Include quarterly goals."
87
  ),
88
  "counselor": (
89
- f"Act as a career counselor for {name}. Their main skills are: {skills}. Education includes: {education}.\n"
90
- f"Give them 3 personalized suggestions to grow their career internationally."
91
  )
92
  }
93
 
94
- response = llm(prompts[suggestion_type], max_length=512, do_sample=True, temperature=0.7)
95
- return response[0]["generated_text"]
 
 
1
  import spacy
2
  from spacy.cli import download
3
  import fitz # PyMuPDF
 
5
  from transformers import pipeline
6
 
def load_models():
    """Load the spaCy pipeline and the summarization model.

    Returns:
        A ``(nlp, llm)`` tuple: the ``en_core_web_sm`` spaCy pipeline and a
        ``transformers`` summarization pipeline using
        ``sshleifer/distilbart-cnn-12-6``.
    """
    model_name = "en_core_web_sm"
    try:
        nlp = spacy.load(model_name)
    except OSError:
        # Loading raised OSError: download the model, then load it again.
        download(model_name)
        nlp = spacy.load(model_name)

    # Summarization model used in place of GPT-2.
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
    return nlp, summarizer
 
 
18
 
def parse_resume(uploaded_file, nlp):
    """Extract the text and basic profile fields from an uploaded PDF resume.

    Args:
        uploaded_file: Object whose ``read()`` returns the PDF content.
        nlp: Callable applied to the extracted text; the object it returns
            must be iterable over tokens (with ``text`` and ``pos_``) and
            expose ``ents`` and ``sents``.

    Returns:
        A ``(text, parsed)`` tuple. ``text`` is the page text joined with
        newlines and ``parsed`` is a dict with keys ``"name"`` and
        ``"email"`` (``"N/A"`` when nothing is found), ``"skills"`` (unique
        lower-cased nouns in first-seen order) and ``"education"``
        (sentences containing an education keyword).
    """
    # Keep the PDF handle under its own name and close it via the context
    # manager; rebinding it to the spaCy document left the file open.
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as pdf:
        text = "\n".join(page.get_text() for page in pdf)
    spacy_doc = nlp(text)

    # First PERSON entity, if any.
    name = next(
        (ent.text for ent in spacy_doc.ents if ent.label_ == "PERSON"), "N/A"
    )

    # Only the first address is reported, so stop at the first match.
    email_match = re.search(r"[\w\.-]+@[\w\.-]+", text)
    email = email_match.group(0) if email_match else "N/A"

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is the same on every run (a set would return the skills in
    # arbitrary order).
    skills = list(
        dict.fromkeys(
            token.text.lower() for token in spacy_doc if token.pos_ == "NOUN"
        )
    )

    edu_keywords = ["bachelor", "master", "phd", "degree", "certification", "diploma"]
    education = [
        sent.text
        for sent in spacy_doc.sents
        if any(k in sent.text.lower() for k in edu_keywords)
    ]

    return text, {
        "name": name,
        "email": email,
        "skills": skills,
        "education": education,
    }
36
 
def get_recommendations(parsed):
    """Score a parsed resume and return generic improvement feedback.

    Args:
        parsed: Dict with a ``"skills"`` sequence.

    Returns:
        A ``(score, feedback)`` tuple. The score is 50 plus one point per
        skill, capped at 100.
    """
    # Cap instead of taking the count modulo 50: the modulo wrapped around,
    # so a resume with 50 skills scored lower than one with 49.
    score = min(100, 50 + len(parsed["skills"]))
    feedback = "Try adding more specific technical skills and quantifiable achievements."
    return score, feedback
41
 
42
  def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
 
44
  skills = ", ".join(parsed["skills"][:10]) if parsed["skills"] else "unspecified"
45
  education = "; ".join(parsed["education"][:3]) if parsed["education"] else "not mentioned"
46
 
47
+ input_text_map = {
48
  "certifications": (
49
+ f"Candidate has skills in: {skills}. With education: {education}. "
50
+ "Summarize relevant certifications they can pursue."
51
  ),
52
  "degrees": (
53
+ f"Given the education background: {education}, summarize higher education degrees "
54
+ "that can help in career advancement."
55
  ),
56
  "roadmap": (
57
+ f"Create a short 1-year career roadmap for someone with skills: {skills} "
58
+ f"and education: {education}. Suggest goals."
59
  ),
60
  "counselor": (
61
+ f"As a career advisor, suggest top 3 career moves for a person skilled in {skills} "
62
+ f"with education in {education}."
63
  )
64
  }
65
 
66
+ result = llm(input_text_map[suggestion_type], max_length=150, min_length=50, do_sample=False)
67
+ return result[0]["summary_text"]