Danial7 committed on
Commit
ce78876
·
verified ·
1 Parent(s): 5ef04ed

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +66 -18
utils.py CHANGED
@@ -12,36 +12,84 @@ def load_models():
12
  download("en_core_web_sm")
13
  nlp = spacy.load("en_core_web_sm")
14
 
 
15
  llm = pipeline("text-generation", model="openai-community/gpt2")
16
  return nlp, llm
17
 
 
 
 
 
 
 
18
  def parse_resume(uploaded_file, nlp):
19
  doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
20
  text = "\n".join(page.get_text() for page in doc)
21
- doc = nlp(text)
22
- name = [ent.text for ent in doc.ents if ent.label_ == "PERSON"]
23
- email = re.findall(r"[\w\.-]+@[\w\.-]+", text)
24
- skills = [token.text.lower() for token in doc if token.pos_ == "NOUN"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  edu_keywords = ["bachelor", "master", "phd", "degree", "certification", "diploma"]
26
- edu = [sent.text for sent in doc.sents if any(k in sent.text.lower() for k in edu_keywords)]
 
 
 
 
 
27
  return text, {
28
- "name": name[0] if name else "N/A",
29
- "email": email[0] if email else "N/A",
30
- "skills": list(set(skills)),
31
- "education": edu,
32
  }
33
 
34
  def get_recommendations(parsed):
35
- score = 50 + len(parsed["skills"]) % 50
36
- feedback = "Try adding more specific technical skills and quantifiable achievements."
 
 
 
 
37
  return score, feedback
38
 
39
  def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
40
- prompt_map = {
41
- "certifications": f"Suggest relevant certifications for someone with skills: {parsed['skills']} and education: {parsed['education']}",
42
- "degrees": f"Suggest higher education paths based on: {parsed['education']}",
43
- "roadmap": f"Create a 1-year career roadmap for someone with these skills: {parsed['skills']} and education: {parsed['education']}",
44
- "counselor": f"Act like a career counselor. Give personalized advice to this candidate: Skills={parsed['skills']} Education={parsed['education']}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  }
46
- res = llm(prompt_map[suggestion_type], max_length=512, do_sample=True, temperature=0.7)
47
- return res[0]['generated_text']
 
 
12
  download("en_core_web_sm")
13
  nlp = spacy.load("en_core_web_sm")
14
 
15
+ # You can replace this with a better model if needed
16
  llm = pipeline("text-generation", model="openai-community/gpt2")
17
  return nlp, llm
18
 
19
def clean_text(text):
    """Normalize extracted text to single-spaced printable ASCII.

    Every run of characters outside the printable ASCII range (newlines,
    tabs, other control characters and all non-ASCII characters) is replaced
    by a space, repeated spaces are collapsed to one, and leading/trailing
    spaces are removed.

    Args:
        text: The string to normalize.

    Returns:
        The cleaned string; empty if nothing printable remains.
    """
    # Replace first, collapse second: substituting a space for a removed
    # character can itself create a run of spaces ("a é b" -> "a   b"), so the
    # collapse has to happen after the substitution, not before it.
    text = re.sub(r"[^\x20-\x7E]+", " ", text)
    text = re.sub(r" {2,}", " ", text)
    return text.strip()
24
+
25
  def parse_resume(uploaded_file, nlp):
26
  doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
27
  text = "\n".join(page.get_text() for page in doc)
28
+ text = clean_text(text)
29
+ spacy_doc = nlp(text)
30
+
31
+ # Extract name
32
+ name = next((ent.text for ent in spacy_doc.ents if ent.label_ == "PERSON"), "N/A")
33
+
34
+ # Extract email
35
+ email_match = re.search(r"[\w\.-]+@[\w\.-]+", text)
36
+ email = email_match.group(0) if email_match else "N/A"
37
+
38
+ # Extract skills using noun chunks (filtered)
39
+ noun_phrases = [
40
+ chunk.text.lower().strip()
41
+ for chunk in spacy_doc.noun_chunks
42
+ if 2 <= len(chunk.text.strip()) <= 30
43
+ ]
44
+ skills = list(set(noun_phrases))
45
+
46
+ # Extract education lines
47
  edu_keywords = ["bachelor", "master", "phd", "degree", "certification", "diploma"]
48
+ education = [
49
+ sent.text.strip()
50
+ for sent in spacy_doc.sents
51
+ if any(k in sent.text.lower() for k in edu_keywords)
52
+ ]
53
+
54
  return text, {
55
+ "name": name,
56
+ "email": email,
57
+ "skills": skills,
58
+ "education": education,
59
  }
60
 
61
  def get_recommendations(parsed):
62
+ num_skills = len(parsed["skills"])
63
+ score = min(100, 50 + num_skills // 2)
64
+ feedback = (
65
+ "Your CV contains a good number of skills, but try to focus on more specific, "
66
+ "in-demand technical and soft skills. Tailor it to your target job role."
67
+ )
68
  return score, feedback
69
 
70
  def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
71
+ name = parsed.get("name", "Candidate")
72
+ skills = ", ".join(parsed["skills"][:10]) if parsed["skills"] else "unspecified"
73
+ education = "; ".join(parsed["education"][:3]) if parsed["education"] else "not mentioned"
74
+
75
+ prompts = {
76
+ "certifications": (
77
+ f"The candidate has skills in: {skills}. Education background: {education}.\n"
78
+ f"List relevant industry-recognized certifications they should pursue."
79
+ ),
80
+ "degrees": (
81
+ f"Based on this background: {education}, what higher education degrees (e.g., Master's, diploma) "
82
+ f"would help improve their career prospects?"
83
+ ),
84
+ "roadmap": (
85
+ f"Create a detailed 1-year career roadmap for {name}, who has the following skills: {skills}, "
86
+ f"and education: {education}. Include quarterly goals."
87
+ ),
88
+ "counselor": (
89
+ f"Act as a career counselor for {name}. Their main skills are: {skills}. Education includes: {education}.\n"
90
+ f"Give them 3 personalized suggestions to grow their career internationally."
91
+ )
92
  }
93
+
94
+ response = llm(prompts[suggestion_type], max_length=512, do_sample=True, temperature=0.7)
95
+ return response[0]["generated_text"]