Spaces:

Danial7
/

CVAnalyzer

Sleeping

App Files Files Community

Danial7 committed on May 17, 2025

Commit

ce78876

verified ·

1 Parent(s): 5ef04ed

Update utils.py

Browse files

Files changed (1) hide show

utils.py +66 -18

utils.py CHANGED Viewed

@@ -12,36 +12,84 @@ def load_models():
         download("en_core_web_sm")
         nlp = spacy.load("en_core_web_sm")
     llm = pipeline("text-generation", model="openai-community/gpt2")
     return nlp, llm
 def parse_resume(uploaded_file, nlp):
     doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
     text = "\n".join(page.get_text() for page in doc)
-    doc = nlp(text)
-    name = [ent.text for ent in doc.ents if ent.label_ == "PERSON"]
-    email = re.findall(r"[\w\.-]+@[\w\.-]+", text)
-    skills = [token.text.lower() for token in doc if token.pos_ == "NOUN"]
     edu_keywords = ["bachelor", "master", "phd", "degree", "certification", "diploma"]
-    edu = [sent.text for sent in doc.sents if any(k in sent.text.lower() for k in edu_keywords)]
     return text, {
-        "name": name[0] if name else "N/A",
-        "email": email[0] if email else "N/A",
-        "skills": list(set(skills)),
-        "education": edu,
     }
 def get_recommendations(parsed):
-    score = 50 + len(parsed["skills"]) % 50
-    feedback = "Try adding more specific technical skills and quantifiable achievements."
     return score, feedback
 def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
-    prompt_map = {
-        "certifications": f"Suggest relevant certifications for someone with skills: {parsed['skills']} and education: {parsed['education']}",
-        "degrees": f"Suggest higher education paths based on: {parsed['education']}",
-        "roadmap": f"Create a 1-year career roadmap for someone with these skills: {parsed['skills']} and education: {parsed['education']}",
-        "counselor": f"Act like a career counselor. Give personalized advice to this candidate: Skills={parsed['skills']} Education={parsed['education']}"
     }
-    res = llm(prompt_map[suggestion_type], max_length=512, do_sample=True, temperature=0.7)
-    return res[0]['generated_text']

         download("en_core_web_sm")
         nlp = spacy.load("en_core_web_sm")
+    # You can replace this with a better model if needed
     llm = pipeline("text-generation", model="openai-community/gpt2")
     return nlp, llm
def clean_text(text):
    """Normalize extracted text to single-spaced printable ASCII.

    Args:
        text: Raw text, e.g. as extracted from PDF pages.

    Returns:
        ``text`` with non-ASCII and control characters replaced by spaces,
        every run of whitespace collapsed to one space, and leading and
        trailing whitespace removed.
    """
    # Replace everything that is neither printable ASCII nor whitespace
    # (non-ASCII characters and control characters such as NUL or DEL).
    # This runs before the whitespace pass so the spaces it inserts are
    # collapsed too instead of leaving double spaces behind.
    text = re.sub(r"[^\x20-\x7E\s]+", " ", text)
    text = re.sub(r"\s+", " ", text)
    return text.strip()
 def parse_resume(uploaded_file, nlp):
     doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
     text = "\n".join(page.get_text() for page in doc)
+    text = clean_text(text)
+    spacy_doc = nlp(text)
+    # Extract name
+    name = next((ent.text for ent in spacy_doc.ents if ent.label_ == "PERSON"), "N/A")
+    # Extract email
+    email_match = re.search(r"[\w\.-]+@[\w\.-]+", text)
+    email = email_match.group(0) if email_match else "N/A"
+    # Extract skills using noun chunks (filtered)
+    noun_phrases = [
+        chunk.text.lower().strip()
+        for chunk in spacy_doc.noun_chunks
+        if 2 <= len(chunk.text.strip()) <= 30
+    ]
+    skills = list(set(noun_phrases))
+    # Extract education lines
     edu_keywords = ["bachelor", "master", "phd", "degree", "certification", "diploma"]
+    education = [
+        sent.text.strip()
+        for sent in spacy_doc.sents
+        if any(k in sent.text.lower() for k in edu_keywords)
+    ]
     return text, {
+        "name": name,
+        "email": email,
+        "skills": skills,
+        "education": education,
     }
 def get_recommendations(parsed):
+    num_skills = len(parsed["skills"])
+    score = min(100, 50 + num_skills // 2)
+    feedback = (
+        "Your CV contains a good number of skills, but try to focus on more specific, "
+        "in-demand technical and soft skills. Tailor it to your target job role."
+    )
     return score, feedback
 def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
+    name = parsed.get("name", "Candidate")
+    skills = ", ".join(parsed["skills"][:10]) if parsed["skills"] else "unspecified"
+    education = "; ".join(parsed["education"][:3]) if parsed["education"] else "not mentioned"
+    prompts = {
+        "certifications": (
+            f"The candidate has skills in: {skills}. Education background: {education}.\n"
+            f"List relevant industry-recognized certifications they should pursue."
+        ),
+        "degrees": (
+            f"Based on this background: {education}, what higher education degrees (e.g., Master's, diploma) "
+            f"would help improve their career prospects?"
+        ),
+        "roadmap": (
+            f"Create a detailed 1-year career roadmap for {name}, who has the following skills: {skills}, "
+            f"and education: {education}. Include quarterly goals."
+        ),
+        "counselor": (
+            f"Act as a career counselor for {name}. Their main skills are: {skills}. Education includes: {education}.\n"
+            f"Give them 3 personalized suggestions to grow their career internationally."
+        )
     }
+    response = llm(prompts[suggestion_type], max_length=512, do_sample=True, temperature=0.7)
+    return response[0]["generated_text"]