Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# utils.py
|
| 2 |
import spacy
|
| 3 |
from spacy.cli import download
|
| 4 |
import fitz # PyMuPDF
|
|
@@ -6,65 +5,38 @@ import re
|
|
| 6 |
from transformers import pipeline
|
| 7 |
|
| 8 |
def load_models():
|
|
|
|
| 9 |
try:
|
| 10 |
nlp = spacy.load("en_core_web_sm")
|
| 11 |
except OSError:
|
| 12 |
download("en_core_web_sm")
|
| 13 |
nlp = spacy.load("en_core_web_sm")
|
| 14 |
-
|
| 15 |
-
# You can replace this with a better model if needed
|
| 16 |
-
llm = pipeline("text-generation", model="openai-community/gpt2")
|
| 17 |
-
return nlp, llm
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
text = re.sub(r"[^\x00-\x7F]+", " ", text) # Remove non-ASCII
|
| 23 |
-
return text.strip()
|
| 24 |
|
| 25 |
def parse_resume(uploaded_file, nlp):
|
| 26 |
doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
|
| 27 |
text = "\n".join(page.get_text() for page in doc)
|
| 28 |
-
|
| 29 |
-
spacy_doc = nlp(text)
|
| 30 |
-
|
| 31 |
-
# Extract name
|
| 32 |
-
name = next((ent.text for ent in spacy_doc.ents if ent.label_ == "PERSON"), "N/A")
|
| 33 |
-
|
| 34 |
-
# Extract email
|
| 35 |
-
email_match = re.search(r"[\w\.-]+@[\w\.-]+", text)
|
| 36 |
-
email = email_match.group(0) if email_match else "N/A"
|
| 37 |
-
|
| 38 |
-
# Extract skills using noun chunks (filtered)
|
| 39 |
-
noun_phrases = [
|
| 40 |
-
chunk.text.lower().strip()
|
| 41 |
-
for chunk in spacy_doc.noun_chunks
|
| 42 |
-
if 2 <= len(chunk.text.strip()) <= 30
|
| 43 |
-
]
|
| 44 |
-
skills = list(set(noun_phrases))
|
| 45 |
|
| 46 |
-
|
|
|
|
|
|
|
| 47 |
edu_keywords = ["bachelor", "master", "phd", "degree", "certification", "diploma"]
|
| 48 |
-
|
| 49 |
-
sent.text.strip()
|
| 50 |
-
for sent in spacy_doc.sents
|
| 51 |
-
if any(k in sent.text.lower() for k in edu_keywords)
|
| 52 |
-
]
|
| 53 |
|
| 54 |
return text, {
|
| 55 |
-
"name": name,
|
| 56 |
-
"email": email,
|
| 57 |
-
"skills": skills,
|
| 58 |
-
"education":
|
| 59 |
}
|
| 60 |
|
| 61 |
def get_recommendations(parsed):
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
feedback = (
|
| 65 |
-
"Your CV contains a good number of skills, but try to focus on more specific, "
|
| 66 |
-
"in-demand technical and soft skills. Tailor it to your target job role."
|
| 67 |
-
)
|
| 68 |
return score, feedback
|
| 69 |
|
| 70 |
def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
|
|
@@ -72,24 +44,24 @@ def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
|
|
| 72 |
skills = ", ".join(parsed["skills"][:10]) if parsed["skills"] else "unspecified"
|
| 73 |
education = "; ".join(parsed["education"][:3]) if parsed["education"] else "not mentioned"
|
| 74 |
|
| 75 |
-
|
| 76 |
"certifications": (
|
| 77 |
-
f"
|
| 78 |
-
|
| 79 |
),
|
| 80 |
"degrees": (
|
| 81 |
-
f"
|
| 82 |
-
|
| 83 |
),
|
| 84 |
"roadmap": (
|
| 85 |
-
f"Create a
|
| 86 |
-
f"and education: {education}.
|
| 87 |
),
|
| 88 |
"counselor": (
|
| 89 |
-
f"
|
| 90 |
-
f"
|
| 91 |
)
|
| 92 |
}
|
| 93 |
|
| 94 |
-
|
| 95 |
-
return
|
|
|
|
|
|
|
| 1 |
import spacy
|
| 2 |
from spacy.cli import download
|
| 3 |
import fitz # PyMuPDF
|
|
|
|
| 5 |
from transformers import pipeline
|
| 6 |
|
| 7 |
def load_models():
    """Load the NLP models used across the app.

    Returns:
        tuple: (nlp, summarizer) where ``nlp`` is the spaCy
        ``en_core_web_sm`` pipeline and ``summarizer`` is a transformers
        summarization pipeline (distilbart-cnn-12-6).
    """
    # spaCy raises OSError when the model package is not installed;
    # download it once, then retry the load.
    try:
        nlp = spacy.load("en_core_web_sm")
    except OSError:
        download("en_core_web_sm")
        nlp = spacy.load("en_core_web_sm")

    # Lightweight summarization model (faster than full-size BART).
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
    return nlp, summarizer
|
|
|
|
|
|
|
| 18 |
|
| 19 |
def parse_resume(uploaded_file, nlp):
    """Extract raw text and structured fields from an uploaded PDF resume.

    Args:
        uploaded_file: A file-like object (e.g. a Streamlit upload) whose
            ``read()`` returns the PDF bytes.
        nlp: A loaded spaCy pipeline (see ``load_models``).

    Returns:
        tuple: (text, fields) where ``text`` is the full extracted text and
        ``fields`` is a dict with keys "name", "email", "skills",
        "education". "name"/"email" fall back to "N/A" when not found.
    """
    # BUG FIX: the fitz document was never closed (leaked file resources),
    # and the local name `doc` was reused for both the PDF document and the
    # spaCy doc. Close the PDF explicitly and use distinct names.
    pdf = fitz.open(stream=uploaded_file.read(), filetype="pdf")
    try:
        text = "\n".join(page.get_text() for page in pdf)
    finally:
        pdf.close()

    spacy_doc = nlp(text)

    # First PERSON entity is taken as the candidate's name.
    names = [ent.text for ent in spacy_doc.ents if ent.label_ == "PERSON"]
    # Simple pattern; matches the first email-looking token in the text.
    emails = re.findall(r"[\w\.-]+@[\w\.-]+", text)
    # Crude skill extraction: every noun token, lowercased and de-duplicated.
    skills = [token.text.lower() for token in spacy_doc if token.pos_ == "NOUN"]

    # Sentences mentioning any education-related keyword.
    edu_keywords = ["bachelor", "master", "phd", "degree", "certification", "diploma"]
    education = [
        sent.text
        for sent in spacy_doc.sents
        if any(k in sent.text.lower() for k in edu_keywords)
    ]

    return text, {
        "name": names[0] if names else "N/A",
        "email": emails[0] if emails else "N/A",
        "skills": list(set(skills)),
        "education": education,
    }
|
| 36 |
|
| 37 |
def get_recommendations(parsed):
    """Score a parsed resume and return generic improvement feedback.

    Args:
        parsed: Dict produced by ``parse_resume``; only the "skills" list
            is consulted (missing/empty is treated as zero skills).

    Returns:
        tuple: (score, feedback). ``score`` is an int in [50, 100]: a base
        of 50 plus one point per distinct skill, capped at 100.
    """
    # BUG FIX: the original computed `50 + len(skills) % 50`, so a resume
    # with exactly 50 skills wrapped back down to a score of 50. Cap at 100
    # instead so the score grows monotonically with the skill count.
    score = min(100, 50 + len(parsed.get("skills", [])))
    feedback = "Try adding more specific technical skills and quantifiable achievements."
    return score, feedback
|
| 41 |
|
| 42 |
def generate_career_insights(parsed, llm, suggestion_type="roadmap"):
    """Produce a career suggestion by summarizing a prompt built from the resume.

    Args:
        parsed: Dict from ``parse_resume`` with "skills" and "education" lists.
        llm: A transformers summarization pipeline (or compatible callable).
        suggestion_type: One of "certifications", "degrees", "roadmap",
            "counselor". Any other value raises ``KeyError``.

    Returns:
        str: The model's summary text for the selected prompt.
    """
    skill_list = parsed["skills"]
    edu_list = parsed["education"]
    # Only the first few items feed the prompt to keep it short.
    skills = ", ".join(skill_list[:10]) if skill_list else "unspecified"
    education = "; ".join(edu_list[:3]) if edu_list else "not mentioned"

    prompts = {
        "certifications": (
            f"Candidate has skills in: {skills}. With education: {education}. "
            "Summarize relevant certifications they can pursue."
        ),
        "degrees": (
            f"Given the education background: {education}, summarize higher education degrees "
            "that can help in career advancement."
        ),
        "roadmap": (
            f"Create a short 1-year career roadmap for someone with skills: {skills} "
            f"and education: {education}. Suggest goals."
        ),
        "counselor": (
            f"As a career advisor, suggest top 3 career moves for a person skilled in {skills} "
            f"with education in {education}."
        )
    }

    selected_prompt = prompts[suggestion_type]
    # Deterministic decoding; length bounds keep the answer a short paragraph.
    summaries = llm(selected_prompt, max_length=150, min_length=50, do_sample=False)
    return summaries[0]["summary_text"]
|