Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -1,114 +1,58 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import spacy
|
| 3 |
-
import json
|
| 4 |
import requests
|
|
|
|
| 5 |
from transformers import pipeline
|
| 6 |
-
from fpdf import FPDF
|
| 7 |
-
from pdfminer.high_level import extract_text
|
| 8 |
-
|
| 9 |
-
# Load the spaCy English model, downloading it on first run if it is absent.
try:
    nlp_spacy = spacy.load("en_core_web_sm")
except OSError:
    # Narrowed from a bare except: spacy.load raises OSError when the model
    # package is not installed. Fetch it, then retry the load once.
    from spacy.cli import download
    download("en_core_web_sm")
    nlp_spacy = spacy.load("en_core_web_sm")

# Shared text-generation pipeline used by every LLM helper below.
# NOTE(review): Mistral-7B is not actually "lightweight", and
# device_map="auto" requires accelerate — confirm the deployment target
# can host this model.
llm = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1", device_map="auto", max_new_tokens=512)
|
| 19 |
-
|
| 20 |
-
def parse_cv(pdf_path):
    """Extract the raw text content of a CV from a PDF file path."""
    raw_text = extract_text(pdf_path)
    return raw_text
|
| 22 |
-
|
| 23 |
-
def identify_field_with_llm(text):
    """Ask the LLM to name the main professional field of a CV excerpt."""
    prompt = f"Analyze the CV content below and identify the main professional field:\n\n{text[:2000]}\n\nField:"
    generated = llm(prompt)[0]['generated_text']
    # The answer is the first line following the final "Field:" marker.
    answer_tail = generated.split("Field:")[-1]
    return answer_tail.strip().split("\n")[0]
|
| 27 |
-
|
| 28 |
-
def generate_skill_score(text):
    """Score a CV from 0-100 by the fraction of tracked keywords it mentions."""
    keywords = ["Python", "Excel", "project", "machine learning", "automation", "SQL", "cloud", "leadership", "communication"]
    lowered = text.lower()
    hits = 0
    for keyword in keywords:
        if keyword.lower() in lowered:
            hits += 1
    return int((hits / len(keywords)) * 100)
|
| 32 |
-
|
| 33 |
-
def generate_llm_suggestions(cv_text, field):
    """Ask the LLM for structured career suggestions and parse its JSON reply.

    cv_text is the raw CV text (truncated to 2000 chars in the prompt);
    field is the detected professional field. Returns a dict with keys
    skills/certifications/scholarships/education/visa, falling back to a
    static suggestion set when the model output has no parseable JSON.
    """
    prompt = f"""You are a career advisor AI. Based on the following CV content and field ({field}), suggest:
1. 5 upskilling skills
2. 3 certifications
3. 3 scholarships
4. 3 education paths
5. 3 visa-friendly countries

CV: {cv_text[:2000]}
Respond in JSON with keys: skills, certifications, scholarships, education, visa
"""
    output = llm(prompt)[0]['generated_text']
    json_start = output.find("{")
    try:
        # find() returns -1 when no "{" exists; treat that as a parse
        # failure instead of feeding json.loads a nonsense slice.
        if json_start == -1:
            raise ValueError("no JSON object found in model output")
        data = json.loads(output[json_start:])
    except ValueError:
        # json.JSONDecodeError subclasses ValueError; narrowed from a bare
        # except so real programming errors are no longer swallowed.
        data = {
            "skills": ["Problem-solving", "AI tools", "Data analysis"],
            "certifications": ["Coursera Python", "AWS Associate", "PMP"],
            "scholarships": ["DAAD", "Chevening", "Fulbright"],
            "education": ["MSc in AI", "MBA in Tech", "Masters in Data Science"],
            "visa": ["Canada", "Germany", "Australia"]
        }
    return data
|
| 57 |
-
|
| 58 |
-
def get_live_jobs(field):
    """Fetch up to 5 live job postings for *field* from the Adzuna API.

    Returns a list of dicts with title/company/location/url keys, or an
    empty list on any network or parsing failure (best-effort lookup).
    """
    # Example using Adzuna (replace with your actual API key and app id)
    API_ID = os.getenv("ADZUNA_APP_ID", "mock_id")
    API_KEY = os.getenv("ADZUNA_APP_KEY", "mock_key")
    country = "gb"
    # NOTE(review): field is interpolated unescaped into the query string;
    # consider requests params= for proper URL encoding.
    url = f"https://api.adzuna.com/v1/api/jobs/{country}/search/1?app_id={API_ID}&app_key={API_KEY}&results_per_page=5&what={field}"

    try:
        # Timeout added so a stalled API call cannot hang the caller forever.
        response = requests.get(url, timeout=10)
        data = response.json()
        jobs = []
        for job in data.get("results", []):
            jobs.append({
                "title": job.get("title", "Job"),
                "company": job.get("company", {}).get("display_name", "Company"),
                "location": job.get("location", {}).get("display_name", "Location"),
                "url": job.get("redirect_url", "#")
            })
        return jobs
    except (requests.RequestException, ValueError):
        # Narrowed from a bare except: covers connection errors and bad
        # JSON while still returning a best-effort empty result.
        return []
|
| 79 |
-
|
| 80 |
-
def generate_counselor_response(cv_text, field, score, suggestions):
    """Generate friendly, counselor-style advice for the analyzed CV."""
    prompt = f"""Act like a career counselor. Given this CV in the {field} field with a skill score of {score}, and the following suggestions:\n\n{json.dumps(suggestions)}\n\nGive friendly, motivational advice (max 200 words)."""
    raw_advice = llm(prompt)[0]['generated_text']
    return raw_advice.strip()
|
| 83 |
-
|
| 84 |
-
def generate_pdf_report(cv_text, field, score, suggestions, jobs, counselor_msg):
    """Lay out the full analysis (field, score, suggestions, jobs, advice)
    as an FPDF document.

    NOTE(review): the function builds the PDF but neither returns nor saves
    it in the visible lines — confirm how the caller obtains the output.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    def _section(title):
        # Spacer + bold heading, then switch back to the body font.
        pdf.ln(5)
        pdf.set_font("Arial", "B", 12)
        pdf.cell(0, 10, title, ln=True)
        pdf.set_font("Arial", size=11)

    # Report header and summary lines.
    pdf.cell(200, 10, txt="Smart CV Analyzer Report", ln=True, align="C")
    pdf.ln(10)
    pdf.multi_cell(0, 10, f"Detected Field: {field}")
    pdf.multi_cell(0, 10, f"Skill Score: {score}/100")

    _section("AI Suggestions:")
    for key, items in suggestions.items():
        pdf.multi_cell(0, 10, f"{key.capitalize()}: {', '.join(items)}")

    _section("Live Jobs:")
    for job in jobs:
        pdf.multi_cell(0, 10, f"{job['title']} at {job['company']} ({job['location']})")

    _section("Career Counselor Advice:")
    pdf.multi_cell(0, 10, counselor_msg)
|
| 113 |
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import requests
|
| 2 |
+
import spacy
|
| 3 |
from transformers import pipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
+
# Load spaCy model for NLP tasks, fetching it on first run if missing
# (restores the resilience the previous revision had: a fresh machine
# without en_core_web_sm would otherwise crash at import time).
try:
    nlp_spacy = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")
    nlp_spacy = spacy.load("en_core_web_sm")

# Initialize lightweight LLM pipeline (Falcon 1B) on CPU.
llm = pipeline("text-generation", model="tiiuae/falcon-rw-1b", device="cpu", max_new_tokens=512)
|
| 10 |
+
|
| 11 |
+
def generate_llm_response(prompt: str) -> str:
    """Run *prompt* through the shared Falcon pipeline and return its text."""
    outputs = llm(prompt, do_sample=True, temperature=0.7)
    first_candidate = outputs[0]
    return first_candidate['generated_text']
|
| 15 |
+
|
| 16 |
+
def get_skills_suggestions(cv_text: str) -> list:
    """Ask the LLM to pull relevant professional skills out of raw CV text."""
    prompt = f"Extract and list relevant professional skills from this CV text:\n{cv_text}"
    raw = generate_llm_response(prompt)
    # Each non-blank output line becomes one skill entry.
    return [line.strip() for line in raw.split('\n') if line.strip()]
|
| 21 |
+
|
| 22 |
+
def get_certifications_suggestions(skills: list) -> list:
    """Suggest certifications that match an already-extracted skill list."""
    prompt = f"Suggest certifications relevant to these skills:\n{', '.join(skills)}"
    raw = generate_llm_response(prompt)
    suggestions = []
    for line in raw.split('\n'):
        cleaned = line.strip()
        if cleaned:
            suggestions.append(cleaned)
    return suggestions
|
| 27 |
+
|
| 28 |
+
def get_scholarships_suggestions(field: str) -> list:
    """Ask the LLM for scholarships relevant to a professional field."""
    prompt = f"List scholarships available for the field: {field}"
    raw = generate_llm_response(prompt)
    # One scholarship per non-blank output line.
    return [line.strip() for line in raw.split('\n') if line.strip()]
|
| 33 |
+
|
| 34 |
+
def get_education_opportunities(field: str) -> list:
    """Ask the LLM for education opportunities and courses in a field."""
    prompt = f"Suggest education opportunities and relevant courses for the field: {field}"
    raw = generate_llm_response(prompt)
    opportunities = []
    for line in raw.split('\n'):
        entry = line.strip()
        if entry:
            opportunities.append(entry)
    return opportunities
|
| 39 |
+
|
| 40 |
+
def get_visa_opportunities(country: str) -> list:
    """Ask the LLM for skilled-worker visa and immigration options."""
    prompt = f"List visa and immigration options for skilled professionals in {country}"
    raw = generate_llm_response(prompt)
    # One option per non-blank output line.
    return [line.strip() for line in raw.split('\n') if line.strip()]
|
| 45 |
+
|
| 46 |
+
def get_job_listings(keywords: str, location: str, limit: int = 5) -> list:
    """Generate up to *limit* job suggestions via the LLM (mock of a jobs API)."""
    # For now, we will generate job suggestions with LLM (mock API)
    prompt = f"List {limit} recent job openings for '{keywords}' in {location}, include job title and brief description."
    raw = generate_llm_response(prompt)
    listings = []
    for line in raw.split('\n'):
        entry = line.strip()
        if entry:
            listings.append(entry)
    return listings
|
| 52 |
+
|
| 53 |
+
def score_cv(cv_text: str) -> int:
    """Heuristic CV score: distinct content-word lemma count, capped at 100."""
    doc = nlp_spacy(cv_text.lower())
    # Keep only alphabetic, non-stopword tokens; lemmas deduplicate inflections.
    lemmas = {tok.lemma_ for tok in doc if not tok.is_stop and tok.is_alpha}
    # Cap at 100 so the score reads like a percentage.
    return min(100, len(lemmas))
|