Danial7 committed on
Commit
03c193d
·
verified ·
1 Parent(s): a9f4ca2

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +113 -73
utils.py CHANGED
@@ -1,74 +1,114 @@
 
1
  import spacy
2
- import re
3
- from keybert import KeyBERT
4
- from sklearn.feature_extraction.text import TfidfVectorizer
5
- from sklearn.metrics.pairwise import cosine_similarity
6
-
7
# Shared NLP objects, created once when this module is imported.
# spaCy pipeline loaded from the "en_core_web_sm" package.
nlp_spacy = spacy.load("en_core_web_sm")
# KeyBERT instance constructed with its default arguments.
kw_model = KeyBERT()
9
-
10
- # --- CV Field Detection ---
11
def detect_cv_field(text):
    """Guess the professional field of a CV from keyword hits.

    Each field owns a short list of lowercase keywords. A field's score is
    the number of its keywords found in ``text`` (case-insensitive), and the
    highest-scoring field is returned.

    Keywords are matched only at the start of a word, so "ui" does not fire
    inside "building" and "api" does not fire inside "capital", while
    inflected forms such as "designer" or "APIs" still count.

    Args:
        text: Raw CV text.

    Returns:
        One of "software", "data science", "engineering", "management",
        "marketing" or "design". Ties, including the case where nothing
        matches, resolve to the field listed first in the table.
    """
    import re

    field_keywords = {
        "software": ["python", "java", "software", "developer", "api", "backend", "frontend"],
        "data science": ["machine learning", "data", "pandas", "statistics", "model"],
        "engineering": ["engineering", "maintenance", "production", "plant", "electrical"],
        "management": ["manager", "operations", "logistics", "supply chain"],
        "marketing": ["seo", "digital marketing", "brand", "campaign", "social media"],
        "design": ["illustrator", "photoshop", "ux", "ui", "figma", "design"],
    }
    text = text.lower()

    def count_hits(keywords):
        # (?<!\w) anchors the keyword to the start of a word without
        # requiring the word to end there.
        return sum(
            re.search(r"(?<!\w)" + re.escape(kw), text) is not None
            for kw in keywords
        )

    field_scores = {field: count_hits(kws) for field, kws in field_keywords.items()}
    return max(field_scores, key=field_scores.get)
23
-
24
- # --- Skill Suggestions ---
25
def get_skills_suggestions(field):
    """Return the canned skill suggestions for a professional field.

    Args:
        field: Field name; matched case-insensitively after trimming
            surrounding whitespace.

    Returns:
        A list of skill names, or an empty list when the field is not in
        the table or is not a string.
    """
    mock_skills = {
        "software": ["Python", "Git", "REST APIs", "Docker", "SQL"],
        "data science": ["Pandas", "Scikit-learn", "Data Visualization", "TensorFlow", "SQL"],
        "engineering": ["PLC Programming", "AutoCAD", "Industrial Safety", "SCADA", "Maintenance Planning"],
        "management": ["Project Management", "Excel", "ERP", "Communication", "Team Leadership"],
        "marketing": ["SEO", "Content Creation", "Google Analytics", "Email Marketing", "Brand Strategy"],
        "design": ["Figma", "Adobe XD", "Typography", "Wireframing", "User Research"],
    }
    # A missing field is treated like an unknown one instead of raising.
    if not isinstance(field, str):
        return []
    return mock_skills.get(field.strip().lower(), [])
35
-
36
- # --- Certification Suggestions ---
37
def get_certifications(field):
    """Return the canned certification suggestions for a professional field.

    Args:
        field: Field name; matched case-insensitively after trimming
            surrounding whitespace.

    Returns:
        A list of certification names, or an empty list when the field is
        not in the table or is not a string.
    """
    mock_certs = {
        "software": ["AWS Certified Developer", "Google Associate Android Developer"],
        "data science": ["IBM Data Science", "Google Data Analytics"],
        "engineering": ["AutoCAD Certification", "Six Sigma Green Belt"],
        "management": ["PMP", "Scrum Master", "Operations Management from Coursera"],
        "marketing": ["HubSpot Inbound Marketing", "Google Ads Certification"],
        "design": ["Adobe Certified Professional", "Google UX Design Certificate"],
    }
    # A missing field is treated like an unknown one instead of raising.
    if not isinstance(field, str):
        return []
    return mock_certs.get(field.strip().lower(), [])
47
-
48
- # --- Scholarships ---
49
def get_scholarships(field, country="United States"):
    """Build three templated scholarship names for a field and country.

    Args:
        field: Field name; rendered in title case.
        country: Country name inserted verbatim into the second entry.

    Returns:
        A list of three scholarship titles.
    """
    title = field.title()
    scholarships = [f"{title} Scholars Program"]
    scholarships.append(f"{country} International Excellence Scholarship for {title}")
    scholarships.append(f"Global Leaders {title} Fellowship")
    return scholarships
55
-
56
- # --- Education Opportunities ---
57
def get_education_opportunities(field, country="Germany"):
    """Build three templated graduate-programme names for a field.

    Args:
        field: Field name; rendered in title case.
        country: Accepted but not read; the institutions are fixed.

    Returns:
        A list of three programme descriptions.
    """
    title = field.title()
    programmes = []
    programmes.append(f"MSc in {title} - TU Berlin")
    programmes.append(f"DAAD-funded {title} Master’s - RWTH Aachen")
    programmes.append(f"International {title} Graduate Program - University of Stuttgart")
    return programmes
63
-
64
- # --- Visa Suggestions ---
65
def get_visa_opportunities(field, country="Germany"):
    """Return the canned visa categories for a professional field.

    Args:
        field: Field name; matched case-insensitively after trimming
            surrounding whitespace.
        country: Accepted but not read; the table is not keyed by country.

    Returns:
        The visa list for the field, or a generic two-item list when the
        field is not in the table or is not a string.
    """
    visa_types = {
        "software": ["EU Blue Card", "Freelancer Visa", "Job Seeker Visa"],
        "engineering": ["EU Blue Card", "Skilled Worker Visa"],
        "management": ["Job Seeker Visa", "Business Visa"],
        "marketing": ["Freelancer Visa", "Creative Professional Visa"],
        "data science": ["EU Blue Card", "Research Visa"],
        "design": ["Artist Visa", "Freelancer Visa"],
    }
    default = ["General Skilled Worker Visa", "Job Seeker Visa"]
    # A missing field falls back to the generic list instead of raising.
    if not isinstance(field, str):
        return default
    return visa_types.get(field.strip().lower(), default)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
  import spacy
3
+ import json
4
+ import requests
5
+ from transformers import pipeline
6
+ from fpdf import FPDF
7
+ from pdfminer.high_level import extract_text
8
+
9
# Load the spaCy English pipeline, downloading the package on first use.
try:
    nlp_spacy = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package is not installed.
    # Only that case triggers a download; any other failure propagates.
    from spacy.cli import download

    download("en_core_web_sm")
    nlp_spacy = spacy.load("en_core_web_sm")

# Shared text-generation pipeline used by the LLM helpers in this module.
# NOTE: this is a 7B-parameter instruct model created at import time, so
# importing the module is expensive.
llm = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1", device_map="auto", max_new_tokens=512)
19
+
20
def parse_cv(pdf_path):
    """Return the text that pdfminer's ``extract_text`` yields for ``pdf_path``."""
    cv_text = extract_text(pdf_path)
    return cv_text
22
+
23
def identify_field_with_llm(text):
    """Ask the LLM for the main professional field of a CV.

    Only the first 2000 characters of ``text`` are sent to the model.

    Args:
        text: Raw CV text; ``None`` is treated as empty.

    Returns:
        The first non-blank line of the model's answer, or an empty string
        when the model produced no text.
    """
    prompt = f"Analyze the CV content below and identify the main professional field:\n\n{(text or '')[:2000]}\n\nField:"
    result = llm(prompt)[0]['generated_text']
    if result.startswith(prompt):
        # Drop the echoed prompt so a "Field:" label that the model repeats
        # further down its completion cannot displace the first answer.
        answer = result[len(prompt):]
    else:
        answer = result.split("Field:")[-1]
    answer = answer.strip()
    return answer.split("\n")[0].strip() if answer else ""
27
+
28
def generate_skill_score(text):
    """Score a CV from 0 to 100 by how many reference keywords it mentions.

    Matching is a case-insensitive substring test; each keyword counts at
    most once.

    Args:
        text: Raw CV text; ``None`` or an empty string scores 0.

    Returns:
        The share of keywords found, as an integer percentage rounded down.
    """
    keywords = ["Python", "Excel", "project", "machine learning", "automation", "SQL", "cloud", "leadership", "communication"]
    if not text:
        return 0
    # Lower-case the CV once rather than once per keyword.
    haystack = text.lower()
    score = sum(1 for kw in keywords if kw.lower() in haystack)
    # Integer arithmetic gives the same floor as int(score / n * 100).
    return score * 100 // len(keywords)
32
+
33
def generate_llm_suggestions(cv_text, field):
    """Ask the LLM for career suggestions and parse them from its reply.

    The model is prompted to answer in JSON with the keys ``skills``,
    ``certifications``, ``scholarships``, ``education`` and ``visa``. The
    first JSON object found in the completion is returned; text after the
    object is ignored. If no JSON object can be decoded, a fixed fallback
    dictionary with the same keys is returned.

    Args:
        cv_text: Raw CV text; only the first 2000 characters are sent.
            ``None`` is treated as empty.
        field: Professional field named in the prompt.

    Returns:
        A dict of suggestions (parsed from the model, or the fallback).
    """
    fallback = {
        "skills": ["Problem-solving", "AI tools", "Data analysis"],
        "certifications": ["Coursera Python", "AWS Associate", "PMP"],
        "scholarships": ["DAAD", "Chevening", "Fulbright"],
        "education": ["MSc in AI", "MBA in Tech", "Masters in Data Science"],
        "visa": ["Canada", "Germany", "Australia"]
    }
    cv_excerpt = (cv_text or "")[:2000]
    prompt = f"""You are a career advisor AI. Based on the following CV content and field ({field}), suggest:
1. 5 upskilling skills
2. 3 certifications
3. 3 scholarships
4. 3 education paths
5. 3 visa-friendly countries

CV: {cv_excerpt}
Respond in JSON with keys: skills, certifications, scholarships, education, visa
"""
    output = llm(prompt)[0]['generated_text']
    if output.startswith(prompt):
        # Skip the echoed prompt so a "{" inside the CV text is not
        # mistaken for the start of the answer.
        output = output[len(prompt):]

    # raw_decode stops at the end of the first complete JSON value, so
    # trailing chatter after the object does not break parsing.
    decoder = json.JSONDecoder()
    start = output.find("{")
    while start != -1:
        try:
            data, _ = decoder.raw_decode(output, start)
        except ValueError:
            data = None
        if isinstance(data, dict):
            return data
        start = output.find("{", start + 1)
    return fallback
57
+
58
def get_live_jobs(field):
    """Fetch up to five current job postings for a field from Adzuna (GB).

    Credentials are read from the ``ADZUNA_APP_ID`` and ``ADZUNA_APP_KEY``
    environment variables, with placeholder defaults when they are unset.
    This is a best-effort lookup: network errors, HTTP errors and
    unparsable responses all yield an empty list.

    Args:
        field: Search term sent as the ``what`` query parameter.

    Returns:
        A list of dicts with the keys ``title``, ``company``, ``location``
        and ``url``; empty when the lookup fails or finds nothing.
    """
    app_id = os.getenv("ADZUNA_APP_ID", "mock_id")
    app_key = os.getenv("ADZUNA_APP_KEY", "mock_key")
    country = "gb"
    url = f"https://api.adzuna.com/v1/api/jobs/{country}/search/1"
    # Passing the query through `params` lets requests URL-encode it, so a
    # field such as "R&D engineer" cannot corrupt the query string.
    params = {
        "app_id": app_id,
        "app_key": app_key,
        "results_per_page": 5,
        "what": field,
    }

    try:
        # Without a timeout an unresponsive server would block the caller
        # indefinitely.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        return []

    if not isinstance(data, dict):
        return []

    jobs = []
    for job in data.get("results") or []:
        if not isinstance(job, dict):
            continue
        # `or {}` covers a null company/location, which would otherwise
        # raise and discard every posting in the response.
        company = job.get("company") or {}
        location = job.get("location") or {}
        jobs.append({
            "title": job.get("title", "Job"),
            "company": company.get("display_name", "Company"),
            "location": location.get("display_name", "Location"),
            "url": job.get("redirect_url", "#")
        })
    return jobs
79
+
80
def generate_counselor_response(cv_text, field, score, suggestions):
    """Generate short motivational career advice with the LLM.

    Args:
        cv_text: Accepted for interface symmetry; not included in the prompt.
        field: Professional field named in the prompt.
        score: Skill score quoted in the prompt.
        suggestions: JSON-serialisable suggestions embedded in the prompt.

    Returns:
        The model's advice with surrounding whitespace removed and without
        the echoed prompt.
    """
    prompt = f"""Act like a career counselor. Given this CV in the {field} field with a skill score of {score}, and the following suggestions:\n\n{json.dumps(suggestions)}\n\nGive friendly, motivational advice (max 200 words)."""
    generated = llm(prompt)[0]['generated_text']
    # Text-generation pipelines return the prompt followed by the
    # completion unless told otherwise; keep only the completion so the
    # instructions do not leak into the advice shown to the user.
    if generated.startswith(prompt):
        generated = generated[len(prompt):]
    return generated.strip()
83
+
84
def generate_pdf_report(cv_text, field, score, suggestions, jobs, counselor_msg):
    """Render the analysis results as a one-document PDF and return its bytes.

    The report lists the detected field, the skill score, each suggestion
    category, the job postings and the counselor advice, in that order.

    Args:
        cv_text: Accepted for interface symmetry; not written to the report.
        field: Detected professional field.
        score: Skill score shown as "<score>/100".
        suggestions: Mapping of category name to a list of items (a single
            non-list value is printed as-is).
        jobs: Iterable of dicts with ``title``, ``company`` and ``location``.
        counselor_msg: Advice text.

    Returns:
        The PDF document as ``bytes``.
    """
    def printable(value):
        # The built-in Arial font only covers latin-1. LLM text often has
        # curly quotes, dashes or emoji, so unsupported characters are
        # replaced with "?" instead of aborting the whole report.
        return str(value).encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    pdf.cell(200, 10, txt="Smart CV Analyzer Report", ln=True, align="C")
    pdf.ln(10)
    pdf.multi_cell(0, 10, printable(f"Detected Field: {field}"))
    pdf.multi_cell(0, 10, printable(f"Skill Score: {score}/100"))
    pdf.ln(5)

    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "AI Suggestions:", ln=True)
    pdf.set_font("Arial", size=11)
    for key, items in suggestions.items():
        # Model-produced JSON may hold a bare string or non-string items;
        # joining those directly would split characters or raise.
        if isinstance(items, (list, tuple, set)):
            joined = ", ".join(str(item) for item in items)
        else:
            joined = str(items)
        pdf.multi_cell(0, 10, printable(f"{str(key).capitalize()}: {joined}"))

    pdf.ln(5)
    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Live Jobs:", ln=True)
    pdf.set_font("Arial", size=11)
    for job in jobs:
        title = job.get("title", "Job")
        company = job.get("company", "Company")
        location = job.get("location", "Location")
        pdf.multi_cell(0, 10, printable(f"{title} at {company} ({location})"))

    pdf.ln(5)
    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Career Counselor Advice:", ln=True)
    pdf.set_font("Arial", size=11)
    pdf.multi_cell(0, 10, printable(counselor_msg))

    raw = pdf.output(dest='S')
    # Legacy PyFPDF returns a latin-1 str here; fpdf2 returns a bytearray.
    if isinstance(raw, str):
        return raw.encode('latin1')
    return bytes(raw)