Danial7 committed on
Commit
697eef7
·
verified ·
1 Parent(s): bbd468a

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +55 -111
utils.py CHANGED
@@ -1,114 +1,58 @@
1
- import os
2
- import spacy
3
- import json
4
  import requests
 
5
  from transformers import pipeline
6
- from fpdf import FPDF
7
- from pdfminer.high_level import extract_text
8
-
9
- # Load spacy model
10
- try:
11
- nlp_spacy = spacy.load("en_core_web_sm")
12
- except:
13
- from spacy.cli import download
14
- download("en_core_web_sm")
15
- nlp_spacy = spacy.load("en_core_web_sm")
16
-
17
- # Lightweight model
18
- llm = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1", device_map="auto", max_new_tokens=512)
19
-
20
- def parse_cv(pdf_path):
21
- return extract_text(pdf_path)
22
-
23
- def identify_field_with_llm(text):
24
- prompt = f"Analyze the CV content below and identify the main professional field:\n\n{text[:2000]}\n\nField:"
25
- result = llm(prompt)[0]['generated_text']
26
- return result.split("Field:")[-1].strip().split("\n")[0]
27
-
28
- def generate_skill_score(text):
29
- keywords = ["Python", "Excel", "project", "machine learning", "automation", "SQL", "cloud", "leadership", "communication"]
30
- score = sum(1 for kw in keywords if kw.lower() in text.lower())
31
- return int((score / len(keywords)) * 100)
32
-
33
- def generate_llm_suggestions(cv_text, field):
34
- prompt = f"""You are a career advisor AI. Based on the following CV content and field ({field}), suggest:
35
- 1. 5 upskilling skills
36
- 2. 3 certifications
37
- 3. 3 scholarships
38
- 4. 3 education paths
39
- 5. 3 visa-friendly countries
40
-
41
- CV: {cv_text[:2000]}
42
- Respond in JSON with keys: skills, certifications, scholarships, education, visa
43
- """
44
- output = llm(prompt)[0]['generated_text']
45
- json_start = output.find("{")
46
- try:
47
- data = json.loads(output[json_start:])
48
- except:
49
- data = {
50
- "skills": ["Problem-solving", "AI tools", "Data analysis"],
51
- "certifications": ["Coursera Python", "AWS Associate", "PMP"],
52
- "scholarships": ["DAAD", "Chevening", "Fulbright"],
53
- "education": ["MSc in AI", "MBA in Tech", "Masters in Data Science"],
54
- "visa": ["Canada", "Germany", "Australia"]
55
- }
56
- return data
57
-
58
- def get_live_jobs(field):
59
- # Example using Adzuna (replace with your actual API key and app id)
60
- API_ID = os.getenv("ADZUNA_APP_ID", "mock_id")
61
- API_KEY = os.getenv("ADZUNA_APP_KEY", "mock_key")
62
- country = "gb"
63
- url = f"https://api.adzuna.com/v1/api/jobs/{country}/search/1?app_id={API_ID}&app_key={API_KEY}&results_per_page=5&what={field}"
64
-
65
- try:
66
- response = requests.get(url)
67
- data = response.json()
68
- jobs = []
69
- for job in data.get("results", []):
70
- jobs.append({
71
- "title": job.get("title", "Job"),
72
- "company": job.get("company", {}).get("display_name", "Company"),
73
- "location": job.get("location", {}).get("display_name", "Location"),
74
- "url": job.get("redirect_url", "#")
75
- })
76
- return jobs
77
- except:
78
- return []
79
-
80
- def generate_counselor_response(cv_text, field, score, suggestions):
81
- prompt = f"""Act like a career counselor. Given this CV in the {field} field with a skill score of {score}, and the following suggestions:\n\n{json.dumps(suggestions)}\n\nGive friendly, motivational advice (max 200 words)."""
82
- return llm(prompt)[0]['generated_text'].strip()
83
-
84
- def generate_pdf_report(cv_text, field, score, suggestions, jobs, counselor_msg):
85
- pdf = FPDF()
86
- pdf.add_page()
87
- pdf.set_font("Arial", size=12)
88
-
89
- pdf.cell(200, 10, txt="Smart CV Analyzer Report", ln=True, align="C")
90
- pdf.ln(10)
91
- pdf.multi_cell(0, 10, f"Detected Field: {field}")
92
- pdf.multi_cell(0, 10, f"Skill Score: {score}/100")
93
- pdf.ln(5)
94
-
95
- pdf.set_font("Arial", "B", 12)
96
- pdf.cell(0, 10, "AI Suggestions:", ln=True)
97
- pdf.set_font("Arial", size=11)
98
- for key, items in suggestions.items():
99
- pdf.multi_cell(0, 10, f"{key.capitalize()}: {', '.join(items)}")
100
-
101
- pdf.ln(5)
102
- pdf.set_font("Arial", "B", 12)
103
- pdf.cell(0, 10, "Live Jobs:", ln=True)
104
- pdf.set_font("Arial", size=11)
105
- for job in jobs:
106
- pdf.multi_cell(0, 10, f"{job['title']} at {job['company']} ({job['location']})")
107
-
108
- pdf.ln(5)
109
- pdf.set_font("Arial", "B", 12)
110
- pdf.cell(0, 10, "Career Counselor Advice:", ln=True)
111
- pdf.set_font("Arial", size=11)
112
- pdf.multi_cell(0, 10, counselor_msg)
113
 
114
- return pdf.output(dest='S').encode('latin1')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import requests
2
+ import spacy
3
  from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ # Load spaCy model for NLP tasks
6
+ nlp_spacy = spacy.load("en_core_web_sm")
7
+
8
+ # Initialize lightweight LLM pipeline (Falcon 1B)
9
+ llm = pipeline("text-generation", model="tiiuae/falcon-rw-1b", device="cpu", max_new_tokens=512)
10
+
11
+ def generate_llm_response(prompt: str) -> str:
12
+ # Generate response from Falcon 1B
13
+ response = llm(prompt, do_sample=True, temperature=0.7)
14
+ return response[0]['generated_text']
15
+
16
+ def get_skills_suggestions(cv_text: str) -> list:
17
+ prompt = f"Extract and list relevant professional skills from this CV text:\n{cv_text}"
18
+ result = generate_llm_response(prompt)
19
+ skills = [s.strip() for s in result.split('\n') if s.strip()]
20
+ return skills
21
+
22
+ def get_certifications_suggestions(skills: list) -> list:
23
+ prompt = f"Suggest certifications relevant to these skills:\n{', '.join(skills)}"
24
+ result = generate_llm_response(prompt)
25
+ certs = [c.strip() for c in result.split('\n') if c.strip()]
26
+ return certs
27
+
28
+ def get_scholarships_suggestions(field: str) -> list:
29
+ prompt = f"List scholarships available for the field: {field}"
30
+ result = generate_llm_response(prompt)
31
+ scholarships = [s.strip() for s in result.split('\n') if s.strip()]
32
+ return scholarships
33
+
34
+ def get_education_opportunities(field: str) -> list:
35
+ prompt = f"Suggest education opportunities and relevant courses for the field: {field}"
36
+ result = generate_llm_response(prompt)
37
+ educations = [e.strip() for e in result.split('\n') if e.strip()]
38
+ return educations
39
+
40
+ def get_visa_opportunities(country: str) -> list:
41
+ prompt = f"List visa and immigration options for skilled professionals in {country}"
42
+ result = generate_llm_response(prompt)
43
+ visas = [v.strip() for v in result.split('\n') if v.strip()]
44
+ return visas
45
+
46
+ def get_job_listings(keywords: str, location: str, limit: int = 5) -> list:
47
+ # For now, we will generate job suggestions with LLM (mock API)
48
+ prompt = f"List {limit} recent job openings for '{keywords}' in {location}, include job title and brief description."
49
+ result = generate_llm_response(prompt)
50
+ jobs = [j.strip() for j in result.split('\n') if j.strip()]
51
+ return jobs
52
+
53
+ def score_cv(cv_text: str) -> int:
54
+ # Simple heuristic scoring based on keyword richness
55
+ tokens = nlp_spacy(cv_text.lower())
56
+ unique_tokens = set([token.lemma_ for token in tokens if not token.is_stop and token.is_alpha])
57
+ score = min(100, len(unique_tokens)) # max 100 points
58
+ return score