Danial7 committed on
Commit
2bd0c88
·
verified ·
1 Parent(s): 41b2d80

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +82 -103
utils.py CHANGED
@@ -1,116 +1,95 @@
 
 
1
  import requests
2
- import pandas as pd
3
  import streamlit as st
4
- from sklearn.feature_extraction.text import TfidfVectorizer
 
5
  from sklearn.metrics.pairwise import cosine_similarity
6
- from keybert import KeyBERT
7
-
8
- # Keyword extraction
9
- def extract_keywords(text, num_keywords=10):
10
- kw_model = KeyBERT()
11
- keywords = kw_model.extract_keywords(text, top_n=num_keywords, stop_words='english')
12
- return [kw[0].lower() for kw in keywords]
13
-
14
- # Field identification
15
- def identify_field(keywords):
16
- fields = {
17
- "Engineering": ["engineer", "mechanical", "electrical", "civil", "plc", "automation"],
18
- "Data Science": ["machine learning", "data", "python", "statistics", "ai"],
19
- "Software Development": ["developer", "software", "backend", "frontend", "javascript"],
20
- "Marketing": ["seo", "content", "marketing", "branding"],
21
- "Finance": ["accounting", "finance", "budget", "tax"],
22
- "Design": ["photoshop", "illustrator", "design", "creative"],
23
- "Healthcare": ["nursing", "surgery", "hospital", "patient"],
24
- "Construction": ["carpentry", "plumbing", "hvac", "gardening", "mining"]
25
- }
26
- scores = {field: len(set(keywords).intersection(terms)) for field, terms in fields.items()}
27
- return max(scores, key=scores.get)
28
-
29
- # Technical background
30
- def is_technical_background(keywords):
31
- tech_terms = ["engineer", "machine learning", "python", "developer", "software", "automation", "plc", "ai"]
32
- non_tech_terms = ["marketing", "finance", "content", "seo", "branding", "accounting", "creative"]
33
- tech_score = len(set(keywords).intersection(tech_terms))
34
- non_tech_score = len(set(keywords).intersection(non_tech_terms))
35
- return "Technical" if tech_score >= non_tech_score else "Non-Technical"
36
-
37
- # CV skill score
38
- def calculate_cv_score(text, keywords):
39
- ideal = " ".join(keywords)
40
- tfidf = TfidfVectorizer()
41
- tfidf_matrix = tfidf.fit_transform([text, ideal])
42
- score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
43
- return round(score * 100)
44
-
45
- # Dynamic skill suggestions using Open Skills API
46
- def suggest_upskilling(keywords):
47
- try:
48
- response = requests.get("https://publicapis.dev/api/skills")
49
- response.raise_for_status()
50
- all_skills = set(response.json().get("skills", []))
51
- current_skills = set([kw.lower() for kw in keywords])
52
- missing_skills = all_skills - current_skills
53
- return list(missing_skills)[:10]
54
- except Exception as e:
55
- st.error(f"Error fetching skills: {e}")
56
- return []
57
 
58
- # Certification suggestions using Free Certifications API
59
- def suggest_certifications(keywords):
60
- try:
61
- response = requests.get("https://free-certifications.com/api/certifications")
62
- response.raise_for_status()
63
- certifications = response.json().get("certifications", [])
64
- relevant_certs = [cert for cert in certifications if any(kw in cert.lower() for kw in keywords)]
65
- return relevant_certs[:10]
66
- except Exception as e:
67
- st.error(f"Error fetching certifications: {e}")
68
- return []
69
 
70
- # Scholarship suggestions using ScholarshipOwl API
71
- def suggest_scholarships(keywords):
72
- try:
73
- response = requests.get("https://docs.business.scholarshipowl.com/api/scholarships")
74
- response.raise_for_status()
75
- scholarships = response.json().get("scholarships", [])
76
- relevant_scholarships = [sch for sch in scholarships if any(kw in sch.lower() for kw in keywords)]
77
- return relevant_scholarships[:10]
78
- except Exception as e:
79
- st.error(f"Error fetching scholarships: {e}")
80
- return []
81
 
82
- # Education opportunities using DecidED API
83
- def suggest_education_opportunities(keywords):
84
- try:
85
- response = requests.get("https://decided.org/api/education")
86
- response.raise_for_status()
87
- programs = response.json().get("programs", [])
88
- relevant_programs = [prog for prog in programs if any(kw in prog.lower() for kw in keywords)]
89
- return relevant_programs[:10]
90
- except Exception as e:
91
- st.error(f"Error fetching education opportunities: {e}")
92
- return []
93
 
94
- # Visa opportunities using Visa Checker API
95
- def suggest_visa_opportunities(keywords):
96
  try:
97
- response = requests.get("https://zylalabs.com/api-marketplace/travel/visa%2Bchecker%2Bapi/2154")
98
- response.raise_for_status()
99
- visa_info = response.json().get("visa_requirements", [])
100
- relevant_visas = [visa for visa in visa_info if any(kw in visa.lower() for kw in keywords)]
101
- return relevant_visas[:10]
102
  except Exception as e:
103
- st.error(f"Error fetching visa opportunities: {e}")
104
- return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
- # Job listings using TheirStack Job Postings API
107
- def get_job_listings(keywords, location="Pakistan", results_per_page=10):
108
  try:
109
- query = "+".join(keywords)
110
- response = requests.get(f"https://theirstack.com/api/jobs?query={query}&location={location}&limit={results_per_page}")
111
  response.raise_for_status()
112
- jobs = response.json().get("jobs", [])
113
- return pd.DataFrame(jobs)
 
 
 
 
 
 
 
114
  except Exception as e:
115
- st.error(f"Error fetching job listings: {e}")
116
- return pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import json
3
  import requests
4
+ import spacy
5
  import streamlit as st
6
+ from transformers import pipeline
7
+ from sklearn.feature_extraction.text import CountVectorizer
8
  from sklearn.metrics.pairwise import cosine_similarity
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ # Load spaCy model
11
+ nlp_spacy = spacy.load("en_core_web_sm")
 
 
 
 
 
 
 
 
 
12
 
13
+ # Load LLM model (e.g., Mistral, OpenHermes, Phi-2) using Transformers pipeline
14
+ llm = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1", device=-1)
 
 
 
 
 
 
 
 
 
15
 
16
# Adzuna credentials. They are read from the environment so real keys never
# have to be committed; the placeholder strings are used when a variable is
# unset or empty, which matches the previous hard-coded defaults.
import os

ADZUNA_APP_ID = os.environ.get("ADZUNA_APP_ID") or "your_adzuna_app_id"
ADZUNA_APP_KEY = os.environ.get("ADZUNA_APP_KEY") or "your_adzuna_app_key"
19
+
20
def extract_keywords(cv_text, top_n=10):
    """Return up to ``top_n`` distinct short noun phrases found in a CV.

    The text is lower-cased and parsed with the module-level spaCy pipeline
    ``nlp_spacy``; noun chunks made of fewer than four whitespace-separated
    words are kept.

    Args:
        cv_text: Raw CV text. Empty or ``None`` yields an empty list.
        top_n: Maximum number of keywords to return.

    Returns:
        A list of unique keyword strings in order of first appearance.
    """
    if not cv_text:
        return []

    doc = nlp_spacy(cv_text.lower())
    short_phrases = [
        chunk.text for chunk in doc.noun_chunks if len(chunk.text.split()) < 4
    ]
    # dict.fromkeys de-duplicates while keeping document order. Going through
    # set() made the kept subset arbitrary and different from run to run.
    return list(dict.fromkeys(short_phrases))[:top_n]
 
 
 
24
 
25
def generate_with_llm(prompt, max_tokens=200):
    """Run the module-level ``llm`` pipeline on ``prompt`` and return its answer as lines.

    Args:
        prompt: Full instruction text sent to the model.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        A list of the non-empty, stripped lines of the model's completion.
        If anything fails, a single-item list holding an error message is
        returned instead of raising.
    """
    try:
        result = llm(prompt, max_new_tokens=max_tokens, do_sample=True, temperature=0.7)
        generated = result[0]['generated_text']
        # The generated text can begin with an echo of the prompt. Prompts
        # built in this module span many lines (they embed the CV), so
        # dropping only the first line handed the CV back as "output".
        # Remove the whole prompt prefix instead.
        if generated.startswith(prompt):
            generated = generated[len(prompt):]
        return [line.strip() for line in generated.splitlines() if line.strip()]
    except Exception as e:
        return [f"Error generating with LLM: {e}"]
31
+
32
def get_skills_suggestions(cv_text):
    """Ask the LLM which skills the CV's owner should learn next.

    Args:
        cv_text: Raw CV text embedded in the prompt.

    Returns:
        Whatever ``generate_with_llm`` returns for the skills prompt.
    """
    return generate_with_llm(
        "Based on the following CV:\n"
        f"{cv_text}\n"
        "Suggest the top 5 relevant technical or professional skills "
        "the user should learn to improve job opportunities."
    )
35
+
36
def get_certification_recommendations(cv_text):
    """Ask the LLM for certifications that would strengthen the CV.

    Args:
        cv_text: Raw CV text embedded in the prompt.

    Returns:
        Whatever ``generate_with_llm`` returns for the certification prompt.
    """
    return generate_with_llm(
        "Given the CV below, suggest 3 globally recognized certifications "
        "to improve their chances of getting a better job or promotion:\n\n"
        f"{cv_text}"
    )
39
+
40
def get_scholarship_suggestions(cv_text):
    """Ask the LLM for scholarship opportunities that fit the CV.

    Args:
        cv_text: Raw CV text embedded in the prompt.

    Returns:
        Whatever ``generate_with_llm`` returns for the scholarship prompt.
    """
    return generate_with_llm(
        "The following CV belongs to a person looking for international scholarships. "
        "Suggest 3 relevant scholarship opportunities or types of scholarships "
        "(e.g., STEM, MBA, Engineering) they could pursue:\n\n"
        f"{cv_text}"
    )
43
+
44
def get_education_opportunities(cv_text):
    """Ask the LLM for degree programs that complement the CV.

    Args:
        cv_text: Raw CV text embedded in the prompt.

    Returns:
        Whatever ``generate_with_llm`` returns for the education prompt.
    """
    return generate_with_llm(
        "Based on the CV content below, suggest 3 higher education programs "
        "or degree types (e.g., MBA, MSc in AI, MEng Industrial Design) "
        "that would complement the user's experience:\n\n"
        f"{cv_text}"
    )
47
+
48
def get_visa_pathways(cv_text, country="USA"):
    """Ask the LLM for visa routes into ``country`` that suit the CV.

    Args:
        cv_text: Raw CV text embedded in the prompt.
        country: Destination country named in the prompt.

    Returns:
        Whatever ``generate_with_llm`` returns for the visa prompt.
    """
    return generate_with_llm(
        f"Based on this CV and assuming the user wants to move to {country}, "
        "suggest the top 3 most relevant visa types or pathways "
        "for this person's profession and experience:\n\n"
        f"{cv_text}"
    )
51
+
52
def _summarize_adzuna_job(job):
    """Reduce one Adzuna result record to the fields the app displays."""
    # "company" / "location" may be present but null; `or {}` keeps one such
    # record from failing the whole response.
    company = job.get("company") or {}
    place = job.get("location") or {}
    return {
        "title": job.get("title"),
        "company": company.get("display_name"),
        "location": place.get("display_name"),
        "description": job.get("description"),
        "url": job.get("redirect_url"),
    }


def get_job_recommendations(cv_text, location="USA", results=10):
    """Fetch job postings from the Adzuna search API that match a CV.

    Keywords are extracted from the CV with ``extract_keywords`` and sent as
    the free-text query.

    Args:
        cv_text: Raw CV text used to derive the search keywords.
        location: Value sent as Adzuna's ``where`` parameter.
        results: Number of postings requested per page.

    Returns:
        A list of dicts with ``title``, ``company``, ``location``,
        ``description`` and ``url`` keys. On any request or parsing failure a
        single-item list is returned whose dict carries an error ``title``
        and the exception text as ``description``.
    """
    keywords = extract_keywords(cv_text)
    # requests percent-encodes parameter values, so a literal "+" separator
    # would reach the API as "%2B" and be searched verbatim. A space is
    # encoded correctly as a word separator.
    query = " ".join(keywords)

    # NOTE(review): the country segment is fixed to "us" regardless of
    # `location` -- confirm this is intended for non-US searches.
    url = "https://api.adzuna.com/v1/api/jobs/us/search/1"
    params = {
        "app_id": ADZUNA_APP_ID,
        "app_key": ADZUNA_APP_KEY,
        "what": query,
        "where": location,
        "results_per_page": results,
        "content-type": "application/json",
    }

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        job_data = response.json()
        return [_summarize_adzuna_job(job) for job in job_data.get("results", [])]
    except Exception as e:
        return [{"title": "Error fetching job listings", "description": str(e)}]
80
+
81
def get_cv_score(cv_text):
    """Score a CV from 0 to 100 by how many keywords it yields.

    Args:
        cv_text: Raw CV text passed to ``extract_keywords``.

    Returns:
        Ten points per extracted keyword, capped at 100.
    """
    keyword_count = len(extract_keywords(cv_text))
    points = keyword_count * 10
    return points if points < 100 else 100
86
+
87
def get_field_classification(cv_text):
    """Ask the LLM to name the professional field a CV belongs to.

    Args:
        cv_text: Raw CV text embedded in the prompt.

    Returns:
        The first non-blank line returned by ``generate_with_llm``, stripped
        of surrounding whitespace, or an empty string when there is none.
    """
    prompt = f"Based on this CV, determine the most relevant professional domain or field (e.g., Data Science, Civil Engineering, Marketing, Education):\n\n{cv_text}"
    lines = generate_with_llm(prompt, max_tokens=50)
    # Indexing [0] raised IndexError on an empty result and returned a blank
    # string whenever the first line was empty. Take the first line that
    # actually has content instead.
    return next((line.strip() for line in lines if line.strip()), "")
90
+
91
def get_personalized_advice(cv_text):
    """Ask the LLM for a short, encouraging career-advice paragraph.

    Args:
        cv_text: Raw CV text embedded in the prompt.

    Returns:
        The lines returned by ``generate_with_llm`` joined with newlines.
    """
    advice_lines = generate_with_llm(
        "This is a user's CV. Give a friendly, professional career "
        "counselor-style paragraph with guidance and encouragement "
        "for their next steps:\n\n"
        f"{cv_text}",
        max_tokens=150,
    )
    newline = "\n"
    return newline.join(advice_lines)
95
+