Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -1,116 +1,95 @@
|
|
|
|
|
|
|
|
| 1 |
import requests
|
| 2 |
-
import
|
| 3 |
import streamlit as st
|
| 4 |
-
from
|
|
|
|
| 5 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
-
from keybert import KeyBERT
|
| 7 |
-
|
| 8 |
-
# Keyword extraction
|
| 9 |
-
def extract_keywords(text, num_keywords=10):
    """Extract up to *num_keywords* keywords from *text*, lower-cased.

    Uses KeyBERT with English stop-words removed; only the keyword
    strings are returned, not their scores.
    """
    model = KeyBERT()
    extracted = model.extract_keywords(text, top_n=num_keywords, stop_words='english')
    return [phrase.lower() for phrase, _score in extracted]
|
| 13 |
-
|
| 14 |
-
# Field identification
|
| 15 |
-
def identify_field(keywords):
    """Return the professional field whose vocabulary best matches *keywords*.

    Counts exact keyword/term matches per field; ties are broken in favour
    of the field listed first.
    """
    field_terms = {
        "Engineering": ["engineer", "mechanical", "electrical", "civil", "plc", "automation"],
        "Data Science": ["machine learning", "data", "python", "statistics", "ai"],
        "Software Development": ["developer", "software", "backend", "frontend", "javascript"],
        "Marketing": ["seo", "content", "marketing", "branding"],
        "Finance": ["accounting", "finance", "budget", "tax"],
        "Design": ["photoshop", "illustrator", "design", "creative"],
        "Healthcare": ["nursing", "surgery", "hospital", "patient"],
        "Construction": ["carpentry", "plumbing", "hvac", "gardening", "mining"]
    }
    keyword_set = set(keywords)
    best_field = None
    best_hits = -1
    for field, terms in field_terms.items():
        hits = sum(1 for term in terms if term in keyword_set)
        if hits > best_hits:
            best_field = field
            best_hits = hits
    return best_field
|
| 28 |
-
|
| 29 |
-
# Technical background
|
| 30 |
-
def is_technical_background(keywords):
    """Classify *keywords* as "Technical" or "Non-Technical".

    Compares how many keywords fall in each vocabulary; equal counts
    (including the empty case) are classified as "Technical".
    """
    technical = {"engineer", "machine learning", "python", "developer", "software", "automation", "plc", "ai"}
    non_technical = {"marketing", "finance", "content", "seo", "branding", "accounting", "creative"}
    found = set(keywords)
    if len(found & technical) >= len(found & non_technical):
        return "Technical"
    return "Non-Technical"
|
| 36 |
-
|
| 37 |
-
# CV skill score
|
| 38 |
-
def calculate_cv_score(text, keywords):
    """Score *text* from 0-100 by TF-IDF cosine similarity to *keywords*.

    The keywords are joined into a single "ideal" document; the rounded
    percentage similarity between the CV text and that document is returned.
    """
    reference = " ".join(keywords)
    vectorizer = TfidfVectorizer()
    matrix = vectorizer.fit_transform([text, reference])
    similarity = cosine_similarity(matrix[0:1], matrix[1:2])[0][0]
    return round(100 * similarity)
|
| 44 |
-
|
| 45 |
-
# Dynamic skill suggestions using Open Skills API
|
| 46 |
-
def suggest_upskilling(keywords):
    """Suggest up to 10 skills the user lacks, using the Open Skills API.

    On any failure a Streamlit error is shown and an empty list returned.
    """
    try:
        resp = requests.get("https://publicapis.dev/api/skills")
        resp.raise_for_status()
        known_skills = set(resp.json().get("skills", []))
        owned_skills = {kw.lower() for kw in keywords}
        return list(known_skills - owned_skills)[:10]
    except Exception as e:
        st.error(f"Error fetching skills: {e}")
        return []
|
| 57 |
|
| 58 |
-
#
|
| 59 |
-
|
| 60 |
-
try:
|
| 61 |
-
response = requests.get("https://free-certifications.com/api/certifications")
|
| 62 |
-
response.raise_for_status()
|
| 63 |
-
certifications = response.json().get("certifications", [])
|
| 64 |
-
relevant_certs = [cert for cert in certifications if any(kw in cert.lower() for kw in keywords)]
|
| 65 |
-
return relevant_certs[:10]
|
| 66 |
-
except Exception as e:
|
| 67 |
-
st.error(f"Error fetching certifications: {e}")
|
| 68 |
-
return []
|
| 69 |
|
| 70 |
-
#
|
| 71 |
-
|
| 72 |
-
try:
|
| 73 |
-
response = requests.get("https://docs.business.scholarshipowl.com/api/scholarships")
|
| 74 |
-
response.raise_for_status()
|
| 75 |
-
scholarships = response.json().get("scholarships", [])
|
| 76 |
-
relevant_scholarships = [sch for sch in scholarships if any(kw in sch.lower() for kw in keywords)]
|
| 77 |
-
return relevant_scholarships[:10]
|
| 78 |
-
except Exception as e:
|
| 79 |
-
st.error(f"Error fetching scholarships: {e}")
|
| 80 |
-
return []
|
| 81 |
|
| 82 |
-
#
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
except Exception as e:
|
| 91 |
-
st.error(f"Error fetching education opportunities: {e}")
|
| 92 |
-
return []
|
| 93 |
|
| 94 |
-
|
| 95 |
-
def suggest_visa_opportunities(keywords):
|
| 96 |
try:
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
visa_info = response.json().get("visa_requirements", [])
|
| 100 |
-
relevant_visas = [visa for visa in visa_info if any(kw in visa.lower() for kw in keywords)]
|
| 101 |
-
return relevant_visas[:10]
|
| 102 |
except Exception as e:
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
-
# Job listings using TheirStack Job Postings API
|
| 107 |
-
def get_job_listings(keywords, location="Pakistan", results_per_page=10):
|
| 108 |
try:
|
| 109 |
-
|
| 110 |
-
response = requests.get(f"https://theirstack.com/api/jobs?query={query}&location={location}&limit={results_per_page}")
|
| 111 |
response.raise_for_status()
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
except Exception as e:
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import json
|
| 3 |
import requests
|
| 4 |
+
import spacy
|
| 5 |
import streamlit as st
|
| 6 |
+
from transformers import pipeline
|
| 7 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
| 8 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
# Load spaCy model
# NOTE(review): runs at import time; requires `python -m spacy download en_core_web_sm`
# to have been run beforehand, otherwise this raises OSError.
nlp_spacy = spacy.load("en_core_web_sm")

# Load LLM model (e.g., Mistral, OpenHermes, Phi-2) using Transformers pipeline
# NOTE(review): device=-1 forces CPU; loading a 7B model here downloads many GB
# and can take minutes/exhaust RAM at import — confirm this is intended.
llm = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1", device=-1)

# Adzuna credentials (replace with your actual app_id and app_key)
# These are placeholders; API calls in get_job_recommendations will fail until set.
ADZUNA_APP_ID = "your_adzuna_app_id"
ADZUNA_APP_KEY = "your_adzuna_app_key"
+
def extract_keywords(cv_text, top_n=10):
    """Extract up to *top_n* short keyword phrases from a CV.

    Keeps noun chunks of fewer than four words, lower-cased, de-duplicated
    while preserving first-occurrence order.

    Args:
        cv_text: Raw CV text.
        top_n: Maximum number of keywords to return.

    Returns:
        A list of at most ``top_n`` unique keyword strings.
    """
    doc = nlp_spacy(cv_text.lower())
    chunks = (chunk.text for chunk in doc.noun_chunks if len(chunk.text.split()) < 4)
    # dict.fromkeys de-duplicates deterministically; the previous
    # list(set(...)) produced a different keyword order on every run
    # (hash randomization), making scores and queries non-reproducible.
    return list(dict.fromkeys(chunks))[:top_n]
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
def generate_with_llm(prompt, max_tokens=200):
    """Generate text with the module-level ``llm`` pipeline.

    Args:
        prompt: Instruction/prompt string (may span multiple lines).
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        A list of generated lines, or a one-element list containing an
        error message if generation fails.
    """
    try:
        result = llm(prompt, max_new_tokens=max_tokens, do_sample=True, temperature=0.7)
        generated = result[0]['generated_text']
        # The pipeline echoes the prompt at the start of generated_text.
        # The old split("\n")[1:] only dropped the FIRST line, so for the
        # multi-line prompts used in this module most of the prompt (the
        # whole CV) was returned as "generated" output. Strip the full
        # prompt prefix instead.
        if generated.startswith(prompt):
            generated = generated[len(prompt):]
        return generated.lstrip("\n").splitlines()
    except Exception as e:
        return [f"Error generating with LLM: {e}"]
|
| 31 |
+
|
| 32 |
+
def get_skills_suggestions(cv_text):
    """Ask the LLM for the top 5 skills that would improve the user's job prospects."""
    return generate_with_llm(
        f"Based on the following CV:\n{cv_text}\nSuggest the top 5 relevant technical or professional skills the user should learn to improve job opportunities."
    )
|
| 35 |
+
|
| 36 |
+
def get_certification_recommendations(cv_text):
    """Ask the LLM for 3 globally recognized certifications that strengthen this CV."""
    query = f"Given the CV below, suggest 3 globally recognized certifications to improve their chances of getting a better job or promotion:\n\n{cv_text}"
    return generate_with_llm(query)
|
| 39 |
+
|
| 40 |
+
def get_scholarship_suggestions(cv_text):
    """Ask the LLM for 3 scholarship opportunities or types matching this CV."""
    return generate_with_llm(
        f"The following CV belongs to a person looking for international scholarships. Suggest 3 relevant scholarship opportunities or types of scholarships (e.g., STEM, MBA, Engineering) they could pursue:\n\n{cv_text}"
    )
|
| 43 |
+
|
| 44 |
+
def get_education_opportunities(cv_text):
    """Ask the LLM for 3 higher-education programs that complement the CV."""
    llm_prompt = f"Based on the CV content below, suggest 3 higher education programs or degree types (e.g., MBA, MSc in AI, MEng Industrial Design) that would complement the user's experience:\n\n{cv_text}"
    return generate_with_llm(llm_prompt)
|
| 47 |
+
|
| 48 |
+
def get_visa_pathways(cv_text, country="USA"):
    """Ask the LLM for the top 3 visa types or pathways to *country* for this profile."""
    return generate_with_llm(
        f"Based on this CV and assuming the user wants to move to {country}, suggest the top 3 most relevant visa types or pathways for this person's profession and experience:\n\n{cv_text}"
    )
|
| 51 |
+
|
| 52 |
+
def get_job_recommendations(cv_text, location="USA", results=10):
    """Fetch job listings from the Adzuna search API that match the CV.

    Args:
        cv_text: Raw CV text; keywords are extracted from it for the query.
        location: Free-text location for Adzuna's "where" filter.
        results: Maximum number of listings to return.

    Returns:
        A list of job dicts (title/company/location/description/url); on
        any failure, a one-element list describing the error.
    """
    keywords = extract_keywords(cv_text)
    query = "+".join(keywords)

    # NOTE(review): the endpoint pins the Adzuna country code to "us" even
    # though `location` is a parameter — confirm whether the country segment
    # should be derived from `location`. (Dropped the pointless f-string the
    # original used on this placeholder-free URL.)
    url = "https://api.adzuna.com/v1/api/jobs/us/search/1"
    params = {
        "app_id": ADZUNA_APP_ID,
        "app_key": ADZUNA_APP_KEY,
        "what": query,
        "where": location,
        "results_per_page": results,
        "content-type": "application/json"
    }

    try:
        # Timeout keeps the app responsive if Adzuna hangs (previously absent,
        # so a stalled connection would block indefinitely).
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        job_data = response.json()
        return [
            {
                "title": job.get("title"),
                "company": job.get("company", {}).get("display_name"),
                "location": job.get("location", {}).get("display_name"),
                "description": job.get("description"),
                "url": job.get("redirect_url")
            }
            for job in job_data.get("results", [])
        ]
    except Exception as e:
        return [{"title": "Error fetching job listings", "description": str(e)}]
|
| 80 |
+
|
| 81 |
+
def get_cv_score(cv_text):
    """Score a CV from 0-100: ten points per extracted keyword, capped at 100."""
    extracted = extract_keywords(cv_text)
    points = 10 * len(extracted)
    return points if points < 100 else 100
|
| 86 |
+
|
| 87 |
+
def get_field_classification(cv_text):
    """Return the single most relevant professional field for the CV.

    Args:
        cv_text: Raw CV text.

    Returns:
        The first line of the LLM's answer, or an empty string when the
        LLM produced no output (the original indexed ``[0]`` unconditionally
        and raised IndexError on an empty result).
    """
    prompt = f"Based on this CV, determine the most relevant professional domain or field (e.g., Data Science, Civil Engineering, Marketing, Education):\n\n{cv_text}"
    lines = generate_with_llm(prompt, max_tokens=50)
    return lines[0] if lines else ""
|
| 90 |
+
|
| 91 |
+
def get_personalized_advice(cv_text):
    """Return a counselor-style paragraph of career guidance for this CV."""
    advice_lines = generate_with_llm(
        f"This is a user's CV. Give a friendly, professional career counselor-style paragraph with guidance and encouragement for their next steps:\n\n{cv_text}",
        max_tokens=150,
    )
    return "\n".join(advice_lines)
|
| 95 |
+
|