Spaces:
Sleeping
Sleeping
File size: 7,529 Bytes
856e6a7 ec9d8ec a21d2a8 ec9d8ec a21d2a8 856e6a7 ec9d8ec a21d2a8 ec9d8ec a21d2a8 fdf83c7 856e6a7 a21d2a8 ec9d8ec fdf83c7 ec9d8ec 856e6a7 fdf83c7 856e6a7 a21d2a8 0fd64e3 a21d2a8 ec9d8ec a21d2a8 b680025 a21d2a8 b680025 ec9d8ec 1fe7fa3 ec9d8ec 1fe7fa3 ec9d8ec 1fe7fa3 ec9d8ec 1fe7fa3 ec9d8ec 1fe7fa3 ec9d8ec 1fe7fa3 b680025 a21d2a8 ec9d8ec a21d2a8 ec9d8ec fdf83c7 a21d2a8 856e6a7 fdf83c7 856e6a7 ec9d8ec fdf83c7 ec9d8ec fdf83c7 a21d2a8 ec9d8ec 856e6a7 a21d2a8 ec9d8ec 856e6a7 a21d2a8 ec9d8ec 856e6a7 fdf83c7 ec9d8ec a21d2a8 ec9d8ec a21d2a8 ec9d8ec a21d2a8 856e6a7 fdf83c7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
import streamlit as st
import pandas as pd
import pdfplumber
import spacy
import requests
import plotly.express as px
from datetime import datetime, timedelta
# Page config: kept ahead of every other Streamlit call in this script.
st.set_page_config(page_title="Skill Scoring & Career Roadmap App", layout="wide")


@st.cache_resource
def _load_nlp_model():
    """Load the spaCy English pipeline once and reuse it across script reruns."""
    return spacy.load("en_core_web_sm")


@st.cache_data
def _load_dataset(path):
    """Read one CSV dataset; cached so script reruns skip the disk read.

    NOTE: because the result is cached, edits to the CSV on disk are only
    picked up after the cache is cleared or the server restarts.
    """
    return pd.read_csv(path)


# Load spaCy model
# Streamlit re-executes this whole script on every user interaction, so the
# model and the datasets go through the cached loaders above instead of being
# reloaded from disk each time.
nlp = _load_nlp_model()

# Load datasets
skills_df = _load_dataset("data/skills_dataset.csv")
countries_df = _load_dataset("data/countries_dataset.csv")
cert_df = _load_dataset("data/certifications.csv")
edu_tech_df = _load_dataset("data/education_technical.csv")
edu_non_tech_df = _load_dataset("data/education_non_technical.csv")
scholarship_df = _load_dataset("data/scholarships_dataset.csv")
# Helper functions
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        file: Whatever ``pdfplumber.open`` accepts; the caller in this file
            passes the object returned by ``st.file_uploader``.

    Returns:
        str: The per-page texts joined with a newline. Pages whose extraction
        result is empty or ``None`` are skipped, so the result is an empty
        string when no page yields text.
    """
    with pdfplumber.open(file) as pdf:
        # Extract each page exactly once: the text is needed both to decide
        # whether the page is kept and for the join itself.
        page_texts = [page.extract_text() for page in pdf.pages]
    return "\n".join(text for text in page_texts if text)
def extract_entities(text):
    """Find known skills in CV text and classify the candidate's background.

    The text is tokenized with the module-level spaCy pipeline ``nlp`` and
    every run of consecutive tokens is compared against the ``Skill`` column
    of ``skills_df``. Runs longer than one token are checked so that
    multi-word skills such as "Machine Learning" can match; comparing single
    tokens alone can never produce them.

    Args:
        text: Raw text of the CV.

    Returns:
        tuple: ``(skills, background, years_exp)`` where

        * ``skills`` is a list of the distinct matched skill names, in a
          deterministic order (shorter phrases first, then by position in
          the text);
        * ``background`` is ``"technical"`` if any matched skill is in the
          hard-coded technical set below, otherwise ``"non-technical"``;
        * ``years_exp`` is currently a fixed placeholder value.
    """
    doc = nlp(text)

    # Build the lookup once as a set: O(1) membership per candidate instead of
    # a linear scan of the column for every token.
    known_skills = {name for name in skills_df['Skill'] if isinstance(name, str)}
    # Longest skill name, measured in whitespace-separated words, bounds the
    # length of the token runs that need checking.
    max_words = max((len(name.split()) for name in known_skills), default=1)

    matches = []
    for size in range(1, max_words + 1):
        for start in range(len(doc) - size + 1):
            candidate = doc[start:start + size].text
            if candidate in known_skills:
                matches.append(candidate)

    # dict.fromkeys de-duplicates while keeping first-seen order, so callers
    # that index into the list (e.g. skills[0]) get a stable result.
    skills = list(dict.fromkeys(matches))

    technical_skills = {"Python", "Machine Learning", "Cloud Computing", "Cybersecurity", "AI", "DevOps"}
    background = "technical" if any(s in technical_skills for s in skills) else "non-technical"
    years_exp = 3  # Placeholder, replace with better extraction logic
    return skills, background, years_exp
def score_skills(user_skills):
    """Score the user's skills as a whole-number percentage of the dataset.

    Args:
        user_skills: Collection of skills recognised for the user.

    Returns:
        int: ``len(user_skills)`` divided by the number of rows in
        ``skills_df``, times 100, truncated to an integer; 0 when the
        dataset has no rows.
    """
    total_rows = len(skills_df)
    if total_rows == 0:
        return 0
    coverage = len(user_skills) / total_rows
    return int(coverage * 100)
def recommend_countries(skills, years_exp):
    """Select country/job rows that fit the user's skills and experience.

    Args:
        skills: Skill names to look up in the ``Skill`` column of
            ``countries_df``.
        years_exp: Years of experience; rows whose ``MinExperience`` exceeds
            this value are excluded.

    Returns:
        pandas.DataFrame: Distinct ``Country``, ``JobTitle``,
        ``AverageSalary`` and ``VisaPath`` rows with a fresh 0-based index.
    """
    wanted_columns = ["Country", "JobTitle", "AverageSalary", "VisaPath"]
    # Filter in two steps, skill first, so the experience comparison only
    # ever runs on rows that already matched a skill.
    by_skill = countries_df.loc[countries_df['Skill'].isin(skills)]
    eligible = by_skill.loc[by_skill['MinExperience'] <= years_exp]
    distinct = eligible.loc[:, wanted_columns].drop_duplicates()
    return distinct.reset_index(drop=True)
def recommend_certifications(skills):
    """Return the rows of ``cert_df`` whose ``Skill`` is one of ``skills``.

    Args:
        skills: Skill names to match against the ``Skill`` column.

    Returns:
        pandas.DataFrame: The matching rows with a fresh 0-based index.
    """
    skill_matches = cert_df['Skill'].isin(skills)
    matching_certs = cert_df.loc[skill_matches]
    return matching_certs.reset_index(drop=True)
def recommend_education(background):
    """Pick the education dataset that corresponds to the user's background.

    Args:
        background: ``"technical"`` selects ``edu_tech_df``; any other value
            selects ``edu_non_tech_df``.

    Returns:
        pandas.DataFrame: The selected dataset with a fresh 0-based index.
    """
    if background == "technical":
        programmes = edu_tech_df
    else:
        programmes = edu_non_tech_df
    return programmes.reset_index(drop=True)
def recommend_scholarships(field):
    """Return scholarships whose ``Field`` equals ``field``, ignoring case.

    Args:
        field: Field name to look for; compared in lower case against the
            lower-cased ``Field`` column of ``scholarship_df``.

    Returns:
        pandas.DataFrame: The matching rows with a fresh 0-based index.
    """
    target = field.lower()
    normalized_fields = scholarship_df["Field"].str.lower()
    same_field = normalized_fields == target
    return scholarship_df[same_field].reset_index(drop=True)
def fetch_jobs(skill, country_code="us", max_results=5):
    """Query the Adzuna job-search API for listings that mention a skill.

    Args:
        skill: Search phrase sent as the ``what`` query parameter.
        country_code: Country segment of the API URL (default ``"us"``).
        max_results: Value sent as ``results_per_page`` (default 5).

    Returns:
        list: The ``results`` list from the JSON response. An empty list is
        returned when the request fails, times out, answers with a non-200
        status, or the body is not a JSON object containing ``results``.
    """
    import os  # local import so this block does not depend on the file header

    # NOTE(review): the fallback credentials below are committed to source
    # control. They are kept so existing deployments keep working, but they
    # should be rotated and supplied only via the environment / secrets store.
    app_id = os.environ.get("ADZUNA_APP_ID", "f4efd3a2")
    app_key = os.environ.get("ADZUNA_APP_KEY", "5702f3c0507ac69f98aa15f855b06901")

    url = f"https://api.adzuna.com/v1/api/jobs/{country_code}/search/1"
    params = {
        "app_id": app_id,
        "app_key": app_key,
        "results_per_page": max_results,
        "what": skill,
        "content-type": "application/json",
    }

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.get(url, params=params, timeout=10)
        if response.status_code != 200:
            return []
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a body that is not valid JSON: job listings are
        # a best-effort extra, so degrade to "no results" instead of crashing.
        return []

    if not isinstance(payload, dict):
        return []
    return payload.get("results") or []
def _roadmap_entry(task, origin, start_day, finish_day):
    """Build one roadmap row running from start_day to finish_day days after origin."""
    return {
        "Task": task,
        "Start": (origin + timedelta(days=start_day)).strftime("%Y-%m-%d"),
        "Finish": (origin + timedelta(days=finish_day)).strftime("%Y-%m-%d"),
    }


def create_dynamic_roadmap(skills, certs, scholarships, edu_opps):
    """Lay out recommended certifications, scholarships and study as a timeline.

    All dates are offsets from the moment of the call:

    * up to two certifications, back to back in 30-day slots starting today;
    * up to two scholarship applications, in 30-day slots starting at day 60;
    * at most one education program, from day 120 to day 480.

    Args:
        skills: Accepted for interface compatibility; not used.
        certs: DataFrame; its ``Certification`` column is read if present.
        scholarships: DataFrame; its ``Scholarship`` column is read if present.
        edu_opps: DataFrame; its ``Program`` column is read if present.

    Returns:
        pandas.DataFrame: One row per task with string columns ``Task``,
        ``Start`` and ``Finish`` (dates as ``YYYY-MM-DD``). The three columns
        are present even when there are no tasks.
    """
    now = datetime.now()
    roadmap = []

    # Add certifications to roadmap
    if not certs.empty and "Certification" in certs.columns:
        for i, cert in enumerate(certs['Certification'].tolist()[:2]):
            roadmap.append(
                _roadmap_entry(f"Complete Certification: {cert}", now, i * 30, (i + 1) * 30)
            )

    # Add scholarships to roadmap
    if not scholarships.empty and "Scholarship" in scholarships.columns:
        for i, scholarship in enumerate(scholarships['Scholarship'].tolist()[:2]):
            roadmap.append(
                _roadmap_entry(f"Apply for Scholarship: {scholarship}", now, 60 + i * 30, 90 + i * 30)
            )

    # Add education opportunities to roadmap
    if not edu_opps.empty and "Program" in edu_opps.columns:
        for program in edu_opps['Program'].tolist()[:1]:
            roadmap.append(_roadmap_entry(f"Pursue Education: {program}", now, 120, 480))

    # Passing the columns explicitly keeps the schema stable when the list of
    # tasks is empty, so callers can always rely on Task/Start/Finish existing.
    return pd.DataFrame(roadmap, columns=["Task", "Start", "Finish"])
# Streamlit UI
# Flow: upload a PDF CV -> extract skills -> show score, recommendations,
# a roadmap timeline and live job listings for the first identified skill.
#
# NOTE(review): the emoji prefixes of the headings in this section were
# mis-decoded (one heading was even split across two lines, which is a syntax
# error). "✅" and "📤" were restored from the surviving bytes and context;
# the remaining prefixes were not recoverable, so the stray character was
# dropped. Re-add emoji here if desired.


def _show_table(df, empty_message):
    """Render df as a table, or write empty_message when it has no rows."""
    if not df.empty:
        st.dataframe(df)
    else:
        st.write(empty_message)


# Value of the "Country" column -> country code used in the job-search URL.
_COUNTRY_CODES = {
    "USA": "us",
    "Canada": "ca",
    "UK": "gb",
    "Germany": "de",
    "Australia": "au",
    "India": "in",
    "Netherlands": "nl",
}

st.title("Personalized Skill Scoring & Career Roadmap App")
st.markdown("Upload your CV and get a detailed career roadmap with live job listings.")
uploaded_file = st.file_uploader("📤 Upload your CV (PDF only)", type=["pdf"])

if uploaded_file:
    with st.spinner("Analyzing your CV..."):
        text = extract_text_from_pdf(uploaded_file)
        skills, background, years_exp = extract_entities(text)
        score = score_skills(skills)
        country_info = recommend_countries(skills, years_exp)
        certs = recommend_certifications(skills)
        edu = recommend_education(background)
        field = background  # Simplified; you should detect actual field from CV
        scholarships = recommend_scholarships(field)

    st.subheader("✅ Identified Skills")
    st.write(skills or "No recognized skills found.")

    st.subheader("Skill Score")
    st.metric("Your Skill Score", f"{score}/100")

    st.subheader("Job Opportunities & Country Recommendations")
    _show_table(country_info, "No country/job recommendations available for your skill set.")

    st.subheader("Recommended Certifications")
    _show_table(certs, "No certification recommendations available.")

    st.subheader("Higher Education Opportunities")
    _show_table(edu, "No higher education opportunities available.")

    st.subheader("Scholarship Opportunities")
    _show_table(scholarships, "No scholarships available for your field.")

    # Dynamic roadmap timeline generation & display with checks
    roadmap_df = create_dynamic_roadmap(skills, certs, scholarships, edu)
    st.write("Roadmap DataFrame preview:")
    st.dataframe(roadmap_df)
    required_cols = {"Task", "Start", "Finish"}
    if not roadmap_df.empty and required_cols.issubset(roadmap_df.columns):
        fig = px.timeline(
            roadmap_df,
            x_start="Start",
            x_end="Finish",
            y="Task",
            title="Career Roadmap Timeline",
        )
        # Reverse the y axis so the first task is drawn at the top.
        fig.update_yaxes(autorange="reversed")
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.warning("No roadmap tasks to display or roadmap data missing required columns.")

    # Show live job listings using first identified skill and first country code
    if skills and not country_info.empty:
        st.subheader(f"Live Job Listings for '{skills[0]}'")
        # Countries without a known code fall back to the US listings.
        country_code = _COUNTRY_CODES.get(country_info.iloc[0]["Country"], "us")
        jobs = fetch_jobs(skills[0], country_code=country_code, max_results=5)
        if jobs:
            for job in jobs:
                # The listings come from an external API, so tolerate records
                # with missing fields instead of raising KeyError mid-render.
                title = job.get("title", "Untitled role")
                link = job.get("redirect_url", "")
                place = (job.get("location") or {}).get("display_name", "")
                summary = (job.get("description") or "")[:200]
                st.markdown(f"**[{title}]({link})** - {place}")
                st.markdown(f"*{summary}...*")
                st.markdown("---")
        else:
            st.write("No live job listings found.")
else:
    st.info("Please upload your CV to begin.")
|