# Danial7's picture
# Update app.py
# ec9d8ec verified
import streamlit as st
import pandas as pd
import pdfplumber
import spacy
import requests
import plotly.express as px
from datetime import datetime, timedelta
# Page config: sets the browser tab title and switches the app to the wide layout.
# NOTE(review): Streamlit is generally expected to see set_page_config before
# any other st.* call -- keep this as the first Streamlit statement; confirm
# against the pinned Streamlit version.
st.set_page_config(page_title="Skill Scoring & Career Roadmap App", layout="wide")
# Load spaCy model (used by extract_entities to tokenize the CV text).
nlp = spacy.load("en_core_web_sm")
# Load datasets. Paths are relative, so they resolve against the process's
# working directory. Columns the code below reads from each frame:
#   skills_df       -> "Skill"
#   countries_df    -> "Skill", "MinExperience", "Country", "JobTitle",
#                      "AverageSalary", "VisaPath"
#   cert_df         -> "Skill", "Certification"
#   edu_*_df        -> "Program"
#   scholarship_df  -> "Field", "Scholarship"
# NOTE(review): these loads run at module level; presumably Streamlit re-runs
# them on every script rerun -- consider caching if startup becomes slow.
skills_df = pd.read_csv("data/skills_dataset.csv")
countries_df = pd.read_csv("data/countries_dataset.csv")
cert_df = pd.read_csv("data/certifications.csv")
edu_tech_df = pd.read_csv("data/education_technical.csv")
edu_non_tech_df = pd.read_csv("data/education_non_technical.csv")
scholarship_df = pd.read_csv("data/scholarships_dataset.csv")
# Helper functions
def extract_text_from_pdf(file):
    """Return the text of every page in a PDF, joined by newlines.

    Args:
        file: Anything ``pdfplumber.open`` accepts (the caller passes the
            Streamlit upload object).

    Returns:
        str: Page texts joined with ``"\\n"``. Pages for which
        ``extract_text()`` yields nothing (``None`` or ``""``) are skipped,
        so the result is ``""`` when no page has extractable text.
    """
    with pdfplumber.open(file) as pdf:
        # Extract each page exactly once; the previous version called
        # extract_text() twice per page (once to filter, once to join).
        page_texts = (page.extract_text() for page in pdf.pages)
        # The join consumes the generator while the PDF is still open.
        return "\n".join(content for content in page_texts if content)
def extract_entities(text):
    """Detect known skills in CV text and classify the candidate's background.

    A skill is "known" when it appears in ``skills_df['Skill']``. Single-word
    skills are matched against spaCy tokens (exact, case-sensitive). Skills
    made of several words can never equal a single token, so they are matched
    as whole phrases against the raw text instead.

    Args:
        text: Plain text extracted from the CV.

    Returns:
        tuple: ``(skills, background, years_exp)`` where ``skills`` is a
        sorted list of distinct matched skill names (spelled as in the
        dataset), ``background`` is ``"technical"`` if any matched skill is
        in the hard-coded technical set, else ``"non-technical"``, and
        ``years_exp`` is currently a fixed placeholder.
    """
    import re  # local import: keeps this block self-contained

    # Build the lookup once instead of scanning the column for every token.
    known_skills = {
        name for name in skills_df["Skill"].dropna() if isinstance(name, str)
    }

    doc = nlp(text)
    found = {token.text for token in doc if token.text in known_skills}

    # Multi-word skills: match the phrase as a whole, tolerating any run of
    # whitespace (including line breaks from PDF extraction) between words.
    for name in known_skills - found:
        words = name.split()
        if len(words) < 2:
            continue
        phrase = r"(?<!\w)" + r"\s+".join(re.escape(w) for w in words) + r"(?!\w)"
        if re.search(phrase, text):
            found.add(name)

    technical_skills = {
        "Python",
        "Machine Learning",
        "Cloud Computing",
        "Cybersecurity",
        "AI",
        "DevOps",
    }
    background = (
        "non-technical" if technical_skills.isdisjoint(found) else "technical"
    )

    years_exp = 3  # Placeholder, replace with better extraction logic

    # Sorted so the result (and the caller's use of skills[0]) is deterministic
    # rather than dependent on set iteration order.
    return sorted(found), background, years_exp
def score_skills(user_skills):
    """Score the user's skills as a percentage of the skills dataset size.

    Args:
        user_skills: Collection of skills identified for the user.

    Returns:
        int: ``len(user_skills)`` divided by the number of rows in
        ``skills_df``, times 100, truncated to an integer; ``0`` when the
        dataset has no rows.
    """
    dataset_size = len(skills_df)
    if dataset_size == 0:
        return 0
    coverage = len(user_skills) / dataset_size
    return int(coverage * 100)
def recommend_countries(skills, years_exp):
    """List country/job options matching the user's skills and experience.

    Args:
        skills: Skill names to look up in ``countries_df['Skill']``.
        years_exp: Years of experience; rows whose ``MinExperience`` exceeds
            this value are excluded.

    Returns:
        pandas.DataFrame: Distinct ``Country``, ``JobTitle``,
        ``AverageSalary`` and ``VisaPath`` rows with a fresh 0-based index.
    """
    output_columns = ["Country", "JobTitle", "AverageSalary", "VisaPath"]

    # Filter by skill first, then apply the experience cap to what remains.
    skill_matches = countries_df.loc[countries_df["Skill"].isin(skills)]
    within_reach = skill_matches.loc[skill_matches["MinExperience"] <= years_exp]

    distinct_rows = within_reach[output_columns].drop_duplicates()
    return distinct_rows.reset_index(drop=True)
def recommend_certifications(skills):
    """Return the rows of ``cert_df`` whose ``Skill`` is one of *skills*.

    Args:
        skills: Skill names to look up.

    Returns:
        pandas.DataFrame: Matching certification rows, re-indexed from 0.
    """
    is_relevant = cert_df["Skill"].isin(skills)
    relevant_certs = cert_df[is_relevant]
    return relevant_certs.reset_index(drop=True)
def recommend_education(background):
    """Pick the education dataset that corresponds to the user's background.

    Args:
        background: ``"technical"`` selects ``edu_tech_df``; any other value
            selects ``edu_non_tech_df``.

    Returns:
        pandas.DataFrame: The selected dataset, re-indexed from 0.
    """
    if background == "technical":
        programmes = edu_tech_df
    else:
        programmes = edu_non_tech_df
    return programmes.reset_index(drop=True)
def recommend_scholarships(field):
    """Return scholarships whose ``Field`` equals *field*, ignoring case.

    Args:
        field: Field name to look for.

    Returns:
        pandas.DataFrame: Matching rows of ``scholarship_df``, re-indexed
        from 0.
    """
    # Lower-case both sides so the comparison is case-insensitive.
    dataset_fields = scholarship_df["Field"].str.lower()
    wanted = field.lower()
    hits = scholarship_df[dataset_fields == wanted]
    return hits.reset_index(drop=True)
def fetch_jobs(skill, country_code="us", max_results=5):
    """Fetch live job listings for a skill from the Adzuna search API.

    This is a best-effort lookup: any failure (network error, timeout,
    non-200 status, unparsable or unexpected payload) yields an empty list
    instead of raising, so the page keeps rendering.

    Args:
        skill: Search keywords, sent as the ``what`` query parameter.
        country_code: Country segment of the API URL (default ``"us"``).
        max_results: Value sent as ``results_per_page``.

    Returns:
        list: The ``results`` entries of the JSON response, or ``[]``.
    """
    import os  # local import: only needed for the credential override

    # NOTE(review): credentials are committed in source. They remain as the
    # fallback so existing deployments keep working, but they should be
    # supplied via ADZUNA_APP_ID / ADZUNA_APP_KEY and the committed key rotated.
    app_id = os.environ.get("ADZUNA_APP_ID", "f4efd3a2")
    app_key = os.environ.get("ADZUNA_APP_KEY", "5702f3c0507ac69f98aa15f855b06901")

    url = f"https://api.adzuna.com/v1/api/jobs/{country_code}/search/1"
    params = {
        "app_id": app_id,
        "app_key": app_key,
        "results_per_page": max_results,
        "what": skill,
        "content-type": "application/json",
    }

    try:
        # Without a timeout a stalled connection would hang the app.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        return []

    if response.status_code != 200:
        return []

    try:
        payload = response.json()
    except ValueError:
        # Body was not valid JSON.
        return []

    if not isinstance(payload, dict):
        return []
    return payload.get("results", [])
def _leading_entries(frame, column, limit):
    """Return up to *limit* distinct, non-null values of ``frame[column]`` in order."""
    if frame.empty or column not in frame.columns:
        return []
    return frame[column].dropna().drop_duplicates().head(limit).tolist()


def create_dynamic_roadmap(skills, certs, scholarships, edu_opps):
    """Build a dated task list from the recommended certs, scholarships and education.

    Offsets are in days from the moment of the call:
      * up to two certifications, back to back in 30-day slots starting at day 0;
      * up to two scholarship applications, 30-day slots starting at day 60;
      * one education programme from day 120 to day 480.

    Missing values and repeated names are skipped, so the same item is never
    scheduled twice and no "nan" task is produced.

    Args:
        skills: Unused; kept so existing callers do not break.
        certs: DataFrame read for its ``Certification`` column.
        scholarships: DataFrame read for its ``Scholarship`` column.
        edu_opps: DataFrame read for its ``Program`` column.

    Returns:
        pandas.DataFrame: Columns ``Task``, ``Start``, ``Finish`` (dates as
        ``YYYY-MM-DD`` strings). The columns are present even when there are
        no tasks.
    """
    now = datetime.now()

    def task(label, start_offset, finish_offset):
        # One roadmap row; offsets are days relative to `now`.
        return {
            "Task": label,
            "Start": (now + timedelta(days=start_offset)).strftime("%Y-%m-%d"),
            "Finish": (now + timedelta(days=finish_offset)).strftime("%Y-%m-%d"),
        }

    roadmap = []

    # Certifications: consecutive 30-day slots.
    for slot, cert in enumerate(_leading_entries(certs, "Certification", 2)):
        roadmap.append(
            task(f"Complete Certification: {cert}", slot * 30, (slot + 1) * 30)
        )

    # Scholarships: start after the certification window (day 60 onwards).
    for slot, scholarship in enumerate(_leading_entries(scholarships, "Scholarship", 2)):
        roadmap.append(
            task(f"Apply for Scholarship: {scholarship}", 60 + slot * 30, 90 + slot * 30)
        )

    # Education: a single programme in a fixed window.
    for programme in _leading_entries(edu_opps, "Program", 1):
        roadmap.append(task(f"Pursue Education: {programme}", 120, 480))

    # Explicit columns keep the schema stable when the roadmap is empty.
    return pd.DataFrame(roadmap, columns=["Task", "Start", "Finish"])
# Streamlit UI: upload a CV, analyse it, then render the skill score,
# recommendations, a roadmap timeline and live job listings.
st.title("📊 Personalized Skill Scoring & Career Roadmap App")
st.markdown("Upload your CV and get a detailed career roadmap with live job listings.")

uploaded_file = st.file_uploader("📤 Upload your CV (PDF only)", type=["pdf"])

if uploaded_file:
    # All analysis happens under the spinner; rendering follows once it is done.
    with st.spinner("Analyzing your CV..."):
        text = extract_text_from_pdf(uploaded_file)
        skills, background, years_exp = extract_entities(text)
        score = score_skills(skills)
        country_info = recommend_countries(skills, years_exp)
        certs = recommend_certifications(skills)
        edu = recommend_education(background)
        field = background  # Simplified; you should detect actual field from CV
        scholarships = recommend_scholarships(field)

    st.subheader("✅ Identified Skills")
    st.write(skills or "No recognized skills found.")

    st.subheader("📈 Skill Score")
    st.metric("Your Skill Score", f"{score}/100")

    st.subheader("🌍 Job Opportunities & Country Recommendations")
    if not country_info.empty:
        st.dataframe(country_info)
    else:
        st.write("No country/job recommendations available for your skill set.")

    st.subheader("🎓 Recommended Certifications")
    if not certs.empty:
        st.dataframe(certs)
    else:
        st.write("No certification recommendations available.")

    st.subheader("🎓 Higher Education Opportunities")
    if not edu.empty:
        st.dataframe(edu)
    else:
        st.write("No higher education opportunities available.")

    st.subheader("🎓 Scholarship Opportunities")
    if not scholarships.empty:
        st.dataframe(scholarships)
    else:
        st.write("No scholarships available for your field.")

    # Dynamic roadmap timeline generation & display with checks
    roadmap_df = create_dynamic_roadmap(skills, certs, scholarships, edu)
    st.write("Roadmap DataFrame preview:")
    st.dataframe(roadmap_df)

    # Only plot when there is at least one task and the frame has the columns
    # px.timeline is asked to read.
    required_cols = {"Task", "Start", "Finish"}
    if not roadmap_df.empty and required_cols.issubset(roadmap_df.columns):
        fig = px.timeline(
            roadmap_df,
            x_start="Start",
            x_end="Finish",
            y="Task",
            title="Career Roadmap Timeline",
        )
        # Reverse the y axis so the first task is drawn at the top.
        fig.update_yaxes(autorange="reversed")
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.warning("No roadmap tasks to display or roadmap data missing required columns.")

    # Show live job listings using first identified skill and first country code
    if skills and not country_info.empty:
        st.subheader(f"🔍 Live Job Listings for '{skills[0]}'")
        country_code_map = {
            "USA": "us",
            "Canada": "ca",
            "UK": "gb",
            "Germany": "de",
            "Australia": "au",
            "India": "in",
            "Netherlands": "nl",
        }
        # Countries without a mapping fall back to the US endpoint.
        country_code = country_code_map.get(country_info.iloc[0]["Country"], "us")
        jobs = fetch_jobs(skills[0], country_code=country_code, max_results=5)
        if jobs:
            for job in jobs:
                # API records are external data: tolerate missing or null
                # fields instead of failing the whole page on one bad record.
                title = job.get("title") or "Untitled position"
                link = job.get("redirect_url")
                location = (job.get("location") or {}).get("display_name") or "Location not specified"
                heading = f"**[{title}]({link})**" if link else f"**{title}**"
                st.markdown(f"{heading} - {location}")
                description = job.get("description") or ""
                st.markdown(f"*{description[:200]}...*")
                st.markdown("---")
        else:
            st.write("No live job listings found.")
else:
    st.info("Please upload your CV to begin.")