Spaces:

Danial7
/

skill_roadmap_app

Sleeping

File size: 7,529 Bytes

856e6a7
 
ec9d8ec
a21d2a8
 
ec9d8ec
a21d2a8
856e6a7
ec9d8ec
a21d2a8
ec9d8ec
 
a21d2a8
fdf83c7
 
856e6a7
 
 
 
 
a21d2a8
 
 
ec9d8ec
 
 
 
 
 
 
 
 
 
 
 
fdf83c7
 
 
 
 
 
 
 
 
 
 
 
 
 
ec9d8ec
856e6a7
fdf83c7
 
856e6a7
a21d2a8
0fd64e3
 
a21d2a8
 
 
 
 
 
 
 
 
 
 
ec9d8ec
 
a21d2a8
b680025
a21d2a8
b680025
 
ec9d8ec
 
 
1fe7fa3
ec9d8ec
 
 
1fe7fa3
 
ec9d8ec
 
 
1fe7fa3
ec9d8ec
 
 
1fe7fa3
 
ec9d8ec
 
 
1fe7fa3
ec9d8ec
 
 
1fe7fa3
b680025
a21d2a8
 
ec9d8ec
a21d2a8
ec9d8ec
fdf83c7
a21d2a8
856e6a7
 
fdf83c7
856e6a7
ec9d8ec
fdf83c7
 
 
ec9d8ec
 
fdf83c7
 
 
 
 
 
 
 
a21d2a8
ec9d8ec
 
 
 
856e6a7
a21d2a8
ec9d8ec
 
 
 
856e6a7
a21d2a8
ec9d8ec
 
 
 
856e6a7
fdf83c7
ec9d8ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a21d2a8
 
 
ec9d8ec
 
 
 
 
 
 
a21d2a8
 
ec9d8ec
a21d2a8
 
 
 
 
 
 
856e6a7
fdf83c7

import streamlit as st
import pandas as pd
import pdfplumber
import spacy
import requests
import plotly.express as px
from datetime import datetime, timedelta

# Page config (kept ahead of every other Streamlit call in this script)
st.set_page_config(page_title="Skill Scoring & Career Roadmap App", layout="wide")


# Streamlit re-executes this script on each user interaction, so the model and
# the CSV files are loaded through cached helpers instead of being re-read on
# every rerun.
# NOTE(review): st.cache_resource / st.cache_data require Streamlit >= 1.18.
@st.cache_resource
def _load_nlp_model():
    """Load the spaCy English pipeline once and share it across reruns."""
    return spacy.load("en_core_web_sm")


@st.cache_data
def _load_dataset(path):
    """Read the CSV file at *path* into a DataFrame, cached per path."""
    return pd.read_csv(path)


# Load spaCy model
nlp = _load_nlp_model()

# Load datasets
skills_df = _load_dataset("data/skills_dataset.csv")
countries_df = _load_dataset("data/countries_dataset.csv")
cert_df = _load_dataset("data/certifications.csv")
edu_tech_df = _load_dataset("data/education_technical.csv")
edu_non_tech_df = _load_dataset("data/education_non_technical.csv")
scholarship_df = _load_dataset("data/scholarships_dataset.csv")

# Helper functions
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        file: Anything ``pdfplumber.open`` accepts (here, the uploaded file
            object from the Streamlit uploader).

    Returns:
        A single string containing the text of each page that yielded any
        text; pages for which extraction returns nothing are skipped.
    """
    with pdfplumber.open(file) as pdf:
        # Extract each page exactly once; the previous version called
        # extract_text() twice per page (once to filter, once for the value).
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(page_text for page_text in page_texts if page_text)

def extract_entities(text):
    """Find known skills in CV text and classify the candidate's background.

    Args:
        text: Plain text extracted from the CV.

    Returns:
        A tuple ``(skills, background, years_exp)`` where ``skills`` is a
        de-duplicated list (in no particular order) of entries from
        ``skills_df['Skill']`` found in the text, ``background`` is
        ``"technical"`` if any found skill is in the built-in technical set
        and ``"non-technical"`` otherwise, and ``years_exp`` is currently a
        fixed placeholder value.
    """
    doc = nlp(text)

    # Build the lookup set once instead of scanning the column for each token.
    known_skills = {skill for skill in skills_df['Skill'] if isinstance(skill, str)}
    found = {token.text for token in doc if token.text in known_skills}

    # A single token never equals a multi-word skill name such as
    # "Machine Learning", so those are matched as phrases against the text
    # with runs of whitespace (including line breaks) collapsed to one space.
    normalized_text = " ".join(text.split())
    for skill in known_skills:
        words = skill.split()
        if len(words) > 1 and " ".join(words) in normalized_text:
            found.add(skill)

    technical_skills = {"Python", "Machine Learning", "Cloud Computing", "Cybersecurity", "AI", "DevOps"}
    background = "technical" if found & technical_skills else "non-technical"
    years_exp = 3  # Placeholder, replace with better extraction logic
    return list(found), background, years_exp

def score_skills(user_skills):
    """Score the user's skills as a whole-number percentage of the catalogue.

    Returns 0 when the skills dataset has no rows; otherwise the number of
    user skills divided by the number of dataset rows, times 100, truncated
    to an int.
    """
    catalogue_size = len(skills_df)
    if catalogue_size == 0:
        return 0
    coverage = len(user_skills) / catalogue_size
    return int(coverage * 100)

def recommend_countries(skills, years_exp):
    """List country/job rows that match the user's skills and experience.

    Keeps rows of ``countries_df`` whose ``Skill`` is in *skills* and whose
    ``MinExperience`` does not exceed *years_exp*, then returns the distinct
    Country / JobTitle / AverageSalary / VisaPath combinations with a fresh
    0-based index.
    """
    skill_matches = countries_df['Skill'].isin(skills)
    by_skill = countries_df[skill_matches]
    experienced_enough = by_skill['MinExperience'] <= years_exp
    eligible = by_skill[experienced_enough]
    shown_columns = ["Country", "JobTitle", "AverageSalary", "VisaPath"]
    unique_rows = eligible[shown_columns].drop_duplicates()
    return unique_rows.reset_index(drop=True)

def recommend_certifications(skills):
    """Return the rows of ``cert_df`` whose ``Skill`` is one of *skills*, re-indexed from 0."""
    has_user_skill = cert_df['Skill'].isin(skills)
    matching_certs = cert_df[has_user_skill]
    return matching_certs.reset_index(drop=True)

def recommend_education(background):
    """Pick the education table for the given background.

    ``"technical"`` selects ``edu_tech_df``; any other value selects
    ``edu_non_tech_df``. The chosen table is returned with a fresh index.
    """
    if background == "technical":
        programmes = edu_tech_df
    else:
        programmes = edu_non_tech_df
    return programmes.reset_index(drop=True)

def recommend_scholarships(field):
    """Return scholarships whose ``Field`` equals *field*, ignoring case, re-indexed from 0."""
    field_lowered = scholarship_df["Field"].str.lower()
    same_field = field_lowered == field.lower()
    return scholarship_df[same_field].reset_index(drop=True)

def fetch_jobs(skill, country_code="us", max_results=5, timeout=10):
    """Fetch live job listings for a skill from the Adzuna search API.

    Args:
        skill: Search term sent as the ``what`` query parameter.
        country_code: Country segment of the API URL (e.g. ``"us"``, ``"gb"``).
        max_results: Value sent as ``results_per_page``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The list under the ``"results"`` key of the JSON response, or an empty
        list when the request fails, times out, returns a non-200 status, or
        the body is not the expected JSON object.
    """
    import os

    # SECURITY NOTE(review): API credentials committed to source are readable
    # by anyone with access to the repository. They are now taken from the
    # environment when set; the literals remain only as a backward-compatible
    # fallback and should be rotated and removed.
    app_id = os.environ.get("ADZUNA_APP_ID", "f4efd3a2")
    app_key = os.environ.get("ADZUNA_APP_KEY", "5702f3c0507ac69f98aa15f855b06901")
    url = f"https://api.adzuna.com/v1/api/jobs/{country_code}/search/1"
    params = {
        "app_id": app_id,
        "app_key": app_key,
        "results_per_page": max_results,
        "what": skill,
        "content-type": "application/json"
    }
    try:
        # Without a timeout a stalled connection would hang the whole app.
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code != 200:
            return []
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a body that is not valid JSON: the job listings
        # are best-effort, so report "no results" instead of crashing the page.
        return []

    if not isinstance(payload, dict):
        return []
    return payload.get("results") or []

def create_dynamic_roadmap(skills, certs, scholarships, edu_opps):
    """Build a timeline of next steps from the recommendation tables.

    The schedule is laid out relative to the current date:
      * up to two certifications, in consecutive 30-day slots from day 0;
      * up to two scholarship applications, in 30-day slots from day 60;
      * one education programme, from day 120 to day 480.

    A table contributes nothing when it is empty or lacks its expected column
    ("Certification", "Scholarship" or "Program"). *skills* is accepted but
    not used.

    Returns:
        A DataFrame with one row per task and string dates ("YYYY-MM-DD") in
        the "Task", "Start" and "Finish" columns; an empty DataFrame when
        there are no tasks.
    """
    today = datetime.now()

    def _entry(label, first_day, last_day):
        # One timeline row, with both dates expressed as offsets from today.
        return {
            "Task": label,
            "Start": (today + timedelta(days=first_day)).strftime("%Y-%m-%d"),
            "Finish": (today + timedelta(days=last_day)).strftime("%Y-%m-%d"),
        }

    def _first_values(frame, column, limit):
        # Leading values of a column, or nothing if the table is unusable.
        if frame.empty or column not in frame.columns:
            return []
        return frame[column].tolist()[:limit]

    entries = []

    # Certifications: back-to-back 30-day slots.
    entries.extend(
        _entry(f"Complete Certification: {name}", slot * 30, (slot + 1) * 30)
        for slot, name in enumerate(_first_values(certs, "Certification", 2))
    )

    # Scholarships: same slot length, shifted to begin at day 60.
    entries.extend(
        _entry(f"Apply for Scholarship: {name}", 60 + slot * 30, 90 + slot * 30)
        for slot, name in enumerate(_first_values(scholarships, "Scholarship", 2))
    )

    # Education: a single programme in a fixed window.
    entries.extend(
        _entry(f"Pursue Education: {name}", 120, 480)
        for name in _first_values(edu_opps, "Program", 1)
    )

    return pd.DataFrame(entries)

# Streamlit UI
# Top-level page script: shows the title and a PDF uploader, and once a file is
# provided runs the analysis helpers above and renders each result section in
# the order written below.
st.title("📊 Personalized Skill Scoring & Career Roadmap App")
st.markdown("Upload your CV and get a detailed career roadmap with live job listings.")

uploaded_file = st.file_uploader("📤 Upload your CV (PDF only)", type=["pdf"])

if uploaded_file:
    # Run the whole analysis pipeline behind a spinner; nothing is rendered
    # until every recommendation table has been computed.
    with st.spinner("Analyzing your CV..."):
        text = extract_text_from_pdf(uploaded_file)
        skills, background, years_exp = extract_entities(text)
        score = score_skills(skills)
        country_info = recommend_countries(skills, years_exp)
        certs = recommend_certifications(skills)
        edu = recommend_education(background)
        # The scholarship lookup is keyed on the "technical"/"non-technical"
        # background label rather than on a field detected from the CV.
        field = background  # Simplified; you should detect actual field from CV
        scholarships = recommend_scholarships(field)

    st.subheader("✅ Identified Skills")
    # An empty skills list is falsy, so the fallback message is shown instead.
    st.write(skills or "No recognized skills found.")

    st.subheader("📈 Skill Score")
    st.metric("Your Skill Score", f"{score}/100")

    # Each section below shows its table when it has rows, otherwise a short
    # "nothing available" message.
    st.subheader("🌍 Job Opportunities & Country Recommendations")
    if not country_info.empty:
        st.dataframe(country_info)
    else:
        st.write("No country/job recommendations available for your skill set.")

    st.subheader("🎓 Recommended Certifications")
    if not certs.empty:
        st.dataframe(certs)
    else:
        st.write("No certification recommendations available.")

    st.subheader("🎓 Higher Education Opportunities")
    if not edu.empty:
        st.dataframe(edu)
    else:
        st.write("No higher education opportunities available.")

    st.subheader("🎓 Scholarship Opportunities")
    if not scholarships.empty:
        st.dataframe(scholarships)
    else:
        st.write("No scholarships available for your field.")

    # Dynamic roadmap timeline generation & display with checks
    roadmap_df = create_dynamic_roadmap(skills, certs, scholarships, edu)
    # The raw roadmap table is always shown, even when it is empty.
    st.write("Roadmap DataFrame preview:")
    st.dataframe(roadmap_df)

    # Only draw the chart when there is at least one task and all three
    # columns used by px.timeline below are present.
    required_cols = {"Task", "Start", "Finish"}
    if not roadmap_df.empty and required_cols.issubset(roadmap_df.columns):
        fig = px.timeline(
            roadmap_df,
            x_start="Start",
            x_end="Finish",
            y="Task",
            title="Career Roadmap Timeline"
        )
        # Reverse the y-axis so tasks read top-to-bottom in DataFrame order.
        fig.update_yaxes(autorange="reversed")
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.warning("No roadmap tasks to display or roadmap data missing required columns.")

    # Show live job listings using first identified skill and first country code
    if skills and not country_info.empty:
        st.subheader(f"🔍 Live Job Listings for '{skills[0]}'")
        # Maps the country names used in the recommendations table to the
        # country codes passed to fetch_jobs; unmapped names fall back to "us".
        country_code_map = {
            "USA": "us",
            "Canada": "ca",
            "UK": "gb",
            "Germany": "de",
            "Australia": "au",
            "India": "in",
            "Netherlands": "nl"
        }
        country_code = country_code_map.get(country_info.iloc[0]["Country"], "us")
        jobs = fetch_jobs(skills[0], country_code=country_code, max_results=5)
        if jobs:
            for job in jobs:
                # NOTE(review): 'title', 'redirect_url' and
                # 'location'/'display_name' are indexed directly and would
                # raise KeyError if a listing lacks them — confirm the API
                # always returns these fields. Only 'description' is optional.
                st.markdown(f"**[{job['title']}]({job['redirect_url']})** - {job['location']['display_name']}")
                # Description is cut to its first 200 characters; the "..." is
                # appended regardless of the original length.
                st.markdown(f"*{job.get('description', '')[:200]}...*")
                st.markdown("---")
        else:
            st.write("No live job listings found.")
else:
    st.info("Please upload your CV to begin.")