File size: 7,529 Bytes
856e6a7
 
ec9d8ec
a21d2a8
 
ec9d8ec
a21d2a8
856e6a7
ec9d8ec
a21d2a8
ec9d8ec
 
a21d2a8
fdf83c7
 
856e6a7
 
 
 
 
a21d2a8
 
 
ec9d8ec
 
 
 
 
 
 
 
 
 
 
 
fdf83c7
 
 
 
 
 
 
 
 
 
 
 
 
 
ec9d8ec
856e6a7
fdf83c7
 
856e6a7
a21d2a8
0fd64e3
 
a21d2a8
 
 
 
 
 
 
 
 
 
 
ec9d8ec
 
a21d2a8
b680025
a21d2a8
b680025
 
ec9d8ec
 
 
1fe7fa3
ec9d8ec
 
 
1fe7fa3
 
ec9d8ec
 
 
1fe7fa3
ec9d8ec
 
 
1fe7fa3
 
ec9d8ec
 
 
1fe7fa3
ec9d8ec
 
 
1fe7fa3
b680025
a21d2a8
 
ec9d8ec
a21d2a8
ec9d8ec
fdf83c7
a21d2a8
856e6a7
 
fdf83c7
856e6a7
ec9d8ec
fdf83c7
 
 
ec9d8ec
 
fdf83c7
 
 
 
 
 
 
 
a21d2a8
ec9d8ec
 
 
 
856e6a7
a21d2a8
ec9d8ec
 
 
 
856e6a7
a21d2a8
ec9d8ec
 
 
 
856e6a7
fdf83c7
ec9d8ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a21d2a8
 
 
ec9d8ec
 
 
 
 
 
 
a21d2a8
 
ec9d8ec
a21d2a8
 
 
 
 
 
 
856e6a7
fdf83c7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import streamlit as st
import pandas as pd
import pdfplumber
import spacy
import requests
import plotly.express as px
from datetime import datetime, timedelta

# Page config (kept ahead of every other Streamlit call in this script)
st.set_page_config(page_title="Skill Scoring & Career Roadmap App", layout="wide")


@st.cache_resource
def _load_nlp(model_name="en_core_web_sm"):
    """Load the spaCy pipeline once and reuse it across Streamlit reruns."""
    return spacy.load(model_name)


@st.cache_data
def _load_csv(path):
    """Read a CSV reference table; the parsed frame is cached across reruns."""
    return pd.read_csv(path)


# Load spaCy model
# Streamlit re-executes this script on every widget interaction, so the model
# and the reference tables go through cached loaders instead of being re-read.
nlp = _load_nlp()

# Load datasets
skills_df = _load_csv("data/skills_dataset.csv")
countries_df = _load_csv("data/countries_dataset.csv")
cert_df = _load_csv("data/certifications.csv")
edu_tech_df = _load_csv("data/education_technical.csv")
edu_non_tech_df = _load_csv("data/education_non_technical.csv")
scholarship_df = _load_csv("data/scholarships_dataset.csv")

# Helper functions
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        file: Anything accepted by ``pdfplumber.open`` (the UI passes the
            uploaded file object).

    Returns:
        The page texts joined with ``"\\n"``. Pages for which
        ``extract_text()`` returns nothing (``None`` or an empty string) are
        skipped, so the result is ``""`` when no page yields text.
    """
    with pdfplumber.open(file) as pdf:
        # Extract each page exactly once; the previous version called
        # extract_text() twice per page (once to filter, once to join).
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(page_text for page_text in page_texts if page_text)

def extract_entities(text):
    """Extract known skills, a background label and years of experience from CV text.

    Skills are matched case-sensitively against the ``Skill`` column of
    ``skills_df``. Runs of consecutive tokens are compared as well as single
    tokens, so multi-word skills such as "Machine Learning" can be found; a
    purely per-token comparison can never match them.

    Args:
        text: Plain text of the CV.

    Returns:
        A ``(skills, background, years_exp)`` tuple where ``skills`` is a list
        of distinct matched skills in order of first appearance,
        ``background`` is ``"technical"`` or ``"non-technical"``, and
        ``years_exp`` is currently a fixed placeholder value.
    """
    doc = nlp(text)

    # Build the lookup set once instead of scanning the column for every token.
    known_skills = set(skills_df['Skill'].dropna().astype(str))
    # Word counts that occur among the known skills (1 for "Python", 2 for
    # "Machine Learning", ...); only windows of these sizes need checking.
    phrase_lengths = sorted({len(skill.split()) for skill in known_skills if skill.split()})

    # Whitespace-only tokens (e.g. line breaks from the PDF layout) are dropped
    # so a skill split across a line break is still recognised.
    words = [token.text for token in doc if not token.is_space]

    skills = []
    seen = set()
    for start in range(len(words)):
        for length in phrase_lengths:
            if start + length > len(words):
                break
            candidate = " ".join(words[start:start + length])
            if candidate in known_skills and candidate not in seen:
                seen.add(candidate)
                skills.append(candidate)

    technical_skills = {"Python", "Machine Learning", "Cloud Computing", "Cybersecurity", "AI", "DevOps"}
    background = "technical" if any(s in technical_skills for s in skills) else "non-technical"
    years_exp = 3  # Placeholder, replace with better extraction logic
    return skills, background, years_exp

def score_skills(user_skills):
    """Score the user's skills as a percentage of the skills dataset size.

    Args:
        user_skills: Collection of skills identified for the user.

    Returns:
        ``len(user_skills)`` divided by the number of rows in ``skills_df``,
        times 100 and truncated to an int; 0 when ``skills_df`` has no rows.
    """
    catalogue_size = len(skills_df)
    if catalogue_size == 0:
        # Nothing to compare against; also avoids dividing by zero.
        return 0
    coverage = len(user_skills) / catalogue_size
    return int(coverage * 100)

def recommend_countries(skills, years_exp):
    """Select country/job rows matching the user's skills and experience.

    Args:
        skills: Skills to look up in the ``Skill`` column of ``countries_df``.
        years_exp: Years of experience; rows whose ``MinExperience`` exceeds
            this value are excluded.

    Returns:
        A DataFrame with the columns ``Country``, ``JobTitle``,
        ``AverageSalary`` and ``VisaPath``, de-duplicated and re-indexed
        from 0.
    """
    output_columns = ["Country", "JobTitle", "AverageSalary", "VisaPath"]
    # Filter in two steps: first by skill, then by the experience requirement.
    skill_matches = countries_df.loc[countries_df['Skill'].isin(skills)]
    eligible = skill_matches.loc[skill_matches['MinExperience'] <= years_exp]
    unique_rows = eligible[output_columns].drop_duplicates()
    return unique_rows.reset_index(drop=True)

def recommend_certifications(skills):
    """Return the rows of ``cert_df`` whose ``Skill`` is one of ``skills``.

    Args:
        skills: Skills to look up in the ``Skill`` column of ``cert_df``.

    Returns:
        The matching rows as a DataFrame re-indexed from 0.
    """
    is_relevant = cert_df['Skill'].isin(skills)
    matching_certs = cert_df[is_relevant]
    return matching_certs.reset_index(drop=True)

def recommend_education(background):
    """Pick the education table that corresponds to the user's background.

    Args:
        background: ``"technical"`` selects ``edu_tech_df``; any other value
            selects ``edu_non_tech_df``.

    Returns:
        The selected table as a DataFrame re-indexed from 0.
    """
    if background == "technical":
        programs = edu_tech_df
    else:
        programs = edu_non_tech_df
    return programs.reset_index(drop=True)

def recommend_scholarships(field):
    """Return scholarships whose ``Field`` equals ``field``, ignoring case.

    Args:
        field: Field name to match against the ``Field`` column of
            ``scholarship_df``.

    Returns:
        The matching rows as a DataFrame re-indexed from 0.
    """
    # Lower-case both sides so the comparison is case-insensitive.
    dataset_fields = scholarship_df["Field"].str.lower()
    wanted_field = field.lower()
    matches = scholarship_df[dataset_fields == wanted_field]
    return matches.reset_index(drop=True)

def fetch_jobs(skill, country_code="us", max_results=5):
    """Fetch live job listings for a skill from the Adzuna search API.

    This is a best-effort lookup: any failure (network error, timeout,
    non-200 status, undecodable or unexpected response body) yields an empty
    list rather than an exception, so the calling page keeps rendering.

    Args:
        skill: Search keywords, sent as the ``what`` query parameter.
        country_code: Country segment of the API URL (e.g. ``"us"``).
        max_results: Value sent as ``results_per_page``.

    Returns:
        The ``results`` list from the JSON response, or ``[]`` on failure.
    """
    import os  # local import: only needed for the credential override below

    # NOTE(security): the fallback credentials are hard-coded in source control.
    # They are kept so existing deployments keep working, but they should be
    # rotated and supplied through ADZUNA_APP_ID / ADZUNA_APP_KEY instead.
    app_id = os.environ.get("ADZUNA_APP_ID", "f4efd3a2")
    app_key = os.environ.get("ADZUNA_APP_KEY", "5702f3c0507ac69f98aa15f855b06901")
    url = f"https://api.adzuna.com/v1/api/jobs/{country_code}/search/1"
    params = {
        "app_id": app_id,
        "app_key": app_key,
        "results_per_page": max_results,
        "what": skill,
        "content-type": "application/json",
    }

    try:
        # Without a timeout, requests can block indefinitely on a stalled server.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        return []

    if response.status_code != 200:
        return []

    try:
        payload = response.json()
    except ValueError:
        # Body was not valid JSON.
        return []

    if not isinstance(payload, dict):
        return []
    return payload.get("results", [])

def create_dynamic_roadmap(skills, certs, scholarships, edu_opps):
    """Build a dated task list from certifications, scholarships and education.

    All dates are offsets from the current time, formatted as ``YYYY-MM-DD``:

    * up to two certifications, in consecutive 30-day slots starting today;
    * up to two scholarships, in 30-day slots starting at day 60;
    * the first education program, from day 120 to day 480.

    A source is skipped when its frame is empty or lacks the expected column
    (``Certification``, ``Scholarship`` or ``Program``).

    Args:
        skills: Not used by this function.
        certs: DataFrame of recommended certifications.
        scholarships: DataFrame of recommended scholarships.
        edu_opps: DataFrame of education opportunities.

    Returns:
        A DataFrame with ``Task``, ``Start`` and ``Finish`` columns, or an
        empty DataFrame when no task was produced.
    """
    date_format = "%Y-%m-%d"
    today = datetime.now()

    def make_task(label, start_offset, finish_offset):
        # One roadmap row; offsets are days counted from `today`.
        return {
            "Task": label,
            "Start": (today + timedelta(days=start_offset)).strftime(date_format),
            "Finish": (today + timedelta(days=finish_offset)).strftime(date_format),
        }

    tasks = []

    # Certifications: back-to-back 30-day slots.
    if "Certification" in certs.columns and not certs.empty:
        for slot, name in enumerate(certs["Certification"].head(2).tolist()):
            tasks.append(
                make_task(f"Complete Certification: {name}", slot * 30, (slot + 1) * 30)
            )

    # Scholarships: 30-day slots beginning at day 60.
    if "Scholarship" in scholarships.columns and not scholarships.empty:
        for slot, name in enumerate(scholarships["Scholarship"].head(2).tolist()):
            tasks.append(
                make_task(f"Apply for Scholarship: {name}", 60 + slot * 30, 90 + slot * 30)
            )

    # Education: only the first listed program, on a fixed window.
    if "Program" in edu_opps.columns and not edu_opps.empty:
        for name in edu_opps["Program"].head(1).tolist():
            tasks.append(make_task(f"Pursue Education: {name}", 120, 480))

    return pd.DataFrame(tasks)

# Streamlit UI
# Flow: upload a PDF CV -> extract skills -> show score, recommendations,
# a roadmap timeline and live job listings for the first identified skill.
# NOTE: the emoji in the user-facing strings below were restored from
# mis-decoded UTF-8 byte sequences.
st.title("📊 Personalized Skill Scoring & Career Roadmap App")
st.markdown("Upload your CV and get a detailed career roadmap with live job listings.")

uploaded_file = st.file_uploader("📤 Upload your CV (PDF only)", type=["pdf"])

if uploaded_file:
    with st.spinner("Analyzing your CV..."):
        text = extract_text_from_pdf(uploaded_file)
        skills, background, years_exp = extract_entities(text)
        score = score_skills(skills)
        country_info = recommend_countries(skills, years_exp)
        certs = recommend_certifications(skills)
        edu = recommend_education(background)
        field = background  # Simplified; you should detect actual field from CV
        scholarships = recommend_scholarships(field)

    st.subheader("✅ Identified Skills")
    st.write(skills or "No recognized skills found.")

    st.subheader("📈 Skill Score")
    st.metric("Your Skill Score", f"{score}/100")

    st.subheader("🌍 Job Opportunities & Country Recommendations")
    if not country_info.empty:
        st.dataframe(country_info)
    else:
        st.write("No country/job recommendations available for your skill set.")

    st.subheader("🎓 Recommended Certifications")
    if not certs.empty:
        st.dataframe(certs)
    else:
        st.write("No certification recommendations available.")

    st.subheader("🎓 Higher Education Opportunities")
    if not edu.empty:
        st.dataframe(edu)
    else:
        st.write("No higher education opportunities available.")

    st.subheader("🎓 Scholarship Opportunities")
    if not scholarships.empty:
        st.dataframe(scholarships)
    else:
        st.write("No scholarships available for your field.")

    # Dynamic roadmap timeline generation & display with checks
    roadmap_df = create_dynamic_roadmap(skills, certs, scholarships, edu)
    st.write("Roadmap DataFrame preview:")
    st.dataframe(roadmap_df)

    # An empty roadmap frame has no columns at all, so check both conditions
    # before handing it to plotly.
    required_cols = {"Task", "Start", "Finish"}
    if not roadmap_df.empty and required_cols.issubset(roadmap_df.columns):
        fig = px.timeline(
            roadmap_df,
            x_start="Start",
            x_end="Finish",
            y="Task",
            title="Career Roadmap Timeline"
        )
        # Reverse the y axis so the first task is drawn at the top.
        fig.update_yaxes(autorange="reversed")
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.warning("No roadmap tasks to display or roadmap data missing required columns.")

    # Show live job listings using first identified skill and first country code
    if skills and not country_info.empty:
        st.subheader(f"🔍 Live Job Listings for '{skills[0]}'")
        country_code_map = {
            "USA": "us",
            "Canada": "ca",
            "UK": "gb",
            "Germany": "de",
            "Australia": "au",
            "India": "in",
            "Netherlands": "nl"
        }
        # Countries missing from the map fall back to the US endpoint.
        country_code = country_code_map.get(country_info.iloc[0]["Country"], "us")
        jobs = fetch_jobs(skills[0], country_code=country_code, max_results=5)
        if jobs:
            for job in jobs:
                # Listings come from an external API; read every field
                # defensively so one incomplete record cannot crash the page.
                job_title = job.get("title") or "Untitled position"
                job_url = job.get("redirect_url") or ""
                job_location = (job.get("location") or {}).get("display_name") or "Location not specified"
                job_summary = job.get("description") or ""
                heading = f"**[{job_title}]({job_url})**" if job_url else f"**{job_title}**"
                st.markdown(f"{heading} - {job_location}")
                st.markdown(f"*{job_summary[:200]}...*")
                st.markdown("---")
        else:
            st.write("No live job listings found.")
else:
    st.info("Please upload your CV to begin.")