File size: 3,319 Bytes
b2c1f4f
 
60afec2
bba628e
 
60afec2
bba628e
 
60afec2
 
 
bba628e
 
60afec2
bba628e
 
60afec2
 
 
 
 
 
 
 
 
 
 
 
bba628e
 
 
60afec2
 
 
 
 
 
b2c1f4f
60afec2
 
 
 
bba628e
60afec2
 
 
bba628e
60afec2
 
bba628e
60afec2
 
bba628e
60afec2
 
 
 
 
bba628e
60afec2
bba628e
60afec2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2c1f4f
bba628e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import re

import pandas as pd
import pdfplumber
import spacy
import streamlit as st

# Set page config at the top (must be the first Streamlit call in the script)
st.set_page_config(page_title="Skill Scoring App", layout="wide")

# Load spaCy model.
# Bug fix: the original unconditionally downloaded "en_core_web_sm" on every
# run, requiring network access and slowing each start-up. Try the installed
# model first and download only when it is missing (spacy.load raises OSError
# for an absent model package).
import spacy.cli

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Model not installed yet — fetch it once, then load.
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Load the reference datasets used by the helper functions below.
# NOTE(review): paths are relative to the working directory — assumed to be
# the app root; confirm against the deployment setup.
skills_df = pd.read_csv("skills_dataset.csv")
countries_df = pd.read_csv("countries_dataset.csv")
cert_df = pd.read_csv("certifications.csv")
edu_tech_df = pd.read_csv("education_technical.csv")
edu_non_tech_df = pd.read_csv("education_non_technical.csv")

# Helper functions
def extract_text_from_pdf(file):
    """Return the text of every page of *file*, joined with newlines.

    Pages from which pdfplumber can extract no text are skipped entirely
    (no empty entry, no separator for them).
    """
    pages_text = []
    with pdfplumber.open(file) as pdf:
        for page in pdf.pages:
            content = page.extract_text()
            if content:
                pages_text.append(content)
    return "\n".join(pages_text)

def extract_entities(text):
    """Identify known skills in the CV text and classify the background.

    Bug fix: the original compared individual spaCy tokens against the skill
    list, so multi-word skills such as "Machine Learning" or "Cloud Computing"
    could never match (a token is a single word) — even though the
    technical-skill set below expects them. Skills are now matched as whole
    phrases, case-insensitively and anchored on word boundaries, directly
    against the text.

    Args:
        text: Raw text extracted from the CV.

    Returns:
        A tuple ``(skills, background)`` where ``skills`` is a de-duplicated
        list of matched skill names (as spelled in ``skills_df``) and
        ``background`` is ``"technical"`` or ``"non-technical"``.
    """
    skills = []
    for skill in skills_df['Skill'].dropna().astype(str):
        # Word-boundary anchors prevent e.g. "AI" matching inside "maintain".
        pattern = r"\b" + re.escape(skill) + r"\b"
        if re.search(pattern, text, flags=re.IGNORECASE):
            skills.append(skill)
    # Determine technical vs non-technical background
    technical_skills = {"Python", "Machine Learning", "Cloud Computing", "Cybersecurity", "AI", "DevOps"}
    background = "technical" if any(s in technical_skills for s in skills) else "non-technical"
    return list(set(skills)), background

def score_skills(user_skills):
    """Return the user's skill score: matched skills as a percentage (0-100)
    of the full skill catalogue, truncated to an integer."""
    total = len(skills_df)
    if total == 0:
        # Empty catalogue — nothing to score against.
        return 0
    return int((len(user_skills) / total) * 100)

def recommend_countries(skills):
    """Return unique (Country, AverageSalary, VisaPath) rows whose Skill
    column matches any of the given skills, with a fresh 0-based index."""
    matches = countries_df['Skill'].isin(skills)
    wanted_columns = ["Country", "AverageSalary", "VisaPath"]
    result = countries_df[matches][wanted_columns]
    return result.drop_duplicates().reset_index(drop=True)

def recommend_certifications(skills):
    """Return certification rows relevant to any of the given skills,
    reindexed from zero."""
    relevant = cert_df['Skill'].isin(skills)
    return cert_df[relevant].reset_index(drop=True)

def recommend_education(background):
    """Return the education table for the user's background: the technical
    table when *background* == "technical", otherwise the non-technical one."""
    if background == "technical":
        chosen = edu_tech_df
    else:
        chosen = edu_non_tech_df
    return chosen.reset_index(drop=True)

# Streamlit UI
# Bug fix: every emoji literal in the UI strings was mojibake (the emoji's
# UTF-8 bytes decoded through a legacy Thai/Windows-874 code page, rendering
# as strings like "๐Ÿ“Š"); restored to the intended emoji.
st.title("📊 Personalized Skill Scoring & Career Path App")
st.markdown(
    "Upload your CV and get a personalized career guide based on your skills and background."
)

uploaded_file = st.file_uploader("📤 Upload your CV (PDF format only)", type=["pdf"])

if uploaded_file:
    # Run the full analysis pipeline inside a spinner so the user sees
    # progress while the PDF is parsed and matched against the datasets.
    with st.spinner("Analyzing your CV..."):
        text = extract_text_from_pdf(uploaded_file)
        skills, background = extract_entities(text)
        score = score_skills(skills)
        country_info = recommend_countries(skills)
        certs = recommend_certifications(skills)
        edu = recommend_education(background)

    st.subheader("✅ Identified Skills")
    # `skills or ...` shows the fallback message when the list is empty.
    st.write(skills or "No recognized skills found.")

    st.subheader("📈 Skill Score")
    st.metric("Your Skill Score", f"{score}/100")

    st.subheader("🌍 Country Recommendations")
    if not country_info.empty:
        st.dataframe(country_info)
    else:
        st.write("No country recommendations available for your skill set.")

    st.subheader("🎓 Recommended Certifications")
    if not certs.empty:
        st.dataframe(certs)
    else:
        st.write("No certification recommendations available for your skill set.")

    st.subheader("🎓 Higher Education Opportunities")
    if not edu.empty:
        st.dataframe(edu)
    else:
        st.write("No higher education opportunities available for your background.")
else:
    # No file yet — prompt the user instead of rendering empty sections.
    st.info("Please upload your CV to begin.")