File size: 3,319 Bytes
b2c1f4f
 
60afec2
bba628e
 
60afec2
bba628e
 
60afec2
 
 
bba628e
 
60afec2
bba628e
 
60afec2
 
 
 
 
 
 
 
 
 
 
 
bba628e
 
 
60afec2
 
 
 
 
 
b2c1f4f
60afec2
 
 
 
bba628e
60afec2
 
 
bba628e
60afec2
 
bba628e
60afec2
 
bba628e
60afec2
 
 
 
 
bba628e
60afec2
bba628e
60afec2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2c1f4f
bba628e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import re

import pandas as pd
import pdfplumber
import spacy
import streamlit as st

# Set page config at the top (must be the first Streamlit call in the script)
st.set_page_config(page_title="Skill Scoring App", layout="wide")

# Load spaCy model.
# Bug fix: the original unconditionally downloaded "en_core_web_sm" on every
# run, requiring network access and slowing each start-up. Try the installed
# model first and download only when it is missing (spacy.load raises OSError
# for an absent model package).
import spacy.cli

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Model not installed yet — fetch it once, then load.
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Load the reference datasets used by the helper functions below.
# NOTE(review): paths are relative to the working directory — assumed to be
# the app root; confirm against the deployment setup.
skills_df = pd.read_csv("skills_dataset.csv")
countries_df = pd.read_csv("countries_dataset.csv")
cert_df = pd.read_csv("certifications.csv")
edu_tech_df = pd.read_csv("education_technical.csv")
edu_non_tech_df = pd.read_csv("education_non_technical.csv")

# Helper functions
def extract_text_from_pdf(file):
    """Return the text of every page of *file*, joined with newlines.

    Pages from which pdfplumber can extract no text are skipped entirely
    (no empty entry, no separator for them).
    """
    pages_text = []
    with pdfplumber.open(file) as pdf:
        for page in pdf.pages:
            content = page.extract_text()
            if content:
                pages_text.append(content)
    return "\n".join(pages_text)

def extract_entities(text):
    """Identify known skills in the CV text and classify the background.

    Bug fix: the original compared individual spaCy tokens against the skill
    list, so multi-word skills such as "Machine Learning" or "Cloud Computing"
    could never match (a token is a single word) — even though the
    technical-skill set below expects them. Skills are now matched as whole
    phrases, case-insensitively and anchored on word boundaries, directly
    against the text.

    Args:
        text: Raw text extracted from the CV.

    Returns:
        A tuple ``(skills, background)`` where ``skills`` is a de-duplicated
        list of matched skill names (as spelled in ``skills_df``) and
        ``background`` is ``"technical"`` or ``"non-technical"``.
    """
    skills = []
    for skill in skills_df['Skill'].dropna().astype(str):
        # Word-boundary anchors prevent e.g. "AI" matching inside "maintain".
        pattern = r"\b" + re.escape(skill) + r"\b"
        if re.search(pattern, text, flags=re.IGNORECASE):
            skills.append(skill)
    # Determine technical vs non-technical background
    technical_skills = {"Python", "Machine Learning", "Cloud Computing", "Cybersecurity", "AI", "DevOps"}
    background = "technical" if any(s in technical_skills for s in skills) else "non-technical"
    return list(set(skills)), background

def score_skills(user_skills):
    """Return the user's skill score: matched skills as a percentage (0-100)
    of the full skill catalogue, truncated to an integer."""
    total = len(skills_df)
    if total == 0:
        # Empty catalogue — nothing to score against.
        return 0
    return int((len(user_skills) / total) * 100)

def recommend_countries(skills):
    """Return unique (Country, AverageSalary, VisaPath) rows whose Skill
    column matches any of the given skills, with a fresh 0-based index."""
    matches = countries_df['Skill'].isin(skills)
    wanted_columns = ["Country", "AverageSalary", "VisaPath"]
    result = countries_df[matches][wanted_columns]
    return result.drop_duplicates().reset_index(drop=True)

def recommend_certifications(skills):
    """Return certification rows relevant to any of the given skills,
    reindexed from zero."""
    relevant = cert_df['Skill'].isin(skills)
    return cert_df[relevant].reset_index(drop=True)

def recommend_education(background):
    """Return the education table for the user's background: the technical
    table when *background* == "technical", otherwise the non-technical one."""
    if background == "technical":
        chosen = edu_tech_df
    else:
        chosen = edu_non_tech_df
    return chosen.reset_index(drop=True)

# Streamlit UI
# Bug fix: every emoji literal in the UI strings was mojibake (the emoji's
# UTF-8 bytes decoded through a legacy Thai/Windows-874 code page, rendering
# as strings like "๐Ÿ“Š"); restored to the intended emoji.
st.title("📊 Personalized Skill Scoring & Career Path App")
st.markdown(
    "Upload your CV and get a personalized career guide based on your skills and background."
)

uploaded_file = st.file_uploader("📤 Upload your CV (PDF format only)", type=["pdf"])

if uploaded_file:
    # Run the full analysis pipeline inside a spinner so the user sees
    # progress while the PDF is parsed and matched against the datasets.
    with st.spinner("Analyzing your CV..."):
        text = extract_text_from_pdf(uploaded_file)
        skills, background = extract_entities(text)
        score = score_skills(skills)
        country_info = recommend_countries(skills)
        certs = recommend_certifications(skills)
        edu = recommend_education(background)

    st.subheader("✅ Identified Skills")
    # `skills or ...` shows the fallback message when the list is empty.
    st.write(skills or "No recognized skills found.")

    st.subheader("📈 Skill Score")
    st.metric("Your Skill Score", f"{score}/100")

    st.subheader("🌍 Country Recommendations")
    if not country_info.empty:
        st.dataframe(country_info)
    else:
        st.write("No country recommendations available for your skill set.")

    st.subheader("🎓 Recommended Certifications")
    if not certs.empty:
        st.dataframe(certs)
    else:
        st.write("No certification recommendations available for your skill set.")

    st.subheader("🎓 Higher Education Opportunities")
    if not edu.empty:
        st.dataframe(edu)
    else:
        st.write("No higher education opportunities available for your background.")
else:
    # No file yet — prompt the user instead of rendering empty sections.
    st.info("Please upload your CV to begin.")