Spaces:
Sleeping
Sleeping
File size: 3,319 Bytes
b2c1f4f 60afec2 bba628e 60afec2 bba628e 60afec2 bba628e 60afec2 bba628e 60afec2 bba628e 60afec2 b2c1f4f 60afec2 bba628e 60afec2 bba628e 60afec2 bba628e 60afec2 bba628e 60afec2 bba628e 60afec2 bba628e 60afec2 b2c1f4f bba628e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import streamlit as st
import pandas as pd
import pdfplumber
import spacy
# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(page_title="Skill Scoring App", layout="wide")

# Load the spaCy English model, fetching it only when it is not installed.
# The previous unconditional download ran on every execution of this script,
# even when the model was already available locally.
import spacy.cli

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package cannot be found.
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")
# Reference tables used by the scoring and recommendation helpers.
# Each CSV is read from a path relative to the current working directory,
# in the same order as the names on the left-hand side.
skills_df, countries_df, cert_df, edu_tech_df, edu_non_tech_df = (
    pd.read_csv(csv_name)
    for csv_name in (
        "skills_dataset.csv",
        "countries_dataset.csv",
        "certifications.csv",
        "education_technical.csv",
        "education_non_technical.csv",
    )
)
# Helper functions
def extract_text_from_pdf(file):
    """Return the text of every page in a PDF, joined with newlines.

    Args:
        file: Anything ``pdfplumber.open`` accepts; the app passes the
            object returned by the Streamlit file uploader.

    Returns:
        A single string with one entry per page that yielded text. Pages
        whose extraction result is falsy are skipped, so the result is
        ``""`` when no page has extractable text.
    """
    with pdfplumber.open(file) as pdf:
        # Extract each page exactly once; the earlier version ran the
        # extraction twice per page (once to filter, once to collect).
        page_texts = (page.extract_text() for page in pdf.pages)
        # The generator is consumed here, while the PDF is still open.
        return "\n".join(page_text for page_text in page_texts if page_text)
def extract_entities(text):
    """Find catalogue skills mentioned in *text* and classify the background.

    The text is tokenized with the module-level spaCy pipeline. Runs of
    consecutive tokens are joined with single spaces and compared
    (case-sensitively) against the ``Skill`` column of ``skills_df``, so
    multi-word skills such as "Machine Learning" can be recognized as well
    as single-word ones. Matching one token at a time could never find
    them, because a token does not span several words.

    Args:
        text: Plain text of the CV.

    Returns:
        A ``(skills, background)`` tuple: ``skills`` is a sorted list of the
        distinct catalogue skills found, and ``background`` is
        ``"technical"`` when at least one of them is in the built-in
        technical set, otherwise ``"non-technical"``.
    """
    doc = nlp(text)

    # Build the lookup once; testing against the raw column array for every
    # token is a linear scan each time.
    known_skills = {
        skill for skill in skills_df['Skill'].values if isinstance(skill, str)
    }
    # Longest skill, in words, bounds how many tokens are worth joining.
    longest = max((len(skill.split()) for skill in known_skills), default=1)

    # Whitespace-only tokens (e.g. line breaks from the PDF) are dropped so
    # a skill split across two lines is still seen as adjacent words.
    words = [token.text for token in doc if not token.is_space]

    found = set()
    for start in range(len(words)):
        widest = min(longest, len(words) - start)
        for width in range(1, widest + 1):
            phrase = " ".join(words[start:start + width])
            if phrase in known_skills:
                found.add(phrase)

    # Determine technical vs non-technical background
    technical_skills = {"Python", "Machine Learning", "Cloud Computing", "Cybersecurity", "AI", "DevOps"}
    background = "non-technical" if technical_skills.isdisjoint(found) else "technical"

    # Sorted so the result is stable from run to run.
    return sorted(found), background
def score_skills(user_skills, total_skills=None):
    """Score the share of catalogue skills the user has, as an integer 0-100.

    Args:
        user_skills: Iterable of the user's skills; duplicates are counted
            once.
        total_skills: Size of the catalogue to score against. Defaults to
            the number of rows in the module-level ``skills_df``.

    Returns:
        ``0`` when the catalogue is empty, otherwise the percentage of the
        catalogue covered, rounded down and capped at 100.
    """
    if total_skills is None:
        total_skills = len(skills_df)
    if total_skills <= 0:
        return 0
    matched = len(set(user_skills))
    # Integer arithmetic avoids float truncation: int(29 / 100 * 100) is 28
    # because the intermediate product is 28.999999999999996.
    return min(100, matched * 100 // total_skills)
def recommend_countries(skills):
    """Return distinct country rows from ``countries_df`` for the given skills.

    Rows are kept when their ``Skill`` value is in *skills*; only the
    ``Country``, ``AverageSalary`` and ``VisaPath`` columns are returned,
    with duplicate rows removed and the index renumbered from zero.
    """
    wanted_columns = ["Country", "AverageSalary", "VisaPath"]
    skill_matches = countries_df['Skill'].isin(skills)
    matching_rows = countries_df[skill_matches]
    distinct_rows = matching_rows[wanted_columns].drop_duplicates()
    return distinct_rows.reset_index(drop=True)
def recommend_certifications(skills):
    """Return the rows of ``cert_df`` whose ``Skill`` value is in *skills*.

    The result keeps every column and has its index renumbered from zero.
    """
    has_wanted_skill = cert_df['Skill'].isin(skills)
    matching_certs = cert_df.loc[has_wanted_skill]
    return matching_certs.reset_index(drop=True)
def recommend_education(background):
    """Return the education table matching *background*.

    ``"technical"`` selects ``edu_tech_df``; any other value selects
    ``edu_non_tech_df``. The returned frame has its index renumbered from
    zero.
    """
    if background == "technical":
        programmes = edu_tech_df
    else:
        programmes = edu_non_tech_df
    return programmes.reset_index(drop=True)
# Streamlit UI
def _show_table_or_message(table, empty_message):
    """Render *table* as a dataframe, or *empty_message* when it has no rows."""
    if table.empty:
        st.write(empty_message)
    else:
        st.dataframe(table)


# NOTE(review): the leading "๐" in several labels below looks like a
# mis-decoded emoji whose remaining bytes were lost; it is left unchanged
# because the intended glyph cannot be determined from this file. Restore
# the original emoji when the source encoding is known.
st.title("๐ Personalized Skill Scoring & Career Path App")
st.markdown(
    "Upload your CV and get a personalized career guide based on your skills and background."
)

uploaded_file = st.file_uploader("📤 Upload your CV (PDF format only)", type=["pdf"])

if uploaded_file:
    # All analysis runs under the spinner; results are rendered afterwards.
    with st.spinner("Analyzing your CV..."):
        text = extract_text_from_pdf(uploaded_file)
        skills, background = extract_entities(text)
        score = score_skills(skills)
        country_info = recommend_countries(skills)
        certs = recommend_certifications(skills)
        edu = recommend_education(background)

    # This label was previously split across two lines by a mis-decoded
    # check-mark emoji, which left an unterminated string literal.
    st.subheader("✅ Identified Skills")
    # An empty skill list is falsy, so the fallback message is shown instead.
    st.write(skills or "No recognized skills found.")

    st.subheader("๐ Skill Score")
    st.metric("Your Skill Score", f"{score}/100")

    st.subheader("๐ Country Recommendations")
    _show_table_or_message(
        country_info,
        "No country recommendations available for your skill set.",
    )

    st.subheader("๐ Recommended Certifications")
    _show_table_or_message(
        certs,
        "No certification recommendations available for your skill set.",
    )

    st.subheader("๐ Higher Education Opportunities")
    _show_table_or_message(
        edu,
        "No higher education opportunities available for your background.",
    )
else:
    st.info("Please upload your CV to begin.")
|