import streamlit as st import pandas as pd import pdfplumber import spacy # Set page config at the top st.set_page_config(page_title="Skill Scoring App", layout="wide") # Load spaCy model import spacy.cli spacy.cli.download("en_core_web_sm") nlp = spacy.load("en_core_web_sm") # Load datasets skills_df = pd.read_csv("skills_dataset.csv") countries_df = pd.read_csv("countries_dataset.csv") cert_df = pd.read_csv("certifications.csv") edu_tech_df = pd.read_csv("education_technical.csv") edu_non_tech_df = pd.read_csv("education_non_technical.csv") # Helper functions def extract_text_from_pdf(file): with pdfplumber.open(file) as pdf: return "\n".join( page.extract_text() for page in pdf.pages if page.extract_text() ) def extract_entities(text): doc = nlp(text) # Identify skills present in the CV skills = [token.text for token in doc if token.text in skills_df['Skill'].values] # Determine technical vs non‑technical background technical_skills = {"Python", "Machine Learning", "Cloud Computing", "Cybersecurity", "AI", "DevOps"} background = "technical" if any(s in technical_skills for s in skills) else "non-technical" return list(set(skills)), background def score_skills(user_skills): if not skills_df.shape[0]: return 0 return int((len(user_skills) / len(skills_df)) * 100) def recommend_countries(skills): df = countries_df[countries_df['Skill'].isin(skills)] return df[["Country", "AverageSalary", "VisaPath"]].drop_duplicates().reset_index(drop=True) def recommend_certifications(skills): return cert_df[cert_df['Skill'].isin(skills)].reset_index(drop=True) def recommend_education(background): return edu_tech_df.reset_index(drop=True) if background == "technical" else edu_non_tech_df.reset_index(drop=True) # Streamlit UI st.title("πŸ“Š Personalized Skill Scoring & Career Path App") st.markdown( "Upload your CV and get a personalized career guide based on your skills and background." ) uploaded_file = st.file_uploader("πŸ“€ Upload your CV (PDF format only)", type=["pdf"]) if uploaded_file: with st.spinner("Analyzing your CV..."): text = extract_text_from_pdf(uploaded_file) skills, background = extract_entities(text) score = score_skills(skills) country_info = recommend_countries(skills) certs = recommend_certifications(skills) edu = recommend_education(background) st.subheader("βœ… Identified Skills") st.write(skills or "No recognized skills found.") st.subheader("πŸ“ˆ Skill Score") st.metric("Your Skill Score", f"{score}/100") st.subheader("🌍 Country Recommendations") if not country_info.empty: st.dataframe(country_info) else: st.write("No country recommendations available for your skill set.") st.subheader("πŸŽ“ Recommended Certifications") if not certs.empty: st.dataframe(certs) else: st.write("No certification recommendations available for your skill set.") st.subheader("πŸŽ“ Higher Education Opportunities") if not edu.empty: st.dataframe(edu) else: st.write("No higher education opportunities available for your background.") else: st.info("Please upload your CV to begin.")