import streamlit as st import pandas as pd import pdfplumber import spacy import requests import plotly.express as px from datetime import datetime, timedelta # Page config st.set_page_config(page_title="Skill Scoring & Career Roadmap App", layout="wide") # Load spaCy model nlp = spacy.load("en_core_web_sm") # Load datasets skills_df = pd.read_csv("data/skills_dataset.csv") countries_df = pd.read_csv("data/countries_dataset.csv") cert_df = pd.read_csv("data/certifications.csv") edu_tech_df = pd.read_csv("data/education_technical.csv") edu_non_tech_df = pd.read_csv("data/education_non_technical.csv") scholarship_df = pd.read_csv("data/scholarships_dataset.csv") # Helper functions def extract_text_from_pdf(file): with pdfplumber.open(file) as pdf: return "\n".join(page.extract_text() for page in pdf.pages if page.extract_text()) def extract_entities(text): doc = nlp(text) skills = [token.text for token in doc if token.text in skills_df['Skill'].values] technical_skills = {"Python", "Machine Learning", "Cloud Computing", "Cybersecurity", "AI", "DevOps"} background = "technical" if any(s in technical_skills for s in skills) else "non-technical" years_exp = 3 # Placeholder, replace with better extraction logic return list(set(skills)), background, years_exp def score_skills(user_skills): if not skills_df.shape[0]: return 0 return int((len(user_skills) / len(skills_df)) * 100) def recommend_countries(skills, years_exp): df = countries_df[countries_df['Skill'].isin(skills)] df = df[df['MinExperience'] <= years_exp] return df[["Country", "JobTitle", "AverageSalary", "VisaPath"]].drop_duplicates().reset_index(drop=True) def recommend_certifications(skills): return cert_df[cert_df['Skill'].isin(skills)].reset_index(drop=True) def recommend_education(background): return edu_tech_df.reset_index(drop=True) if background == "technical" else edu_non_tech_df.reset_index(drop=True) def recommend_scholarships(field): return scholarship_df[scholarship_df["Field"].str.lower() == field.lower()].reset_index(drop=True) def fetch_jobs(skill, country_code="us", max_results=5): app_id = "f4efd3a2" app_key = "5702f3c0507ac69f98aa15f855b06901" url = f"https://api.adzuna.com/v1/api/jobs/{country_code}/search/1" params = { "app_id": app_id, "app_key": app_key, "results_per_page": max_results, "what": skill, "content-type": "application/json" } response = requests.get(url, params=params) if response.status_code == 200: return response.json()["results"] else: return [] def create_dynamic_roadmap(skills, certs, scholarships, edu_opps): now = datetime.now() roadmap = [] # Add certifications to roadmap if not certs.empty and "Certification" in certs.columns: for i, cert in enumerate(certs['Certification'].tolist()[:2]): roadmap.append({ "Task": f"Complete Certification: {cert}", "Start": (now + timedelta(days=i*30)).strftime("%Y-%m-%d"), "Finish": (now + timedelta(days=(i+1)*30)).strftime("%Y-%m-%d"), }) # Add scholarships to roadmap if not scholarships.empty and "Scholarship" in scholarships.columns: for i, scholarship in enumerate(scholarships['Scholarship'].tolist()[:2]): roadmap.append({ "Task": f"Apply for Scholarship: {scholarship}", "Start": (now + timedelta(days=60 + i*30)).strftime("%Y-%m-%d"), "Finish": (now + timedelta(days=90 + i*30)).strftime("%Y-%m-%d"), }) # Add education opportunities to roadmap if not edu_opps.empty and "Program" in edu_opps.columns: for i, edu in enumerate(edu_opps['Program'].tolist()[:1]): roadmap.append({ "Task": f"Pursue Education: {edu}", "Start": (now + timedelta(days=120)).strftime("%Y-%m-%d"), "Finish": (now + timedelta(days=480)).strftime("%Y-%m-%d"), }) return pd.DataFrame(roadmap) # Streamlit UI st.title("📊 Personalized Skill Scoring & Career Roadmap App") st.markdown("Upload your CV and get a detailed career roadmap with live job listings.") uploaded_file = st.file_uploader("📤 Upload your CV (PDF only)", type=["pdf"]) if uploaded_file: with st.spinner("Analyzing your CV..."): text = extract_text_from_pdf(uploaded_file) skills, background, years_exp = extract_entities(text) score = score_skills(skills) country_info = recommend_countries(skills, years_exp) certs = recommend_certifications(skills) edu = recommend_education(background) field = background # Simplified; you should detect actual field from CV scholarships = recommend_scholarships(field) st.subheader("✅ Identified Skills") st.write(skills or "No recognized skills found.") st.subheader("📈 Skill Score") st.metric("Your Skill Score", f"{score}/100") st.subheader("🌍 Job Opportunities & Country Recommendations") if not country_info.empty: st.dataframe(country_info) else: st.write("No country/job recommendations available for your skill set.") st.subheader("🎓 Recommended Certifications") if not certs.empty: st.dataframe(certs) else: st.write("No certification recommendations available.") st.subheader("🎓 Higher Education Opportunities") if not edu.empty: st.dataframe(edu) else: st.write("No higher education opportunities available.") st.subheader("🎓 Scholarship Opportunities") if not scholarships.empty: st.dataframe(scholarships) else: st.write("No scholarships available for your field.") # Dynamic roadmap timeline generation & display with checks roadmap_df = create_dynamic_roadmap(skills, certs, scholarships, edu) st.write("Roadmap DataFrame preview:") st.dataframe(roadmap_df) required_cols = {"Task", "Start", "Finish"} if not roadmap_df.empty and required_cols.issubset(roadmap_df.columns): fig = px.timeline( roadmap_df, x_start="Start", x_end="Finish", y="Task", title="Career Roadmap Timeline" ) fig.update_yaxes(autorange="reversed") st.plotly_chart(fig, use_container_width=True) else: st.warning("No roadmap tasks to display or roadmap data missing required columns.") # Show live job listings using first identified skill and first country code if skills and not country_info.empty: st.subheader(f"🔍 Live Job Listings for '{skills[0]}'") country_code_map = { "USA": "us", "Canada": "ca", "UK": "gb", "Germany": "de", "Australia": "au", "India": "in", "Netherlands": "nl" } country_code = country_code_map.get(country_info.iloc[0]["Country"], "us") jobs = fetch_jobs(skills[0], country_code=country_code, max_results=5) if jobs: for job in jobs: st.markdown(f"**[{job['title']}]({job['redirect_url']})** - {job['location']['display_name']}") st.markdown(f"*{job.get('description', '')[:200]}...*") st.markdown("---") else: st.write("No live job listings found.") else: st.info("Please upload your CV to begin.")