# Streamlit app: Personalized Skill Scoring & Career Roadmap
# (removed Hugging Face Spaces status residue that was pasted above the code)
import os
from datetime import datetime, timedelta

import pandas as pd
import pdfplumber
import plotly.express as px
import requests
import spacy
import streamlit as st
# --- Page configuration -------------------------------------------------
st.set_page_config(page_title="Skill Scoring & Career Roadmap App", layout="wide")

# --- NLP pipeline used to parse uploaded CVs ----------------------------
nlp = spacy.load("en_core_web_sm")

# --- Reference datasets backing every recommendation --------------------
skills_df = pd.read_csv("data/skills_dataset.csv")
countries_df = pd.read_csv("data/countries_dataset.csv")
cert_df = pd.read_csv("data/certifications.csv")
edu_tech_df = pd.read_csv("data/education_technical.csv")
edu_non_tech_df = pd.read_csv("data/education_non_technical.csv")
scholarship_df = pd.read_csv("data/scholarships_dataset.csv")
| # Helper functions | |
def extract_text_from_pdf(file):
    """Return the concatenated text of every page in an uploaded PDF.

    Pages for which pdfplumber yields no text (e.g. scanned images) are
    skipped so they do not contribute empty lines.
    """
    with pdfplumber.open(file) as pdf:
        chunks = []
        for page in pdf.pages:
            content = page.extract_text()
            if content:
                chunks.append(content)
        return "\n".join(chunks)
def extract_entities(text):
    """Extract known skills, a background flag, and experience from CV text.

    Skills are matched against the ``Skill`` column of the module-level
    ``skills_df`` by case-insensitive substring search on the raw text.
    The previous token-by-token spaCy comparison could never match
    multi-word skills such as "Machine Learning" or "Cloud Computing",
    because each spaCy token is a single word.

    Returns:
        tuple: (unique skill names spelled as in the dataset,
                "technical" or "non-technical",
                estimated years of experience).
    """
    text_lower = text.lower()
    known_skills = skills_df['Skill'].dropna().astype(str)
    skills = [s for s in known_skills if s.lower() in text_lower]

    technical_skills = {"Python", "Machine Learning", "Cloud Computing",
                        "Cybersecurity", "AI", "DevOps"}
    background = "technical" if any(s in technical_skills for s in skills) else "non-technical"

    # TODO: derive real experience from the CV; fixed placeholder for now.
    years_exp = 3
    return list(set(skills)), background, years_exp
def score_skills(user_skills, skill_pool=None):
    """Score a skill set as the percentage of known skills it covers.

    Args:
        user_skills: iterable/list of skill names found in the CV.
        skill_pool: optional DataFrame with a ``Skill`` column to score
            against; defaults to the module-level ``skills_df``.

    Returns:
        int: coverage percentage, clamped to [0, 100].
    """
    pool = skills_df if skill_pool is None else skill_pool
    total = len(pool)
    if not total:
        return 0  # empty dataset: avoid division by zero
    # Clamp so duplicates or extras can never push the score past 100.
    return min(100, int((len(user_skills) / total) * 100))
def recommend_countries(skills, years_exp, pool=None):
    """Return country/job rows matching the user's skills and experience.

    Args:
        skills: skill names to look up.
        years_exp: years of experience; rows demanding more are dropped.
        pool: optional DataFrame with Skill/MinExperience/Country/JobTitle/
            AverageSalary/VisaPath columns; defaults to ``countries_df``.

    Returns:
        DataFrame with Country, JobTitle, AverageSalary, VisaPath columns,
        de-duplicated and re-indexed.
    """
    df = countries_df if pool is None else pool
    matched = df[df['Skill'].isin(skills) & (df['MinExperience'] <= years_exp)]
    return (matched[["Country", "JobTitle", "AverageSalary", "VisaPath"]]
            .drop_duplicates()
            .reset_index(drop=True))
def recommend_certifications(skills, pool=None):
    """Return certifications whose ``Skill`` matches any of the user's skills.

    Args:
        skills: skill names to match.
        pool: optional certifications DataFrame with a ``Skill`` column;
            defaults to the module-level ``cert_df``.
    """
    df = cert_df if pool is None else pool
    return df[df['Skill'].isin(skills)].reset_index(drop=True)
def recommend_education(background):
    """Pick the education track matching the user's background.

    Returns the technical programmes for a "technical" background and the
    non-technical programmes otherwise, with a clean index either way.
    """
    if background == "technical":
        chosen = edu_tech_df
    else:
        chosen = edu_non_tech_df
    return chosen.reset_index(drop=True)
def recommend_scholarships(field):
    """Return scholarships whose ``Field`` equals *field* (case-insensitive)."""
    wanted = field.lower()
    matches = scholarship_df["Field"].str.lower() == wanted
    return scholarship_df[matches].reset_index(drop=True)
def fetch_jobs(skill, country_code="us", max_results=5):
    """Fetch live job listings for *skill* from the Adzuna search API.

    Args:
        skill: search phrase (typically one extracted skill).
        country_code: two-letter Adzuna country code (e.g. "us", "gb").
        max_results: maximum number of listings to request.

    Returns:
        list: the ``results`` payload from Adzuna, or ``[]`` on any HTTP,
        network, or JSON-decoding failure (the UI renders that as
        "No live job listings found.").
    """
    # SECURITY: credentials belong in the environment / st.secrets, not in
    # source control; the literals remain only as a backward-compat fallback.
    app_id = os.environ.get("ADZUNA_APP_ID", "f4efd3a2")
    app_key = os.environ.get("ADZUNA_APP_KEY", "5702f3c0507ac69f98aa15f855b06901")
    url = f"https://api.adzuna.com/v1/api/jobs/{country_code}/search/1"
    params = {
        "app_id": app_id,
        "app_key": app_key,
        "results_per_page": max_results,
        "what": skill,
        "content-type": "application/json",
    }
    try:
        # Timeout keeps the Streamlit UI from hanging on a dead endpoint.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        return response.json().get("results", [])
    except (requests.RequestException, ValueError):
        # Network error, non-2xx status, or malformed JSON body.
        return []
def create_dynamic_roadmap(skills, certs, scholarships, edu_opps):
    """Build a Gantt-style task table (Task/Start/Finish) of next steps.

    Dates are anchored at "now": up to two certifications fill months 1-2,
    up to two scholarship applications months 3-4, and a single education
    programme starts around month 4 and runs roughly a year. The *skills*
    argument is currently unused but kept for interface stability.
    """
    anchor = datetime.now()

    def window(offset_days, length_days):
        # ISO-formatted [start, finish] pair relative to the anchor date.
        begin = anchor + timedelta(days=offset_days)
        end = anchor + timedelta(days=offset_days + length_days)
        return begin.strftime("%Y-%m-%d"), end.strftime("%Y-%m-%d")

    tasks = []

    if not certs.empty and "Certification" in certs.columns:
        for idx, name in enumerate(certs['Certification'].tolist()[:2]):
            begin, end = window(idx * 30, 30)
            tasks.append({"Task": f"Complete Certification: {name}",
                          "Start": begin, "Finish": end})

    if not scholarships.empty and "Scholarship" in scholarships.columns:
        for idx, name in enumerate(scholarships['Scholarship'].tolist()[:2]):
            begin, end = window(60 + idx * 30, 30)
            tasks.append({"Task": f"Apply for Scholarship: {name}",
                          "Start": begin, "Finish": end})

    if not edu_opps.empty and "Program" in edu_opps.columns:
        for name in edu_opps['Program'].tolist()[:1]:
            begin, end = window(120, 360)
            tasks.append({"Task": f"Pursue Education: {name}",
                          "Start": begin, "Finish": end})

    return pd.DataFrame(tasks)
# ----------------------------- Streamlit UI -----------------------------
st.title("π Personalized Skill Scoring & Career Roadmap App")
st.markdown("Upload your CV and get a detailed career roadmap with live job listings.")

uploaded_file = st.file_uploader("π€ Upload your CV (PDF only)", type=["pdf"])

if uploaded_file:
    # Parse the CV and precompute every recommendation up front.
    with st.spinner("Analyzing your CV..."):
        text = extract_text_from_pdf(uploaded_file)
        skills, background, years_exp = extract_entities(text)
        score = score_skills(skills)
        country_info = recommend_countries(skills, years_exp)
        certs = recommend_certifications(skills)
        edu = recommend_education(background)
        field = background  # Simplified; you should detect actual field from CV
        scholarships = recommend_scholarships(field)

    st.subheader("β Identified Skills")
    st.write(skills or "No recognized skills found.")

    st.subheader("π Skill Score")
    st.metric("Your Skill Score", f"{score}/100")

    st.subheader("π Job Opportunities & Country Recommendations")
    if not country_info.empty:
        st.dataframe(country_info)
    else:
        st.write("No country/job recommendations available for your skill set.")

    st.subheader("π Recommended Certifications")
    if not certs.empty:
        st.dataframe(certs)
    else:
        st.write("No certification recommendations available.")

    st.subheader("π Higher Education Opportunities")
    if not edu.empty:
        st.dataframe(edu)
    else:
        st.write("No higher education opportunities available.")

    st.subheader("π Scholarship Opportunities")
    if not scholarships.empty:
        st.dataframe(scholarships)
    else:
        st.write("No scholarships available for your field.")

    # Dynamic roadmap timeline: only chart when the frame has the columns
    # px.timeline needs, otherwise warn instead of crashing.
    roadmap_df = create_dynamic_roadmap(skills, certs, scholarships, edu)
    st.write("Roadmap DataFrame preview:")
    st.dataframe(roadmap_df)
    required_cols = {"Task", "Start", "Finish"}
    if not roadmap_df.empty and required_cols.issubset(roadmap_df.columns):
        fig = px.timeline(
            roadmap_df,
            x_start="Start",
            x_end="Finish",
            y="Task",
            title="Career Roadmap Timeline",
        )
        fig.update_yaxes(autorange="reversed")  # earliest task on top
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.warning("No roadmap tasks to display or roadmap data missing required columns.")

    # Live job listings for the first identified skill, localized to the
    # first recommended country (falling back to the US market).
    if skills and not country_info.empty:
        st.subheader(f"π Live Job Listings for '{skills[0]}'")
        country_code_map = {
            "USA": "us",
            "Canada": "ca",
            "UK": "gb",
            "Germany": "de",
            "Australia": "au",
            "India": "in",
            "Netherlands": "nl",
        }
        country_code = country_code_map.get(country_info.iloc[0]["Country"], "us")
        jobs = fetch_jobs(skills[0], country_code=country_code, max_results=5)
        if jobs:
            for job in jobs:
                st.markdown(f"**[{job['title']}]({job['redirect_url']})** - {job['location']['display_name']}")
                st.markdown(f"*{job.get('description', '')[:200]}...*")
                st.markdown("---")
        else:
            st.write("No live job listings found.")
else:
    st.info("Please upload your CV to begin.")