# Streamlit app: Personalized Skill Scoring & Career Roadmap
# (removed Hugging Face Spaces status residue that was pasted above the code)
import os
from datetime import datetime, timedelta

import pandas as pd
import pdfplumber
import plotly.express as px
import requests
import spacy
import streamlit as st
# --- Page configuration -------------------------------------------------
st.set_page_config(page_title="Skill Scoring & Career Roadmap App", layout="wide")

# --- NLP pipeline used to parse uploaded CVs ----------------------------
nlp = spacy.load("en_core_web_sm")

# --- Reference datasets backing every recommendation --------------------
skills_df = pd.read_csv("data/skills_dataset.csv")
countries_df = pd.read_csv("data/countries_dataset.csv")
cert_df = pd.read_csv("data/certifications.csv")
edu_tech_df = pd.read_csv("data/education_technical.csv")
edu_non_tech_df = pd.read_csv("data/education_non_technical.csv")
scholarship_df = pd.read_csv("data/scholarships_dataset.csv")
| # Helper functions | |
def extract_text_from_pdf(file):
    """Return the concatenated text of every page in an uploaded PDF.

    Pages for which pdfplumber yields no text (e.g. scanned images) are
    skipped so they do not contribute empty lines.
    """
    with pdfplumber.open(file) as pdf:
        chunks = []
        for page in pdf.pages:
            content = page.extract_text()
            if content:
                chunks.append(content)
        return "\n".join(chunks)
def extract_entities(text):
    """Extract known skills, a background flag, and experience from CV text.

    Skills are matched against the ``Skill`` column of the module-level
    ``skills_df`` by case-insensitive substring search on the raw text.
    The previous token-by-token spaCy comparison could never match
    multi-word skills such as "Machine Learning" or "Cloud Computing",
    because each spaCy token is a single word.

    Returns:
        tuple: (unique skill names spelled as in the dataset,
                "technical" or "non-technical",
                estimated years of experience).
    """
    text_lower = text.lower()
    known_skills = skills_df['Skill'].dropna().astype(str)
    skills = [s for s in known_skills if s.lower() in text_lower]

    technical_skills = {"Python", "Machine Learning", "Cloud Computing",
                        "Cybersecurity", "AI", "DevOps"}
    background = "technical" if any(s in technical_skills for s in skills) else "non-technical"

    # TODO: derive real experience from the CV; fixed placeholder for now.
    years_exp = 3
    return list(set(skills)), background, years_exp
def score_skills(user_skills, skill_pool=None):
    """Score a skill set as the percentage of known skills it covers.

    Args:
        user_skills: iterable/list of skill names found in the CV.
        skill_pool: optional DataFrame with a ``Skill`` column to score
            against; defaults to the module-level ``skills_df``.

    Returns:
        int: coverage percentage, clamped to [0, 100].
    """
    pool = skills_df if skill_pool is None else skill_pool
    total = len(pool)
    if not total:
        return 0  # empty dataset: avoid division by zero
    # Clamp so duplicates or extras can never push the score past 100.
    return min(100, int((len(user_skills) / total) * 100))
def recommend_countries(skills, years_exp, pool=None):
    """Return country/job rows matching the user's skills and experience.

    Args:
        skills: skill names to look up.
        years_exp: years of experience; rows demanding more are dropped.
        pool: optional DataFrame with Skill/MinExperience/Country/JobTitle/
            AverageSalary/VisaPath columns; defaults to ``countries_df``.

    Returns:
        DataFrame with Country, JobTitle, AverageSalary, VisaPath columns,
        de-duplicated and re-indexed.
    """
    df = countries_df if pool is None else pool
    matched = df[df['Skill'].isin(skills) & (df['MinExperience'] <= years_exp)]
    return (matched[["Country", "JobTitle", "AverageSalary", "VisaPath"]]
            .drop_duplicates()
            .reset_index(drop=True))
def recommend_certifications(skills, pool=None):
    """Return certifications whose ``Skill`` matches any of the user's skills.

    Args:
        skills: skill names to match.
        pool: optional certifications DataFrame with a ``Skill`` column;
            defaults to the module-level ``cert_df``.
    """
    df = cert_df if pool is None else pool
    return df[df['Skill'].isin(skills)].reset_index(drop=True)
def recommend_education(background):
    """Pick the education track matching the user's background.

    Returns the technical programmes for a "technical" background and the
    non-technical programmes otherwise, with a clean index either way.
    """
    if background == "technical":
        chosen = edu_tech_df
    else:
        chosen = edu_non_tech_df
    return chosen.reset_index(drop=True)
def recommend_scholarships(field):
    """Return scholarships whose ``Field`` equals *field* (case-insensitive)."""
    wanted = field.lower()
    matches = scholarship_df["Field"].str.lower() == wanted
    return scholarship_df[matches].reset_index(drop=True)
def fetch_jobs(skill, country_code="us", max_results=5):
    """Fetch live job listings for *skill* from the Adzuna search API.

    Args:
        skill: search phrase (typically one extracted skill).
        country_code: two-letter Adzuna country code (e.g. "us", "gb").
        max_results: maximum number of listings to request.

    Returns:
        list: the ``results`` payload from Adzuna, or ``[]`` on any HTTP,
        network, or JSON-decoding failure (the UI renders that as
        "No live job listings found.").
    """
    # SECURITY: credentials belong in the environment / st.secrets, not in
    # source control; the literals remain only as a backward-compat fallback.
    app_id = os.environ.get("ADZUNA_APP_ID", "f4efd3a2")
    app_key = os.environ.get("ADZUNA_APP_KEY", "5702f3c0507ac69f98aa15f855b06901")
    url = f"https://api.adzuna.com/v1/api/jobs/{country_code}/search/1"
    params = {
        "app_id": app_id,
        "app_key": app_key,
        "results_per_page": max_results,
        "what": skill,
        "content-type": "application/json",
    }
    try:
        # Timeout keeps the Streamlit UI from hanging on a dead endpoint.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        return response.json().get("results", [])
    except (requests.RequestException, ValueError):
        # Network error, non-2xx status, or malformed JSON body.
        return []
def create_dynamic_roadmap(skills, certs, scholarships, edu_opps):
    """Build a Gantt-style task table (Task/Start/Finish) of next steps.

    Dates are anchored at "now": up to two certifications fill months 1-2,
    up to two scholarship applications months 3-4, and a single education
    programme starts around month 4 and runs roughly a year. The *skills*
    argument is currently unused but kept for interface stability.
    """
    anchor = datetime.now()

    def window(offset_days, length_days):
        # ISO-formatted [start, finish] pair relative to the anchor date.
        begin = anchor + timedelta(days=offset_days)
        end = anchor + timedelta(days=offset_days + length_days)
        return begin.strftime("%Y-%m-%d"), end.strftime("%Y-%m-%d")

    tasks = []

    if not certs.empty and "Certification" in certs.columns:
        for idx, name in enumerate(certs['Certification'].tolist()[:2]):
            begin, end = window(idx * 30, 30)
            tasks.append({"Task": f"Complete Certification: {name}",
                          "Start": begin, "Finish": end})

    if not scholarships.empty and "Scholarship" in scholarships.columns:
        for idx, name in enumerate(scholarships['Scholarship'].tolist()[:2]):
            begin, end = window(60 + idx * 30, 30)
            tasks.append({"Task": f"Apply for Scholarship: {name}",
                          "Start": begin, "Finish": end})

    if not edu_opps.empty and "Program" in edu_opps.columns:
        for name in edu_opps['Program'].tolist()[:1]:
            begin, end = window(120, 360)
            tasks.append({"Task": f"Pursue Education: {name}",
                          "Start": begin, "Finish": end})

    return pd.DataFrame(tasks)
# ----------------------------- Streamlit UI -----------------------------
st.title("π Personalized Skill Scoring & Career Roadmap App")
st.markdown("Upload your CV and get a detailed career roadmap with live job listings.")

uploaded_file = st.file_uploader("π€ Upload your CV (PDF only)", type=["pdf"])

if uploaded_file:
    # Parse the CV and precompute every recommendation up front.
    with st.spinner("Analyzing your CV..."):
        text = extract_text_from_pdf(uploaded_file)
        skills, background, years_exp = extract_entities(text)
        score = score_skills(skills)
        country_info = recommend_countries(skills, years_exp)
        certs = recommend_certifications(skills)
        edu = recommend_education(background)
        field = background  # Simplified; you should detect actual field from CV
        scholarships = recommend_scholarships(field)

    st.subheader("β Identified Skills")
    st.write(skills or "No recognized skills found.")

    st.subheader("π Skill Score")
    st.metric("Your Skill Score", f"{score}/100")

    st.subheader("π Job Opportunities & Country Recommendations")
    if not country_info.empty:
        st.dataframe(country_info)
    else:
        st.write("No country/job recommendations available for your skill set.")

    st.subheader("π Recommended Certifications")
    if not certs.empty:
        st.dataframe(certs)
    else:
        st.write("No certification recommendations available.")

    st.subheader("π Higher Education Opportunities")
    if not edu.empty:
        st.dataframe(edu)
    else:
        st.write("No higher education opportunities available.")

    st.subheader("π Scholarship Opportunities")
    if not scholarships.empty:
        st.dataframe(scholarships)
    else:
        st.write("No scholarships available for your field.")

    # Dynamic roadmap timeline: only chart when the frame has the columns
    # px.timeline needs, otherwise warn instead of crashing.
    roadmap_df = create_dynamic_roadmap(skills, certs, scholarships, edu)
    st.write("Roadmap DataFrame preview:")
    st.dataframe(roadmap_df)
    required_cols = {"Task", "Start", "Finish"}
    if not roadmap_df.empty and required_cols.issubset(roadmap_df.columns):
        fig = px.timeline(
            roadmap_df,
            x_start="Start",
            x_end="Finish",
            y="Task",
            title="Career Roadmap Timeline",
        )
        fig.update_yaxes(autorange="reversed")  # earliest task on top
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.warning("No roadmap tasks to display or roadmap data missing required columns.")

    # Live job listings for the first identified skill, localized to the
    # first recommended country (falling back to the US market).
    if skills and not country_info.empty:
        st.subheader(f"π Live Job Listings for '{skills[0]}'")
        country_code_map = {
            "USA": "us",
            "Canada": "ca",
            "UK": "gb",
            "Germany": "de",
            "Australia": "au",
            "India": "in",
            "Netherlands": "nl",
        }
        country_code = country_code_map.get(country_info.iloc[0]["Country"], "us")
        jobs = fetch_jobs(skills[0], country_code=country_code, max_results=5)
        if jobs:
            for job in jobs:
                st.markdown(f"**[{job['title']}]({job['redirect_url']})** - {job['location']['display_name']}")
                st.markdown(f"*{job.get('description', '')[:200]}...*")
                st.markdown("---")
        else:
            st.write("No live job listings found.")
else:
    st.info("Please upload your CV to begin.")