Spaces:

Danial7
/

skill_roadmap_app

Sleeping

App Files Files Community

Danial7 committed on May 15, 2025

Commit

ec9d8ec

verified ·

1 Parent(s): 1fe7fa3

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -50

app.py CHANGED Viewed

@@ -1,13 +1,15 @@
 import streamlit as st
 import pandas as pd
-import plotly.express as px
 import spacy
 import requests
 from datetime import datetime, timedelta
-from extractor import extract_text_from_pdf, extract_entities
-# Config
 st.set_page_config(page_title="Skill Scoring & Career Roadmap App", layout="wide")
 nlp = spacy.load("en_core_web_sm")
 # Load datasets
@@ -19,6 +21,18 @@ edu_non_tech_df = pd.read_csv("data/education_non_technical.csv")
 scholarship_df = pd.read_csv("data/scholarships_dataset.csv")
 # Helper functions
 def score_skills(user_skills):
     if not skills_df.shape[0]:
         return 0
@@ -33,11 +47,9 @@ def recommend_certifications(skills):
     return cert_df[cert_df['Skill'].isin(skills)].reset_index(drop=True)
 def recommend_education(background):
-    return edu_tech_df if background == "technical" else edu_non_tech_df
 def recommend_scholarships(field):
-    if "Field" not in scholarship_df.columns:
-        return pd.DataFrame()
     return scholarship_df[scholarship_df["Field"].str.lower() == field.lower()].reset_index(drop=True)
 def fetch_jobs(skill, country_code="us", max_results=5):
@@ -54,60 +66,57 @@ def fetch_jobs(skill, country_code="us", max_results=5):
     response = requests.get(url, params=params)
     if response.status_code == 200:
         return response.json()["results"]
-    return []
 def create_dynamic_roadmap(skills, certs, scholarships, edu_opps):
     now = datetime.now()
     roadmap = []
-    # Detect a valid certification column
-    cert_col = next((col for col in certs.columns if col.lower() in ["certification", "name", "title"]), None)
-    if cert_col:
-        for i, cert in enumerate(certs[cert_col].dropna().tolist()[:2]):
             roadmap.append({
-                "Task": f"Complete {cert}",
-                "Start": (now + timedelta(days=i * 30)).strftime("%Y-%m-%d"),
-                "Finish": (now + timedelta(days=(i + 1) * 30)).strftime("%Y-%m-%d")
             })
-    # Detect a valid scholarship column
-    scholarship_col = next((col for col in scholarships.columns if col.lower() in ["scholarship", "name", "title"]), None)
-    if scholarship_col:
-        for i, scholarship in enumerate(scholarships[scholarship_col].dropna().tolist()[:2]):
             roadmap.append({
-                "Task": f"Apply for {scholarship}",
-                "Start": (now + timedelta(days=90 + i * 30)).strftime("%Y-%m-%d"),
-                "Finish": (now + timedelta(days=120 + i * 30)).strftime("%Y-%m-%d")
             })
-    # Detect a valid education column
-    edu_col = next((col for col in edu_opps.columns if col.lower() in ["program", "course", "degree", "title"]), None)
-    if edu_col:
-        for i, degree in enumerate(edu_opps[edu_col].dropna().tolist()[:1]):
             roadmap.append({
-                "Task": f"Pursue {degree}",
-                "Start": (now + timedelta(days=180)).strftime("%Y-%m-%d"),
-                "Finish": (now + timedelta(days=720)).strftime("%Y-%m-%d")
             })
     return pd.DataFrame(roadmap)
-# UI
 st.title("📊 Personalized Skill Scoring & Career Roadmap App")
-st.markdown("Upload your CV and get a detailed roadmap with live job listings.")
 uploaded_file = st.file_uploader("📤 Upload your CV (PDF only)", type=["pdf"])
 if uploaded_file:
     with st.spinner("Analyzing your CV..."):
         text = extract_text_from_pdf(uploaded_file)
-        skills, background, years_exp = extract_entities(text, skills_df)
         score = score_skills(skills)
         country_info = recommend_countries(skills, years_exp)
         certs = recommend_certifications(skills)
-        edu_opps = recommend_education(background)
-        field = background
         scholarships = recommend_scholarships(field)
     st.subheader("✅ Identified Skills")
@@ -117,33 +126,62 @@ if uploaded_file:
     st.metric("Your Skill Score", f"{score}/100")
     st.subheader("🌍 Job Opportunities & Country Recommendations")
-    st.dataframe(country_info if not country_info.empty else pd.DataFrame(columns=["Country", "JobTitle", "AverageSalary", "VisaPath"]))
     st.subheader("🎓 Recommended Certifications")
-    st.dataframe(certs if not certs.empty else pd.DataFrame(columns=["Certification", "Skill"]))
     st.subheader("🎓 Higher Education Opportunities")
-    st.dataframe(edu_opps)
     st.subheader("🎓 Scholarship Opportunities")
-    st.dataframe(scholarships if not scholarships.empty else pd.DataFrame(columns=["Scholarship", "Field"]))
-    # Timeline chart
-    st.subheader("🛤️ Career Roadmap Timeline")
-    roadmap_df = create_dynamic_roadmap(skills, certs, scholarships, edu_opps)
-    fig = px.timeline(roadmap_df, x_start="Start", x_end="Finish", y="Task", title="Career Roadmap Timeline")
-    fig.update_yaxes(autorange="reversed")
-    st.plotly_chart(fig, use_container_width=True)
-    # Live job listings
     if skills and not country_info.empty:
         st.subheader(f"🔍 Live Job Listings for '{skills[0]}'")
         country_code_map = {
-            "USA": "us", "Canada": "ca", "UK": "gb", "Germany": "de",
-            "Australia": "au", "India": "in", "Netherlands": "nl"
         }
         country_code = country_code_map.get(country_info.iloc[0]["Country"], "us")
-        jobs = fetch_jobs(skills[0], country_code=country_code)
         if jobs:
             for job in jobs:
                 st.markdown(f"**[{job['title']}]({job['redirect_url']})** - {job['location']['display_name']}")

 import streamlit as st
 import pandas as pd
+import pdfplumber
 import spacy
 import requests
+import plotly.express as px
 from datetime import datetime, timedelta
+# Page config
 st.set_page_config(page_title="Skill Scoring & Career Roadmap App", layout="wide")
+# Load spaCy model
 nlp = spacy.load("en_core_web_sm")
 # Load datasets
 scholarship_df = pd.read_csv("data/scholarships_dataset.csv")
 # Helper functions
def extract_text_from_pdf(file):
    """Return the text of all pages of a PDF, joined with newlines.

    Args:
        file: Anything ``pdfplumber.open`` accepts (the caller passes the
            object returned by Streamlit's file uploader).

    Returns:
        A single string with one chunk per page that yielded text. Pages for
        which ``extract_text()`` returns ``None`` or an empty string are
        skipped, so the result is ``""`` when no page has extractable text.
    """
    with pdfplumber.open(file) as pdf:
        # Extract each page exactly once; the previous form called
        # extract_text() twice per page (once to filter, once for the value).
        page_texts = (page.extract_text() for page in pdf.pages)
        # Join inside the ``with`` so the lazy generator is consumed while
        # the PDF is still open.
        return "\n".join(chunk for chunk in page_texts if chunk)
def extract_entities(text):
    """Find known skills in CV text and classify the candidate's background.

    Skills are taken from the ``Skill`` column of the module-level
    ``skills_df`` and matched against the tokenized text with spaCy's
    ``PhraseMatcher`` (exact, case-sensitive token text). Unlike a
    token-by-token comparison, this also finds multi-word skills such as
    "Machine Learning".

    Args:
        text: Plain text of the CV.

    Returns:
        A tuple ``(skills, background, years_exp)`` where ``skills`` is a
        de-duplicated list of matched skill names in order of first
        appearance in the text, ``background`` is ``"technical"`` or
        ``"non-technical"``, and ``years_exp`` is currently a fixed
        placeholder value.
    """
    # Local import: spaCy is already a dependency of this module.
    from spacy.matcher import PhraseMatcher

    # Drop missing/blank entries so they cannot become (empty) patterns.
    known_skills = {
        name for name in skills_df['Skill'].dropna().astype(str) if name.strip()
    }

    # Register every skill under its own name so a match can be mapped back
    # to the dataset's spelling via the match id.
    matcher = PhraseMatcher(nlp.vocab)
    for name in known_skills:
        matcher.add(name, [nlp.make_doc(name.strip())])

    doc = nlp(text)
    # Sort by start token so the result order is deterministic (the caller
    # uses skills[0]); dict.fromkeys de-duplicates while keeping that order.
    hits = sorted(matcher(doc), key=lambda hit: hit[1])
    skills = list(dict.fromkeys(nlp.vocab.strings[match_id] for match_id, _start, _end in hits))

    technical_skills = {"Python", "Machine Learning", "Cloud Computing", "Cybersecurity", "AI", "DevOps"}
    background = "technical" if any(s in technical_skills for s in skills) else "non-technical"
    years_exp = 3  # Placeholder, replace with better extraction logic
    return skills, background, years_exp
 def score_skills(user_skills):
     if not skills_df.shape[0]:
         return 0
     return cert_df[cert_df['Skill'].isin(skills)].reset_index(drop=True)
 def recommend_education(background):
     """Pick the education dataset that matches the candidate's background.

     Args:
         background: ``"technical"`` selects ``edu_tech_df``; any other value
             selects ``edu_non_tech_df``.

     Returns:
         The selected DataFrame with a fresh 0-based index.
     """
     if background == "technical":
         chosen = edu_tech_df
     else:
         chosen = edu_non_tech_df
     return chosen.reset_index(drop=True)
 def recommend_scholarships(field):
     """Return the scholarships whose ``Field`` matches ``field``.

     The comparison ignores case and surrounding whitespace.

     Args:
         field: Field name to look up. ``None`` or an empty value yields no
             matches.

     Returns:
         A DataFrame of matching rows from the module-level
         ``scholarship_df`` with a fresh 0-based index. An empty DataFrame
         (same columns as ``scholarship_df``) is returned when there is no
         ``Field`` column or no usable ``field`` value, instead of raising.
     """
     # Guard against a dataset without the expected column and against a
     # missing field value; both previously raised (KeyError / AttributeError).
     if "Field" not in scholarship_df.columns or not field:
         return pd.DataFrame(columns=scholarship_df.columns)

     wanted = str(field).strip().lower()
     # fillna/astype keep the ``.str`` accessor usable even when the column
     # holds NaN or non-string values.
     available = scholarship_df["Field"].fillna("").astype(str).str.strip().str.lower()
     return scholarship_df[available == wanted].reset_index(drop=True)
 def fetch_jobs(skill, country_code="us", max_results=5):
     response = requests.get(url, params=params)
     if response.status_code == 200:
         return response.json()["results"]
+    else:
+        return []
 def create_dynamic_roadmap(skills, certs, scholarships, edu_opps):
     """Build a timeline of suggested next steps.

     The roadmap contains, in this order:
       * up to two certifications (30-day slots starting today),
       * up to two scholarship applications (30-day slots starting at day 60),
       * the first education program (day 120 to day 480).

     Args:
         skills: Accepted for interface compatibility; not used here.
         certs: DataFrame; names are read from its ``Certification`` column.
         scholarships: DataFrame; names are read from its ``Scholarship`` column.
         edu_opps: DataFrame; names are read from its ``Program`` column.

     Returns:
         A DataFrame with the columns ``Task``, ``Start`` and ``Finish``
         (dates formatted as ``%Y-%m-%d`` strings). The columns are present
         even when there are no tasks.
     """
     now = datetime.now()
     roadmap = []

     def names(frame, column, limit):
         # An empty frame or a missing column contributes nothing. NaN cells
         # are dropped before slicing so no task is ever labelled "nan".
         if frame.empty or column not in frame.columns:
             return []
         return frame[column].dropna().tolist()[:limit]

     def add(task, start_day, finish_day):
         # Offsets are in days relative to ``now``.
         roadmap.append({
             "Task": task,
             "Start": (now + timedelta(days=start_day)).strftime("%Y-%m-%d"),
             "Finish": (now + timedelta(days=finish_day)).strftime("%Y-%m-%d"),
         })

     # Add certifications to roadmap
     for i, cert in enumerate(names(certs, "Certification", 2)):
         add(f"Complete Certification: {cert}", i * 30, (i + 1) * 30)

     # Add scholarships to roadmap
     for i, scholarship in enumerate(names(scholarships, "Scholarship", 2)):
         add(f"Apply for Scholarship: {scholarship}", 60 + i * 30, 90 + i * 30)

     # Add education opportunities to roadmap
     for edu in names(edu_opps, "Program", 1):
         add(f"Pursue Education: {edu}", 120, 480)

     # Fix the column set so an empty roadmap still has the expected schema.
     return pd.DataFrame(roadmap, columns=["Task", "Start", "Finish"])
+# Streamlit UI
 st.title("📊 Personalized Skill Scoring & Career Roadmap App")
+st.markdown("Upload your CV and get a detailed career roadmap with live job listings.")
 uploaded_file = st.file_uploader("📤 Upload your CV (PDF only)", type=["pdf"])
 if uploaded_file:
     with st.spinner("Analyzing your CV..."):
         text = extract_text_from_pdf(uploaded_file)
+        skills, background, years_exp = extract_entities(text)
         score = score_skills(skills)
         country_info = recommend_countries(skills, years_exp)
         certs = recommend_certifications(skills)
+        edu = recommend_education(background)
+        field = background  # Simplified; you should detect actual field from CV
         scholarships = recommend_scholarships(field)
     st.subheader("✅ Identified Skills")
     st.metric("Your Skill Score", f"{score}/100")
     st.subheader("🌍 Job Opportunities & Country Recommendations")
+    if not country_info.empty:
+        st.dataframe(country_info)
+    else:
+        st.write("No country/job recommendations available for your skill set.")
     st.subheader("🎓 Recommended Certifications")
+    if not certs.empty:
+        st.dataframe(certs)
+    else:
+        st.write("No certification recommendations available.")
     st.subheader("🎓 Higher Education Opportunities")
+    if not edu.empty:
+        st.dataframe(edu)
+    else:
+        st.write("No higher education opportunities available.")
     st.subheader("🎓 Scholarship Opportunities")
+    if not scholarships.empty:
+        st.dataframe(scholarships)
+    else:
+        st.write("No scholarships available for your field.")
+    # Dynamic roadmap timeline generation & display with checks
+    roadmap_df = create_dynamic_roadmap(skills, certs, scholarships, edu)
+    st.write("Roadmap DataFrame preview:")
+    st.dataframe(roadmap_df)
+    required_cols = {"Task", "Start", "Finish"}
+    if not roadmap_df.empty and required_cols.issubset(roadmap_df.columns):
+        fig = px.timeline(
+            roadmap_df,
+            x_start="Start",
+            x_end="Finish",
+            y="Task",
+            title="Career Roadmap Timeline"
+        )
+        fig.update_yaxes(autorange="reversed")
+        st.plotly_chart(fig, use_container_width=True)
+    else:
+        st.warning("No roadmap tasks to display or roadmap data missing required columns.")
+    # Show live job listings using first identified skill and first country code
     if skills and not country_info.empty:
         st.subheader(f"🔍 Live Job Listings for '{skills[0]}'")
         country_code_map = {
+            "USA": "us",
+            "Canada": "ca",
+            "UK": "gb",
+            "Germany": "de",
+            "Australia": "au",
+            "India": "in",
+            "Netherlands": "nl"
         }
         country_code = country_code_map.get(country_info.iloc[0]["Country"], "us")
+        jobs = fetch_jobs(skills[0], country_code=country_code, max_results=5)
         if jobs:
             for job in jobs:
                 st.markdown(f"**[{job['title']}]({job['redirect_url']})** - {job['location']['display_name']}")