Danial7 committed on
Commit
ec9d8ec
·
verified ·
1 Parent(s): 1fe7fa3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -50
app.py CHANGED
@@ -1,13 +1,15 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import plotly.express as px
4
  import spacy
5
  import requests
 
6
  from datetime import datetime, timedelta
7
- from extractor import extract_text_from_pdf, extract_entities
8
 
9
- # Config
10
  st.set_page_config(page_title="Skill Scoring & Career Roadmap App", layout="wide")
 
 
11
  nlp = spacy.load("en_core_web_sm")
12
 
13
  # Load datasets
@@ -19,6 +21,18 @@ edu_non_tech_df = pd.read_csv("data/education_non_technical.csv")
19
  scholarship_df = pd.read_csv("data/scholarships_dataset.csv")
20
 
21
  # Helper functions
 
 
 
 
 
 
 
 
 
 
 
 
22
  def score_skills(user_skills):
23
  if not skills_df.shape[0]:
24
  return 0
@@ -33,11 +47,9 @@ def recommend_certifications(skills):
33
  return cert_df[cert_df['Skill'].isin(skills)].reset_index(drop=True)
34
 
35
  def recommend_education(background):
36
- return edu_tech_df if background == "technical" else edu_non_tech_df
37
 
38
  def recommend_scholarships(field):
39
- if "Field" not in scholarship_df.columns:
40
- return pd.DataFrame()
41
  return scholarship_df[scholarship_df["Field"].str.lower() == field.lower()].reset_index(drop=True)
42
 
43
  def fetch_jobs(skill, country_code="us", max_results=5):
@@ -54,60 +66,57 @@ def fetch_jobs(skill, country_code="us", max_results=5):
54
  response = requests.get(url, params=params)
55
  if response.status_code == 200:
56
  return response.json()["results"]
57
- return []
 
58
 
59
  def create_dynamic_roadmap(skills, certs, scholarships, edu_opps):
60
  now = datetime.now()
61
  roadmap = []
62
 
63
- # Detect a valid certification column
64
- cert_col = next((col for col in certs.columns if col.lower() in ["certification", "name", "title"]), None)
65
- if cert_col:
66
- for i, cert in enumerate(certs[cert_col].dropna().tolist()[:2]):
67
  roadmap.append({
68
- "Task": f"Complete {cert}",
69
- "Start": (now + timedelta(days=i * 30)).strftime("%Y-%m-%d"),
70
- "Finish": (now + timedelta(days=(i + 1) * 30)).strftime("%Y-%m-%d")
71
  })
72
 
73
- # Detect a valid scholarship column
74
- scholarship_col = next((col for col in scholarships.columns if col.lower() in ["scholarship", "name", "title"]), None)
75
- if scholarship_col:
76
- for i, scholarship in enumerate(scholarships[scholarship_col].dropna().tolist()[:2]):
77
  roadmap.append({
78
- "Task": f"Apply for {scholarship}",
79
- "Start": (now + timedelta(days=90 + i * 30)).strftime("%Y-%m-%d"),
80
- "Finish": (now + timedelta(days=120 + i * 30)).strftime("%Y-%m-%d")
81
  })
82
 
83
- # Detect a valid education column
84
- edu_col = next((col for col in edu_opps.columns if col.lower() in ["program", "course", "degree", "title"]), None)
85
- if edu_col:
86
- for i, degree in enumerate(edu_opps[edu_col].dropna().tolist()[:1]):
87
  roadmap.append({
88
- "Task": f"Pursue {degree}",
89
- "Start": (now + timedelta(days=180)).strftime("%Y-%m-%d"),
90
- "Finish": (now + timedelta(days=720)).strftime("%Y-%m-%d")
91
  })
92
 
93
  return pd.DataFrame(roadmap)
94
 
95
-
96
- # UI
97
  st.title("πŸ“Š Personalized Skill Scoring & Career Roadmap App")
98
- st.markdown("Upload your CV and get a detailed roadmap with live job listings.")
99
 
100
  uploaded_file = st.file_uploader("πŸ“€ Upload your CV (PDF only)", type=["pdf"])
101
 
102
  if uploaded_file:
103
  with st.spinner("Analyzing your CV..."):
104
  text = extract_text_from_pdf(uploaded_file)
105
- skills, background, years_exp = extract_entities(text, skills_df)
106
  score = score_skills(skills)
107
  country_info = recommend_countries(skills, years_exp)
108
  certs = recommend_certifications(skills)
109
- edu_opps = recommend_education(background)
110
- field = background
111
  scholarships = recommend_scholarships(field)
112
 
113
  st.subheader("βœ… Identified Skills")
@@ -117,33 +126,62 @@ if uploaded_file:
117
  st.metric("Your Skill Score", f"{score}/100")
118
 
119
  st.subheader("🌍 Job Opportunities & Country Recommendations")
120
- st.dataframe(country_info if not country_info.empty else pd.DataFrame(columns=["Country", "JobTitle", "AverageSalary", "VisaPath"]))
 
 
 
121
 
122
  st.subheader("πŸŽ“ Recommended Certifications")
123
- st.dataframe(certs if not certs.empty else pd.DataFrame(columns=["Certification", "Skill"]))
 
 
 
124
 
125
  st.subheader("πŸŽ“ Higher Education Opportunities")
126
- st.dataframe(edu_opps)
 
 
 
127
 
128
  st.subheader("πŸŽ“ Scholarship Opportunities")
129
- st.dataframe(scholarships if not scholarships.empty else pd.DataFrame(columns=["Scholarship", "Field"]))
130
-
131
- # Timeline chart
132
- st.subheader("πŸ›€οΈ Career Roadmap Timeline")
133
- roadmap_df = create_dynamic_roadmap(skills, certs, scholarships, edu_opps)
134
- fig = px.timeline(roadmap_df, x_start="Start", x_end="Finish", y="Task", title="Career Roadmap Timeline")
135
- fig.update_yaxes(autorange="reversed")
136
- st.plotly_chart(fig, use_container_width=True)
137
-
138
- # Live job listings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  if skills and not country_info.empty:
140
  st.subheader(f"πŸ” Live Job Listings for '{skills[0]}'")
141
  country_code_map = {
142
- "USA": "us", "Canada": "ca", "UK": "gb", "Germany": "de",
143
- "Australia": "au", "India": "in", "Netherlands": "nl"
 
 
 
 
 
144
  }
145
  country_code = country_code_map.get(country_info.iloc[0]["Country"], "us")
146
- jobs = fetch_jobs(skills[0], country_code=country_code)
147
  if jobs:
148
  for job in jobs:
149
  st.markdown(f"**[{job['title']}]({job['redirect_url']})** - {job['location']['display_name']}")
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import pdfplumber
4
  import spacy
5
  import requests
6
+ import plotly.express as px
7
  from datetime import datetime, timedelta
 
8
 
9
+ # Page config
10
  st.set_page_config(page_title="Skill Scoring & Career Roadmap App", layout="wide")
11
+
12
+ # Load spaCy model
13
  nlp = spacy.load("en_core_web_sm")
14
 
15
  # Load datasets
 
21
  scholarship_df = pd.read_csv("data/scholarships_dataset.csv")
22
 
23
  # Helper functions
24
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        file: Whatever ``pdfplumber.open`` accepts; the app passes the
            object returned by ``st.file_uploader``.

    Returns:
        The text of all pages that yielded any text, separated by a
        newline. Pages with no extractable text are skipped, so the result
        is an empty string when no page has text.
    """
    with pdfplumber.open(file) as pdf:
        # Extract each page exactly once: text extraction is the expensive
        # step, and filtering plus joining on separate calls ran it twice.
        page_texts = (page.extract_text() for page in pdf.pages)
        # The generator is fully consumed by join() while the PDF is open.
        return "\n".join(page_text for page_text in page_texts if page_text)
27
+
28
def extract_entities(text):
    """Extract known skills, a coarse background label and experience from CV text.

    Args:
        text: Plain text of the CV.

    Returns:
        A ``(skills, background, years_exp)`` tuple where

        * ``skills`` is the list of unique tokens that exactly match an
          entry of ``skills_df['Skill']``, in order of first appearance;
        * ``background`` is ``"technical"`` when any detected skill is in the
          hard-coded technical set, otherwise ``"non-technical"``;
        * ``years_exp`` is currently the fixed placeholder ``3``.
    """
    doc = nlp(text)

    # Build the lookup once instead of scanning the column for every token.
    known_skills = set(skills_df['Skill'])

    # NOTE(review): matching is per token and case-sensitive, so multi-word
    # entries such as "Machine Learning" presumably never match -- confirm
    # against the tokenizer and the skills dataset.
    # dict.fromkeys de-duplicates while keeping first-appearance order. The
    # caller uses skills[0] for the live job search, so the order must be
    # stable between runs (list(set(...)) was not).
    skills = list(dict.fromkeys(
        token.text for token in doc if token.text in known_skills
    ))

    technical_skills = {"Python", "Machine Learning", "Cloud Computing", "Cybersecurity", "AI", "DevOps"}
    background = "technical" if any(s in technical_skills for s in skills) else "non-technical"
    years_exp = 3  # Placeholder, replace with better extraction logic
    return skills, background, years_exp
35
+
36
  def score_skills(user_skills):
37
  if not skills_df.shape[0]:
38
  return 0
 
47
  return cert_df[cert_df['Skill'].isin(skills)].reset_index(drop=True)
48
 
49
def recommend_education(background):
    """Return the education dataset matching the candidate's background.

    Args:
        background: ``"technical"`` selects the technical dataset; any other
            value selects the non-technical one.

    Returns:
        A copy of the selected DataFrame with a fresh 0-based index.
    """
    if background == "technical":
        programs = edu_tech_df
    else:
        programs = edu_non_tech_df
    return programs.reset_index(drop=True)
51
 
52
def recommend_scholarships(field):
    """Return the scholarships whose ``Field`` matches *field*, ignoring case.

    Args:
        field: Field name to look up, as a string.

    Returns:
        The matching rows of ``scholarship_df`` with a fresh 0-based index,
        or an empty DataFrame when the dataset has no ``"Field"`` column.
    """
    # Without this guard a dataset lacking the column raises KeyError; the
    # UI already treats an empty frame as "no scholarships available".
    if "Field" not in scholarship_df.columns:
        return pd.DataFrame()
    matches = scholarship_df["Field"].str.lower() == field.lower()
    return scholarship_df[matches].reset_index(drop=True)
54
 
55
  def fetch_jobs(skill, country_code="us", max_results=5):
 
66
  response = requests.get(url, params=params)
67
  if response.status_code == 200:
68
  return response.json()["results"]
69
+ else:
70
+ return []
71
 
72
def create_dynamic_roadmap(skills, certs, scholarships, edu_opps):
    """Build a dated task list from the recommended certifications, scholarships and education.

    Args:
        skills: Detected skills. Currently unused; kept so existing callers
            keep working.
        certs: DataFrame of certifications; the ``"Certification"`` column is read.
        scholarships: DataFrame of scholarships; the ``"Scholarship"`` column is read.
        edu_opps: DataFrame of education programs; the ``"Program"`` column is read.

    Returns:
        A DataFrame with columns ``Task``, ``Start`` and ``Finish`` (dates as
        ``YYYY-MM-DD`` strings). It holds up to two certifications (30 days
        each, starting today), up to two scholarships (30 days each, starting
        at day 60) and one program (day 120 to day 480). The frame is empty,
        but still has the three columns, when no input provides a usable value.
    """
    now = datetime.now()
    roadmap = []
    roadmap += _roadmap_entries(
        certs, "Certification", "Complete Certification", now,
        limit=2, offset=0, duration=30,
    )
    roadmap += _roadmap_entries(
        scholarships, "Scholarship", "Apply for Scholarship", now,
        limit=2, offset=60, duration=30,
    )
    roadmap += _roadmap_entries(
        edu_opps, "Program", "Pursue Education", now,
        limit=1, offset=120, duration=360,
    )
    # Fixing the columns keeps the schema stable even with zero tasks.
    return pd.DataFrame(roadmap, columns=["Task", "Start", "Finish"])


def _roadmap_entries(frame, column, label, now, *, limit, offset, duration, stagger=30):
    """Return up to *limit* roadmap rows built from the non-null values of ``frame[column]``."""
    if frame.empty or column not in frame.columns:
        return []

    entries = []
    # dropna() keeps missing names from becoming tasks such as "...: nan".
    for position, value in enumerate(frame[column].dropna().tolist()[:limit]):
        start = now + timedelta(days=offset + position * stagger)
        finish = start + timedelta(days=duration)
        entries.append({
            "Task": f"{label}: {value}",
            "Start": start.strftime("%Y-%m-%d"),
            "Finish": finish.strftime("%Y-%m-%d"),
        })
    return entries
104
 
105
+ # Streamlit UI
 
106
  st.title("πŸ“Š Personalized Skill Scoring & Career Roadmap App")
107
+ st.markdown("Upload your CV and get a detailed career roadmap with live job listings.")
108
 
109
  uploaded_file = st.file_uploader("πŸ“€ Upload your CV (PDF only)", type=["pdf"])
110
 
111
  if uploaded_file:
112
  with st.spinner("Analyzing your CV..."):
113
  text = extract_text_from_pdf(uploaded_file)
114
+ skills, background, years_exp = extract_entities(text)
115
  score = score_skills(skills)
116
  country_info = recommend_countries(skills, years_exp)
117
  certs = recommend_certifications(skills)
118
+ edu = recommend_education(background)
119
+ field = background # Simplified; you should detect actual field from CV
120
  scholarships = recommend_scholarships(field)
121
 
122
  st.subheader("βœ… Identified Skills")
 
126
  st.metric("Your Skill Score", f"{score}/100")
127
 
128
  st.subheader("🌍 Job Opportunities & Country Recommendations")
129
+ if not country_info.empty:
130
+ st.dataframe(country_info)
131
+ else:
132
+ st.write("No country/job recommendations available for your skill set.")
133
 
134
  st.subheader("πŸŽ“ Recommended Certifications")
135
+ if not certs.empty:
136
+ st.dataframe(certs)
137
+ else:
138
+ st.write("No certification recommendations available.")
139
 
140
  st.subheader("πŸŽ“ Higher Education Opportunities")
141
+ if not edu.empty:
142
+ st.dataframe(edu)
143
+ else:
144
+ st.write("No higher education opportunities available.")
145
 
146
  st.subheader("πŸŽ“ Scholarship Opportunities")
147
+ if not scholarships.empty:
148
+ st.dataframe(scholarships)
149
+ else:
150
+ st.write("No scholarships available for your field.")
151
+
152
+ # Dynamic roadmap timeline generation & display with checks
153
+ roadmap_df = create_dynamic_roadmap(skills, certs, scholarships, edu)
154
+ st.write("Roadmap DataFrame preview:")
155
+ st.dataframe(roadmap_df)
156
+
157
+ required_cols = {"Task", "Start", "Finish"}
158
+ if not roadmap_df.empty and required_cols.issubset(roadmap_df.columns):
159
+ fig = px.timeline(
160
+ roadmap_df,
161
+ x_start="Start",
162
+ x_end="Finish",
163
+ y="Task",
164
+ title="Career Roadmap Timeline"
165
+ )
166
+ fig.update_yaxes(autorange="reversed")
167
+ st.plotly_chart(fig, use_container_width=True)
168
+ else:
169
+ st.warning("No roadmap tasks to display or roadmap data missing required columns.")
170
+
171
+ # Show live job listings using first identified skill and first country code
172
  if skills and not country_info.empty:
173
  st.subheader(f"πŸ” Live Job Listings for '{skills[0]}'")
174
  country_code_map = {
175
+ "USA": "us",
176
+ "Canada": "ca",
177
+ "UK": "gb",
178
+ "Germany": "de",
179
+ "Australia": "au",
180
+ "India": "in",
181
+ "Netherlands": "nl"
182
  }
183
  country_code = country_code_map.get(country_info.iloc[0]["Country"], "us")
184
+ jobs = fetch_jobs(skills[0], country_code=country_code, max_results=5)
185
  if jobs:
186
  for job in jobs:
187
  st.markdown(f"**[{job['title']}]({job['redirect_url']})** - {job['location']['display_name']}")