Danial7 committed on
Commit
41b2d80
·
verified ·
1 Parent(s): 651799b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -213
app.py CHANGED
@@ -1,219 +1,130 @@
1
  import streamlit as st
2
- import PyPDF2
3
- import pandas as pd
4
- import os
5
- from sklearn.feature_extraction.text import TfidfVectorizer
6
- from sklearn.metrics.pairwise import cosine_similarity
7
- from keybert import KeyBERT
8
- from datetime import datetime
9
- import plotly.express as px
10
  from fpdf import FPDF
11
- import requests
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  st.set_page_config(page_title="Universal Smart CV Analyzer", layout="wide")
 
 
 
 
 
14
 
15
- st.title("🌍 Universal Smart CV Analyzer & Career Roadmap")
16
- st.markdown("Upload your **CV (PDF)** to get personalized recommendations, skill score, and complete career roadmap.")
17
-
18
- uploaded_file = st.file_uploader("Upload your CV (PDF)", type=["pdf"])
19
-
20
- # Load datasets
21
- @st.cache_data
22
- def load_data():
23
- base_path = "data"
24
- certs = pd.read_csv(os.path.join(base_path, "certifications.csv"))
25
- scholarships = pd.read_csv(os.path.join(base_path, "scholarships.csv"))
26
- edu_tech = pd.read_csv(os.path.join(base_path, "education_technical.csv"))
27
- edu_nontech = pd.read_csv(os.path.join(base_path, "education_non_technical.csv"))
28
- visa_data = pd.read_csv(os.path.join(base_path, "countries_dataset.csv"))
29
- skills_data = pd.read_csv(os.path.join(base_path, "skills_dataset.csv"))
30
- return certs, scholarships, edu_tech, edu_nontech, visa_data, skills_data
31
-
32
- certs, scholarships, edu_tech, edu_nontech, visa_data, skills_data = load_data()
33
-
34
- # Extract text from PDF
35
- def extract_text_from_pdf(file):
36
- reader = PyPDF2.PdfReader(file)
37
- text = ""
38
- for page in reader.pages:
39
- text += page.extract_text()
40
- return text
41
-
42
- # Keyword extraction
43
- def extract_keywords(text, num_keywords=10):
44
- kw_model = KeyBERT()
45
- keywords = kw_model.extract_keywords(text, top_n=num_keywords, stop_words='english')
46
- return [kw[0].lower() for kw in keywords]
47
-
48
- # Field identification
49
- def identify_field(keywords):
50
- fields = {
51
- "Engineering": ["engineer", "mechanical", "electrical", "civil", "plc", "automation"],
52
- "Data Science": ["machine learning", "data", "python", "statistics", "ai"],
53
- "Software Development": ["developer", "software", "backend", "frontend", "javascript"],
54
- "Marketing": ["seo", "content", "marketing", "branding"],
55
- "Finance": ["accounting", "finance", "budget", "tax"],
56
- "Design": ["photoshop", "illustrator", "design", "creative"],
57
- "Healthcare": ["nursing", "surgery", "hospital", "patient"],
58
- "Construction": ["carpentry", "plumbing", "hvac", "gardening", "mining"]
59
- }
60
- scores = {field: len(set(keywords).intersection(terms)) for field, terms in fields.items()}
61
- return max(scores, key=scores.get)
62
-
63
- # Technical background
64
- def is_technical_background(keywords):
65
- tech_terms = ["engineer", "machine learning", "python", "developer", "software", "automation", "plc", "ai"]
66
- non_tech_terms = ["marketing", "finance", "content", "seo", "branding", "accounting", "creative"]
67
- tech_score = len(set(keywords).intersection(tech_terms))
68
- non_tech_score = len(set(keywords).intersection(non_tech_terms))
69
- return "Technical" if tech_score >= non_tech_score else "Non-Technical"
70
-
71
- # CV skill score
72
- def calculate_cv_score(text, keywords):
73
- ideal = " ".join(keywords)
74
- tfidf = TfidfVectorizer()
75
- tfidf_matrix = tfidf.fit_transform([text, ideal])
76
- score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
77
- return round(score * 100)
78
-
79
- # Field data filter
80
- def filter_data_by_field(df, field_col, field):
81
- return df[df[field_col].str.lower() == field.lower()]
82
-
83
- # Visa opportunities
84
- def suggest_visa_opportunities(keywords, visa_data):
85
- matched_rows = []
86
- for _, row in visa_data.iterrows():
87
- if any(skill.lower() in keywords for skill in row["Skill"].split(",")):
88
- matched_rows.append(row)
89
- return pd.DataFrame(matched_rows)
90
-
91
- # Upskilling suggestions (no reliance on 'Importance')
92
- def suggest_upskilling(keywords, skills_data):
93
- all_skills = set(skills_data["Skill"].str.lower())
94
- current_skills = set([kw.lower() for kw in keywords])
95
- missing_skills = all_skills - current_skills
96
- suggested = skills_data[skills_data["Skill"].str.lower().isin(missing_skills)]
97
- return suggested
98
-
99
- # 🎯 Job listings using Adzuna API
100
- def get_job_listings(keywords, location="Pakistan", results_per_page=10):
101
- app_id = "f4efd3a2" # Replace with your Adzuna app_id
102
- app_key = "5702f3c0507ac69f98aa15f855b06901" # Replace with your Adzuna app_key
103
- base_url = "https://api.adzuna.com/v1/api/jobs/pk/search/1"
104
- query = " ".join(keywords)
105
-
106
- params = {
107
- "app_id": app_id,
108
- "app_key": app_key,
109
- "results_per_page": results_per_page,
110
- "what": query,
111
- "where": location,
112
- "content-type": "application/json"
113
- }
114
-
115
- try:
116
- response = requests.get(base_url, params=params)
117
- response.raise_for_status()
118
- jobs = response.json().get("results", [])
119
- return pd.DataFrame(jobs)
120
- except Exception as e:
121
- st.error(f"Error fetching job listings: {e}")
122
- return pd.DataFrame()
123
-
124
- # Timeline generation
125
- def generate_timeline(data=None):
126
- timeline = pd.DataFrame({
127
- "Task": ["Certifications", "Scholarships", "Education", "Visa Search"],
128
- "Start": ["2025-06-01", "2025-07-01", "2025-08-01", "2025-09-01"],
129
- "Finish": ["2025-06-30", "2025-07-30", "2025-09-30", "2025-10-15"]
130
- })
131
- fig = px.timeline(timeline, x_start="Start", x_end="Finish", y="Task", color="Task")
132
- fig.update_yaxes(categoryorder='total ascending')
133
- st.plotly_chart(fig)
134
-
135
- # PDF Report
136
- class PDF(FPDF):
137
- def header(self):
138
- self.set_font('Arial', 'B', 12)
139
- self.cell(0, 10, 'Career Roadmap Report', ln=True, align='C')
140
- def chapter_title(self, title):
141
- self.set_font('Arial', 'B', 10)
142
- self.cell(0, 10, title, ln=True)
143
- def chapter_body(self, body):
144
- self.set_font('Arial', '', 9)
145
- self.multi_cell(0, 10, body)
146
-
147
- def generate_pdf_report(field, score, keywords, upskills):
148
- pdf = PDF()
149
- pdf.add_page()
150
- pdf.chapter_title("Field: " + field)
151
- pdf.chapter_title("Score: " + str(score))
152
- pdf.chapter_body("Keywords: " + ", ".join(keywords))
153
- pdf.chapter_title("Suggested Upskilling:")
154
- pdf.chapter_body(", ".join(upskills))
155
- pdf.output("report.pdf")
156
- st.success("📄 PDF Report Generated: report.pdf")
157
-
158
- # 🌟 MAIN APP LOGIC
159
  if uploaded_file:
160
- text = extract_text_from_pdf(uploaded_file)
161
- st.success("✅ CV Text Extracted")
162
-
163
- keywords = extract_keywords(text)
164
- st.subheader("🔑 Extracted Keywords")
165
- st.write(keywords)
166
-
167
- field = identify_field(keywords)
168
- st.subheader("🎯 Identified Field / Domain")
169
- st.write(field)
170
-
171
- tech_class = is_technical_background(keywords)
172
- st.subheader("🧠 CV Background Type")
173
- st.write(tech_class)
174
-
175
- score = calculate_cv_score(text, keywords)
176
- st.subheader("📊 CV Skill Score")
177
- st.metric(label="Score", value=f"{score}/100")
178
-
179
- st.subheader("📈 Suggested Skills to Acquire for Better Opportunities")
180
- missing_skills_df = suggest_upskilling(keywords, skills_data)
181
- if not missing_skills_df.empty:
182
- for skill in missing_skills_df["Skill"].head(10):
183
- st.write(f"🔧 {skill}")
184
- missing_list = missing_skills_df["Skill"].tolist()
185
- else:
186
- st.write("You already have most in-demand skills covered!")
187
- missing_list = []
188
-
189
- st.subheader("📚 Recommended Certifications")
190
- certs_field = filter_data_by_field(certs, "Field", field)
191
- st.dataframe(certs_field)
192
-
193
- st.subheader("🎓 Scholarships")
194
- scholarships_field = filter_data_by_field(scholarships, "Field", field)
195
- st.dataframe(scholarships_field)
196
-
197
- st.subheader("🎓 Education Opportunities")
198
- if tech_class == "Technical":
199
- edu_field = filter_data_by_field(edu_tech, "Field", field)
200
- else:
201
- edu_field = filter_data_by_field(edu_nontech, "Field", field)
202
- st.dataframe(edu_field)
203
-
204
- st.subheader("🌍 Visa Opportunities Based on Your Skills")
205
- visa_matches = suggest_visa_opportunities(keywords, visa_data)
206
- st.dataframe(visa_matches)
207
-
208
- st.subheader("💼 Job Listings")
209
- job_df = get_job_listings(keywords)
210
- if not job_df.empty:
211
- st.dataframe(job_df[["title", "company", "location", "description"]])
212
- else:
213
- st.write("No job listings available right now.")
214
-
215
- st.subheader("🗓️ Personalized Timeline")
216
- generate_timeline()
217
-
218
- if st.button("📄 Generate PDF Report"):
219
- generate_pdf_report(field, score, keywords, missing_list)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from PyPDF2 import PdfReader
 
 
 
 
 
 
 
3
  from fpdf import FPDF
4
+ import os
5
+
6
+ from utils import (
7
+ extract_keywords,
8
+ identify_field,
9
+ is_technical_background,
10
+ calculate_cv_score,
11
+ suggest_upskilling,
12
+ suggest_certifications,
13
+ suggest_scholarships,
14
+ suggest_education_opportunities,
15
+ suggest_visa_opportunities,
16
+ get_job_listings
17
+ )
18
 
19
  st.set_page_config(page_title="Universal Smart CV Analyzer", layout="wide")
20
+ st.title("📄 Universal Smart CV Analyzer & Career Roadmap")
21
+ st.markdown("Upload your CV in PDF format to get a complete personalized analysis and roadmap.")
22
+
23
+ # Upload PDF
24
+ uploaded_file = st.file_uploader("Upload your CV", type="pdf")
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  if uploaded_file:
27
+ with st.spinner("Reading and analyzing your CV..."):
28
+ pdf = PdfReader(uploaded_file)
29
+ text = ""
30
+ for page in pdf.pages:
31
+ text += page.extract_text() or ""
32
+
33
+ # Extract keywords
34
+ keywords = extract_keywords(text)
35
+ st.subheader("🔍 Extracted Keywords")
36
+ st.write(", ".join(keywords))
37
+
38
+ # Identify field
39
+ field = identify_field(keywords)
40
+ st.subheader("🧠 Predicted Field")
41
+ st.write(f"**{field}**")
42
+
43
+ # Score the CV
44
+ score = calculate_cv_score(text, keywords)
45
+ st.subheader("📊 CV Score")
46
+ st.metric(label="Skill Match Score", value=f"{score}/100")
47
+
48
+ # Determine technical background
49
+ background = is_technical_background(keywords)
50
+ st.subheader("🔧 Technical Background")
51
+ st.write(f"**{background}**")
52
+
53
+ # Suggestions Section
54
+ st.subheader("🚀 Suggested Upskilling")
55
+ upskills = suggest_upskilling(keywords)
56
+ st.write(upskills if upskills else "No suggestions found.")
57
+
58
+ st.subheader("🎓 Certifications")
59
+ certifications = suggest_certifications(keywords)
60
+ st.write(certifications if certifications else "No certifications found.")
61
+
62
+ st.subheader("💸 Scholarships")
63
+ scholarships = suggest_scholarships(keywords)
64
+ st.write(scholarships if scholarships else "No scholarships found.")
65
+
66
+ st.subheader("🏫 Education Opportunities")
67
+ education = suggest_education_opportunities(keywords)
68
+ st.write(education if education else "No educational programs found.")
69
+
70
+ st.subheader("🌍 Visa Opportunities")
71
+ visas = suggest_visa_opportunities(keywords)
72
+ st.write(visas if visas else "No visa opportunities found.")
73
+
74
+ st.subheader("💼 Job Listings")
75
+ job_df = get_job_listings(keywords, location="Pakistan")
76
+ if not job_df.empty:
77
+ st.dataframe(job_df)
78
+ else:
79
+ st.write("No jobs found.")
80
+
81
+ # PDF Report Generator
82
+ st.subheader("📥 Generate PDF Report")
83
+
84
+ class PDF(FPDF):
85
+ def chapter_title(self, title):
86
+ self.set_font("Arial", "B", 12)
87
+ self.set_fill_color(220, 220, 220)
88
+ self.cell(0, 10, title, ln=True, fill=True)
89
+
90
+ def chapter_body(self, body):
91
+ self.set_font("Arial", "", 11)
92
+ self.multi_cell(0, 10, body)
93
+ self.ln()
94
+
95
+ if st.button("Generate & Download Report"):
96
+ with st.spinner("Generating PDF report..."):
97
+ pdf = PDF()
98
+ pdf.add_page()
99
+ pdf.set_title("CV Analysis Report")
100
+ pdf.chapter_title("📄 CV Analysis Report")
101
+ pdf.chapter_title("Predicted Field:")
102
+ pdf.chapter_body(field)
103
+ pdf.chapter_title("Skill Match Score:")
104
+ pdf.chapter_body(f"{score}/100")
105
+ pdf.chapter_title("Technical Background:")
106
+ pdf.chapter_body(background)
107
+ pdf.chapter_title("Extracted Keywords:")
108
+ pdf.chapter_body(", ".join(keywords))
109
+ pdf.chapter_title("Suggested Upskilling:")
110
+ pdf.chapter_body(", ".join(upskills))
111
+ pdf.chapter_title("Certifications:")
112
+ pdf.chapter_body(", ".join(certifications))
113
+ pdf.chapter_title("Scholarships:")
114
+ pdf.chapter_body(", ".join(scholarships))
115
+ pdf.chapter_title("Education Opportunities:")
116
+ pdf.chapter_body(", ".join(education))
117
+ pdf.chapter_title("Visa Opportunities:")
118
+ pdf.chapter_body(", ".join(visas))
119
+
120
+ output_path = "cv_analysis_report.pdf"
121
+ pdf.output(output_path)
122
+
123
+ with open(output_path, "rb") as f:
124
+ base64_pdf = f.read()
125
+ st.download_button(
126
+ label="📄 Download CV Report",
127
+ data=base64_pdf,
128
+ file_name="cv_analysis_report.pdf",
129
+ mime="application/pdf",
130
+ )