Danial7 committed on
Commit
bba628e
·
verified ·
1 Parent(s): 90abcef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -117
app.py CHANGED
@@ -1,132 +1,97 @@
 
1
  import streamlit as st
2
- st.set_page_config(page_title="Skill Scoring App", layout="wide") # FIRST!
3
-
4
- # All other imports
5
  import pdfplumber
6
  import pandas as pd
7
  import numpy as np
8
- from transformers import pipeline
9
- from sklearn.feature_extraction.text import TfidfVectorizer
10
- from PIL import Image
11
-
12
- # --- Skill & Country Setup ---
13
- in_demand_skills = [
14
- "Python", "Machine Learning", "Project Management", "Data Analysis", "Communication",
15
- "Leadership", "Cloud Computing", "Cybersecurity", "AI", "DevOps"
16
- ]
17
-
18
- country_salary_data = {
19
- "USA": (90000, 150000),
20
- "Germany": (60000, 100000),
21
- "Canada": (70000, 110000),
22
- "UK": (65000, 95000),
23
- "India": (20000, 35000)
24
- }
25
-
26
- # --- Load Classifier ---
27
- def load_classifier():
28
- return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
29
-
30
- classifier = load_classifier() # No decorator used to avoid early Streamlit calls
31
 
32
  # --- Functions ---
33
  def extract_text(uploaded_file):
34
  with pdfplumber.open(uploaded_file) as pdf:
35
  return "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
36
 
37
- def classify_skills(text, candidate_skills):
38
- result = classifier(text, candidate_labels=candidate_skills, multi_label=True)
39
- scores = dict(zip(result['labels'], result['scores']))
40
- return {skill: round(score, 2) for skill, score in scores.items() if score > 0.4}
41
-
42
- def score_user_skills(user_skills):
43
- if not user_skills:
44
- return 0
45
- return int(np.clip(len(user_skills) / len(in_demand_skills) * 100, 0, 100))
46
-
47
- def suggest_country_and_salary(score):
48
- if score >= 80:
49
- return "USA", country_salary_data["USA"]
50
- elif score >= 60:
51
- return "Germany", country_salary_data["Germany"]
52
- elif score >= 50:
53
- return "Canada", country_salary_data["Canada"]
54
- elif score >= 40:
55
- return "UK", country_salary_data["UK"]
56
- else:
57
- return "India", country_salary_data["India"]
58
-
59
- def improvement_tips(user_skills):
60
- missing = [skill for skill in in_demand_skills if skill not in user_skills]
61
- suggestions = {
62
- "Python": "Take free Python courses at Coursera or edX.",
63
- "Machine Learning": "Try fast.ai or Andrew Ng’s ML course on Coursera.",
64
- "Cloud Computing": "Get certified in AWS or Azure (many free intro courses).",
65
- "Communication": "Join Toastmasters or attend webinars on soft skills.",
66
- "Leadership": "Look for Udemy or LinkedIn Learning leadership programs.",
67
- }
68
- return [suggestions[s] for s in missing if s in suggestions]
69
-
70
- def generate_report(score, country, salary_range, skills, tips):
71
- report = f"""Skill Score Report\n\n
72
- Skill Score: {score}/100
73
- Suggested Country: {country}
74
- Estimated Salary: ${salary_range[0]:,} - ${salary_range[1]:,} USD
75
-
76
- Skills Identified:\n"""
77
- for skill, val in skills.items():
78
- report += f" - {skill}: {val}\n"
79
-
80
- report += "\nImprovement Suggestions:\n"
81
- for tip in tips:
82
- report += f" - {tip}\n"
83
-
84
- return report
85
-
86
- # --- UI Banner ---
87
- st.markdown(
88
- """
89
- <div style="background-color:#f0f4ff;padding:20px;border-radius:12px;margin-bottom:25px">
90
- <h1 style="color:#003366;text-align:center;">πŸš€ Skill Scoring & Career Guidance App</h1>
91
- <p style="text-align:center;font-size:18px;color:#333;">
92
- Upload your CV to discover your global job potential β€” get personalized skill scores, salary insights,
93
- job location recommendations, and improvement suggestions. All for free.
94
- </p>
95
  </div>
96
- """,
97
- unsafe_allow_html=True
98
- )
99
 
100
- # --- File Upload ---
101
- uploaded_file = st.file_uploader("πŸ“€ Upload your CV (PDF)", type="pdf")
102
 
103
  if uploaded_file:
104
- st.success("CV uploaded successfully.")
105
- with st.spinner("Analyzing your skills..."):
106
- cv_text = extract_text(uploaded_file)
107
- user_skills = classify_skills(cv_text, in_demand_skills)
108
- score = score_user_skills(user_skills)
109
- country, (min_salary, max_salary) = suggest_country_and_salary(score)
110
- tips = improvement_tips(user_skills)
111
- report_text = generate_report(score, country, (min_salary, max_salary), user_skills, tips)
112
-
113
- st.subheader("πŸ” Skill Match")
114
- st.write(user_skills)
115
-
116
- st.subheader("πŸ“ˆ Skill Score")
117
- st.metric(label="Your Skill Score", value=f"{score}/100")
118
-
119
- st.subheader("🌍 Suggested Country & Salary")
120
- st.markdown(f"**Suggested Country:** {country}")
121
- st.markdown(f"**Estimated Salary Range:** ${min_salary:,} - ${max_salary:,} USD")
122
-
123
- st.subheader("πŸ› οΈ Skill Improvement Suggestions")
124
- if tips:
125
- for tip in tips:
126
- st.markdown(f"- {tip}")
127
- else:
128
- st.success("You're already covering the top in-demand skills!")
129
-
130
- st.download_button("πŸ“₯ Download Report as TXT", report_text, file_name="skill_score_report.txt")
131
  else:
132
- st.info("Please upload your CV (PDF) to begin.")
 
1
+ # --- Skill Scoring Streamlit App ---
2
  import streamlit as st
 
 
 
3
  import pdfplumber
4
  import pandas as pd
5
  import numpy as np
6
+ import spacy.cli
7
+ spacy.cli.download("en_core_web_sm")
8
+ import spacy
9
+ import torch
10
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
11
+ from sentence_transformers import SentenceTransformer, util
12
+
13
# --- Page Config ---
# Kept ahead of every other Streamlit call in this script.
st.set_page_config(page_title="Skill Scoring App", layout="wide")


# --- Load NLP Models ---
@st.cache_resource
def _load_models():
    """Build the spaCy, sentence-embedding and summarization models once.

    Streamlit re-executes the whole script on every interaction; caching
    the models as a shared resource avoids reloading them on each rerun.

    Returns:
        tuple: ``(nlp, sentence_model, summarizer)``.
    """
    return (
        spacy.load("en_core_web_sm"),
        SentenceTransformer("all-MiniLM-L6-v2"),
        pipeline("summarization", model="google/flan-t5-base", tokenizer="google/flan-t5-base"),
    )


nlp, sentence_model, summarizer = _load_models()


# --- Load Datasets ---
@st.cache_data
def _load_csv(path):
    """Read one CSV file into a DataFrame, cached across script reruns."""
    return pd.read_csv(path)


skills_df = _load_csv("skills_dataset.csv")
countries_df = _load_csv("countries_dataset.csv")
certs_df = _load_csv("certifications.csv")
edu_df = _load_csv("education_opportunities.csv")
 
 
 
26
 
27
  # --- Functions ---
28
def extract_text(uploaded_file):
    """Return the text of all pages of a PDF, joined by newlines.

    Args:
        uploaded_file: Whatever ``pdfplumber.open`` accepts; in this script
            it is the object returned by the Streamlit file uploader.

    Returns:
        str: The text of each page that yielded any, in page order and
        separated by a newline. Empty string when no page has text.
    """
    with pdfplumber.open(uploaded_file) as pdf:
        # Extract each page exactly once; the filter and the join share
        # the same result instead of parsing every page twice.
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(text for text in page_texts if text)
31
 
32
def summarize_cv(text, max_chars=3000):
    """Summarize CV text with the module-level ``summarizer`` pipeline.

    Args:
        text: Raw CV text. ``None`` is treated like an empty string.
        max_chars: Number of leading characters handed to the model.
            This is a character-based cap used as a workaround for the
            model's input limit; it defaults to the original 3000.

    Returns:
        str: The generated summary, or ``""`` when there is no
        non-whitespace text to summarize.
    """
    inputs = (text or "")[:max_chars]
    # Nothing to summarize (e.g. a PDF with no extractable text): skip the
    # model call instead of feeding it an empty prompt.
    if not inputs.strip():
        return ""
    summary = summarizer(inputs, max_length=200, min_length=50, do_sample=False)
    return summary[0]['summary_text']
36
+
37
def extract_entities(text):
    """Collect skill-like and education-like named entities from text.

    Runs the module-level spaCy pipeline ``nlp`` over ``text`` and splits
    the entity texts by label.

    Args:
        text: Text to analyse.

    Returns:
        tuple[list[str], list[str]]: ``(skills, education)``; each list is
        de-duplicated and keeps the order of first appearance.
    """
    doc = nlp(text)
    # NOTE(review): the stock en_core_web_sm label scheme does not appear to
    # include "SKILL" or "EDUCATION", so only "ORG" is likely to match unless
    # a custom NER component is added -- confirm against the loaded model.
    # dict.fromkeys de-duplicates while preserving document order, so the
    # result is stable between runs (a set would not be).
    skills = dict.fromkeys(
        ent.text for ent in doc.ents if ent.label_ in ("ORG", "SKILL")
    )
    education = dict.fromkeys(
        ent.text for ent in doc.ents if ent.label_ == "EDUCATION"
    )
    return list(skills), list(education)
42
+
43
def match_skills(cv_text, top_k=10):
    """Return the dataset skills most similar to the given CV text.

    The text and every entry of ``skills_df['Skill']`` are embedded with
    ``sentence_model``; skills are ranked by cosine similarity to the text.

    Args:
        cv_text: Text to match against (the script passes the CV summary).
        top_k: Maximum number of skills to return. Defaults to 10.

    Returns:
        list: Up to ``top_k`` skill names, most similar first. Empty when
        the skills dataset has no rows.
    """
    skill_names = skills_df['Skill'].tolist()
    if not skill_names:
        return []

    embeddings = sentence_model.encode([cv_text] + skill_names, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(embeddings[0], embeddings[1:])[0]

    # torch.topk raises when k exceeds the number of scores, so clamp it to
    # the dataset size.
    k = max(0, min(top_k, len(skill_names)))
    top_results = torch.topk(cosine_scores, k=k)

    # Convert the index tensor to plain ints before using it for lookup.
    return [skill_names[i] for i in top_results.indices.tolist()]
49
+
50
def recommend_certifications(matched_skills):
    """Select certifications whose skill is among the matched skills.

    Args:
        matched_skills: Skill names to look up in ``certs_df['Skill']``.

    Returns:
        DataFrame: Matching rows of ``certs_df`` with one row per distinct
        ``Certification`` value.
    """
    is_relevant = certs_df['Skill'].isin(matched_skills)
    relevant_certs = certs_df[is_relevant]
    return relevant_certs.drop_duplicates('Certification')
52
+
53
def recommend_countries_and_salary(matched_skills):
    """Summarise salary and visa information per country for given skills.

    Args:
        matched_skills: Skill names to look up in ``countries_df['Skill']``.

    Returns:
        DataFrame: One row per ``Country`` with the mean ``AverageSalary``
        and the first ``VisaPath`` found among the matching rows.
    """
    skill_mask = countries_df['Skill'].isin(matched_skills)
    by_country = countries_df[skill_mask].groupby('Country')
    per_country = by_country.agg({"AverageSalary": "mean", "VisaPath": "first"})
    return per_country.reset_index()
56
+
57
def recommend_education(edu_background):
    """Find education opportunities matching a background keyword.

    Args:
        edu_background: Keyword searched for, case-insensitively, inside
            ``edu_df['Background']``.

    Returns:
        DataFrame: Rows of ``edu_df`` whose ``Background`` contains the
        keyword; rows with a missing ``Background`` never match.
    """
    # regex=False: the keyword is plain text, so characters such as "+" or
    # "(" must be matched literally rather than parsed as a pattern.
    is_match = edu_df['Background'].str.contains(
        edu_background, case=False, na=False, regex=False
    )
    return edu_df[is_match]
60
+
61
# --- UI ---
# Page banner: title and one-line pitch, rendered from raw HTML.
_BANNER_HTML = """
    <div style="background-color:#e3f2fd;padding:20px;border-radius:10px">
    <h1 style="color:#0d47a1;text-align:center;">🎯 Global Skill Scorer & Career Recommender</h1>
    <p style="text-align:center;font-size:18px">Upload your CV to get a personalized career growth plan, skill score, salary predictions, and global recommendations.</p>
    </div>
"""
st.markdown(_BANNER_HTML, unsafe_allow_html=True)
 
 
68
 
69
# --- Upload & Results ---
uploaded_file = st.file_uploader("📄 Upload your CV (PDF only)", type="pdf")

if uploaded_file:
    with st.spinner("Analyzing your CV..."):
        raw_text = extract_text(uploaded_file)
        # extract_text returns "" when no page yields text; in that case
        # there is nothing to summarize or match, so skip the pipeline.
        has_text = bool(raw_text.strip())
        if has_text:
            summary = summarize_cv(raw_text)
            matched_skills = match_skills(summary)
            certs = recommend_certifications(matched_skills)
            country_salaries = recommend_countries_and_salary(matched_skills)
            education_recos = recommend_education("technical")  # defaulting to technical for now

    if has_text:
        st.subheader("📌 Summary of Your CV")
        st.info(summary)

        st.subheader("💼 Matched Skills")
        st.write(matched_skills)

        st.subheader("🎓 Suggested Certifications")
        st.dataframe(certs)

        st.subheader("🌍 Best Countries & Salaries")
        st.dataframe(country_salaries)

        st.subheader("🎓 Higher Education Options & Scholarships")
        st.dataframe(education_recos)

        st.success("✅ Personalized plan generated successfully.")
    else:
        st.error("No readable text was found in this PDF. Please upload a text-based CV.")
else:
    st.info("Please upload your CV to begin.")