Danial7 committed on
Commit
60afec2
·
verified ·
1 Parent(s): 6566acb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -75
app.py CHANGED
@@ -1,97 +1,95 @@
1
- # --- Skill Scoring Streamlit App ---
2
  import streamlit as st
3
- import pdfplumber
4
  import pandas as pd
5
- import numpy as np
6
- import spacy.cli
7
- spacy.cli.download("en_core_web_sm")
8
  import spacy
9
- import torch
10
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
11
- from sentence_transformers import SentenceTransformer, util
12
 
13
# Configure the Streamlit page: browser title and wide layout.
st.set_page_config(layout="wide", page_title="Skill Scoring App")

# Module-level NLP objects: the spaCy English pipeline, a sentence-embedding
# model, and a summarization pipeline using the same FLAN-T5 checkpoint for
# both model and tokenizer.
_FLAN_T5 = "google/flan-t5-base"
nlp = spacy.load("en_core_web_sm")
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
summarizer = pipeline("summarization", model=_FLAN_T5, tokenizer=_FLAN_T5)

# Reference tables, each read from a CSV file given by relative path.
skills_df, countries_df, certs_df, edu_df = (
    pd.read_csv(csv_name)
    for csv_name in (
        "skills_dataset.csv",
        "countries_dataset.csv",
        "certifications.csv",
        "education_opportunities.csv",
    )
)
26
-
27
- # --- Functions ---
28
def extract_text(uploaded_file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        uploaded_file: Path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        str: Page texts joined by ``"\\n"``; pages that yield no text are
        skipped, so the result is ``""`` when nothing could be extracted.
    """
    with pdfplumber.open(uploaded_file) as pdf:
        # Extract each page exactly once; the previous version ran the
        # extraction twice per page (once to filter, once to join).
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(text for text in page_texts if text)
31
-
32
def summarize_cv(text):
    """Summarize CV text with the module-level ``summarizer`` pipeline.

    Only the first 3000 characters of ``text`` are passed on, as a workaround
    for the model's input limit. Returns the ``summary_text`` field of the
    first result.
    """
    truncated = text[:3000]
    results = summarizer(truncated, do_sample=False, min_length=50, max_length=200)
    first_result = results[0]
    return first_result['summary_text']
36
 
def extract_entities(text):
    """Collect skill-like and education entities found by the spaCy pipeline.

    Returns a ``(skills, education)`` pair of de-duplicated lists (order not
    guaranteed): entity texts labelled ``ORG`` or ``SKILL``, and entity texts
    labelled ``EDUCATION``.
    """
    # NOTE(review): the stock en_core_web_sm model is not known to emit SKILL
    # or EDUCATION labels -- confirm whether a custom NER component is expected.
    skill_labels = ("ORG", "SKILL")
    entities = nlp(text).ents
    skills = {ent.text for ent in entities if ent.label_ in skill_labels}
    education = {ent.text for ent in entities if ent.label_ == "EDUCATION"}
    return list(skills), list(education)
42
-
43
def match_skills(cv_text):
    """Return the dataset skills most similar to the CV text.

    The CV text and every entry of ``skills_df['Skill']`` are embedded with
    ``sentence_model``; skills are ranked by cosine similarity to the CV.

    Args:
        cv_text: Text to compare against the skills dataset.

    Returns:
        list: Up to 10 skill names, most similar first; empty when the
        skills dataset has no rows.
    """
    skill_names = skills_df['Skill'].tolist()
    if not skill_names:
        # Nothing to rank, and slicing an empty embedding tensor would fail.
        return []

    embeddings = sentence_model.encode([cv_text] + skill_names, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(embeddings[0], embeddings[1:])[0]

    # torch.topk raises when k exceeds the number of candidates, so cap it.
    top_results = torch.topk(cosine_scores, k=min(10, len(skill_names)))

    # Convert tensor indices to plain ints before indexing.
    return [skill_names[idx] for idx in top_results.indices.tolist()]
49
-
50
def recommend_certifications(matched_skills):
    """Return the rows of ``certs_df`` whose ``Skill`` is in ``matched_skills``.

    Rows repeating an already-seen ``Certification`` value are dropped.
    """
    is_relevant = certs_df['Skill'].isin(matched_skills)
    relevant_certs = certs_df[is_relevant]
    return relevant_certs.drop_duplicates('Certification')
52
-
53
def recommend_countries_and_salary(matched_skills):
    """Summarize, per country, the ``countries_df`` rows for the given skills.

    Returns a DataFrame with one row per ``Country`` holding the mean
    ``AverageSalary`` and the first ``VisaPath`` among the matching rows.
    """
    has_skill = countries_df['Skill'].isin(matched_skills)
    per_country = countries_df[has_skill].groupby('Country')
    summary = per_country.agg({"AverageSalary": "mean", "VisaPath": "first"})
    return summary.reset_index()
56
-
57
def recommend_education(edu_background):
    """Return the ``edu_df`` rows whose ``Background`` contains ``edu_background``.

    Matching is case-insensitive and rows with a missing ``Background`` are
    treated as non-matching.
    """
    background_hits = edu_df['Background'].str.contains(
        edu_background,
        case=False,
        na=False,
    )
    return edu_df[background_hits]
60
-
61
- # --- UI ---
62
# --- UI ---
# Header banner, rendered as raw HTML.
st.markdown("""
<div style="background-color:#e3f2fd;padding:20px;border-radius:10px">
<h1 style="color:#0d47a1;text-align:center;">🎯 Global Skill Scorer & Career Recommender</h1>
<p style="text-align:center;font-size:18px">Upload your CV to get a personalized career growth plan, skill score, salary predictions, and global recommendations.</p>
</div>
""", unsafe_allow_html=True)

# Emoji in the labels below were mis-decoded (mojibake) and have been restored.
uploaded_file = st.file_uploader("📄 Upload your CV (PDF only)", type="pdf")

if uploaded_file:
    # Run the whole analysis pipeline while the spinner is shown.
    with st.spinner("Analyzing your CV..."):
        raw_text = extract_text(uploaded_file)
        summary = summarize_cv(raw_text)
        matched_skills = match_skills(summary)
        certs = recommend_certifications(matched_skills)
        country_salaries = recommend_countries_and_salary(matched_skills)
        education_recos = recommend_education("technical")  # defaulting to technical for now

    st.subheader("📌 Summary of Your CV")
    st.info(summary)

    st.subheader("💼 Matched Skills")
    st.write(matched_skills)

    st.subheader("🎓 Suggested Certifications")
    st.dataframe(certs)

    st.subheader("🌍 Best Countries & Salaries")
    st.dataframe(country_salaries)

    st.subheader("🎓 Higher Education Options & Scholarships")
    st.dataframe(education_recos)

    st.success("✅ Personalized plan generated successfully.")
else:
    st.info("Please upload your CV to begin.")
 
 
1
  import streamlit as st
 
2
  import pandas as pd
3
+ import pdfplumber
 
 
4
  import spacy
 
 
 
5
 
6
# Set page config at the top
st.set_page_config(page_title="Skill Scoring App", layout="wide")

# Load spaCy model. Download it only when it is missing, so a normal start
# does not re-fetch a model that is already installed.
import spacy.cli

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package cannot be found.
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Load datasets (CSV files given by relative path).
skills_df = pd.read_csv("skills_dataset.csv")
countries_df = pd.read_csv("countries_dataset.csv")
cert_df = pd.read_csv("certifications.csv")
edu_tech_df = pd.read_csv("education_technical.csv")
edu_non_tech_df = pd.read_csv("education_non_technical.csv")
20
+
21
+ # Helper functions
22
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        file: Path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        str: Page texts joined by ``"\\n"``; pages that yield no text are
        skipped, so the result is ``""`` when nothing could be extracted.
    """
    with pdfplumber.open(file) as pdf:
        # Extract each page exactly once; the previous version ran the
        # extraction twice per page (once to filter, once to join).
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(text for text in page_texts if text)
29
 
def extract_entities(text):
    """Find known skills in the CV text and classify the background.

    Single-word skills are matched against spaCy tokens (exact, case-sensitive
    comparison with ``skills_df['Skill']``). Multi-word skills such as
    "Machine Learning" can never equal a single token, so they are matched
    as whole phrases against the raw text instead.

    Args:
        text: Plain text of the CV.

    Returns:
        tuple: ``(skills, background)`` where ``skills`` is a de-duplicated
        list of matched skill names (order not guaranteed) and ``background``
        is ``"technical"`` if any matched skill is in the built-in technical
        set, otherwise ``"non-technical"``.
    """
    import re  # local import: only this function needs it

    doc = nlp(text)

    # Build the lookup set once instead of scanning the column per token;
    # non-string cells (e.g. missing values) can never match and are skipped.
    known_skills = {s for s in skills_df['Skill'].values if isinstance(s, str)}

    # Identify single-token skills present in the CV.
    found = {token.text for token in doc if token.text in known_skills}

    # Identify multi-word skills as whole phrases, tolerating any whitespace
    # (including line breaks) between the words.
    for skill in known_skills:
        words = skill.split()
        if len(words) < 2:
            continue
        phrase = r"\s+".join(re.escape(word) for word in words)
        if re.search(r"(?<!\w)" + phrase + r"(?!\w)", text):
            found.add(skill)

    # Determine technical vs non-technical background.
    technical_skills = {"Python", "Machine Learning", "Cloud Computing", "Cybersecurity", "AI", "DevOps"}
    background = "technical" if found & technical_skills else "non-technical"
    return list(found), background
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
def score_skills(user_skills):
    """Return the share of dataset skills the user has, as an integer percent.

    The score is ``len(user_skills)`` divided by the number of rows in
    ``skills_df``, times 100, truncated to an int. An empty dataset gives 0.
    """
    total_skills = len(skills_df)
    if total_skills == 0:
        return 0
    coverage = len(user_skills) / total_skills
    return int(coverage * 100)
 
 
 
 
43
 
44
def recommend_countries(skills):
    """Return distinct country rows from ``countries_df`` for the given skills.

    The result keeps only the ``Country``, ``AverageSalary`` and ``VisaPath``
    columns, with duplicate rows removed and a fresh 0-based index.
    """
    wanted_columns = ["Country", "AverageSalary", "VisaPath"]
    has_skill = countries_df['Skill'].isin(skills)
    country_rows = countries_df[has_skill][wanted_columns]
    unique_rows = country_rows.drop_duplicates()
    return unique_rows.reset_index(drop=True)
47
 
48
def recommend_certifications(skills):
    """Return the ``cert_df`` rows whose ``Skill`` is in ``skills``.

    The result is re-indexed from 0.
    """
    is_relevant = cert_df['Skill'].isin(skills)
    relevant_certs = cert_df[is_relevant]
    return relevant_certs.reset_index(drop=True)
50
 
51
def recommend_education(background):
    """Return the education table matching the background, re-indexed from 0.

    ``"technical"`` selects ``edu_tech_df``; any other value selects
    ``edu_non_tech_df``.
    """
    if background == "technical":
        source = edu_tech_df
    else:
        source = edu_non_tech_df
    return source.reset_index(drop=True)
53
 
54
+ # Streamlit UI
55
# Streamlit UI
# Emoji in the labels below were mis-decoded (mojibake) and have been restored.
st.title("📊 Personalized Skill Scoring & Career Path App")
st.markdown(
    "Upload your CV and get a personalized career guide based on your skills and background."
)

uploaded_file = st.file_uploader("📤 Upload your CV (PDF format only)", type=["pdf"])

if uploaded_file:
    # Run the whole analysis pipeline while the spinner is shown.
    with st.spinner("Analyzing your CV..."):
        text = extract_text_from_pdf(uploaded_file)
        skills, background = extract_entities(text)
        score = score_skills(skills)
        country_info = recommend_countries(skills)
        certs = recommend_certifications(skills)
        edu = recommend_education(background)

    st.subheader("✅ Identified Skills")
    # An empty skill list is falsy, so the fallback message is shown instead.
    st.write(skills or "No recognized skills found.")

    st.subheader("📈 Skill Score")
    st.metric("Your Skill Score", f"{score}/100")

    # Each table below falls back to a plain message when it has no rows.
    st.subheader("🌍 Country Recommendations")
    if not country_info.empty:
        st.dataframe(country_info)
    else:
        st.write("No country recommendations available for your skill set.")

    st.subheader("🎓 Recommended Certifications")
    if not certs.empty:
        st.dataframe(certs)
    else:
        st.write("No certification recommendations available for your skill set.")

    st.subheader("🎓 Higher Education Opportunities")
    if not edu.empty:
        st.dataframe(edu)
    else:
        st.write("No higher education opportunities available for your background.")
else:
    st.info("Please upload your CV to begin.")