Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,97 +1,95 @@
|
|
| 1 |
-
# --- Skill Scoring Streamlit App ---
|
| 2 |
import streamlit as st
|
| 3 |
-
import pdfplumber
|
| 4 |
import pandas as pd
|
| 5 |
-
import
|
| 6 |
-
import spacy.cli
|
| 7 |
-
spacy.cli.download("en_core_web_sm")
|
| 8 |
import spacy
|
| 9 |
-
import torch
|
| 10 |
-
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
| 11 |
-
from sentence_transformers import SentenceTransformer, util
|
| 12 |
|
| 13 |
-
#
|
| 14 |
st.set_page_config(page_title="Skill Scoring App", layout="wide")
|
| 15 |
|
| 16 |
-
#
|
|
|
|
|
|
|
| 17 |
nlp = spacy.load("en_core_web_sm")
|
| 18 |
-
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
|
| 19 |
-
summarizer = pipeline("summarization", model="google/flan-t5-base", tokenizer="google/flan-t5-base")
|
| 20 |
|
| 21 |
-
#
|
| 22 |
skills_df = pd.read_csv("skills_dataset.csv")
|
| 23 |
countries_df = pd.read_csv("countries_dataset.csv")
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
|
| 37 |
def extract_entities(text):
|
| 38 |
doc = nlp(text)
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
cosine_scores = util.pytorch_cos_sim(embeddings[0], embeddings[1:])[0]
|
| 46 |
-
top_results = torch.topk(cosine_scores, k=10)
|
| 47 |
-
matched_skills = [skills_df['Skill'].iloc[idx] for idx in top_results.indices]
|
| 48 |
-
return matched_skills
|
| 49 |
-
|
| 50 |
-
def recommend_certifications(matched_skills):
    """Return certification rows whose skill is among the matched skills.

    Args:
        matched_skills: Collection of skill names to look up in the
            'Skill' column of ``certs_df``.

    Returns:
        The matching rows of ``certs_df``, keeping only the first row for
        each distinct 'Certification' value.
    """
    skill_mask = certs_df['Skill'].isin(matched_skills)
    relevant_rows = certs_df[skill_mask]
    return relevant_rows.drop_duplicates('Certification')
|
| 52 |
-
|
| 53 |
-
def recommend_countries_and_salary(matched_skills):
    """Summarise salary and visa information per country for the given skills.

    Args:
        matched_skills: Collection of skill names to look up in the
            'Skill' column of ``countries_df``.

    Returns:
        A DataFrame with one row per 'Country' containing the mean
        'AverageSalary' and the first 'VisaPath' seen among the rows of
        ``countries_df`` that match the skills.
    """
    skill_mask = countries_df['Skill'].isin(matched_skills)
    rows_for_skills = countries_df[skill_mask]
    # One aggregate per output column: average the salaries, keep the first
    # visa path encountered for the country.
    aggregations = {"AverageSalary": "mean", "VisaPath": "first"}
    per_country = rows_for_skills.groupby('Country').agg(aggregations)
    return per_country.reset_index()
|
| 56 |
-
|
| 57 |
-
def recommend_education(edu_background):
    """Return education rows whose background mentions ``edu_background``.

    Args:
        edu_background: Text to search for in the 'Background' column of
            ``edu_df``. It is matched as a plain, case-insensitive
            substring.

    Returns:
        The rows of ``edu_df`` whose 'Background' contains the text; rows
        with a missing 'Background' are treated as non-matching.
    """
    # regex=False: the background is free text, not a pattern. Without it a
    # value such as "C++" would be parsed as a regular expression and raise.
    background_mask = edu_df['Background'].str.contains(
        edu_background, case=False, na=False, regex=False
    )
    return edu_df[background_mask]
|
| 60 |
-
|
| 61 |
-
# --- UI ---
|
| 62 |
-
st.markdown("""
|
| 63 |
-
<div style="background-color:#e3f2fd;padding:20px;border-radius:10px">
|
| 64 |
-
<h1 style="color:#0d47a1;text-align:center;">π― Global Skill Scorer & Career Recommender</h1>
|
| 65 |
-
<p style="text-align:center;font-size:18px">Upload your CV to get a personalized career growth plan, skill score, salary predictions, and global recommendations.</p>
|
| 66 |
-
</div>
|
| 67 |
-
""", unsafe_allow_html=True)
|
| 68 |
-
|
| 69 |
-
uploaded_file = st.file_uploader("π Upload your CV (PDF only)", type="pdf")
|
| 70 |
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
matched_skills = match_skills(summary)
|
| 76 |
-
certs = recommend_certifications(matched_skills)
|
| 77 |
-
country_salaries = recommend_countries_and_salary(matched_skills)
|
| 78 |
-
education_recos = recommend_education("technical") # defaulting to technical for now
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
|
|
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
-
|
| 93 |
-
st.dataframe(education_recos)
|
| 94 |
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
else:
|
| 97 |
st.info("Please upload your CV to begin.")
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
+
import pdfplumber
|
|
|
|
|
|
|
| 4 |
import spacy
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
+
# Set page config at the top, before any other Streamlit call in this script.
st.set_page_config(page_title="Skill Scoring App", layout="wide")

# Load spaCy model.
# Streamlit re-executes this script on every user interaction, so the model
# is only downloaded when it is actually missing rather than on each rerun.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package is not installed.
    import spacy.cli

    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Load datasets (CSV files expected next to the app).
skills_df = pd.read_csv("skills_dataset.csv")
countries_df = pd.read_csv("countries_dataset.csv")
cert_df = pd.read_csv("certifications.csv")
edu_tech_df = pd.read_csv("education_technical.csv")
edu_non_tech_df = pd.read_csv("education_non_technical.csv")
|
| 20 |
+
|
| 21 |
+
# Helper functions
|
| 22 |
+
def extract_text_from_pdf(file):
    """Return the text of a PDF, one page after another.

    Args:
        file: Whatever ``pdfplumber.open`` accepts; the app passes the
            object returned by the Streamlit file uploader.

    Returns:
        str: The text of every page that yielded text, joined with newline
        characters. Pages whose ``extract_text()`` result is empty or
        ``None`` are skipped, so the result is an empty string when no page
        has extractable text.
    """
    with pdfplumber.open(file) as pdf:
        # Extract each page exactly once; the previous form called
        # extract_text() twice per page (once to filter, once for the value).
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(page_text for page_text in page_texts if page_text)
|
| 29 |
|
| 30 |
def extract_entities(text):
    """Find known skills in the CV text and classify the background.

    Skills are the values of the 'Skill' column of ``skills_df``. Matching
    is exact and case-sensitive, and works on runs of consecutive spaCy
    tokens so that multi-word skills such as "Machine Learning" are found
    as well as single-word ones.

    Args:
        text: Plain text of the CV.

    Returns:
        tuple: ``(skills, background)`` where ``skills`` is a list of the
        distinct skills found (in no particular order) and ``background`` is
        "technical" if any found skill is in the built-in technical set,
        otherwise "non-technical".
    """
    doc = nlp(text)

    # Build the lookup set once instead of scanning the column per token.
    known_skills = set(skills_df['Skill'].dropna().astype(str))
    longest_skill = max((len(skill.split()) for skill in known_skills), default=0)

    # Whitespace tokens (line breaks, runs of spaces) would prevent a phrase
    # that wraps across lines from matching, so they are left out.
    words = [token.text for token in doc if not token.is_space]

    # Identify skills present in the CV by sliding windows of 1..N words.
    found = set()
    for size in range(1, longest_skill + 1):
        for start in range(len(words) - size + 1):
            candidate = " ".join(words[start:start + size])
            if candidate in known_skills:
                found.add(candidate)

    # Determine technical vs non-technical background
    technical_skills = {"Python", "Machine Learning", "Cloud Computing", "Cybersecurity", "AI", "DevOps"}
    background = "technical" if found & technical_skills else "non-technical"
    return list(found), background
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
+
def score_skills(user_skills):
    """Score a CV by how much of the skills dataset it covers.

    Args:
        user_skills: Collection of skills identified in the CV.

    Returns:
        int: ``len(user_skills)`` expressed as a percentage of the number of
        rows in ``skills_df``, truncated to an integer; 0 when ``skills_df``
        has no rows.
    """
    catalogue_size = len(skills_df)
    if catalogue_size == 0:
        # Nothing to compare against; also avoids dividing by zero.
        return 0
    coverage = len(user_skills) / catalogue_size
    return int(coverage * 100)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
+
def recommend_countries(skills):
    """List country, salary and visa details for the given skills.

    Args:
        skills: Collection of skill names to look up in the 'Skill' column
            of ``countries_df``.

    Returns:
        A DataFrame with the columns 'Country', 'AverageSalary' and
        'VisaPath' for the matching rows, with duplicate rows removed and a
        fresh 0-based index.
    """
    wanted_columns = ["Country", "AverageSalary", "VisaPath"]
    has_skill = countries_df['Skill'].isin(skills)
    matches = countries_df.loc[has_skill, wanted_columns]
    unique_matches = matches.drop_duplicates()
    return unique_matches.reset_index(drop=True)
|
| 47 |
|
| 48 |
+
def recommend_certifications(skills):
    """Return the certification rows that relate to the given skills.

    Args:
        skills: Collection of skill names to look up in the 'Skill' column
            of ``cert_df``.

    Returns:
        The matching rows of ``cert_df`` with a fresh 0-based index.
    """
    relevant_rows = cert_df['Skill'].isin(skills)
    selected = cert_df.loc[relevant_rows]
    return selected.reset_index(drop=True)
|
| 50 |
|
| 51 |
+
def recommend_education(background):
    """Pick the education dataset that matches the candidate's background.

    Args:
        background: "technical" selects ``edu_tech_df``; any other value
            selects ``edu_non_tech_df``.

    Returns:
        The selected DataFrame with a fresh 0-based index.
    """
    if background == "technical":
        programmes = edu_tech_df
    else:
        programmes = edu_non_tech_df
    return programmes.reset_index(drop=True)
|
| 53 |
|
| 54 |
+
# Streamlit UI
# NOTE(review): the leading "π"/"π€" in several labels below look like
# mis-decoded emoji; the original glyphs cannot be recovered from here, so the
# strings are left untouched. Only the "Identified Skills" heading was
# repaired, because its literal was broken across two lines.
st.title("π Personalized Skill Scoring & Career Path App")
st.markdown(
    "Upload your CV and get a personalized career guide based on your skills and background."
)

uploaded_file = st.file_uploader("π€ Upload your CV (PDF format only)", type=["pdf"])

if uploaded_file:
    # Run the whole analysis under a spinner, then render the results.
    with st.spinner("Analyzing your CV..."):
        text = extract_text_from_pdf(uploaded_file)
        skills, background = extract_entities(text)
        score = score_skills(skills)
        country_info = recommend_countries(skills)
        certs = recommend_certifications(skills)
        edu = recommend_education(background)

    st.subheader("✅ Identified Skills")
    # An empty skill list is falsy, so the fallback message is shown instead.
    st.write(skills or "No recognized skills found.")

    st.subheader("π Skill Score")
    st.metric("Your Skill Score", f"{score}/100")

    st.subheader("π Country Recommendations")
    if not country_info.empty:
        st.dataframe(country_info)
    else:
        st.write("No country recommendations available for your skill set.")

    st.subheader("π Recommended Certifications")
    if not certs.empty:
        st.dataframe(certs)
    else:
        st.write("No certification recommendations available for your skill set.")

    st.subheader("π Higher Education Opportunities")
    if not edu.empty:
        st.dataframe(edu)
    else:
        st.write("No higher education opportunities available for your background.")
else:
    st.info("Please upload your CV to begin.")
|