Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,132 +1,97 @@
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
-
st.set_page_config(page_title="Skill Scoring App", layout="wide") # FIRST!
|
| 3 |
-
|
| 4 |
-
# All other imports
|
| 5 |
import pdfplumber
|
| 6 |
import pandas as pd
|
| 7 |
import numpy as np
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
|
| 29 |
-
|
| 30 |
-
classifier = load_classifier() # No decorator used to avoid early Streamlit calls
|
| 31 |
|
| 32 |
# --- Functions ---
|
| 33 |
def extract_text(uploaded_file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        uploaded_file: Anything ``pdfplumber.open`` accepts (here, the
            object returned by the Streamlit file uploader).

    Returns:
        The per-page texts joined with ``"\\n"``. Pages whose extraction
        result is falsy (``None`` or ``""``) are skipped, so a PDF with no
        extractable text yields ``""``.
    """
    with pdfplumber.open(uploaded_file) as pdf:
        # Extract each page exactly once; filtering and joining on separate
        # calls would parse every page twice.
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(text for text in page_texts if text)
|
| 36 |
|
| 37 |
-
def
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
return
|
| 41 |
-
|
| 42 |
-
def
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
report = f"""Skill Score Report\n\n
|
| 72 |
-
Skill Score: {score}/100
|
| 73 |
-
Suggested Country: {country}
|
| 74 |
-
Estimated Salary: ${salary_range[0]:,} - ${salary_range[1]:,} USD
|
| 75 |
-
|
| 76 |
-
Skills Identified:\n"""
|
| 77 |
-
for skill, val in skills.items():
|
| 78 |
-
report += f" - {skill}: {val}\n"
|
| 79 |
-
|
| 80 |
-
report += "\nImprovement Suggestions:\n"
|
| 81 |
-
for tip in tips:
|
| 82 |
-
report += f" - {tip}\n"
|
| 83 |
-
|
| 84 |
-
return report
|
| 85 |
-
|
| 86 |
-
# --- UI Banner ---
|
| 87 |
-
st.markdown(
|
| 88 |
-
"""
|
| 89 |
-
<div style="background-color:#f0f4ff;padding:20px;border-radius:12px;margin-bottom:25px">
|
| 90 |
-
<h1 style="color:#003366;text-align:center;">π Skill Scoring & Career Guidance App</h1>
|
| 91 |
-
<p style="text-align:center;font-size:18px;color:#333;">
|
| 92 |
-
Upload your CV to discover your global job potential β get personalized skill scores, salary insights,
|
| 93 |
-
job location recommendations, and improvement suggestions. All for free.
|
| 94 |
-
</p>
|
| 95 |
</div>
|
| 96 |
-
|
| 97 |
-
unsafe_allow_html=True
|
| 98 |
-
)
|
| 99 |
|
| 100 |
-
|
| 101 |
-
uploaded_file = st.file_uploader("π€ Upload your CV (PDF)", type="pdf")
|
| 102 |
|
| 103 |
if uploaded_file:
|
| 104 |
-
st.
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
st.
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
st.
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
st.
|
| 120 |
-
|
| 121 |
-
st.
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
st.success("You're already covering the top in-demand skills!")
|
| 129 |
-
|
| 130 |
-
st.download_button("π₯ Download Report as TXT", report_text, file_name="skill_score_report.txt")
|
| 131 |
else:
|
| 132 |
-
st.info("Please upload your CV
|
|
|
|
| 1 |
+
# --- Skill Scoring Streamlit App ---
import streamlit as st
import pdfplumber
import pandas as pd
import numpy as np
import spacy.cli
import spacy
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer, util

# --- Page Config ---
# Kept ahead of every other Streamlit command in the script.
st.set_page_config(page_title="Skill Scoring App", layout="wide")


# --- Load NLP Models ---
# Streamlit re-executes this script on every interaction; the cached loaders
# below make sure the heavy models and CSV files are only loaded once.
@st.cache_resource
def _load_spacy_model(name="en_core_web_sm"):
    """Load a spaCy pipeline, downloading it only when it is not installed."""
    try:
        return spacy.load(name)
    except OSError:
        # spaCy raises OSError when the package cannot be found.
        spacy.cli.download(name)
        return spacy.load(name)


@st.cache_resource
def _load_sentence_model():
    """Load the sentence-embedding model used for skill matching."""
    return SentenceTransformer("all-MiniLM-L6-v2")


@st.cache_resource
def _load_summarizer():
    """Build the Hugging Face summarization pipeline."""
    return pipeline("summarization", model="google/flan-t5-base", tokenizer="google/flan-t5-base")


@st.cache_data
def _load_csv(path):
    """Read a CSV dataset into a DataFrame (cached per path)."""
    return pd.read_csv(path)


nlp = _load_spacy_model()
sentence_model = _load_sentence_model()
summarizer = _load_summarizer()

# --- Load Datasets ---
skills_df = _load_csv("skills_dataset.csv")
countries_df = _load_csv("countries_dataset.csv")
certs_df = _load_csv("certifications.csv")
edu_df = _load_csv("education_opportunities.csv")
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# --- Functions ---
|
| 28 |
def extract_text(uploaded_file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        uploaded_file: Anything ``pdfplumber.open`` accepts (here, the
            object returned by the Streamlit file uploader).

    Returns:
        The per-page texts joined with ``"\\n"``. Pages whose extraction
        result is falsy (``None`` or ``""``) are skipped, so a PDF with no
        extractable text yields ``""``.
    """
    with pdfplumber.open(uploaded_file) as pdf:
        # Extract each page exactly once; filtering and joining on separate
        # calls would parse every page twice.
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(text for text in page_texts if text)
|
| 31 |
|
| 32 |
+
def summarize_cv(text, max_chars=3000):
    """Summarize CV text with the module-level ``summarizer`` pipeline.

    Args:
        text: Raw CV text.
        max_chars: Number of leading characters passed to the model. This is
            a character-based workaround for the model's input limit, not a
            token count.

    Returns:
        The generated summary string, or ``""`` when ``text`` is empty or
        whitespace-only (e.g. a scanned PDF with no extractable text), in
        which case the model is not called at all.
    """
    if not text or not text.strip():
        return ""
    inputs = text[:max_chars]
    summary = summarizer(inputs, max_length=200, min_length=50, do_sample=False)
    return summary[0]['summary_text']
|
| 36 |
+
|
| 37 |
+
def extract_entities(text):
    """Collect skill-like and education entities from text via ``nlp``.

    Args:
        text: Text to run through the module-level spaCy pipeline.

    Returns:
        A ``(skills, education)`` tuple of de-duplicated lists, each in
        first-seen order so results are stable between runs. ``skills``
        holds entities labelled ``ORG`` or ``SKILL``; ``education`` holds
        entities labelled ``EDUCATION``.
    """
    if not text:
        return [], []
    doc = nlp(text)
    # NOTE(review): the stock en_core_web_sm NER label set does not appear to
    # include SKILL or EDUCATION, so without a custom component `education`
    # will stay empty and `skills` will only contain ORG entities - confirm.
    skills = [ent.text for ent in doc.ents if ent.label_ in ("ORG", "SKILL")]
    education = [ent.text for ent in doc.ents if ent.label_ == "EDUCATION"]
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(skills)), list(dict.fromkeys(education))
|
| 42 |
+
|
| 43 |
+
def match_skills(cv_text, top_k=10):
    """Return the catalogue skills most similar to the CV text.

    The CV text and every entry of ``skills_df['Skill']`` are embedded with
    ``sentence_model`` and ranked by cosine similarity to the CV.

    Args:
        cv_text: Text to match against the skill catalogue.
        top_k: Maximum number of skills to return.

    Returns:
        Up to ``top_k`` skill names, best match first. Empty when the
        catalogue is empty or ``top_k`` is not positive.
    """
    skill_names = skills_df['Skill'].tolist()
    # torch.topk raises if k exceeds the number of scores, so clamp it.
    k = min(top_k, len(skill_names))
    if k <= 0:
        return []
    embeddings = sentence_model.encode([cv_text] + skill_names, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(embeddings[0], embeddings[1:])[0]
    top_results = torch.topk(cosine_scores, k=k)
    # Convert to plain ints before indexing; tensor elements are not valid
    # positional keys for pandas/list lookups.
    return [skill_names[i] for i in top_results.indices.tolist()]
|
| 49 |
+
|
| 50 |
+
def recommend_certifications(matched_skills):
    """Select certifications from ``certs_df`` for the given skills.

    Args:
        matched_skills: Skill names to look up in the ``Skill`` column.

    Returns:
        The matching rows of ``certs_df`` with repeated ``Certification``
        values dropped.
    """
    skill_mask = certs_df['Skill'].isin(matched_skills)
    relevant_rows = certs_df[skill_mask]
    return relevant_rows.drop_duplicates('Certification')
|
| 52 |
+
|
| 53 |
+
def recommend_countries_and_salary(matched_skills):
    """Summarize ``countries_df`` per country for the given skills.

    Args:
        matched_skills: Skill names to look up in the ``Skill`` column.

    Returns:
        A DataFrame with one row per ``Country`` containing the mean
        ``AverageSalary`` and the first ``VisaPath`` among matching rows.
    """
    wanted = countries_df['Skill'].isin(matched_skills)
    by_country = countries_df[wanted].groupby('Country')
    aggregated = by_country.agg({"AverageSalary": "mean", "VisaPath": "first"})
    return aggregated.reset_index()
|
| 56 |
+
|
| 57 |
+
def recommend_education(edu_background):
    """Find education opportunities matching a background keyword.

    Args:
        edu_background: Text to look for in ``edu_df['Background']``.
            Matched case-insensitively as a literal substring, so
            characters such as ``+`` or ``(`` (e.g. "C++") are safe.

    Returns:
        The rows of ``edu_df`` whose ``Background`` contains the keyword;
        rows with a missing ``Background`` never match.
    """
    # regex=False: treat the keyword as plain text rather than a pattern.
    matches = edu_df[edu_df['Background'].str.contains(edu_background, case=False, na=False, regex=False)]
    return matches
|
| 60 |
+
|
| 61 |
+
# --- UI ---
|
| 62 |
+
# Banner. The HTML is kept at column 0 inside the string so Markdown does not
# treat indented lines as a code block.
# NOTE(review): several icons in this section were unreadable (encoding
# damage). The target, briefcase and check-mark icons were recovered from
# the damaged bytes; the remaining icons are substitutes - confirm.
st.markdown("""
<div style="background-color:#e3f2fd;padding:20px;border-radius:10px">
<h1 style="color:#0d47a1;text-align:center;">🎯 Global Skill Scorer & Career Recommender</h1>
<p style="text-align:center;font-size:18px">Upload your CV to get a personalized career growth plan, skill score, salary predictions, and global recommendations.</p>
</div>
""", unsafe_allow_html=True)

uploaded_file = st.file_uploader("📄 Upload your CV (PDF only)", type="pdf")

if uploaded_file:
    with st.spinner("Analyzing your CV..."):
        raw_text = extract_text(uploaded_file)
        # Scanned/image-only PDFs produce no text; skip the models then.
        has_text = bool(raw_text and raw_text.strip())
        if has_text:
            summary = summarize_cv(raw_text)
            matched_skills = match_skills(summary)
            certs = recommend_certifications(matched_skills)
            country_salaries = recommend_countries_and_salary(matched_skills)
            education_recos = recommend_education("technical")  # defaulting to technical for now

    if not has_text:
        st.error("No text could be extracted from this PDF. Please upload a text-based (not scanned) CV.")
    else:
        st.subheader("📝 Summary of Your CV")
        st.info(summary)

        st.subheader("💼 Matched Skills")
        st.write(matched_skills)

        st.subheader("🎓 Suggested Certifications")
        st.dataframe(certs)

        st.subheader("🌍 Best Countries & Salaries")
        st.dataframe(country_salaries)

        st.subheader("📚 Higher Education Options & Scholarships")
        st.dataframe(education_recos)

        st.success("✅ Personalized plan generated successfully.")
else:
    st.info("Please upload your CV to begin.")
|