Spaces:

Danial7
/

Skill_Score

Sleeping

App Files Files Community

Danial7 committed on May 14, 2025

Commit

60afec2

verified ·

1 Parent(s): 6566acb

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -75

app.py CHANGED Viewed

@@ -1,97 +1,95 @@
-# --- Skill Scoring Streamlit App ---
 import streamlit as st
-import pdfplumber
 import pandas as pd
-import numpy as np
-import spacy.cli
-spacy.cli.download("en_core_web_sm")
 import spacy
-import torch
-from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
-from sentence_transformers import SentenceTransformer, util
-# --- Page Config ---
 st.set_page_config(page_title="Skill Scoring App", layout="wide")
-# --- Load NLP Models ---
 nlp = spacy.load("en_core_web_sm")
-sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
-summarizer = pipeline("summarization", model="google/flan-t5-base", tokenizer="google/flan-t5-base")
-# --- Load Datasets ---
 skills_df = pd.read_csv("skills_dataset.csv")
 countries_df = pd.read_csv("countries_dataset.csv")
-certs_df = pd.read_csv("certifications.csv")
-edu_df = pd.read_csv("education_opportunities.csv")
-# --- Functions ---
-def extract_text(uploaded_file):
-    with pdfplumber.open(uploaded_file) as pdf:
-        return "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
-def summarize_cv(text):
-    inputs = text[:3000]  # flan-t5 input token limit workaround
-    summary = summarizer(inputs, max_length=200, min_length=50, do_sample=False)
-    return summary[0]['summary_text']
 def extract_entities(text):
     doc = nlp(text)
-    skills = [ent.text for ent in doc.ents if ent.label_ in ["ORG", "SKILL"]]
-    education = [ent.text for ent in doc.ents if ent.label_ == "EDUCATION"]
-    return list(set(skills)), list(set(education))
-def match_skills(cv_text):
-    embeddings = sentence_model.encode([cv_text] + skills_df['Skill'].tolist(), convert_to_tensor=True)
-    cosine_scores = util.pytorch_cos_sim(embeddings[0], embeddings[1:])[0]
-    top_results = torch.topk(cosine_scores, k=10)
-    matched_skills = [skills_df['Skill'].iloc[idx] for idx in top_results.indices]
-    return matched_skills
-def recommend_certifications(matched_skills):
-    return certs_df[certs_df['Skill'].isin(matched_skills)].drop_duplicates('Certification')
-def recommend_countries_and_salary(matched_skills):
-    matched_df = countries_df[countries_df['Skill'].isin(matched_skills)]
-    return matched_df.groupby('Country').agg({"AverageSalary": "mean", "VisaPath": "first"}).reset_index()
-def recommend_education(edu_background):
-    matches = edu_df[edu_df['Background'].str.contains(edu_background, case=False, na=False)]
-    return matches
-# --- UI ---
-st.markdown("""
-    <div style="background-color:#e3f2fd;padding:20px;border-radius:10px">
-        <h1 style="color:#0d47a1;text-align:center;">🎯 Global Skill Scorer & Career Recommender</h1>
-        <p style="text-align:center;font-size:18px">Upload your CV to get a personalized career growth plan, skill score, salary predictions, and global recommendations.</p>
-    </div>
-""", unsafe_allow_html=True)
-uploaded_file = st.file_uploader("📄 Upload your CV (PDF only)", type="pdf")
-if uploaded_file:
-    with st.spinner("Analyzing your CV..."):
-        raw_text = extract_text(uploaded_file)
-        summary = summarize_cv(raw_text)
-        matched_skills = match_skills(summary)
-        certs = recommend_certifications(matched_skills)
-        country_salaries = recommend_countries_and_salary(matched_skills)
-        education_recos = recommend_education("technical")  # defaulting to technical for now
-    st.subheader("📌 Summary of Your CV")
-    st.info(summary)
-    st.subheader("💼 Matched Skills")
-    st.write(matched_skills)
-    st.subheader("🎓 Suggested Certifications")
-    st.dataframe(certs)
-    st.subheader("🌍 Best Countries & Salaries")
-    st.dataframe(country_salaries)
-    st.subheader("🎓 Higher Education Options & Scholarships")
-    st.dataframe(education_recos)
-    st.success("✅ Personalized plan generated successfully.")
 else:
     st.info("Please upload your CV to begin.")

 import streamlit as st
 import pandas as pd
+import pdfplumber
 import spacy
+# Set page config at the top
 st.set_page_config(page_title="Skill Scoring App", layout="wide")
+# Load spaCy model. Streamlit re-executes this script on every user
+# interaction, so try to load the installed model first and only fall back
+# to downloading it when it is missing.
+import spacy.cli
+try:
+    nlp = spacy.load("en_core_web_sm")
+except OSError:  # spacy.load raises OSError when the model is not installed
+    spacy.cli.download("en_core_web_sm")
+    nlp = spacy.load("en_core_web_sm")
+# Load datasets
 skills_df = pd.read_csv("skills_dataset.csv")
 countries_df = pd.read_csv("countries_dataset.csv")
+cert_df = pd.read_csv("certifications.csv")
+edu_tech_df = pd.read_csv("education_technical.csv")
+edu_non_tech_df = pd.read_csv("education_non_technical.csv")
+# Helper functions
+def extract_text_from_pdf(file):
+    """Return the text of every page of a PDF, joined by newlines.
+
+    Args:
+        file: Whatever ``pdfplumber.open`` accepts; the app passes the
+            Streamlit upload object.
+
+    Returns:
+        The page texts joined by a newline character. Pages for which
+        ``extract_text()`` returns nothing are skipped, so the result is an
+        empty string when no page yields text.
+    """
+    with pdfplumber.open(file) as pdf:
+        # Call extract_text() once per page instead of once to filter and
+        # a second time to collect the value.
+        page_texts = (page.extract_text() for page in pdf.pages)
+        # The join runs inside the ``with`` so pages are read while the PDF
+        # is still open.
+        return "\n".join(page_text for page_text in page_texts if page_text)
 def extract_entities(text):
+    """Find known skills in CV text and classify the candidate's background.
+
+    Args:
+        text: Text extracted from the CV.
+
+    Returns:
+        A tuple ``(skills, background)``. ``skills`` is a sorted list of the
+        distinct ``skills_df['Skill']`` entries that occur in ``text``
+        (case-sensitive). ``background`` is "technical" when any found skill
+        is in the hard-coded technical set, otherwise "non-technical".
+    """
+    from spacy.matcher import PhraseMatcher
     doc = nlp(text)
+    # Identify skills present in the CV. Match whole phrases rather than
+    # single tokens so multi-word skills such as "Machine Learning" can be
+    # found; single-token skills match exactly as before.
+    matcher = PhraseMatcher(nlp.vocab)
+    for skill in skills_df['Skill'].unique():
+        if not isinstance(skill, str):
+            continue  # NaN / non-text cells can never occur in the CV text
+        pattern = nlp.make_doc(skill)
+        if len(pattern) == 0:
+            continue  # an empty cell produces no tokens to match
+        matcher.add(skill, [pattern])
+    # Each match id resolves back to the key it was added under, i.e. the
+    # skill exactly as spelled in the dataset.
+    skills = {nlp.vocab.strings[match_id] for match_id, _start, _end in matcher(doc)}
+    # Determine technical vs non-technical background
+    technical_skills = {"Python", "Machine Learning", "Cloud Computing", "Cybersecurity", "AI", "DevOps"}
+    background = "technical" if any(s in technical_skills for s in skills) else "non-technical"
+    # Sorted so the displayed list has a stable order between reruns.
+    return sorted(skills), background
+def score_skills(user_skills):
+    """Score the user's skills as a percentage of the skills dataset size.
+
+    Args:
+        user_skills: Collection of skills found for the user.
+
+    Returns:
+        ``len(user_skills)`` divided by the number of rows in ``skills_df``,
+        times 100 and truncated to an int; 0 when the dataset has no rows.
+    """
+    dataset_size = len(skills_df)
+    if dataset_size == 0:
+        # Avoid dividing by zero on an empty dataset.
+        return 0
+    matched_fraction = len(user_skills) / dataset_size
+    return int(matched_fraction * 100)
+def recommend_countries(skills):
+    """Return the distinct country rows whose skill is one of ``skills``.
+
+    Args:
+        skills: Skills to look up in ``countries_df['Skill']``.
+
+    Returns:
+        A DataFrame with the columns Country, AverageSalary and VisaPath,
+        de-duplicated and re-indexed from 0.
+    """
+    has_skill = countries_df['Skill'].isin(skills)
+    wanted_columns = ["Country", "AverageSalary", "VisaPath"]
+    matches = countries_df.loc[has_skill, wanted_columns]
+    return matches.drop_duplicates().reset_index(drop=True)
+def recommend_certifications(skills):
+    """Return the certification rows whose skill is one of ``skills``.
+
+    Args:
+        skills: Skills to look up in ``cert_df['Skill']``.
+
+    Returns:
+        The matching rows of ``cert_df``, re-indexed from 0.
+    """
+    skill_mask = cert_df['Skill'].isin(skills)
+    matching_certs = cert_df[skill_mask]
+    return matching_certs.reset_index(drop=True)
+def recommend_education(background):
+    """Return the education table that matches the given background.
+
+    Args:
+        background: "technical" selects ``edu_tech_df``; any other value
+            selects ``edu_non_tech_df``.
+
+    Returns:
+        The selected DataFrame, re-indexed from 0.
+    """
+    if background == "technical":
+        selected_df = edu_tech_df
+    else:
+        selected_df = edu_non_tech_df
+    return selected_df.reset_index(drop=True)
+# Streamlit UI: upload a PDF CV, analyse it, then render one section per result.
+st.title("📊 Personalized Skill Scoring & Career Path App")
+st.markdown(
+    "Upload your CV and get a personalized career guide based on your skills and background."
+)
+uploaded_file = st.file_uploader("📤 Upload your CV (PDF format only)", type=["pdf"])
+if uploaded_file:
+    with st.spinner("Analyzing your CV..."):
+        text = extract_text_from_pdf(uploaded_file)
+        skills, background = extract_entities(text)
+        score = score_skills(skills)
+        country_info = recommend_countries(skills)
+        certs = recommend_certifications(skills)
+        edu = recommend_education(background)
+    # A PDF with no extractable text (e.g. a scanned image) would otherwise
+    # just show zero skills and a 0 score with no explanation.
+    if not text.strip():
+        st.warning("No text could be extracted from this PDF; the results below may be empty.")
+    st.subheader("✅ Identified Skills")
+    st.write(skills or "No recognized skills found.")
+    st.subheader("📈 Skill Score")
+    st.metric("Your Skill Score", f"{score}/100")
+    st.subheader("🌍 Country Recommendations")
+    if not country_info.empty:
+        st.dataframe(country_info)
+    else:
+        st.write("No country recommendations available for your skill set.")
+    st.subheader("🎓 Recommended Certifications")
+    if not certs.empty:
+        st.dataframe(certs)
+    else:
+        st.write("No certification recommendations available for your skill set.")
+    st.subheader("🎓 Higher Education Opportunities")
+    if not edu.empty:
+        st.dataframe(edu)
+    else:
+        st.write("No higher education opportunities available for your background.")
 else:
     st.info("Please upload your CV to begin.")