Spaces:

Danial7
/

Skill_Score

Sleeping

App Files Files Community

Danial7 committed on May 14, 2025

Commit

bba628e

verified ·

1 Parent(s): 90abcef

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -117

app.py CHANGED Viewed

@@ -1,132 +1,97 @@
 import streamlit as st
-st.set_page_config(page_title="Skill Scoring App", layout="wide")  # FIRST!
-# All other imports
 import pdfplumber
 import pandas as pd
 import numpy as np
-from transformers import pipeline
-from sklearn.feature_extraction.text import TfidfVectorizer
-from PIL import Image
-# --- Skill & Country Setup ---
-in_demand_skills = [
-    "Python", "Machine Learning", "Project Management", "Data Analysis", "Communication",
-    "Leadership", "Cloud Computing", "Cybersecurity", "AI", "DevOps"
-]
-country_salary_data = {
-    "USA": (90000, 150000),
-    "Germany": (60000, 100000),
-    "Canada": (70000, 110000),
-    "UK": (65000, 95000),
-    "India": (20000, 35000)
-}
-# --- Load Classifier ---
-def load_classifier():
-    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
-classifier = load_classifier()  # No decorator used to avoid early Streamlit calls
 # --- Functions ---
 def extract_text(uploaded_file):
     with pdfplumber.open(uploaded_file) as pdf:
         return "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
-def classify_skills(text, candidate_skills):
-    result = classifier(text, candidate_labels=candidate_skills, multi_label=True)
-    scores = dict(zip(result['labels'], result['scores']))
-    return {skill: round(score, 2) for skill, score in scores.items() if score > 0.4}
-def score_user_skills(user_skills):
-    if not user_skills:
-        return 0
-    return int(np.clip(len(user_skills) / len(in_demand_skills) * 100, 0, 100))
-def suggest_country_and_salary(score):
-    if score >= 80:
-        return "USA", country_salary_data["USA"]
-    elif score >= 60:
-        return "Germany", country_salary_data["Germany"]
-    elif score >= 50:
-        return "Canada", country_salary_data["Canada"]
-    elif score >= 40:
-        return "UK", country_salary_data["UK"]
-    else:
-        return "India", country_salary_data["India"]
-def improvement_tips(user_skills):
-    missing = [skill for skill in in_demand_skills if skill not in user_skills]
-    suggestions = {
-        "Python": "Take free Python courses at Coursera or edX.",
-        "Machine Learning": "Try fast.ai or Andrew Ng’s ML course on Coursera.",
-        "Cloud Computing": "Get certified in AWS or Azure (many free intro courses).",
-        "Communication": "Join Toastmasters or attend webinars on soft skills.",
-        "Leadership": "Look for Udemy or LinkedIn Learning leadership programs.",
-    }
-    return [suggestions[s] for s in missing if s in suggestions]
-def generate_report(score, country, salary_range, skills, tips):
-    report = f"""Skill Score Report\n\n
-Skill Score: {score}/100
-Suggested Country: {country}
-Estimated Salary: ${salary_range[0]:,} - ${salary_range[1]:,} USD
-Skills Identified:\n"""
-    for skill, val in skills.items():
-        report += f" - {skill}: {val}\n"
-    report += "\nImprovement Suggestions:\n"
-    for tip in tips:
-        report += f" - {tip}\n"
-    return report
-# --- UI Banner ---
-st.markdown(
-    """
-    <div style="background-color:#f0f4ff;padding:20px;border-radius:12px;margin-bottom:25px">
-        <h1 style="color:#003366;text-align:center;">🚀 Skill Scoring & Career Guidance App</h1>
-        <p style="text-align:center;font-size:18px;color:#333;">
-            Upload your CV to discover your global job potential — get personalized skill scores, salary insights,
-            job location recommendations, and improvement suggestions. All for free.
-        </p>
     </div>
-    """,
-    unsafe_allow_html=True
-)
-# --- File Upload ---
-uploaded_file = st.file_uploader("📤 Upload your CV (PDF)", type="pdf")
 if uploaded_file:
-    st.success("CV uploaded successfully.")
-    with st.spinner("Analyzing your skills..."):
-        cv_text = extract_text(uploaded_file)
-        user_skills = classify_skills(cv_text, in_demand_skills)
-        score = score_user_skills(user_skills)
-        country, (min_salary, max_salary) = suggest_country_and_salary(score)
-        tips = improvement_tips(user_skills)
-        report_text = generate_report(score, country, (min_salary, max_salary), user_skills, tips)
-    st.subheader("🔍 Skill Match")
-    st.write(user_skills)
-    st.subheader("📈 Skill Score")
-    st.metric(label="Your Skill Score", value=f"{score}/100")
-    st.subheader("🌍 Suggested Country & Salary")
-    st.markdown(f"**Suggested Country:** {country}")
-    st.markdown(f"**Estimated Salary Range:** ${min_salary:,} - ${max_salary:,} USD")
-    st.subheader("🛠️ Skill Improvement Suggestions")
-    if tips:
-        for tip in tips:
-            st.markdown(f"- {tip}")
-    else:
-        st.success("You're already covering the top in-demand skills!")
-    st.download_button("📥 Download Report as TXT", report_text, file_name="skill_score_report.txt")
 else:
-    st.info("Please upload your CV (PDF) to begin.")

+# --- Skill Scoring Streamlit App ---
 import streamlit as st
 import pdfplumber
 import pandas as pd
 import numpy as np
+import spacy.cli
+import spacy
+import torch
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+from sentence_transformers import SentenceTransformer, util
+# --- Page Config ---
+# Keep this ahead of every other Streamlit call in the script.
+st.set_page_config(page_title="Skill Scoring App", layout="wide")
+# --- Load NLP Models ---
+@st.cache_resource
+def _load_models():
+    """Load the spaCy, sentence-embedding and summarization models once per process.
+
+    Streamlit re-executes the whole script on every interaction; caching the
+    loaded models avoids re-downloading and re-initialising them on each rerun.
+
+    Returns:
+        Tuple of (spaCy pipeline, SentenceTransformer, summarization pipeline).
+    """
+    try:
+        spacy_nlp = spacy.load("en_core_web_sm")
+    except OSError:
+        # Model package is not installed yet: fetch it once, then load it.
+        spacy.cli.download("en_core_web_sm")
+        spacy_nlp = spacy.load("en_core_web_sm")
+    embedder = SentenceTransformer("all-MiniLM-L6-v2")
+    cv_summarizer = pipeline("summarization", model="google/flan-t5-base", tokenizer="google/flan-t5-base")
+    return spacy_nlp, embedder, cv_summarizer
+nlp, sentence_model, summarizer = _load_models()
+# --- Load Datasets ---
+@st.cache_data
+def _load_dataset(path):
+    """Read one CSV dataset from disk; the parsed frame is cached across reruns."""
+    return pd.read_csv(path)
+skills_df = _load_dataset("skills_dataset.csv")
+countries_df = _load_dataset("countries_dataset.csv")
+certs_df = _load_dataset("certifications.csv")
+edu_df = _load_dataset("education_opportunities.csv")
 # --- Functions ---
 def extract_text(uploaded_file):
     """Return the text of every page of a PDF, joined with newlines.

     Pages for which pdfplumber yields no text (None or empty string) are
     skipped. Each page is parsed exactly once.

     Args:
         uploaded_file: Path or file-like object accepted by ``pdfplumber.open``.

     Returns:
         The concatenated page texts; an empty string if no page has text.
     """
     with pdfplumber.open(uploaded_file) as pdf:
         # Consumed by join() inside the with-block, so the PDF is still open.
         page_texts = (page.extract_text() for page in pdf.pages)
         return "\n".join(page_text for page_text in page_texts if page_text)
+def summarize_cv(text):
+    """Summarize the CV text with the module-level ``summarizer`` pipeline.
+
+    Args:
+        text: Raw CV text.
+
+    Returns:
+        The generated summary string, or "" when ``text`` is blank.
+    """
+    inputs = text[:3000]  # cheap character cap so very long CVs are not tokenized in full
+    if not inputs.strip():
+        # Nothing to summarize (e.g. an image-only PDF produced no text).
+        return ""
+    # A character cap does not bound the token count; truncation=True lets the
+    # tokenizer enforce the model's input limit.
+    summary = summarizer(inputs, max_length=200, min_length=50, do_sample=False, truncation=True)
+    return summary[0]['summary_text']
+def extract_entities(text):
+    """Collect de-duplicated skill-like and education entity strings from text.
+
+    Runs the module-level spaCy pipeline ``nlp`` and buckets entity texts by
+    label: "ORG"/"SKILL" count as skills, "EDUCATION" as education.
+
+    NOTE(review): the stock en_core_web_sm label set does not appear to include
+    SKILL or EDUCATION, so the education list is presumably always empty -- confirm.
+
+    Returns:
+        Tuple (skills, education) of lists with unspecified ordering.
+    """
+    entities = nlp(text).ents
+    skills = {ent.text for ent in entities if ent.label_ in ("ORG", "SKILL")}
+    education = {ent.text for ent in entities if ent.label_ == "EDUCATION"}
+    return list(skills), list(education)
+def match_skills(cv_text):
+    """Return up to ten dataset skills most similar to the given text.
+
+    The text and every entry of ``skills_df['Skill']`` are embedded with
+    ``sentence_model`` and ranked by cosine similarity to the text.
+
+    Args:
+        cv_text: Text to match (the caller passes the CV summary).
+
+    Returns:
+        List of skill names, best match first; empty if the dataset has no skills.
+    """
+    skill_names = skills_df['Skill'].tolist()
+    if not skill_names:
+        return []
+    embeddings = sentence_model.encode([cv_text] + skill_names, convert_to_tensor=True)
+    cosine_scores = util.pytorch_cos_sim(embeddings[0], embeddings[1:])[0]
+    # torch.topk raises when k exceeds the number of candidates, so cap it.
+    top_results = torch.topk(cosine_scores, k=min(10, len(skill_names)))
+    # Convert tensor indices to plain ints before indexing.
+    return [skill_names[idx] for idx in top_results.indices.tolist()]
+def recommend_certifications(matched_skills):
+    """Return rows of ``certs_df`` whose 'Skill' is in ``matched_skills``, one row per 'Certification'."""
+    is_relevant = certs_df['Skill'].isin(matched_skills)
+    relevant_certs = certs_df.loc[is_relevant]
+    return relevant_certs.drop_duplicates(subset='Certification')
+def recommend_countries_and_salary(matched_skills):
+    """Aggregate ``countries_df`` rows for the matched skills per country.
+
+    Returns:
+        DataFrame with one row per 'Country': the mean 'AverageSalary' and the
+        first 'VisaPath' among the rows whose 'Skill' is in ``matched_skills``.
+    """
+    is_relevant = countries_df['Skill'].isin(matched_skills)
+    by_country = countries_df.loc[is_relevant].groupby('Country')
+    per_country = by_country.agg({"AverageSalary": "mean", "VisaPath": "first"})
+    return per_country.reset_index()
+def recommend_education(edu_background):
+    """Return rows of ``edu_df`` whose 'Background' contains ``edu_background``.
+
+    The match is a case-insensitive literal substring test; rows with a
+    missing 'Background' never match.
+
+    Args:
+        edu_background: Background keyword, e.g. "technical".
+    """
+    # regex=False: treat the keyword literally so inputs such as "C++" or
+    # "R&D (applied)" neither raise a regex error nor match unintended rows.
+    matches = edu_df[edu_df['Background'].str.contains(edu_background, case=False, na=False, regex=False)]
+    return matches
+# --- UI ---
+# Header banner rendered as raw HTML, followed by the PDF upload widget.
+_BANNER_HTML = """
+    <div style="background-color:#e3f2fd;padding:20px;border-radius:10px">
+        <h1 style="color:#0d47a1;text-align:center;">🎯 Global Skill Scorer & Career Recommender</h1>
+        <p style="text-align:center;font-size:18px">Upload your CV to get a personalized career growth plan, skill score, salary predictions, and global recommendations.</p>
     </div>
+"""
+st.markdown(_BANNER_HTML, unsafe_allow_html=True)
+uploaded_file = st.file_uploader(label="📄 Upload your CV (PDF only)", type="pdf")
 if uploaded_file:
+    # Run the analysis pipeline: text extraction -> summary -> skill matching
+    # -> certification / country / education recommendations.
+    with st.spinner("Analyzing your CV..."):
+        raw_text = extract_text(uploaded_file)
+        if not raw_text.strip():
+            # Image-only or scanned PDFs yield no text; summarizing or matching
+            # an empty string would produce meaningless recommendations.
+            st.error("No readable text was found in this PDF. Please upload a text-based CV.")
+            st.stop()
+        summary = summarize_cv(raw_text)
+        matched_skills = match_skills(summary)
+        certs = recommend_certifications(matched_skills)
+        country_salaries = recommend_countries_and_salary(matched_skills)
+        education_recos = recommend_education("technical")  # defaulting to technical for now
+    st.subheader("📌 Summary of Your CV")
+    st.info(summary)
+    st.subheader("💼 Matched Skills")
+    st.write(matched_skills)
+    st.subheader("🎓 Suggested Certifications")
+    st.dataframe(certs)
+    st.subheader("🌍 Best Countries & Salaries")
+    st.dataframe(country_salaries)
+    st.subheader("🎓 Higher Education Options & Scholarships")
+    st.dataframe(education_recos)
+    st.success("✅ Personalized plan generated successfully.")
 else:
+    st.info("Please upload your CV to begin.")