Spaces:

Danial7
/

Smart_CV_Analyzer

Sleeping

App Files Community

Danial7 committed on May 17, 2025

Commit

41b2d80

verified ·

1 Parent(s): 651799b

Update app.py

Browse files

Files changed (1) hide show

app.py +124 -213

app.py CHANGED Viewed

@@ -1,219 +1,130 @@
 import streamlit as st
-import PyPDF2
-import pandas as pd
-import os
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.metrics.pairwise import cosine_similarity
-from keybert import KeyBERT
-from datetime import datetime
-import plotly.express as px
 from fpdf import FPDF
-import requests
 st.set_page_config(page_title="Universal Smart CV Analyzer", layout="wide")
-st.title("🌍 Universal Smart CV Analyzer & Career Roadmap")
-st.markdown("Upload your **CV (PDF)** to get personalized recommendations, skill score, and complete career roadmap.")
-uploaded_file = st.file_uploader("Upload your CV (PDF)", type=["pdf"])
-# Load datasets
-@st.cache_data
-def load_data():
-    base_path = "data"
-    certs = pd.read_csv(os.path.join(base_path, "certifications.csv"))
-    scholarships = pd.read_csv(os.path.join(base_path, "scholarships.csv"))
-    edu_tech = pd.read_csv(os.path.join(base_path, "education_technical.csv"))
-    edu_nontech = pd.read_csv(os.path.join(base_path, "education_non_technical.csv"))
-    visa_data = pd.read_csv(os.path.join(base_path, "countries_dataset.csv"))
-    skills_data = pd.read_csv(os.path.join(base_path, "skills_dataset.csv"))
-    return certs, scholarships, edu_tech, edu_nontech, visa_data, skills_data
-certs, scholarships, edu_tech, edu_nontech, visa_data, skills_data = load_data()
-# Extract text from PDF
-def extract_text_from_pdf(file):
-    reader = PyPDF2.PdfReader(file)
-    text = ""
-    for page in reader.pages:
-        text += page.extract_text()
-    return text
-# Keyword extraction
-def extract_keywords(text, num_keywords=10):
-    kw_model = KeyBERT()
-    keywords = kw_model.extract_keywords(text, top_n=num_keywords, stop_words='english')
-    return [kw[0].lower() for kw in keywords]
-# Field identification
-def identify_field(keywords):
-    fields = {
-        "Engineering": ["engineer", "mechanical", "electrical", "civil", "plc", "automation"],
-        "Data Science": ["machine learning", "data", "python", "statistics", "ai"],
-        "Software Development": ["developer", "software", "backend", "frontend", "javascript"],
-        "Marketing": ["seo", "content", "marketing", "branding"],
-        "Finance": ["accounting", "finance", "budget", "tax"],
-        "Design": ["photoshop", "illustrator", "design", "creative"],
-        "Healthcare": ["nursing", "surgery", "hospital", "patient"],
-        "Construction": ["carpentry", "plumbing", "hvac", "gardening", "mining"]
-    }
-    scores = {field: len(set(keywords).intersection(terms)) for field, terms in fields.items()}
-    return max(scores, key=scores.get)
-# Technical background
-def is_technical_background(keywords):
-    tech_terms = ["engineer", "machine learning", "python", "developer", "software", "automation", "plc", "ai"]
-    non_tech_terms = ["marketing", "finance", "content", "seo", "branding", "accounting", "creative"]
-    tech_score = len(set(keywords).intersection(tech_terms))
-    non_tech_score = len(set(keywords).intersection(non_tech_terms))
-    return "Technical" if tech_score >= non_tech_score else "Non-Technical"
-# CV skill score
-def calculate_cv_score(text, keywords):
-    ideal = " ".join(keywords)
-    tfidf = TfidfVectorizer()
-    tfidf_matrix = tfidf.fit_transform([text, ideal])
-    score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
-    return round(score * 100)
-# Field data filter
-def filter_data_by_field(df, field_col, field):
-    return df[df[field_col].str.lower() == field.lower()]
-# Visa opportunities
-def suggest_visa_opportunities(keywords, visa_data):
-    matched_rows = []
-    for _, row in visa_data.iterrows():
-        if any(skill.lower() in keywords for skill in row["Skill"].split(",")):
-            matched_rows.append(row)
-    return pd.DataFrame(matched_rows)
-# Upskilling suggestions (no reliance on 'Importance')
-def suggest_upskilling(keywords, skills_data):
-    all_skills = set(skills_data["Skill"].str.lower())
-    current_skills = set([kw.lower() for kw in keywords])
-    missing_skills = all_skills - current_skills
-    suggested = skills_data[skills_data["Skill"].str.lower().isin(missing_skills)]
-    return suggested
-# 🎯 Job listings using Adzuna API
-def get_job_listings(keywords, location="Pakistan", results_per_page=10):
-    app_id = "f4efd3a2"  # Replace with your Adzuna app_id
-    app_key = "5702f3c0507ac69f98aa15f855b06901"  # Replace with your Adzuna app_key
-    base_url = "https://api.adzuna.com/v1/api/jobs/pk/search/1"
-    query = " ".join(keywords)
-    params = {
-        "app_id": app_id,
-        "app_key": app_key,
-        "results_per_page": results_per_page,
-        "what": query,
-        "where": location,
-        "content-type": "application/json"
-    }
-    try:
-        response = requests.get(base_url, params=params)
-        response.raise_for_status()
-        jobs = response.json().get("results", [])
-        return pd.DataFrame(jobs)
-    except Exception as e:
-        st.error(f"Error fetching job listings: {e}")
-        return pd.DataFrame()
-# Timeline generation
-def generate_timeline(data=None):
-    timeline = pd.DataFrame({
-        "Task": ["Certifications", "Scholarships", "Education", "Visa Search"],
-        "Start": ["2025-06-01", "2025-07-01", "2025-08-01", "2025-09-01"],
-        "Finish": ["2025-06-30", "2025-07-30", "2025-09-30", "2025-10-15"]
-    })
-    fig = px.timeline(timeline, x_start="Start", x_end="Finish", y="Task", color="Task")
-    fig.update_yaxes(categoryorder='total ascending')
-    st.plotly_chart(fig)
-# PDF Report
-class PDF(FPDF):
-    def header(self):
-        self.set_font('Arial', 'B', 12)
-        self.cell(0, 10, 'Career Roadmap Report', ln=True, align='C')
-    def chapter_title(self, title):
-        self.set_font('Arial', 'B', 10)
-        self.cell(0, 10, title, ln=True)
-    def chapter_body(self, body):
-        self.set_font('Arial', '', 9)
-        self.multi_cell(0, 10, body)
-def generate_pdf_report(field, score, keywords, upskills):
-    pdf = PDF()
-    pdf.add_page()
-    pdf.chapter_title("Field: " + field)
-    pdf.chapter_title("Score: " + str(score))
-    pdf.chapter_body("Keywords: " + ", ".join(keywords))
-    pdf.chapter_title("Suggested Upskilling:")
-    pdf.chapter_body(", ".join(upskills))
-    pdf.output("report.pdf")
-    st.success("📄 PDF Report Generated: report.pdf")
-# 🌟 MAIN APP LOGIC
 if uploaded_file:
-    text = extract_text_from_pdf(uploaded_file)
-    st.success("✅ CV Text Extracted")
-    keywords = extract_keywords(text)
-    st.subheader("🔑 Extracted Keywords")
-    st.write(keywords)
-    field = identify_field(keywords)
-    st.subheader("🎯 Identified Field / Domain")
-    st.write(field)
-    tech_class = is_technical_background(keywords)
-    st.subheader("🧠 CV Background Type")
-    st.write(tech_class)
-    score = calculate_cv_score(text, keywords)
-    st.subheader("📊 CV Skill Score")
-    st.metric(label="Score", value=f"{score}/100")
-    st.subheader("📈 Suggested Skills to Acquire for Better Opportunities")
-    missing_skills_df = suggest_upskilling(keywords, skills_data)
-    if not missing_skills_df.empty:
-        for skill in missing_skills_df["Skill"].head(10):
-            st.write(f"🔧 {skill}")
-        missing_list = missing_skills_df["Skill"].tolist()
-    else:
-        st.write("You already have most in-demand skills covered!")
-        missing_list = []
-    st.subheader("📚 Recommended Certifications")
-    certs_field = filter_data_by_field(certs, "Field", field)
-    st.dataframe(certs_field)
-    st.subheader("🎓 Scholarships")
-    scholarships_field = filter_data_by_field(scholarships, "Field", field)
-    st.dataframe(scholarships_field)
-    st.subheader("🎓 Education Opportunities")
-    if tech_class == "Technical":
-        edu_field = filter_data_by_field(edu_tech, "Field", field)
-    else:
-        edu_field = filter_data_by_field(edu_nontech, "Field", field)
-    st.dataframe(edu_field)
-    st.subheader("🌍 Visa Opportunities Based on Your Skills")
-    visa_matches = suggest_visa_opportunities(keywords, visa_data)
-    st.dataframe(visa_matches)
-    st.subheader("💼 Job Listings")
-    job_df = get_job_listings(keywords)
-    if not job_df.empty:
-        st.dataframe(job_df[["title", "company", "location", "description"]])
-    else:
-        st.write("No job listings available right now.")
-    st.subheader("🗓️ Personalized Timeline")
-    generate_timeline()
-    if st.button("📄 Generate PDF Report"):
-        generate_pdf_report(field, score, keywords, missing_list)

 import streamlit as st
+from PyPDF2 import PdfReader
 from fpdf import FPDF
+import os
+from utils import (
+    extract_keywords,
+    identify_field,
+    is_technical_background,
+    calculate_cv_score,
+    suggest_upskilling,
+    suggest_certifications,
+    suggest_scholarships,
+    suggest_education_opportunities,
+    suggest_visa_opportunities,
+    get_job_listings
+)
 st.set_page_config(page_title="Universal Smart CV Analyzer", layout="wide")
+st.title("📄 Universal Smart CV Analyzer & Career Roadmap")
+st.markdown("Upload your CV in PDF format to get a complete personalized analysis and roadmap.")
+# Upload PDF
+uploaded_file = st.file_uploader("Upload your CV", type="pdf")
 if uploaded_file:
+    with st.spinner("Reading and analyzing your CV..."):
+        pdf = PdfReader(uploaded_file)
+        text = ""
+        for page in pdf.pages:
+            text += page.extract_text() or ""
+        # Extract keywords
+        keywords = extract_keywords(text)
+        st.subheader("🔍 Extracted Keywords")
+        st.write(", ".join(keywords))
+        # Identify field
+        field = identify_field(keywords)
+        st.subheader("🧠 Predicted Field")
+        st.write(f"**{field}**")
+        # Score the CV
+        score = calculate_cv_score(text, keywords)
+        st.subheader("📊 CV Score")
+        st.metric(label="Skill Match Score", value=f"{score}/100")
+        # Determine technical background
+        background = is_technical_background(keywords)
+        st.subheader("🔧 Technical Background")
+        st.write(f"**{background}**")
+        # Suggestions Section
+        st.subheader("🚀 Suggested Upskilling")
+        upskills = suggest_upskilling(keywords)
+        st.write(upskills if upskills else "No suggestions found.")
+        st.subheader("🎓 Certifications")
+        certifications = suggest_certifications(keywords)
+        st.write(certifications if certifications else "No certifications found.")
+        st.subheader("💸 Scholarships")
+        scholarships = suggest_scholarships(keywords)
+        st.write(scholarships if scholarships else "No scholarships found.")
+        st.subheader("🏫 Education Opportunities")
+        education = suggest_education_opportunities(keywords)
+        st.write(education if education else "No educational programs found.")
+        st.subheader("🌍 Visa Opportunities")
+        visas = suggest_visa_opportunities(keywords)
+        st.write(visas if visas else "No visa opportunities found.")
+        st.subheader("💼 Job Listings")
+        job_df = get_job_listings(keywords, location="Pakistan")
+        if not job_df.empty:
+            st.dataframe(job_df)
+        else:
+            st.write("No jobs found.")
+        # PDF Report Generator
+        st.subheader("📥 Generate PDF Report")
+        class PDF(FPDF):
+            def chapter_title(self, title):
+                self.set_font("Arial", "B", 12)
+                self.set_fill_color(220, 220, 220)
+                self.cell(0, 10, title, ln=True, fill=True)
+            def chapter_body(self, body):
+                self.set_font("Arial", "", 11)
+                self.multi_cell(0, 10, body)
+                self.ln()
+        if st.button("Generate & Download Report"):
+            with st.spinner("Generating PDF report..."):
+                pdf = PDF()
+                pdf.add_page()
+                pdf.set_title("CV Analysis Report")
+                pdf.chapter_title("📄 CV Analysis Report")
+                pdf.chapter_title("Predicted Field:")
+                pdf.chapter_body(field)
+                pdf.chapter_title("Skill Match Score:")
+                pdf.chapter_body(f"{score}/100")
+                pdf.chapter_title("Technical Background:")
+                pdf.chapter_body(background)
+                pdf.chapter_title("Extracted Keywords:")
+                pdf.chapter_body(", ".join(keywords))
+                pdf.chapter_title("Suggested Upskilling:")
+                pdf.chapter_body(", ".join(upskills))
+                pdf.chapter_title("Certifications:")
+                pdf.chapter_body(", ".join(certifications))
+                pdf.chapter_title("Scholarships:")
+                pdf.chapter_body(", ".join(scholarships))
+                pdf.chapter_title("Education Opportunities:")
+                pdf.chapter_body(", ".join(education))
+                pdf.chapter_title("Visa Opportunities:")
+                pdf.chapter_body(", ".join(visas))
+                output_path = "cv_analysis_report.pdf"
+                pdf.output(output_path)
+                with open(output_path, "rb") as f:
+                    base64_pdf = f.read()
+                st.download_button(
+                    label="📄 Download CV Report",
+                    data=base64_pdf,
+                    file_name="cv_analysis_report.pdf",
+                    mime="application/pdf",
+                )