Spaces:

Bur3hani
/

cview

Sleeping

App Files Files Community

Bur3hani committed on Jul 3, 2025

Commit

85fdd68

verified ·

1 Parent(s): 0c610e5

Update app.py

Browse files

Files changed (1) hide show

app.py +182 -286

app.py CHANGED Viewed

@@ -1,9 +1,9 @@
-import streamlit as st
 import os
 import io
 import re
 from docx import Document
-from PyPDF2 import PdfReader # PyPDF2 is used. For more robust PDF parsing, consider 'PyMuPDF' (fitz)
 import pandas as pd
 import spacy
 from collections import Counter
@@ -14,25 +14,17 @@ import seaborn as sns
 import numpy as np
 # --- SpaCy Model Loading ---
-# Use st.cache_resource to load the model only once and reuse it across sessions.
-@st.cache_resource
-def load_spacy_model():
-    """
-    Loads the English spaCy model.
-    The model should be pre-downloaded via requirements.txt for Hugging Face Spaces.
-    """
-    try:
-        nlp_model = spacy.load("en_core_web_lg")
-        return nlp_model
-    except Exception as e:
-        st.error(f"Error loading spaCy model: {e}. Please ensure 'en_core_web_lg' is correctly installed via requirements.txt.")
-        st.stop() # Stop the app if model fails to load
-nlp = load_spacy_model()
-print("SpaCy model loaded successfully.") # This will appear in your Space logs
-# --- Global Predefined Skills (could be loaded from a file for larger lists) ---
 predefined_skills_list = set([
     "python", "tensorflow", "pytorch", "scikit-learn", "numpy", "pandas",
     "docker", "kubernetes", "aws", "git", "sql", "java", "r", "tableau",
@@ -50,262 +42,181 @@ predefined_skills_list = set([
 ])
 predefined_skills_list.update([
     "machine learning engineer", "data scientist", "ai engineer", "deep learning engineer",
-    "senior machine learning engineer", "junior data scientist", # Adding common job titles too
     "data engineer", "software engineer", "full stack", "frontend", "backend"
 ])
-# --- Text Extraction Functions (Adjusted for Streamlit file_object) ---
-def extract_text_from_pdf(file_object):
     """
-    Extracts text from a PDF file-like object using PyPDF2.
     """
     try:
-        reader = PdfReader(file_object)
-        text = ""
-        for page in reader.pages:
-            text += page.extract_text() or "" # Handle pages with no extractable text
         return text
     except Exception as e:
-        st.error(f"Error reading PDF: {e}")
         return ""
-def extract_text_from_docx(file_object):
     """
-    Extracts text from a DOCX file-like object using python-docx.
     """
     try:
-        document = Document(file_object)
         text = "\n".join([paragraph.text for paragraph in document.paragraphs])
         return text
     except Exception as e:
-        st.error(f"Error reading DOCX: {e}")
         return ""
-# --- Text Preprocessing Functions ---
 def preprocess_text(text):
-    """
-    Applies standard NLP preprocessing steps.
-    """
-    if not isinstance(text, str):
-        return ""
     text = text.lower()
     text = re.sub(r'\s+', ' ', text).strip()
     doc = nlp(text)
-    processed_tokens = [
-        token.lemma_ for token in doc if not token.is_stop and not token.is_punct and not token.is_space
-    ]
     return " ".join(processed_tokens)
-# --- Information Extraction (NER & Keyword Extraction) ---
 def extract_skills(text_doc, skill_keywords=None):
-    """
-    Extracts skills using spaCy's NER and a custom keyword list.
-    Args:
-        text_doc (spacy.tokens.Doc): spaCy Doc object of the text.
-        skill_keywords (set): An optional set of predefined skill keywords.
-    Returns:
-        list: A list of extracted skills.
-    """
     extracted_skills = []
-    if skill_keywords is None:
-        skill_keywords = set() # Should not be None if global is used
     doc_text = text_doc.text.lower()
     for skill in skill_keywords:
         if re.search(r'\b' + re.escape(skill) + r'\b', doc_text):
             extracted_skills.append(skill)
     entities = {}
     for ent in text_doc.ents:
-        if ent.label_ == "ORG":
-            entities.setdefault("organizations", []).append(ent.text)
-        elif ent.label_ == "GPE":
-            entities.setdefault("locations", []).append(ent.text)
-        elif ent.label_ == "DATE":
-            entities.setdefault("dates", []).append(ent.text)
-        elif ent.label_ == "PERSON":
-            entities.setdefault("people", []).append(ent.text)
     return list(set(extracted_skills)), entities
 def extract_experience_and_education(text):
-    """
-    Attempts to extract experience years and education level using regex and simple rules.
-    This is a simplified approach and can be complex for diverse CVs.
-    """
     years_experience = 0
     education_level = "Not Specified"
     exp_matches = re.findall(r'(\d+)\s*(?:\+|plus)?\s*years?\s+of\s+experience|\d+\s*yrs?\s+exp', text.lower())
     if exp_matches:
         try:
             years_experience = max([int(re.findall(r'\d+', m)[0]) for m in exp_matches if re.findall(r'\d+', m)])
-        except (ValueError, IndexError):
-            pass
     text_lower = text.lower()
-    if "phd" in text_lower or "doctorate" in text_lower:
-        education_level = "Ph.D."
-    elif "master" in text_lower or "m.s." in text_lower or "msc" in text_lower:
-        education_level = "Master's"
-    elif "bachelor" in text_lower or "b.s." in text_lower or "bsc" in text_lower:
-        education_level = "Bachelor's"
-    elif "associate" in text_lower:
-        education_level = "Associate's"
     return years_experience, education_level
-# --- Feature Engineering ---
 def get_text_embeddings(text):
-    """
-    Generates sentence embeddings for a given text using spaCy's pre-trained vectors.
-    """
-    if not text:
-        return np.zeros(nlp.vocab.vectors.shape[1])
     doc = nlp(text)
-    if doc.has_vector:
-        return doc.vector
-    else:
-        # Fallback if no vector for doc (shouldn't happen with en_core_web_lg)
-        return np.mean([token.vector for token in doc if token.has_vector], axis=0) if [token.vector for token in doc if token.has_vector] else np.zeros(nlp.vocab.vectors.shape[1])
 def calculate_cosine_similarity(vec1, vec2):
-    """
-    Calculates cosine similarity between two vectors.
-    Handles potential division by zero if vectors are zero vectors.
-    """
-    if np.all(vec1 == 0) or np.all(vec2 == 0):
-        return 0.0
     vec1 = vec1.reshape(1, -1)
     vec2 = vec2.reshape(1, -1)
     return cosine_similarity(vec1, vec2)[0][0]
-# --- Main Processing Pipeline for a Document (CV or Job Description) ---
 def analyze_document(doc_text):
-    """
-    Processes a document (CV or Job Description) for analysis.
-    """
     doc_spacy = nlp(doc_text)
     cleaned_text = preprocess_text(doc_text)
     extracted_skills, general_entities = extract_skills(doc_spacy, skill_keywords=predefined_skills_list)
     years_exp, education_level = extract_experience_and_education(doc_text)
     text_embedding = get_text_embeddings(cleaned_text)
     return {
-        "raw_text": doc_text,
-        "cleaned_text": cleaned_text,
-        "spacy_doc": doc_spacy,
-        "extracted_skills": extracted_skills,
-        "general_entities": general_entities,
-        "years_experience": years_exp,
-        "education_level": education_level,
         "text_embedding": text_embedding
     }
-# --- Matching and Scoring Logic ---
 def calculate_match_scores(cv_data, jd_data):
-    """
-    Calculates various match scores and identifies keyword overlaps.
-    """
     results = {}
-    # 1. Overall Semantic Similarity (using embeddings)
-    overall_similarity = calculate_cosine_similarity(
-        cv_data["text_embedding"],
-        jd_data["text_embedding"]
-    )
     results["overall_match_score"] = round(overall_similarity * 100, 2)
-    # 2. Skill Matching
     cv_skills = set(cv_data["extracted_skills"])
     jd_skills = set(jd_data["extracted_skills"])
     matched_skills = list(cv_skills.intersection(jd_skills))
     missing_skills = list(jd_skills.difference(cv_skills))
     extra_skills_in_cv = list(cv_skills.difference(jd_skills))
     results["matched_skills"] = matched_skills
     results["missing_skills"] = missing_skills
     results["extra_skills_in_cv"] = extra_skills_in_cv
-    if jd_skills:
-        skill_match_percentage = len(matched_skills) / len(jd_skills) * 100
-    else:
-        skill_match_percentage = 0.0
     results["skill_match_percentage"] = round(skill_match_percentage, 2)
-    # 3. Keyword Overlap (using TF-IDF for important words beyond specific skills)
     corpus = [cv_data["cleaned_text"], jd_data["cleaned_text"]]
     tfidf_vectorizer = TfidfVectorizer(max_features=100)
     tfidf_matrix = tfidf_vectorizer.fit_transform(corpus)
     feature_names = tfidf_vectorizer.get_feature_names_out()
     cv_tfidf_scores = {feature_names[i]: tfidf_matrix[0, i] for i in tfidf_matrix[0].nonzero()[1]}
     jd_tfidf_scores = {feature_names[i]: tfidf_matrix[1, i] for i in tfidf_matrix[1].nonzero()[1]}
     top_cv_keywords = sorted(cv_tfidf_scores.items(), key=lambda x: x[1], reverse=True)[:15]
     top_jd_keywords = sorted(jd_tfidf_scores.items(), key=lambda x: x[1], reverse=True)[:15]
     results["top_cv_keywords"] = [k for k,v in top_cv_keywords]
     results["top_jd_keywords"] = [k for k,v in top_jd_keywords]
     common_keywords = set(results["top_cv_keywords"]).intersection(set(results["top_jd_keywords"]))
     results["common_keywords"] = list(common_keywords)
-    # 4. Experience Matching
     cv_exp_years = cv_data["years_experience"]
     jd_exp_years = jd_data["years_experience"]
     results["cv_years_experience"] = cv_exp_years
     results["jd_years_experience"] = jd_exp_years
     exp_status = "Not specified by Job"
     if jd_exp_years > 0:
-        if cv_exp_years >= jd_exp_years:
-            exp_status = "Meets or Exceeds Requirement"
-        else:
-            exp_status = f"Below Requirement (Needs {jd_exp_years - cv_exp_years} more years)"
     results["experience_match_status"] = exp_status
-    # 5. Education Matching (simplified)
     cv_edu = cv_data["education_level"]
     jd_edu = jd_data["education_level"]
     results["cv_education_level"] = cv_edu
     results["jd_education_level"] = jd_edu
     edu_match_status = "Not Specified by Job"
-    if jd_edu != "Not Specified": # Only compare if JD specifies
         edu_order = {"Associate's": 1, "Bachelor's": 2, "Master's": 3, "Ph.D.": 4}
-        if edu_order.get(cv_edu, 0) >= edu_order.get(jd_edu, 0):
-            edu_match_status = "Meets or Exceeds Requirement"
-        else:
-            edu_match_status = "Below Requirement"
     results["education_match_status"] = edu_match_status
     return results
-# --- Overall Analysis Orchestrator ---
 def perform_cv_job_analysis(cv_text, job_desc_text):
-    """
-    Orchestrates the entire analysis process from raw text to results.
-    """
     cv_analysis_data = analyze_document(cv_text)
     job_desc_analysis_data = analyze_document(job_desc_text)
     match_results = calculate_match_scores(cv_analysis_data, job_desc_analysis_data)
     return match_results
-# --- Visualization Functions (Adjusted for Streamlit) ---
-# Each visualization function now returns a matplotlib figure object
-# and Streamlit's st.pyplot() is used to display it, then figure is closed.
 def create_overall_match_plot(score):
-    """Returns a matplotlib figure for overall match."""
     fig, ax = plt.subplots(figsize=(6, 2))
     sns.set_style("whitegrid")
     ax.barh(["Overall Match"], [score], color='skyblue')
@@ -318,153 +229,138 @@ def create_overall_match_plot(score):
     return fig
 def create_skill_match_plot(matched_skills, missing_skills):
-    """Returns a matplotlib figure for skill match breakdown."""
     labels = ['Matched Skills', 'Missing Skills']
     sizes = [len(matched_skills), len(missing_skills)]
     colors = ['#66b3ff', '#ff9999']
     explode = (0.05, 0.05) if sizes[0] > 0 and sizes[1] > 0 else (0,0)
-    if sum(sizes) == 0:
-        return None # Indicate no plot can be made
     fig, ax = plt.subplots(figsize=(7, 7))
-    ax.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%',
-           shadow=True, startangle=90, textprops={'fontsize': 12})
     ax.axis('equal')
     ax.set_title("Skill Match Breakdown", fontsize=14)
     plt.tight_layout()
     return fig
 def create_top_keywords_plot(cv_keywords, jd_keywords):
-    """Returns a matplotlib figure for top keywords."""
     fig, axes = plt.subplots(1, 2, figsize=(16, 6))
     sns.set_style("whitegrid")
     cv_df = pd.DataFrame(Counter(cv_keywords).most_common(10), columns=['Keyword', 'Count'])
     if not cv_df.empty:
         sns.barplot(x='Count', y='Keyword', data=cv_df, ax=axes[0], palette='viridis')
         axes[0].set_title('Top Keywords in CV', fontsize=14)
         axes[0].set_xlabel('Frequency/Importance', fontsize=12)
         axes[0].set_ylabel('')
     jd_df = pd.DataFrame(Counter(jd_keywords).most_common(10), columns=['Keyword', 'Count'])
     if not jd_df.empty:
         sns.barplot(x='Count', y='Keyword', data=jd_df, ax=axes[1], palette='plasma')
         axes[1].set_title('Top Keywords in Job Description', fontsize=14)
         axes[1].set_xlabel('Frequency/Importance', fontsize=12)
         axes[1].set_ylabel('')
     plt.tight_layout()
     return fig
-# --- Streamlit Application Layout ---
-st.set_page_config(page_title="CV-Job Match Analyzer", layout="wide", icon="👨‍💼")
-st.title("👨‍💼 CV-Job Match Analyzer 📈")
-st.markdown("""
-Welcome! This tool helps you understand how well a CV matches a job description.
-Upload a CV (PDF, DOCX, TXT) and paste the job description text to get an instant analysis.
-""")
-# Input for CV
-st.header("1. Upload Your CV")
-uploaded_cv_file = st.file_uploader("Choose a CV file (PDF, DOCX, TXT)", type=["pdf", "docx", "txt"], key="cv_uploader")
-cv_text_area = st.text_area("Or paste your CV text here (overrides file upload)", height=250, key="cv_text_area")
-cv_content = ""
-if uploaded_cv_file is not None:
-    if uploaded_cv_file.name.endswith('.pdf'):
-        cv_content = extract_text_from_pdf(uploaded_cv_file)
-    elif uploaded_cv_file.name.endswith('.docx'):
-        cv_content = extract_text_from_docx(uploaded_cv_file)
-    elif uploaded_cv_file.name.endswith('.txt'):
-        cv_content = uploaded_cv_file.read().decode("utf-8")
-    st.success("CV file uploaded and parsed successfully!")
-elif cv_text_area: # If text area has content and no file uploaded
-    cv_content = cv_text_area
-# Input for Job Description
-st.header("2. Input Job Description")
-job_desc_text_area = st.text_area("Paste the Job Description text here", height=250, key="jd_text_area")
-# Analyze Button
-st.markdown("---")
-if st.button("✨ Analyze CV Match ✨", use_container_width=True):
     if not cv_content:
-        st.error("🚨 Please upload a CV file or paste your CV text to proceed.")
-    if not job_desc_text_area:
-        st.error("🚨 Please paste the Job Description text to proceed.")
-    if cv_content and job_desc_text_area:
-        with st.spinner("🚀 Analyzing your documents... This might take a moment!"):
-            try:
-                analysis_results = perform_cv_job_analysis(cv_content, job_desc_text_area)
-                st.subheader("💡 Analysis Results Summary 💡")
-                # Display KPIs in columns
-                col1, col2, col3 = st.columns(3)
-                with col1:
-                    st.metric(label="Overall Match Score", value=f"{analysis_results['overall_match_score']}%")
-                with col2:
-                    st.metric(label="Skill Match", value=f"{analysis_results['skill_match_percentage']}%")
-                with col3:
-                    exp_status = analysis_results['experience_match_status']
-                    if "Meets" in exp_status or "Exceeds" in exp_status:
-                        st.metric(label="Experience Match", value=exp_status, delta="Good!")
-                    else:
-                        st.metric(label="Experience Match", value=exp_status, delta="Needs attention")
-                st.markdown("---")
-                st.subheader("📊 Visual Insights")
-                # Overall Match Plot
-                fig_overall = create_overall_match_plot(analysis_results['overall_match_score'])
-                st.pyplot(fig_overall)
-                plt.close(fig_overall) # Close figure to free memory
-                # Skill Match Plot
-                fig_skill = create_skill_match_plot(analysis_results['matched_skills'], analysis_results['missing_skills'])
-                if fig_skill:
-                    st.pyplot(fig_skill)
-                    plt.close(fig_skill)
-                else:
-                    st.info("No specific skills identified in the job description for a detailed skill match breakdown.")
-                # Top Keywords Plot
-                fig_keywords = create_top_keywords_plot(analysis_results['top_cv_keywords'], analysis_results['top_jd_keywords'])
-                st.pyplot(fig_keywords)
-                plt.close(fig_keywords)
-                st.markdown("---")
-                st.subheader("📝 Detailed Breakdown")
-                st.markdown("#### Skills Analysis")
-                st.write(f"**✅ Matched Skills:**", ", ".join(analysis_results['matched_skills']) if analysis_results['matched_skills'] else "None found matching job description.")
-                st.write(f"**❌ Missing Skills (from Job Description):**", ", ".join(analysis_results['missing_skills']) if analysis_results['missing_skills'] else "🥳 None! Your CV has all specified skills.")
-                st.write(f"**💡 Extra Skills in CV (not in Job Description):**", ", ".join(analysis_results['extra_skills_in_cv']) if analysis_results['extra_skills_in_cv'] else "None. (This is often fine, showing broader capability.)")
-                st.markdown("#### Keyword Relevance (Top TF-IDF Terms)")
-                st.write(f"**🤝 Top Common Keywords:**", ", ".join(analysis_results['common_keywords']) if analysis_results['common_keywords'] else "No significant common keywords beyond skills.")
-                st.write(f"**🔍 Top Keywords in Your CV:**", ", ".join(analysis_results['top_cv_keywords']) if analysis_results['top_cv_keywords'] else "N/A")
-                st.write(f"**🎯 Top Keywords in Job Description:**", ", ".join(analysis_results['top_jd_keywords']) if analysis_results['top_jd_keywords'] else "N/A")
-                st.markdown("#### Experience & Education Comparison")
-                st.write(f"**👤 Your CV's Experience:** `{analysis_results['cv_years_experience']}` years")
-                st.write(f"**💼 Job's Required Experience:** `{analysis_results['jd_years_experience']}` years")
-                st.info(f"**Status:** {analysis_results['experience_match_status']}")
-                st.write(f"**🎓 Your CV's Education:** `{analysis_results['cv_education_level']}`")
-                st.write(f"**📚 Job's Required Education:** `{analysis_results['jd_education_level']}`")
-                st.info(f"**Status:** {analysis_results['education_match_status']}")
-            except Exception as e:
-                st.error(f"An unexpected error occurred during analysis: {e}")
-                st.exception(e) # Show full traceback in Streamlit debug logs
-st.markdown("---")
-st.markdown("Developed with ❤️ for Data Science by your mentor")

+import gradio as gr
 import os
 import io
 import re
 from docx import Document
+from PyPDF2 import PdfReader
 import pandas as pd
 import spacy
 from collections import Counter
 import numpy as np
 # --- SpaCy Model Loading ---
+# For Gradio on Hugging Face Spaces, the model is usually installed via requirements.txt
+# so spacy.load() will find it.
+try:
+    nlp = spacy.load("en_core_web_lg")
+    print("SpaCy model loaded successfully.")
+except Exception as e:
+    print(f"Error loading spaCy model: {e}. Please ensure 'en_core_web_lg' is correctly installed via requirements.txt.")
+    # Startup stays best-effort (the failure is only logged), but bind the
+    # name so the failure state is explicit and checkable (`nlp is None`)
+    # instead of leaving `nlp` undefined and surfacing as a NameError on the
+    # first analysis request.
+    nlp = None
+# --- Global Predefined Skills ---
 predefined_skills_list = set([
     "python", "tensorflow", "pytorch", "scikit-learn", "numpy", "pandas",
     "docker", "kubernetes", "aws", "git", "sql", "java", "r", "tableau",
 ])
 predefined_skills_list.update([
     "machine learning engineer", "data scientist", "ai engineer", "deep learning engineer",
+    "senior machine learning engineer", "junior data scientist",
     "data engineer", "software engineer", "full stack", "frontend", "backend"
 ])
+# --- Text Extraction Functions (Adapted for file paths in Gradio's File component) ---
+# Gradio's gr.File component provides a file path to the temporary uploaded file.
+def extract_text_from_pdf(pdf_path):
     """
+    Extracts text from a PDF file given its path.
+
+    Args:
+        pdf_path: Path of the PDF file; it is opened in binary mode.
+
+    Returns:
+        The concatenated text of all pages (a page that yields no text
+        contributes an empty string), or "" if any exception is raised
+        while opening or reading the file.
     """
     try:
+        with open(pdf_path, 'rb') as file:
+            reader = PdfReader(file)
+            text = ""
+            for page in reader.pages:
+                text += page.extract_text() or ""
         return text
     except Exception as e:
+        print(f"Error reading PDF {pdf_path}: {e}") # Will print to Gradio logs
         return ""
+def extract_text_from_docx(docx_path):
     """
+    Extracts text from a DOCX file given its path.
+
+    Args:
+        docx_path: Path of the DOCX file, passed straight to ``Document``.
+
+    Returns:
+        The text of every paragraph joined with newlines, or "" if any
+        exception is raised while reading. Only ``document.paragraphs`` is
+        read here.
     """
     try:
+        document = Document(docx_path)
         text = "\n".join([paragraph.text for paragraph in document.paragraphs])
         return text
     except Exception as e:
+        print(f"Error reading DOCX {docx_path}: {e}") # Will print to Gradio logs
         return ""
+def get_file_content(file_obj):
+    """Return the text content of a CV uploaded through Gradio's file component.
+
+    Args:
+        file_obj: Either an object exposing the temporary file path as
+            ``.name`` or the path string itself. ``None`` means nothing was
+            uploaded.
+
+    Returns:
+        The extracted text, or "" when no file was given or the extension is
+        not .pdf, .docx or .txt (compared case-insensitively).
+    """
+    if file_obj is None:
+        return ""
+    # Accept both a file wrapper (path in .name) and a plain path string.
+    file_path = getattr(file_obj, "name", file_obj)
+    # Lower-case once so "CV.PDF" is routed the same way as "cv.pdf".
+    lowered_path = file_path.lower()
+    if lowered_path.endswith('.pdf'):
+        return extract_text_from_pdf(file_path)
+    if lowered_path.endswith('.docx'):
+        return extract_text_from_docx(file_path)
+    if lowered_path.endswith('.txt'):
+        # Substitute undecodable bytes instead of raising on non-UTF-8 files,
+        # matching the best-effort behaviour of the PDF/DOCX extractors.
+        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
+            return f.read()
+    return ""
+# --- Text Preprocessing Functions (same as before) ---
 def preprocess_text(text):
+    """
+    Normalizes text for downstream matching.
+
+    Lower-cases the input, collapses every whitespace run to a single space,
+    runs it through the module-level ``nlp`` pipeline and keeps the lemma of
+    each token that is not a stop word, punctuation or whitespace.
+
+    Returns:
+        The kept lemmas joined by single spaces, or "" when ``text`` is not
+        a string.
+    """
+    if not isinstance(text, str): return ""
     text = text.lower()
     text = re.sub(r'\s+', ' ', text).strip()
     doc = nlp(text)
+    processed_tokens = [token.lemma_ for token in doc if not token.is_stop and not token.is_punct and not token.is_space]
     return " ".join(processed_tokens)
+# --- Information Extraction (NER & Keyword Extraction) (same as before) ---
 def extract_skills(text_doc, skill_keywords=None):
+    """
+    Finds predefined skills and groups selected named entities.
+
+    Args:
+        text_doc: Parsed document; its ``.text`` and ``.ents`` are read.
+        skill_keywords: Iterable of skill strings to look for. ``None`` is
+            treated as an empty set, so no skills are reported.
+
+    Returns:
+        A tuple ``(skills, entities)``: ``skills`` is a de-duplicated list of
+        the keywords found as whole words in the lower-cased text (order not
+        guaranteed, it comes from a set); ``entities`` maps
+        "organizations" / "locations" / "dates" / "people" to the entity
+        texts labelled ORG / GPE / DATE / PERSON. A key is present only if
+        at least one such entity was found.
+    """
     extracted_skills = []
+    if skill_keywords is None: skill_keywords = set()
     doc_text = text_doc.text.lower()
     for skill in skill_keywords:
+        # The keyword is escaped and wrapped in \b so it only matches as a whole word.
+        # NOTE(review): \b needs a word character at the keyword's edge, so a
+        # keyword ending in punctuation (e.g. "c++") may never match - confirm
+        # against the full skills list.
         if re.search(r'\b' + re.escape(skill) + r'\b', doc_text):
             extracted_skills.append(skill)
     entities = {}
     for ent in text_doc.ents:
+        if ent.label_ == "ORG": entities.setdefault("organizations", []).append(ent.text)
+        elif ent.label_ == "GPE": entities.setdefault("locations", []).append(ent.text)
+        elif ent.label_ == "DATE": entities.setdefault("dates", []).append(ent.text)
+        elif ent.label_ == "PERSON": entities.setdefault("people", []).append(ent.text)
     return list(set(extracted_skills)), entities
 def extract_experience_and_education(text):
+    """
+    Extracts the years of experience and education level mentioned in text.
+
+    Experience is the largest number found in phrases such as
+    "5 years of experience", "3+ years of experience" or "7 yrs exp".
+    Education is decided by plain substring checks, highest level first, so
+    the result is a heuristic (e.g. "master" also matches "mastered").
+
+    Args:
+        text: Raw document text.
+
+    Returns:
+        A tuple ``(years_experience, education_level)``. ``years_experience``
+        is 0 when no phrase matches; ``education_level`` is one of "Ph.D.",
+        "Master's", "Bachelor's", "Associate's" or "Not Specified".
+    """
     years_experience = 0
     education_level = "Not Specified"
+    text_lower = text.lower()
+    # A single capture group shared by both phrasings: with the group only in
+    # the first alternative, findall() returned '' for every "N yrs exp" match
+    # and those years were silently discarded.
+    exp_matches = re.findall(
+        r'(\d+)\s*(?:\+|plus)?\s*(?:years?\s+of\s+experience|yrs?\s+exp)',
+        text_lower,
+    )
     if exp_matches:
+        # Every match is a non-empty digit string, so int() cannot fail here.
+        years_experience = max(int(years) for years in exp_matches)
+    if "phd" in text_lower or "doctorate" in text_lower: education_level = "Ph.D."
+    elif "master" in text_lower or "m.s." in text_lower or "msc" in text_lower: education_level = "Master's"
+    elif "bachelor" in text_lower or "b.s." in text_lower or "bsc" in text_lower: education_level = "Bachelor's"
+    elif "associate" in text_lower: education_level = "Associate's"
     return years_experience, education_level
+# --- Feature Engineering (same as before) ---
 def get_text_embeddings(text):
+    """
+    Returns a vector representation of ``text`` from the module-level ``nlp``.
+
+    Empty text yields a zero vector whose length is
+    ``nlp.vocab.vectors.shape[1]``. Otherwise ``doc.vector`` is returned when
+    the parsed doc reports ``has_vector``; if not, the mean of the vectors of
+    tokens that have one is used, falling back to the same zero vector when
+    no token has a vector.
+    """
+    if not text: return np.zeros(nlp.vocab.vectors.shape[1])
     doc = nlp(text)
+    if doc.has_vector: return doc.vector
+    else: return np.mean([token.vector for token in doc if token.has_vector], axis=0) if [token.vector for token in doc if token.has_vector] else np.zeros(nlp.vocab.vectors.shape[1])
 def calculate_cosine_similarity(vec1, vec2):
+    """
+    Returns the cosine similarity of two vectors as a scalar.
+
+    If either vector is entirely zero the result is 0.0, which avoids asking
+    for the similarity of a zero-length vector. Both inputs are reshaped to a
+    single row before the pairwise ``cosine_similarity`` call.
+    NOTE(review): ``cosine_similarity`` is imported outside this view,
+    presumably from scikit-learn - confirm.
+    """
+    if np.all(vec1 == 0) or np.all(vec2 == 0): return 0.0
     vec1 = vec1.reshape(1, -1)
     vec2 = vec2.reshape(1, -1)
     return cosine_similarity(vec1, vec2)[0][0]
+# --- Main Processing Pipeline for a Document (CV or Job Description) (same as before) ---
 def analyze_document(doc_text):
+    """
+    Runs the full per-document analysis for a CV or a job description.
+
+    Args:
+        doc_text: Raw document text.
+
+    Returns:
+        A dict with the keys "raw_text", "cleaned_text", "spacy_doc",
+        "extracted_skills", "general_entities", "years_experience",
+        "education_level" and "text_embedding".
+    """
+    # Skills and entities are taken from the raw text; the embedding is
+    # computed from the cleaned (lemmatized, stop-word-free) text.
     doc_spacy = nlp(doc_text)
     cleaned_text = preprocess_text(doc_text)
     extracted_skills, general_entities = extract_skills(doc_spacy, skill_keywords=predefined_skills_list)
     years_exp, education_level = extract_experience_and_education(doc_text)
     text_embedding = get_text_embeddings(cleaned_text)
     return {
+        "raw_text": doc_text, "cleaned_text": cleaned_text, "spacy_doc": doc_spacy,
+        "extracted_skills": extracted_skills, "general_entities": general_entities,
+        "years_experience": years_exp, "education_level": education_level,
         "text_embedding": text_embedding
     }
+# --- Matching and Scoring Logic (same as before) ---
 def calculate_match_scores(cv_data, jd_data):
+    """
+    Compares an analyzed CV against an analyzed job description.
+
+    Args:
+        cv_data: Analysis dict for the CV. The keys read here are
+            "text_embedding", "extracted_skills", "cleaned_text",
+            "years_experience" and "education_level".
+        jd_data: Analysis dict for the job description, same keys.
+
+    Returns:
+        A dict with "overall_match_score", "matched_skills",
+        "missing_skills", "extra_skills_in_cv", "skill_match_percentage",
+        "top_cv_keywords", "top_jd_keywords", "common_keywords",
+        "cv_years_experience", "jd_years_experience",
+        "experience_match_status", "cv_education_level",
+        "jd_education_level" and "education_match_status".
+    """
     results = {}
+    # 1. Overall semantic similarity of the two embeddings, as a percentage.
+    overall_similarity = calculate_cosine_similarity(cv_data["text_embedding"], jd_data["text_embedding"])
     results["overall_match_score"] = round(overall_similarity * 100, 2)
+    # 2. Skill matching: set algebra over the extracted skill lists.
     cv_skills = set(cv_data["extracted_skills"])
     jd_skills = set(jd_data["extracted_skills"])
     matched_skills = list(cv_skills.intersection(jd_skills))
     missing_skills = list(jd_skills.difference(cv_skills))
     extra_skills_in_cv = list(cv_skills.difference(jd_skills))
     results["matched_skills"] = matched_skills
     results["missing_skills"] = missing_skills
     results["extra_skills_in_cv"] = extra_skills_in_cv
+    # Share of the job's skills found in the CV; 0.0 when the job lists none.
+    if jd_skills: skill_match_percentage = len(matched_skills) / len(jd_skills) * 100
+    else: skill_match_percentage = 0.0
     results["skill_match_percentage"] = round(skill_match_percentage, 2)
+    # 3. Keyword overlap: TF-IDF fitted on just these two documents
+    #    (row 0 = CV, row 1 = job description), capped at 100 features.
+    # NOTE(review): fitting presumably fails if both cleaned texts are empty - confirm.
     corpus = [cv_data["cleaned_text"], jd_data["cleaned_text"]]
     tfidf_vectorizer = TfidfVectorizer(max_features=100)
     tfidf_matrix = tfidf_vectorizer.fit_transform(corpus)
     feature_names = tfidf_vectorizer.get_feature_names_out()
     cv_tfidf_scores = {feature_names[i]: tfidf_matrix[0, i] for i in tfidf_matrix[0].nonzero()[1]}
     jd_tfidf_scores = {feature_names[i]: tfidf_matrix[1, i] for i in tfidf_matrix[1].nonzero()[1]}
+    # Keep the 15 highest-scoring terms of each document.
     top_cv_keywords = sorted(cv_tfidf_scores.items(), key=lambda x: x[1], reverse=True)[:15]
     top_jd_keywords = sorted(jd_tfidf_scores.items(), key=lambda x: x[1], reverse=True)[:15]
     results["top_cv_keywords"] = [k for k,v in top_cv_keywords]
     results["top_jd_keywords"] = [k for k,v in top_jd_keywords]
     common_keywords = set(results["top_cv_keywords"]).intersection(set(results["top_jd_keywords"]))
     results["common_keywords"] = list(common_keywords)
+    # 4. Experience matching: only judged when the job states a requirement (> 0 years).
     cv_exp_years = cv_data["years_experience"]
     jd_exp_years = jd_data["years_experience"]
     results["cv_years_experience"] = cv_exp_years
     results["jd_years_experience"] = jd_exp_years
     exp_status = "Not specified by Job"
     if jd_exp_years > 0:
+        if cv_exp_years >= jd_exp_years: exp_status = "Meets or Exceeds Requirement"
+        else: exp_status = f"Below Requirement (Needs {jd_exp_years - cv_exp_years} more years)"
     results["experience_match_status"] = exp_status
+    # 5. Education matching: only judged when the job specifies a level.
     cv_edu = cv_data["education_level"]
     jd_edu = jd_data["education_level"]
     results["cv_education_level"] = cv_edu
     results["jd_education_level"] = jd_edu
     edu_match_status = "Not Specified by Job"
+    if jd_edu != "Not Specified":
+        # Levels missing from the table (e.g. "Not Specified") rank as 0.
         edu_order = {"Associate's": 1, "Bachelor's": 2, "Master's": 3, "Ph.D.": 4}
+        if edu_order.get(cv_edu, 0) >= edu_order.get(jd_edu, 0): edu_match_status = "Meets or Exceeds Requirement"
+        else: edu_match_status = "Below Requirement"
     results["education_match_status"] = edu_match_status
     return results
+# --- Overall Analysis Orchestrator (same as before) ---
 def perform_cv_job_analysis(cv_text, job_desc_text):
+    """
+    Runs the end-to-end comparison from raw text to match results.
+
+    Both texts are analyzed independently with ``analyze_document`` and the
+    two analyses are then scored by ``calculate_match_scores``, whose result
+    dict is returned unchanged.
+    """
     cv_analysis_data = analyze_document(cv_text)
     job_desc_analysis_data = analyze_document(job_desc_text)
     match_results = calculate_match_scores(cv_analysis_data, job_desc_analysis_data)
     return match_results
+# --- Visualization Functions (Returns figure object) ---
 def create_overall_match_plot(score):
     fig, ax = plt.subplots(figsize=(6, 2))
     sns.set_style("whitegrid")
     ax.barh(["Overall Match"], [score], color='skyblue')
     return fig
 def create_skill_match_plot(matched_skills, missing_skills):
+    """
+    Builds a pie chart of matched versus missing skill counts.
+
+    Args:
+        matched_skills: Skills present in both documents; only its length is used.
+        missing_skills: Skills required but absent; only its length is used.
+
+    Returns:
+        The matplotlib figure, or None when both lists are empty (there is
+        nothing to draw). The figure is not closed here.
+    """
     labels = ['Matched Skills', 'Missing Skills']
     sizes = [len(matched_skills), len(missing_skills)]
     colors = ['#66b3ff', '#ff9999']
+    # Pull the wedges apart only when both wedges actually exist.
     explode = (0.05, 0.05) if sizes[0] > 0 and sizes[1] > 0 else (0,0)
+    if sum(sizes) == 0: return None
     fig, ax = plt.subplots(figsize=(7, 7))
+    ax.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%', shadow=True, startangle=90, textprops={'fontsize': 12})
     ax.axis('equal')
     ax.set_title("Skill Match Breakdown", fontsize=14)
     plt.tight_layout()
     return fig
 def create_top_keywords_plot(cv_keywords, jd_keywords):
+    """
+    Builds side-by-side bar charts of the most common keywords.
+
+    Args:
+        cv_keywords: Keywords from the CV (left chart).
+        jd_keywords: Keywords from the job description (right chart).
+
+    Returns:
+        A matplotlib figure with two axes. An axis is left empty when its
+        keyword list is empty. The figure is not closed here.
+    """
     fig, axes = plt.subplots(1, 2, figsize=(16, 6))
     sns.set_style("whitegrid")
+    # The bar length is the number of times a keyword occurs in the list passed in.
+    # NOTE(review): if callers pass the de-duplicated TF-IDF keyword lists,
+    # every count is 1 and all bars are equal - confirm whether scores were intended.
     cv_df = pd.DataFrame(Counter(cv_keywords).most_common(10), columns=['Keyword', 'Count'])
     if not cv_df.empty:
         sns.barplot(x='Count', y='Keyword', data=cv_df, ax=axes[0], palette='viridis')
         axes[0].set_title('Top Keywords in CV', fontsize=14)
         axes[0].set_xlabel('Frequency/Importance', fontsize=12)
         axes[0].set_ylabel('')
     jd_df = pd.DataFrame(Counter(jd_keywords).most_common(10), columns=['Keyword', 'Count'])
     if not jd_df.empty:
         sns.barplot(x='Count', y='Keyword', data=jd_df, ax=axes[1], palette='plasma')
         axes[1].set_title('Top Keywords in Job Description', fontsize=14)
         axes[1].set_xlabel('Frequency/Importance', fontsize=12)
         axes[1].set_ylabel('')
     plt.tight_layout()
     return fig
+# --- Main Gradio Interface Function ---
+def analyze_cv_match(cv_file_obj, cv_text_input, jd_text_input):
+    """
+    This function will be called by Gradio's Interface.
+    It takes Gradio inputs and returns Gradio outputs (HTML, plots).
+    """
+    cv_content = ""
+    # Prioritize file upload over text area if both are provided
+    if cv_file_obj is not None:
+        cv_content = get_file_content(cv_file_obj)
+    elif cv_text_input:
+        cv_content = cv_text_input
     if not cv_content:
+        return "<h4><p style='color:red;'>🚨 Error: Please upload a CV file or paste your CV text.</p></h4>", None, None, None, ""
+    if not jd_text_input:
+        return "<h4><p style='color:red;'>🚨 Error: Please paste the Job Description text.</p></h4>", None, None, None, ""
+    try:
+        analysis_results = perform_cv_job_analysis(cv_content, jd_text_input)
+        # Generate HTML output for KPIs and detailed breakdown
+        html_output = f"""
+        <h2>💡 Analysis Results Summary 💡</h2>
+        <div style='display: flex; justify-content: space-around; flex-wrap: wrap; text-align: center;'>
+            <div style='background-color: #e0f7fa; padding: 15px; border-radius: 8px; margin: 5px; min-width: 200px;'>
+                <h3>Overall Match Score</h3>
+                <h1 style='color: #007bb6;'>{analysis_results['overall_match_score']}%</h1>
+            </div>
+            <div style='background-color: #e8f5e9; padding: 15px; border-radius: 8px; margin: 5px; min-width: 200px;'>
+                <h3>Skill Match</h3>
+                <h1 style='color: #43a047;'>{analysis_results['skill_match_percentage']}%</h1>
+            </div>
+            <div style='background-color: #fff3e0; padding: 15px; border-radius: 8px; margin: 5px; min-width: 200px;'>
+                <h3>Experience Match</h3>
+                <h1 style='color: #fb8c00;'>{analysis_results['experience_match_status']}</h1>
+            </div>
+        </div>
+        <hr/>
+        <h2>📝 Detailed Breakdown</h2>
+        <h4>Skills Analysis</h4>
+        <p><strong>✅ Matched Skills:</strong> {', '.join(analysis_results['matched_skills']) if analysis_results['matched_skills'] else 'None found matching job description.'}</p>
+        <p><strong>❌ Missing Skills (from Job Description):</strong> {', '.join(analysis_results['missing_skills']) if analysis_results['missing_skills'] else '🥳 None! Your CV has all specified skills.'}</p>
+        <p><strong>💡 Extra Skills in CV (not in Job Description):</strong> {', '.join(analysis_results['extra_skills_in_cv']) if analysis_results['extra_skills_in_cv'] else 'None. (This is often fine, showing broader capability.)'}</p>
+        <h4>Keyword Relevance (Top TF-IDF Terms)</h4>
+        <p><strong>🤝 Top Common Keywords:</strong> {', '.join(analysis_results['common_keywords']) if analysis_results['common_keywords'] else 'No significant common keywords beyond skills.'}</p>
+        <p><strong>🔍 Top Keywords in Your CV:</strong> {', '.join(analysis_results['top_cv_keywords']) if analysis_results['top_cv_keywords'] else 'N/A'}</p>
+        <p><strong>🎯 Top Keywords in Job Description:</strong> {', '.join(analysis_results['top_jd_keywords']) if analysis_results['top_jd_keywords'] else 'N/A'}</p>
+        <h4>Experience & Education Comparison</h4>
+        <p><strong>👤 Your CV's Experience:</strong> <code>{analysis_results['cv_years_experience']}</code> years</p>
+        <p><strong>💼 Job's Required Experience:</strong> <code>{analysis_results['jd_years_experience']}</code> years</p>
+        <p style='color:green;'><strong>Status:</strong> {analysis_results['experience_match_status']}</p>
+        <p><strong>🎓 Your CV's Education:</strong> <code>{analysis_results['cv_education_level']}</code></p>
+        <p><strong>📚 Job's Required Education:</strong> <code>{analysis_results['jd_education_level']}</code></p>
+        <p style='color:green;'><strong>Status:</strong> {analysis_results['education_match_status']}</p>
+        """
+        # Generate plots
+        overall_plot = create_overall_match_plot(analysis_results['overall_match_score'])
+        skill_plot = create_skill_match_plot(analysis_results['matched_skills'], analysis_results['missing_skills'])
+        keywords_plot = create_top_keywords_plot(analysis_results['top_cv_keywords'], analysis_results['top_jd_keywords'])
+        # Gradio can return multiple outputs. For plots, it expects the figure objects.
+        return html_output, overall_plot, skill_plot, keywords_plot, "Analysis Complete!"
+    except Exception as e:
+        import traceback
+        error_traceback = traceback.format_exc()
+        return (f"<h4><p style='color:red;'>An unexpected error occurred during analysis: {e}</p></h4>"
+                f"<details><summary>Click for details</summary><pre>{error_traceback}</pre></details>"), None, None, None, "Analysis Failed"
+# --- Gradio Interface Definition ---
+# Define the input components
+inputs = [
+    gr.File(label="1. Upload Your CV (PDF, DOCX, TXT)", file_types=[".pdf", ".docx", ".txt"]),
+    gr.Textbox(label="Or paste your CV text here", lines=10, placeholder="Paste your CV content here..."),
+    gr.Textbox(label="2. Paste the Job Description text here", lines=10, placeholder="Paste the job description content here...")
+]
+# Define the output components
+outputs = [
+    gr.HTML(label="Analysis Report"), # For text-based KPIs and detailed breakdown
+    gr.Plot(label="Overall Match Score"), # For the first plot
+    gr.Plot(label="Skill Match Breakdown"), # For the second plot
+    gr.Plot(label="Top Keywords") # For the third plot
+    # Gradio also returns the status message in the bottom right
+]
+# Create the Gradio Interface
+gr.Interface(
+    fn=analyze_cv_match, # Our main analysis function
+    inputs=inputs,
+    outputs=outputs,
+    title="👨‍💼 CV-Job Match Analyzer 📈",
+    description="Upload your CV and paste a job description to get an instant compatibility analysis with charts and key insights. "
+                "Developed by your mentor (A.I.).",
+    allow_flagging="never", # Disable flagging feature
+    theme=gr.themes.Soft() # A nice, modern theme
+).launch()