Spaces:

Bur3hani
/

cview

Sleeping

App Files Community

Bur3hani committed on Jul 18, 2025

Commit

489b15d

verified ·

1 Parent(s): 86f4072

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -45

app.py CHANGED Viewed

@@ -14,15 +14,11 @@ import seaborn as sns
 import numpy as np
 # --- SpaCy Model Loading ---
-# For Gradio on Hugging Face Spaces, the model is usually installed via requirements.txt
-# so spacy.load() will find it.
 try:
     nlp = spacy.load("en_core_web_lg")
     print("SpaCy model loaded successfully.")
 except Exception as e:
     print(f"Error loading spaCy model: {e}. Please ensure 'en_core_web_lg' is correctly installed via requirements.txt.")
-    # In a Gradio app, you might raise an error or display a message in the UI
-    # For now, let's just print to logs if it fails to load at startup.
 # --- Global Predefined Skills ---
 predefined_skills_list = set([
@@ -46,12 +42,8 @@ predefined_skills_list.update([
     "data engineer", "software engineer", "full stack", "frontend", "backend"
 ])
-# --- Text Extraction Functions (Adapted for file paths in Gradio's File component) ---
-# Gradio's gr.File component provides a file path to the temporary uploaded file.
 def extract_text_from_pdf(pdf_path):
-    """
-    Extracts text from a PDF file given its path.
-    """
     try:
         with open(pdf_path, 'rb') as file:
             reader = PdfReader(file)
@@ -60,26 +52,22 @@ def extract_text_from_pdf(pdf_path):
                 text += page.extract_text() or ""
         return text
     except Exception as e:
-        print(f"Error reading PDF {pdf_path}: {e}") # Will print to Gradio logs
         return ""
 def extract_text_from_docx(docx_path):
-    """
-    Extracts text from a DOCX file given its path.
-    """
     try:
         document = Document(docx_path)
         text = "\n".join([paragraph.text for paragraph in document.paragraphs])
         return text
     except Exception as e:
-        print(f"Error reading DOCX {docx_path}: {e}") # Will print to Gradio logs
         return ""
 def get_file_content(file_obj):
-    """Helper to get content from Gradio's file component."""
     if file_obj is None:
         return ""
-    file_path = file_obj.name # Gradio file component gives path in .name attribute
     if file_path.endswith('.pdf'):
         return extract_text_from_pdf(file_path)
     elif file_path.endswith('.docx'):
@@ -90,7 +78,7 @@ def get_file_content(file_obj):
     else:
         return ""
-# --- Text Preprocessing Functions (same as before) ---
 def preprocess_text(text):
     if not isinstance(text, str): return ""
     text = text.lower()
@@ -99,7 +87,7 @@ def preprocess_text(text):
     processed_tokens = [token.lemma_ for token in doc if not token.is_stop and not token.is_punct and not token.is_space]
     return " ".join(processed_tokens)
-# --- Information Extraction (NER & Keyword Extraction) (same as before) ---
 def extract_skills(text_doc, skill_keywords=None):
     extracted_skills = []
     if skill_keywords is None: skill_keywords = set()
@@ -130,7 +118,7 @@ def extract_experience_and_education(text):
     elif "associate" in text_lower: education_level = "Associate's"
     return years_experience, education_level
-# --- Feature Engineering (same as before) ---
 def get_text_embeddings(text):
     if not text: return np.zeros(nlp.vocab.vectors.shape[1])
     doc = nlp(text)
@@ -143,7 +131,7 @@ def calculate_cosine_similarity(vec1, vec2):
     vec2 = vec2.reshape(1, -1)
     return cosine_similarity(vec1, vec2)[0][0]
-# --- Main Processing Pipeline for a Document (CV or Job Description) (same as before) ---
 def analyze_document(doc_text):
     doc_spacy = nlp(doc_text)
     cleaned_text = preprocess_text(doc_text)
@@ -157,7 +145,7 @@ def analyze_document(doc_text):
         "text_embedding": text_embedding
     }
-# --- Matching and Scoring Logic (same as before) ---
 def calculate_match_scores(cv_data, jd_data):
     results = {}
     overall_similarity = calculate_cosine_similarity(cv_data["text_embedding"], jd_data["text_embedding"])
@@ -206,14 +194,14 @@ def calculate_match_scores(cv_data, jd_data):
     results["education_match_status"] = edu_match_status
     return results
-# --- Overall Analysis Orchestrator (same as before) ---
 def perform_cv_job_analysis(cv_text, job_desc_text):
     cv_analysis_data = analyze_document(cv_text)
     job_desc_analysis_data = analyze_document(job_desc_text)
     match_results = calculate_match_scores(cv_analysis_data, job_desc_analysis_data)
     return match_results
-# --- Visualization Functions (Returns figure object) ---
 def create_overall_match_plot(score):
     fig, ax = plt.subplots(figsize=(6, 2))
     sns.set_style("whitegrid")
@@ -259,12 +247,7 @@ def create_top_keywords_plot(cv_keywords, jd_keywords):
 # --- Main Gradio Interface Function ---
 def analyze_cv_match(cv_file_obj, cv_text_input, jd_text_input):
-    """
-    This function will be called by Gradio's Interface.
-    It takes Gradio inputs and returns Gradio outputs (HTML, plots).
-    """
     cv_content = ""
-    # Prioritize file upload over text area if both are provided
     if cv_file_obj is not None:
         cv_content = get_file_content(cv_file_obj)
     elif cv_text_input:
@@ -277,7 +260,6 @@ def analyze_cv_match(cv_file_obj, cv_text_input, jd_text_input):
                 None, None, None, "Analysis Failed")
     try:
         analysis_results = perform_cv_job_analysis(cv_content, jd_text_input)
-        # Generate HTML output for KPIs and detailed breakdown
         html_output = f"""
         <h2 style='text-align: center;'>💡 Analysis Results Summary 💡</h2>
         <div style='display: flex; justify-content: space-around; flex-wrap: wrap; text-align: center; margin-bottom: 20px;'>
@@ -312,23 +294,23 @@ def analyze_cv_match(cv_file_obj, cv_text_input, jd_text_input):
         <p><strong>📚 Job's Required Education:</strong> <code>{analysis_results['jd_education_level']}</code></p>
         <p style='color:green;'><strong>Status:</strong> {analysis_results['education_match_status']}</p>
         """
-        # Generate plots
         overall_plot = create_overall_match_plot(analysis_results['overall_match_score'])
         skill_plot = create_skill_match_plot(analysis_results['matched_skills'], analysis_results['missing_skills'])
         keywords_plot = create_top_keywords_plot(analysis_results['top_cv_keywords'], analysis_results['top_jd_keywords'])
-        # Return all outputs in the correct order
         return html_output, overall_plot, skill_plot, keywords_plot, "Analysis Complete!"
     except Exception as e:
         import traceback
         error_traceback = traceback.format_exc()
-        # Return empty plots/HTML on error
         return (f"<h4><p style='color:red;'>An unexpected error occurred during analysis: {e}</p></h4>"
                 f"<details><summary>Click for details</summary><pre>{error_traceback}</pre></details>",
                 None, None, None, "Analysis Failed")
 # --- Gradio Interface Definition ---
-# Use gr.Blocks for more flexibility in layout
 with gr.Blocks(theme=gr.themes.Soft(), title="CV-Job Match Analyzer") as demo:
     gr.Markdown(
         """
         # 👨‍💼 CV-Job Match Analyzer 📈
@@ -336,29 +318,27 @@ with gr.Blocks(theme=gr.themes.Soft(), title="CV-Job Match Analyzer") as demo:
         Upload a CV (PDF, DOCX, TXT) and paste the job description text to get an instant analysis.
         """
     )
-    with gr.Row(): # Arrange inputs and outputs in two columns
-        with gr.Column(scale=1, min_width=400): # Left column for inputs
-            gr.Markdown("## **1. Your CV**") # Section title for CV input
             cv_file_obj = gr.File(label="Upload CV (PDF, DOCX, TXT)", file_types=[".pdf", ".docx", ".txt"])
             cv_text_input = gr.Textbox(label="Or paste CV text here (overrides file upload)", lines=10, placeholder="Paste your CV content here...")
-            gr.Markdown("## **2. Job Description**") # Section title for JD input
             jd_text_input = gr.Textbox(label="Paste the Job Description text here", lines=10, placeholder="Paste the job description content here...")
-            with gr.Row(): # Buttons in a row
                 analyze_button = gr.Button("✨ Analyze CV Match ✨", variant="primary", scale=1)
                 clear_button = gr.ClearButton([cv_file_obj, cv_text_input, jd_text_input], scale=1)
-        with gr.Column(scale=2, min_width=600): # Right column for outputs (plots and HTML report)
-            output_html = gr.HTML(label="Analysis Report") # This will show the text-based KPIs and detailed breakdown
-            gr.Markdown("## **📊 Visual Insights**") # Title for the plots section
-            output_overall_plot = gr.Plot(label="Overall Match Score") # Plots will appear here
             output_skill_plot = gr.Plot(label="Skill Match Breakdown")
             output_keywords_plot = gr.Plot(label="Top Keywords")
-    # Define the action when the button is clicked
     analyze_button.click(
         fn=analyze_cv_match,
         inputs=[cv_file_obj, cv_text_input, jd_text_input],
         outputs=[output_html, output_overall_plot, output_skill_plot, output_keywords_plot, gr.State(value="")],
-        # The last output for gr.State is a dummy. It needs to match the number of return values from analyze_cv_match.
-        # Gradio uses the last return value for the status bar if it's a string, or you can explicitly link it.
     )
 demo.launch()

 import numpy as np
 # --- SpaCy Model Loading ---
 try:
     nlp = spacy.load("en_core_web_lg")
     print("SpaCy model loaded successfully.")
 except Exception as e:
     print(f"Error loading spaCy model: {e}. Please ensure 'en_core_web_lg' is correctly installed via requirements.txt.")
 # --- Global Predefined Skills ---
 predefined_skills_list = set([
     "data engineer", "software engineer", "full stack", "frontend", "backend"
 ])
+# --- Text Extraction Functions ---
 def extract_text_from_pdf(pdf_path):
     try:
         with open(pdf_path, 'rb') as file:
             reader = PdfReader(file)
                 text += page.extract_text() or ""
         return text
     except Exception as e:
+        print(f"Error reading PDF {pdf_path}: {e}")
         return ""
 def extract_text_from_docx(docx_path):
     try:
         document = Document(docx_path)
         text = "\n".join([paragraph.text for paragraph in document.paragraphs])
         return text
     except Exception as e:
+        print(f"Error reading DOCX {docx_path}: {e}")
         return ""
 def get_file_content(file_obj):
     if file_obj is None:
         return ""
+    file_path = file_obj.name
     if file_path.endswith('.pdf'):
         return extract_text_from_pdf(file_path)
     elif file_path.endswith('.docx'):
     else:
         return ""
+# --- Text Preprocessing Functions ---
 def preprocess_text(text):
     if not isinstance(text, str): return ""
     text = text.lower()
     processed_tokens = [token.lemma_ for token in doc if not token.is_stop and not token.is_punct and not token.is_space]
     return " ".join(processed_tokens)
+# --- Information Extraction ---
 def extract_skills(text_doc, skill_keywords=None):
     extracted_skills = []
     if skill_keywords is None: skill_keywords = set()
     elif "associate" in text_lower: education_level = "Associate's"
     return years_experience, education_level
+# --- Feature Engineering ---
 def get_text_embeddings(text):
     if not text: return np.zeros(nlp.vocab.vectors.shape[1])
     doc = nlp(text)
     vec2 = vec2.reshape(1, -1)
     return cosine_similarity(vec1, vec2)[0][0]
+# --- Main Processing Pipeline ---
 def analyze_document(doc_text):
     doc_spacy = nlp(doc_text)
     cleaned_text = preprocess_text(doc_text)
         "text_embedding": text_embedding
     }
+# --- Matching and Scoring Logic ---
 def calculate_match_scores(cv_data, jd_data):
     results = {}
     overall_similarity = calculate_cosine_similarity(cv_data["text_embedding"], jd_data["text_embedding"])
     results["education_match_status"] = edu_match_status
     return results
+# --- Overall Analysis Orchestrator ---
 def perform_cv_job_analysis(cv_text, job_desc_text):
     cv_analysis_data = analyze_document(cv_text)
     job_desc_analysis_data = analyze_document(job_desc_text)
     match_results = calculate_match_scores(cv_analysis_data, job_desc_analysis_data)
     return match_results
+# --- Visualization Functions ---
 def create_overall_match_plot(score):
     fig, ax = plt.subplots(figsize=(6, 2))
     sns.set_style("whitegrid")
 # --- Main Gradio Interface Function ---
 def analyze_cv_match(cv_file_obj, cv_text_input, jd_text_input):
     cv_content = ""
     if cv_file_obj is not None:
         cv_content = get_file_content(cv_file_obj)
     elif cv_text_input:
                 None, None, None, "Analysis Failed")
     try:
         analysis_results = perform_cv_job_analysis(cv_content, jd_text_input)
         html_output = f"""
         <h2 style='text-align: center;'>💡 Analysis Results Summary 💡</h2>
         <div style='display: flex; justify-content: space-around; flex-wrap: wrap; text-align: center; margin-bottom: 20px;'>
         <p><strong>📚 Job's Required Education:</strong> <code>{analysis_results['jd_education_level']}</code></p>
         <p style='color:green;'><strong>Status:</strong> {analysis_results['education_match_status']}</p>
         """
         overall_plot = create_overall_match_plot(analysis_results['overall_match_score'])
         skill_plot = create_skill_match_plot(analysis_results['matched_skills'], analysis_results['missing_skills'])
         keywords_plot = create_top_keywords_plot(analysis_results['top_cv_keywords'], analysis_results['top_jd_keywords'])
         return html_output, overall_plot, skill_plot, keywords_plot, "Analysis Complete!"
     except Exception as e:
         import traceback
         error_traceback = traceback.format_exc()
         return (f"<h4><p style='color:red;'>An unexpected error occurred during analysis: {e}</p></h4>"
                 f"<details><summary>Click for details</summary><pre>{error_traceback}</pre></details>",
                 None, None, None, "Analysis Failed")
 # --- Gradio Interface Definition ---
 with gr.Blocks(theme=gr.themes.Soft(), title="CV-Job Match Analyzer") as demo:
+    # THIS IS THE NEW LINE TO ADD EMPTY SPACE AT THE TOP
+    gr.Markdown("<br>" * 5)
     gr.Markdown(
         """
         # 👨‍💼 CV-Job Match Analyzer 📈
         Upload a CV (PDF, DOCX, TXT) and paste the job description text to get an instant analysis.
         """
     )
+    with gr.Row():
+        with gr.Column(scale=1, min_width=400):
+            gr.Markdown("## **1. Your CV**")
             cv_file_obj = gr.File(label="Upload CV (PDF, DOCX, TXT)", file_types=[".pdf", ".docx", ".txt"])
             cv_text_input = gr.Textbox(label="Or paste CV text here (overrides file upload)", lines=10, placeholder="Paste your CV content here...")
+            gr.Markdown("## **2. Job Description**")
             jd_text_input = gr.Textbox(label="Paste the Job Description text here", lines=10, placeholder="Paste the job description content here...")
+            with gr.Row():
                 analyze_button = gr.Button("✨ Analyze CV Match ✨", variant="primary", scale=1)
                 clear_button = gr.ClearButton([cv_file_obj, cv_text_input, jd_text_input], scale=1)
+        with gr.Column(scale=2, min_width=600):
+            output_html = gr.HTML(label="Analysis Report")
+            gr.Markdown("## **📊 Visual Insights**")
+            output_overall_plot = gr.Plot(label="Overall Match Score")
             output_skill_plot = gr.Plot(label="Skill Match Breakdown")
             output_keywords_plot = gr.Plot(label="Top Keywords")
     analyze_button.click(
         fn=analyze_cv_match,
         inputs=[cv_file_obj, cv_text_input, jd_text_input],
         outputs=[output_html, output_overall_plot, output_skill_plot, output_keywords_plot, gr.State(value="")],
     )
 demo.launch()