Resume-Comparator

Configuration error

App Files Community

Pradyumn Tendulkar committed on Oct 6, 2025

Commit

1118099

1 Parent(s): e256fc1

fixing api errors

Browse files

Files changed (3) hide show

__pycache__/api_model.cpython-313.pyc +0 -0
api_model.py +11 -20
app.py +48 -51

__pycache__/api_model.cpython-313.pyc CHANGED Viewed

Binary files a/__pycache__/api_model.cpython-313.pyc and b/__pycache__/api_model.cpython-313.pyc differ

api_model.py CHANGED Viewed

@@ -1,26 +1,13 @@
 # API_model.py
-"""
-Embedding + similarity via Hugging Face Inference API (no local models).
-Expose: calculate_similarity_api(text_a, text_b) -> float (0..100)
-Env:
-  EMBED_MODEL  : default "sentence-transformers/all-MiniLM-L6-v2"
-  HF_TOKEN     : required (set in Space/host env)
-  MAX_TEXT_LEN : default "20000"
-  REQ_TIMEOUT  : default "40"
-"""
 import os
-import re
 import numpy as np
 from huggingface_hub import InferenceClient
-# -------- App config --------
 EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
-HF_TOKEN    = os.getenv("HF_TOKEN")                      # set in host env / Spaces Variables
 MAX_LEN     = int(os.getenv("MAX_TEXT_LEN", "20000"))
 REQ_TIMEOUT = float(os.getenv("REQ_TIMEOUT", "40"))
-# HF Inference API client (timeout belongs on the client)
 _hf = InferenceClient(model=EMBED_MODEL, token=HF_TOKEN, timeout=REQ_TIMEOUT)
 def _trim(s: str) -> str:
@@ -34,20 +21,24 @@ def _cosine(a: np.ndarray, b: np.ndarray) -> float:
 def _embed_api(text: str) -> np.ndarray:
     if not HF_TOKEN:
         raise RuntimeError("HF_TOKEN is not set (add it in environment or Space → Settings → Variables).")
-    feats = _hf.feature_extraction(_trim(text))  # API returns list/array
     arr = np.array(feats, dtype=np.float32)
-    # If token-level embeddings returned, mean-pool to sentence vector
-    if arr.ndim == 2:
         arr = arr.mean(axis=0)
     if arr.ndim != 1:
         raise RuntimeError(f"Unexpected embedding shape from the Inference API: {arr.shape}")
     return arr
 def calculate_similarity_api(text_a: str, text_b: str) -> float:
-    """
-    Return cosine similarity (%) between two texts using HF Inference API embeddings.
-    """
     a_vec = _embed_api(text_a)
     b_vec = _embed_api(text_b)
     score = _cosine(a_vec, b_vec) * 100.0
     return float(np.round(score, 2))

 # API_model.py
 import os
 import numpy as np
 from huggingface_hub import InferenceClient
 EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+HF_TOKEN    = os.getenv("HF_TOKEN")                      # set in env/Space Variables
 MAX_LEN     = int(os.getenv("MAX_TEXT_LEN", "20000"))
 REQ_TIMEOUT = float(os.getenv("REQ_TIMEOUT", "40"))
 _hf = InferenceClient(model=EMBED_MODEL, token=HF_TOKEN, timeout=REQ_TIMEOUT)
 def _trim(s: str) -> str:
 def _embed_api(text: str) -> np.ndarray:
     if not HF_TOKEN:
         raise RuntimeError("HF_TOKEN is not set (add it in environment or Space → Settings → Variables).")
+    feats = _hf.feature_extraction(_trim(text))  # may raise on auth/model/timeouts
     arr = np.array(feats, dtype=np.float32)
+    if arr.ndim == 2:  # token-level → mean pool
         arr = arr.mean(axis=0)
     if arr.ndim != 1:
         raise RuntimeError(f"Unexpected embedding shape from the Inference API: {arr.shape}")
     return arr
 def calculate_similarity_api(text_a: str, text_b: str) -> float:
     a_vec = _embed_api(text_a)
     b_vec = _embed_api(text_b)
     score = _cosine(a_vec, b_vec) * 100.0
     return float(np.round(score, 2))
+def check_api_health() -> tuple[bool, str]:
+    """Quick probe to fail fast with a helpful message in the UI."""
+    try:
+        _ = _embed_api("healthcheck")
+        return True, f"OK (model={EMBED_MODEL})"
+    except Exception as e:
+        return False, f"{type(e).__name__}: {e}"

app.py CHANGED Viewed

@@ -405,12 +405,8 @@ from local_model import (
     extract_projects_section,
     extract_top_keywords,
 )
-from api_model import calculate_similarity_api  # API mode (HF Inference)
-# --------------------------
-# Helpers
-# --------------------------
 def _verdict_html(fname: str, sim_pct: float) -> str:
     if sim_pct >= 80:
         return f"<h3 style='color:green;'>✅ Best Match: {fname} ({sim_pct:.2f}%)</h3>"
@@ -431,55 +427,67 @@ def _project_fit_verdict_from_score(score: float) -> str:
             f"The projects may not directly align with the key requirements. "
             f"Consider highlighting different aspects of your work.</p>")
-# --------------------------
-# Main Gradio app logic
-# --------------------------
 def analyze_resumes(files, job_description: str, mode: str):
     if not files or not job_description.strip():
         return 0.0, "Please upload resumes and paste a job description.", "", "", "", "", "", "", "", ""
     results = []
     for file in files:
         try:
             resume_text, fname = extract_text_from_fileobj(file)
             if resume_text.strip().startswith("[Error"):
-                continue  # Skip errored files
-            # Clean both sides before similarity
             cleaned_resume = preprocess_text(resume_text)
-            cleaned_job = preprocess_text(job_description)
-            # Similarity by mode
             if mode == "api":
                 sim_pct = calculate_similarity_api(cleaned_resume, cleaned_job)
-            else:  # "sbert" or "bert" (local)
                 sim_pct = calculate_similarity(cleaned_resume, cleaned_job, mode=mode)
             results.append((sim_pct, resume_text, fname))
-        except Exception:
-            # Skip the file on any error (keep app resilient)
             continue
     if not results:
-        return 0.0, "No valid resumes were provided.", "", "", "", "", "", "", "", ""
-    # Select the best matching resume
-    best = max(results, key=lambda x: x[0])  # highest similarity
     sim_pct, resume_text, fname = best
-    # Keyword + jobs + keywords extraction (mode-independent)
     missing_dict, suggestions_text = analyze_resume_keywords(resume_text, job_description)
     missing_formatted = format_missing_keywords(missing_dict)
     job_suggestions = suggest_jobs(resume_text)
     projects_section = extract_projects_section(resume_text)
-    # Project fit: local for sbert/bert, API for api
     if projects_section.startswith("Could not"):
         project_fit_verdict = "Cannot analyze project fit as no projects section was found."
     else:
         cleaned_projects = preprocess_text(projects_section)
-        cleaned_job = preprocess_text(job_description)
         if cleaned_projects:
             try:
                 if mode == "api":
@@ -487,14 +495,13 @@ def analyze_resumes(files, job_description: str, mode: str):
                 else:
                     pscore = calculate_similarity(cleaned_projects, cleaned_job, mode=mode)
                 project_fit_verdict = _project_fit_verdict_from_score(pscore)
-            except Exception as _:
-                project_fit_verdict = "Could not compute project fit (embedding error)."
         else:
             project_fit_verdict = "Projects section is empty or contains no relevant text to analyze."
     resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
-    jd_keywords_text = extract_top_keywords(preprocess_text(job_description))
     verdict = _verdict_html(fname, sim_pct)
     return (
@@ -502,18 +509,12 @@ def analyze_resumes(files, job_description: str, mode: str):
         job_suggestions, projects_section, project_fit_verdict, resume_keywords_text, jd_keywords_text, fname
     )
-# --------------------------
-# Clear Button Logic
-# --------------------------
 def clear_inputs():
-    # Reset mode to sbert; clear all outputs
-    return None, "", "sbert", 0.0, "", "", "", "", "", "", ""
-# --------------------------
-# Build Gradio UI
-# --------------------------
 def build_ui():
     with gr.Blocks(theme=gr.themes.Default(), title="Resume ↔ Job Matcher") as demo:
         gr.Markdown("# 📄 Resume & Job Description Analyzer 🎯")
@@ -529,15 +530,14 @@ def build_ui():
                     file_types=[".pdf", ".docx"]
                 )
                 job_desc = gr.Textbox(
-                    lines=10,
-                    label="Job Description",
                     placeholder="Paste the full job description here..."
                 )
                 mode = gr.Radio(
                     choices=["sbert", "bert", "api"],
                     value="sbert",
                     label="Analysis Mode",
-                    info="SBERT/BERΤ use local models; API uses Hugging Face Inference API."
                 )
                 with gr.Row():
                     clear_btn = gr.Button("Clear")
@@ -546,14 +546,11 @@ def build_ui():
             with gr.Column(scale=3):
                 with gr.Tabs():
                     with gr.TabItem("📊 Analysis & Suggestions"):
-                        score_slider = gr.Slider(
-                            value=0, minimum=0, maximum=100, step=0.01, interactive=False,
-                            label="Similarity Score"
-                        )
                         score_text = gr.Markdown()
-                        suggestions_out = gr.Textbox(
-                            label="Suggestions to Improve Your Resume", interactive=False, lines=5
-                        )
                         missing_out = gr.Markdown(label="Keywords Check")
                     with gr.TabItem("🛠️ Project Analysis"):
@@ -572,9 +569,9 @@ def build_ui():
         run_btn.click(
             analyze_resumes,
             inputs=[file_in, job_desc, mode],
-            outputs=[
-                score_slider, score_text, missing_out, suggestions_out, job_suggestions_out, projects_out,
-                project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
             ],
             show_progress='full'
         )
@@ -582,7 +579,7 @@ def build_ui():
         clear_btn.click(
             clear_inputs,
             inputs=[],
-            outputs=[
                 file_in, job_desc, mode, score_slider, score_text, missing_out, suggestions_out,
                 job_suggestions_out, projects_out, project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
             ]

     extract_projects_section,
     extract_top_keywords,
 )
+from api_model import calculate_similarity_api, check_api_health  # API mode (HF Inference)
 def _verdict_html(fname: str, sim_pct: float) -> str:
     if sim_pct >= 80:
         return f"<h3 style='color:green;'>✅ Best Match: {fname} ({sim_pct:.2f}%)</h3>"
             f"The projects may not directly align with the key requirements. "
             f"Consider highlighting different aspects of your work.</p>")
 def analyze_resumes(files, job_description: str, mode: str):
     if not files or not job_description.strip():
         return 0.0, "Please upload resumes and paste a job description.", "", "", "", "", "", "", "", ""
+    # Fast fail if API mode is selected but HF token/model is not ready
+    if mode == "api":
+        ok, msg = check_api_health()
+        if not ok:
+            return (0.0,
+                    f"<p style='color:red;'>HF Inference API error: {msg}</p>",
+                    "", "", "", "", "", "", "", "")
     results = []
+    first_error = None
     for file in files:
         try:
             resume_text, fname = extract_text_from_fileobj(file)
             if resume_text.strip().startswith("[Error"):
+                # file read failure — skip file but note error
+                first_error = first_error or resume_text
+                continue
             cleaned_resume = preprocess_text(resume_text)
+            cleaned_job    = preprocess_text(job_description)
             if mode == "api":
                 sim_pct = calculate_similarity_api(cleaned_resume, cleaned_job)
+            else:
                 sim_pct = calculate_similarity(cleaned_resume, cleaned_job, mode=mode)
             results.append((sim_pct, resume_text, fname))
+        except Exception as e:
+            # Capture first embedding/API error so the user gets a useful message
+            if first_error is None:
+                first_error = f"{type(e).__name__}: {e}"
             continue
     if not results:
+        # If everything failed, surface the first error instead of a vague message
+        msg = first_error or "No valid resumes were provided."
+        return (0.0,
+                f"<p style='color:red;'>Analysis failed: {msg}</p>",
+                "", "", "", "", "", "", "", "")
+    # Best match
+    best = max(results, key=lambda x: x[0])
     sim_pct, resume_text, fname = best
     missing_dict, suggestions_text = analyze_resume_keywords(resume_text, job_description)
     missing_formatted = format_missing_keywords(missing_dict)
     job_suggestions = suggest_jobs(resume_text)
     projects_section = extract_projects_section(resume_text)
+    # Project fit
     if projects_section.startswith("Could not"):
         project_fit_verdict = "Cannot analyze project fit as no projects section was found."
     else:
         cleaned_projects = preprocess_text(projects_section)
+        cleaned_job      = preprocess_text(job_description)
         if cleaned_projects:
             try:
                 if mode == "api":
                 else:
                     pscore = calculate_similarity(cleaned_projects, cleaned_job, mode=mode)
                 project_fit_verdict = _project_fit_verdict_from_score(pscore)
+            except Exception as e:
+                project_fit_verdict = f"Could not compute project fit (embedding error: {type(e).__name__}: {e})."
         else:
             project_fit_verdict = "Projects section is empty or contains no relevant text to analyze."
     resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
+    jd_keywords_text     = extract_top_keywords(preprocess_text(job_description))
     verdict = _verdict_html(fname, sim_pct)
     return (
         job_suggestions, projects_section, project_fit_verdict, resume_keywords_text, jd_keywords_text, fname
     )
 def clear_inputs():
+    # MUST return one value per output we wire in clear_btn.click
+    # outputs: file_in, job_desc, mode, score_slider, score_text, missing_out, suggestions_out,
+    #          job_suggestions_out, projects_out, project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
+    return (None, "", "sbert", 0.0, "", "", "", "", "", "", "", "", "")
 def build_ui():
     with gr.Blocks(theme=gr.themes.Default(), title="Resume ↔ Job Matcher") as demo:
         gr.Markdown("# 📄 Resume & Job Description Analyzer 🎯")
                     file_types=[".pdf", ".docx"]
                 )
                 job_desc = gr.Textbox(
+                    lines=10, label="Job Description",
                     placeholder="Paste the full job description here..."
                 )
                 mode = gr.Radio(
                     choices=["sbert", "bert", "api"],
                     value="sbert",
                     label="Analysis Mode",
+                    info="SBERT/BERT use local models; API uses Hugging Face Inference API."
                 )
                 with gr.Row():
                     clear_btn = gr.Button("Clear")
             with gr.Column(scale=3):
                 with gr.Tabs():
                     with gr.TabItem("📊 Analysis & Suggestions"):
+                        score_slider = gr.Slider(value=0, minimum=0, maximum=100, step=0.01,
+                                                 interactive=False, label="Similarity Score")
                         score_text = gr.Markdown()
+                        suggestions_out = gr.Textbox(label="Suggestions to Improve Your Resume",
+                                                     interactive=False, lines=5)
                         missing_out = gr.Markdown(label="Keywords Check")
                     with gr.TabItem("🛠️ Project Analysis"):
         run_btn.click(
             analyze_resumes,
             inputs=[file_in, job_desc, mode],
+            outputs=[  # 10 outputs
+                score_slider, score_text, missing_out, suggestions_out, job_suggestions_out,
+                projects_out, project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
             ],
             show_progress='full'
         )
         clear_btn.click(
             clear_inputs,
             inputs=[],
+            outputs=[  # 13 outputs; keep in sync with clear_inputs()
                 file_in, job_desc, mode, score_slider, score_text, missing_out, suggestions_out,
                 job_suggestions_out, projects_out, project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
             ]