Pradyumn Tendulkar committed on
Commit
85de68f
·
1 Parent(s): f641225

added api product

Browse files
__pycache__/api_model.cpython-313.pyc ADDED
Binary file (3.9 kB). View file
 
__pycache__/local_model.cpython-313.pyc ADDED
Binary file (15.1 kB). View file
 
api_model.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # API_model.py
2
+ import os
3
+ import re
4
+ import numpy as np
5
+ from typing import Optional
6
+ from huggingface_hub import InferenceClient
7
+
8
+ # -------- Config (via env) --------
9
+ EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
10
+ HF_TOKEN = os.getenv("HF_TOKEN") # set in Space/CI env
11
+ MAX_LEN = int(os.getenv("MAX_TEXT_LEN", "20000"))
12
+ REQ_TIMEOUT = float(os.getenv("REQ_TIMEOUT", "40"))
13
+
14
+ # Lazy client (created on first use to avoid import-time failures)
15
+ _hf_client: Optional[InferenceClient] = None
16
+
17
def _get_client() -> InferenceClient:
    """Return a process-wide InferenceClient, creating it lazily on first use.

    Raises:
        RuntimeError: if HF_TOKEN is not present in the environment.
    """
    global _hf_client
    if not HF_TOKEN:
        raise RuntimeError("HF_TOKEN is not set (add it in environment/Secrets).")
    if _hf_client is not None:
        return _hf_client
    # First use: build and cache the client so later calls reuse the connection.
    _hf_client = InferenceClient(model=EMBED_MODEL, token=HF_TOKEN, timeout=REQ_TIMEOUT)
    return _hf_client
24
+
25
+ # -------- Utilities --------
26
def _trim(s: str) -> str:
    """Strip surrounding whitespace and cap the text at MAX_LEN characters."""
    text = (s or "").strip()
    # Slicing is a no-op for strings already within the limit.
    return text[:MAX_LEN]
29
+
30
+ def _cosine(a: np.ndarray, b: np.ndarray) -> float:
31
+ denom = (np.linalg.norm(a) * np.linalg.norm(b)) or 1.0
32
+ return float(np.dot(a, b) / denom)
33
+
34
def _embed_api(text: str) -> np.ndarray:
    """Return a 1D float32 embedding for *text* via the HF Inference API.

    The feature-extraction endpoint may return a sentence vector (1D),
    token-level vectors (2D), or — for some models — a batched token matrix
    (3D with a leading batch axis).  Leading axes are mean-pooled until a
    single 1D vector remains, which generalizes the original 2D-only pooling.

    Raises:
        RuntimeError: if the response cannot be reduced to a non-empty
            1D vector (e.g. a scalar or empty payload).
    """
    client = _get_client()
    feats = client.feature_extraction(_trim(text))
    arr = np.asarray(feats, dtype=np.float32)
    # Mean-pool any leading axes (batch and/or token) down to one vector.
    while arr.ndim > 1:
        arr = arr.mean(axis=0)
    if arr.ndim != 1 or arr.size == 0:
        raise RuntimeError("Unexpected embedding shape from the Inference API.")
    return arr
44
+
45
+ # -------- Public API (drop-in for local similarity) --------
46
def calculate_similarity_api(resume_text: str, job_text: str) -> float:
    """Score resume/job similarity as a percentage in [0, 100].

    Drop-in replacement for the local-model similarity: same signature and
    scale.  Inputs are assumed to be preprocessed upstream (lowercased,
    stopwords removed, etc.).
    """
    resume_vec = _embed_api(resume_text)
    job_vec = _embed_api(job_text)
    pct = _cosine(resume_vec, job_vec) * 100.0
    # Round to 2 decimals and hand back a plain Python float.
    return float(np.round(pct, 2))
55
+
56
def api_healthcheck() -> str:
    """Embed a tiny probe string to verify credentials/model availability.

    Returns a human-readable "OK: ..." or "ERROR: ..." string instead of
    raising, so callers can surface the result directly in a UI.
    """
    try:
        _embed_api("healthcheck")
    except Exception as e:  # deliberate broad catch: report, don't crash
        return f"ERROR: {type(e).__name__}: {e}"
    return f"OK: Using {EMBED_MODEL}"
app.py CHANGED
@@ -407,6 +407,9 @@ from local_model import (
407
  extract_top_keywords,
408
  )
409
 
 
 
 
410
  # --------------------------
411
  # Main Gradio app logic
412
  # --------------------------
@@ -422,7 +425,13 @@ def analyze_resumes(files, job_description: str, mode: str):
422
  continue # Skip errored files
423
  cleaned_resume = preprocess_text(resume_text)
424
  cleaned_job = preprocess_text(job_description)
425
- sim_pct = calculate_similarity(cleaned_resume, cleaned_job, mode=mode)
 
 
 
 
 
 
426
  results.append((sim_pct, resume_text, fname))
427
  except Exception:
428
  continue # Skip if any error
@@ -438,7 +447,10 @@ def analyze_resumes(files, job_description: str, mode: str):
438
  missing_formatted = format_missing_keywords(missing_dict)
439
  job_suggestions = suggest_jobs(resume_text)
440
  projects_section = extract_projects_section(resume_text)
441
- project_fit_verdict = analyze_projects_fit(projects_section, job_description, mode)
 
 
 
442
  resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
443
  jd_keywords_text = extract_top_keywords(preprocess_text(job_description))
444
 
@@ -483,10 +495,11 @@ def build_ui():
483
  placeholder="Paste the full job description here..."
484
  )
485
  mode = gr.Radio(
486
- choices=["sbert", "bert"],
 
487
  value="sbert",
488
  label="Analysis Mode",
489
- info="SBERT is faster, BERT is more detailed."
490
  )
491
  with gr.Row():
492
  clear_btn = gr.Button("Clear")
 
407
  extract_top_keywords,
408
  )
409
 
410
+ # NEW: import API mode similarity
411
+ from api_model import calculate_similarity_api
412
+
413
  # --------------------------
414
  # Main Gradio app logic
415
  # --------------------------
 
425
  continue # Skip errored files
426
  cleaned_resume = preprocess_text(resume_text)
427
  cleaned_job = preprocess_text(job_description)
428
+
429
+ # Route by mode (SBERT/BERT local vs API)
430
+ if mode == "api":
431
+ sim_pct = calculate_similarity_api(cleaned_resume, cleaned_job)
432
+ else:
433
+ sim_pct = calculate_similarity(cleaned_resume, cleaned_job, mode=mode)
434
+
435
  results.append((sim_pct, resume_text, fname))
436
  except Exception:
437
  continue # Skip if any error
 
447
  missing_formatted = format_missing_keywords(missing_dict)
448
  job_suggestions = suggest_jobs(resume_text)
449
  projects_section = extract_projects_section(resume_text)
450
+ project_fit_verdict = analyze_projects_fit(projects_section, job_description, mode if mode != "api" else "sbert")
451
+ # ^ Project-fit path uses local formatting thresholds; mode value here affects only wording/thresholds,
452
+ # so we map 'api' to 'sbert' for consistent messages.
453
+
454
  resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
455
  jd_keywords_text = extract_top_keywords(preprocess_text(job_description))
456
 
 
495
  placeholder="Paste the full job description here..."
496
  )
497
  mode = gr.Radio(
498
+ # ADD the 3rd option here:
499
+ choices=["sbert", "bert", "api"],
500
  value="sbert",
501
  label="Analysis Mode",
502
+ info="SBERT (local, fast) BERT (local, detailed) • API (HF Inference, no local model)"
503
  )
504
  with gr.Row():
505
  clear_btn = gr.Button("Clear")
requirements.txt CHANGED
@@ -1,35 +1,22 @@
1
- '''--find-links https://storage.googleapis.com/torch-cpu/torch_stable.html
2
- torch
3
- torchvision
4
-
5
- gradio
6
- scikit-learn
7
- numpy
8
- PyMuPDF
9
- python-docx
10
- sentence-transformers
11
- transformers
12
- wordcloud
13
- matplotlib'''
14
 
15
  # ===== Torch CPU Wheels (for Spaces) =====
16
  --find-links https://storage.googleapis.com/torch-cpu/torch_stable.html
17
  --extra-index-url https://download.pytorch.org/whl/cpu
18
 
19
  # ===== Core Scientific Stack =====
20
- numpy==1.26.4 # compatible with most PyTorch CPU wheels
21
- scipy<1.11 # optional but stable with numpy 1.26
22
 
23
  # ===== PyTorch =====
24
- torch==2.2.2
25
- torchvision==0.17.2
26
 
27
  # ===== App / ML Libraries =====
28
- gradio>=4.44.1
29
  fastapi
30
  uvicorn
31
- pydantic>=1.10,<3
32
- huggingface_hub>=0.24
33
  scikit-learn
34
  sentence-transformers
35
  transformers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
  # ===== Torch CPU Wheels (for Spaces) =====
3
  --find-links https://storage.googleapis.com/torch-cpu/torch_stable.html
4
  --extra-index-url https://download.pytorch.org/whl/cpu
5
 
6
  # ===== Core Scientific Stack =====
7
+ numpy # unpinned; let pip resolve against the torch CPU wheels
8
+ scipy # optional; unpinned alongside numpy
9
 
10
  # ===== PyTorch =====
11
+ torch
12
+ torchvision
13
 
14
  # ===== App / ML Libraries =====
15
+ gradio
16
  fastapi
17
  uvicorn
18
+ pydantic
19
+ huggingface_hub
20
  scikit-learn
21
  sentence-transformers
22
  transformers