Spaces:
Configuration error
Configuration error
Pradyumn Tendulkar committed on
Commit ·
85de68f
1
Parent(s): f641225
added api product
Browse files- __pycache__/api_model.cpython-313.pyc +0 -0
- __pycache__/local_model.cpython-313.pyc +0 -0
- api_model.py +62 -0
- app.py +17 -4
- requirements.txt +7 -20
__pycache__/api_model.cpython-313.pyc
ADDED
|
Binary file (3.9 kB). View file
|
|
|
__pycache__/local_model.cpython-313.pyc
ADDED
|
Binary file (15.1 kB). View file
|
|
|
api_model.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# API_model.py
|
| 2 |
+
import os
|
| 3 |
+
import re
|
| 4 |
+
import numpy as np
|
| 5 |
+
from typing import Optional
|
| 6 |
+
from huggingface_hub import InferenceClient
|
| 7 |
+
|
| 8 |
+
# -------- Config (via env) --------
|
| 9 |
+
EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
| 10 |
+
HF_TOKEN = os.getenv("HF_TOKEN") # set in Space/CI env
|
| 11 |
+
MAX_LEN = int(os.getenv("MAX_TEXT_LEN", "20000"))
|
| 12 |
+
REQ_TIMEOUT = float(os.getenv("REQ_TIMEOUT", "40"))
|
| 13 |
+
|
| 14 |
+
# Lazy client (created on first use to avoid import-time failures)
|
| 15 |
+
_hf_client: Optional[InferenceClient] = None
|
| 16 |
+
|
| 17 |
+
def _get_client() -> InferenceClient:
    """Return the shared InferenceClient, creating it lazily on first use.

    Raises:
        RuntimeError: if HF_TOKEN is absent from the environment.
    """
    global _hf_client
    # Fail fast with an actionable message instead of a cryptic 401 later.
    if not HF_TOKEN:
        raise RuntimeError("HF_TOKEN is not set (add it in environment/Secrets).")
    if _hf_client is None:
        _hf_client = InferenceClient(
            model=EMBED_MODEL,
            token=HF_TOKEN,
            timeout=REQ_TIMEOUT,
        )
    return _hf_client
|
| 24 |
+
|
| 25 |
+
# -------- Utilities --------
|
| 26 |
+
def _trim(s: str) -> str:
    """Strip surrounding whitespace and cap the text at MAX_LEN characters."""
    stripped = (s or "").strip()
    if len(stripped) > MAX_LEN:
        return stripped[:MAX_LEN]
    return stripped
|
| 29 |
+
|
| 30 |
+
def _cosine(a: np.ndarray, b: np.ndarray) -> float:
|
| 31 |
+
denom = (np.linalg.norm(a) * np.linalg.norm(b)) or 1.0
|
| 32 |
+
return float(np.dot(a, b) / denom)
|
| 33 |
+
|
| 34 |
+
def _embed_api(text: str) -> np.ndarray:
    """Embed *text* via the HF Inference API and return a 1D float32 vector.

    Token-level (2D) responses are mean-pooled into a single sentence
    vector; any other shape is rejected.
    """
    client = _get_client()
    vectors = np.array(client.feature_extraction(_trim(text)), dtype=np.float32)
    if vectors.ndim == 2:
        # Token-level embeddings: collapse tokens to one sentence vector.
        vectors = vectors.mean(axis=0)
    if vectors.ndim != 1:
        raise RuntimeError("Unexpected embedding shape from the Inference API.")
    return vectors
|
| 44 |
+
|
| 45 |
+
# -------- Public API (drop-in for local similarity) --------
|
| 46 |
+
def calculate_similarity_api(resume_text: str, job_text: str) -> float:
    """
    Returns similarity in % (0-100), matching the signature/scale used in local_model.
    Assumes input strings are already preprocessed upstream (lowercased, stopwords removed, etc.).
    """
    similarity = _cosine(_embed_api(resume_text), _embed_api(job_text))
    # Scale to a percentage and round to two decimals for display parity.
    return float(np.round(similarity * 100.0, 2))
|
| 55 |
+
|
| 56 |
+
def api_healthcheck() -> str:
    """Optional: embed a tiny probe string once to verify credentials/model availability."""
    try:
        _embed_api("healthcheck")
    except Exception as e:
        # Diagnostic helper: report the failure as text rather than raising.
        return f"ERROR: {type(e).__name__}: {e}"
    return f"OK: Using {EMBED_MODEL}"
|
app.py
CHANGED
|
@@ -407,6 +407,9 @@ from local_model import (
|
|
| 407 |
extract_top_keywords,
|
| 408 |
)
|
| 409 |
|
|
|
|
|
|
|
|
|
|
| 410 |
# --------------------------
|
| 411 |
# Main Gradio app logic
|
| 412 |
# --------------------------
|
|
@@ -422,7 +425,13 @@ def analyze_resumes(files, job_description: str, mode: str):
|
|
| 422 |
continue # Skip errored files
|
| 423 |
cleaned_resume = preprocess_text(resume_text)
|
| 424 |
cleaned_job = preprocess_text(job_description)
|
| 425 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
results.append((sim_pct, resume_text, fname))
|
| 427 |
except Exception:
|
| 428 |
continue # Skip if any error
|
|
@@ -438,7 +447,10 @@ def analyze_resumes(files, job_description: str, mode: str):
|
|
| 438 |
missing_formatted = format_missing_keywords(missing_dict)
|
| 439 |
job_suggestions = suggest_jobs(resume_text)
|
| 440 |
projects_section = extract_projects_section(resume_text)
|
| 441 |
-
project_fit_verdict = analyze_projects_fit(projects_section, job_description, mode)
|
|
|
|
|
|
|
|
|
|
| 442 |
resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
|
| 443 |
jd_keywords_text = extract_top_keywords(preprocess_text(job_description))
|
| 444 |
|
|
@@ -483,10 +495,11 @@ def build_ui():
|
|
| 483 |
placeholder="Paste the full job description here..."
|
| 484 |
)
|
| 485 |
mode = gr.Radio(
|
| 486 |
-
|
|
|
|
| 487 |
value="sbert",
|
| 488 |
label="Analysis Mode",
|
| 489 |
-
info="SBERT
|
| 490 |
)
|
| 491 |
with gr.Row():
|
| 492 |
clear_btn = gr.Button("Clear")
|
|
|
|
| 407 |
extract_top_keywords,
|
| 408 |
)
|
| 409 |
|
| 410 |
+
# NEW: import API mode similarity
|
| 411 |
+
from api_model import calculate_similarity_api
|
| 412 |
+
|
| 413 |
# --------------------------
|
| 414 |
# Main Gradio app logic
|
| 415 |
# --------------------------
|
|
|
|
| 425 |
continue # Skip errored files
|
| 426 |
cleaned_resume = preprocess_text(resume_text)
|
| 427 |
cleaned_job = preprocess_text(job_description)
|
| 428 |
+
|
| 429 |
+
# Route by mode (SBERT/BERT local vs API)
|
| 430 |
+
if mode == "api":
|
| 431 |
+
sim_pct = calculate_similarity_api(cleaned_resume, cleaned_job)
|
| 432 |
+
else:
|
| 433 |
+
sim_pct = calculate_similarity(cleaned_resume, cleaned_job, mode=mode)
|
| 434 |
+
|
| 435 |
results.append((sim_pct, resume_text, fname))
|
| 436 |
except Exception:
|
| 437 |
continue # Skip if any error
|
|
|
|
| 447 |
missing_formatted = format_missing_keywords(missing_dict)
|
| 448 |
job_suggestions = suggest_jobs(resume_text)
|
| 449 |
projects_section = extract_projects_section(resume_text)
|
| 450 |
+
project_fit_verdict = analyze_projects_fit(projects_section, job_description, mode if mode != "api" else "sbert")
|
| 451 |
+
# ^ Project-fit path uses local formatting thresholds; mode value here affects only wording/thresholds,
|
| 452 |
+
# so we map 'api' to 'sbert' for consistent messages.
|
| 453 |
+
|
| 454 |
resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
|
| 455 |
jd_keywords_text = extract_top_keywords(preprocess_text(job_description))
|
| 456 |
|
|
|
|
| 495 |
placeholder="Paste the full job description here..."
|
| 496 |
)
|
| 497 |
mode = gr.Radio(
|
| 498 |
+
# ADD the 3rd option here:
|
| 499 |
+
choices=["sbert", "bert", "api"],
|
| 500 |
value="sbert",
|
| 501 |
label="Analysis Mode",
|
| 502 |
+
info="SBERT (local, fast) • BERT (local, detailed) • API (HF Inference, no local model)"
|
| 503 |
)
|
| 504 |
with gr.Row():
|
| 505 |
clear_btn = gr.Button("Clear")
|
requirements.txt
CHANGED
|
@@ -1,35 +1,22 @@
|
|
| 1 |
-
'''--find-links https://storage.googleapis.com/torch-cpu/torch_stable.html
|
| 2 |
-
torch
|
| 3 |
-
torchvision
|
| 4 |
-
|
| 5 |
-
gradio
|
| 6 |
-
scikit-learn
|
| 7 |
-
numpy
|
| 8 |
-
PyMuPDF
|
| 9 |
-
python-docx
|
| 10 |
-
sentence-transformers
|
| 11 |
-
transformers
|
| 12 |
-
wordcloud
|
| 13 |
-
matplotlib'''
|
| 14 |
|
| 15 |
# ===== Torch CPU Wheels (for Spaces) =====
|
| 16 |
--find-links https://storage.googleapis.com/torch-cpu/torch_stable.html
|
| 17 |
--extra-index-url https://download.pytorch.org/whl/cpu
|
| 18 |
|
| 19 |
# ===== Core Scientific Stack =====
|
| 20 |
-
numpy
|
| 21 |
-
scipy
|
| 22 |
|
| 23 |
# ===== PyTorch =====
|
| 24 |
-
torch
|
| 25 |
-
torchvision
|
| 26 |
|
| 27 |
# ===== App / ML Libraries =====
|
| 28 |
-
gradio
|
| 29 |
fastapi
|
| 30 |
uvicorn
|
| 31 |
-
pydantic
|
| 32 |
-
huggingface_hub
|
| 33 |
scikit-learn
|
| 34 |
sentence-transformers
|
| 35 |
transformers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
|
| 2 |
# ===== Torch CPU Wheels (for Spaces) =====
|
| 3 |
--find-links https://storage.googleapis.com/torch-cpu/torch_stable.html
|
| 4 |
--extra-index-url https://download.pytorch.org/whl/cpu
|
| 5 |
|
| 6 |
# ===== Core Scientific Stack =====
|
| 7 |
+
numpy # compatible with most PyTorch CPU wheels
|
| 8 |
+
scipy # optional but stable with numpy 1.26
|
| 9 |
|
| 10 |
# ===== PyTorch =====
|
| 11 |
+
torch
|
| 12 |
+
torchvision
|
| 13 |
|
| 14 |
# ===== App / ML Libraries =====
|
| 15 |
+
gradio
|
| 16 |
fastapi
|
| 17 |
uvicorn
|
| 18 |
+
pydantic
|
| 19 |
+
huggingface_hub
|
| 20 |
scikit-learn
|
| 21 |
sentence-transformers
|
| 22 |
transformers
|