Spaces:
Configuration error
Configuration error
Pradyumn Tendulkar committed on
Commit ·
1118099
1
Parent(s): e256fc1
fixing api errors
Browse files
- __pycache__/api_model.cpython-313.pyc +0 -0
- api_model.py +11 -20
- app.py +48 -51
__pycache__/api_model.cpython-313.pyc
CHANGED
|
Binary files a/__pycache__/api_model.cpython-313.pyc and b/__pycache__/api_model.cpython-313.pyc differ
|
|
|
api_model.py
CHANGED
|
@@ -1,26 +1,13 @@
|
|
| 1 |
# API_model.py
|
| 2 |
-
"""
|
| 3 |
-
Embedding + similarity via Hugging Face Inference API (no local models).
|
| 4 |
-
Expose: calculate_similarity_api(text_a, text_b) -> float (0..100)
|
| 5 |
-
Env:
|
| 6 |
-
EMBED_MODEL : default "sentence-transformers/all-MiniLM-L6-v2"
|
| 7 |
-
HF_TOKEN : required (set in Space/host env)
|
| 8 |
-
MAX_TEXT_LEN : default "20000"
|
| 9 |
-
REQ_TIMEOUT : default "40"
|
| 10 |
-
"""
|
| 11 |
-
|
| 12 |
import os
|
| 13 |
-
import re
|
| 14 |
import numpy as np
|
| 15 |
from huggingface_hub import InferenceClient
|
| 16 |
|
| 17 |
-
# -------- App config --------
|
| 18 |
EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
| 19 |
-
HF_TOKEN = os.getenv("HF_TOKEN") # set in
|
| 20 |
MAX_LEN = int(os.getenv("MAX_TEXT_LEN", "20000"))
|
| 21 |
REQ_TIMEOUT = float(os.getenv("REQ_TIMEOUT", "40"))
|
| 22 |
|
| 23 |
-
# HF Inference API client (timeout belongs on the client)
|
| 24 |
_hf = InferenceClient(model=EMBED_MODEL, token=HF_TOKEN, timeout=REQ_TIMEOUT)
|
| 25 |
|
| 26 |
def _trim(s: str) -> str:
|
|
@@ -34,20 +21,24 @@ def _cosine(a: np.ndarray, b: np.ndarray) -> float:
|
|
| 34 |
def _embed_api(text: str) -> np.ndarray:
|
| 35 |
if not HF_TOKEN:
|
| 36 |
raise RuntimeError("HF_TOKEN is not set (add it in environment or Space → Settings → Variables).")
|
| 37 |
-
feats = _hf.feature_extraction(_trim(text)) #
|
| 38 |
arr = np.array(feats, dtype=np.float32)
|
| 39 |
-
|
| 40 |
-
if arr.ndim == 2:
|
| 41 |
arr = arr.mean(axis=0)
|
| 42 |
if arr.ndim != 1:
|
| 43 |
raise RuntimeError(f"Unexpected embedding shape from the Inference API: {arr.shape}")
|
| 44 |
return arr
|
| 45 |
|
| 46 |
def calculate_similarity_api(text_a: str, text_b: str) -> float:
|
| 47 |
-
"""
|
| 48 |
-
Return cosine similarity (%) between two texts using HF Inference API embeddings.
|
| 49 |
-
"""
|
| 50 |
a_vec = _embed_api(text_a)
|
| 51 |
b_vec = _embed_api(text_b)
|
| 52 |
score = _cosine(a_vec, b_vec) * 100.0
|
| 53 |
return float(np.round(score, 2))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# API_model.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import os
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
from huggingface_hub import InferenceClient
|
| 5 |
|
|
|
|
| 6 |
EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
| 7 |
+
HF_TOKEN = os.getenv("HF_TOKEN") # set in env/Space Variables
|
| 8 |
MAX_LEN = int(os.getenv("MAX_TEXT_LEN", "20000"))
|
| 9 |
REQ_TIMEOUT = float(os.getenv("REQ_TIMEOUT", "40"))
|
| 10 |
|
|
|
|
| 11 |
_hf = InferenceClient(model=EMBED_MODEL, token=HF_TOKEN, timeout=REQ_TIMEOUT)
|
| 12 |
|
| 13 |
def _trim(s: str) -> str:
|
|
|
|
| 21 |
def _embed_api(text: str) -> np.ndarray:
|
| 22 |
if not HF_TOKEN:
|
| 23 |
raise RuntimeError("HF_TOKEN is not set (add it in environment or Space → Settings → Variables).")
|
| 24 |
+
feats = _hf.feature_extraction(_trim(text)) # may raise on auth/model/timeouts
|
| 25 |
arr = np.array(feats, dtype=np.float32)
|
| 26 |
+
if arr.ndim == 2: # token-level → mean pool
|
|
|
|
| 27 |
arr = arr.mean(axis=0)
|
| 28 |
if arr.ndim != 1:
|
| 29 |
raise RuntimeError(f"Unexpected embedding shape from the Inference API: {arr.shape}")
|
| 30 |
return arr
|
| 31 |
|
| 32 |
def calculate_similarity_api(text_a: str, text_b: str) -> float:
|
|
|
|
|
|
|
|
|
|
| 33 |
a_vec = _embed_api(text_a)
|
| 34 |
b_vec = _embed_api(text_b)
|
| 35 |
score = _cosine(a_vec, b_vec) * 100.0
|
| 36 |
return float(np.round(score, 2))
|
| 37 |
+
|
| 38 |
+
def check_api_health() -> tuple[bool, str]:
|
| 39 |
+
"""Quick probe to fail fast with a helpful message in the UI."""
|
| 40 |
+
try:
|
| 41 |
+
_ = _embed_api("healthcheck")
|
| 42 |
+
return True, f"OK (model={EMBED_MODEL})"
|
| 43 |
+
except Exception as e:
|
| 44 |
+
return False, f"{type(e).__name__}: {e}"
|
app.py
CHANGED
|
@@ -405,12 +405,8 @@ from local_model import (
|
|
| 405 |
extract_projects_section,
|
| 406 |
extract_top_keywords,
|
| 407 |
)
|
| 408 |
-
from api_model import calculate_similarity_api # API mode (HF Inference)
|
| 409 |
|
| 410 |
-
|
| 411 |
-
# --------------------------
|
| 412 |
-
# Helpers
|
| 413 |
-
# --------------------------
|
| 414 |
def _verdict_html(fname: str, sim_pct: float) -> str:
|
| 415 |
if sim_pct >= 80:
|
| 416 |
return f"<h3 style='color:green;'>✅ Best Match: {fname} ({sim_pct:.2f}%)</h3>"
|
|
@@ -431,55 +427,67 @@ def _project_fit_verdict_from_score(score: float) -> str:
|
|
| 431 |
f"The projects may not directly align with the key requirements. "
|
| 432 |
f"Consider highlighting different aspects of your work.</p>")
|
| 433 |
|
| 434 |
-
|
| 435 |
-
# --------------------------
|
| 436 |
-
# Main Gradio app logic
|
| 437 |
-
# --------------------------
|
| 438 |
def analyze_resumes(files, job_description: str, mode: str):
|
| 439 |
if not files or not job_description.strip():
|
| 440 |
return 0.0, "Please upload resumes and paste a job description.", "", "", "", "", "", "", "", ""
|
| 441 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
results = []
|
|
|
|
|
|
|
| 443 |
for file in files:
|
| 444 |
try:
|
| 445 |
resume_text, fname = extract_text_from_fileobj(file)
|
| 446 |
if resume_text.strip().startswith("[Error"):
|
| 447 |
-
|
|
|
|
|
|
|
| 448 |
|
| 449 |
-
# Clean both sides before similarity
|
| 450 |
cleaned_resume = preprocess_text(resume_text)
|
| 451 |
-
cleaned_job
|
| 452 |
|
| 453 |
-
# Similarity by mode
|
| 454 |
if mode == "api":
|
| 455 |
sim_pct = calculate_similarity_api(cleaned_resume, cleaned_job)
|
| 456 |
-
else:
|
| 457 |
sim_pct = calculate_similarity(cleaned_resume, cleaned_job, mode=mode)
|
| 458 |
|
| 459 |
results.append((sim_pct, resume_text, fname))
|
| 460 |
-
|
| 461 |
-
|
|
|
|
|
|
|
|
|
|
| 462 |
continue
|
| 463 |
|
| 464 |
if not results:
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 469 |
sim_pct, resume_text, fname = best
|
| 470 |
|
| 471 |
-
# Keyword + jobs + keywords extraction (mode-independent)
|
| 472 |
missing_dict, suggestions_text = analyze_resume_keywords(resume_text, job_description)
|
| 473 |
missing_formatted = format_missing_keywords(missing_dict)
|
| 474 |
job_suggestions = suggest_jobs(resume_text)
|
| 475 |
projects_section = extract_projects_section(resume_text)
|
| 476 |
|
| 477 |
-
# Project fit
|
| 478 |
if projects_section.startswith("Could not"):
|
| 479 |
project_fit_verdict = "Cannot analyze project fit as no projects section was found."
|
| 480 |
else:
|
| 481 |
cleaned_projects = preprocess_text(projects_section)
|
| 482 |
-
cleaned_job
|
| 483 |
if cleaned_projects:
|
| 484 |
try:
|
| 485 |
if mode == "api":
|
|
@@ -487,14 +495,13 @@ def analyze_resumes(files, job_description: str, mode: str):
|
|
| 487 |
else:
|
| 488 |
pscore = calculate_similarity(cleaned_projects, cleaned_job, mode=mode)
|
| 489 |
project_fit_verdict = _project_fit_verdict_from_score(pscore)
|
| 490 |
-
except Exception as
|
| 491 |
-
project_fit_verdict = "Could not compute project fit (embedding error)."
|
| 492 |
else:
|
| 493 |
project_fit_verdict = "Projects section is empty or contains no relevant text to analyze."
|
| 494 |
|
| 495 |
resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
|
| 496 |
-
jd_keywords_text
|
| 497 |
-
|
| 498 |
verdict = _verdict_html(fname, sim_pct)
|
| 499 |
|
| 500 |
return (
|
|
@@ -502,18 +509,12 @@ def analyze_resumes(files, job_description: str, mode: str):
|
|
| 502 |
job_suggestions, projects_section, project_fit_verdict, resume_keywords_text, jd_keywords_text, fname
|
| 503 |
)
|
| 504 |
|
| 505 |
-
|
| 506 |
-
# --------------------------
|
| 507 |
-
# Clear Button Logic
|
| 508 |
-
# --------------------------
|
| 509 |
def clear_inputs():
|
| 510 |
-
#
|
| 511 |
-
|
| 512 |
-
|
|
|
|
| 513 |
|
| 514 |
-
# --------------------------
|
| 515 |
-
# Build Gradio UI
|
| 516 |
-
# --------------------------
|
| 517 |
def build_ui():
|
| 518 |
with gr.Blocks(theme=gr.themes.Default(), title="Resume ↔ Job Matcher") as demo:
|
| 519 |
gr.Markdown("# 📄 Resume & Job Description Analyzer 🎯")
|
|
@@ -529,15 +530,14 @@ def build_ui():
|
|
| 529 |
file_types=[".pdf", ".docx"]
|
| 530 |
)
|
| 531 |
job_desc = gr.Textbox(
|
| 532 |
-
lines=10,
|
| 533 |
-
label="Job Description",
|
| 534 |
placeholder="Paste the full job description here..."
|
| 535 |
)
|
| 536 |
mode = gr.Radio(
|
| 537 |
choices=["sbert", "bert", "api"],
|
| 538 |
value="sbert",
|
| 539 |
label="Analysis Mode",
|
| 540 |
-
info="SBERT/
|
| 541 |
)
|
| 542 |
with gr.Row():
|
| 543 |
clear_btn = gr.Button("Clear")
|
|
@@ -546,14 +546,11 @@ def build_ui():
|
|
| 546 |
with gr.Column(scale=3):
|
| 547 |
with gr.Tabs():
|
| 548 |
with gr.TabItem("📊 Analysis & Suggestions"):
|
| 549 |
-
score_slider = gr.Slider(
|
| 550 |
-
|
| 551 |
-
label="Similarity Score"
|
| 552 |
-
)
|
| 553 |
score_text = gr.Markdown()
|
| 554 |
-
suggestions_out = gr.Textbox(
|
| 555 |
-
|
| 556 |
-
)
|
| 557 |
missing_out = gr.Markdown(label="Keywords Check")
|
| 558 |
|
| 559 |
with gr.TabItem("🛠️ Project Analysis"):
|
|
@@ -572,9 +569,9 @@ def build_ui():
|
|
| 572 |
run_btn.click(
|
| 573 |
analyze_resumes,
|
| 574 |
inputs=[file_in, job_desc, mode],
|
| 575 |
-
outputs=[
|
| 576 |
-
score_slider, score_text, missing_out, suggestions_out, job_suggestions_out,
|
| 577 |
-
project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
|
| 578 |
],
|
| 579 |
show_progress='full'
|
| 580 |
)
|
|
@@ -582,7 +579,7 @@ def build_ui():
|
|
| 582 |
clear_btn.click(
|
| 583 |
clear_inputs,
|
| 584 |
inputs=[],
|
| 585 |
-
outputs=[
|
| 586 |
file_in, job_desc, mode, score_slider, score_text, missing_out, suggestions_out,
|
| 587 |
job_suggestions_out, projects_out, project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
|
| 588 |
]
|
|
|
|
| 405 |
extract_projects_section,
|
| 406 |
extract_top_keywords,
|
| 407 |
)
|
| 408 |
+
from api_model import calculate_similarity_api, check_api_health # API mode (HF Inference)
|
| 409 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
def _verdict_html(fname: str, sim_pct: float) -> str:
|
| 411 |
if sim_pct >= 80:
|
| 412 |
return f"<h3 style='color:green;'>✅ Best Match: {fname} ({sim_pct:.2f}%)</h3>"
|
|
|
|
| 427 |
f"The projects may not directly align with the key requirements. "
|
| 428 |
f"Consider highlighting different aspects of your work.</p>")
|
| 429 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
def analyze_resumes(files, job_description: str, mode: str):
|
| 431 |
if not files or not job_description.strip():
|
| 432 |
return 0.0, "Please upload resumes and paste a job description.", "", "", "", "", "", "", "", ""
|
| 433 |
|
| 434 |
+
# Fast fail if API mode is selected but HF token/model is not ready
|
| 435 |
+
if mode == "api":
|
| 436 |
+
ok, msg = check_api_health()
|
| 437 |
+
if not ok:
|
| 438 |
+
return (0.0,
|
| 439 |
+
f"<p style='color:red;'>HF Inference API error: {msg}</p>",
|
| 440 |
+
"", "", "", "", "", "", "", "")
|
| 441 |
+
|
| 442 |
results = []
|
| 443 |
+
first_error = None
|
| 444 |
+
|
| 445 |
for file in files:
|
| 446 |
try:
|
| 447 |
resume_text, fname = extract_text_from_fileobj(file)
|
| 448 |
if resume_text.strip().startswith("[Error"):
|
| 449 |
+
# file read failure — skip file but note error
|
| 450 |
+
first_error = first_error or resume_text
|
| 451 |
+
continue
|
| 452 |
|
|
|
|
| 453 |
cleaned_resume = preprocess_text(resume_text)
|
| 454 |
+
cleaned_job = preprocess_text(job_description)
|
| 455 |
|
|
|
|
| 456 |
if mode == "api":
|
| 457 |
sim_pct = calculate_similarity_api(cleaned_resume, cleaned_job)
|
| 458 |
+
else:
|
| 459 |
sim_pct = calculate_similarity(cleaned_resume, cleaned_job, mode=mode)
|
| 460 |
|
| 461 |
results.append((sim_pct, resume_text, fname))
|
| 462 |
+
|
| 463 |
+
except Exception as e:
|
| 464 |
+
# Capture first embedding/API error so the user gets a useful message
|
| 465 |
+
if first_error is None:
|
| 466 |
+
first_error = f"{type(e).__name__}: {e}"
|
| 467 |
continue
|
| 468 |
|
| 469 |
if not results:
|
| 470 |
+
# If everything failed, surface the first error instead of a vague message
|
| 471 |
+
msg = first_error or "No valid resumes were provided."
|
| 472 |
+
return (0.0,
|
| 473 |
+
f"<p style='color:red;'>Analysis failed: {msg}</p>",
|
| 474 |
+
"", "", "", "", "", "", "", "")
|
| 475 |
+
|
| 476 |
+
# Best match
|
| 477 |
+
best = max(results, key=lambda x: x[0])
|
| 478 |
sim_pct, resume_text, fname = best
|
| 479 |
|
|
|
|
| 480 |
missing_dict, suggestions_text = analyze_resume_keywords(resume_text, job_description)
|
| 481 |
missing_formatted = format_missing_keywords(missing_dict)
|
| 482 |
job_suggestions = suggest_jobs(resume_text)
|
| 483 |
projects_section = extract_projects_section(resume_text)
|
| 484 |
|
| 485 |
+
# Project fit
|
| 486 |
if projects_section.startswith("Could not"):
|
| 487 |
project_fit_verdict = "Cannot analyze project fit as no projects section was found."
|
| 488 |
else:
|
| 489 |
cleaned_projects = preprocess_text(projects_section)
|
| 490 |
+
cleaned_job = preprocess_text(job_description)
|
| 491 |
if cleaned_projects:
|
| 492 |
try:
|
| 493 |
if mode == "api":
|
|
|
|
| 495 |
else:
|
| 496 |
pscore = calculate_similarity(cleaned_projects, cleaned_job, mode=mode)
|
| 497 |
project_fit_verdict = _project_fit_verdict_from_score(pscore)
|
| 498 |
+
except Exception as e:
|
| 499 |
+
project_fit_verdict = f"Could not compute project fit (embedding error: {type(e).__name__}: {e})."
|
| 500 |
else:
|
| 501 |
project_fit_verdict = "Projects section is empty or contains no relevant text to analyze."
|
| 502 |
|
| 503 |
resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
|
| 504 |
+
jd_keywords_text = extract_top_keywords(preprocess_text(job_description))
|
|
|
|
| 505 |
verdict = _verdict_html(fname, sim_pct)
|
| 506 |
|
| 507 |
return (
|
|
|
|
| 509 |
job_suggestions, projects_section, project_fit_verdict, resume_keywords_text, jd_keywords_text, fname
|
| 510 |
)
|
| 511 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
def clear_inputs():
|
| 513 |
+
# MUST return one value per output we wire in clear_btn.click
|
| 514 |
+
# outputs: file_in, job_desc, mode, score_slider, score_text, missing_out, suggestions_out,
|
| 515 |
+
# job_suggestions_out, projects_out, project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
|
| 516 |
+
return (None, "", "sbert", 0.0, "", "", "", "", "", "", "", "", "")
|
| 517 |
|
|
|
|
|
|
|
|
|
|
| 518 |
def build_ui():
|
| 519 |
with gr.Blocks(theme=gr.themes.Default(), title="Resume ↔ Job Matcher") as demo:
|
| 520 |
gr.Markdown("# 📄 Resume & Job Description Analyzer 🎯")
|
|
|
|
| 530 |
file_types=[".pdf", ".docx"]
|
| 531 |
)
|
| 532 |
job_desc = gr.Textbox(
|
| 533 |
+
lines=10, label="Job Description",
|
|
|
|
| 534 |
placeholder="Paste the full job description here..."
|
| 535 |
)
|
| 536 |
mode = gr.Radio(
|
| 537 |
choices=["sbert", "bert", "api"],
|
| 538 |
value="sbert",
|
| 539 |
label="Analysis Mode",
|
| 540 |
+
info="SBERT/BERT use local models; API uses Hugging Face Inference API."
|
| 541 |
)
|
| 542 |
with gr.Row():
|
| 543 |
clear_btn = gr.Button("Clear")
|
|
|
|
| 546 |
with gr.Column(scale=3):
|
| 547 |
with gr.Tabs():
|
| 548 |
with gr.TabItem("📊 Analysis & Suggestions"):
|
| 549 |
+
score_slider = gr.Slider(value=0, minimum=0, maximum=100, step=0.01,
|
| 550 |
+
interactive=False, label="Similarity Score")
|
|
|
|
|
|
|
| 551 |
score_text = gr.Markdown()
|
| 552 |
+
suggestions_out = gr.Textbox(label="Suggestions to Improve Your Resume",
|
| 553 |
+
interactive=False, lines=5)
|
|
|
|
| 554 |
missing_out = gr.Markdown(label="Keywords Check")
|
| 555 |
|
| 556 |
with gr.TabItem("🛠️ Project Analysis"):
|
|
|
|
| 569 |
run_btn.click(
|
| 570 |
analyze_resumes,
|
| 571 |
inputs=[file_in, job_desc, mode],
|
| 572 |
+
outputs=[ # 10 outputs
|
| 573 |
+
score_slider, score_text, missing_out, suggestions_out, job_suggestions_out,
|
| 574 |
+
projects_out, project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
|
| 575 |
],
|
| 576 |
show_progress='full'
|
| 577 |
)
|
|
|
|
| 579 |
clear_btn.click(
|
| 580 |
clear_inputs,
|
| 581 |
inputs=[],
|
| 582 |
+
outputs=[ # 13 outputs; keep in sync with clear_inputs()
|
| 583 |
file_in, job_desc, mode, score_slider, score_text, missing_out, suggestions_out,
|
| 584 |
job_suggestions_out, projects_out, project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
|
| 585 |
]
|