Pradyumn Tendulkar committed on
Commit
1118099
·
1 Parent(s): e256fc1

fixing api errors

Browse files
Files changed (3) hide show
  1. __pycache__/api_model.cpython-313.pyc +0 -0
  2. api_model.py +11 -20
  3. app.py +48 -51
__pycache__/api_model.cpython-313.pyc CHANGED
Binary files a/__pycache__/api_model.cpython-313.pyc and b/__pycache__/api_model.cpython-313.pyc differ
 
api_model.py CHANGED
@@ -1,26 +1,13 @@
1
  # API_model.py
2
- """
3
- Embedding + similarity via Hugging Face Inference API (no local models).
4
- Expose: calculate_similarity_api(text_a, text_b) -> float (0..100)
5
- Env:
6
- EMBED_MODEL : default "sentence-transformers/all-MiniLM-L6-v2"
7
- HF_TOKEN : required (set in Space/host env)
8
- MAX_TEXT_LEN : default "20000"
9
- REQ_TIMEOUT : default "40"
10
- """
11
-
12
  import os
13
- import re
14
  import numpy as np
15
  from huggingface_hub import InferenceClient
16
 
17
- # -------- App config --------
18
  EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
19
- HF_TOKEN = os.getenv("HF_TOKEN") # set in host env / Spaces Variables
20
  MAX_LEN = int(os.getenv("MAX_TEXT_LEN", "20000"))
21
  REQ_TIMEOUT = float(os.getenv("REQ_TIMEOUT", "40"))
22
 
23
- # HF Inference API client (timeout belongs on the client)
24
  _hf = InferenceClient(model=EMBED_MODEL, token=HF_TOKEN, timeout=REQ_TIMEOUT)
25
 
26
  def _trim(s: str) -> str:
@@ -34,20 +21,24 @@ def _cosine(a: np.ndarray, b: np.ndarray) -> float:
34
  def _embed_api(text: str) -> np.ndarray:
35
  if not HF_TOKEN:
36
  raise RuntimeError("HF_TOKEN is not set (add it in environment or Space → Settings → Variables).")
37
- feats = _hf.feature_extraction(_trim(text)) # API returns list/array
38
  arr = np.array(feats, dtype=np.float32)
39
- # If token-level embeddings returned, mean-pool to sentence vector
40
- if arr.ndim == 2:
41
  arr = arr.mean(axis=0)
42
  if arr.ndim != 1:
43
  raise RuntimeError(f"Unexpected embedding shape from the Inference API: {arr.shape}")
44
  return arr
45
 
46
  def calculate_similarity_api(text_a: str, text_b: str) -> float:
47
- """
48
- Return cosine similarity (%) between two texts using HF Inference API embeddings.
49
- """
50
  a_vec = _embed_api(text_a)
51
  b_vec = _embed_api(text_b)
52
  score = _cosine(a_vec, b_vec) * 100.0
53
  return float(np.round(score, 2))
 
 
 
 
 
 
 
 
 
1
  # API_model.py
 
 
 
 
 
 
 
 
 
 
2
  import os
 
3
  import numpy as np
4
  from huggingface_hub import InferenceClient
5
 
 
6
  EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
7
+ HF_TOKEN = os.getenv("HF_TOKEN") # set in env/Space Variables
8
  MAX_LEN = int(os.getenv("MAX_TEXT_LEN", "20000"))
9
  REQ_TIMEOUT = float(os.getenv("REQ_TIMEOUT", "40"))
10
 
 
11
  _hf = InferenceClient(model=EMBED_MODEL, token=HF_TOKEN, timeout=REQ_TIMEOUT)
12
 
13
  def _trim(s: str) -> str:
 
21
  def _embed_api(text: str) -> np.ndarray:
22
  if not HF_TOKEN:
23
  raise RuntimeError("HF_TOKEN is not set (add it in environment or Space → Settings → Variables).")
24
+ feats = _hf.feature_extraction(_trim(text)) # may raise on auth/model/timeouts
25
  arr = np.array(feats, dtype=np.float32)
26
+ if arr.ndim == 2: # token-level mean pool
 
27
  arr = arr.mean(axis=0)
28
  if arr.ndim != 1:
29
  raise RuntimeError(f"Unexpected embedding shape from the Inference API: {arr.shape}")
30
  return arr
31
 
32
  def calculate_similarity_api(text_a: str, text_b: str) -> float:
 
 
 
33
  a_vec = _embed_api(text_a)
34
  b_vec = _embed_api(text_b)
35
  score = _cosine(a_vec, b_vec) * 100.0
36
  return float(np.round(score, 2))
37
+
38
+ def check_api_health() -> tuple[bool, str]:
39
+ """Quick probe to fail fast with a helpful message in the UI."""
40
+ try:
41
+ _ = _embed_api("healthcheck")
42
+ return True, f"OK (model={EMBED_MODEL})"
43
+ except Exception as e:
44
+ return False, f"{type(e).__name__}: {e}"
app.py CHANGED
@@ -405,12 +405,8 @@ from local_model import (
405
  extract_projects_section,
406
  extract_top_keywords,
407
  )
408
- from api_model import calculate_similarity_api # API mode (HF Inference)
409
 
410
-
411
- # --------------------------
412
- # Helpers
413
- # --------------------------
414
  def _verdict_html(fname: str, sim_pct: float) -> str:
415
  if sim_pct >= 80:
416
  return f"<h3 style='color:green;'>✅ Best Match: {fname} ({sim_pct:.2f}%)</h3>"
@@ -431,55 +427,67 @@ def _project_fit_verdict_from_score(score: float) -> str:
431
  f"The projects may not directly align with the key requirements. "
432
  f"Consider highlighting different aspects of your work.</p>")
433
 
434
-
435
- # --------------------------
436
- # Main Gradio app logic
437
- # --------------------------
438
  def analyze_resumes(files, job_description: str, mode: str):
439
  if not files or not job_description.strip():
440
  return 0.0, "Please upload resumes and paste a job description.", "", "", "", "", "", "", "", ""
441
 
 
 
 
 
 
 
 
 
442
  results = []
 
 
443
  for file in files:
444
  try:
445
  resume_text, fname = extract_text_from_fileobj(file)
446
  if resume_text.strip().startswith("[Error"):
447
- continue # Skip errored files
 
 
448
 
449
- # Clean both sides before similarity
450
  cleaned_resume = preprocess_text(resume_text)
451
- cleaned_job = preprocess_text(job_description)
452
 
453
- # Similarity by mode
454
  if mode == "api":
455
  sim_pct = calculate_similarity_api(cleaned_resume, cleaned_job)
456
- else: # "sbert" or "bert" (local)
457
  sim_pct = calculate_similarity(cleaned_resume, cleaned_job, mode=mode)
458
 
459
  results.append((sim_pct, resume_text, fname))
460
- except Exception:
461
- # Skip the file on any error (keep app resilient)
 
 
 
462
  continue
463
 
464
  if not results:
465
- return 0.0, "No valid resumes were provided.", "", "", "", "", "", "", "", ""
466
-
467
- # Select the best matching resume
468
- best = max(results, key=lambda x: x[0]) # highest similarity
 
 
 
 
469
  sim_pct, resume_text, fname = best
470
 
471
- # Keyword + jobs + keywords extraction (mode-independent)
472
  missing_dict, suggestions_text = analyze_resume_keywords(resume_text, job_description)
473
  missing_formatted = format_missing_keywords(missing_dict)
474
  job_suggestions = suggest_jobs(resume_text)
475
  projects_section = extract_projects_section(resume_text)
476
 
477
- # Project fit: local for sbert/bert, API for api
478
  if projects_section.startswith("Could not"):
479
  project_fit_verdict = "Cannot analyze project fit as no projects section was found."
480
  else:
481
  cleaned_projects = preprocess_text(projects_section)
482
- cleaned_job = preprocess_text(job_description)
483
  if cleaned_projects:
484
  try:
485
  if mode == "api":
@@ -487,14 +495,13 @@ def analyze_resumes(files, job_description: str, mode: str):
487
  else:
488
  pscore = calculate_similarity(cleaned_projects, cleaned_job, mode=mode)
489
  project_fit_verdict = _project_fit_verdict_from_score(pscore)
490
- except Exception as _:
491
- project_fit_verdict = "Could not compute project fit (embedding error)."
492
  else:
493
  project_fit_verdict = "Projects section is empty or contains no relevant text to analyze."
494
 
495
  resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
496
- jd_keywords_text = extract_top_keywords(preprocess_text(job_description))
497
-
498
  verdict = _verdict_html(fname, sim_pct)
499
 
500
  return (
@@ -502,18 +509,12 @@ def analyze_resumes(files, job_description: str, mode: str):
502
  job_suggestions, projects_section, project_fit_verdict, resume_keywords_text, jd_keywords_text, fname
503
  )
504
 
505
-
506
- # --------------------------
507
- # Clear Button Logic
508
- # --------------------------
509
  def clear_inputs():
510
- # Reset mode to sbert; clear all outputs
511
- return None, "", "sbert", 0.0, "", "", "", "", "", "", ""
512
-
 
513
 
514
- # --------------------------
515
- # Build Gradio UI
516
- # --------------------------
517
  def build_ui():
518
  with gr.Blocks(theme=gr.themes.Default(), title="Resume ↔ Job Matcher") as demo:
519
  gr.Markdown("# 📄 Resume & Job Description Analyzer 🎯")
@@ -529,15 +530,14 @@ def build_ui():
529
  file_types=[".pdf", ".docx"]
530
  )
531
  job_desc = gr.Textbox(
532
- lines=10,
533
- label="Job Description",
534
  placeholder="Paste the full job description here..."
535
  )
536
  mode = gr.Radio(
537
  choices=["sbert", "bert", "api"],
538
  value="sbert",
539
  label="Analysis Mode",
540
- info="SBERT/BERΤ use local models; API uses Hugging Face Inference API."
541
  )
542
  with gr.Row():
543
  clear_btn = gr.Button("Clear")
@@ -546,14 +546,11 @@ def build_ui():
546
  with gr.Column(scale=3):
547
  with gr.Tabs():
548
  with gr.TabItem("📊 Analysis & Suggestions"):
549
- score_slider = gr.Slider(
550
- value=0, minimum=0, maximum=100, step=0.01, interactive=False,
551
- label="Similarity Score"
552
- )
553
  score_text = gr.Markdown()
554
- suggestions_out = gr.Textbox(
555
- label="Suggestions to Improve Your Resume", interactive=False, lines=5
556
- )
557
  missing_out = gr.Markdown(label="Keywords Check")
558
 
559
  with gr.TabItem("🛠️ Project Analysis"):
@@ -572,9 +569,9 @@ def build_ui():
572
  run_btn.click(
573
  analyze_resumes,
574
  inputs=[file_in, job_desc, mode],
575
- outputs=[
576
- score_slider, score_text, missing_out, suggestions_out, job_suggestions_out, projects_out,
577
- project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
578
  ],
579
  show_progress='full'
580
  )
@@ -582,7 +579,7 @@ def build_ui():
582
  clear_btn.click(
583
  clear_inputs,
584
  inputs=[],
585
- outputs=[
586
  file_in, job_desc, mode, score_slider, score_text, missing_out, suggestions_out,
587
  job_suggestions_out, projects_out, project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
588
  ]
 
405
  extract_projects_section,
406
  extract_top_keywords,
407
  )
408
+ from api_model import calculate_similarity_api, check_api_health # API mode (HF Inference)
409
 
 
 
 
 
410
  def _verdict_html(fname: str, sim_pct: float) -> str:
411
  if sim_pct >= 80:
412
  return f"<h3 style='color:green;'>✅ Best Match: {fname} ({sim_pct:.2f}%)</h3>"
 
427
  f"The projects may not directly align with the key requirements. "
428
  f"Consider highlighting different aspects of your work.</p>")
429
 
 
 
 
 
430
  def analyze_resumes(files, job_description: str, mode: str):
431
  if not files or not job_description.strip():
432
  return 0.0, "Please upload resumes and paste a job description.", "", "", "", "", "", "", "", ""
433
 
434
+ # Fast fail if API mode is selected but HF token/model is not ready
435
+ if mode == "api":
436
+ ok, msg = check_api_health()
437
+ if not ok:
438
+ return (0.0,
439
+ f"<p style='color:red;'>HF Inference API error: {msg}</p>",
440
+ "", "", "", "", "", "", "", "")
441
+
442
  results = []
443
+ first_error = None
444
+
445
  for file in files:
446
  try:
447
  resume_text, fname = extract_text_from_fileobj(file)
448
  if resume_text.strip().startswith("[Error"):
449
+ # file read failure — skip file but note error
450
+ first_error = first_error or resume_text
451
+ continue
452
 
 
453
  cleaned_resume = preprocess_text(resume_text)
454
+ cleaned_job = preprocess_text(job_description)
455
 
 
456
  if mode == "api":
457
  sim_pct = calculate_similarity_api(cleaned_resume, cleaned_job)
458
+ else:
459
  sim_pct = calculate_similarity(cleaned_resume, cleaned_job, mode=mode)
460
 
461
  results.append((sim_pct, resume_text, fname))
462
+
463
+ except Exception as e:
464
+ # Capture first embedding/API error so the user gets a useful message
465
+ if first_error is None:
466
+ first_error = f"{type(e).__name__}: {e}"
467
  continue
468
 
469
  if not results:
470
+ # If everything failed, surface the first error instead of a vague message
471
+ msg = first_error or "No valid resumes were provided."
472
+ return (0.0,
473
+ f"<p style='color:red;'>Analysis failed: {msg}</p>",
474
+ "", "", "", "", "", "", "", "")
475
+
476
+ # Best match
477
+ best = max(results, key=lambda x: x[0])
478
  sim_pct, resume_text, fname = best
479
 
 
480
  missing_dict, suggestions_text = analyze_resume_keywords(resume_text, job_description)
481
  missing_formatted = format_missing_keywords(missing_dict)
482
  job_suggestions = suggest_jobs(resume_text)
483
  projects_section = extract_projects_section(resume_text)
484
 
485
+ # Project fit
486
  if projects_section.startswith("Could not"):
487
  project_fit_verdict = "Cannot analyze project fit as no projects section was found."
488
  else:
489
  cleaned_projects = preprocess_text(projects_section)
490
+ cleaned_job = preprocess_text(job_description)
491
  if cleaned_projects:
492
  try:
493
  if mode == "api":
 
495
  else:
496
  pscore = calculate_similarity(cleaned_projects, cleaned_job, mode=mode)
497
  project_fit_verdict = _project_fit_verdict_from_score(pscore)
498
+ except Exception as e:
499
+ project_fit_verdict = f"Could not compute project fit (embedding error: {type(e).__name__}: {e})."
500
  else:
501
  project_fit_verdict = "Projects section is empty or contains no relevant text to analyze."
502
 
503
  resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
504
+ jd_keywords_text = extract_top_keywords(preprocess_text(job_description))
 
505
  verdict = _verdict_html(fname, sim_pct)
506
 
507
  return (
 
509
  job_suggestions, projects_section, project_fit_verdict, resume_keywords_text, jd_keywords_text, fname
510
  )
511
 
 
 
 
 
512
  def clear_inputs():
513
+ # MUST return one value per output we wire in clear_btn.click
514
+ # outputs: file_in, job_desc, mode, score_slider, score_text, missing_out, suggestions_out,
515
+ # job_suggestions_out, projects_out, project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
516
+ return (None, "", "sbert", 0.0, "", "", "", "", "", "", "", "", "")
517
 
 
 
 
518
  def build_ui():
519
  with gr.Blocks(theme=gr.themes.Default(), title="Resume ↔ Job Matcher") as demo:
520
  gr.Markdown("# 📄 Resume & Job Description Analyzer 🎯")
 
530
  file_types=[".pdf", ".docx"]
531
  )
532
  job_desc = gr.Textbox(
533
+ lines=10, label="Job Description",
 
534
  placeholder="Paste the full job description here..."
535
  )
536
  mode = gr.Radio(
537
  choices=["sbert", "bert", "api"],
538
  value="sbert",
539
  label="Analysis Mode",
540
+ info="SBERT/BERT use local models; API uses Hugging Face Inference API."
541
  )
542
  with gr.Row():
543
  clear_btn = gr.Button("Clear")
 
546
  with gr.Column(scale=3):
547
  with gr.Tabs():
548
  with gr.TabItem("📊 Analysis & Suggestions"):
549
+ score_slider = gr.Slider(value=0, minimum=0, maximum=100, step=0.01,
550
+ interactive=False, label="Similarity Score")
 
 
551
  score_text = gr.Markdown()
552
+ suggestions_out = gr.Textbox(label="Suggestions to Improve Your Resume",
553
+ interactive=False, lines=5)
 
554
  missing_out = gr.Markdown(label="Keywords Check")
555
 
556
  with gr.TabItem("🛠️ Project Analysis"):
 
569
  run_btn.click(
570
  analyze_resumes,
571
  inputs=[file_in, job_desc, mode],
572
+ outputs=[ # 10 outputs
573
+ score_slider, score_text, missing_out, suggestions_out, job_suggestions_out,
574
+ projects_out, project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
575
  ],
576
  show_progress='full'
577
  )
 
579
  clear_btn.click(
580
  clear_inputs,
581
  inputs=[],
582
+ outputs=[ # 13 outputs; keep in sync with clear_inputs()
583
  file_in, job_desc, mode, score_slider, score_text, missing_out, suggestions_out,
584
  job_suggestions_out, projects_out, project_fit_out, resume_keywords_out, jd_keywords_out, best_fname_out
585
  ]