Pradyumn Tendulkar committed on
Commit
85de68f
·
1 Parent(s): f641225

added api product

Browse files
__pycache__/api_model.cpython-313.pyc ADDED
Binary file (3.9 kB). View file
 
__pycache__/local_model.cpython-313.pyc ADDED
Binary file (15.1 kB). View file
 
api_model.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # API_model.py
2
+ import os
3
+ import re
4
+ import numpy as np
5
+ from typing import Optional
6
+ from huggingface_hub import InferenceClient
7
+
8
+ # -------- Config (via env) --------
9
+ EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
10
+ HF_TOKEN = os.getenv("HF_TOKEN") # set in Space/CI env
11
+ MAX_LEN = int(os.getenv("MAX_TEXT_LEN", "20000"))
12
+ REQ_TIMEOUT = float(os.getenv("REQ_TIMEOUT", "40"))
13
+
14
+ # Lazy client (created on first use to avoid import-time failures)
15
+ _hf_client: Optional[InferenceClient] = None
16
+
17
def _get_client() -> InferenceClient:
    """Return a process-wide InferenceClient, creating it lazily on first use.

    Raises:
        RuntimeError: if HF_TOKEN is not present in the environment.
    """
    global _hf_client
    if not HF_TOKEN:
        raise RuntimeError("HF_TOKEN is not set (add it in environment/Secrets).")
    if _hf_client is not None:
        return _hf_client
    # First use: build and cache the client so later calls reuse the connection.
    _hf_client = InferenceClient(model=EMBED_MODEL, token=HF_TOKEN, timeout=REQ_TIMEOUT)
    return _hf_client
24
+
25
+ # -------- Utilities --------
26
def _trim(s: str) -> str:
    """Strip surrounding whitespace and cap the text at MAX_LEN characters."""
    text = (s or "").strip()
    # Slicing is a no-op for strings already within the limit.
    return text[:MAX_LEN]
29
+
30
+ def _cosine(a: np.ndarray, b: np.ndarray) -> float:
31
+ denom = (np.linalg.norm(a) * np.linalg.norm(b)) or 1.0
32
+ return float(np.dot(a, b) / denom)
33
+
34
def _embed_api(text: str) -> np.ndarray:
    """Return a 1D float32 embedding for *text* via the HF Inference API.

    The feature-extraction endpoint may return a sentence vector (1D),
    token-level vectors (2D), or — for some models — a batched token matrix
    (3D with a leading batch axis).  Leading axes are mean-pooled until a
    single 1D vector remains, which generalizes the original 2D-only pooling.

    Raises:
        RuntimeError: if the response cannot be reduced to a non-empty
            1D vector (e.g. a scalar or empty payload).
    """
    client = _get_client()
    feats = client.feature_extraction(_trim(text))
    arr = np.asarray(feats, dtype=np.float32)
    # Mean-pool any leading axes (batch and/or token) down to one vector.
    while arr.ndim > 1:
        arr = arr.mean(axis=0)
    if arr.ndim != 1 or arr.size == 0:
        raise RuntimeError("Unexpected embedding shape from the Inference API.")
    return arr
44
+
45
+ # -------- Public API (drop-in for local similarity) --------
46
def calculate_similarity_api(resume_text: str, job_text: str) -> float:
    """Score resume/job similarity as a percentage in [0, 100].

    Drop-in replacement for the local-model similarity: same signature and
    scale.  Inputs are assumed to be preprocessed upstream (lowercased,
    stopwords removed, etc.).
    """
    resume_vec = _embed_api(resume_text)
    job_vec = _embed_api(job_text)
    pct = _cosine(resume_vec, job_vec) * 100.0
    # Round to 2 decimals and hand back a plain Python float.
    return float(np.round(pct, 2))
55
+
56
def api_healthcheck() -> str:
    """Embed a tiny probe string to verify credentials/model availability.

    Returns a human-readable "OK: ..." or "ERROR: ..." string instead of
    raising, so callers can surface the result directly in a UI.
    """
    try:
        _embed_api("healthcheck")
    except Exception as e:  # deliberate broad catch: report, don't crash
        return f"ERROR: {type(e).__name__}: {e}"
    return f"OK: Using {EMBED_MODEL}"
app.py CHANGED
@@ -407,6 +407,9 @@ from local_model import (
407
  extract_top_keywords,
408
  )
409
 
 
 
 
410
  # --------------------------
411
  # Main Gradio app logic
412
  # --------------------------
@@ -422,7 +425,13 @@ def analyze_resumes(files, job_description: str, mode: str):
422
  continue # Skip errored files
423
  cleaned_resume = preprocess_text(resume_text)
424
  cleaned_job = preprocess_text(job_description)
425
- sim_pct = calculate_similarity(cleaned_resume, cleaned_job, mode=mode)
 
 
 
 
 
 
426
  results.append((sim_pct, resume_text, fname))
427
  except Exception:
428
  continue # Skip if any error
@@ -438,7 +447,10 @@ def analyze_resumes(files, job_description: str, mode: str):
438
  missing_formatted = format_missing_keywords(missing_dict)
439
  job_suggestions = suggest_jobs(resume_text)
440
  projects_section = extract_projects_section(resume_text)
441
- project_fit_verdict = analyze_projects_fit(projects_section, job_description, mode)
 
 
 
442
  resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
443
  jd_keywords_text = extract_top_keywords(preprocess_text(job_description))
444
 
@@ -483,10 +495,11 @@ def build_ui():
483
  placeholder="Paste the full job description here..."
484
  )
485
  mode = gr.Radio(
486
- choices=["sbert", "bert"],
 
487
  value="sbert",
488
  label="Analysis Mode",
489
- info="SBERT is faster, BERT is more detailed."
490
  )
491
  with gr.Row():
492
  clear_btn = gr.Button("Clear")
 
407
  extract_top_keywords,
408
  )
409
 
410
+ # NEW: import API mode similarity
411
+ from api_model import calculate_similarity_api
412
+
413
  # --------------------------
414
  # Main Gradio app logic
415
  # --------------------------
 
425
  continue # Skip errored files
426
  cleaned_resume = preprocess_text(resume_text)
427
  cleaned_job = preprocess_text(job_description)
428
+
429
+ # Route by mode (SBERT/BERT local vs API)
430
+ if mode == "api":
431
+ sim_pct = calculate_similarity_api(cleaned_resume, cleaned_job)
432
+ else:
433
+ sim_pct = calculate_similarity(cleaned_resume, cleaned_job, mode=mode)
434
+
435
  results.append((sim_pct, resume_text, fname))
436
  except Exception:
437
  continue # Skip if any error
 
447
  missing_formatted = format_missing_keywords(missing_dict)
448
  job_suggestions = suggest_jobs(resume_text)
449
  projects_section = extract_projects_section(resume_text)
450
+ project_fit_verdict = analyze_projects_fit(projects_section, job_description, mode if mode != "api" else "sbert")
451
+ # ^ Project-fit path uses local formatting thresholds; mode value here affects only wording/thresholds,
452
+ # so we map 'api' to 'sbert' for consistent messages.
453
+
454
  resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
455
  jd_keywords_text = extract_top_keywords(preprocess_text(job_description))
456
 
 
495
  placeholder="Paste the full job description here..."
496
  )
497
  mode = gr.Radio(
498
+ # ADD the 3rd option here:
499
+ choices=["sbert", "bert", "api"],
500
  value="sbert",
501
  label="Analysis Mode",
502
+ info="SBERT (local, fast) BERT (local, detailed) • API (HF Inference, no local model)"
503
  )
504
  with gr.Row():
505
  clear_btn = gr.Button("Clear")
requirements.txt CHANGED
@@ -1,35 +1,22 @@
1
- '''--find-links https://storage.googleapis.com/torch-cpu/torch_stable.html
2
- torch
3
- torchvision
4
-
5
- gradio
6
- scikit-learn
7
- numpy
8
- PyMuPDF
9
- python-docx
10
- sentence-transformers
11
- transformers
12
- wordcloud
13
- matplotlib'''
14
 
15
  # ===== Torch CPU Wheels (for Spaces) =====
16
  --find-links https://storage.googleapis.com/torch-cpu/torch_stable.html
17
  --extra-index-url https://download.pytorch.org/whl/cpu
18
 
19
  # ===== Core Scientific Stack =====
20
- numpy==1.26.4 # compatible with most PyTorch CPU wheels
21
- scipy<1.11 # optional but stable with numpy 1.26
22
 
23
  # ===== PyTorch =====
24
- torch==2.2.2
25
- torchvision==0.17.2
26
 
27
  # ===== App / ML Libraries =====
28
- gradio>=4.44.1
29
  fastapi
30
  uvicorn
31
- pydantic>=1.10,<3
32
- huggingface_hub>=0.24
33
  scikit-learn
34
  sentence-transformers
35
  transformers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
  # ===== Torch CPU Wheels (for Spaces) =====
3
  --find-links https://storage.googleapis.com/torch-cpu/torch_stable.html
4
  --extra-index-url https://download.pytorch.org/whl/cpu
5
 
6
  # ===== Core Scientific Stack =====
7
+ numpy # unpinned; let pip resolve against the torch CPU wheels
8
+ scipy # optional; unpinned alongside numpy
9
 
10
  # ===== PyTorch =====
11
+ torch
12
+ torchvision
13
 
14
  # ===== App / ML Libraries =====
15
+ gradio
16
  fastapi
17
  uvicorn
18
+ pydantic
19
+ huggingface_hub
20
  scikit-learn
21
  sentence-transformers
22
  transformers