Update backend/agents/matcher.py
backend/agents/matcher.py  CHANGED  (+17 -6)
@@ -1,15 +1,26 @@
+# matcher.py
+
 from sentence_transformers import SentenceTransformer, util
+import re
+from collections import Counter
 
-# Load embedding model once (fast, CPU friendly)
 model = SentenceTransformer("BAAI/bge-small-en-v1.5")
 
 def compute_match(resume_text: str, job_desc: str) -> float:
-    """
-    Compute semantic similarity between resume text and job description.
-    Returns a score between -1 and 1 (cosine similarity).
-    """
     emb_resume = model.encode(resume_text, convert_to_tensor=True, normalize_embeddings=True)
     emb_job = model.encode(job_desc, convert_to_tensor=True, normalize_embeddings=True)
-
     score = util.cos_sim(emb_resume, emb_job).item()
     return round(float(score), 3)
+
+def extract_keywords(text, top_n=20):
+    stopwords = set([
+        "the","and","a","an","to","of","in","for","on","with",
+        "at","by","from","or","is","are","as","this","that",
+        "your","you","be","has","have","will","can","may","our"
+    ])
+    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)
+    words = text.lower().split()
+    words = [w for w in words if w not in stopwords and len(w) > 2]
+    counter = Counter(words)
+    keywords = [word for word, _ in counter.most_common(top_n)]
+    return keywords
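
After this change the module exposes two helpers: compute_match, which returns the rounded cosine similarity between normalized bge-small-en-v1.5 embeddings of the two texts, and extract_keywords, which returns the most frequent non-stopword tokens. Below is a minimal usage sketch, not part of the commit; it assumes the module is importable as backend.agents.matcher (package __init__ files in place) and that sentence-transformers is installed with the model available locally or downloadable.

    # demo_matcher.py -- hypothetical example script, not in the repository
    from backend.agents.matcher import compute_match, extract_keywords

    resume = "Python developer with experience in FastAPI, Docker and NLP pipelines."
    job = "We are hiring a backend engineer skilled in Python, FastAPI and Docker."

    # Semantic similarity score, rounded to three decimals (cosine similarity,
    # so nominally -1..1, in practice usually between 0 and 1 for text pairs).
    score = compute_match(resume, job)

    # Frequency-based keywords from the job posting (stopwords and tokens
    # shorter than three characters are dropped).
    job_keywords = extract_keywords(job, top_n=10)

    # Tokens from the job's keyword list that also appear among the resume's tokens.
    resume_words = set(extract_keywords(resume, top_n=50))
    overlap = [k for k in job_keywords if k in resume_words]

    print(f"match score: {score}")
    print(f"job keywords: {job_keywords}")
    print(f"overlapping keywords: {overlap}")

Note that extract_keywords is purely frequency-based, so the overlap list above is only a rough lexical complement to the embedding-based score, not a replacement for it.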