Spaces:
Build error
Build error
Upload 33 files
Browse files- model/bias/__pycache__/detector.cpython-313.pyc +0 -0
- model/bias/__pycache__/parser.cpython-313.pyc +0 -0
- model/bias/__pycache__/prompt.cpython-313.pyc +0 -0
- model/bias/__pycache__/rules.cpython-313.pyc +0 -0
- model/bias/__pycache__/schema.cpython-313.pyc +0 -0
- model/bias/__pycache__/system_checks.cpython-313.pyc +0 -0
- model/bias/detector.py +19 -0
- model/bias/parser.py +99 -0
- model/bias/prompt.py +20 -0
- model/bias/rules.py +42 -0
- model/bias/schema.py +8 -0
- model/bias/system_checks.py +7 -0
- model/llm/__pycache__/client.cpython-313.pyc +0 -0
- model/llm/__pycache__/extraction.cpython-313.pyc +0 -0
- model/llm/__pycache__/parser.cpython-313.pyc +0 -0
- model/llm/__pycache__/prompt.cpython-313.pyc +0 -0
- model/llm/__pycache__/schemas.cpython-313.pyc +0 -0
- model/llm/client.py +25 -0
- model/llm/demo.py +30 -0
- model/llm/extraction.py +21 -0
- model/llm/parser.py +13 -0
- model/llm/prompt.py +37 -0
- model/llm/schemas.py +14 -0
- model/semantic/__pycache__/embeddings.cpython-313.pyc +0 -0
- model/semantic/__pycache__/jd.cpython-313.pyc +0 -0
- model/semantic/__pycache__/scoring.cpython-313.pyc +0 -0
- model/semantic/__pycache__/similarity.cpython-313.pyc +0 -0
- model/semantic/__pycache__/skills.cpython-313.pyc +0 -0
- model/semantic/embeddings.py +6 -0
- model/semantic/jd.py +6 -0
- model/semantic/scoring.py +21 -0
- model/semantic/similarity.py +6 -0
- model/semantic/skills.py +19 -0
model/bias/__pycache__/detector.cpython-313.pyc
ADDED
|
Binary file (812 Bytes). View file
|
|
|
model/bias/__pycache__/parser.cpython-313.pyc
ADDED
|
Binary file (3.14 kB). View file
|
|
|
model/bias/__pycache__/prompt.cpython-313.pyc
ADDED
|
Binary file (685 Bytes). View file
|
|
|
model/bias/__pycache__/rules.cpython-313.pyc
ADDED
|
Binary file (1.86 kB). View file
|
|
|
model/bias/__pycache__/schema.cpython-313.pyc
ADDED
|
Binary file (652 Bytes). View file
|
|
|
model/bias/__pycache__/system_checks.cpython-313.pyc
ADDED
|
Binary file (606 Bytes). View file
|
|
|
model/bias/detector.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from .rules import (
    detect_gender_coded_language,
    detect_prestige_bias,
    detect_company_brand_bias,
)
from .system_checks import detect_score_dominance


def run_bias_rules(
    resume_text: str,
    jd_text: str,
    scores: dict
) -> list[str]:
    """Run every deterministic bias rule and collect all findings.

    Order is fixed: JD language checks first, then resume checks,
    then the score-dominance system check.
    """
    rule_results = (
        detect_gender_coded_language(jd_text),
        detect_prestige_bias(resume_text),
        detect_company_brand_bias(resume_text),
        detect_score_dominance(scores),
    )
    findings: list[str] = []
    for result in rule_results:
        findings.extend(result)
    return findings
model/bias/parser.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import re
|
| 3 |
+
from typing import List
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# =========================
# Pydantic Model
# =========================

class BiasReport(BaseModel):
    """Validated container for the parsed LLM bias-analysis output.

    NOTE(review): this duplicates BiasReport in model/bias/schema.py —
    consider keeping a single definition and importing it here.
    """
    detected_biases: List[str]  # names/keys of each detected bias
    risk_level: str  # expected "low" | "medium" | "high", or "unknown"
    explanations: List[str]  # why each finding matters
    mitigation_suggestions: List[str]  # suggested remediation steps
| 17 |
+
|
| 18 |
+
# =========================
# JSON Normalizer
# =========================

def normalize_llm_json(raw: str) -> dict:
    """
    Extracts and parses a JSON object from noisy LLM output.
    Handles markdown fences, 'json' prefixes, and extra text.
    """
    if raw is None:
        raise ValueError("LLM returned None")

    text = raw.strip()
    if not text:
        raise ValueError("LLM returned empty output")

    # Strip a leading ``` / ```json fence; the payload is the second segment.
    if text.startswith("```"):
        segments = text.split("```")
        if len(segments) >= 2:
            text = segments[1].strip()

    # Drop a bare 'json' language tag left over from the fence.
    if text.lower().startswith("json"):
        text = text[4:].strip()

    # Grab everything from the first '{' to the last '}' (greedy, multiline).
    found = re.search(r"\{.*\}", text, re.DOTALL)
    if found is None:
        raise ValueError(f"No JSON object found in LLM output:\n{text}")

    return json.loads(found.group())
|
| 55 |
+
|
| 56 |
+
# =========================
# Bias Output Parser
# =========================

def parse_bias_output(raw: str) -> BiasReport:
    """
    Normalizes and parses LLM bias analysis output into BiasReport.
    Safely handles dict/list variations from the LLM.
    """
    data = normalize_llm_json(raw)

    # The model may return {bias_name: explanation} or a bare list of texts.
    findings = data.get("findings_explanation", [])
    if isinstance(findings, dict):
        detected_biases = list(findings.keys())
        explanations = list(findings.values())
    elif isinstance(findings, list):
        detected_biases = []
        explanations = findings
    else:
        detected_biases = []
        explanations = []

    # mitigation_steps may likewise be a dict or a list.
    mitigation = data.get("mitigation_steps", [])
    if isinstance(mitigation, dict):
        mitigation_suggestions = list(mitigation.values())
    elif isinstance(mitigation, list):
        mitigation_suggestions = mitigation
    else:
        mitigation_suggestions = []

    return BiasReport(
        detected_biases=detected_biases,
        risk_level=data.get("overall_risk_level", "unknown"),
        explanations=explanations,
        mitigation_suggestions=mitigation_suggestions,
    )
|
model/bias/prompt.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_bias_prompt(findings: list[str]) -> str:
    """Build the ethics-auditor prompt around the deterministic rule findings.

    The findings list is interpolated via str(), matching the original
    f-string rendering.
    """
    header = "\nYou are an AI ethics auditor.\n"
    rules = (
        "\nRULES:\n"
        "- Do NOT infer personal attributes.\n"
        "- Do NOT add new bias types.\n"
        "- Only explain provided findings.\n"
        "- Be factual and neutral.\n"
    )
    evidence = f"\nFINDINGS:\n{findings}\n"
    task = (
        "\nTASK:\n"
        "1. Assess overall risk level (low, medium, high).\n"
        "2. Explain why each finding matters.\n"
        "3. Suggest mitigation steps.\n"
    )
    footer = "\nRespond ONLY in JSON.\n"
    return header + rules + evidence + task + footer
|
model/bias/rules.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import re

# Wordlists are matched case-insensitively as whole words (see _contains_word).
GENDER_CODED_WORDS = {
    "masculine": ["aggressive", "dominant", "competitive", "rockstar"],
    "feminine": ["supportive", "empathetic", "nurturing"]
}

PRESTIGE_SCHOOLS = {
    "iit", "nit", "mit", "stanford", "harvard", "oxford"
}

BIG_TECH = {
    "google", "amazon", "meta", "microsoft", "apple"
}


def _contains_word(term: str, text: str) -> bool:
    """True when `term` occurs as a whole word in `text` (case-insensitive).

    Plain substring tests cause false positives for short tokens:
    "nit" inside "community", "mit" inside "commitment", "meta" inside
    "metadata" — hence the \\b word-boundary anchors.
    """
    return re.search(rf"\b{re.escape(term)}\b", text, re.IGNORECASE) is not None


def detect_gender_coded_language(jd_text: str) -> list[str]:
    """Flag gender-coded adjectives in a job description, one finding per word."""
    findings = []
    for category, words in GENDER_CODED_WORDS.items():
        for w in words:
            if _contains_word(w, jd_text):
                findings.append(f"Gender-coded language detected: '{w}' ({category})")
    return findings


def detect_prestige_bias(resume_text: str) -> list[str]:
    """Flag mentions of prestige institutions in a resume (one finding per school)."""
    findings = []
    for school in PRESTIGE_SCHOOLS:
        if _contains_word(school, resume_text):
            findings.append("Prestige institution mention may influence scoring")
    return findings


def detect_company_brand_bias(resume_text: str) -> list[str]:
    """Flag mentions of well-known tech employers in a resume."""
    findings = []
    for company in BIG_TECH:
        if _contains_word(company, resume_text):
            findings.append("Well-known company mention may bias evaluation")
    return findings
model/bias/schema.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pydantic import BaseModel
from typing import List

class BiasReport(BaseModel):
    """Structured bias-audit result shared across the bias pipeline."""
    detected_biases: List[str]  # names of each detected bias
    risk_level: str  # low | medium | high
    explanations: List[str]  # why each finding matters
    mitigation_suggestions: List[str]  # suggested remediation steps
|
model/bias/system_checks.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def detect_score_dominance(scores: dict) -> list[str]:
    """Flag results where semantic similarity dominates despite a weak skill match.

    Missing keys are treated as "no evidence" instead of raising KeyError:
    semantic similarity defaults to 0.0 and skill match to 1.0, so an
    incomplete scores dict can never trigger the finding.
    """
    findings = []
    semantic = scores.get("semantic_similarity", 0.0)
    skill = scores.get("skill_match", 1.0)
    if semantic > 0.85 and skill < 0.3:
        findings.append(
            "Semantic similarity heavily influenced score despite weak skill match"
        )
    return findings
|
model/llm/__pycache__/client.cpython-313.pyc
ADDED
|
Binary file (879 Bytes). View file
|
|
|
model/llm/__pycache__/extraction.cpython-313.pyc
ADDED
|
Binary file (1.14 kB). View file
|
|
|
model/llm/__pycache__/parser.cpython-313.pyc
ADDED
|
Binary file (784 Bytes). View file
|
|
|
model/llm/__pycache__/prompt.cpython-313.pyc
ADDED
|
Binary file (1.09 kB). View file
|
|
|
model/llm/__pycache__/schemas.cpython-313.pyc
ADDED
|
Binary file (1.02 kB). View file
|
|
|
model/llm/client.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from langchain_mistralai import ChatMistralAI
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv

# Pull MISTRAL_API_KEY (and friends) from .env before the client is built.
load_dotenv()

# -------------------------------------------------
# Initialize Mistral (API-based, no local model)
# -------------------------------------------------

llm = ChatMistralAI(
    model="mistral-small-latest",
    temperature=0.2,  # low temperature: more deterministic, JSON-friendly output
)

# -------------------------------------------------
# LLM Runner (LangChain-native)
# -------------------------------------------------

def run_llm(prompt: str) -> str:
    """Send a single-turn prompt to Mistral and return the raw text reply.

    Fix: removed the leftover debug print ("the type of the reponse ...")
    that polluted stdout on every call; also dropped the unused
    SystemMessage import.
    """
    response = llm.invoke([
        HumanMessage(content=prompt),
    ])
    return response.content
|
model/llm/demo.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from langchain_mistralai import ChatMistralAI
from langchain_core.messages import SystemMessage, HumanMessage
from dotenv import load_dotenv

# Load API credentials from .env before constructing the client.
load_dotenv()

# -------------------------------------------------
# Initialize Mistral via API (NO DOWNLOAD)
# -------------------------------------------------

llm = ChatMistralAI(
    model="mistral-small-latest",
    temperature=0.2
)

# -------------------------------------------------
# LLM Runner
# -------------------------------------------------

def run_llm(prompt: str) -> str:
    """Run one system+user exchange and return the model's text reply."""
    reply = llm.invoke([
        SystemMessage(content="You explain ATS evaluations."),
        HumanMessage(content=prompt),
    ])
    return reply.content

if __name__ == "__main__":
    print(run_llm("Explain why a resume with no projects scores low in ATS."))
|
model/llm/extraction.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import json
import re

def extract_json(raw: str) -> dict:
    """Extract and parse the JSON object embedded in raw LLM output.

    Strips markdown code fences, then parses the substring between the
    first '{' and the last '}'. Raises ValueError when no parseable
    object is present.
    """
    if not raw or not raw.strip():
        raise ValueError("Empty LLM output")

    text = raw.strip()

    # Remove markdown fences
    text = re.sub(r"^```(?:json)?", "", text, flags=re.IGNORECASE).strip()
    text = re.sub(r"```$", "", text).strip()

    # Extract JSON object defensively
    start = text.find("{")
    end = text.rfind("}")

    # `end < start` also covers malformed input such as "} {" where the
    # braces do not delimit an object (the original only checked for -1).
    if start == -1 or end < start:
        raise ValueError("No JSON object found")

    return json.loads(text[start:end + 1])
|
model/llm/parser.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from .schemas import Explanation
from model.llm.extraction import extract_json


def parse_llm_output(raw: str) -> Explanation:
    """Parse raw LLM output into a validated Explanation model.

    Normalization: some replies put a plain string under "explanation";
    coerce it into the structured shape ExplanationDetail expects before
    pydantic validation.

    Fix: removed the garbled debug print ("the ddata has reacjed her e")
    that ran on every parse.
    """
    data = extract_json(raw)
    if isinstance(data.get("explanation"), str):
        data["explanation"] = {
            "overall_score": data["explanation"],
            "skills": None,
            "semantic_similarity": None,
        }
    return Explanation(**data)
|
model/llm/prompt.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_prompt(
    structured_resume: dict,
    skills_found: list[str],
    scores: dict,
    job_description: str
) -> str:
    """Assemble the grounded ATS-explanation prompt.

    Scores are presented read-only; inputs are interpolated via str(),
    matching the original f-string rendering.
    """
    lines = (
        "",
        "You are an ATS evaluation assistant.",
        "",
        "RULES:",
        "- Do NOT invent skills.",
        "- Do NOT change scores.",
        "- Use ONLY the provided resume content.",
        "- Base explanations on evidence.",
        "- Be concise and factual.",
        "",
        "RESUME (STRUCTURED):",
        f"{structured_resume}",
        "",
        "SKILLS FOUND:",
        f"{skills_found}",
        "",
        "JOB DESCRIPTION:",
        f"{job_description}",
        "",
        "SCORES (DO NOT MODIFY):",
        f"{scores}",
        "",
        "TASK:",
        "1. Explain why the overall score is what it is.",
        "2. List clear strengths based on resume evidence.",
        "3. Identify gaps relative to the job description.",
        "4. Suggest realistic improvements.",
        "5. Generate interview questions strictly from resume + JD.",
        "",
        "Respond ONLY in valid JSON.",
        "",
    )
    return "\n".join(lines)
|
model/llm/schemas.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pydantic import BaseModel
from typing import List

class ExplanationDetail(BaseModel):
    """Per-component narrative for the score breakdown."""
    overall_score: str  # always present; skills/semantic may be omitted by the LLM
    skills: str | None = None
    semantic_similarity: str | None = None

class Explanation(BaseModel):
    """Full LLM-generated evaluation report, validated before use."""
    explanation: ExplanationDetail
    strengths: List[str]
    gaps: List[str]
    improvements: List[str]
    interview_questions: List[str]
|
model/semantic/__pycache__/embeddings.cpython-313.pyc
ADDED
|
Binary file (513 Bytes). View file
|
|
|
model/semantic/__pycache__/jd.cpython-313.pyc
ADDED
|
Binary file (601 Bytes). View file
|
|
|
model/semantic/__pycache__/scoring.cpython-313.pyc
ADDED
|
Binary file (787 Bytes). View file
|
|
|
model/semantic/__pycache__/similarity.cpython-313.pyc
ADDED
|
Binary file (544 Bytes). View file
|
|
|
model/semantic/__pycache__/skills.cpython-313.pyc
ADDED
|
Binary file (959 Bytes). View file
|
|
|
model/semantic/embeddings.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from sentence_transformers import SentenceTransformer

# Loaded once at import time; downloads the model weights on first use.
model = SentenceTransformer("all-MiniLM-L6-v2")

def embed_texts(texts: list[str]):
    """Encode texts into a numpy array of sentence embeddings.

    Presumably one row per input text — confirm shape against the
    sentence-transformers `encode` documentation.
    """
    return model.encode(texts, convert_to_numpy=True)
|
model/semantic/jd.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def prepare_jd_chunks(jd_text: str) -> list[str]:
    """Split a job description into substantive lines.

    A line qualifies when, after stripping whitespace, it is longer than
    20 characters; short headings and blank lines are dropped.
    """
    chunks = []
    for raw_line in jd_text.split("\n"):
        candidate = raw_line.strip()
        if len(candidate) > 20:
            chunks.append(candidate)
    return chunks
|
model/semantic/scoring.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def compute_scores(
    resume_skills: set[str],
    jd_skills: set[str],
    semantic_score: float
) -> dict:
    """Blend skill overlap and semantic similarity into a final score.

    skill_match = |resume ∩ jd| / |jd| (0.0 when the JD lists no skills);
    the overall score is an equal-weight average of the two components.
    All values are rounded to 3 decimal places.
    """
    skill_match = (
        len(resume_skills & jd_skills) / len(jd_skills) if jd_skills else 0.0
    )
    overall = 0.5 * skill_match + 0.5 * semantic_score
    return {
        "skill_match": round(skill_match, 3),
        "semantic_similarity": round(semantic_score, 3),
        "overall_score": round(overall, 3)
    }
|
model/semantic/similarity.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def semantic_similarity(resume_vecs, jd_vecs) -> float:
    """Return the maximum pairwise cosine similarity between the two vector sets."""
    pairwise = cosine_similarity(resume_vecs, jd_vecs)
    return float(np.max(pairwise))
|
model/semantic/skills.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import re

# Canonical lowercase skill vocabulary matched against resume text.
SKILL_VOCAB = {
    "python", "java", "javascript", "typescript",
    "react", "node", "fastapi", "django",
    "sql", "postgresql", "mongodb",
    "aws", "docker", "kubernetes",
    "machine learning", "nlp"
}

def extract_skills(skill_lines: list[str]) -> set[str]:
    """Return every vocabulary skill appearing as a whole word in the lines.

    Lines are joined and lower-cased once; matching is word-bounded so
    e.g. "javascripting" does not count as "javascript" or "java".
    """
    haystack = " ".join(skill_lines).lower()
    return {
        skill
        for skill in SKILL_VOCAB
        if re.search(rf"\b{re.escape(skill)}\b", haystack)
    }
|