MossaicMan commited on
Commit
345000b
·
verified ·
1 Parent(s): 8352cc2

Upload 33 files

Browse files
model/bias/__pycache__/detector.cpython-313.pyc ADDED
Binary file (812 Bytes). View file
 
model/bias/__pycache__/parser.cpython-313.pyc ADDED
Binary file (3.14 kB). View file
 
model/bias/__pycache__/prompt.cpython-313.pyc ADDED
Binary file (685 Bytes). View file
 
model/bias/__pycache__/rules.cpython-313.pyc ADDED
Binary file (1.86 kB). View file
 
model/bias/__pycache__/schema.cpython-313.pyc ADDED
Binary file (652 Bytes). View file
 
model/bias/__pycache__/system_checks.cpython-313.pyc ADDED
Binary file (606 Bytes). View file
 
model/bias/detector.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .rules import (
2
+ detect_gender_coded_language,
3
+ detect_prestige_bias,
4
+ detect_company_brand_bias,
5
+ )
6
+ from .system_checks import detect_score_dominance
7
+
8
+
9
def run_bias_rules(
    resume_text: str,
    jd_text: str,
    scores: dict
) -> list[str]:
    """Run every rule-based bias check and pool their findings.

    Order is stable: JD language check first, then the two resume checks,
    then the score-dominance system check.
    """
    grouped = (
        detect_gender_coded_language(jd_text),
        detect_prestige_bias(resume_text),
        detect_company_brand_bias(resume_text),
        detect_score_dominance(scores),
    )
    return [finding for group in grouped for finding in group]
model/bias/parser.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from typing import List
4
+ from pydantic import BaseModel
5
+
6
+
7
+ # =========================
8
+ # Pydantic Model
9
+ # =========================
10
+
11
class BiasReport(BaseModel):
    """Validated shape of the LLM bias-analysis response.

    NOTE(review): a near-identical BiasReport also exists in
    model/bias/schema.py — consider consolidating to one definition.
    """

    detected_biases: List[str]         # bias labels found (empty when the LLM returns a bare list)
    risk_level: str                    # expected "low" | "medium" | "high"; parser defaults to "unknown"
    explanations: List[str]            # one explanation per finding
    mitigation_suggestions: List[str]  # suggested remediation steps
16
+
17
+
18
+ # =========================
19
+ # JSON Normalizer
20
+ # =========================
21
+
22
def normalize_llm_json(raw: str) -> dict:
    """
    Extract and parse a JSON object from noisy LLM output.

    Tolerates markdown code fences, a leading 'json' language tag, and
    surrounding chatter. Raises ValueError on empty input or when no JSON
    object can be located.
    """
    if raw is None:
        raise ValueError("LLM returned None")

    text = raw.strip()
    if not text:
        raise ValueError("LLM returned empty output")

    # Unwrap the first fenced segment when the reply is wrapped in ``` fences.
    if text.startswith("```"):
        segments = text.split("```")
        if len(segments) >= 2:
            text = segments[1].strip()

    # Drop a leading 'json' tag left over from a ```json fence.
    if text.lower().startswith("json"):
        text = text[4:].strip()

    # Grab everything from the first '{' to the last '}' (greedy, DOTALL).
    found = re.search(r"\{.*\}", text, re.DOTALL)
    if found is None:
        raise ValueError(f"No JSON object found in LLM output:\n{text}")

    return json.loads(found.group())
54
+
55
+
56
+ # =========================
57
+ # Bias Output Parser
58
+ # =========================
59
+
60
def parse_bias_output(raw: str) -> BiasReport:
    """
    Normalize raw LLM bias-analysis text into a BiasReport.

    The model may emit 'findings_explanation' and 'mitigation_steps' as
    either dicts or lists; both shapes are accepted. Anything else
    degrades to empty lists rather than failing.
    """
    data = normalize_llm_json(raw)

    # ---------- findings_explanation ----------
    findings = data.get("findings_explanation", [])
    if isinstance(findings, dict):
        # Dict shape: keys name the biases, values explain them.
        detected_biases = list(findings.keys())
        explanations = list(findings.values())
    elif isinstance(findings, list):
        # List shape: explanations only, no separate bias labels.
        detected_biases, explanations = [], findings
    else:
        detected_biases, explanations = [], []

    # ---------- mitigation_steps ----------
    mitigation = data.get("mitigation_steps", [])
    if isinstance(mitigation, dict):
        suggestions = list(mitigation.values())
    else:
        suggestions = mitigation if isinstance(mitigation, list) else []

    return BiasReport(
        detected_biases=detected_biases,
        risk_level=data.get("overall_risk_level", "unknown"),
        explanations=explanations,
        mitigation_suggestions=suggestions,
    )
model/bias/prompt.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def build_bias_prompt(findings: list[str]) -> str:
    """Build the auditor prompt for the bias-explanation LLM call.

    Findings are rendered one per line as '- <finding>' instead of being
    interpolated as a raw Python list repr, which keeps quoting artifacts
    out of the prompt and is easier for the model to enumerate. An explicit
    '(none)' marker is emitted when there are no findings, so the model is
    not asked to explain an empty list literal.
    """
    findings_block = "\n".join(f"- {f}" for f in findings) if findings else "(none)"
    return f"""
You are an AI ethics auditor.

RULES:
- Do NOT infer personal attributes.
- Do NOT add new bias types.
- Only explain provided findings.
- Be factual and neutral.

FINDINGS:
{findings_block}

TASK:
1. Assess overall risk level (low, medium, high).
2. Explain why each finding matters.
3. Suggest mitigation steps.

Respond ONLY in JSON.
"""
model/bias/rules.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

# Words research associates with gendered job-ad language, by category.
GENDER_CODED_WORDS = {
    "masculine": ["aggressive", "dominant", "competitive", "rockstar"],
    "feminine": ["supportive", "empathetic", "nurturing"]
}

# Lowercase markers of prestige institutions.
PRESTIGE_SCHOOLS = {
    "iit", "nit", "mit", "stanford", "harvard", "oxford"
}

# Lowercase markers of high-brand-recognition employers.
BIG_TECH = {
    "google", "amazon", "meta", "microsoft", "apple"
}


def _contains_term(text_lower: str, term: str) -> bool:
    """Whole-word/phrase match in already-lowercased text.

    Fixes false positives from bare substring checks: 'mit' fired on
    'committed'/'submitted', 'meta' on 'metadata', 'dominant' on
    'predominantly'.
    """
    return re.search(rf"\b{re.escape(term)}\b", text_lower) is not None


def detect_gender_coded_language(jd_text: str) -> list[str]:
    """Return one finding per gender-coded word present in the JD text."""
    findings = []
    lower = jd_text.lower()
    for category, words in GENDER_CODED_WORDS.items():
        for w in words:
            if _contains_term(lower, w):
                findings.append(f"Gender-coded language detected: '{w}' ({category})")
    return findings


def detect_prestige_bias(resume_text: str) -> list[str]:
    """Return one finding per prestige-school mention in the resume."""
    lower = resume_text.lower()
    return [
        "Prestige institution mention may influence scoring"
        for school in PRESTIGE_SCHOOLS
        if _contains_term(lower, school)
    ]


def detect_company_brand_bias(resume_text: str) -> list[str]:
    """Return one finding per big-tech employer mention in the resume."""
    lower = resume_text.lower()
    return [
        "Well-known company mention may bias evaluation"
        for company in BIG_TECH
        if _contains_term(lower, company)
    ]
model/bias/schema.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List
3
+
4
class BiasReport(BaseModel):
    """Bias-analysis report schema.

    NOTE(review): duplicated by BiasReport in model/bias/parser.py —
    the parser currently constructs its own copy rather than this one.
    """

    detected_biases: List[str]         # bias labels found by rules/LLM
    risk_level: str  # low | medium | high
    explanations: List[str]            # why each finding matters
    mitigation_suggestions: List[str]  # suggested remediation steps
model/bias/system_checks.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
def detect_score_dominance(
    scores: dict,
    *,
    similarity_threshold: float = 0.85,
    skill_threshold: float = 0.3,
) -> list[str]:
    """Flag cases where semantic similarity dominates a weak skill match.

    Fixes a KeyError on partially-populated score dicts: missing
    'semantic_similarity' or 'skill_match' now simply yields no finding.
    Thresholds are keyword-only parameters defaulting to the original
    hard-coded values (0.85 / 0.3).

    Args:
        scores: score dict, expected to contain 'semantic_similarity'
            and 'skill_match' floats (see compute_scores).
        similarity_threshold: similarity above this is "dominant".
        skill_threshold: skill match below this is "weak".

    Returns:
        A list with at most one finding string.
    """
    findings = []
    similarity = scores.get("semantic_similarity")
    skill_match = scores.get("skill_match")
    if (
        similarity is not None
        and skill_match is not None
        and similarity > similarity_threshold
        and skill_match < skill_threshold
    ):
        findings.append(
            "Semantic similarity heavily influenced score despite weak skill match"
        )
    return findings
model/llm/__pycache__/client.cpython-313.pyc ADDED
Binary file (879 Bytes). View file
 
model/llm/__pycache__/extraction.cpython-313.pyc ADDED
Binary file (1.14 kB). View file
 
model/llm/__pycache__/parser.cpython-313.pyc ADDED
Binary file (784 Bytes). View file
 
model/llm/__pycache__/prompt.cpython-313.pyc ADDED
Binary file (1.09 kB). View file
 
model/llm/__pycache__/schemas.cpython-313.pyc ADDED
Binary file (1.02 kB). View file
 
model/llm/client.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_mistralai import ChatMistralAI
from langchain_core.messages import SystemMessage, HumanMessage
from dotenv import load_dotenv

# Load variables from a local .env file — presumably the Mistral API key;
# confirm against deployment configuration.
load_dotenv()

# -------------------------------------------------
# Initialize Mistral (API-based, no local model)
# -------------------------------------------------

# Module-level singleton client, created at import time.
# Low temperature (0.2) keeps explanations mostly deterministic.
llm = ChatMistralAI(
    model="mistral-small-latest",
    temperature=0.2,
)
15
+
16
+ # -------------------------------------------------
17
+ # LLM Runner (LangChain-native)
18
+ # -------------------------------------------------
19
+
20
def run_llm(prompt: str) -> str:
    """Send *prompt* to the Mistral chat model and return the text reply.

    Fixes: removed a leftover debug print (with a typo) that logged the
    response content type on every call.
    """
    response = llm.invoke([
        HumanMessage(content=prompt),
    ])
    return response.content
model/llm/demo.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_mistralai import ChatMistralAI
from langchain_core.messages import SystemMessage, HumanMessage
from dotenv import load_dotenv

# Load variables from a local .env file — presumably the Mistral API key;
# confirm against deployment configuration.
load_dotenv()

# -------------------------------------------------
# Initialize Mistral via API (NO DOWNLOAD)
# -------------------------------------------------

# Module-level client for this demo script, created at import time.
# NOTE(review): duplicates the client set up in model/llm/client.py.
llm = ChatMistralAI(
    model="mistral-small-latest",
    temperature=0.2
)
15
+
16
+ # -------------------------------------------------
17
+ # LLM Runner
18
+ # -------------------------------------------------
19
+
20
def run_llm(prompt: str) -> str:
    """Ask the Mistral chat model to explain an ATS evaluation result."""
    reply = llm.invoke([
        SystemMessage(content="You explain ATS evaluations."),
        HumanMessage(content=prompt),
    ])
    return reply.content
28
+
29
# Manual smoke test — performs a real API call; requires credentials.
if __name__ == "__main__":
    print(run_llm("Explain why a resume with no projects scores low in ATS."))
model/llm/extraction.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+
4
def extract_json(raw: str) -> dict:
    """Parse the first JSON object embedded in raw LLM output.

    Strips an optional leading ``` / ```json fence (case-insensitive) and a
    trailing fence, then parses from the first '{' through the last '}'.

    Raises:
        ValueError: on empty input or when no object delimiters are found.
    """
    if not raw or not raw.strip():
        raise ValueError("Empty LLM output")

    cleaned = raw.strip()

    # Strip markdown fences at either end, if present.
    cleaned = re.sub(r"^```(?:json)?", "", cleaned, flags=re.IGNORECASE).strip()
    cleaned = re.sub(r"```$", "", cleaned).strip()

    # Locate the outermost object delimiters defensively.
    opening = cleaned.find("{")
    closing = cleaned.rfind("}")
    if opening == -1 or closing == -1:
        raise ValueError("No JSON object found")

    return json.loads(cleaned[opening:closing + 1])
model/llm/parser.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .schemas import Explanation
2
+ from model.llm.extraction import extract_json
3
def parse_llm_output(raw: str) -> Explanation:
    """Parse raw LLM output into a validated Explanation model.

    Fixes: removed a garbled leftover debug print that ran on every parse.

    Some model replies collapse 'explanation' into a plain string; that
    shape is normalized into the ExplanationDetail dict shape, using the
    string as the overall_score explanation.

    Raises:
        ValueError: from extract_json when no JSON object is present.
    """
    data = extract_json(raw)

    # Normalize string-shaped 'explanation' into the detailed dict shape.
    if isinstance(data.get("explanation"), str):
        data["explanation"] = {
            "overall_score": data["explanation"],
            "skills": None,
            "semantic_similarity": None,
        }

    return Explanation(**data)
model/llm/prompt.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def build_prompt(
    structured_resume: dict,
    skills_found: list[str],
    scores: dict,
    job_description: str
) -> str:
    """Build the ATS-explanation prompt for the LLM.

    Args:
        structured_resume: parsed resume sections (interpolated via its repr).
        skills_found: skills detected in the resume (interpolated via its repr).
        scores: score dict — presumably the output of compute_scores
            (skill_match / semantic_similarity / overall_score); the prompt
            instructs the model not to modify these values.
        job_description: raw job-description text.

    Returns:
        A prompt string instructing the model to respond with JSON only.
    """
    return f"""
You are an ATS evaluation assistant.

RULES:
- Do NOT invent skills.
- Do NOT change scores.
- Use ONLY the provided resume content.
- Base explanations on evidence.
- Be concise and factual.

RESUME (STRUCTURED):
{structured_resume}

SKILLS FOUND:
{skills_found}

JOB DESCRIPTION:
{job_description}

SCORES (DO NOT MODIFY):
{scores}

TASK:
1. Explain why the overall score is what it is.
2. List clear strengths based on resume evidence.
3. Identify gaps relative to the job description.
4. Suggest realistic improvements.
5. Generate interview questions strictly from resume + JD.

Respond ONLY in valid JSON.
"""
model/llm/schemas.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List
3
+
4
class ExplanationDetail(BaseModel):
    """Per-aspect explanation text for one evaluated resume."""

    overall_score: str                      # why the overall score is what it is
    skills: str | None = None               # optional commentary on the skill match
    semantic_similarity: str | None = None  # optional commentary on the similarity score
8
+
9
class Explanation(BaseModel):
    """Full structured LLM explanation of an ATS evaluation result."""

    explanation: ExplanationDetail  # per-aspect explanation text
    strengths: List[str]            # evidence-based strengths from the resume
    gaps: List[str]                 # gaps relative to the job description
    improvements: List[str]         # suggested realistic improvements
    interview_questions: List[str]  # questions grounded in resume + JD
model/semantic/__pycache__/embeddings.cpython-313.pyc ADDED
Binary file (513 Bytes). View file
 
model/semantic/__pycache__/jd.cpython-313.pyc ADDED
Binary file (601 Bytes). View file
 
model/semantic/__pycache__/scoring.cpython-313.pyc ADDED
Binary file (787 Bytes). View file
 
model/semantic/__pycache__/similarity.cpython-313.pyc ADDED
Binary file (544 Bytes). View file
 
model/semantic/__pycache__/skills.cpython-313.pyc ADDED
Binary file (959 Bytes). View file
 
model/semantic/embeddings.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
from sentence_transformers import SentenceTransformer

# Small general-purpose sentence encoder, loaded once at import time.
# NOTE(review): first import triggers a model download/load, which can be
# slow; consider lazy initialization if startup cost matters.
model = SentenceTransformer("all-MiniLM-L6-v2")

def embed_texts(texts: list[str]):
    """Encode a batch of strings; returns a NumPy array (convert_to_numpy=True)."""
    return model.encode(texts, convert_to_numpy=True)
model/semantic/jd.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def prepare_jd_chunks(jd_text: str) -> list[str]:
    """Split a job description into substantive lines.

    Each line is whitespace-stripped; only lines longer than 20 characters
    after stripping are kept (shorter lines are assumed to be headings or
    noise — confirm against typical JD formatting).
    """
    chunks = []
    for raw_line in jd_text.split("\n"):
        stripped = raw_line.strip()
        if len(stripped) > 20:
            chunks.append(stripped)
    return chunks
model/semantic/scoring.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def compute_scores(
    resume_skills: set[str],
    jd_skills: set[str],
    semantic_score: float
) -> dict:
    """Combine skill overlap and semantic similarity into a final score.

    skill_match is the fraction of JD skills present in the resume
    (0.0 when the JD lists no skills). The overall score is an even
    50/50 blend of skill_match and semantic_score. All values are
    rounded to 3 decimal places.
    """
    matched = resume_skills & jd_skills
    skill_match = len(matched) / len(jd_skills) if jd_skills else 0.0
    blended = 0.5 * skill_match + 0.5 * semantic_score

    return {
        "skill_match": round(skill_match, 3),
        "semantic_similarity": round(semantic_score, 3),
        "overall_score": round(blended, 3),
    }
model/semantic/similarity.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+
4
def semantic_similarity(resume_vecs, jd_vecs) -> float:
    """Return the best cosine similarity between any resume/JD vector pair.

    Reimplemented with plain NumPy (row-normalize, matrix product, max) so
    this one-liner no longer needs scikit-learn; zero-norm rows are guarded
    with a small epsilon and contribute ~0 instead of producing NaN.

    Args:
        resume_vecs: array-like of shape (n_resume, dim) — assumed 2-D,
            as produced by the embedding model; confirm with callers.
        jd_vecs: array-like of shape (n_jd, dim).

    Returns:
        The maximum pairwise cosine similarity as a Python float.
    """
    resume = np.asarray(resume_vecs, dtype=float)
    jd = np.asarray(jd_vecs, dtype=float)

    # Normalize rows; the epsilon floor avoids division by zero.
    resume_norm = resume / np.maximum(np.linalg.norm(resume, axis=1, keepdims=True), 1e-12)
    jd_norm = jd / np.maximum(np.linalg.norm(jd, axis=1, keepdims=True), 1e-12)

    return float(np.max(resume_norm @ jd_norm.T))
model/semantic/skills.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

# Canonical lowercase skill vocabulary matched against resume text.
SKILL_VOCAB = {
    "python", "java", "javascript", "typescript",
    "react", "node", "fastapi", "django",
    "sql", "postgresql", "mongodb",
    "aws", "docker", "kubernetes",
    "machine learning", "nlp"
}

def extract_skills(skill_lines: list[str]) -> set[str]:
    """Return every vocabulary skill that appears in the given lines.

    Lines are joined and lower-cased, and each skill is matched with word
    boundaries, so e.g. 'java' does not fire inside 'javascript'.
    """
    corpus = " ".join(skill_lines).lower()
    return {
        skill
        for skill in SKILL_VOCAB
        if re.search(rf"\b{re.escape(skill)}\b", corpus)
    }