MossaicMan commited on
Commit
345000b
·
verified ·
1 Parent(s): 8352cc2

Upload 33 files

Browse files
model/bias/__pycache__/detector.cpython-313.pyc ADDED
Binary file (812 Bytes). View file
 
model/bias/__pycache__/parser.cpython-313.pyc ADDED
Binary file (3.14 kB). View file
 
model/bias/__pycache__/prompt.cpython-313.pyc ADDED
Binary file (685 Bytes). View file
 
model/bias/__pycache__/rules.cpython-313.pyc ADDED
Binary file (1.86 kB). View file
 
model/bias/__pycache__/schema.cpython-313.pyc ADDED
Binary file (652 Bytes). View file
 
model/bias/__pycache__/system_checks.cpython-313.pyc ADDED
Binary file (606 Bytes). View file
 
model/bias/detector.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .rules import (
2
+ detect_gender_coded_language,
3
+ detect_prestige_bias,
4
+ detect_company_brand_bias,
5
+ )
6
+ from .system_checks import detect_score_dominance
7
+
8
+
9
def run_bias_rules(
    resume_text: str,
    jd_text: str,
    scores: dict
) -> list[str]:
    """Run every rule-based bias check and pool their findings.

    Order is stable: JD language check first, then the two resume checks,
    then the score-dominance system check.
    """
    grouped = (
        detect_gender_coded_language(jd_text),
        detect_prestige_bias(resume_text),
        detect_company_brand_bias(resume_text),
        detect_score_dominance(scores),
    )
    return [finding for group in grouped for finding in group]
model/bias/parser.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from typing import List
4
+ from pydantic import BaseModel
5
+
6
+
7
+ # =========================
8
+ # Pydantic Model
9
+ # =========================
10
+
11
class BiasReport(BaseModel):
    """Validated shape of the LLM bias-analysis response.

    NOTE(review): a near-identical BiasReport also exists in
    model/bias/schema.py — consider consolidating to one definition.
    """

    detected_biases: List[str]         # bias labels found (empty when the LLM returns a bare list)
    risk_level: str                    # expected "low" | "medium" | "high"; parser defaults to "unknown"
    explanations: List[str]            # one explanation per finding
    mitigation_suggestions: List[str]  # suggested remediation steps
16
+
17
+
18
+ # =========================
19
+ # JSON Normalizer
20
+ # =========================
21
+
22
def normalize_llm_json(raw: str) -> dict:
    """
    Extract and parse a JSON object from noisy LLM output.

    Tolerates markdown code fences, a leading 'json' language tag, and
    surrounding chatter. Raises ValueError on empty input or when no JSON
    object can be located.
    """
    if raw is None:
        raise ValueError("LLM returned None")

    text = raw.strip()
    if not text:
        raise ValueError("LLM returned empty output")

    # Unwrap the first fenced segment when the reply is wrapped in ``` fences.
    if text.startswith("```"):
        segments = text.split("```")
        if len(segments) >= 2:
            text = segments[1].strip()

    # Drop a leading 'json' tag left over from a ```json fence.
    if text.lower().startswith("json"):
        text = text[4:].strip()

    # Grab everything from the first '{' to the last '}' (greedy, DOTALL).
    found = re.search(r"\{.*\}", text, re.DOTALL)
    if found is None:
        raise ValueError(f"No JSON object found in LLM output:\n{text}")

    return json.loads(found.group())
54
+
55
+
56
+ # =========================
57
+ # Bias Output Parser
58
+ # =========================
59
+
60
def parse_bias_output(raw: str) -> BiasReport:
    """
    Normalize raw LLM bias-analysis text into a BiasReport.

    The model may emit 'findings_explanation' and 'mitigation_steps' as
    either dicts or lists; both shapes are accepted. Anything else
    degrades to empty lists rather than failing.
    """
    data = normalize_llm_json(raw)

    # ---------- findings_explanation ----------
    findings = data.get("findings_explanation", [])
    if isinstance(findings, dict):
        # Dict shape: keys name the biases, values explain them.
        detected_biases = list(findings.keys())
        explanations = list(findings.values())
    elif isinstance(findings, list):
        # List shape: explanations only, no separate bias labels.
        detected_biases, explanations = [], findings
    else:
        detected_biases, explanations = [], []

    # ---------- mitigation_steps ----------
    mitigation = data.get("mitigation_steps", [])
    if isinstance(mitigation, dict):
        suggestions = list(mitigation.values())
    else:
        suggestions = mitigation if isinstance(mitigation, list) else []

    return BiasReport(
        detected_biases=detected_biases,
        risk_level=data.get("overall_risk_level", "unknown"),
        explanations=explanations,
        mitigation_suggestions=suggestions,
    )
model/bias/prompt.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def build_bias_prompt(findings: list[str]) -> str:
    """Build the auditor prompt for the bias-explanation LLM call.

    Findings are rendered one per line as '- <finding>' instead of being
    interpolated as a raw Python list repr, which keeps quoting artifacts
    out of the prompt and is easier for the model to enumerate. An explicit
    '(none)' marker is emitted when there are no findings, so the model is
    not asked to explain an empty list literal.
    """
    findings_block = "\n".join(f"- {f}" for f in findings) if findings else "(none)"
    return f"""
You are an AI ethics auditor.

RULES:
- Do NOT infer personal attributes.
- Do NOT add new bias types.
- Only explain provided findings.
- Be factual and neutral.

FINDINGS:
{findings_block}

TASK:
1. Assess overall risk level (low, medium, high).
2. Explain why each finding matters.
3. Suggest mitigation steps.

Respond ONLY in JSON.
"""
model/bias/rules.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

# Words research associates with gendered job-ad language, by category.
GENDER_CODED_WORDS = {
    "masculine": ["aggressive", "dominant", "competitive", "rockstar"],
    "feminine": ["supportive", "empathetic", "nurturing"]
}

# Lowercase markers of prestige institutions.
PRESTIGE_SCHOOLS = {
    "iit", "nit", "mit", "stanford", "harvard", "oxford"
}

# Lowercase markers of high-brand-recognition employers.
BIG_TECH = {
    "google", "amazon", "meta", "microsoft", "apple"
}


def _contains_term(text_lower: str, term: str) -> bool:
    """Whole-word/phrase match in already-lowercased text.

    Fixes false positives from bare substring checks: 'mit' fired on
    'committed'/'submitted', 'meta' on 'metadata', 'dominant' on
    'predominantly'.
    """
    return re.search(rf"\b{re.escape(term)}\b", text_lower) is not None


def detect_gender_coded_language(jd_text: str) -> list[str]:
    """Return one finding per gender-coded word present in the JD text."""
    findings = []
    lower = jd_text.lower()
    for category, words in GENDER_CODED_WORDS.items():
        for w in words:
            if _contains_term(lower, w):
                findings.append(f"Gender-coded language detected: '{w}' ({category})")
    return findings


def detect_prestige_bias(resume_text: str) -> list[str]:
    """Return one finding per prestige-school mention in the resume."""
    lower = resume_text.lower()
    return [
        "Prestige institution mention may influence scoring"
        for school in PRESTIGE_SCHOOLS
        if _contains_term(lower, school)
    ]


def detect_company_brand_bias(resume_text: str) -> list[str]:
    """Return one finding per big-tech employer mention in the resume."""
    lower = resume_text.lower()
    return [
        "Well-known company mention may bias evaluation"
        for company in BIG_TECH
        if _contains_term(lower, company)
    ]
model/bias/schema.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List
3
+
4
class BiasReport(BaseModel):
    """Bias-analysis report schema.

    NOTE(review): duplicated by BiasReport in model/bias/parser.py —
    the parser currently constructs its own copy rather than this one.
    """

    detected_biases: List[str]         # bias labels found by rules/LLM
    risk_level: str  # low | medium | high
    explanations: List[str]            # why each finding matters
    mitigation_suggestions: List[str]  # suggested remediation steps
model/bias/system_checks.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
def detect_score_dominance(
    scores: dict,
    *,
    similarity_threshold: float = 0.85,
    skill_threshold: float = 0.3,
) -> list[str]:
    """Flag cases where semantic similarity dominates a weak skill match.

    Fixes a KeyError on partially-populated score dicts: missing
    'semantic_similarity' or 'skill_match' now simply yields no finding.
    Thresholds are keyword-only parameters defaulting to the original
    hard-coded values (0.85 / 0.3).

    Args:
        scores: score dict, expected to contain 'semantic_similarity'
            and 'skill_match' floats (see compute_scores).
        similarity_threshold: similarity above this is "dominant".
        skill_threshold: skill match below this is "weak".

    Returns:
        A list with at most one finding string.
    """
    findings = []
    similarity = scores.get("semantic_similarity")
    skill_match = scores.get("skill_match")
    if (
        similarity is not None
        and skill_match is not None
        and similarity > similarity_threshold
        and skill_match < skill_threshold
    ):
        findings.append(
            "Semantic similarity heavily influenced score despite weak skill match"
        )
    return findings
model/llm/__pycache__/client.cpython-313.pyc ADDED
Binary file (879 Bytes). View file
 
model/llm/__pycache__/extraction.cpython-313.pyc ADDED
Binary file (1.14 kB). View file
 
model/llm/__pycache__/parser.cpython-313.pyc ADDED
Binary file (784 Bytes). View file
 
model/llm/__pycache__/prompt.cpython-313.pyc ADDED
Binary file (1.09 kB). View file
 
model/llm/__pycache__/schemas.cpython-313.pyc ADDED
Binary file (1.02 kB). View file
 
model/llm/client.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_mistralai import ChatMistralAI
from langchain_core.messages import SystemMessage, HumanMessage
from dotenv import load_dotenv

# Load variables from a local .env file — presumably the Mistral API key;
# confirm against deployment configuration.
load_dotenv()

# -------------------------------------------------
# Initialize Mistral (API-based, no local model)
# -------------------------------------------------

# Module-level singleton client, created at import time.
# Low temperature (0.2) keeps explanations mostly deterministic.
llm = ChatMistralAI(
    model="mistral-small-latest",
    temperature=0.2,
)
15
+
16
+ # -------------------------------------------------
17
+ # LLM Runner (LangChain-native)
18
+ # -------------------------------------------------
19
+
20
def run_llm(prompt: str) -> str:
    """Send *prompt* to the Mistral chat model and return the text reply.

    Fixes: removed a leftover debug print (with a typo) that logged the
    response content type on every call.
    """
    response = llm.invoke([
        HumanMessage(content=prompt),
    ])
    return response.content
model/llm/demo.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_mistralai import ChatMistralAI
from langchain_core.messages import SystemMessage, HumanMessage
from dotenv import load_dotenv

# Load variables from a local .env file — presumably the Mistral API key;
# confirm against deployment configuration.
load_dotenv()

# -------------------------------------------------
# Initialize Mistral via API (NO DOWNLOAD)
# -------------------------------------------------

# Module-level client for this demo script, created at import time.
# NOTE(review): duplicates the client set up in model/llm/client.py.
llm = ChatMistralAI(
    model="mistral-small-latest",
    temperature=0.2
)
15
+
16
+ # -------------------------------------------------
17
+ # LLM Runner
18
+ # -------------------------------------------------
19
+
20
def run_llm(prompt: str) -> str:
    """Ask the Mistral chat model to explain an ATS evaluation result."""
    reply = llm.invoke([
        SystemMessage(content="You explain ATS evaluations."),
        HumanMessage(content=prompt),
    ])
    return reply.content
28
+
29
# Manual smoke test — performs a real API call; requires credentials.
if __name__ == "__main__":
    print(run_llm("Explain why a resume with no projects scores low in ATS."))
model/llm/extraction.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+
4
def extract_json(raw: str) -> dict:
    """Parse the first JSON object embedded in raw LLM output.

    Strips an optional leading ``` / ```json fence (case-insensitive) and a
    trailing fence, then parses from the first '{' through the last '}'.

    Raises:
        ValueError: on empty input or when no object delimiters are found.
    """
    if not raw or not raw.strip():
        raise ValueError("Empty LLM output")

    cleaned = raw.strip()

    # Strip markdown fences at either end, if present.
    cleaned = re.sub(r"^```(?:json)?", "", cleaned, flags=re.IGNORECASE).strip()
    cleaned = re.sub(r"```$", "", cleaned).strip()

    # Locate the outermost object delimiters defensively.
    opening = cleaned.find("{")
    closing = cleaned.rfind("}")
    if opening == -1 or closing == -1:
        raise ValueError("No JSON object found")

    return json.loads(cleaned[opening:closing + 1])
model/llm/parser.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .schemas import Explanation
2
+ from model.llm.extraction import extract_json
3
def parse_llm_output(raw: str) -> Explanation:
    """Parse raw LLM output into a validated Explanation model.

    Fixes: removed a garbled leftover debug print that ran on every parse.

    Some model replies collapse 'explanation' into a plain string; that
    shape is normalized into the ExplanationDetail dict shape, using the
    string as the overall_score explanation.

    Raises:
        ValueError: from extract_json when no JSON object is present.
    """
    data = extract_json(raw)

    # Normalize string-shaped 'explanation' into the detailed dict shape.
    if isinstance(data.get("explanation"), str):
        data["explanation"] = {
            "overall_score": data["explanation"],
            "skills": None,
            "semantic_similarity": None,
        }

    return Explanation(**data)
model/llm/prompt.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def build_prompt(
    structured_resume: dict,
    skills_found: list[str],
    scores: dict,
    job_description: str
) -> str:
    """Build the ATS-explanation prompt for the LLM.

    Args:
        structured_resume: parsed resume sections (interpolated via its repr).
        skills_found: skills detected in the resume (interpolated via its repr).
        scores: score dict — presumably the output of compute_scores
            (skill_match / semantic_similarity / overall_score); the prompt
            instructs the model not to modify these values.
        job_description: raw job-description text.

    Returns:
        A prompt string instructing the model to respond with JSON only.
    """
    return f"""
You are an ATS evaluation assistant.

RULES:
- Do NOT invent skills.
- Do NOT change scores.
- Use ONLY the provided resume content.
- Base explanations on evidence.
- Be concise and factual.

RESUME (STRUCTURED):
{structured_resume}

SKILLS FOUND:
{skills_found}

JOB DESCRIPTION:
{job_description}

SCORES (DO NOT MODIFY):
{scores}

TASK:
1. Explain why the overall score is what it is.
2. List clear strengths based on resume evidence.
3. Identify gaps relative to the job description.
4. Suggest realistic improvements.
5. Generate interview questions strictly from resume + JD.

Respond ONLY in valid JSON.
"""
model/llm/schemas.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List
3
+
4
class ExplanationDetail(BaseModel):
    """Per-aspect explanation text for one evaluated resume."""

    overall_score: str                      # why the overall score is what it is
    skills: str | None = None               # optional commentary on the skill match
    semantic_similarity: str | None = None  # optional commentary on the similarity score
8
+
9
class Explanation(BaseModel):
    """Full structured LLM explanation of an ATS evaluation result."""

    explanation: ExplanationDetail  # per-aspect explanation text
    strengths: List[str]            # evidence-based strengths from the resume
    gaps: List[str]                 # gaps relative to the job description
    improvements: List[str]         # suggested realistic improvements
    interview_questions: List[str]  # questions grounded in resume + JD
model/semantic/__pycache__/embeddings.cpython-313.pyc ADDED
Binary file (513 Bytes). View file
 
model/semantic/__pycache__/jd.cpython-313.pyc ADDED
Binary file (601 Bytes). View file
 
model/semantic/__pycache__/scoring.cpython-313.pyc ADDED
Binary file (787 Bytes). View file
 
model/semantic/__pycache__/similarity.cpython-313.pyc ADDED
Binary file (544 Bytes). View file
 
model/semantic/__pycache__/skills.cpython-313.pyc ADDED
Binary file (959 Bytes). View file
 
model/semantic/embeddings.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
from sentence_transformers import SentenceTransformer

# Small general-purpose sentence encoder, loaded once at import time.
# NOTE(review): first import triggers a model download/load, which can be
# slow; consider lazy initialization if startup cost matters.
model = SentenceTransformer("all-MiniLM-L6-v2")

def embed_texts(texts: list[str]):
    """Encode a batch of strings; returns a NumPy array (convert_to_numpy=True)."""
    return model.encode(texts, convert_to_numpy=True)
model/semantic/jd.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def prepare_jd_chunks(jd_text: str) -> list[str]:
    """Split a job description into substantive lines.

    Each line is whitespace-stripped; only lines longer than 20 characters
    after stripping are kept (shorter lines are assumed to be headings or
    noise — confirm against typical JD formatting).
    """
    chunks = []
    for raw_line in jd_text.split("\n"):
        stripped = raw_line.strip()
        if len(stripped) > 20:
            chunks.append(stripped)
    return chunks
model/semantic/scoring.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def compute_scores(
    resume_skills: set[str],
    jd_skills: set[str],
    semantic_score: float
) -> dict:
    """Combine skill overlap and semantic similarity into a final score.

    skill_match is the fraction of JD skills present in the resume
    (0.0 when the JD lists no skills). The overall score is an even
    50/50 blend of skill_match and semantic_score. All values are
    rounded to 3 decimal places.
    """
    matched = resume_skills & jd_skills
    skill_match = len(matched) / len(jd_skills) if jd_skills else 0.0
    blended = 0.5 * skill_match + 0.5 * semantic_score

    return {
        "skill_match": round(skill_match, 3),
        "semantic_similarity": round(semantic_score, 3),
        "overall_score": round(blended, 3),
    }
model/semantic/similarity.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+
4
def semantic_similarity(resume_vecs, jd_vecs) -> float:
    """Return the best cosine similarity between any resume/JD vector pair.

    Reimplemented with plain NumPy (row-normalize, matrix product, max) so
    this one-liner no longer needs scikit-learn; zero-norm rows are guarded
    with a small epsilon and contribute ~0 instead of producing NaN.

    Args:
        resume_vecs: array-like of shape (n_resume, dim) — assumed 2-D,
            as produced by the embedding model; confirm with callers.
        jd_vecs: array-like of shape (n_jd, dim).

    Returns:
        The maximum pairwise cosine similarity as a Python float.
    """
    resume = np.asarray(resume_vecs, dtype=float)
    jd = np.asarray(jd_vecs, dtype=float)

    # Normalize rows; the epsilon floor avoids division by zero.
    resume_norm = resume / np.maximum(np.linalg.norm(resume, axis=1, keepdims=True), 1e-12)
    jd_norm = jd / np.maximum(np.linalg.norm(jd, axis=1, keepdims=True), 1e-12)

    return float(np.max(resume_norm @ jd_norm.T))
model/semantic/skills.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

# Canonical lowercase skill vocabulary matched against resume text.
SKILL_VOCAB = {
    "python", "java", "javascript", "typescript",
    "react", "node", "fastapi", "django",
    "sql", "postgresql", "mongodb",
    "aws", "docker", "kubernetes",
    "machine learning", "nlp"
}

def extract_skills(skill_lines: list[str]) -> set[str]:
    """Return every vocabulary skill that appears in the given lines.

    Lines are joined and lower-cased, and each skill is matched with word
    boundaries, so e.g. 'java' does not fire inside 'javascript'.
    """
    corpus = " ".join(skill_lines).lower()
    return {
        skill
        for skill in SKILL_VOCAB
        if re.search(rf"\b{re.escape(skill)}\b", corpus)
    }