# thereisnohr/src/ingest/model_fallback.py
# Author: jbeiroa
# Commit 74711df: Initial clean deploy of demo app
"""Ingestion components for parsing resumes and persisting structured ATS artifacts."""
from typing import Literal, TypeVar
from pydantic import BaseModel, Field
from src.llm.client import LLMClient
from src.llm.errors import coerce_provider_exception
# Type variable bound to pydantic ``BaseModel``; ``LLMFallbackResolver._generate``
# uses it so the declared return type matches the ``schema`` class it is given.
SchemaModelT = TypeVar("SchemaModelT", bound=BaseModel)
# Closed set of section labels; ``SectionFallbackResult.section_type`` is
# restricted to exactly these values.
AllowedSectionType = Literal[
    "summary", "experience", "education", "skills",
    "projects", "certifications", "contact", "general",
]
class NameFallbackResult(BaseModel):
    """Structured payload produced by the name fallback lookup."""

    # Resolved full name; defaults to None (no name resolved).
    name: str | None = None
    # Confidence score validated to lie within [0.0, 1.0]; defaults to 0.0.
    confidence: float = Field(0.0, ge=0.0, le=1.0)
    # Free-text justification for the answer; empty by default.
    reason: str = ""
class SectionFallbackResult(BaseModel):
    """Structured payload produced by the section fallback classification."""

    # Required label; must be one of the AllowedSectionType literals.
    section_type: AllowedSectionType
    # Confidence score validated to lie within [0.0, 1.0]; defaults to 0.0.
    confidence: float = Field(0.0, ge=0.0, le=1.0)
    # Free-text justification for the chosen label; empty by default.
    reason: str = ""
class LLMFallbackResolver:
    """Ask an LLM to resolve a candidate name or classify a resume section.

    Each public method builds a plain-text prompt and delegates to
    ``_generate``, which requests a schema-validated structured response from
    the wrapped client using the configured model alias.
    """

    def __init__(self, llm_client: LLMClient, *, model_alias: str = "extractor_default") -> None:
        """Keep the client and the model alias used for every request.

        Args:
            llm_client: Client whose ``generate_structured`` method is called.
            model_alias: Alias forwarded to the client on each call.
        """
        self._model_alias = model_alias
        self._llm = llm_client

    def resolve_name(
        self,
        *,
        candidate_lines: list[str],
        emails: list[str],
        phones: list[str],
        language: str | None,
    ) -> NameFallbackResult:
        """Ask the model for the most likely person name among header lines.

        Args:
            candidate_lines: Header lines that may contain the name.
            emails: Email strings included in the prompt as context.
            phones: Phone strings included in the prompt as context.
            language: Language tag for the prompt; ``'unknown'`` is used when
                the value is falsy.

        Returns:
            The ``NameFallbackResult`` produced by the structured LLM call.
        """
        language_label = language or "unknown"
        # Joining with "\n" yields the prompt text line by line; the empty
        # entry produces the blank line separating rules from context.
        prompt_lines = [
            "Extract the most likely person full name from resume header lines.",
            "Rules:",
            "- Prefer real person names (2-4 tokens).",
            "- Reject locations, skills, roles, and section titles.",
            "- If uncertain, return null name and low confidence.",
            "",
            f"language={language_label}",
            f"emails={emails}",
            f"phones={phones}",
            f"candidate_lines={candidate_lines}",
            "Return JSON: {name, confidence, reason}.",
        ]
        return self._generate(prompt="\n".join(prompt_lines), schema=NameFallbackResult)

    def classify_section(
        self,
        *,
        raw_heading: str,
        content_excerpt: str,
        language: str | None,
    ) -> SectionFallbackResult:
        """Ask the model to assign one allowed label to an ambiguous section.

        Args:
            raw_heading: Section heading, embedded in the prompt via ``repr``.
            content_excerpt: Section text sample, embedded via ``repr``.
            language: Language tag for the prompt; ``'unknown'`` is used when
                the value is falsy.

        Returns:
            The ``SectionFallbackResult`` produced by the structured LLM call.
        """
        language_label = language or "unknown"
        task_line = (
            "Classify resume section into one of these labels only: "
            "summary, experience, education, skills, projects, certifications, contact, general."
        )
        # The empty entry produces the blank line between instructions and context.
        prompt_lines = [
            task_line,
            "Use heading and content. Favor contact when email/phone/link patterns exist.",
            "",
            f"language={language_label}",
            f"heading={raw_heading!r}",
            f"content_excerpt={content_excerpt!r}",
            "Return JSON: {section_type, confidence, reason}.",
        ]
        return self._generate(prompt="\n".join(prompt_lines), schema=SectionFallbackResult)

    def _generate(self, *, prompt: str, schema: type[SchemaModelT]) -> SchemaModelT:
        """Run one structured generation call and normalize provider errors.

        Args:
            prompt: Prompt text sent to the language model.
            schema: Pydantic model class passed to the client as the response
                schema.

        Returns:
            Whatever ``generate_structured`` returns for the given schema.

        Raises:
            Exception: If the call fails, the caught exception is passed to
                ``coerce_provider_exception``. When that returns a different
                object it is raised, chained to the original; otherwise the
                original exception is re-raised unchanged.
        """
        try:
            parsed = self._llm.generate_structured(
                prompt=prompt,
                schema=schema,
                model_alias=self._model_alias,
                temperature=0.0,
            )
        except Exception as caught:
            translated = coerce_provider_exception(caught)
            if translated is caught:
                # Nothing to translate: propagate with the original traceback.
                raise
            raise translated from caught
        else:
            return parsed