"""Ingestion components for parsing resumes and persisting structured ATS artifacts.""" from typing import Literal, TypeVar from pydantic import BaseModel, Field from src.llm.client import LLMClient from src.llm.errors import coerce_provider_exception SchemaModelT = TypeVar("SchemaModelT", bound=BaseModel) AllowedSectionType = Literal[ "summary", "experience", "education", "skills", "projects", "certifications", "contact", "general", ] class NameFallbackResult(BaseModel): """Result shape for name fallback resolution.""" name: str | None = None confidence: float = Field(default=0.0, ge=0.0, le=1.0) reason: str = "" class SectionFallbackResult(BaseModel): """Result shape for section fallback classification.""" section_type: AllowedSectionType confidence: float = Field(default=0.0, ge=0.0, le=1.0) reason: str = "" class LLMFallbackResolver: """Data model for llmfallbackresolver values.""" def __init__(self, llm_client: LLMClient, *, model_alias: str = "extractor_default") -> None: self._llm = llm_client self._model_alias = model_alias def resolve_name( self, *, candidate_lines: list[str], emails: list[str], phones: list[str], language: str | None, ) -> NameFallbackResult: """Resolve the most likely candidate name from header context.""" prompt = ( "Extract the most likely person full name from resume header lines.\n" "Rules:\n" "- Prefer real person names (2-4 tokens).\n" "- Reject locations, skills, roles, and section titles.\n" "- If uncertain, return null name and low confidence.\n\n" f"language={language or 'unknown'}\n" f"emails={emails}\n" f"phones={phones}\n" f"candidate_lines={candidate_lines}\n" "Return JSON: {name, confidence, reason}." ) return self._generate(prompt=prompt, schema=NameFallbackResult) def classify_section( self, *, raw_heading: str, content_excerpt: str, language: str | None, ) -> SectionFallbackResult: """Classify an ambiguous section into one allowed section label.""" prompt = ( "Classify resume section into one of these labels only: " "summary, experience, education, skills, projects, certifications, contact, general.\n" "Use heading and content. Favor contact when email/phone/link patterns exist.\n\n" f"language={language or 'unknown'}\n" f"heading={raw_heading!r}\n" f"content_excerpt={content_excerpt!r}\n" "Return JSON: {section_type, confidence, reason}." ) return self._generate(prompt=prompt, schema=SectionFallbackResult) def _generate(self, *, prompt: str, schema: type[SchemaModelT]) -> SchemaModelT: """Helper that handles generate. Args: prompt (str): Prompt sent to the language model. schema (type[SchemaModelT]): Pydantic model used to validate structured response payload. Returns: SchemaModelT: Return value for this function. Raises: normalized: Raised when validation or execution constraints are violated. """ try: return self._llm.generate_structured( prompt=prompt, schema=schema, model_alias=self._model_alias, temperature=0.0, ) except Exception as exc: normalized = coerce_provider_exception(exc) if normalized is not exc: raise normalized from exc raise