Spaces:

jbeiroa
/

thereisnohr

Sleeping

App Files Files Community

thereisnohr / src /ingest /model_fallback.py

jbeiroa

Initial clean deploy of demo app

74711df about 2 months ago

raw

history blame contribute delete

3.77 kB

	"""Ingestion components for parsing resumes and persisting structured ATS artifacts."""

	from typing import Literal, TypeVar

	from pydantic import BaseModel, Field

	from src.llm.client import LLMClient
	from src.llm.errors import coerce_provider_exception

	# Type variable bound to pydantic's BaseModel. LLMFallbackResolver._generate
	# uses it so that the schema class passed in is also the declared return type.
	SchemaModelT = TypeVar("SchemaModelT", bound=BaseModel)


	# Closed set of section labels accepted by SectionFallbackResult.section_type.
	# The same labels, in the same order, are spelled out in the classify_section prompt.
	AllowedSectionType = Literal[
	    "summary", "experience", "education", "skills",
	    "projects", "certifications", "contact", "general",
	]


	class NameFallbackResult(BaseModel):
	    """Structured response returned by ``LLMFallbackResolver.resolve_name``.

	    Attributes:
	        name: The resolved person name, or ``None`` (the default) when no
	            name was produced. The resolve_name prompt asks the model to
	            return a null name when it is uncertain.
	        confidence: Score validated to lie in ``[0.0, 1.0]``; defaults to 0.0.
	        reason: Free-text justification; defaults to the empty string.
	    """

	    # The backslash-escaped pipe in the original annotation was not valid
	    # Python; the intended annotation is the optional-string union below.
	    name: str | None = None
	    confidence: float = Field(default=0.0, ge=0.0, le=1.0)
	    reason: str = ""


	class SectionFallbackResult(BaseModel):
	    """Structured response returned by ``LLMFallbackResolver.classify_section``.

	    Attributes:
	        section_type: Required label; must be one of the ``AllowedSectionType``
	            literals.
	        confidence: Score validated to lie in ``[0.0, 1.0]``; defaults to 0.0.
	        reason: Free-text justification; defaults to the empty string.
	    """

	    section_type: AllowedSectionType
	    confidence: float = Field(
	        default=0.0,
	        ge=0.0,
	        le=1.0,
	    )
	    reason: str = ""


	class LLMFallbackResolver:
	    """LLM-backed fallback for resume name resolution and section classification.

	    Wraps an ``LLMClient`` and a model alias. Each public method builds a text
	    prompt and asks the client for a structured response validated against a
	    pydantic schema (``NameFallbackResult`` or ``SectionFallbackResult``).
	    """

	    def __init__(self, llm_client: LLMClient, *, model_alias: str = "extractor_default") -> None:
	        """Store the client and the model alias used for every request.

	        Args:
	            llm_client: Client whose ``generate_structured`` method is called.
	            model_alias: Alias forwarded as ``model_alias`` on each call.
	        """
	        self._llm = llm_client
	        self._model_alias = model_alias

	    def resolve_name(
	        self,
	        *,
	        candidate_lines: list[str],
	        emails: list[str],
	        phones: list[str],
	        language: str | None,
	    ) -> NameFallbackResult:
	        """Resolve the most likely candidate name from header context.

	        Args:
	            candidate_lines: Header lines that may contain the name; embedded
	                in the prompt using the list's ``str()`` form.
	            emails: Email strings embedded in the prompt as context.
	            phones: Phone strings embedded in the prompt as context.
	            language: Language tag; rendered as ``unknown`` when falsy.

	        Returns:
	            The ``NameFallbackResult`` produced by the LLM client.

	        Raises:
	            Exception: Whatever ``_generate`` raises.
	        """
	        prompt = (
	            "Extract the most likely person full name from resume header lines.\n"
	            "Rules:\n"
	            "- Prefer real person names (2-4 tokens).\n"
	            "- Reject locations, skills, roles, and section titles.\n"
	            "- If uncertain, return null name and low confidence.\n\n"
	            f"language={language or 'unknown'}\n"
	            f"emails={emails}\n"
	            f"phones={phones}\n"
	            f"candidate_lines={candidate_lines}\n"
	            "Return JSON: {name, confidence, reason}."
	        )
	        return self._generate(prompt=prompt, schema=NameFallbackResult)

	    def classify_section(
	        self,
	        *,
	        raw_heading: str,
	        content_excerpt: str,
	        language: str | None,
	    ) -> SectionFallbackResult:
	        """Classify an ambiguous section into one allowed section label.

	        Args:
	            raw_heading: Section heading as found; embedded via ``repr``.
	            content_excerpt: Excerpt of the section body; embedded via ``repr``.
	            language: Language tag; rendered as ``unknown`` when falsy.

	        Returns:
	            The ``SectionFallbackResult`` produced by the LLM client.

	        Raises:
	            Exception: Whatever ``_generate`` raises.
	        """
	        prompt = (
	            "Classify resume section into one of these labels only: "
	            "summary, experience, education, skills, projects, certifications, contact, general.\n"
	            "Use heading and content. Favor contact when email/phone/link patterns exist.\n\n"
	            f"language={language or 'unknown'}\n"
	            f"heading={raw_heading!r}\n"
	            f"content_excerpt={content_excerpt!r}\n"
	            "Return JSON: {section_type, confidence, reason}."
	        )
	        return self._generate(prompt=prompt, schema=SectionFallbackResult)

	    def _generate(self, *, prompt: str, schema: type[SchemaModelT]) -> SchemaModelT:
	        """Request a structured response from the LLM client for ``prompt``.

	        Calls ``generate_structured`` with the stored model alias and
	        ``temperature=0.0`` (presumably to keep output as deterministic as the
	        provider allows -- confirm against the client).

	        Args:
	            prompt: Prompt text sent to the language model.
	            schema: Pydantic model class forwarded as ``schema`` to the client.

	        Returns:
	            Whatever ``generate_structured`` returns; annotated as an instance
	            of ``schema``.

	        Raises:
	            Exception: Any exception raised by the client call. It is first
	                passed to ``coerce_provider_exception``; if that returns a
	                different exception object, that object is raised with the
	                original as its ``__cause__``. Otherwise the original
	                exception is re-raised unchanged.
	        """
	        try:
	            return self._llm.generate_structured(
	                prompt=prompt,
	                schema=schema,
	                model_alias=self._model_alias,
	                temperature=0.0,
	            )
	        except Exception as exc:
	            normalized = coerce_provider_exception(exc)
	            if normalized is not exc:
	                # A replacement exception was produced: chain it to the original.
	                raise normalized from exc
	            # No replacement: bare raise keeps the original traceback intact.
	            raise