"""Data models for the Text2SPARQL repair pipeline. All Pydantic models live here. No business logic. """ from __future__ import annotations from typing import Any, Optional from pydantic import BaseModel, Field class QueryRequest(BaseModel): """Incoming natural-language question to translate to SPARQL.""" request_id: str dataset_id: str question: str language: str | None = None class DatasetConfig(BaseModel): """Dataset-specific configuration loaded from YAML.""" dataset_id: str endpoint_url: str kg_profile_path: str default_prefixes: dict[str, str] mode: str # "dbpedia" or "corporate" class ContextPackage(BaseModel): """Compact context package built from KG profile and question analysis.""" entity_candidates: list[dict] = Field(default_factory=list) relation_candidates: list[dict] = Field(default_factory=list) class_candidates: list[dict] = Field(default_factory=list) answer_type_hint: str | None = None prefix_hints: dict[str, str] = Field(default_factory=dict) notes: list[str] = Field(default_factory=list) class CandidateQuery(BaseModel): """A SPARQL query candidate, either from generation or repair.""" candidate_id: str query: str source: str # "generation" or "repair" generation_index: int parent_candidate_id: str | None = None repair_iteration: int = 0 class ValidationResult(BaseModel): """Result of cheap symbolic validation on a candidate.""" candidate_id: str parse_ok: bool execute_ok: bool timeout: bool execution_error: str | None = None result_count: int | None = None result_preview: list[dict] = Field(default_factory=list) answer_type_fit: float = 0.0 schema_fit: float = 0.0 suspicious_flags: list[str] = Field(default_factory=list) score: float = 0.0 class ExpertFeedback(BaseModel): """Structured feedback from a semantic committee expert.""" expert_name: str candidate_id: str verdict: str # "ok", "suspicious", "bad" confidence: float issue_summary: str suspected_elements: list[str] = Field(default_factory=list) suggested_action: str | None = None evidence: list[str] = Field(default_factory=list) class CoordinatorDecision(BaseModel): """Merged decision from expert committee feedback.""" candidate_id: str decision: str # "accept", "repair", "discard" selected_action: str | None = None rationale: list[str] = Field(default_factory=list) class RepairResult(BaseModel): """Result of a single repair step.""" old_candidate_id: str new_candidate: CandidateQuery action_used: str changed: bool diff_summary: str class RunTrace(BaseModel): """Full trace of a pipeline run, for logging and inspection.""" request: QueryRequest dataset: DatasetConfig context: Optional[ContextPackage] = None initial_candidates: list[CandidateQuery] = Field(default_factory=list) semantic_loop_candidates: list[CandidateQuery] = Field(default_factory=list) validation_history: list[ValidationResult] = Field(default_factory=list) committee_history: list[ExpertFeedback] = Field(default_factory=list) decision_history: list[CoordinatorDecision] = Field(default_factory=list) repair_history: list[RepairResult] = Field(default_factory=list) final_candidate_id: str = "" final_query: str = "" final_status: str = "" # "accepted", "attempt_limit", "repair_stalled", "syntax_failed", "failed"