# iris-at-text2sparql / src/models.py
# Author: Alex Latipov
# Commit d745844: "Harden frozen eval prompts and judge JSON handling"
"""Data models for the Text2SPARQL repair pipeline.
All Pydantic models live here. No business logic.
"""
from __future__ import annotations
from typing import Any, Optional
from pydantic import BaseModel, Field
class QueryRequest(BaseModel):
    """Incoming natural-language question to translate to SPARQL."""
    request_id: str  # unique identifier for this request (used to correlate traces)
    dataset_id: str  # selects which DatasetConfig / knowledge graph to query
    question: str  # the natural-language question to translate
    # Language of the question; None when not supplied by the caller.
    # NOTE(review): presumably an ISO 639-1 code ("en", "de", ...) — confirm against API spec.
    language: str | None = None
class DatasetConfig(BaseModel):
    """Dataset-specific configuration loaded from YAML."""
    dataset_id: str  # identifier matched against QueryRequest.dataset_id
    endpoint_url: str  # SPARQL endpoint to execute queries against
    kg_profile_path: str  # filesystem path to the knowledge-graph profile
    default_prefixes: dict[str, str]  # prefix -> namespace IRI, applied to generated queries
    mode: str  # "dbpedia" or "corporate"
class ContextPackage(BaseModel):
    """Compact context package built from KG profile and question analysis."""
    # Candidate lists below are free-form dicts; schema is defined by the
    # context-building step elsewhere in the pipeline — TODO confirm keys.
    entity_candidates: list[dict] = Field(default_factory=list)  # candidate KG entities
    relation_candidates: list[dict] = Field(default_factory=list)  # candidate properties/relations
    class_candidates: list[dict] = Field(default_factory=list)  # candidate ontology classes
    answer_type_hint: str | None = None  # expected answer type, if inferable from the question
    prefix_hints: dict[str, str] = Field(default_factory=dict)  # prefix -> namespace IRI suggestions
    notes: list[str] = Field(default_factory=list)  # free-form notes for downstream prompts/logging
class CandidateQuery(BaseModel):
    """A SPARQL query candidate, either from generation or repair."""
    candidate_id: str  # unique identifier for this candidate
    query: str  # the SPARQL query text
    source: str  # "generation" or "repair"
    generation_index: int  # position within the batch of generated candidates
    parent_candidate_id: str | None = None  # candidate this was repaired from; None for fresh generations
    repair_iteration: int = 0  # how many repair rounds produced this candidate (0 = original)
class ValidationResult(BaseModel):
    """Result of cheap symbolic validation on a candidate."""
    candidate_id: str  # CandidateQuery.candidate_id this result refers to
    parse_ok: bool  # query parsed as valid SPARQL
    execute_ok: bool  # query executed without error at the endpoint
    timeout: bool  # execution hit the time limit
    execution_error: str | None = None  # error message when execution failed
    result_count: int | None = None  # number of results returned; None if not executed
    result_preview: list[dict] = Field(default_factory=list)  # small sample of result bindings
    # Heuristic fit scores; presumably in [0, 1] — TODO confirm scale in the validator.
    answer_type_fit: float = 0.0  # how well results match the expected answer type
    schema_fit: float = 0.0  # how well query terms match the KG schema
    suspicious_flags: list[str] = Field(default_factory=list)  # names of triggered heuristics
    score: float = 0.0  # aggregate validation score used for ranking
class ExpertFeedback(BaseModel):
    """Structured feedback from a semantic committee expert."""
    expert_name: str  # which expert produced this feedback
    candidate_id: str  # candidate being reviewed
    verdict: str  # "ok", "suspicious", "bad"
    confidence: float  # expert's self-reported confidence in the verdict
    issue_summary: str  # short description of the detected problem (may be empty for "ok")
    suspected_elements: list[str] = Field(default_factory=list)  # query parts believed to be wrong
    suggested_action: str | None = None  # repair action the expert recommends, if any
    evidence: list[str] = Field(default_factory=list)  # supporting observations for the verdict
class CoordinatorDecision(BaseModel):
    """Merged decision from expert committee feedback."""
    candidate_id: str  # candidate the decision applies to
    decision: str  # "accept", "repair", "discard"
    selected_action: str | None = None  # repair action chosen when decision == "repair"
    rationale: list[str] = Field(default_factory=list)  # reasons backing the decision (for the trace)
class RepairResult(BaseModel):
    """Result of a single repair step."""
    old_candidate_id: str  # candidate the repair started from
    new_candidate: CandidateQuery  # the repaired candidate (source == "repair")
    action_used: str  # repair action that was applied
    changed: bool  # whether the repair actually modified the query text
    diff_summary: str  # human-readable summary of what changed
class RunTrace(BaseModel):
    """Full trace of a pipeline run, for logging and inspection.

    Accumulates every artifact the pipeline produces for one request:
    the inputs, the built context, all candidate generations, and the
    per-step validation / committee / repair history.
    """
    request: QueryRequest  # the incoming question being traced
    dataset: DatasetConfig  # dataset configuration used for this run
    # PEP 604 union for consistency with the rest of this module
    # (was Optional[ContextPackage]); None until context building runs.
    context: ContextPackage | None = None
    initial_candidates: list[CandidateQuery] = Field(default_factory=list)  # first generation batch
    semantic_loop_candidates: list[CandidateQuery] = Field(default_factory=list)  # candidates from repair loop
    validation_history: list[ValidationResult] = Field(default_factory=list)  # every validation performed
    committee_history: list[ExpertFeedback] = Field(default_factory=list)  # every expert review
    decision_history: list[CoordinatorDecision] = Field(default_factory=list)  # every coordinator decision
    repair_history: list[RepairResult] = Field(default_factory=list)  # every repair step taken
    final_candidate_id: str = ""  # winning candidate; empty if the run failed
    final_query: str = ""  # winning SPARQL query text; empty if the run failed
    final_status: str = ""  # "accepted", "attempt_limit", "repair_stalled", "syntax_failed", "failed"