Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| """Data models for the Text2SPARQL repair pipeline. | |
| All Pydantic models live here. No business logic. | |
| """ | |
| from __future__ import annotations | |
| from typing import Any, Optional | |
| from pydantic import BaseModel, Field | |
class QueryRequest(BaseModel):
    """A natural-language question, plus routing info, that the pipeline must turn into SPARQL."""

    # Correlation identifier assigned by the caller for this single request.
    request_id: str
    # Identifier of the dataset the question targets.
    dataset_id: str
    # The natural-language question text itself.
    question: str
    # Language tag of the question; None when not specified.
    language: Optional[str] = None
class DatasetConfig(BaseModel):
    """Configuration for one dataset, loaded from a YAML file."""

    # Identifier used to select this configuration.
    dataset_id: str
    # SPARQL endpoint queries are sent to.
    endpoint_url: str
    # Filesystem path of the knowledge-graph profile.
    kg_profile_path: str
    # Default prefix-name -> expansion mappings for this dataset.
    default_prefixes: dict[str, str]
    # Operating mode: either "dbpedia" or "corporate".
    mode: str
class ContextPackage(BaseModel):
    """Condensed context assembled from the KG profile and the question analysis."""

    # Candidate entities, relations, and classes surfaced for the question.
    entity_candidates: list[dict] = Field(default_factory=list)
    relation_candidates: list[dict] = Field(default_factory=list)
    class_candidates: list[dict] = Field(default_factory=list)
    # Hint about the expected answer type, when one is available.
    answer_type_hint: Optional[str] = None
    # Suggested prefix-name -> expansion mappings for query generation.
    prefix_hints: dict[str, str] = Field(default_factory=dict)
    # Free-form remarks collected while building the package.
    notes: list[str] = Field(default_factory=list)
class CandidateQuery(BaseModel):
    """One SPARQL query under consideration, produced either by generation or by repair."""

    # Unique identifier of this candidate.
    candidate_id: str
    # The SPARQL query text.
    query: str
    # Origin of the candidate: "generation" or "repair".
    source: str
    # Index of this candidate within its generation run.
    generation_index: int
    # Identifier of the candidate this one was derived from, if any.
    parent_candidate_id: Optional[str] = None
    # Number of repair rounds applied so far (0 for fresh generations).
    repair_iteration: int = 0
class ValidationResult(BaseModel):
    """Outcome of the cheap, symbolic validation checks run on one candidate."""

    # Candidate this result belongs to.
    candidate_id: str
    # Whether the query parsed successfully.
    parse_ok: bool
    # Whether execution succeeded.
    execute_ok: bool
    # Whether execution hit a timeout.
    timeout: bool
    # Error text produced by execution, when it failed.
    execution_error: Optional[str] = None
    # Number of results returned; None when unknown.
    result_count: Optional[int] = None
    # A few sample result rows, for inspection.
    result_preview: list[dict] = Field(default_factory=list)
    # Heuristic fit scores for answer type and schema usage.
    answer_type_fit: float = 0.0
    schema_fit: float = 0.0
    # Labels of heuristics that flagged this candidate as suspicious.
    suspicious_flags: list[str] = Field(default_factory=list)
    # Aggregate validation score for ranking candidates.
    score: float = 0.0
class ExpertFeedback(BaseModel):
    """A single semantic-committee expert's structured assessment of one candidate."""

    # Which expert produced this feedback.
    expert_name: str
    # Candidate being assessed.
    candidate_id: str
    # One of "ok", "suspicious", or "bad".
    verdict: str
    # Expert's confidence in its verdict.
    confidence: float
    # Short description of the issue found, if any.
    issue_summary: str
    # Query elements the expert believes are wrong.
    suspected_elements: list[str] = Field(default_factory=list)
    # Repair action the expert recommends, when it has one.
    suggested_action: Optional[str] = None
    # Observations supporting the verdict.
    evidence: list[str] = Field(default_factory=list)
class CoordinatorDecision(BaseModel):
    """The coordinator's merged verdict after weighing all expert feedback."""

    # Candidate the decision applies to.
    candidate_id: str
    # One of "accept", "repair", or "discard".
    decision: str
    # Repair action chosen by the coordinator, if any.
    selected_action: Optional[str] = None
    # Reasons behind the decision, one entry per point.
    rationale: list[str] = Field(default_factory=list)
class RepairResult(BaseModel):
    """What one repair step produced."""

    # Identifier of the candidate that was repaired.
    old_candidate_id: str
    # The repaired candidate query.
    new_candidate: CandidateQuery
    # Repair action that was applied.
    action_used: str
    # Whether the repair actually modified the query.
    changed: bool
    # Human-readable summary of the change.
    diff_summary: str
class RunTrace(BaseModel):
    """Full trace of a pipeline run, for logging and inspection."""

    # The incoming request and the dataset configuration used to serve it.
    request: QueryRequest
    dataset: DatasetConfig
    # Context package built for the run; None when not available.
    # Written as `ContextPackage | None` for consistency with the union
    # syntax used by every other nullable field in this module
    # (e.g. QueryRequest.language).
    context: ContextPackage | None = None
    # Candidates from the initial generation step and from the semantic loop.
    initial_candidates: list[CandidateQuery] = Field(default_factory=list)
    semantic_loop_candidates: list[CandidateQuery] = Field(default_factory=list)
    # Per-stage histories accumulated as the run progresses.
    validation_history: list[ValidationResult] = Field(default_factory=list)
    committee_history: list[ExpertFeedback] = Field(default_factory=list)
    decision_history: list[CoordinatorDecision] = Field(default_factory=list)
    repair_history: list[RepairResult] = Field(default_factory=list)
    # Final outcome: the selected candidate/query and how the run ended.
    final_candidate_id: str = ""
    final_query: str = ""
    final_status: str = ""  # "accepted", "attempt_limit", "repair_stalled", "syntax_failed", "failed"