"""Data models for the Text2SPARQL repair pipeline.

All Pydantic models live here. No business logic.
"""

from __future__ import annotations

from typing import Any, Optional
from pydantic import BaseModel, Field


class QueryRequest(BaseModel):
    """Incoming natural-language question to translate to SPARQL.

    Entry-point payload for the pipeline; carried through a run inside
    ``RunTrace.request``.
    """
    request_id: str   # unique identifier for this request
    dataset_id: str   # selects which configured dataset to query (matches DatasetConfig.dataset_id)
    question: str     # the natural-language question text
    language: str | None = None  # language of the question if known — presumably an ISO code; confirm with caller


class DatasetConfig(BaseModel):
    """Dataset-specific configuration loaded from YAML."""
    dataset_id: str       # identifier matched against QueryRequest.dataset_id
    endpoint_url: str     # SPARQL endpoint that candidate queries are executed against
    kg_profile_path: str  # filesystem path to the knowledge-graph profile used to build context
    default_prefixes: dict[str, str]  # prefix -> namespace IRI mapping injected into generated queries
    mode: str  # "dbpedia" or "corporate"  (NOTE(review): could be Literal, but that would tighten validation)


class ContextPackage(BaseModel):
    """Compact context package built from KG profile and question analysis.

    Supplied to query generation; the candidate lists hold loosely-typed
    dicts (schema not fixed here — presumably label/IRI/score records;
    confirm against the context builder).
    """
    entity_candidates: list[dict] = Field(default_factory=list)    # candidate KG entities for the question
    relation_candidates: list[dict] = Field(default_factory=list)  # candidate properties/relations
    class_candidates: list[dict] = Field(default_factory=list)     # candidate ontology classes
    answer_type_hint: str | None = None  # expected answer shape, if the analysis produced one
    prefix_hints: dict[str, str] = Field(default_factory=dict)     # prefix -> namespace IRI suggestions
    notes: list[str] = Field(default_factory=list)                 # free-form observations for the generator


class CandidateQuery(BaseModel):
    """A SPARQL query candidate, either from generation or repair."""
    candidate_id: str   # unique id; referenced by ValidationResult / ExpertFeedback / CoordinatorDecision
    query: str          # the SPARQL query text
    source: str  # "generation" or "repair"
    generation_index: int  # position within the initial generation batch
    parent_candidate_id: str | None = None  # id of the candidate this one was repaired from, if any
    repair_iteration: int = 0  # how many repair steps produced this candidate (0 = fresh generation)


class ValidationResult(BaseModel):
    """Result of cheap symbolic validation on a candidate."""
    candidate_id: str            # id of the CandidateQuery this result describes
    parse_ok: bool               # query parsed as syntactically valid SPARQL
    execute_ok: bool             # query executed against the endpoint without error
    timeout: bool                # execution hit the time limit
    execution_error: str | None = None  # endpoint error message when execute_ok is False
    result_count: int | None = None     # number of rows/bindings returned; None if not executed
    result_preview: list[dict] = Field(default_factory=list)  # first few result rows for inspection
    answer_type_fit: float = 0.0  # how well results match the expected answer type
    schema_fit: float = 0.0       # how well the query matches the KG schema
    suspicious_flags: list[str] = Field(default_factory=list)  # heuristic warnings raised during validation
    score: float = 0.0            # aggregate ranking score for this candidate


class ExpertFeedback(BaseModel):
    """Structured feedback from a semantic committee expert."""
    expert_name: str    # which committee expert produced this feedback
    candidate_id: str   # the CandidateQuery being judged
    verdict: str  # "ok", "suspicious", "bad"
    confidence: float   # expert's confidence in the verdict
    issue_summary: str  # short description of the problem found (or why it is ok)
    suspected_elements: list[str] = Field(default_factory=list)  # query parts believed to be wrong
    suggested_action: str | None = None  # repair action the expert recommends, if any
    evidence: list[str] = Field(default_factory=list)  # supporting observations behind the verdict


class CoordinatorDecision(BaseModel):
    """Merged decision from expert committee feedback."""
    candidate_id: str  # the CandidateQuery the decision applies to
    decision: str  # "accept", "repair", "discard"
    selected_action: str | None = None  # repair action chosen when decision == "repair"
    rationale: list[str] = Field(default_factory=list)  # reasons behind the decision, for tracing


class RepairResult(BaseModel):
    """Result of a single repair step."""
    old_candidate_id: str          # id of the candidate that was repaired
    new_candidate: CandidateQuery  # the repaired candidate (may be identical if repair stalled)
    action_used: str               # which repair action was applied
    changed: bool                  # whether the repair actually modified the query text
    diff_summary: str              # human-readable summary of what changed


class RunTrace(BaseModel):
    """Full trace of a pipeline run, for logging and inspection.

    Accumulates every artifact produced while answering one request:
    the input, the generated candidates, all validation/committee/repair
    history, and the final outcome.
    """
    request: QueryRequest   # the originating request
    dataset: DatasetConfig  # dataset configuration the run resolved to
    # Unified with the `X | None` style used by every other model in this
    # file (was `Optional[ContextPackage]`); None if the run failed before
    # context construction.
    context: ContextPackage | None = None
    initial_candidates: list[CandidateQuery] = Field(default_factory=list)        # first generation batch
    semantic_loop_candidates: list[CandidateQuery] = Field(default_factory=list)  # candidates produced during the repair loop
    validation_history: list[ValidationResult] = Field(default_factory=list)      # every validation performed
    committee_history: list[ExpertFeedback] = Field(default_factory=list)         # every expert judgement
    decision_history: list[CoordinatorDecision] = Field(default_factory=list)     # every coordinator decision
    repair_history: list[RepairResult] = Field(default_factory=list)              # every repair step
    final_candidate_id: str = ""  # id of the winning candidate; empty if none accepted
    final_query: str = ""         # query text returned to the caller; empty on failure
    final_status: str = ""  # "accepted", "attempt_limit", "repair_stalled", "syntax_failed", "failed"