# duongtruongbinh's picture
# Add support for multiple PDF uploads and enhanced citation handling
# bc2d97e
"""Pydantic schemas for chunks, answers, and learning outputs."""
from __future__ import annotations
from typing import Literal
from pydantic import BaseModel, Field, model_validator
class ChunkMetadata(BaseModel):
    """Stable metadata carried by every chunk stored in Qdrant.

    Every field is required except ``section``, which falls back to
    ``None`` when it is not supplied.
    """

    document_id: str
    filename: str
    source: str
    page: int
    chunk_id: str
    section: str | None = Field(default=None)
class RetrievedChunk(BaseModel):
    """One retrieval hit: the chunk text, its score, and its metadata.

    All three fields are required; ``metadata`` is validated as a
    ``ChunkMetadata`` model.
    """

    text: str
    score: float
    metadata: ChunkMetadata
class Citation(BaseModel):
    """Citation built from the metadata of a retrieved chunk.

    ``source_index``, ``source_marker``, ``filename`` and ``page`` are
    required. ``source_text``, ``section`` and ``chunk_id`` are optional
    and default to ``None``.
    """

    source_index: int
    source_marker: str
    filename: str
    page: int
    source_text: str | None = Field(default=None)
    section: str | None = Field(default=None)
    chunk_id: str | None = Field(default=None)
class RagAnswer(BaseModel):
    """Grounded answer handed back to the caller.

    ``question`` and ``answer`` are required. ``citations`` and ``chunks``
    each default to a new empty list when omitted.
    """

    question: str
    answer: str
    citations: list[Citation] = Field(default_factory=list)
    chunks: list[RetrievedChunk] = Field(default_factory=list)
class Summary(BaseModel):
    """Study-oriented, grounded summary of a document or subset.

    ``scope`` must be one of ``"query"``, ``"document"``, ``"filter"`` or
    ``"corpus"``. ``summary`` is required, ``target`` is optional and
    defaults to ``None``, and both list fields default to a new empty list.
    """

    scope: Literal["query", "document", "filter", "corpus"]
    target: str | None = Field(default=None)
    summary: str
    key_points: list[str] = Field(default_factory=list)
    citations: list[Citation] = Field(default_factory=list)
class QuizItem(BaseModel):
    """One multiple-choice question grounded in the source material.

    ``options`` must contain exactly four entries, and ``correct_index``
    must be a valid position within ``options``. ``difficulty`` and
    ``topic`` are optional and default to ``None``; ``source_markers``
    defaults to a new empty list.
    """

    question: str
    options: list[str] = Field(..., min_length=4, max_length=4)
    correct_index: int
    explanation: str
    source_markers: list[str] = Field(default_factory=list)
    difficulty: str | None = Field(default=None)
    topic: str | None = Field(default=None)

    @model_validator(mode="after")
    def _validate_correct_index(self) -> "QuizItem":
        """Reject a ``correct_index`` that does not point at an option.

        Returns:
            The model instance itself when the index is in range.

        Raises:
            ValueError: If ``correct_index`` is negative or not smaller
                than the number of options.
        """
        # Accept early when the index addresses one of the options.
        if self.correct_index in range(len(self.options)):
            return self
        raise ValueError(
            f"correct_index {self.correct_index} out of range for {len(self.options)} options"
        )
class QuizSet(BaseModel):
    """Reusable collection of grounded quiz items and their resolved citations.

    ``scope`` must be one of ``"query"``, ``"document"``, ``"filter"`` or
    ``"corpus"``. ``target`` is optional and defaults to ``None``; ``items``
    and ``citations`` each default to a new empty list.
    """

    scope: Literal["query", "document", "filter", "corpus"]
    target: str | None = Field(default=None)
    items: list[QuizItem] = Field(default_factory=list)
    citations: list[Citation] = Field(default_factory=list)
class Flashcard(BaseModel):
    """One study flashcard grounded in the source material.

    ``front`` and ``back`` are required. ``hint`` and ``topic`` are optional
    and default to ``None``; ``source_markers`` defaults to a new empty
    list.
    """

    front: str
    back: str
    hint: str | None = Field(default=None)
    topic: str | None = Field(default=None)
    source_markers: list[str] = Field(default_factory=list)
class FlashcardSet(BaseModel):
    """Reusable collection of grounded flashcards and their resolved citations.

    ``scope`` must be one of ``"query"``, ``"document"``, ``"filter"`` or
    ``"corpus"``. ``target`` is optional and defaults to ``None``; ``cards``
    and ``citations`` each default to a new empty list.
    """

    scope: Literal["query", "document", "filter", "corpus"]
    target: str | None = Field(default=None)
    cards: list[Flashcard] = Field(default_factory=list)
    citations: list[Citation] = Field(default_factory=list)