| from typing import Any |
|
|
| from pydantic import BaseModel, Field |
|
|
| |
| |
|
|
|
|
class PredictRequest(BaseModel):
    """Request payload: a user query plus retrieval, reranking and generation settings."""

    # Required field; min_length=1 rejects the empty string.
    query: str = Field(
        ...,
        min_length=1,
        description="User query text",
    )
    model: str = Field(
        default="Llama-3-8B",
        description="Model name key",
    )

    # Integer limits, bounded to 1..20 and 1..8 respectively. No cross-field
    # check between the two is declared in this class.
    # NOTE(review): presumably "candidates retrieved" vs. "chunks kept" - confirm.
    top_k: int = Field(
        default=10,
        ge=1,
        le=20,
    )
    final_k: int = Field(
        default=3,
        ge=1,
        le=8,
    )

    # The three selectors below are annotated as plain ``str``; the options
    # listed in each description are informational and are not enforced by
    # these field declarations.
    chunking_technique: str = Field(
        default="all",
        description="all | fixed | sentence | paragraph | semantic | recursive | page | markdown",
    )
    mode: str = Field(
        default="hybrid",
        description="semantic | bm25 | hybrid",
    )
    rerank_strategy: str = Field(
        default="cross-encoder",
        description="cross-encoder | rrf | none",
    )

    use_mmr: bool = Field(
        default=True,
        description="Whether to apply MMR after reranking",
    )
    # Both floats are range-checked (inclusive bounds).
    lambda_param: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="MMR relevance/diversity tradeoff",
    )
    temperature: float = Field(
        default=0.1,
        ge=0.0,
        le=2.0,
        description="Generation temperature",
    )
|
|
|
|
class PredictResponse(BaseModel):
    """Response payload: model name, answer text, contexts and retrieved chunks.

    All four fields are required; none declares a default.
    """

    model: str
    answer: str
    # Plain-text contexts, one string per entry.
    contexts: list[str]
    # One dict per chunk with string keys; the value schema is not fixed here.
    retrieved_chunks: list[dict[str, Any]]
|
|
|
|
class TitleRequest(BaseModel):
    """Request payload carrying a single required, non-empty ``query`` string."""

    query: str = Field(
        ...,
        min_length=1,
        description="First user message",
    )
|
|
|
|
class TitleResponse(BaseModel):
    """Response payload with two required strings: ``title`` and ``source``."""

    title: str
    # NOTE(review): presumably identifies what produced the title - confirm
    # the set of values against the code that builds this response.
    source: str
|
|