File size: 1,319 Bytes
c7256ee
 
 
 
 
 
 
 
 
 
 
8f37cc7
c7256ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from typing import Any

from pydantic import BaseModel, Field

# Pydantic request and response schemas for the API endpoints.


class PredictRequest(BaseModel):
    # Input schema for a prediction request: the query text plus the
    # retrieval, reranking and generation settings to apply to it.
    # Only `query` is required; every other field carries a default.

    # `...` marks the field as required; min_length=1 rejects the empty string.
    query: str = Field(
        ...,
        min_length=1,
        description="User query text",
    )
    model: str = Field(
        default="Llama-3-8B",
        description="Model name key",
    )

    # Bounded integer knobs (inclusive limits via ge/le).
    # NOTE(review): presumably top_k is the number of candidates retrieved and
    # final_k the number kept for the answer -- confirm against the caller.
    top_k: int = Field(
        default=50,
        ge=1,
        le=100,
    )
    final_k: int = Field(
        default=3,
        ge=1,
        le=8,
    )

    # The accepted values below are listed in the descriptions only; the
    # fields are plain `str`, so nothing in this class rejects other values.
    chunking_technique: str = Field(
        default="all",
        description="all | fixed | sentence | paragraph | semantic | recursive | page | markdown",
    )
    mode: str = Field(
        default="hybrid",
        description="semantic | bm25 | hybrid",
    )
    rerank_strategy: str = Field(
        default="cross-encoder",
        description="cross-encoder | rrf | none",
    )

    # MMR switch and its tradeoff weight, constrained to [0.0, 1.0].
    use_mmr: bool = Field(
        default=True,
        description="Whether to apply MMR after reranking",
    )
    lambda_param: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="MMR relevance/diversity tradeoff",
    )

    # Generation temperature, constrained to [0.0, 2.0].
    temperature: float = Field(
        default=0.1,
        ge=0.0,
        le=2.0,
        description="Generation temperature",
    )


class PredictResponse(BaseModel):
    # Output schema named as the counterpart of PredictRequest. All four
    # fields are required: none of them declares a default.

    # NOTE(review): presumably the model name key that produced the answer --
    # confirm against the endpoint that builds this response.
    model: str
    answer: str
    # NOTE(review): presumably the context passages used for the answer,
    # as plain strings -- confirm.
    contexts: list[str]
    # One dict per chunk with string keys and arbitrary values; the exact
    # key set is not defined in this file.
    retrieved_chunks: list[dict[str, Any]]


class TitleRequest(BaseModel):
    # Input schema for a title request. Its single field is required
    # (`...` default) and must be a non-empty string (min_length=1).
    query: str = Field(
        ...,
        min_length=1,
        description="First user message",
    )


class TitleResponse(BaseModel):
    # Output schema named as the counterpart of TitleRequest. Both fields
    # are required strings: neither declares a default.
    title: str
    # NOTE(review): presumably identifies how or where the title was
    # produced; the allowed values are not defined in this file -- confirm.
    source: str