File size: 3,173 Bytes
b1a3dce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""
Pydantic request/response models for the Kerdos AI RAG API.
"""

from __future__ import annotations

from typing import List, Optional
from pydantic import BaseModel, Field


# ─── Session ────────────────────────────────────────────────────────────────

class SessionCreateResponse(BaseModel):
    """Payload returned when a new session has been created."""

    session_id: str = Field(
        ...,
        description="Unique session identifier",
    )
    # Human-readable confirmation; clients may ignore it.
    message: str = Field(default="Session created successfully")


class SessionStatusResponse(BaseModel):
    """Snapshot of a session's current state and lifetime."""

    session_id: str
    document_count: int = Field(
        ...,
        description="Number of uploaded documents",
    )
    chunk_count: int = Field(
        ...,
        description="Number of indexed text chunks",
    )
    history_length: int = Field(
        ...,
        description="Number of turns in conversation history",
    )
    # Timestamps are passed through as strings — presumably ISO-8601;
    # confirm against the session store.
    created_at: str
    expires_at: str


# ─── Documents ──────────────────────────────────────────────────────────────

class IndexResponse(BaseModel):
    """Result of a document-indexing request."""

    session_id: str
    indexed_files: List[str] = Field(
        ...,
        description="Names of successfully indexed files",
    )
    # Defaults to an empty list so a fully successful run needs no entry.
    failed_files: List[str] = Field(
        default_factory=list,
        description="Files that failed to parse",
    )
    chunk_count: int = Field(
        ...,
        description="Total chunks in FAISS index",
    )
    message: str = Field(default="Documents indexed successfully")


# ─── Chat ────────────────────────────────────────────────────────────────────

class Source(BaseModel):
    """A single retrieved chunk cited in a chat answer."""

    filename: str
    # Position of the chunk within its source document's chunk sequence.
    chunk_index: int
    excerpt: str = Field(
        ...,
        description="Short preview of the retrieved chunk",
    )


class ChatRequest(BaseModel):
    """Inbound chat query plus generation parameters."""

    question: str = Field(
        ...,
        min_length=1,
        description="The question to ask about your documents",
    )
    hf_token: str = Field(
        ...,
        description="Hugging Face API token (Write access required for Llama 3)",
    )
    # Retrieval and sampling knobs, each clamped to a sane range.
    top_k: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Number of chunks to retrieve",
    )
    temperature: float = Field(default=0.3, ge=0.0, le=1.0)
    max_new_tokens: int = Field(default=512, ge=64, le=2048)


class ChatResponse(BaseModel):
    """Answer to a chat request, echoing the question and citing sources."""

    session_id: str
    question: str
    answer: str
    # Chunks the answer was grounded on; empty when nothing was retrieved.
    sources: List[Source] = Field(default_factory=list)
    # Identifier of the generation model used for this answer.
    model: str = Field(default="meta-llama/Llama-3.1-8B-Instruct")


# ─── Health ──────────────────────────────────────────────────────────────────

class HealthResponse(BaseModel):
    """Service liveness report for the health endpoint."""

    # Plain default (no Field) — schema is identical either way.
    status: str = "ok"
    version: str
    uptime_seconds: float
    active_sessions: int


# ─── Generic ─────────────────────────────────────────────────────────────────

class MessageResponse(BaseModel):
    """Generic single-message envelope for simple endpoints."""

    message: str