"""
Pydantic request/response models for the Kerdos AI RAG API.
"""
from __future__ import annotations
from typing import List, Optional
from pydantic import BaseModel, Field
# ─── Session ────────────────────────────────────────────────────────────────
class SessionCreateResponse(BaseModel):
    """Payload returned when a new RAG session has been created."""

    session_id: str = Field(..., description="Unique session identifier")
    message: str = Field(default="Session created successfully")
class SessionStatusResponse(BaseModel):
    """Snapshot of a session's current state: documents, index size, history, lifetime."""

    session_id: str
    document_count: int = Field(..., description="Number of uploaded documents")
    chunk_count: int = Field(..., description="Number of indexed text chunks")
    history_length: int = Field(..., description="Number of turns in conversation history")
    # Timestamps are serialized as strings; format is set by the producer — TODO confirm ISO-8601.
    created_at: str
    expires_at: str
# ─── Documents ──────────────────────────────────────────────────────────────
class IndexResponse(BaseModel):
    """Result of a document-indexing request, including per-file success/failure."""

    session_id: str
    indexed_files: List[str] = Field(..., description="Names of successfully indexed files")
    failed_files: List[str] = Field(default_factory=list, description="Files that failed to parse")
    chunk_count: int = Field(..., description="Total chunks in FAISS index")
    message: str = Field(default="Documents indexed successfully")
# ─── Chat ────────────────────────────────────────────────────────────────────
class Source(BaseModel):
    """One retrieved chunk cited as supporting evidence for an answer."""

    filename: str
    chunk_index: int
    excerpt: str = Field(..., description="Short preview of the retrieved chunk")
class ChatRequest(BaseModel):
    """Incoming chat query plus retrieval and generation knobs.

    ``top_k``, ``temperature`` and ``max_new_tokens`` are range-validated
    by pydantic (``ge``/``le``); the question must be non-empty.
    """

    question: str = Field(..., min_length=1, description="The question to ask about your documents")
    hf_token: str = Field(..., description="Hugging Face API token (Write access required for Llama 3)")
    top_k: int = Field(default=5, ge=1, le=20, description="Number of chunks to retrieve")
    temperature: float = Field(default=0.3, ge=0.0, le=1.0)
    max_new_tokens: int = Field(default=512, ge=64, le=2048)
class ChatResponse(BaseModel):
    """Answer to a chat query, echoing the question and citing retrieved sources."""

    session_id: str
    question: str
    answer: str
    sources: List[Source] = Field(default_factory=list)
    model: str = Field(default="meta-llama/Llama-3.1-8B-Instruct")
# ─── Health ──────────────────────────────────────────────────────────────────
class HealthResponse(BaseModel):
    """Service liveness report for the health-check endpoint."""

    status: str = "ok"
    version: str
    uptime_seconds: float
    active_sessions: int
# ─── Generic ─────────────────────────────────────────────────────────────────
class MessageResponse(BaseModel):
    """Generic single-message response body."""

    message: str