# kerdos-llm-rag-api / models.py
# Author: Bhaskar Ram
# feat: Kerdos AI RAG API v1.0 (commit b1a3dce)
"""
Pydantic request/response models for the Kerdos AI RAG API.
"""
from __future__ import annotations
from typing import List, Optional
from pydantic import BaseModel, Field
# ─── Session ────────────────────────────────────────────────────────────────
class SessionCreateResponse(BaseModel):
    """Body returned after a new session is created."""
    session_id: str = Field(description="Unique session identifier")
    message: str = "Session created successfully"
class SessionStatusResponse(BaseModel):
    """Current state of an existing session."""
    session_id: str
    document_count: int = Field(description="Number of uploaded documents")
    chunk_count: int = Field(description="Number of indexed text chunks")
    history_length: int = Field(description="Number of turns in conversation history")
    # Timestamps are serialized as strings (ISO format presumed — confirm against producer).
    created_at: str
    expires_at: str
# ─── Documents ──────────────────────────────────────────────────────────────
class IndexResponse(BaseModel):
    """Outcome of a document-indexing request."""
    session_id: str
    indexed_files: List[str] = Field(description="Names of successfully indexed files")
    # Partial failures are reported rather than raised, so this defaults to empty.
    failed_files: List[str] = Field(default_factory=list, description="Files that failed to parse")
    chunk_count: int = Field(description="Total chunks in FAISS index")
    message: str = "Documents indexed successfully"
# ─── Chat ────────────────────────────────────────────────────────────────────
class Source(BaseModel):
    """One retrieved chunk cited in a chat answer."""
    filename: str
    chunk_index: int
    excerpt: str = Field(description="Short preview of the retrieved chunk")
class ChatRequest(BaseModel):
    """Payload for asking a question against indexed documents."""
    question: str = Field(min_length=1, description="The question to ask about your documents")
    hf_token: str = Field(description="Hugging Face API token (Write access required for Llama 3)")
    # Generation knobs, each bounded to keep requests well-formed.
    top_k: int = Field(default=5, ge=1, le=20, description="Number of chunks to retrieve")
    temperature: float = Field(default=0.3, ge=0.0, le=1.0)
    max_new_tokens: int = Field(default=512, ge=64, le=2048)
class ChatResponse(BaseModel):
    """Answer to a chat request, with the chunks it was grounded on."""
    session_id: str
    question: str
    answer: str
    sources: List[Source] = Field(default_factory=list)
    # NOTE(review): under Pydantic v2 a field named "model" clashes with the
    # protected "model_" namespace and emits a UserWarning — confirm the
    # pydantic version in use and whether protected_namespaces=() is needed.
    model: str = "meta-llama/Llama-3.1-8B-Instruct"
# ─── Health ──────────────────────────────────────────────────────────────────
class HealthResponse(BaseModel):
    """Service liveness and basic runtime stats."""
    status: str = Field(default="ok")
    version: str
    uptime_seconds: float
    active_sessions: int
# ─── Generic ─────────────────────────────────────────────────────────────────
class MessageResponse(BaseModel):
    """Generic single-message envelope."""
    message: str = Field()