"""Pydantic schemas for chat requests/responses, worker status and task submission."""

from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel


class NodeType(str, Enum):
    """Kinds of node a worker can be tagged with.

    The ``str`` mix-in makes each member compare equal to its plain string
    value (for example ``NodeType.GPU == "gpu"``).
    """

    HEAD = "head"
    RAM = "ram"
    DISK = "disk"
    COMPUTE = "compute"
    GPU = "gpu"
    TPU = "tpu"
    NPU = "npu"


class ChatMessage(BaseModel):
    """A single message in a chat conversation."""

    # Expected to be "user" or "assistant"; the type is a plain ``str``, so
    # other values are not rejected by this model.
    role: str
    # Text of the message.
    content: str


class ChatRequest(BaseModel):
    """Request body for a chat completion.

    Only ``messages`` is required; every other field has a default.
    """

    # Conversation messages, in the order supplied by the caller.
    messages: List[ChatMessage]
    # Name of the model to run.
    model: str = "sam-x-nano"
    # Generation / sampling settings. Each is Optional, so an explicit
    # ``None`` is accepted in addition to the default shown.
    # NOTE(review): how ``None`` is interpreted is up to the consumer of this
    # model, which is not visible here - confirm.
    max_tokens: Optional[int] = 512
    temperature: Optional[float] = 0.8
    top_k: Optional[int] = 40
    top_p: Optional[float] = 0.9
    repetition_penalty: Optional[float] = 1.1
    # Support for streaming.
    stream: Optional[bool] = False
    # Enable token-by-token distribution for autoregressive models.
    use_token_distribution: Optional[bool] = False


class ChatResponse(BaseModel):
    """Non-streaming chat completion response."""

    # NOTE(review): the field names (id/object/created/choices/usage) look like
    # the OpenAI chat-completion schema - confirm before relying on that.
    id: str
    # Object-type tag; defaults to "chat.completion".
    object: str = "chat.completion"
    # Presumably a Unix timestamp in seconds - TODO confirm against the producer.
    created: int
    model: str
    # Free-form choice dictionaries; their keys are not constrained here.
    choices: List[Dict[str, Any]]
    # Optional mapping of string keys to integer counts.
    usage: Optional[Dict[str, int]] = None


class StreamChoice(BaseModel):
    """One choice entry inside a streamed response chunk."""

    index: int
    # For streaming, contains the delta content.
    delta: Dict[str, Any]
    finish_reason: Optional[str] = None


class ChatStreamResponse(BaseModel):
    """A single chunk of a streamed chat completion."""

    id: str
    # Object-type tag; defaults to "chat.completion.chunk".
    object: str = "chat.completion.chunk"
    # Presumably a Unix timestamp in seconds - TODO confirm against the producer.
    created: int
    model: str
    choices: List[StreamChoice]


class WorkerStatus(BaseModel):
    """Status record describing one worker."""

    model_name: str
    # Optional node classification; ``None`` when not provided.
    node_type: Optional[NodeType] = None
    is_active: bool
    # NOTE(review): unit/range of ``load`` is not defined here - confirm.
    load: float
    # Presumably the timestamp of the most recent heartbeat - TODO confirm unit.
    last_heartbeat: int
    # Optional free-form capability description.
    capabilities: Optional[Dict[str, Any]] = None


class TaskFileRequest(BaseModel):
    """Request describing a task to be submitted for a given model."""

    task_type: str
    model_name: str
    # Free-form task payload; its schema is not constrained here.
    task_data: Dict[str, Any]
    # Priority label; a plain string defaulting to "normal".
    priority: str = "normal"
    # Defaults to 1.
    max_workers: int = 1