# Bc-AI's picture
# Upload folder using huggingface_hub
# af68acb verified
from pydantic import BaseModel
from typing import List, Optional, Dict, Any
from enum import Enum
class NodeType(str, Enum):
    """Role a worker node plays in the cluster.

    Inherits from ``str`` so members compare equal to (and serialize as)
    their plain string values, e.g. ``NodeType.GPU == "gpu"``.
    """

    HEAD = "head"        # coordinator / entry-point node
    RAM = "ram"          # memory-focused node
    DISK = "disk"        # storage-focused node
    COMPUTE = "compute"  # general-purpose CPU compute node
    GPU = "gpu"          # GPU accelerator node
    TPU = "tpu"          # TPU accelerator node
    NPU = "npu"          # NPU accelerator node
class ChatMessage(BaseModel):
    """A single turn in a chat conversation (OpenAI-style message)."""

    role: str  # speaker of the turn — "user" or "assistant"
    content: str  # the message text
class ChatRequest(BaseModel):
    """Inbound chat-completion request (OpenAI-compatible shape)."""

    messages: List[ChatMessage]  # full conversation history, oldest first
    model: str = "sam-x-nano"  # model identifier to route the request to
    max_tokens: Optional[int] = 512  # cap on generated tokens
    temperature: Optional[float] = 0.8  # sampling temperature; higher = more random
    top_k: Optional[int] = 40  # sample only from the k most likely tokens
    top_p: Optional[float] = 0.9  # nucleus-sampling probability mass cutoff
    repetition_penalty: Optional[float] = 1.1  # >1.0 discourages repeated tokens
    stream: Optional[bool] = False # Support for streaming
    use_token_distribution: Optional[bool] = False # Enable token-by-token distribution for autoregressive models
class ChatResponse(BaseModel):
    """Non-streaming chat-completion response (OpenAI-compatible shape)."""

    id: str  # unique identifier for this completion
    object: str = "chat.completion"  # fixed OpenAI object-type tag
    created: int  # creation time — presumably a Unix timestamp; confirm against producer
    model: str  # model that produced the completion
    choices: List[Dict[str, Any]]  # generated alternatives; raw dicts rather than typed models
    usage: Optional[Dict[str, int]] = None  # token accounting (e.g. prompt/completion counts), if reported
class StreamChoice(BaseModel):
    """One choice entry inside a streaming chunk."""

    index: int  # position of this choice among the request's alternatives
    delta: Dict[str, Any] # For streaming, contains the delta content
    finish_reason: Optional[str] = None  # set on the final chunk of a choice; None while streaming
class ChatStreamResponse(BaseModel):
    """One streamed chunk of a chat completion (OpenAI-compatible shape)."""

    id: str  # identifier shared by all chunks of one completion
    object: str = "chat.completion.chunk"  # fixed OpenAI object-type tag for stream chunks
    created: int  # creation time — presumably a Unix timestamp; confirm against producer
    model: str  # model producing the stream
    choices: List[StreamChoice]  # incremental deltas for each choice
class WorkerStatus(BaseModel):
    """Health/status report for a single worker node.

    NOTE(review): in pydantic v2 the field name ``model_name`` falls in the
    protected ``model_`` namespace and may emit a warning — confirm the
    pydantic version in use before renaming, since callers depend on it.
    """

    model_name: str  # model the worker is serving
    node_type: Optional[NodeType] = None  # hardware/role category, if known
    is_active: bool  # whether the worker is currently accepting work
    load: float  # current load figure — units not established here; verify with reporter
    last_heartbeat: int  # time of last heartbeat — presumably a Unix timestamp; confirm
    capabilities: Optional[Dict[str, Any]] = None  # free-form capability metadata
class TaskFileRequest(BaseModel):
    """Request to enqueue a task for one or more workers."""

    task_type: str  # category of task to run
    model_name: str  # model the task targets
    task_data: Dict[str, Any]  # free-form task payload
    priority: str = "normal"  # scheduling priority label
    max_workers: int = 1  # upper bound on workers assigned to this task