File size: 1,660 Bytes
af68acb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from pydantic import BaseModel
from typing import List, Optional, Dict, Any
from enum import Enum


class NodeType(str, Enum):
    """Role/resource category a cluster node advertises.

    Inherits from ``str`` so values serialize directly as plain strings
    in JSON payloads (e.g. in ``WorkerStatus.node_type``).
    """

    HEAD = "head"        # coordinator / head node
    RAM = "ram"          # memory-focused node
    DISK = "disk"        # storage-focused node
    COMPUTE = "compute"  # general CPU compute node
    GPU = "gpu"          # GPU accelerator node
    TPU = "tpu"          # TPU accelerator node
    NPU = "npu"          # NPU accelerator node


class ChatMessage(BaseModel):
    """A single turn in a chat conversation."""

    role: str  # "user" or "assistant"
    content: str  # the message text


class ChatRequest(BaseModel):
    """Request body for a chat completion.

    Sampling fields mirror common LLM inference knobs; all are optional
    with the defaults shown below.
    """

    messages: List[ChatMessage]  # conversation history, oldest first (presumably — confirm against caller)
    model: str = "sam-x-nano"  # model identifier to route the request to
    max_tokens: Optional[int] = 512  # cap on generated tokens
    temperature: Optional[float] = 0.8  # sampling temperature
    top_k: Optional[int] = 40  # top-k sampling cutoff
    top_p: Optional[float] = 0.9  # nucleus-sampling cutoff
    repetition_penalty: Optional[float] = 1.1  # >1.0 discourages repeats
    stream: Optional[bool] = False  # Support for streaming
    use_token_distribution: Optional[bool] = False  # Enable token-by-token distribution for autoregressive models


class ChatResponse(BaseModel):
    """Non-streaming chat completion response (OpenAI-compatible shape)."""

    id: str  # unique completion identifier
    object: str = "chat.completion"  # fixed object-type discriminator
    created: int  # creation timestamp (presumably Unix seconds — confirm producer)
    model: str  # model that produced the completion
    choices: List[Dict[str, Any]]  # completion choices; free-form dicts, schema set by producer
    usage: Optional[Dict[str, int]] = None  # token-count accounting, if available


class StreamChoice(BaseModel):
    """One choice entry within a streaming chunk."""

    index: int  # position of this choice in the chunk
    delta: Dict[str, Any]  # For streaming, contains the delta content
    finish_reason: Optional[str] = None  # set on the final chunk of a choice


class ChatStreamResponse(BaseModel):
    """Single streamed chunk of a chat completion (OpenAI-compatible shape)."""

    id: str  # identifier shared by all chunks of one completion
    object: str = "chat.completion.chunk"  # fixed object-type discriminator
    created: int  # creation timestamp (presumably Unix seconds — confirm producer)
    model: str  # model producing the stream
    choices: List[StreamChoice]  # incremental deltas for each choice


class WorkerStatus(BaseModel):
    """Health/status report for a worker node."""

    model_name: str  # model hosted by this worker
    node_type: Optional[NodeType] = None  # hardware/role category, if known
    is_active: bool  # whether the worker is currently serving
    load: float  # current load metric (units/range not defined here — confirm producer)
    last_heartbeat: int  # timestamp of last heartbeat (presumably Unix seconds — confirm)
    capabilities: Optional[Dict[str, Any]] = None  # free-form capability descriptors


class TaskFileRequest(BaseModel):
    """Request to submit a task for distribution to workers."""

    task_type: str  # kind of task to run (values defined by the consumer)
    model_name: str  # model the task targets
    task_data: Dict[str, Any]  # task payload; schema depends on task_type
    priority: str = "normal"  # scheduling priority label
    max_workers: int = 1  # upper bound on workers assigned to the task