File size: 8,014 Bytes
b219d99
 
 
 
 
 
 
 
 
 
 
 
 
aed88a2
b219d99
 
 
 
 
 
 
 
 
 
 
 
 
aed88a2
 
 
 
 
 
 
 
 
b219d99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aed88a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b219d99
 
aed88a2
 
 
 
 
 
 
 
 
b219d99
 
 
 
 
aed88a2
 
 
b219d99
 
 
aed88a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b219d99
 
 
 
 
 
 
 
 
aed88a2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from pydantic import BaseModel, Field
from typing import List, Optional, Union, Any

import sys


from pydantic_settings import BaseSettings


# Command-line settings for the service.
# ``cli_parse_args=True`` asks pydantic-settings to fill the fields from the
# process command line when the class is instantiated.
# NOTE(review): a class docstring is intentionally not added here, since
# pydantic-settings may surface it in the generated CLI help -- confirm before
# adding one.
class CliConfig(
    BaseSettings,
    cli_parse_args=True,
    cli_use_class_docs_for_groups=True,
):
    # Path of the configuration file; defaults to a local YAML file.
    CONFIG_FILE: str = Field(
        default="./config.local.yaml",
        description="Config file path",
    )


# Module-level CLI settings instance, created once at import time.
# CliConfig is declared with cli_parse_args=True, so constructing it here is
# presumably what triggers command-line parsing.
# NOTE(review): the explicit CONFIG_FILE keyword repeats the field's default;
# confirm how it is meant to rank against CLI / environment-provided values.
CLI_CONFIG = CliConfig(CONFIG_FILE="./config.local.yaml")


class SamplerConfig(BaseModel):
    """Default sampler configuration for each model."""

    # --- Generation parameters -------------------------------------------
    max_tokens: int = Field(
        default=512,
        description="Maximum number of tokens to generate.",
    )
    temperature: float = Field(
        default=1.0,
        description="Sampling temperature.",
    )
    top_p: float = Field(
        default=0.3,
        description="Top-p sampling threshold.",
    )
    presence_penalty: float = Field(
        default=0.5,
        description="Presence penalty.",
    )
    count_penalty: float = Field(
        default=0.5,
        description="Count penalty.",
    )
    penalty_decay: float = Field(
        default=0.996,
        description="Penalty decay factor.",
    )

    # --- Stop conditions --------------------------------------------------
    stop: List[str] = Field(
        default=["\n\n"],
        description="List of stop sequences.",
    )
    stop_tokens: List[int] = Field(
        default=[0],
        description="List of stop tokens.",
    )

    # --- Feature overrides ------------------------------------------------
    # Tri-state switches: None means "no override at the sampler level".
    ALLOW_WEB_SEARCH: Optional[bool] = Field(
        default=None,
        description="Per-sampler override for allowing web search. If None, falls back to model/global.",
    )
    ALLOW_FILE_TOOL: Optional[bool] = Field(
        default=None,
        description="Per-sampler override for allowing file tools (e.g., file_read). If None, falls back to model/global.",
    )
    ALLOW_TOOLS: Optional[bool] = Field(
        default=None,
        description="Per-sampler override for allowing server-side tools. If None, falls back to model/global.",
    )
    ALLOW_REASONING: Optional[bool] = Field(
        default=None,
        description="Per-sampler override for allowing built-in reasoning. If None, falls back to model/global.",
    )

    # --- Client UI hints --------------------------------------------------
    # Advisory only: carried in the config so UI clients can decide which
    # controls to render; they are not server-side feature switches.
    SHOW_WEB_SEARCH_BUTTON: Optional[bool] = Field(
        default=None,
        description="Whether to show the web-search toggle in the client UI for this sampler",
    )
    SHOW_FILE_UPLOAD_BUTTON: Optional[bool] = Field(
        default=None,
        description="Whether to show the file-upload control in the client UI for this sampler",
    )
    SHOW_REASONING_TOGGLE: Optional[bool] = Field(
        default=None,
        description="Whether to show the reasoning (think) toggle in the client UI for this sampler",
    )
    UI_STYLE: Optional[str] = Field(
        default=None,
        description="UI style hint that clients may use to render controls (example: 'whatsapp' or 'compact')",
    )


class ModelConfig(BaseModel):
    """Configuration for each individual model."""

    # --- Identity and weights location ------------------------------------
    # SERVICE_NAME is the only required field of this model.
    SERVICE_NAME: str = Field(..., description="Service name of the model.")
    MODEL_FILE_PATH: Optional[str] = Field(
        default=None,
        description="Model file path.",
    )

    # --- Download settings ------------------------------------------------
    DOWNLOAD_MODEL_FILE_NAME: Optional[str] = Field(
        default=None,
        description="Model name, should end with .pth",
    )
    DOWNLOAD_MODEL_REPO_ID: Optional[str] = Field(
        default=None,
        description="Model repository ID on Hugging Face Hub.",
    )
    DOWNLOAD_MODEL_DIR: Optional[str] = Field(
        default="./models",
        description="Directory to download the model to.",
    )

    # --- Role flags -------------------------------------------------------
    REASONING: bool = Field(
        default=False,
        description="Whether reasoning is enabled for this model.",
    )
    DEFAULT_CHAT: bool = Field(
        default=False,
        description="Whether this model is the default chat model.",
    )
    DEFAULT_REASONING: bool = Field(
        default=False,
        description="Whether this model is the default reasoning model.",
    )

    # --- Sampling defaults ------------------------------------------------
    # Every sampler field is passed explicitly (rather than relying on
    # SamplerConfig's own defaults) so the default instance records all of
    # them as explicitly set.
    DEFAULT_SAMPLER: SamplerConfig = Field(
        default=SamplerConfig(
            max_tokens=512,
            temperature=1.0,
            top_p=0.3,
            presence_penalty=0.5,
            count_penalty=0.5,
            penalty_decay=0.996,
            stop=["\n\n"],
            stop_tokens=[0],
            ALLOW_WEB_SEARCH=None,
            ALLOW_FILE_TOOL=None,
            ALLOW_TOOLS=None,
            ALLOW_REASONING=None,
            SHOW_WEB_SEARCH_BUTTON=None,
            SHOW_FILE_UPLOAD_BUTTON=None,
            SHOW_REASONING_TOGGLE=None,
            UI_STYLE=None,
        ),
        description="Default sampler configuration for this model.",
    )
    VOCAB: str = Field(
        default="rwkv_vocab_v20230424",
        description="Vocab Name",
    )

    # --- Per-model feature switches (all enabled unless configured off) ----
    ALLOW_WEB_SEARCH: bool = Field(
        default=True,
        description="Whether this model supports web search injection",
    )
    ALLOW_TOOLS: bool = Field(
        default=True,
        description="Whether this model supports server-side tools execution",
    )
    ALLOW_REASONING: bool = Field(
        default=True,
        description="Whether this model supports built-in reasoning (in-process)",
    )
    ALLOW_FILE_TOOL: bool = Field(
        default=True,
        description="Whether this model supports file-based tools (file_upload/file_read)",
    )

    # --- Per-model client UI hints ------------------------------------------
    # Clients may consult these to decide which controls to show or hide.
    SHOW_WEB_SEARCH_BUTTON: bool = Field(
        default=True,
        description="Whether to show the web search toggle for this model in client UIs",
    )
    SHOW_FILE_UPLOAD_BUTTON: bool = Field(
        default=True,
        description="Whether to show a file upload button for this model in client UIs",
    )
    SHOW_REASONING_TOGGLE: bool = Field(
        default=True,
        description="Whether to show the reasoning toggle for this model in client UIs",
    )


class RootConfig(BaseModel):
    """Root configuration for the RWKV service."""

    # --- Network binding --------------------------------------------------
    HOST: Optional[str] = Field(
        default="127.0.0.1",
        description="Host IP address to bind to.",
    )
    PORT: Optional[int] = Field(
        default=8000,
        description="Port number to listen on.",
    )

    # --- Execution settings -----------------------------------------------
    STRATEGY: str = Field(
        default="cpu",
        description="Strategy for model execution (e.g., 'cuda fp16').",
    )
    RWKV_CUDA_ON: bool = Field(
        default=False,
        description="Whether to enable RWKV CUDA kernel.",
    )
    CHUNK_LEN: int = Field(
        default=256,
        description="Chunk length for processing.",
    )

    # --- Models (required) ------------------------------------------------
    MODELS: List[ModelConfig] = Field(
        ...,
        description="List of model configurations.",
    )

    # --- Defaults for automatic behaviour ---------------------------------
    DEFAULT_STREAM: bool = Field(
        default=True,
        description="Whether streaming is enabled by default",
    )
    AUTO_ENABLE_TOOLS: bool = Field(
        default=True,
        description="Whether to try auto-enabling tools based on intent",
    )
    AUTO_ENABLE_REASONING: bool = Field(
        default=True,
        description="Whether to auto-enable reasoning when needed",
    )
    AUTO_ENABLE_WEB_SEARCH: bool = Field(
        default=True,
        description="Whether to auto-enable web search based on intent",
    )
    ENABLE_TOOLS_BY_DEFAULT: bool = Field(
        default=False,
        description="Whether tools are enabled by default (without explicit request)",
    )
    ENABLE_WEB_SEARCH_BY_DEFAULT: bool = Field(
        default=True,
        description="Whether web search is enabled by default",
    )
    ENABLE_REASONING_BY_DEFAULT: bool = Field(
        default=True,
        description="Whether model reasoning is enabled by default when requested/supported",
    )

    # --- State store persistence ------------------------------------------
    STATE_STORE_PATH: str = Field(
        default="./state_store.json",
        description="Path to persist streaming/resume state store",
    )
    STATE_STORE_FLUSH_INTERVAL: int = Field(
        default=5,
        description="Seconds between background flushes to the state store file",
    )
    STATE_STORE_SAVE_ON_UPDATE: bool = Field(
        default=True,
        description="Whether to save the state store to disk immediately when updated",
    )

    # --- File uploads / tools ---------------------------------------------
    UPLOAD_DIR: str = Field(
        default="./uploads",
        description="Directory to store uploaded files",
    )
    ALLOW_FILE_TOOL_BY_DEFAULT: bool = Field(
        default=True,
        description="Whether file-based tools are enabled by default",
    )

    # --- Server-wide client UI hints ----------------------------------------
    # Advisory only: these flags do not enable any functionality by themselves.
    SHOW_WEB_SEARCH_BUTTON_BY_DEFAULT: bool = Field(
        default=True,
        description="Whether to show web search toggle by default in clients",
    )
    SHOW_FILE_UPLOAD_BUTTON_BY_DEFAULT: bool = Field(
        default=True,
        description="Whether to show file-upload control by default in clients",
    )
    SHOW_REASONING_TOGGLE_BY_DEFAULT: bool = Field(
        default=True,
        description="Whether to show reasoning toggle by default in clients",
    )


import yaml
from pydantic import ValidationError

# Load the YAML file named by CLI_CONFIG.CONFIG_FILE and validate it against
# RootConfig, publishing the result as the module-level CONFIG.
#
# Any failure terminates the process with exit status 1. Each failure class is
# reported separately on stderr so that a missing file or a YAML syntax error
# is not mislabelled as a schema-validation problem.
try:
    with open(CLI_CONFIG.CONFIG_FILE, "r", encoding="utf-8") as f:
        # Hand PyYAML the stream itself (not a pre-read string) so that its
        # syntax-error marks can reference the file being parsed.
        CONFIG = RootConfig.model_validate(yaml.safe_load(f))
except (OSError, UnicodeDecodeError) as e:
    # File missing, unreadable, or not valid UTF-8.
    print(
        f"Failed to read config file '{CLI_CONFIG.CONFIG_FILE}': {e}",
        file=sys.stderr,
    )
    sys.exit(1)
except yaml.YAMLError as e:
    # The file was read but is not well-formed YAML.
    print(
        f"Failed to parse YAML in config file '{CLI_CONFIG.CONFIG_FILE}': {e}",
        file=sys.stderr,
    )
    sys.exit(1)
except ValidationError as e:
    # Well-formed YAML whose content does not satisfy the RootConfig schema
    # (this also covers an empty file, which safe_load returns as None).
    print(f"Pydantic Model Validation Failed: {e}", file=sys.stderr)
    # Exit with non-zero to indicate error when config is invalid
    sys.exit(1)