Upload 6 files
Browse files- app.py +55 -3
- config.local.yaml +13 -2
- model_tags.py +90 -0
app.py
CHANGED
|
@@ -204,6 +204,56 @@ def _recompute_out_and_state_from_tokens(model_name: str, model_tokens: List[int
|
|
| 204 |
tokens = tokens[CONFIG.CHUNK_LEN :]
|
| 205 |
return out, model_state
|
| 206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
logger.info(f"STRATEGY - {CONFIG.STRATEGY}")
|
| 208 |
|
| 209 |
logGPUState()
|
|
@@ -1604,9 +1654,11 @@ async def chat_completions(request: ChatCompletionRequest):
|
|
| 1604 |
completionId = str(next(CompletionIdGenerator))
|
| 1605 |
logger.info(f"[REQ] {completionId} - {request.model_dump()}")
|
| 1606 |
|
| 1607 |
-
#
|
| 1608 |
-
|
| 1609 |
-
|
|
|
|
|
|
|
| 1610 |
modelName = request.model
|
| 1611 |
|
| 1612 |
if request.model == "rwkv-latest":
|
|
|
|
| 204 |
tokens = tokens[CONFIG.CHUNK_LEN :]
|
| 205 |
return out, model_state
|
| 206 |
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
# Move ChatCompletionRequest definition above fallback apply_model_tags_to_request
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
# Move ChatCompletionRequest definition above fallback apply_model_tags_to_request
|
| 213 |
+
|
| 214 |
+
try:
    from model_tags import apply_model_tags_to_request_obj as apply_model_tags_to_request
except Exception:
    # Deliberately broad: whatever goes wrong while importing the helper
    # module, fall back to the inline implementation below so that
    # `apply_model_tags_to_request` is always defined.

    # Lower-cased tag alias -> ((attribute, value), ...) applied to the request.
    _FALLBACK_TAG_FLAGS = {
        alias: flags
        for aliases, flags in (
            (
                ("thinking", "think", "reasoning", "reason"),
                (("enable_reasoning", True), ("auto_reasoning", False)),
            ),
            (
                ("web", "web_search", "search"),
                (
                    ("enable_web_search", True),
                    ("web_search", True),
                    ("auto_web_search", False),
                ),
            ),
            (
                ("no-web", "disable-web", "no-web-search"),
                (("enable_web_search", False), ("web_search", False)),
            ),
            (
                ("tools", "enable-tools"),
                (("enable_tools", True), ("auto_tools", False)),
            ),
            (("no-tools", "disable-tools"), (("enable_tools", False),)),
            (
                ("file", "file_tool", "filetool"),
                (("enable_file_tool", True), ("auto_file_tool", False)),
            ),
            (("no-file", "disable-file"), (("enable_file_tool", False),)),
            (
                ("universal", "univ"),
                (("enable_universal", True), ("auto_universal", False)),
            ),
            (("stream",), (("stream", True),)),
        )
        for alias in aliases
    }

    def apply_model_tags_to_request(req: Any):
        """Split ``req.model`` on ':' and turn the suffix tags into flags.

        ``"base:tag1:tag2"`` becomes ``req.model = "base"`` and every
        recognised tag (case-insensitive) sets its associated boolean
        attributes on ``req``. Unknown tags are ignored.

        The function is best-effort, like the ``model_tags`` implementation
        it stands in for: each attribute is assigned independently, and an
        object that rejects one attribute (missing field, read-only model)
        does not abort the request or prevent the remaining flags from
        being applied.

        Args:
            req: Request-like object exposing a string ``model`` attribute.
                ``None``, a missing/non-string ``model`` and a model without
                ':' are all left untouched.
        """
        model = getattr(req, "model", None)
        if not isinstance(model, str) or ":" not in model:
            return

        # Strip before filtering so whitespace-only segments are dropped
        # instead of becoming an empty base model name or an empty tag.
        parts = [seg for seg in (raw.strip() for raw in model.split(":")) if seg]
        if len(parts) <= 1:
            return
        base, *tags = parts

        assignments = [("model", base)]
        for tag in tags:
            assignments.extend(_FALLBACK_TAG_FLAGS.get(tag.lower(), ()))

        for attr, value in assignments:
            try:
                setattr(req, attr, value)
            except (AttributeError, TypeError, ValueError):
                # The request type does not accept this attribute; skip it
                # and keep applying the others.
                continue
|
| 255 |
+
|
| 256 |
+
|
| 257 |
logger.info(f"STRATEGY - {CONFIG.STRATEGY}")
|
| 258 |
|
| 259 |
logGPUState()
|
|
|
|
| 1654 |
completionId = str(next(CompletionIdGenerator))
|
| 1655 |
logger.info(f"[REQ] {completionId} - {request.model_dump()}")
|
| 1656 |
|
| 1657 |
+
# Apply any legacy model suffix tags (e.g., 'rwkv-latest:thinking' -> enable_reasoning)
|
| 1658 |
+
# This helper is defined at module level so it can be unit-tested and reused.
|
| 1659 |
+
|
| 1660 |
+
# Apply legacy tags (if present) to request and proceed normally
|
| 1661 |
+
apply_model_tags_to_request(request)
|
| 1662 |
modelName = request.model
|
| 1663 |
|
| 1664 |
if request.model == "rwkv-latest":
|
config.local.yaml
CHANGED
|
@@ -1,8 +1,13 @@
|
|
| 1 |
HOST: "0.0.0.0"
|
| 2 |
PORT: 7860
|
| 3 |
-
STRATEGY: "cpu fp16"
|
| 4 |
-
RWKV_CUDA_ON: False
|
| 5 |
CHUNK_LEN: 256
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
MODELS:
|
| 7 |
- SERVICE_NAME: "rwkv7-g1a-0.1b-20250728-ctx4096"
|
| 8 |
DOWNLOAD_MODEL_FILE_NAME: "rwkv7-g1a-0.1b-20250728-ctx4096.pth"
|
|
@@ -11,6 +16,9 @@ MODELS:
|
|
| 11 |
REASONING: True
|
| 12 |
DEFAULT_CHAT: True
|
| 13 |
DEFAULT_REASONING: True
|
|
|
|
|
|
|
|
|
|
| 14 |
DEFAULT_SAMPLER:
|
| 15 |
max_tokens: 4096
|
| 16 |
temperature: 1.0
|
|
@@ -25,3 +33,6 @@ MODELS:
|
|
| 25 |
ALLOW_WEB_SEARCH: True
|
| 26 |
ALLOW_TOOLS: True
|
| 27 |
ALLOW_REASONING: True
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
HOST: "0.0.0.0"
|
| 2 |
PORT: 7860
|
| 3 |
+
STRATEGY: "cpu fp16"
|
| 4 |
+
RWKV_CUDA_ON: False
|
| 5 |
CHUNK_LEN: 256
|
| 6 |
+
DEFAULT_STREAM: True
|
| 7 |
+
AUTO_ENABLE_TOOLS: True
|
| 8 |
+
AUTO_ENABLE_REASONING: True
|
| 9 |
+
AUTO_ENABLE_WEB_SEARCH: True
|
| 10 |
+
ENABLE_TOOLS_BY_DEFAULT: False
|
| 11 |
MODELS:
|
| 12 |
- SERVICE_NAME: "rwkv7-g1a-0.1b-20250728-ctx4096"
|
| 13 |
DOWNLOAD_MODEL_FILE_NAME: "rwkv7-g1a-0.1b-20250728-ctx4096.pth"
|
|
|
|
| 16 |
REASONING: True
|
| 17 |
DEFAULT_CHAT: True
|
| 18 |
DEFAULT_REASONING: True
|
| 19 |
+
ALLOW_WEB_SEARCH: True
|
| 20 |
+
ALLOW_TOOLS: True
|
| 21 |
+
ALLOW_REASONING: True
|
| 22 |
DEFAULT_SAMPLER:
|
| 23 |
max_tokens: 4096
|
| 24 |
temperature: 1.0
|
|
|
|
| 33 |
ALLOW_WEB_SEARCH: True
|
| 34 |
ALLOW_TOOLS: True
|
| 35 |
ALLOW_REASONING: True
|
| 36 |
+
STATE_STORE_PATH: "./state_store.json"
|
| 37 |
+
STATE_STORE_FLUSH_INTERVAL: 5
|
| 38 |
+
STATE_STORE_SAVE_ON_UPDATE: True
|
model_tags.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Utility module for parsing model tags like 'model:thinking:web' and
|
| 2 |
+
applying them as request flags on a request-like object.
|
| 3 |
+
|
| 4 |
+
The functions here operate on any object with the expected attributes (model,
|
| 5 |
+
enable_reasoning, auto_reasoning, enable_web_search, web_search, auto_web_search,
|
| 6 |
+
enable_tools, auto_tools, enable_file_tool, auto_file_tool, enable_universal,
|
| 7 |
+
auto_universal, stream) and don't require heavy app imports, which makes them
|
| 8 |
+
easy to unit-test.
|
| 9 |
+
"""
|
| 10 |
+
from types import SimpleNamespace
|
| 11 |
+
from typing import Any
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Recognised tag aliases (lower-case) grouped with the (attribute, value)
# pairs they set on the request object.
_TAG_FLAG_GROUPS = (
    (
        ("thinking", "think", "reasoning", "reason"),
        (("enable_reasoning", True), ("auto_reasoning", False)),
    ),
    (
        ("web", "web_search", "search"),
        (
            ("enable_web_search", True),
            ("web_search", True),
            ("auto_web_search", False),
        ),
    ),
    (
        ("no-web", "disable-web", "no-web-search"),
        (("enable_web_search", False), ("web_search", False)),
    ),
    (
        ("tools", "enable-tools"),
        (("enable_tools", True), ("auto_tools", False)),
    ),
    (("no-tools", "disable-tools"), (("enable_tools", False),)),
    (
        ("file", "file_tool", "filetool"),
        (("enable_file_tool", True), ("auto_file_tool", False)),
    ),
    (("no-file", "disable-file"), (("enable_file_tool", False),)),
    (
        ("universal", "univ"),
        (("enable_universal", True), ("auto_universal", False)),
    ),
    (("stream",), (("stream", True),)),
)

# Flat alias -> flags lookup built once at import time.
_TAG_FLAGS = {
    alias: flags for aliases, flags in _TAG_FLAG_GROUPS for alias in aliases
}


def _set_flag(req: Any, name: str, value: Any) -> None:
    """Best-effort ``setattr``: ignore objects that reject the attribute."""
    try:
        setattr(req, name, value)
    except (AttributeError, TypeError, ValueError):
        # Read-only attribute, missing slot/field, or a model that validates
        # assignments; the caller wants the remaining flags applied anyway.
        pass


def apply_model_tags_to_request_obj(req: Any) -> None:
    """Apply model suffix tags to a request-like object.

    A model name of the form ``"base:tag1:tag2"`` is split on ':'. The
    object's ``model`` attribute is rewritten to ``base`` and each recognised
    tag (matched case-insensitively, surrounding whitespace ignored) sets the
    boolean flags listed in ``_TAG_FLAG_GROUPS``. Unknown tags are ignored;
    callers may log them if needed.

    Every attribute is assigned independently, so an object that rejects one
    attribute still receives the others.

    Args:
        req: Any object with a string ``model`` attribute and mutable flag
            attributes (a ``SimpleNamespace``, a plain object, a pydantic
            model, ...). ``None``, a missing or non-string ``model``, and a
            model without any tag are left untouched.
    """
    model = getattr(req, "model", None)
    if not isinstance(model, str) or ":" not in model:
        return

    # Strip before filtering so whitespace-only segments are dropped rather
    # than turning into an empty base model name or an empty tag.
    parts = [seg for seg in (raw.strip() for raw in model.split(":")) if seg]
    if len(parts) <= 1:
        return
    base, *tags = parts

    # If the object is read-only for `model`, don't fail; still apply flags.
    _set_flag(req, "model", base)

    for tag in tags:
        for attr, value in _TAG_FLAGS.get(tag.lower(), ()):
            _set_flag(req, attr, value)
|