Ksjsjjdj committed on
Commit
b5f596d
·
verified ·
1 Parent(s): 1d08aca

Upload 6 files

Browse files
Files changed (3) hide show
  1. app.py +55 -3
  2. config.local.yaml +13 -2
  3. model_tags.py +90 -0
app.py CHANGED
@@ -204,6 +204,56 @@ def _recompute_out_and_state_from_tokens(model_name: str, model_tokens: List[int
204
  tokens = tokens[CONFIG.CHUNK_LEN :]
205
  return out, model_state
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  logger.info(f"STRATEGY - {CONFIG.STRATEGY}")
208
 
209
  logGPUState()
@@ -1604,9 +1654,11 @@ async def chat_completions(request: ChatCompletionRequest):
1604
  completionId = str(next(CompletionIdGenerator))
1605
  logger.info(f"[REQ] {completionId} - {request.model_dump()}")
1606
 
1607
- # Model suffix tags are not allowed. Instruct clients to use request flags instead.
1608
- if ":" in request.model:
1609
- raise HTTPException(400, "Model tags/suffixes (e.g., ':thinking', ':web') are deprecated. Please set enable_reasoning, web_search, and enable_file_tool in the request body instead.")
 
 
1610
  modelName = request.model
1611
 
1612
  if request.model == "rwkv-latest":
 
204
  tokens = tokens[CONFIG.CHUNK_LEN :]
205
  return out, model_state
206
 
207
+
208
+
209
+ # Move ChatCompletionRequest definition above fallback apply_model_tags_to_request
210
+
211
+
212
+ # Move ChatCompletionRequest definition above fallback apply_model_tags_to_request
213
+
214
try:
    from model_tags import apply_model_tags_to_request_obj as apply_model_tags_to_request
except Exception:
    # Deliberately broad: any failure while importing the helper module must not
    # prevent the app from starting, so an equivalent fallback is defined here.

    # Maps every recognised (lower-cased) tag to the (attribute, value) pairs it sets.
    _FALLBACK_TAG_FLAGS = {
        alias: flags
        for aliases, flags in (
            (
                ("thinking", "think", "reasoning", "reason"),
                (("enable_reasoning", True), ("auto_reasoning", False)),
            ),
            (
                ("web", "web_search", "search"),
                (
                    ("enable_web_search", True),
                    ("web_search", True),
                    ("auto_web_search", False),
                ),
            ),
            (
                ("no-web", "disable-web", "no-web-search"),
                (("enable_web_search", False), ("web_search", False)),
            ),
            (
                ("tools", "enable-tools"),
                (("enable_tools", True), ("auto_tools", False)),
            ),
            (("no-tools", "disable-tools"), (("enable_tools", False),)),
            (
                ("file", "file_tool", "filetool"),
                (("enable_file_tool", True), ("auto_file_tool", False)),
            ),
            (("no-file", "disable-file"), (("enable_file_tool", False),)),
            (
                ("universal", "univ"),
                (("enable_universal", True), ("auto_universal", False)),
            ),
            (("stream",), (("stream", True),)),
        )
        for alias in aliases
    }

    def apply_model_tags_to_request(req: Any) -> None:
        """Strip ':tag' suffixes from ``req.model`` and set the matching flags.

        ``req`` is mutated in place: ``req.model`` becomes the part before the
        first colon and every recognised tag sets its attributes on ``req``.
        Tags are matched case-insensitively; unknown or empty tags are ignored.

        Nothing is changed when ``req`` is falsy, when ``req.model`` is not a
        string containing ':', or when the part before the first colon is
        empty (there is no model name to fall back to).

        Attribute assignment is best-effort: an attribute the object refuses
        to accept is skipped and the remaining flags are still applied.
        """
        if not req:
            return
        model = getattr(req, "model", None)
        if not isinstance(model, str) or ":" not in model:
            return

        # Keep positions intact: the first segment is always the model name,
        # so an empty one is detected instead of a tag being promoted to it.
        base, *suffixes = (segment.strip() for segment in model.split(":"))
        if not base:
            return

        assignments = [("model", base)]
        for suffix in suffixes:
            assignments.extend(_FALLBACK_TAG_FLAGS.get(suffix.lower(), ()))

        for name, value in assignments:
            try:
                setattr(req, name, value)
            except (AttributeError, TypeError, ValueError):
                # Read-only, slotted or validated objects may reject a name;
                # skip just that one so the other flags still apply.
                continue
255
+
256
+
257
  logger.info(f"STRATEGY - {CONFIG.STRATEGY}")
258
 
259
  logGPUState()
 
1654
  completionId = str(next(CompletionIdGenerator))
1655
  logger.info(f"[REQ] {completionId} - {request.model_dump()}")
1656
 
1657
+ # Apply any legacy model suffix tags (e.g., 'rwkv-latest:thinking' -> enable_reasoning)
1658
+ # This helper is defined at module level so it can be unit-tested and reused.
1659
+
1660
+ # Apply legacy tags (if present) to request and proceed normally
1661
+ apply_model_tags_to_request(request)
1662
  modelName = request.model
1663
 
1664
  if request.model == "rwkv-latest":
config.local.yaml CHANGED
@@ -1,8 +1,13 @@
1
  HOST: "0.0.0.0"
2
  PORT: 7860
3
- STRATEGY: "cpu fp16"
4
- RWKV_CUDA_ON: False
5
  CHUNK_LEN: 256
 
 
 
 
 
6
  MODELS:
7
  - SERVICE_NAME: "rwkv7-g1a-0.1b-20250728-ctx4096"
8
  DOWNLOAD_MODEL_FILE_NAME: "rwkv7-g1a-0.1b-20250728-ctx4096.pth"
@@ -11,6 +16,9 @@ MODELS:
11
  REASONING: True
12
  DEFAULT_CHAT: True
13
  DEFAULT_REASONING: True
 
 
 
14
  DEFAULT_SAMPLER:
15
  max_tokens: 4096
16
  temperature: 1.0
@@ -25,3 +33,6 @@ MODELS:
25
  ALLOW_WEB_SEARCH: True
26
  ALLOW_TOOLS: True
27
  ALLOW_REASONING: True
 
 
 
 
1
  HOST: "0.0.0.0"
2
  PORT: 7860
3
+ STRATEGY: "cpu fp16"
4
+ RWKV_CUDA_ON: False
5
  CHUNK_LEN: 256
6
+ DEFAULT_STREAM: True
7
+ AUTO_ENABLE_TOOLS: True
8
+ AUTO_ENABLE_REASONING: True
9
+ AUTO_ENABLE_WEB_SEARCH: True
10
+ ENABLE_TOOLS_BY_DEFAULT: False
11
  MODELS:
12
  - SERVICE_NAME: "rwkv7-g1a-0.1b-20250728-ctx4096"
13
  DOWNLOAD_MODEL_FILE_NAME: "rwkv7-g1a-0.1b-20250728-ctx4096.pth"
 
16
  REASONING: True
17
  DEFAULT_CHAT: True
18
  DEFAULT_REASONING: True
19
+ ALLOW_WEB_SEARCH: True
20
+ ALLOW_TOOLS: True
21
+ ALLOW_REASONING: True
22
  DEFAULT_SAMPLER:
23
  max_tokens: 4096
24
  temperature: 1.0
 
33
  ALLOW_WEB_SEARCH: True
34
  ALLOW_TOOLS: True
35
  ALLOW_REASONING: True
36
+ STATE_STORE_PATH: "./state_store.json"
37
+ STATE_STORE_FLUSH_INTERVAL: 5
38
+ STATE_STORE_SAVE_ON_UPDATE: True
model_tags.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility module for parsing model tags like 'model:thinking:web' and
2
+ applying them as request flags on a request-like object.
3
+
4
+ The functions here operate on any object with the expected attributes (model,
5
+ enable_reasoning, auto_reasoning, enable_web_search, web_search, auto_web_search,
6
+ enable_tools, auto_tools, enable_file_tool, auto_file_tool, enable_universal,
7
+ auto_universal, stream) and don't require heavy app imports, which makes them
8
+ easy to unit-test.
9
+ """
10
+ from types import SimpleNamespace
11
+ from typing import Any
12
+
13
+
14
# Maps every recognised (lower-cased) tag to the (attribute, value) pairs it sets.
_TAG_FLAGS = {
    alias: flags
    for aliases, flags in (
        (
            ("thinking", "think", "reasoning", "reason"),
            (("enable_reasoning", True), ("auto_reasoning", False)),
        ),
        (
            ("web", "web_search", "search"),
            (
                ("enable_web_search", True),
                ("web_search", True),
                ("auto_web_search", False),
            ),
        ),
        (
            ("no-web", "disable-web", "no-web-search"),
            (("enable_web_search", False), ("web_search", False)),
        ),
        (
            ("tools", "enable-tools"),
            (("enable_tools", True), ("auto_tools", False)),
        ),
        (("no-tools", "disable-tools"), (("enable_tools", False),)),
        (
            ("file", "file_tool", "filetool"),
            (("enable_file_tool", True), ("auto_file_tool", False)),
        ),
        (("no-file", "disable-file"), (("enable_file_tool", False),)),
        (
            ("universal", "univ"),
            (("enable_universal", True), ("auto_universal", False)),
        ),
        (("stream",), (("stream", True),)),
    )
    for alias in aliases
}


def _set_flag(req: Any, name: str, value: Any) -> None:
    """Assign ``req.<name> = value``, skipping objects that reject the name."""
    try:
        setattr(req, name, value)
    except (AttributeError, TypeError, ValueError):
        # Read-only, slotted or validated objects may refuse an attribute;
        # tag handling is best-effort, so that one assignment is skipped.
        pass


def apply_model_tags_to_request_obj(req: Any) -> None:
    """Apply model suffix tags to a request-like object.

    ``req`` is mutated in place: ``req.model`` becomes the part before the
    first colon and every recognised tag sets its attributes on ``req``.
    Tags are matched case-insensitively; unknown or empty tags are ignored.

    Nothing is changed when ``req`` is falsy, when ``req.model`` is not a
    string containing ':', or when the part before the first colon is empty
    (there is no model name to fall back to).

    Each attribute is assigned independently, so an object that rejects one
    name (for example ``enable_web_search``) still receives the others
    (for example ``web_search``).
    """
    if not req:
        return
    model = getattr(req, "model", None)
    if not isinstance(model, str) or ":" not in model:
        return

    # Keep positions intact: the first segment is always the model name, so an
    # empty one is detected instead of a tag being promoted to the model name.
    base, *suffixes = (segment.strip() for segment in model.split(":"))
    if not base:
        return

    _set_flag(req, "model", base)
    for suffix in suffixes:
        for name, value in _TAG_FLAGS.get(suffix.lower(), ()):
            _set_flag(req, name, value)