luancy1208 committed on
Commit
fccbf85
·
verified ·
1 Parent(s): 00add69

Delete chip-space/chip/rules/rules.yaml

Browse files
Files changed (1) hide show
  1. chip-space/chip/rules/rules.yaml +0 -418
chip-space/chip/rules/rules.yaml DELETED
@@ -1,418 +0,0 @@
1
- # CHIP Compression Rules v0.2 (2025-05-01)
2
- # ==========================================
3
- # v0.2 vs v0.1 主要变化:
4
- # - 标签层从 [角:X] / 【任】 改为 ### 角色 (实测全 tokenizer 1 token,完爆方括号)
5
- # - 新增 L3 成语层(基于 idiom_whitelist.json 实测)
6
- # - 新增 L4 协议层(归一化用户已有的标签)
7
-
8
rules:
# ============================================================
# L1: lexical layer — prune verbose courtesy phrases.
# Every rule is a regex `pattern` rewritten to `replacement`;
# `saves` is the author's token-saving estimate, `risk` the
# author's assessment of how likely the rewrite alters meaning.
# ============================================================
- id: L1-001
  layer: L1
  # The trailing (?![助忙]) stops the rule from cutting the words
  # 帮助 / 帮忙 in half ("请帮助我" previously became "助我").
  pattern: "请你?帮我?(?![助忙])"
  replacement: ""
  saves: 2
  risk: low
  description: "客套语 '请你帮我' / '请帮我' → 空"

- id: L1-002
  layer: L1
  # NOTE(review): also matches adjectival 麻烦 (e.g. "很麻烦"),
  # which this rule would delete — consider a context guard.
  pattern: "麻烦你?"
  replacement: ""
  saves: 2
  risk: low

- id: L1-003
  layer: L1
  pattern: "如果可以的话[,,]?"
  replacement: ""
  saves: 3
  risk: low

- id: L1-004
  layer: L1
  # Drops a polite modal in front of a request verb. The leading
  # lookbehind refuses a modal that follows a negation, so
  # "不能帮" is no longer rewritten to "不帮" (meaning inversion).
  # "能不能" / "可不可以" still match because the match starts on
  # their first character.
  pattern: "(?<![不没未])(?:能不能|可不可以|可以|能)(?=帮|告诉|解释|总结|分析)"
  replacement: ""
  saves: 2
  risk: low

- id: L1-005
  layer: L1
  # NOTE(review): also matches adjectival 辛苦 (e.g. "工作很辛苦")
  # — consider a context guard.
  pattern: "辛苦你?"
  replacement: ""
  saves: 2
  risk: low

- id: L1-006
  layer: L1
  # Thanks, with an optional 你/了 and one trailing punctuation mark.
  pattern: "(?:谢谢|感谢)(?:你|了)?[!!.。]?"
  replacement: ""
  saves: 2
  risk: low
54
-
55
# ---- 进行/做 + verbal noun → bare verb ----
# Each pattern tolerates an optional measure phrase (一个 / 一下 / 一次 …)
# between the light verb and the noun.
- id: L1-010
  layer: L1
  pattern: "进行(?:一?(?:个|下|次)?)?分析"
  replacement: "分析"
  saves: 2
  risk: low

- id: L1-011
  layer: L1
  pattern: "进行(?:一?(?:个|下|次)?)?总结"
  replacement: "总结"
  saves: 2
  risk: low

- id: L1-012
  layer: L1
  pattern: "进行(?:一?(?:个|下|次)?)?处理"
  replacement: "处理"
  saves: 2
  risk: low

- id: L1-013
  layer: L1
  pattern: "进行(?:一?(?:个|下|次)?)?解释"
  replacement: "解释"
  saves: 2
  risk: low

- id: L1-014
  layer: L1
  pattern: "做(?:一?(?:个|下|次)?)?判断"
  # Keep the matched noun (判断), as every sibling rule does; the
  # previous replacement 判定 substituted a different word.
  replacement: "判断"
  saves: 3
  risk: low

- id: L1-015
  layer: L1
  pattern: "做(?:一?(?:个|下|次)?)?解释"
  replacement: "解释"
  saves: 3
  risk: low

- id: L1-016
  layer: L1
  pattern: "给(?:出|我)(?:一些|几个)?建议"
  replacement: "建议"
  saves: 2
  risk: low

- id: L1-017
  layer: L1
  # 相应 added: "提供相应帮助" is the common collocation. 相对 is kept
  # so every previously matched input still matches.
  pattern: "提供(?:一些|相关|相应|相对)?帮助"
  replacement: "助"
  saves: 2
  risk: mid

- id: L1-018
  layer: L1
  pattern: "进行(?:一?(?:个|下|次)?)?检查"
  replacement: "检查"
  saves: 2
  risk: low

- id: L1-019
  layer: L1
  pattern: "进行(?:一?(?:个|下|次)?)?优化"
  replacement: "优化"
  saves: 2
  risk: low
125
-
126
# ---- Connectives ----
# Multi-character discourse markers are collapsed to short
# equivalents; most patterns also consume one trailing comma
# (full-width or ASCII).
- id: L1-020
  layer: L1
  pattern: "也就是说[,,]?"
  replacement: "即"
  saves: 3
  risk: low

- id: L1-021
  layer: L1
  pattern: "换句话说[,,]?"
  replacement: "即"
  saves: 3
  risk: low

- id: L1-022
  layer: L1
  # The replacement always re-emits a full-width comma.
  pattern: "与此同时[,,]?"
  replacement: "同时,"
  saves: 2
  risk: low

- id: L1-023
  layer: L1
  # The replacement always re-emits a full-width comma.
  pattern: "在这种情况下[,,]?"
  replacement: "此时,"
  saves: 3
  risk: low

- id: L1-024
  layer: L1
  pattern: "由此可见[,,]?"
  replacement: "故"
  saves: 3
  risk: low

- id: L1-025
  layer: L1
  # Also swallows a following comma or the particle 说.
  pattern: "因此(?:[,,]|说)?"
  replacement: "故"
  saves: 1
  risk: low

- id: L1-026
  layer: L1
  pattern: "如果没有"
  replacement: "若无"
  saves: 2
  risk: low

- id: L1-027
  layer: L1
  # Lazy capture: group 1 is the shortest text between 通过 and 的方式.
  pattern: "通过(.+?)的方式"
  replacement: "用\\1"
  saves: 2
  risk: mid

- id: L1-028
  layer: L1
  # Three back-reference phrases share one replacement.
  pattern: "(?:如上所述|前面提到的|刚才说的)"
  replacement: "前述"
  saves: 3
  risk: low
189
-
190
# ---- Degree adverbs ----
# These rules drop the hedge / intensifier (比较, 相对, 非常) and the
# adverbial particle 地 but keep the adjective itself, so the
# instruction it carries ("简洁", "全面", …) survives compression.
- id: L1-030
  layer: L1
  # Previously replaced with "", which erased the adjective too:
  # "比较简洁地回答" lost its conciseness requirement.
  pattern: "比较(简洁|清晰|详细)地?"
  replacement: "\\1"
  # Lowered from 3 because the adjective is now kept; value chosen
  # by analogy with L1-033 — TODO re-measure.
  saves: 2
  risk: low

- id: L1-031
  layer: L1
  # Same fix as L1-030: keep the captured adjective.
  pattern: "相对(简洁|详细|完整)地?"
  replacement: "\\1"
  # Lowered from 3 because the adjective is now kept — TODO re-measure.
  saves: 2
  risk: low

- id: L1-032
  layer: L1
  pattern: "尽可能(?:地)?"
  replacement: "尽量"
  saves: 1
  risk: low

- id: L1-033
  layer: L1
  # Echo the matched adjective instead of always emitting 详细;
  # the old replacement turned 全面 / 详尽 into a different word.
  pattern: "非常(详细|详尽|全面)地?"
  replacement: "\\1"
  saves: 2
  risk: low
218
-
219
# ============================================================
# L2: syntactic layer — rewrite paired constructions.
# Captures are lazy (.+?), so each rule binds to the nearest
# closing keyword on the same line.
# ============================================================
- id: L2-001
  layer: L2
  # Group 2 (the verb, 1–4 CJK characters) must now end at
  # punctuation, whitespace or end of input. Without that boundary
  # the greedy {1,4} ran past the verb:
  # "对代码进行分析并给出建议" became "分析并给代码出建议".
  pattern: "对(.+?)进行(?:一?(?:个|下|次)?(?:全面|详细|简要|认真|深入)?的?)?([\\u4e00-\\u9fff]{1,4})(?=[,,。.\\s]|$)"
  replacement: "\\2\\1"
  saves: 2
  risk: mid
  description: "'对 X 进行 Y' → 'Y X'"

- id: L2-002
  layer: L2
  # "|$" added so a clause ending the input is matched as well
  # (same boundary set that L2-022 uses).
  pattern: "把(.+?)作为(.+?)(?=[,,。.\\s]|$)"
  replacement: "视\\1为\\2"
  saves: 2
  risk: mid

- id: L2-003
  layer: L2
  pattern: "由于(.+?)所以"
  replacement: "\\1故"
  saves: 3
  risk: low

- id: L2-004
  layer: L2
  pattern: "虽然(.+?)但是"
  replacement: "\\1然"
  saves: 3
  risk: mid

- id: L2-005
  layer: L2
  pattern: "不仅(.+?)而且"
  replacement: "\\1且"
  saves: 3
  risk: low

- id: L2-006
  layer: L2
  pattern: "因为(.+?)所以"
  replacement: "\\1故"
  saves: 3
  risk: low

- id: L2-007
  layer: L2
  pattern: "如果(.+?)那么"
  replacement: "若\\1则"
  saves: 2
  risk: low
271
-
272
# ---- List markers ----
# An ordinal word followed by a comma (full-width or ASCII) becomes
# a numbered-list prefix. 首先 / 其次 map to the same numbers as
# 第一 / 第二.
- id: L2-010
  layer: L2
  pattern: "第一[,,]"
  replacement: "1. "
  saves: 1
  risk: low

- id: L2-011
  layer: L2
  pattern: "第二[,,]"
  replacement: "2. "
  saves: 1
  risk: low

- id: L2-012
  layer: L2
  pattern: "第三[,,]"
  replacement: "3. "
  saves: 1
  risk: low

- id: L2-013
  layer: L2
  pattern: "第四[,,]"
  replacement: "4. "
  saves: 1
  risk: low

- id: L2-014
  layer: L2
  pattern: "首先[,,]"
  replacement: "1. "
  saves: 1
  risk: low

- id: L2-015
  layer: L2
  pattern: "其次[,,]"
  replacement: "2. "
  saves: 1
  risk: low
314
-
315
# ============================================================
# L2 protocol rewrites (revised in v0.2).
# Author's measurement: "###" is a single token on all 9
# tokenizers tested.
# Each replacement begins with a newline so the "###" heading
# starts its own line.
# ============================================================
- id: L2-020
  layer: L2
  # "please output / return / answer in JSON format"
  pattern: "请\\s*(?:用|以)?\\s*(?:JSON|json|Json)\\s*格式\\s*(?:输出|返回|回答)"
  replacement: "\n### 输出\nJSON"
  saves: 4
  risk: low

- id: L2-021
  layer: L2
  # "please answer / reply / output in Chinese"
  pattern: "请\\s*(?:用|以)?\\s*中文\\s*(?:回答|回复|输出)"
  replacement: "\n### 输出\n中文"
  saves: 3
  risk: low

- id: L2-022
  layer: L2
  # Group 1 is the role: captured lazily up to the first comma,
  # full stop, newline, the suffix 的角色, or end of input.
  pattern: "请\\s*(?:你)?\\s*扮演\\s*(?:一(?:个|位))?\\s*(.+?)(?=[,,。.\\n]|的角色|$)"
  replacement: "\n### 角色\n\\1\n"
  saves: 4
  risk: high
  description: |
    '请你扮演一位 X' → '### 角色\nX'
    已知问题:含空格的复合 NP 可能被截断,Day 3 用 jieba 修复
342
-
343
# ============================================================
# L3: idiom layer — replace a plain phrase with a four-character
# idiom. Must be enabled explicitly with layer=L3.
# Author's note: the idioms are 1 token on >=3 Chinese-vendor
# tokenizers, per measurements recorded in idiom_whitelist.json.
# NOTE(review): the original header advertised a default
# "universal" set of 11 core idioms, but only the six rules below
# are defined in this file — reconcile the count or add the rest.
# ============================================================
- id: L3-001
  layer: L3
  pattern: "(?:大家都知道|每个人都知道|众人皆知)"
  replacement: "众所周知"
  saves: 2
  risk: mid

- id: L3-002
  layer: L3
  pattern: "投入(?:全部|所有)?(?:精力|力量)(?:去做|做)?"
  replacement: "全力以赴"
  saves: 2
  risk: mid

- id: L3-003
  layer: L3
  pattern: "(?:根据|结合|按照)(?:当地|实际)情况"
  replacement: "因地制宜"
  saves: 2
  risk: mid

- id: L3-004
  layer: L3
  pattern: "(?:一步一步|一步步)(?:地)?(?:推进|进行)"
  replacement: "循序渐进"
  saves: 3
  risk: mid

- id: L3-005
  layer: L3
  pattern: "(?:不断|持续|一直)(?:坚持|努力做)"
  replacement: "持之以恒"
  saves: 2
  risk: mid

- id: L3-006
  layer: L3
  pattern: "认真(?:仔细)?(?:地)?对待"
  replacement: "脚踏实地"
  saves: 1
  # Raised from mid: 认真对待 is transitive ("认真对待这个问题"),
  # while 脚踏实地 takes no object and is not a synonym, so the
  # rewrite can change meaning and break grammar.
  # TODO pick a closer replacement from the measured whitelist.
  risk: high
388
-
389
# ============================================================
# L4: protocol layer — normalise section labels the user already
# wrote ("任务:", "# Role:", …) into the canonical "### X" heading.
# Every pattern is anchored to the start of the input or of a line
# via (?:^|(?<=\n)), followed by optional indentation. Unanchored,
# the label words matched inside ordinary sentences
# ("有以下要求:" became "有以下### 约束"), leaving a "###" that is
# not at line start.
# `saves: 0` — these rules normalise rather than shorten.
# ============================================================
- id: L4-001
  layer: L4
  pattern: "(?:^|(?<=\\n))[ \\t]*(?:#+\\s*)?(?:任务|目标|Task|TASK)\\s*[::]\\s*"
  replacement: "### 任务\n"
  saves: 0
  risk: low

- id: L4-002
  layer: L4
  pattern: "(?:^|(?<=\\n))[ \\t]*(?:#+\\s*)?(?:角色|身份|Role|ROLE)\\s*[::]\\s*"
  replacement: "### 角色\n"
  saves: 0
  risk: low

- id: L4-003
  layer: L4
  pattern: "(?:^|(?<=\\n))[ \\t]*(?:#+\\s*)?(?:输出|返回|输出格式|Output|OUTPUT)\\s*[::]\\s*"
  replacement: "### 输出\n"
  saves: 0
  risk: low

- id: L4-004
  layer: L4
  pattern: "(?:^|(?<=\\n))[ \\t]*(?:#+\\s*)?(?:约束|限制|要求|规则|Constraints|CONSTRAINTS)\\s*[::]\\s*"
  replacement: "### 约束\n"
  saves: 0
  risk: low