cmh committed · Commit c72d1a7 · verified · 1 Parent(s): 09bb8c8

Upload prompts.py

Files changed (1)
  1. backend/prompts.py +599 -0
backend/prompts.py ADDED
@@ -0,0 +1,599 @@
class PromptFormat:

    def __init__(self):
        pass

    def format(self, prompt, response, system_prompt, settings):
        raise NotImplementedError

    def stop_conditions(self, tokenizer, settings):
        raise NotImplementedError

    def is_instruct(self):
        raise NotImplementedError

    def encode_special_tokens(self):
        return True

    def context_bos(self):
        return False

    @staticmethod
    def supports_system_prompt():
        return True
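
# Each subclass implements one chat template: format() renders a single
# (prompt, response) turn as text, stop_conditions() lists the token IDs
# and/or strings that should end generation, and context_bos() reports
# whether the tokenized context should begin with a BOS token.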


class PromptFormat_raw(PromptFormat):

    description = "Model-agnostic mode simulating a raw chatlog between two or more users"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return False

    def stop_conditions(self, tokenizer, settings):
        raise NotImplementedError

    def format(self, prompt, response, system_prompt, settings):
        raise NotImplementedError

    def encode_special_tokens(self):
        return True


class PromptFormat_llama(PromptFormat):

    description = "Llama-chat, Llama2-chat and Mistral-instruct models"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.eos_token_id]

    def format(self, prompt, response, system_prompt, settings):
        text = "<s>[INST] "
        if system_prompt and system_prompt.strip() != "":
            text += "<<SYS>>\n"
            text += system_prompt
            text += "\n<</SYS>>\n\n "
        text += prompt
        text += " [/INST]"
        if response:
            text += response
            text += "</s>"
        return text
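
# For reference, a single turn rendered by PromptFormat_llama looks like:
#
#   <s>[INST] <<SYS>>
#   {system_prompt}
#   <</SYS>>
#
#    {prompt} [/INST]{response}</s>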


class PromptFormat_mistral(PromptFormat):

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.eos_token_id]

    def context_bos(self):
        return True


class PromptFormat_mistralv1(PromptFormat_mistral):
    """
    <s> [INST] user message [/INST] assistant message</s> [INST] new user message [/INST]
    """

    description = "Mistral tokenizer v1"

    def __init__(self):
        super().__init__()

    def format(self, p, r, sp, settings):
        if sp and sp.strip():
            text = f" [INST] {sp.strip()}\n\n {p.strip()} [/INST]"
        else:
            text = f" [INST] {p.strip()} [/INST]"
        if r:
            text += f" {r.strip()}</s>"
        return text


class PromptFormat_mistralv2v3(PromptFormat_mistral):
    """
    <s>[INST] user message[/INST] assistant message</s>[INST] new user message[/INST]
    """

    description = "Mistral tokenizer v2/v3"

    def __init__(self):
        super().__init__()

    def format(self, p, r, sp, settings):
        if sp and sp.strip():
            text = f"[INST] {sp.strip()}\n\n {p.strip()}[/INST]"
        else:
            text = f"[INST] {p.strip()}[/INST]"
        if r:
            text += f" {r.strip()}</s>"
        return text


class PromptFormat_mistralTekken(PromptFormat_mistral):
    """
    <s>[INST]user message[/INST]assistant message</s>[INST]new user message[/INST]
    """

    description = "Mistral tokenizer V3 (Tekken)"

    def format(self, p, r, sp, settings):
        if sp and sp.strip():
            text = f"[INST]{sp.strip()}\n\n{p.strip()}[/INST]"
        else:
            text = f"[INST]{p.strip()}[/INST]"
        if r:
            text += f"{r.strip()}</s>"
        return text
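
# The three Mistral variants differ only in the whitespace around the [INST]
# tags. For p="Hi", r="Hello" and no system prompt they render as:
#   V1:          " [INST] Hi [/INST] Hello</s>"
#   V2/V3:       "[INST] Hi[/INST] Hello</s>"
#   V3 (Tekken): "[INST]Hi[/INST]Hello</s>"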


class PromptFormat_llama3(PromptFormat):

    description = "Llama-3 instruct template"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.single_id("<|eot_id|>"),
                tokenizer.single_id("<|start_header_id|>"),
                tokenizer.eos_token_id]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += "<|start_header_id|>system<|end_header_id|>\n\n"
            text += system_prompt
            text += "<|eot_id|>"
        text += "<|start_header_id|>user<|end_header_id|>\n\n"
        text += prompt
        text += "<|eot_id|>"
        text += "<|start_header_id|>assistant<|end_header_id|>\n\n"
        if response:
            text += response
            text += "<|eot_id|>"
        return text

    def context_bos(self):
        return True


class PromptFormat_phi3(PromptFormat):

    description = "phi-4"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.single_id("<|im_end|>"),
                tokenizer.single_id("<|im_start|>assistant<|im_sep|>"),
                tokenizer.eos_token_id]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += "<|im_start|>system<|im_sep|>\n"
            text += system_prompt
            text += "<|im_end|>\n"
        text += "<|im_start|>user<|im_sep|>\n"
        text += prompt
        text += "<|im_end|>\n"
        text += "<|im_start|>assistant<|im_sep|>\n"
        if response:
            text += response
            text += "<|im_end|>"
        return text

    def context_bos(self):
        return True


class PromptFormat_phi4(PromptFormat):

    description = "ChatML format, as used by e.g. (Mistral)Orca"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.eos_token_id,
                "<|im_end|>"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += "<|im_start|>system\n"
            text += system_prompt
            text += "\n<|im_end|>\n"
        text += "<|im_start|>user\n"
        text += prompt
        text += "<|im_end|>\n"
        text += "<|im_start|>assistant\n"
        if response:
            text += response
            text += "<|im_end|>\n"
        return text

    def context_bos(self):
        return True


class PromptFormat_mistrallite(PromptFormat):

    description = "MistralLite format"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.eos_token_id]

    def format(self, prompt, response, system_prompt, settings):
        text = "<|prompter|>"
        if system_prompt and system_prompt.strip() != "":
            text += system_prompt
            text += "</s><|assistant|>Understood.</s><|prompter|>"
        text += prompt
        text += "</s><|assistant|>"
        if response:
            text += response
            text += "</s>"
        return text


# class PromptFormat_codellama(PromptFormat_llama):
#
#     description = "CodeLlama-instruct"
#
#     def __init__(self):
#         super().__init__()
#         pass
#
#     def default_system_prompt(self):
#         return \
#             """You are a helpful coding assistant. Always answer as helpfully as possible."""


class PromptFormat_chatml(PromptFormat):

    description = "ChatML format, as used by e.g. (Mistral)Orca"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.eos_token_id,
                "<|im_end|>"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += "<|im_start|>system\n"
            text += system_prompt
            text += "\n<|im_end|>\n"
        text += "<|im_start|>user\n"
        text += prompt
        text += "<|im_end|>\n"
        text += "<|im_start|>assistant\n"
        if response:
            text += response
            text += "<|im_end|>\n"
        return text

    def context_bos(self):
        return True


class PromptFormat_tinyllama(PromptFormat_chatml):

    description = "ChatML format, but ignoring special/added tokens. Use for TinyLlama-chat v0.3"

    def encode_special_tokens(self):
        return False


class PromptFormat_phind_codellama(PromptFormat):

    description = "Vicuna/Alpaca-like format for Phind-CodeLlama"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.eos_token_id, "\n### "]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += "### System Prompt\n"
            text += system_prompt
            text += "\n\n"
        text += "### User Message\n"
        text += prompt
        text += "\n\n### Assistant\n"
        if response:
            text += response
            text += "\n\n"
        return text


class PromptFormat_deepseek_chat(PromptFormat):

    description = "Deepseek LLM chat format"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.eos_token_id, "\n\nAssistant:"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += system_prompt
            text += "\n\n"
        text += "User: "
        text += prompt
        text += "\n\nAssistant:"
        if response:
            text += response
            text += "\n\n"
        return text


class PromptFormat_deepseek_instruct(PromptFormat):

    description = "Deepseek instruct format for 'coder' models"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.eos_token_id, "<|EOT|>"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += "<|begin▁of▁sentence|>"
            text += system_prompt
            text += "\n"
        text += "### Instruction:\n"
        text += prompt
        text += "\n### Response:\n"
        if response:
            text += response
            text += "\n<|EOT|>\n"
        return text
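
# Note: the "▁" characters in "<|begin▁of▁sentence|>" above are literal
# U+2581 (LOWER ONE EIGHTH BLOCK) characters, as used in Deepseek's special
# tokens; they are not mojibake and must not be replaced with underscores.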


class PromptFormat_openchat(PromptFormat):

    description = "OpenChat"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.eos_token_id,
                "<|end_of_turn|>",
                "<|endoftext|>",
                "GPT4 Correct User:"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += system_prompt
            text += "<|end_of_turn|>"
        text += "GPT4 Correct User:"
        text += prompt
        text += "<|end_of_turn|>"
        text += "GPT4 Correct Assistant:"
        if response:
            text += response
            text += "<|end_of_turn|>"
        return text


class PromptFormat_gemma(PromptFormat):

    description = "Gemma"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.eos_token_id,
                "<end_of_turn>"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt is not None:
            text += "<bos>"
            # s = system_prompt.strip()
            # if s != "":
            #     text += "<start_of_turn>user\n"
            #     text += s + "<end_of_turn>\n"
            #     text += "<start_of_turn>model\n"
            #     text += "Okay!<end_of_turn>\n"
        text += "<start_of_turn>user\n"
        text += prompt
        text += "<end_of_turn>\n"
        text += "<start_of_turn>model\n"
        if response:
            text += response
            text += "<end_of_turn>\n"
        return text

    @staticmethod
    def supports_system_prompt():
        return False


class PromptFormat_cohere(PromptFormat):

    description = "Cohere"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.eos_token_id,
                "<|END_OF_TURN_TOKEN|>"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt is not None:
            text += "<BOS_TOKEN>"
            text += "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>"
            text += system_prompt.strip()
            text += "<|END_OF_TURN_TOKEN|>"
        text += "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>"
        text += prompt
        text += "<|END_OF_TURN_TOKEN|>"
        text += "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
        if response:
            text += response
            text += "<|END_OF_TURN_TOKEN|>"
        return text


class PromptFormat_granite(PromptFormat):

    description = "Granite"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.eos_token_id,
                "\n\nQuestion:"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt is not None:
            text += "System:\n"
            text += system_prompt.strip()
            text += "\n\n"
        text += "Question:\n"
        text += prompt
        text += "\n\n"
        text += "Answer:\n"
        if response:
            text += response
            text += "\n\n"
        return text

    def context_bos(self):
        return True


prompt_formats = {
    "Chat-RP": PromptFormat_raw,
    "Llama-chat": PromptFormat_llama,
    "Llama3-instruct": PromptFormat_llama3,
    "ChatML": PromptFormat_chatml,
    "TinyLlama-chat": PromptFormat_tinyllama,
    "MistralLite": PromptFormat_mistrallite,
    "Phind-CodeLlama": PromptFormat_phind_codellama,
    "Deepseek-chat": PromptFormat_deepseek_chat,
    "Deepseek-instruct": PromptFormat_deepseek_instruct,
    "OpenChat": PromptFormat_openchat,
    "Gemma": PromptFormat_gemma,
    "Cohere": PromptFormat_cohere,
    "Phi3-instruct": PromptFormat_phi3,
    "Phi4": PromptFormat_phi4,
    "Granite": PromptFormat_granite,
    "Mistral V1": PromptFormat_mistralv1,
    "Mistral V2/V3": PromptFormat_mistralv2v3,
    "Mistral V3 (Tekken)": PromptFormat_mistralTekken,
}


def list_prompt_formats():
    prompts = [
        {
            "name": k,
            "supports_system_prompt": v.supports_system_prompt()
        }
        for k, v in prompt_formats.items()
    ]
    return prompts
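

# Example usage: a minimal sketch. The real backend supplies its own settings
# object; the empty dict below is a stand-in for illustration only.
if __name__ == "__main__":
    fmt = prompt_formats["ChatML"]()
    closed_turn = fmt.format("What is 2 + 2?", "4", "You are terse.", {})
    open_turn = fmt.format("And 3 + 3?", None, None, {})
    # The second turn ends with "<|im_start|>assistant\n", leaving the
    # context open for the model to generate the next reply.
    print(closed_turn + open_turn)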