Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -837,7 +837,7 @@ def chat_response_stream_multiturn(
|
|
| 837 |
|
| 838 |
full_prompt = chatml_format(message.strip(), history=history, system_prompt=system_prompt)
|
| 839 |
|
| 840 |
-
if len(tokenizer.encode(full_prompt
|
| 841 |
raise gr.Error(f"Conversation or prompt is too long, please clear the chatbox or try shorter input.")
|
| 842 |
|
| 843 |
sampling_params = SamplingParams(
|
|
@@ -942,7 +942,7 @@ def generate_free_form_stream(
|
|
| 942 |
if message_safety is not None:
|
| 943 |
raise gr.Error(message_safety)
|
| 944 |
|
| 945 |
-
if len(tokenizer.encode(message
|
| 946 |
raise gr.Error(f"Prompt is too long!")
|
| 947 |
|
| 948 |
cur_out = None
|
|
@@ -1173,7 +1173,7 @@ def validate_file_item(filename, index, item: Dict[str, str]):
|
|
| 1173 |
raise gr.Error(f'Prompt {index} invalid: {message_safety}')
|
| 1174 |
|
| 1175 |
tokenizer = llm.get_tokenizer() if llm is not None else None
|
| 1176 |
-
if tokenizer is None or len(tokenizer.encode(message
|
| 1177 |
raise gr.Error(f"Prompt {index} too long, should be less than {BATCH_INFER_MAX_PROMPT_TOKENS} tokens")
|
| 1178 |
|
| 1179 |
|
|
@@ -1299,7 +1299,7 @@ def batch_inference(
|
|
| 1299 |
]
|
| 1300 |
print(f'{full_prompts[0]}\n')
|
| 1301 |
|
| 1302 |
-
if any(len(tokenizer.encode(x
|
| 1303 |
raise gr.Error(f"Some prompt is too long!")
|
| 1304 |
|
| 1305 |
stop_seq = list(set(['<s>', '</s>', '<<SYS>>', '<</SYS>>', '[INST]', '[/INST]'] + stop_strings))
|
|
|
|
| 837 |
|
| 838 |
full_prompt = chatml_format(message.strip(), history=history, system_prompt=system_prompt)
|
| 839 |
|
| 840 |
+
if len(tokenizer.encode(full_prompt)) >= 4050:
|
| 841 |
raise gr.Error(f"Conversation or prompt is too long, please clear the chatbox or try shorter input.")
|
| 842 |
|
| 843 |
sampling_params = SamplingParams(
|
|
|
|
| 942 |
if message_safety is not None:
|
| 943 |
raise gr.Error(message_safety)
|
| 944 |
|
| 945 |
+
if len(tokenizer.encode(message)) >= 4050:
|
| 946 |
raise gr.Error(f"Prompt is too long!")
|
| 947 |
|
| 948 |
cur_out = None
|
|
|
|
| 1173 |
raise gr.Error(f'Prompt {index} invalid: {message_safety}')
|
| 1174 |
|
| 1175 |
tokenizer = llm.get_tokenizer() if llm is not None else None
|
| 1176 |
+
if tokenizer is None or len(tokenizer.encode(message)) >= BATCH_INFER_MAX_PROMPT_TOKENS:
|
| 1177 |
raise gr.Error(f"Prompt {index} too long, should be less than {BATCH_INFER_MAX_PROMPT_TOKENS} tokens")
|
| 1178 |
|
| 1179 |
|
|
|
|
| 1299 |
]
|
| 1300 |
print(f'{full_prompts[0]}\n')
|
| 1301 |
|
| 1302 |
+
if any(len(tokenizer.encode(x)) >= 4090 for x in full_prompts):
|
| 1303 |
raise gr.Error(f"Some prompt is too long!")
|
| 1304 |
|
| 1305 |
stop_seq = list(set(['<s>', '</s>', '<<SYS>>', '<</SYS>>', '[INST]', '[/INST]'] + stop_strings))
|