Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -98,13 +98,14 @@ def predict_chat(message: str, history: list):
|
|
| 98 |
prompt_input += f"Assistant: {msg['content']}\n"
|
| 99 |
prompt_input += "Assistant:"
|
| 100 |
|
|
|
|
| 101 |
for token in model.generate(
|
| 102 |
prompt_input,
|
| 103 |
-
|
| 104 |
temperature=TEMPERATURE,
|
| 105 |
top_k=TOP_K,
|
| 106 |
top_p=TOP_P,
|
| 107 |
-
|
| 108 |
repetition_penalty=1.1,
|
| 109 |
stop=["User:", "\nUser", "\n#", "\n##", "<|endoftext|>"]
|
| 110 |
):
|
|
@@ -168,4 +169,4 @@ if __name__ == "__main__":
|
|
| 168 |
|
| 169 |
demo.chatbot.value = initial_messages_for_value
|
| 170 |
|
| 171 |
-
demo.launch()
|
|
|
|
| 98 |
prompt_input += f"Assistant: {msg['content']}\n"
|
| 99 |
prompt_input += "Assistant:"
|
| 100 |
|
| 101 |
+
# FIXED: Use max_tokens instead of max_new_tokens for ctransformers
|
| 102 |
for token in model.generate(
|
| 103 |
prompt_input,
|
| 104 |
+
max_tokens=MAX_NEW_TOKENS, # Changed from max_new_tokens
|
| 105 |
temperature=TEMPERATURE,
|
| 106 |
top_k=TOP_K,
|
| 107 |
top_p=TOP_P,
|
| 108 |
+
sample=DO_SAMPLE, # Changed from do_sample
|
| 109 |
repetition_penalty=1.1,
|
| 110 |
stop=["User:", "\nUser", "\n#", "\n##", "<|endoftext|>"]
|
| 111 |
):
|
|
|
|
| 169 |
|
| 170 |
demo.chatbot.value = initial_messages_for_value
|
| 171 |
|
| 172 |
+
demo.launch()
|