Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 3 |
from peft import PeftModel
|
| 4 |
import torch
|
| 5 |
|
|
@@ -29,7 +29,6 @@ def generate_response(user_input, chat_history):
|
|
| 29 |
enable_thinking=True,
|
| 30 |
)
|
| 31 |
|
| 32 |
-
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
|
| 33 |
inputs = tokenizer(text, return_tensors="pt").to("cpu")
|
| 34 |
|
| 35 |
model.generate(
|
|
@@ -38,14 +37,10 @@ def generate_response(user_input, chat_history):
|
|
| 38 |
temperature=0.6,
|
| 39 |
top_p=0.95,
|
| 40 |
top_k=20,
|
| 41 |
-
|
| 42 |
)
|
| 43 |
|
| 44 |
-
|
| 45 |
-
response = ""
|
| 46 |
-
for new_text in streamer:
|
| 47 |
-
response += new_text
|
| 48 |
-
|
| 49 |
response = response.split(user_input)[-1].strip()
|
| 50 |
|
| 51 |
chat_history.append({"role": "assistant", "content": response})
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 3 |
from peft import PeftModel
|
| 4 |
import torch
|
| 5 |
|
|
|
|
| 29 |
enable_thinking=True,
|
| 30 |
)
|
| 31 |
|
|
|
|
| 32 |
inputs = tokenizer(text, return_tensors="pt").to("cpu")
|
| 33 |
|
| 34 |
model.generate(
|
|
|
|
| 37 |
temperature=0.6,
|
| 38 |
top_p=0.95,
|
| 39 |
top_k=20,
|
| 40 |
+
do_sample=True
|
| 41 |
)
|
| 42 |
|
| 43 |
+
response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
response = response.split(user_input)[-1].strip()
|
| 45 |
|
| 46 |
chat_history.append({"role": "assistant", "content": response})
|