IST199655 committed on
Commit ·
12b9045
1
Parent(s): f63e352
app.py
CHANGED
|
@@ -5,7 +5,7 @@ from huggingface_hub import InferenceClient
|
|
| 5 |
Copied from inference in colab notebook
|
| 6 |
"""
|
| 7 |
|
| 8 |
-
from transformers import AutoTokenizer , AutoModelForCausalLM ,
|
| 9 |
import torch
|
| 10 |
from threading import Thread
|
| 11 |
|
|
@@ -101,7 +101,7 @@ def respond(
|
|
| 101 |
return_tensors = "pt",
|
| 102 |
)
|
| 103 |
# Generate tokens incrementally
|
| 104 |
-
streamer =
|
| 105 |
generation_kwargs = {
|
| 106 |
"input_ids": inputs,
|
| 107 |
"max_new_tokens": max_tokens,
|
|
|
|
| 5 |
Copied from inference in colab notebook
|
| 6 |
"""
|
| 7 |
|
| 8 |
+
from transformers import AutoTokenizer , AutoModelForCausalLM , TextIteratorStreamer
|
| 9 |
import torch
|
| 10 |
from threading import Thread
|
| 11 |
|
|
|
|
| 101 |
return_tensors = "pt",
|
| 102 |
)
|
| 103 |
# Generate tokens incrementally
|
| 104 |
+
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
|
| 105 |
generation_kwargs = {
|
| 106 |
"input_ids": inputs,
|
| 107 |
"max_new_tokens": max_tokens,
|