Update app.py
Browse files
app.py
CHANGED
|
@@ -35,7 +35,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 35 |
tokenizer = AutoTokenizer.from_pretrained("THUDM/LongWriter-glm4-9b",trust_remote_code=True)
|
| 36 |
|
| 37 |
|
| 38 |
-
@spaces.GPU
|
| 39 |
def stream_chat(message: str, history: list, temperature: float, max_length: int):
|
| 40 |
print(f'message is - {message}')
|
| 41 |
print(f'history is - {history}')
|
|
@@ -61,13 +61,12 @@ def stream_chat(message: str, history: list, temperature: float, max_length: int
|
|
| 61 |
)
|
| 62 |
gen_kwargs = {**input_ids, **generate_kwargs}
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
yield buffer
|
| 71 |
|
| 72 |
chatbot = gr.Chatbot(height=600, placeholder = PLACEHOLDER)
|
| 73 |
|
|
|
|
| 35 |
tokenizer = AutoTokenizer.from_pretrained("THUDM/LongWriter-glm4-9b",trust_remote_code=True)
|
| 36 |
|
| 37 |
|
| 38 |
+
@spaces.GPU()
|
| 39 |
def stream_chat(message: str, history: list, temperature: float, max_length: int):
|
| 40 |
print(f'message is - {message}')
|
| 41 |
print(f'history is - {history}')
|
|
|
|
| 61 |
)
|
| 62 |
gen_kwargs = {**input_ids, **generate_kwargs}
|
| 63 |
|
| 64 |
+
thread = Thread(target=model.generate, kwargs=gen_kwargs)
|
| 65 |
+
thread.start()
|
| 66 |
+
buffer = ""
|
| 67 |
+
for new_text in streamer:
|
| 68 |
+
buffer += new_text
|
| 69 |
+
yield buffer
|
|
|
|
| 70 |
|
| 71 |
chatbot = gr.Chatbot(height=600, placeholder = PLACEHOLDER)
|
| 72 |
|