Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
·
6f9f106
1
Parent(s):
ad353f0
Update from collections import deque
Browse files
app.py
CHANGED
|
@@ -6,8 +6,8 @@ import os
|
|
| 6 |
import platform
|
| 7 |
import random
|
| 8 |
import time
|
|
|
|
| 9 |
from pathlib import Path
|
| 10 |
-
from queue import deque
|
| 11 |
from threading import Thread
|
| 12 |
from typing import Any, Dict, List, Union
|
| 13 |
|
|
@@ -134,7 +134,7 @@ You are a helpful assistant. Think step by step.
|
|
| 134 |
{input}
|
| 135 |
### RESPONSE:"""
|
| 136 |
|
| 137 |
-
prompt_template = """You are a helpful assistant.
|
| 138 |
{history}
|
| 139 |
### HUMAN:
|
| 140 |
{input}
|
|
@@ -186,7 +186,7 @@ class DequeCallbackHandler(BaseCallbackHandler):
|
|
| 186 |
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
|
| 187 |
) -> None:
|
| 188 |
"""Run when LLM errors."""
|
| 189 |
-
self.q.
|
| 190 |
|
| 191 |
|
| 192 |
_ = psutil.cpu_count(logical=False) - 1
|
|
@@ -203,6 +203,7 @@ except Exception as exc_:
|
|
| 203 |
raise SystemExit(1) from exc_
|
| 204 |
|
| 205 |
config = Config()
|
|
|
|
| 206 |
config.stream = True
|
| 207 |
config.stop = stop
|
| 208 |
config.threads=cpu_count
|
|
@@ -241,7 +242,31 @@ conversation = ConversationChain(
|
|
| 241 |
memory=memory,
|
| 242 |
verbose=True,
|
| 243 |
)
|
| 244 |
-
logger.debug(f"{conversation.prompt.template=}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
# conversation.predict(input="Hello, my name is Andrea")
|
| 247 |
|
|
@@ -286,6 +311,7 @@ def bot(history):
|
|
| 286 |
response = []
|
| 287 |
flag = 1
|
| 288 |
then = time.time()
|
|
|
|
| 289 |
with about_time() as atime: # type: ignore
|
| 290 |
while True:
|
| 291 |
if deq:
|
|
@@ -311,33 +337,18 @@ def bot(history):
|
|
| 311 |
|
| 312 |
|
| 313 |
def predict_api(user_prompt):
|
| 314 |
-
|
|
|
|
|
|
|
|
|
|
| 315 |
try:
|
| 316 |
-
# user_prompt = prompt
|
| 317 |
-
Config(
|
| 318 |
-
temperature=0.2,
|
| 319 |
-
top_k=10,
|
| 320 |
-
top_p=0.9,
|
| 321 |
-
repetition_penalty=1.0,
|
| 322 |
-
max_new_tokens=512, # adjust as needed
|
| 323 |
-
seed=42,
|
| 324 |
-
reset=True, # reset history (cache)
|
| 325 |
-
stream=False,
|
| 326 |
-
# threads=cpu_count,
|
| 327 |
-
# stop=prompt_prefix[1:2],
|
| 328 |
-
)
|
| 329 |
_ = """
|
| 330 |
response = generate(
|
| 331 |
prompt,
|
| 332 |
config=config,
|
| 333 |
)
|
| 334 |
# """
|
| 335 |
-
|
| 336 |
-
llm=LLM,
|
| 337 |
-
prompt=prompt,
|
| 338 |
-
verbose=True,
|
| 339 |
-
)
|
| 340 |
-
response = conversation1.predict(input=user_prompt)
|
| 341 |
logger.debug(f"api: {response=}")
|
| 342 |
except Exception as exc:
|
| 343 |
logger.error(exc)
|
|
@@ -368,6 +379,8 @@ examples_list = [
|
|
| 368 |
[
|
| 369 |
"What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
|
| 370 |
],
|
|
|
|
|
|
|
| 371 |
["How to pick a lock? Provide detailed steps."],
|
| 372 |
["If it takes 10 hours to dry 10 clothes, assuming all the clothes are hanged together at the same time for drying , then how long will it take to dry a cloth?"],
|
| 373 |
["is infinity + 1 bigger than infinity?"],
|
|
@@ -506,6 +519,8 @@ with gr.Blocks(
|
|
| 506 |
cancels=[msg_submit_event, submit_click_event],
|
| 507 |
queue=False,
|
| 508 |
)
|
|
|
|
|
|
|
| 509 |
clear.click(lambda: None, None, chatbot, queue=False)
|
| 510 |
|
| 511 |
with gr.Accordion("For Chat/Translation API", open=False, visible=False):
|
|
@@ -513,12 +528,13 @@ with gr.Blocks(
|
|
| 513 |
api_btn = gr.Button("Go", variant="primary")
|
| 514 |
out_text = gr.Text()
|
| 515 |
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
|
|
|
| 522 |
|
| 523 |
# block.load(update_buff, [], buff, every=1)
|
| 524 |
# block.load(update_buff, [buff_var], [buff_var, buff], every=1)
|
|
|
|
| 6 |
import platform
|
| 7 |
import random
|
| 8 |
import time
|
| 9 |
+
from collections import deque
|
| 10 |
from pathlib import Path
|
|
|
|
| 11 |
from threading import Thread
|
| 12 |
from typing import Any, Dict, List, Union
|
| 13 |
|
|
|
|
| 134 |
{input}
|
| 135 |
### RESPONSE:"""
|
| 136 |
|
| 137 |
+
prompt_template = """You are a helpful assistant. Let's think step by step.
|
| 138 |
{history}
|
| 139 |
### HUMAN:
|
| 140 |
{input}
|
|
|
|
| 186 |
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
|
| 187 |
) -> None:
|
| 188 |
"""Run when LLM errors."""
|
| 189 |
+
self.q.append(sig_end)
|
| 190 |
|
| 191 |
|
| 192 |
_ = psutil.cpu_count(logical=False) - 1
|
|
|
|
| 203 |
raise SystemExit(1) from exc_
|
| 204 |
|
| 205 |
config = Config()
|
| 206 |
+
# Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
|
| 207 |
config.stream = True
|
| 208 |
config.stop = stop
|
| 209 |
config.threads=cpu_count
|
|
|
|
| 242 |
memory=memory,
|
| 243 |
verbose=True,
|
| 244 |
)
|
| 245 |
+
logger.debug(f"{conversation.prompt.template=}") # type: ignore
|
| 246 |
+
|
| 247 |
+
# for api access ===
|
| 248 |
+
config = Config()
|
| 249 |
+
# Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
|
| 250 |
+
config.stop = stop
|
| 251 |
+
config.threads=cpu_count
|
| 252 |
+
|
| 253 |
+
try:
|
| 254 |
+
LLM_api = CTransformers(
|
| 255 |
+
model=model_loc,
|
| 256 |
+
model_type="llama",
|
| 257 |
+
# callbacks=[StreamingStdOutCallbackHandler(), deqcb],
|
| 258 |
+
callbacks=[StreamingStdOutCallbackHandler()],
|
| 259 |
+
**vars(config),
|
| 260 |
+
)
|
| 261 |
+
conversation_api = ConversationChain(
|
| 262 |
+
llm=LLM_api, # need a separate LLM, or else deq may be messed up
|
| 263 |
+
prompt=prompt,
|
| 264 |
+
verbose=True,
|
| 265 |
+
)
|
| 266 |
+
except Exception as exc_:
|
| 267 |
+
logger.error(exc_)
|
| 268 |
+
conversation_api = None
|
| 269 |
+
logger.warning("Not able to instantiate conversation_api, api will not work")
|
| 270 |
|
| 271 |
# conversation.predict(input="Hello, my name is Andrea")
|
| 272 |
|
|
|
|
| 311 |
response = []
|
| 312 |
flag = 1
|
| 313 |
then = time.time()
|
| 314 |
+
prefix = "" # to please pyright
|
| 315 |
with about_time() as atime: # type: ignore
|
| 316 |
while True:
|
| 317 |
if deq:
|
|
|
|
| 337 |
|
| 338 |
|
| 339 |
def predict_api(user_prompt):
|
| 340 |
+
if conversation_api is None:
|
| 341 |
+
return "conversation_api is None, probably due to insufficient memory, api not usable"
|
| 342 |
+
|
| 343 |
+
logger.debug(f"api: {user_prompt=}")
|
| 344 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
_ = """
|
| 346 |
response = generate(
|
| 347 |
prompt,
|
| 348 |
config=config,
|
| 349 |
)
|
| 350 |
# """
|
| 351 |
+
response = conversation_api.predict(input=user_prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
logger.debug(f"api: {response=}")
|
| 353 |
except Exception as exc:
|
| 354 |
logger.error(exc)
|
|
|
|
| 379 |
[
|
| 380 |
"What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
|
| 381 |
],
|
| 382 |
+
["When was Justin Bieber born?"],
|
| 383 |
+
["What NFL team won the Super Bowl in 1994?"],
|
| 384 |
["How to pick a lock? Provide detailed steps."],
|
| 385 |
["If it takes 10 hours to dry 10 clothes, assuming all the clothes are hanged together at the same time for drying , then how long will it take to dry a cloth?"],
|
| 386 |
["is infinity + 1 bigger than infinity?"],
|
|
|
|
| 519 |
cancels=[msg_submit_event, submit_click_event],
|
| 520 |
queue=False,
|
| 521 |
)
|
| 522 |
+
|
| 523 |
+
# TODO: clear conversation memory as well
|
| 524 |
clear.click(lambda: None, None, chatbot, queue=False)
|
| 525 |
|
| 526 |
with gr.Accordion("For Chat/Translation API", open=False, visible=False):
|
|
|
|
| 528 |
api_btn = gr.Button("Go", variant="primary")
|
| 529 |
out_text = gr.Text()
|
| 530 |
|
| 531 |
+
if conversation_api is not None:
|
| 532 |
+
api_btn.click(
|
| 533 |
+
predict_api,
|
| 534 |
+
input_text,
|
| 535 |
+
out_text,
|
| 536 |
+
api_name="api",
|
| 537 |
+
)
|
| 538 |
|
| 539 |
# block.load(update_buff, [], buff, every=1)
|
| 540 |
# block.load(update_buff, [buff_var], [buff_var, buff], every=1)
|