Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
·
ab343b8
1
Parent(s):
21d3b25
Update predict_api
Browse files
README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
-
title: mpt
|
| 3 |
emoji: 🔥
|
| 4 |
colorFrom: purple
|
| 5 |
colorTo: red
|
|
@@ -8,6 +8,6 @@ sdk_version: 3.35.2
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
-
NB: Need a CPU
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: mpt-30b-ggml-chat
|
| 3 |
emoji: 🔥
|
| 4 |
colorFrom: purple
|
| 5 |
colorTo: red
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
+
NB: Need a CPU UPGRADE (32GB RAM) instance to run on a huggingface space or 19GB+ disk, 22GB+ RAM at a minimum
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
|
@@ -31,6 +31,7 @@ def predict0(prompt, bot):
|
|
| 31 |
print(assistant_prefix, end=" ", flush=True)
|
| 32 |
|
| 33 |
response = ""
|
|
|
|
| 34 |
for word in generator:
|
| 35 |
print(word, end="", flush=True)
|
| 36 |
response += word
|
|
@@ -46,6 +47,30 @@ def predict0(prompt, bot):
|
|
| 46 |
|
| 47 |
return prompt, bot
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str):
|
| 51 |
local_path = os.path.abspath(destination_folder)
|
|
@@ -445,6 +470,18 @@ with gr.Blocks(
|
|
| 445 |
# AttributeError: 'Blocks' object has no attribute 'run_forever'
|
| 446 |
# block.run_forever(lambda: ns.response, None, [buff], every=1)
|
| 447 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 448 |
# concurrency_count=5, max_size=20
|
| 449 |
# max_size=36, concurrency_count=14
|
| 450 |
block.queue(concurrency_count=5, max_size=20).launch(debug=True)
|
|
|
|
| 31 |
print(assistant_prefix, end=" ", flush=True)
|
| 32 |
|
| 33 |
response = ""
|
| 34 |
+
buff.update(value="diggin...")
|
| 35 |
for word in generator:
|
| 36 |
print(word, end="", flush=True)
|
| 37 |
response += word
|
|
|
|
| 47 |
|
| 48 |
return prompt, bot
|
| 49 |
|
| 50 |
+
def predict_api(prompt):
|
| 51 |
+
logger.debug(f"{prompt=}")
|
| 52 |
+
ns.response = ""
|
| 53 |
+
try:
|
| 54 |
+
user_prompt = prompt
|
| 55 |
+
generator = generate(llm, generation_config, system_prompt, user_prompt.strip())
|
| 56 |
+
print(assistant_prefix, end=" ", flush=True)
|
| 57 |
+
|
| 58 |
+
response = ""
|
| 59 |
+
buff.update(value="diggin...")
|
| 60 |
+
for word in generator:
|
| 61 |
+
print(word, end="", flush=True)
|
| 62 |
+
response += word
|
| 63 |
+
ns.response = response
|
| 64 |
+
buff.update(value=response)
|
| 65 |
+
print("")
|
| 66 |
+
logger.debug(f"{response=}")
|
| 67 |
+
except Exception as exc:
|
| 68 |
+
logger.error(exc)
|
| 69 |
+
response = f"{exc=}"
|
| 70 |
+
# bot = {"inputs": [response]}
|
| 71 |
+
# bot = [(prompt, response)]
|
| 72 |
+
|
| 73 |
+
return response
|
| 74 |
|
| 75 |
def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str):
|
| 76 |
local_path = os.path.abspath(destination_folder)
|
|
|
|
| 470 |
# AttributeError: 'Blocks' object has no attribute 'run_forever'
|
| 471 |
# block.run_forever(lambda: ns.response, None, [buff], every=1)
|
| 472 |
|
| 473 |
+
with gr.Accordion("For Chat/Translation API", open=False, visible=False):
|
| 474 |
+
input_text = gr.Text()
|
| 475 |
+
api_btn = gr.Button("Go", variant="primary")
|
| 476 |
+
out_text = gr.Text()
|
| 477 |
+
api_btn.click(
|
| 478 |
+
predict_api,
|
| 479 |
+
input_text,
|
| 480 |
+
out_text,
|
| 481 |
+
# show_progress="full",
|
| 482 |
+
api_name="api",
|
| 483 |
+
)
|
| 484 |
+
|
| 485 |
# concurrency_count=5, max_size=20
|
| 486 |
# max_size=36, concurrency_count=14
|
| 487 |
block.queue(concurrency_count=5, max_size=20).launch(debug=True)
|