Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
·
1681f8a
1
Parent(s):
bd2d2e2
Fix 7b
Browse files
app.py
CHANGED
|
@@ -47,7 +47,8 @@ _ = (
|
|
| 47 |
"golay" in platform.node()
|
| 48 |
or "okteto" in platform.node()
|
| 49 |
or Path("/kaggle").exists()
|
| 50 |
-
or psutil.cpu_count(logical=False) < 4
|
|
|
|
| 51 |
)
|
| 52 |
|
| 53 |
if _:
|
|
@@ -116,7 +117,7 @@ except Exception as exc_:
|
|
| 116 |
LLM = AutoModelForCausalLM.from_pretrained(
|
| 117 |
model_loc,
|
| 118 |
model_type="llama",
|
| 119 |
-
threads=cpu_count,
|
| 120 |
)
|
| 121 |
|
| 122 |
logger.info(f"done load llm {model_loc=} {file_size=}G")
|
|
@@ -145,7 +146,7 @@ class GenerationConfig:
|
|
| 145 |
seed: int = 42
|
| 146 |
reset: bool = False
|
| 147 |
stream: bool = True
|
| 148 |
-
threads: int = cpu_count
|
| 149 |
# stop: list[str] = field(default_factory=lambda: [stop_string])
|
| 150 |
|
| 151 |
|
|
@@ -237,7 +238,7 @@ def predict_api(prompt):
|
|
| 237 |
seed=42,
|
| 238 |
reset=True, # reset history (cache)
|
| 239 |
stream=False,
|
| 240 |
-
threads=cpu_count,
|
| 241 |
# stop=prompt_prefix[1:2],
|
| 242 |
)
|
| 243 |
|
|
@@ -392,18 +393,18 @@ with gr.Blocks(
|
|
| 392 |
fn=user,
|
| 393 |
inputs=[msg, chatbot],
|
| 394 |
outputs=[msg, chatbot],
|
| 395 |
-
queue=
|
| 396 |
show_progress="full",
|
| 397 |
-
api_name=
|
| 398 |
).then(bot, chatbot, chatbot, queue=False)
|
| 399 |
submit.click(
|
| 400 |
fn=lambda x, y: ("",) + user(x, y)[1:], # clear msg
|
| 401 |
inputs=[msg, chatbot],
|
| 402 |
outputs=[msg, chatbot],
|
| 403 |
-
|
| 404 |
-
queue=False,
|
| 405 |
show_progress="full",
|
| 406 |
-
api_name=
|
| 407 |
).then(bot, chatbot, chatbot, queue=False)
|
| 408 |
|
| 409 |
clear.click(lambda: None, None, chatbot, queue=False)
|
|
@@ -429,13 +430,16 @@ with gr.Blocks(
|
|
| 429 |
# CPU UPGRADE cpu_count=8 32G, model 7G
|
| 430 |
|
| 431 |
# does not work
|
|
|
|
| 432 |
# _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
|
| 433 |
# concurrency_count = max(_, 1)
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
concurrency_count = max(int(32 / file_size) - 1, 1)
|
| 437 |
else:
|
| 438 |
-
concurrency_count = max(int(16 / file_size) - 1, 1)
|
|
|
|
|
|
|
|
|
|
| 439 |
logger.info(f"{concurrency_count=}")
|
| 440 |
|
| 441 |
-
block.queue(concurrency_count=
|
|
|
|
| 47 |
"golay" in platform.node()
|
| 48 |
or "okteto" in platform.node()
|
| 49 |
or Path("/kaggle").exists()
|
| 50 |
+
# or psutil.cpu_count(logical=False) < 4
|
| 51 |
+
or 1 # run 7b in hf
|
| 52 |
)
|
| 53 |
|
| 54 |
if _:
|
|
|
|
| 117 |
LLM = AutoModelForCausalLM.from_pretrained(
|
| 118 |
model_loc,
|
| 119 |
model_type="llama",
|
| 120 |
+
# threads=cpu_count,
|
| 121 |
)
|
| 122 |
|
| 123 |
logger.info(f"done load llm {model_loc=} {file_size=}G")
|
|
|
|
| 146 |
seed: int = 42
|
| 147 |
reset: bool = False
|
| 148 |
stream: bool = True
|
| 149 |
+
# threads: int = cpu_count
|
| 150 |
# stop: list[str] = field(default_factory=lambda: [stop_string])
|
| 151 |
|
| 152 |
|
|
|
|
| 238 |
seed=42,
|
| 239 |
reset=True, # reset history (cache)
|
| 240 |
stream=False,
|
| 241 |
+
# threads=cpu_count,
|
| 242 |
# stop=prompt_prefix[1:2],
|
| 243 |
)
|
| 244 |
|
|
|
|
| 393 |
fn=user,
|
| 394 |
inputs=[msg, chatbot],
|
| 395 |
outputs=[msg, chatbot],
|
| 396 |
+
queue=True,
|
| 397 |
show_progress="full",
|
| 398 |
+
api_name=None,
|
| 399 |
).then(bot, chatbot, chatbot, queue=False)
|
| 400 |
submit.click(
|
| 401 |
fn=lambda x, y: ("",) + user(x, y)[1:], # clear msg
|
| 402 |
inputs=[msg, chatbot],
|
| 403 |
outputs=[msg, chatbot],
|
| 404 |
+
queue=True,
|
| 405 |
+
# queue=False,
|
| 406 |
show_progress="full",
|
| 407 |
+
api_name=None,
|
| 408 |
).then(bot, chatbot, chatbot, queue=False)
|
| 409 |
|
| 410 |
clear.click(lambda: None, None, chatbot, queue=False)
|
|
|
|
| 430 |
# CPU UPGRADE cpu_count=8 32G, model 7G
|
| 431 |
|
| 432 |
# does not work
|
| 433 |
+
_ = """
|
| 434 |
# _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
|
| 435 |
# concurrency_count = max(_, 1)
|
| 436 |
+
if psutil.cpu_count(logical=False) >= 8:
|
| 437 |
+
# concurrency_count = max(int(32 / file_size) - 1, 1)
|
|
|
|
| 438 |
else:
|
| 439 |
+
# concurrency_count = max(int(16 / file_size) - 1, 1)
|
| 440 |
+
# """
|
| 441 |
+
|
| 442 |
+
concurrency_count = 1
|
| 443 |
logger.info(f"{concurrency_count=}")
|
| 444 |
|
| 445 |
+
block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
|