ffreemt committed · Commit 0acdcc9 · 1 Parent(s): cb39675

Update
README.md CHANGED

@@ -1,5 +1,5 @@
 ---
-title: llama2-7b-chat-uncensored-ggml
+title: langchain-llama2-7b-chat-uncensored-ggml
 emoji: 🚀
 colorFrom: green
 colorTo: green
app.py CHANGED

@@ -1,21 +1,34 @@
 """Run codes."""
 # pylint: disable=line-too-long, broad-exception-caught, invalid-name, missing-function-docstring, too-many-instance-attributes, missing-class-docstring
 # ruff: noqa: E501
+import gc
 import os
 import platform
 import random
 import time
-from dataclasses import asdict, dataclass
 from pathlib import Path
+from queue import deque
+from typing import Any, Dict, List, Union
 
 # from types import SimpleNamespace
 import gradio as gr
 import psutil
 from about_time import about_time
-from ctransformers import AutoModelForCausalLM
+from ctransformers import Config
 from dl_hf_model import dl_hf_model
+from langchain.callbacks.base import BaseCallbackHandler
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+
+# from ctransformers import AutoModelForCausalLM
+from langchain.llms import CTransformers
+from langchain.schema import LLMResult
 from loguru import logger
 
+deq = deque()
+sig_end = object()  # signals the processing is done
+
+# from langchain.llms import OpenAI
+
 filename_list = [
     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q2_K.bin",
     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_L.bin",
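The new module-level `deq` / `sig_end` pair sets up a producer/consumer channel: a callback handler (added later in this diff) appends tokens to the deque while the model generates, and the unique `object()` sentinel marks end-of-stream, since it can never compare equal to any token string. A minimal, self-contained sketch of the pattern, with a hypothetical `produce` standing in for the LLM callback:

import time
from collections import deque
from threading import Thread

deq = deque()
sig_end = object()  # unique sentinel; never equal to a token string

def produce():
    """Hypothetical stand-in for the streaming LLM callback."""
    for token in ["Hello", ",", " world"]:
        deq.append(token)
    deq.append(sig_end)

Thread(target=produce).start()
response = ""
while True:
    if deq:
        item = deq.popleft()
        if item is sig_end:
            break
        response += item
    else:
        time.sleep(0.01)  # avoid busy-waiting while the producer works
print(response)  # prints: Hello, world

(The diff imports `deque` from `queue`; that works because CPython's `queue` module re-exports `collections.deque`, but `collections` is its documented home.)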
@@ -121,6 +134,7 @@ cpu_count: int = int(_) if _ else 1
 logger.debug(f"{cpu_count=}")
 
 LLM = None
+gc.collect()
 
 try:
     model_loc, file_size = dl_hf_model(url)
@@ -128,10 +142,15 @@ except Exception as exc_:
     logger.error(exc_)
     raise SystemExit(1) from exc_
 
-LLM = AutoModelForCausalLM.from_pretrained(
-    model_loc,
+config = Config()
+config.stream = True
+
+# LLM = AutoModelForCausalLM.from_pretrained(
+LLM = CTransformers(
+    model=model_loc,
     model_type="llama",
-    ...
+    threads=cpu_count,
+    callbacks=[StreamingStdOutCallbackHandler()],
 )
 
 logger.info(f"done load llm {model_loc=} {file_size=}G")
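The load now goes through langchain's `CTransformers` wrapper instead of ctransformers' `AutoModelForCausalLM` (kept as a comment). Note that the `config` object built just above is not handed to the constructor in this hunk; langchain's wrapper takes generation settings through a `config` mapping, so as written it is the callback that does the streaming work. A minimal usage sketch, assuming the `LLM` object from this diff (the prompt is illustrative):

# calling the wrapper blocks until generation finishes; with
# StreamingStdOutCallbackHandler attached, tokens echo to stdout as they arrive
text = LLM("Q: What does GGML quantization trade off? A:")
print(text)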
@@ -143,45 +162,33 @@ except Exception:
     # Windows
     logger.warning("Windows, cant run time.tzset()")
 
-_ = """
-ns = SimpleNamespace(
-    response="",
-    generator=(_ for _ in []),
-)
-# """
 
-...
-    llm=LLM,
-    config: GenerationConfig = GenerationConfig(),
-):
-    """Run model inference, will return a Generator if streaming is true."""
-    # _ = prompt_template.format(question=question)
-    # print(_)
-
-    prompt = prompt_template.format(question=question)
-
-    return llm(
-        prompt,
-        **asdict(config),
-    )
+class DequeCallbackHandler(BaseCallbackHandler):
+    """Mediate gradio and stream output."""
+
+    def __init__(self, deq: deque):
+        """Init deque for FIFO, may need to upgrade to queue.Queue or queue.SimpleQueue."""
+        self.q = deq
+
+    def on_llm_start(
+        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
+    ) -> None:
+        """Run when LLM starts running. Clean the queue."""
+        self.q.clear()
+
+    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
+        """Run on new LLM token. Only available when streaming is enabled."""
+        self.q.append(token)
 
+    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
+        """Run when LLM ends running."""
+        self.q.append(sig_end)
 
+    def on_llm_error(
+        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
+    ) -> None:
+        """Run when LLM errors."""
+        self.q.append(sig_end)
 
 
 def user(user_message, history):
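`DequeCallbackHandler` is the producer half of the stream: it pushes tokens into `deq` and terminates with `sig_end` on completion or error. The consumer half is not part of this diff; a sketch of how a gradio handler could drain the deque while the blocking LLM call runs in a worker thread (the function name, threading approach, and `[user, bot]` history layout are assumptions):

import time
from threading import Thread

def bot_stream(history):
    """Hypothetical gradio handler that yields the growing response."""
    prompt = history[-1][0]
    handler = DequeCallbackHandler(deq)
    # run the blocking LLM call in a worker so tokens can be yielded here;
    # on_llm_start clears the deque, so no stale tokens leak between runs
    worker = Thread(target=LLM, args=(prompt,), kwargs={"callbacks": [handler]})
    worker.start()
    history[-1][1] = ""
    while True:
        if deq:
            token = deq.popleft()
            if token is sig_end:
                break
            history[-1][1] += token
            yield history
        else:
            time.sleep(0.01)  # wait for the producer thread
    worker.join()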
@@ -223,7 +230,7 @@ def bot(history):
 
     logger.debug("about to generate")
 
-    config = ...
+    config = Config(reset=True)
     for elm in generate(user_message, config=config):
         if flag == 1:
             logger.debug("in the loop")
@@ -251,7 +258,7 @@ def predict_api(prompt):
     logger.debug(f"{prompt=}")
     try:
         # user_prompt = prompt
-        config = ...
+        config = Config(
             temperature=0.2,
             top_k=10,
             top_p=0.9,
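`predict_api` builds its own `Config`. ctransformers' `Config` is a plain dataclass of generation knobs, so keyword arguments map directly onto fields. A sketch using the values from this hunk; the `stream` and `threads` lines are assumptions for completeness, not part of this change:

from ctransformers import Config

config = Config(
    temperature=0.2,    # lower temperature, less random sampling
    top_k=10,           # sample only from the 10 most likely tokens
    top_p=0.9,          # nucleus-sampling cumulative-probability cutoff
    stream=True,        # emit tokens incrementally via callbacks
    threads=cpu_count,  # assumes the cpu_count computed earlier in app.py
)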
@@ -467,4 +474,4 @@ else:
     concurrency_count = 1
 logger.info(f"{concurrency_count=}")
 
-block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
+# block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)