yibolu
committed on
Commit
·
308345c
1
Parent(s):
34d9abf
add missing files, fix readme and add docker file
Browse files- .gitattributes +1 -0
- Dockerfile +11 -0
- README.md +12 -1
- demo.py +6 -4
- faster_chat_glm/glm.cpython-38-x86_64-linux-gnu.so +0 -0
- {faster_chat_glm → lyraChatGLM}/__init__.py +3 -0
- {faster_chat_glm → lyraChatGLM}/__init__.py~ +0 -0
- lyraChatGLM/glm.cpython-38-x86_64-linux-gnu.so +3 -0
- lyraChatGLM/libnvinfer_plugin.so +3 -0
- {faster_chat_glm → lyraChatGLM}/model.py +0 -0
- requirements.txt +4 -0
.gitattributes
CHANGED
|
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
models/glm6b-kv-cache-dy-bs8.ftm filter=lfs diff=lfs merge=lfs -text
|
| 36 |
models/glm6b-bs8.ftm filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
models/glm6b-kv-cache-dy-bs8.ftm filter=lfs diff=lfs merge=lfs -text
|
| 36 |
models/glm6b-bs8.ftm filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.so filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM nvcr.io/nvidia/pytorch:23.02-py3
|
| 2 |
+
|
| 3 |
+
WORKDIR /workdir
|
| 4 |
+
|
| 5 |
+
COPY requirements.txt /workdir/
|
| 6 |
+
|
| 7 |
+
# since installing icetk will install protobuf 3.18.3, and we need protobuf==3.20.3
|
| 8 |
+
RUN pip install -r requirements.txt && \
|
| 9 |
+
pip install protobuf==3.20.3
|
| 10 |
+
|
| 11 |
+
|
README.md
CHANGED
|
@@ -35,6 +35,17 @@ Among its main features are:
|
|
| 35 |
|
| 36 |
- **Repository:** [https://huggingface.co/THUDM/chatglm-6b]
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
## Uses
|
| 39 |
|
| 40 |
```python
|
|
@@ -86,7 +97,7 @@ print(res)
|
|
| 86 |
## Citation
|
| 87 |
``` bibtex
|
| 88 |
@Misc{lyraChatGLM2023,
|
| 89 |
-
author = {Kangjian Wu, Zhengtao Wang, Bin Wu},
|
| 90 |
title = {lyraChatGLM: Accelerating ChatGLM by 10x+},
|
| 91 |
howpublished = {\url{https://huggingface.co/TMElyralab/lyraChatGLM}},
|
| 92 |
year = {2023}
|
|
|
|
| 35 |
|
| 36 |
- **Repository:** [https://huggingface.co/THUDM/chatglm-6b]
|
| 37 |
|
| 38 |
+
## Try Demo in 2 fast steps
|
| 39 |
+
|
| 40 |
+
``` bash
|
| 41 |
+
#step 1
|
| 42 |
+
git clone https://huggingface.co/TMElyralab/lyraChatGLM
|
| 43 |
+
cd lyraChatGLM
|
| 44 |
+
|
| 45 |
+
#step 2
|
| 46 |
+
docker run --gpus=1 --rm --net=host -v ${PWD}:/workdir yibolu96/lyra-chatglm-env:0.0.1 python3 /workdir/demo.py
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
## Uses
|
| 50 |
|
| 51 |
```python
|
|
|
|
| 97 |
## Citation
|
| 98 |
``` bibtex
|
| 99 |
@Misc{lyraChatGLM2023,
|
| 100 |
+
author = {Kangjian Wu, Zhengtao Wang, Yibo Lu, Bin Wu},
|
| 101 |
title = {lyraChatGLM: Accelerating ChatGLM by 10x+},
|
| 102 |
howpublished = {\url{https://huggingface.co/TMElyralab/lyraChatGLM}},
|
| 103 |
year = {2023}
|
demo.py
CHANGED
|
@@ -1,18 +1,20 @@
|
|
| 1 |
# coding=utf-8
|
| 2 |
|
| 3 |
from transformers import AutoTokenizer
|
| 4 |
-
from
|
|
|
|
| 5 |
|
|
|
|
| 6 |
|
| 7 |
MAX_OUT_LEN = 100
|
| 8 |
-
chatglm6b_dir =
|
| 9 |
tokenizer = AutoTokenizer.from_pretrained(chatglm6b_dir, trust_remote_code=True)
|
| 10 |
input_str = ["为什么我们需要对深度学习模型加速?", ]
|
| 11 |
inputs = tokenizer(input_str, return_tensors="pt", padding=True)
|
| 12 |
input_ids = inputs.input_ids.to('cuda:0')
|
| 13 |
|
|
|
|
| 14 |
|
| 15 |
-
plan_path = './models/glm6b-bs8.ftm'
|
| 16 |
# kernel for chat model.
|
| 17 |
kernel = GLM6B(plan_path=plan_path,
|
| 18 |
batch_size=1,
|
|
@@ -24,7 +26,7 @@ kernel = GLM6B(plan_path=plan_path,
|
|
| 24 |
vocab_size=150528,
|
| 25 |
max_seq_len=MAX_OUT_LEN)
|
| 26 |
|
| 27 |
-
chat = FasterChatGLM(model_dir=
|
| 28 |
|
| 29 |
# generate
|
| 30 |
sample_output = chat.generate(inputs=input_ids, max_length=MAX_OUT_LEN)
|
|
|
|
| 1 |
# coding=utf-8
|
| 2 |
|
| 3 |
from transformers import AutoTokenizer
|
| 4 |
+
from lyraChatGLM import GLM6B, FasterChatGLM
|
| 5 |
+
import os
|
| 6 |
|
| 7 |
+
current_workdir = os.path.dirname(__file__)
|
| 8 |
|
| 9 |
MAX_OUT_LEN = 100
|
| 10 |
+
chatglm6b_dir = os.path.join(current_workdir, "models")
|
| 11 |
tokenizer = AutoTokenizer.from_pretrained(chatglm6b_dir, trust_remote_code=True)
|
| 12 |
input_str = ["为什么我们需要对深度学习模型加速?", ]
|
| 13 |
inputs = tokenizer(input_str, return_tensors="pt", padding=True)
|
| 14 |
input_ids = inputs.input_ids.to('cuda:0')
|
| 15 |
|
| 16 |
+
plan_path = os.path.join(current_workdir, "models/glm6b-bs8.ftm")
|
| 17 |
|
|
|
|
| 18 |
# kernel for chat model.
|
| 19 |
kernel = GLM6B(plan_path=plan_path,
|
| 20 |
batch_size=1,
|
|
|
|
| 26 |
vocab_size=150528,
|
| 27 |
max_seq_len=MAX_OUT_LEN)
|
| 28 |
|
| 29 |
+
chat = FasterChatGLM(model_dir=chatglm6b_dir, kernel=kernel).half().cuda()
|
| 30 |
|
| 31 |
# generate
|
| 32 |
sample_output = chat.generate(inputs=input_ids, max_length=MAX_OUT_LEN)
|
faster_chat_glm/glm.cpython-38-x86_64-linux-gnu.so
DELETED
|
Binary file (188 kB)
|
|
|
{faster_chat_glm → lyraChatGLM}/__init__.py
RENAMED
|
@@ -1,5 +1,8 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
|
|
|
|
|
|
|
| 3 |
os.environ["TORCH_USE_RTLD_GLOBAL"]="YES"
|
| 4 |
|
| 5 |
import torch
|
|
|
|
| 1 |
import os
|
| 2 |
+
import ctypes
|
| 3 |
|
| 4 |
+
current_workdir = os.path.dirname(__file__)
|
| 5 |
+
ctypes.cdll.LoadLibrary(os.path.join(current_workdir, "libnvinfer_plugin.so"))
|
| 6 |
os.environ["TORCH_USE_RTLD_GLOBAL"]="YES"
|
| 7 |
|
| 8 |
import torch
|
{faster_chat_glm → lyraChatGLM}/__init__.py~
RENAMED
|
File without changes
|
lyraChatGLM/glm.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:feaeb19a7b780cdb669066bb096726d23f0c3ed401fe2f71adf12c66960c0d07
|
| 3 |
+
size 188432
|
lyraChatGLM/libnvinfer_plugin.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a87eb31795009c545422ef978f607d97be5454c68f09cb829352c0529d1ba8b
|
| 3 |
+
size 235256088
|
{faster_chat_glm → lyraChatGLM}/model.py
RENAMED
|
File without changes
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
icetk
|
| 2 |
+
torch
|
| 3 |
+
transformers
|
| 4 |
+
|