ffreemt committed · Commit 1a8bf1d · 1 Parent(s): f820383

Update cuda().half(), fix timezone
app.py CHANGED
@@ -1,3 +1,6 @@
+"""Credit to https://github.com/THUDM/ChatGLM2-6B/blob/main/web_demo.py
+while mistakes are mine
+"""
 # pylint: disable=broad-exception-caught, redefined-outer-name, missing-function-docstring, missing-module-docstring, too-many-arguments, line-too-long, invalid-name, redefined-builtin, redefined-argument-from-local
 # import gradio as gr
 
@@ -6,17 +9,24 @@
 
 # %%writefile demo-4bit.py
 
+import os
+import time
 from textwrap import dedent
 
 import gradio as gr
 import mdtex2html
 import torch
 from loguru import logger
-
-# credit to https://github.com/THUDM/ChatGLM2-6B/blob/main/web_demo.py
-# while mistakes are mine
 from transformers import AutoModel, AutoTokenizer
 
+# fix timezone in Linux
+os.environ["TZ"] = "Asia/Shanghai"
+try:
+    time.tzset()  # type: ignore  # pylint: disable=no-member
+except Exception:
+    # Windows
+    logger.warning("Windows, cant run time.tzset()")
+
 model_name = "THUDM/chatglm2-6b"
 # model_name = "THUDM/chatglm2-6b-int4"
 
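Note on the timezone fix: time.tzset() re-reads the TZ environment variable, but the function exists only on Unix, which is why the commit wraps it in try/except and logs a warning on Windows. A minimal alternative sketch that probes for the function instead of catching the exception (editor's suggestion, stdlib only, not part of the commit):

import os
import time

os.environ["TZ"] = "Asia/Shanghai"
if hasattr(time, "tzset"):
    time.tzset()  # Unix only: apply the new TZ to subsequent time calls
# on Windows time.tzset is absent, so there is nothing to call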
@@ -33,9 +43,9 @@ has_cuda = torch.cuda.is_available()
 # has_cuda = False  # force cpu
 
 if has_cuda:
-    model = AutoModel.from_pretrained(
-        model_name, trust_remote_code=True
-    )
+    model = (
+        AutoModel.from_pretrained(model_name, trust_remote_code=True).cuda().half()
+    )  # 3.92G
 else:
     model = AutoModel.from_pretrained(
         model_name, trust_remote_code=True
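Note on the model change: calling .half() after .cuda() converts the weights to fp16, roughly halving GPU memory (the # 3.92G comment records the resulting footprint). A hedged alternative sketch that passes torch_dtype to from_pretrained so the weights load as fp16 directly (assumes a transformers version that supports torch_dtype; not what this commit does):

import torch
from transformers import AutoModel

model = AutoModel.from_pretrained(
    "THUDM/chatglm2-6b",
    trust_remote_code=True,
    torch_dtype=torch.half,  # load weights as fp16 from the start
).cuda()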
@@ -179,7 +189,7 @@ def retry_last_answer(
     history.pop(-1)
 
     yield from predict(
-        RETRY_FLAG,
+        RETRY_FLAG,  # type: ignore
        user_input,
        chatbot,
        max_length,
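Note: this hunk only adds a # type: ignore, but the surrounding yield from predict(...) is what makes retry streaming work: it delegates to the predict generator and re-yields each partial answer as it arrives. A toy illustration of the delegation (names are illustrative, not from app.py):

def predict_stub():
    yield "partial answer"
    yield "full answer"

def retry_stub():
    yield from predict_stub()  # re-yields every item the inner generator produces

assert list(retry_stub()) == ["partial answer", "full answer"]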
@@ -196,7 +206,7 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm")) a
         """<center><a href="https://huggingface.co/spaces/mikeee/chatglm2-6b-4bit?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>To avoid the queue and for faster inference Duplicate this Space and upgrade to GPU</center>"""
     )
 
-    with gr.Accordion("Info", open=False):
+    with gr.Accordion("🎈 Info", open=False):
         _ = """
         ## ChatGLM2-6B-int4
 
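Note: the only change in this hunk is the accordion label ("Info" becomes "🎈 Info"). For context, gr.Accordion is a collapsible container inside gr.Blocks; a minimal standalone sketch of the pattern (assumes the Gradio 3.x API the app uses):

import gradio as gr

with gr.Blocks() as demo:
    with gr.Accordion("🎈 Info", open=False):  # collapsed until clicked
        gr.Markdown("## ChatGLM2-6B-int4")

# demo.launch()  # uncomment to serve locally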