ffreemt committed · Commit 1a8bf1d · 1 Parent(s): f820383

Update cuda().half(), fix timezone
app.py CHANGED
@@ -1,3 +1,6 @@
+"""Credit to https://github.com/THUDM/ChatGLM2-6B/blob/main/web_demo.py
+while mistakes are mine
+"""
 # pylint: disable=broad-exception-caught, redefined-outer-name, missing-function-docstring, missing-module-docstring, too-many-arguments, line-too-long, invalid-name, redefined-builtin, redefined-argument-from-local
 # import gradio as gr
 
@@ -6,17 +9,24 @@
 
 # %%writefile demo-4bit.py
 
+import os
+import time
 from textwrap import dedent
 
 import gradio as gr
 import mdtex2html
 import torch
 from loguru import logger
-
-# credit to https://github.com/THUDM/ChatGLM2-6B/blob/main/web_demo.py
-# while mistakes are mine
 from transformers import AutoModel, AutoTokenizer
 
+# fix timezone in Linux
+os.environ["TZ"] = "Asia/Shanghai"
+try:
+    time.tzset()  # type: ignore  # pylint: disable=no-member
+except Exception:
+    # Windows
+    logger.warning("Windows, cant run time.tzset()")
+
 model_name = "THUDM/chatglm2-6b"
 # model_name = "THUDM/chatglm2-6b-int4"
 
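Note on the timezone fix: time.tzset() re-reads the TZ environment variable, but the function exists only on Unix, which is why the commit wraps it in try/except and logs a warning on Windows. A minimal alternative sketch that probes for the function instead of catching the exception (editor's suggestion, stdlib only, not part of the commit):

import os
import time

os.environ["TZ"] = "Asia/Shanghai"
if hasattr(time, "tzset"):
    time.tzset()  # Unix only: apply the new TZ to subsequent time calls
# on Windows time.tzset is absent, so there is nothing to call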
@@ -33,9 +43,9 @@ has_cuda = torch.cuda.is_available()
 # has_cuda = False  # force cpu
 
 if has_cuda:
-    model = AutoModel.from_pretrained(
-        model_name, trust_remote_code=True
-    )
+    model = (
+        AutoModel.from_pretrained(model_name, trust_remote_code=True).cuda().half()
+    )  # 3.92G
 else:
     model = AutoModel.from_pretrained(
         model_name, trust_remote_code=True
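Note on the model change: calling .half() after .cuda() converts the weights to fp16, roughly halving GPU memory (the # 3.92G comment records the resulting footprint). A hedged alternative sketch that passes torch_dtype to from_pretrained so the weights load as fp16 directly (assumes a transformers version that supports torch_dtype; not what this commit does):

import torch
from transformers import AutoModel

model = AutoModel.from_pretrained(
    "THUDM/chatglm2-6b",
    trust_remote_code=True,
    torch_dtype=torch.half,  # load weights as fp16 from the start
).cuda()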
@@ -179,7 +189,7 @@ def retry_last_answer(
     history.pop(-1)
 
     yield from predict(
-        RETRY_FLAG,
+        RETRY_FLAG,  # type: ignore
        user_input,
        chatbot,
        max_length,
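Note: this hunk only adds a # type: ignore, but the surrounding yield from predict(...) is what makes retry streaming work: it delegates to the predict generator and re-yields each partial answer as it arrives. A toy illustration of the delegation (names are illustrative, not from app.py):

def predict_stub():
    yield "partial answer"
    yield "full answer"

def retry_stub():
    yield from predict_stub()  # re-yields every item the inner generator produces

assert list(retry_stub()) == ["partial answer", "full answer"]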
@@ -196,7 +206,7 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm")) a
         """<center><a href="https://huggingface.co/spaces/mikeee/chatglm2-6b-4bit?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>To avoid the queue and for faster inference Duplicate this Space and upgrade to GPU</center>"""
     )
 
-    with gr.Accordion("Info", open=False):
+    with gr.Accordion("🎈 Info", open=False):
         _ = """
         ## ChatGLM2-6B-int4
 
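Note: the only change in this hunk is the accordion label ("Info" becomes "🎈 Info"). For context, gr.Accordion is a collapsible container inside gr.Blocks; a minimal standalone sketch of the pattern (assumes the Gradio 3.x API the app uses):

import gradio as gr

with gr.Blocks() as demo:
    with gr.Accordion("🎈 Info", open=False):  # collapsed until clicked
        gr.Markdown("## ChatGLM2-6B-int4")

# demo.launch()  # uncomment to serve locally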