Spaces:

None1145
/

ChatGLM-CPP

Sleeping

None1145 committed on Nov 26, 2024

Commit

79a34ec

verified ·

1 Parent(s): e49477a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -21,15 +21,10 @@ For more information on `huggingface_hub` Inference API support, please check th
 repo_id = "None1145/ChatGLM3-6B-Theresa-GGML"
 filename = "ChatGLM3-6B-Theresa-GGML-Q4_0.bin"
-hf_hub_download(repo_id=repo_id, filename=filename, local_dir="./Models")
-list_files_tree("./Models")
-import time
-time.sleep(10)
 pipeline = chatglm_cpp.Pipeline(model, max_length=max_length)
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -38,30 +33,36 @@ def respond(
     temperature,
     top_p,
 ):
-    messages = [{"role": "system", "content": system_message}]
     for val in history:
         if val[0]:
-            messages.append({"role": "user", "content": val[0]})
         if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
     response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
         yield response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface

 repo_id = "None1145/ChatGLM3-6B-Theresa-GGML"
 filename = "ChatGLM3-6B-Theresa-GGML-Q4_0.bin"
+hf_hub_download(repo_id=repo_id, filename=filename, local_dir=f"./Models/{repo_id}")
+model = f"./Models/{repo_id}/{filename}"
 pipeline = chatglm_cpp.Pipeline(model, max_length=max_length)
 def respond(
     message,
     history: list[tuple[str, str]],
     temperature,
     top_p,
 ):
+    generation_kwargs = dict(
+        max_length=max_length,
+        max_context_length=max_tokens,
+        do_sample=temperature > 0,
+        top_k=0,
+        top_p=top_p,
+        temperature=temperature,
+        repetition_penalty=1.0,
+        stream=True,
+    )
+    messages = [chatglm_cpp.ChatMessage(role="system", content=system_message)]
     for val in history:
         if val[0]:
+            messages.append(chatglm_cpp.ChatMessage(role="user", content=val[0]))
         if val[1]:
+            messages.append(chatglm_cpp.ChatMessage(role="assistant", content=val[0]))
+    messages.append(chatglm_cpp.ChatMessage(role="user", content=message))
     response = ""
+    for chunk in pipeline.chat(messages, **generation_kwargs)
+        response += chunk.content
+        chunks.append(chunk)
         yield response
+    messages.append(chatglm_cpp.ChatMessage(role="assistant", content=response))
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface