# Reference:
# https://github.com/li-plus/chatglm.cpp
# https://github.com/li-plus/chatglm.cpp/blob/main/examples/web_demo.py
import argparse
from pathlib import Path

import chatglm_cpp
import gradio as gr

# Load the GGML-converted ChatGLM3 model from a local path.
pipeline = chatglm_cpp.Pipeline("./chatglm3-ggml.bin")

# Generation settings.
max_length = 2048
top_p = 0.4
temp = 0.95
max_context_length = 512
mode = "chat"
top_k = 0
repeat_penalty = 1.0
threads = 0


def postprocess(text):
    # if args.plain:
    #     return f"<pre>{text}</pre>"
    return text


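# Stream the model's reply into the Gradio chatbot, yielding partial updates
# so the UI refreshes as each chunk arrives.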
def predict(input, chatbot, max_length, top_p, temperature, messages):
    # Record the user turn in both the UI history and the model's message list.
    chatbot.append((postprocess(input), ""))
    messages.append(chatglm_cpp.ChatMessage(role="user", content=input))

    generation_kwargs = dict(
        max_length=max_length,
        max_context_length=max_context_length,
        do_sample=temperature > 0,
        top_k=top_k,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=repeat_penalty,
        num_threads=threads,
        stream=True,
    )

    response = ""
    if mode == "chat":
        chunks = []
        for chunk in pipeline.chat(messages, **generation_kwargs):
            response += chunk.content
            chunks.append(chunk)
            chatbot[-1] = (chatbot[-1][0], postprocess(response))
            yield chatbot, messages
        # Fold the streamed chunks into a single assistant message for the history.
        messages.append(pipeline.merge_streaming_messages(chunks))
    else:
        for chunk in pipeline.generate(input, **generation_kwargs):
            response += chunk
            chatbot[-1] = (chatbot[-1][0], postprocess(response))
            yield chatbot, messages

    yield chatbot, messages


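# Callbacks for clearing the input textbox and the chat/message history.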
def reset_user_input():
    return gr.update(value="")


def reset_state():
    return [], []


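# Build the Gradio UI.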
with gr.Blocks() as demo:
gr.HTML("""