Spaces:

None1145
/

ChatGLM-CPP

Sleeping

App Files Files Community

ChatGLM-CPP / app.py

None1145

Update app.py

0afeba3 verified about 1 year ago

raw

history blame contribute delete

2.74 kB

	import gradio as gr
	import os
	import time
	from huggingface_hub import InferenceClient
	from huggingface_hub import hf_hub_download
	import chatglm_cpp

	# The active chatglm_cpp pipeline. It is only ever assigned inside ``load``;
	# ``None`` means no model has been loaded yet.
	pipeline = None


	def load(repo_id: str, filename: str, max_length: int = 8192) -> str:
	    """Download a model file from the Hugging Face Hub and make it active.

	    The file is stored under ``./Models/<repo_id>`` and a new
	    ``chatglm_cpp.Pipeline`` built from it replaces the module-level
	    ``pipeline``. The global is only rebound after the pipeline has been
	    constructed, so a failed download or load leaves the previous model
	    in place.

	    Args:
	        repo_id: Hub repository that hosts the model file.
	        filename: Name of the model file inside that repository.
	        max_length: Maximum sequence length handed to the pipeline.

	    Returns:
	        A status message naming the file and repository that were loaded.

	    Raises:
	        Whatever ``hf_hub_download`` or ``chatglm_cpp.Pipeline`` raise;
	        errors are deliberately not swallowed here.
	    """
	    global pipeline

	    # Values typed into the UI text boxes may carry stray whitespace.
	    repo_id = repo_id.strip()
	    filename = filename.strip()

	    local_dir = f"./Models/{repo_id}"
	    # Use the path reported by the downloader instead of rebuilding it by
	    # hand, so the pipeline always opens the file that was actually fetched.
	    model_path = hf_hub_download(
	        repo_id=repo_id,
	        filename=filename,
	        local_dir=local_dir,
	    )
	    pipeline = chatglm_cpp.Pipeline(model_path, max_length=max_length)
	    return f"Model {filename} from {repo_id} loaded successfully."


	# Load the default model at import time so the chat is usable right away.
	load("None1145/ChatGLM3-6B-Theresa-GGML", "ChatGLM3-6B-Theresa-GGML-Q4_0.bin")

	# Snapshot of the most recent conversation sent to the model. ``respond``
	# rebuilds the conversation from its ``history`` argument on every call and
	# only rebinds this name afterwards; it is never read back as chat state.
	messages = []


	def _history_to_turns(history):
	    """Flatten a chat history into a list of ``(role, content)`` pairs.

	    Accepts both pair-style entries ``(user_text, assistant_text)`` and
	    dict-style entries ``{"role": ..., "content": ...}``. Entries whose
	    content is empty or not a string are skipped.
	    """
	    turns = []
	    for entry in history or []:
	        if isinstance(entry, dict):
	            candidates = [(entry.get("role"), entry.get("content"))]
	        else:
	            user_text, assistant_text = entry
	            candidates = [("user", user_text), ("assistant", assistant_text)]
	        turns.extend(
	            (role, text)
	            for role, text in candidates
	            if role and isinstance(text, str) and text
	        )
	    return turns


	def respond(
	    message,
	    history: list[tuple[str, str]],
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	):
	    """Stream the model's reply to ``message`` as a growing string.

	    The conversation is rebuilt on every call from ``system_message``,
	    ``history`` and ``message``. Earlier versions kept one process-wide
	    message list, which mixed all callers into a single conversation,
	    ignored a cleared chat, and froze the system message after the first
	    call.

	    Args:
	        message: The new user message.
	        history: Previous turns of this chat as supplied by the caller.
	        system_message: System prompt placed at the start of the conversation.
	        max_tokens: Upper bound on the number of newly generated tokens.
	        temperature: Sampling temperature; sampling is disabled when it is 0.
	        top_p: Nucleus-sampling probability mass.

	    Yields:
	        First a short "..." placeholder sequence, then the partial reply,
	        extended by each streamed chunk.
	    """
	    global messages

	    if pipeline is None:
	        yield "Error: No model loaded. Please load a model first."
	        return

	    # Placeholder animation shown before generation starts.
	    response = "..."
	    for _ in range(3):
	        yield response
	        time.sleep(1)
	        response += " ..."

	    window = 8192
	    new_token_limit = int(max_tokens)
	    generation_kwargs = dict(
	        max_length=window,
	        # The UI labels this control "Max new tokens", so it must limit the
	        # reply length; it used to be passed as the prompt context limit.
	        max_new_tokens=new_token_limit,
	        # Give the prompt whatever remains of the window after reserving
	        # room for the reply.
	        max_context_length=max(1, window - new_token_limit),
	        do_sample=temperature > 0,
	        top_k=0,
	        top_p=top_p,
	        temperature=temperature,
	        repetition_penalty=1.0,
	        stream=True,
	    )

	    conversation = [chatglm_cpp.ChatMessage(role="system", content=system_message)]
	    conversation.extend(
	        chatglm_cpp.ChatMessage(role=role, content=text)
	        for role, text in _history_to_turns(history)
	    )
	    conversation.append(chatglm_cpp.ChatMessage(role="user", content=message))

	    response = ""
	    for chunk in pipeline.chat(conversation, **generation_kwargs):
	        response += chunk.content
	        yield response

	    conversation.append(chatglm_cpp.ChatMessage(role="assistant", content=response))
	    # Keep the module-level name pointing at the latest full conversation.
	    messages = conversation

	# Page layout: a row of model-loading controls, then the chat interface
	# with its generation settings.
	with gr.Blocks() as chat:
	    with gr.Row():
	        repo_id_input = gr.Textbox(
	            value="None1145/ChatGLM3-6B-Theresa-GGML",
	            label="Repo ID",
	        )
	        filename_input = gr.Textbox(
	            value="ChatGLM3-6B-Theresa-GGML-Q4_0.bin",
	            label="Filename",
	        )
	        load_button = gr.Button("Load Model")
	        load_status = gr.Textbox(interactive=False, label="Load Status")
	        # Clicking the button calls ``load`` with both text boxes and shows
	        # the returned status string.
	        load_button.click(
	            load,
	            inputs=[repo_id_input, filename_input],
	            outputs=load_status,
	        )

	    # Extra controls, created in the order ``respond`` expects them after
	    # ``message`` and ``history``: system message, max tokens, temperature,
	    # top-p.
	    extra_controls = [
	        gr.Textbox(label="System message", value="You are a friendly Chatbot."),
	        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
	        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
	        gr.Slider(
	            label="Top-p (nucleus sampling)",
	            minimum=0.1,
	            maximum=1.0,
	            step=0.05,
	            value=0.95,
	        ),
	    ]
	    chat_interface = gr.ChatInterface(respond, additional_inputs=extra_controls)

	if __name__ == "__main__":
	    # Launch the UI only when run as a script, not when imported.
	    chat.launch()