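"""Gradio chat demo for TheBloke/30B-Lazarus-GGML, served via ctransformers."""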
from pathlib import Path
from urllib.parse import urlparse

import gradio as gr
import psutil
from ctransformers import AutoModelForCausalLM
from huggingface_hub import hf_hub_download
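
# The two download snippets below are disabled (assigned to a throwaway
# string) and kept for reference; snapshot_download would need to be imported
# from huggingface_hub before re-enabling the first one.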
| _ = """ | |
| snapshot_download( | |
| repo_id="TheBloke/falcon-7b-instruct-GGML", | |
| allow_patterns="falcon7b-instruct.ggmlv3.q4_0.bin", | |
| revision="ggmlv3", | |
| local_dir="models", | |
| local_dir_use_symlinks=False, # default "auto" | |
| ) | |
| hf_hub_download( | |
| repo_id=repo_id, | |
| filename=model_filename, | |
| local_dir=local_path, | |
| local_dir_use_symlinks=True, | |
| ) | |
| # """ | |
# falcon7b-instruct.ggmlv3.q4_0.bin is ~4.06 GB
| _ = """ | |
| llm = AutoModelForCausalLM.from_pretrained( | |
| "TheBloke/falcon-7b-instruct-GGML", | |
| model_file="falcon7b-instruct.ggmlv3.q4_0.bin", | |
| model_type="falcon", gpu_layers=32, threads=2, | |
| ) | |
| # """ | |
# _ = Path("models", "falcon7b-instruct.ggmlv3.q4_0.bin").absolute().as_posix()
# assert Path(_).exists(), f"{_} does not exist, perhaps snapshot_download failed?"
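
# Derive repo_id and model_file from a model-card blob URL, so switching
# models only requires changing URL below.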
| URL = "https://huggingface.co/TheBloke/falcon-7b-instruct-GGML/blob/main/falcon-7b-instruct.ggccv1.q4_1.bin" | |
| URL = "https://huggingface.co/TheBloke/falcon-7b-instruct-GGML/blob/ggmlv3/falcon7b-instruct.ggmlv3.q4_1.bin" | |
| repo_id = "/".join(urlparse(URL).path.strip("/").split("/")[:2]) | |
| URL = "https://huggingface.co/TheBloke/30B-Lazarus-GGML/blob/main/30b-Lazarus.ggmlv3.q2_K.bin" # 13.6 G | |
| repo_id = "/".join(urlparse(URL).path.strip("/").split("/")[:2]) | |
| model_file = Path(URL).name | |
| _ = """ | |
| _ = hf_hub_download( | |
| repo_id=repo_id, | |
| revision="ggmlv3", | |
| filename=model_file, | |
| local_dir="models", | |
| # local_dir_use_symlinks=True, | |
| ) | |
| # """ # for falcon | |
_ = hf_hub_download(
    repo_id=repo_id,
    filename=model_file,
    local_dir="models",
    # local_dir_use_symlinks=True,
)
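# hf_hub_download returns the local file path; it is passed straight to
# from_pretrained below.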
llm = AutoModelForCausalLM.from_pretrained(
    _,  # local path returned by hf_hub_download
    # repo_id,  # "TheBloke/falcon-7b-instruct-GGML"
    # model_file=model_file,
    # model_type="falcon",
    model_type="llama",  # 30B-Lazarus is llama-based
    threads=psutil.cpu_count(logical=False),
)
TITLE = f"""<h2 align="center">🦅 30B-Lazarus-GGML ({model_file}) 🦅</h2>"""
USER_NAME = "User"
BOT_NAME = "Assistant"
DEFAULT_INSTRUCTIONS = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Assistant, and a human user, called User. In the following interactions, User and Assistant will converse in natural language, and Assistant will answer User's questions. Assistant will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins.
"""
RETRY_COMMAND = "/retry"
STOP_STR = f"\n{USER_NAME}:"
STOP_SUSPECT_LIST = [":", "\n", "User"]
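# Tokens in STOP_SUSPECT_LIST may be the start of STOP_STR, so run_chat
# buffers them instead of streaming them to the UI immediately.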


def chat_accordion():
    with gr.Accordion("Parameters", open=False):
        temperature = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.8,
            step=0.1,
            interactive=True,
            label="Temperature",
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=0.99,
            value=0.9,
            step=0.01,
            interactive=True,
            label="p (nucleus sampling)",
        )
    return temperature, top_p


# TODO: fix prompt
def format_chat_prompt(message: str, chat_history, instructions: str) -> str:
    instructions = instructions.strip()
    prompt = instructions
    for turn in chat_history:
        user_message, bot_message = turn
        prompt = f"{prompt}\n{USER_NAME}: {user_message}\n{BOT_NAME}: {bot_message}"
    prompt = f"{prompt}\n{USER_NAME}: {message}\n{BOT_NAME}:"
    return prompt
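# The resulting prompt has the shape:
#   <instructions>
#   User: <first message>
#   Assistant: <first reply>
#   ...
#   User: <new message>
#   Assistant: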


def chat():
    with gr.Column(elem_id="chat_container"):
        with gr.Row():
            chatbot = gr.Chatbot(elem_id="chatbot")
        with gr.Row():
            inputs = gr.Textbox(
                placeholder="Ask me anything...",
                label="Type an input and press Enter",
                max_lines=3,
            )
        with gr.Row(elem_id="button_container"):
            # with gr.Column(min_width=32):
            #     submit_button = gr.Button("🚀 Submit")
            with gr.Column(min_width=32):
                retry_button = gr.Button("♻️ Retry last turn")
            with gr.Column(min_width=32):
                delete_turn_button = gr.Button("🧽 Delete last turn")
            with gr.Column(min_width=32):
                clear_chat_button = gr.Button("✨ Delete all history")
        gr.Examples(
            [
                ["Hey! Any recommendations for my holidays in Abu Dhabi?"],
                ["What's the Everett interpretation of quantum mechanics?"],
                ["Give me a list of the top 10 dive sites you would recommend around the world."],
                ["Can you tell me more about deep-water soloing?"],
                ["Can you write a short tweet about 30B-Lazarus-GGML?"],
            ],
            inputs=inputs,
            label="Click on any example and press Enter in the input textbox!",
        )
| with gr.Row(elem_id="param_container"): | |
| with gr.Column(): | |
| temperature, top_p = chat_accordion() | |
| with gr.Column(): | |
| with gr.Accordion("Instructions", open=False): | |
| instructions = gr.Textbox( | |
| placeholder="LLM instructions", | |
| value=DEFAULT_INSTRUCTIONS, | |
| lines=10, | |
| interactive=True, | |
| label="Instructions", | |
| max_lines=16, | |
| show_label=False, | |
| ) | |
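
    # The handlers below are defined inside chat() so they close over the
    # components created above; run_chat doubles as the retry path when the
    # incoming message equals RETRY_COMMAND.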
    def run_chat(
        message: str, chat_history, instructions: str, temperature: float, top_p: float
    ):
        if not message or (message == RETRY_COMMAND and len(chat_history) == 0):
            yield chat_history
            return

        if message == RETRY_COMMAND and chat_history:
            # Drop the last turn and regenerate from its user message.
            prev_turn = chat_history.pop(-1)
            user_message, _ = prev_turn
            message = user_message

        prompt = format_chat_prompt(message, chat_history, instructions)
        chat_history = chat_history + [[message, ""]]
        stream = llm(
            prompt,
            max_new_tokens=1024,
            stop=[STOP_STR, "<|endoftext|>"],
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )
        acc_text = ""
        for idx, response in enumerate(stream):
            text_token = response
            if text_token in STOP_SUSPECT_LIST:
                # Possible start of the stop string: buffer it rather than
                # showing it right away.
                acc_text += text_token
                continue
            if idx == 0 and text_token.startswith(" "):
                text_token = text_token[1:]
            acc_text += text_token
            last_turn = list(chat_history.pop(-1))
            last_turn[-1] += acc_text
            chat_history = chat_history + [last_turn]
            yield chat_history
            acc_text = ""
    def delete_last_turn(chat_history):
        if chat_history:
            chat_history.pop(-1)
        return {chatbot: gr.update(value=chat_history)}

    def run_retry(
        message: str, chat_history, instructions: str, temperature: float, top_p: float
    ):
        yield from run_chat(
            RETRY_COMMAND, chat_history, instructions, temperature, top_p
        )

    def clear_chat():
        return []
    inputs.submit(
        run_chat,
        [inputs, chatbot, instructions, temperature, top_p],
        outputs=[chatbot],
        show_progress="minimal",
    )
    inputs.submit(lambda: "", inputs=None, outputs=inputs)
    # submit_button.click(
    #     run_chat,
    #     [inputs, chatbot, instructions, temperature, top_p],
    #     outputs=[chatbot],
    #     show_progress="minimal",
    # )
    delete_turn_button.click(delete_last_turn, inputs=[chatbot], outputs=[chatbot])
    retry_button.click(
        run_retry,
        [inputs, chatbot, instructions, temperature, top_p],
        outputs=[chatbot],
        show_progress="minimal",
    )
    clear_chat_button.click(clear_chat, [], chatbot)


def get_demo():
    with gr.Blocks(
        # css=None
        # css="""#chat_container {width: 700px; margin-left: auto; margin-right: auto;}
        # #button_container {width: 700px; margin-left: auto; margin-right: auto;}
        # #param_container {width: 700px; margin-left: auto; margin-right: auto;}"""
        css="""#chatbot {
            font-size: 14px;
            min-height: 300px;
        }"""
    ) as demo:
        gr.HTML(TITLE)
        with gr.Row():
            with gr.Column():
                gr.Markdown(
                    """
                    ⚠️ **Limitations**: the model can and will produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so.
                    """
                )
        chat()
    return demo


if __name__ == "__main__":
    demo = get_demo()
    demo.queue(max_size=64, concurrency_count=8)
    # demo.launch(server_name="0.0.0.0", server_port=7860)
    demo.launch()