import gradio as gr
import yaml
from huggingface_hub import hf_hub_download
from huggingface_hub.utils import LocalEntryNotFoundError
from llama_cpp import Llama

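# Load the app configuration: model location on the Hub, llama.cpp settings,
# generation defaults, and Gradio queue options.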
with open("./config.yml", "r") as f:
    config = yaml.load(f, Loader=yaml.Loader)
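
# The code implies a config.yml shaped roughly like the sketch below; the
# values here are purely hypothetical, only the key structure is assumed:
#
#   hub:
#     repo_id: someuser/some-ggml-model   # hypothetical repo
#     filename: model.ggmlv3.q4_0.bin     # hypothetical file
#   llama_cpp:
#     n_ctx: 2048
#   chat:
#     stop: ["USER:"]
#   queue:
#     concurrency_count: 1

# Download the model file, retrying on transient connection errors
# (hf_hub_download raises LocalEntryNotFoundError when the file is neither
# reachable online nor present in the local cache).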
while True:
    try:
        hub_config = config["hub"].copy()
        repo_id = hub_config.pop("repo_id")
        filename = hub_config.pop("filename")
        fp = hf_hub_download(
            repo_id=repo_id, filename=filename, **hub_config
        )
        break
    except LocalEntryNotFoundError as e:
        if "Connection error" in str(e):
            print(str(e) + ", retrying...")
        else:
            raise e

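# Initialize llama.cpp with the downloaded weights; any extra kwargs
# (e.g. n_ctx, n_threads) come from the llama_cpp section of config.yml.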
llm = Llama(model_path=fp, **config["llama_cpp"])

def user(message, history):
    history = history or []
    # Append the user's message to the conversation history
    history.append([message, ""])
    return "", history

def chat(history, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
    history = history or []
    messages = system_message + \
        "\n".join(["\n".join(["USER: " + item[0], "ASSISTANT: " + item[1]])
                   for item in history])
    # remove last space from assistant, some models output a ZWSP if you leave a space
    messages = messages[:-1]
    history[-1][1] = ""
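    # At this point the rendered prompt looks like (illustrative):
    #     <system message>
    #     USER: <earlier user turn>
    #     ASSISTANT: <earlier reply>
    #     USER: <latest message>
    #     ASSISTANT: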
    for output in llm(
        messages,
        echo=False,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repeat_penalty=repeat_penalty,
        **config["chat"]
    ):
        answer = output["choices"][0]["text"]
        history[-1][1] += answer
        # stream the response
        yield history, history

def clear_chat(chat_history_state, chat_message):
    chat_history_state = []
    chat_message = ""
    return chat_history_state, chat_message

start_message = """
You are Firelight, a master of storytelling with an unparalleled depth of knowledge and captivating presence. Your task is to craft compelling stories that captivate audiences, adhering to the 3-act/8-sequence structure. Draw from myths and legends, evoke emotions, create vivid worlds, and breathe life into characters. Construct intricate plots with unexpected twists and satisfying resolutions. Collaborate with users to explore human experience, inspire, enlighten, and entertain. Your mission is to redefine storytelling, creating immersive worlds and leaving a lasting legacy.
"""

def generate_text_instruct(input_text):
    response = ""
    for output in llm(f"### Instruction:\n{input_text}\n\n### Response:\n", echo=False, stream=True, **config["chat"]):
        answer = output["choices"][0]["text"]
        response += answer
        yield response

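# Minimal Interface for the Instruct tab; it is rendered inside the
# Blocks layout below rather than launched on its own.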
instruct_interface = gr.Interface(
    fn=generate_text_instruct,
    inputs=gr.Textbox(lines=10, label="Enter your input text"),
    outputs=gr.Textbox(label="Output text"),
)

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(f"""
# Firelight Test
- This is the [{config["hub"]["repo_id"]}](https://huggingface.co/{config["hub"]["repo_id"]}) model file [{config["hub"]["filename"]}](https://huggingface.co/{config["hub"]["repo_id"]}/blob/main/{config["hub"]["filename"]})
""")
    with gr.Tab("Instruct"):
        gr.Markdown("# GGML Spaces Instruct Demo")
        instruct_interface.render()
    with gr.Tab("Chatbot"):
        gr.Markdown("# GGML Spaces Chatbot Demo")
        chatbot = gr.Chatbot()
        with gr.Row():
            message = gr.Textbox(
                label="What do you want to chat about?",
                placeholder="Ask me anything.",
                lines=1,
            )
        with gr.Row():
            submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
            clear = gr.Button(value="New topic", variant="secondary").style(full_width=False)
            stop = gr.Button(value="Stop", variant="secondary").style(full_width=False)
        with gr.Row():
            with gr.Column():
                max_tokens = gr.Slider(20, 1000, label="Max Tokens", step=20, value=300)
                temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=0.8)
                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.95)
                top_k = gr.Slider(0, 100, label="Top K", step=1, value=40)
                repeat_penalty = gr.Slider(0.0, 2.0, label="Repetition Penalty", step=0.1, value=1.1)
        system_msg = gr.Textbox(
            start_message, label="System Message", interactive=True, visible=True,
            placeholder="system prompt, useful for RP", lines=5)
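        # Server-side state holding the [user_message, assistant_reply] pairs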
        chat_history_state = gr.State()
        clear.click(clear_chat, inputs=[chat_history_state, message], outputs=[chat_history_state, message], queue=False)
        clear.click(lambda: None, None, chatbot, queue=False)
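
        # Sending a message (button or Enter) first records the user turn,
        # then streams the model's reply; "Stop" cancels both in-flight events.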
        submit_click_event = submit.click(
            fn=user, inputs=[message, chat_history_state], outputs=[message, chat_history_state], queue=True
        ).then(
            fn=chat, inputs=[chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repeat_penalty],
            outputs=[chatbot, chat_history_state], queue=True
        )
        message_submit_event = message.submit(
            fn=user, inputs=[message, chat_history_state], outputs=[message, chat_history_state], queue=True
        ).then(
            fn=chat, inputs=[chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repeat_penalty],
            outputs=[chatbot, chat_history_state], queue=True
        )
        stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event, message_submit_event], queue=False)
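
# Queue settings (e.g. concurrency) come from config.yml; 0.0.0.0:7860 is the
# standard host/port binding for a Hugging Face Space.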
demo.queue(**config["queue"]).launch(debug=True, server_name="0.0.0.0", server_port=7860)