# https://raw.githubusercontent.com/rohan-paul/LLM-FineTuning-Large-Language-Models/refs/heads/main/Mixtral_Chatbot_with_Gradio/Mixtral_Chatbot_with_Gradio.py

from transformers import AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
from threading import Thread
import gradio as gr
import transformers
import torch

# Run the entire app with `python run_mixtral.py`
| """ The messages list should be of the following format: | |
| messages = | |
| [ | |
| {"role": "user", "content": "User's first message"}, | |
| {"role": "assistant", "content": "Assistant's first response"}, | |
| {"role": "user", "content": "User's second message"}, | |
| {"role": "assistant", "content": "Assistant's second response"}, | |
| {"role": "user", "content": "User's third message"} | |
| ] | |
| """ | |
| """ The `format_chat_history` function below is designed to format the dialogue history into a prompt that can be fed into the Mixtral model. This will help understand the context of the conversation and generate appropriate responses by the Model. | |
| The function takes a history of dialogues as input, which is a list of lists where each sublist represents a pair of user and assistant messages. | |
| """ | |
def format_chat_history(history) -> str:
    messages = []
    # Add a system message to set the context.
    # Note: some versions of the Mixtral chat template reject a "system" role;
    # if apply_chat_template raises, fold this text into the first user message.
    messages.append({"role": "system", "content": "You are a helpful assistant."})
    for user_msg, assistant_msg in history:
        if user_msg:  # User message
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:  # Assistant message (None for the turn being generated)
            messages.append({"role": "assistant", "content": assistant_msg})
    # `pipeline` is the module-level global created in __main__ below.
    return pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False,
        add_generation_prompt=True)
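# Illustrative only: the exact prompt depends on the model's chat template.
# For a history like [["Hi", "Hello!"], ["How are you?", None]], the returned
# prompt should look roughly like
#   <s>[INST] Hi [/INST] Hello!</s>[INST] How are you? [/INST]
# with the last [INST] block left open so the model generates the next
# assistant reply (that is what add_generation_prompt=True is for).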
def model_loading_pipeline():
    model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # `timeout` is the streamer's queue-read timeout in seconds; raise it if
    # the first token routinely takes longer than 5 s to arrive.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, timeout=5)
    pipeline = transformers.pipeline(
        "text-generation",
        model=model_id,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        # Quantize to 4-bit (or 8-bit, depending on your preference) so the
        # 8x7B model fits in GPU memory; quantization options go through
        # model_kwargs rather than directly to pipeline().
        model_kwargs={"quantization_config": BitsAndBytesConfig(load_in_4bit=True)},
        device_map="auto",  # automatically determine the best device setup
        streamer=streamer,
    )
    return pipeline, streamer
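# Optional smoke test (a sketch, not part of the original app): generate a
# short completion to confirm the 4-bit model runs before wiring up the UI.
# The streamer must be drained while generation runs, exactly as bot() does
# below; note that the 5 s streamer timeout applies here too.
#
#   pipe, stream = model_loading_pipeline()
#   Thread(target=pipe, kwargs=dict(text_inputs="[INST] Say hi [/INST]",
#                                   max_new_tokens=16)).start()
#   print("".join(stream))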
def launch_gradio_app(pipeline, streamer):
    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        clear = gr.Button("Clear")

        def user(user_message, history):
            # Append the new user message with an empty assistant slot and
            # clear the textbox.
            return "", history + [[user_message, None]]

        def bot(history):
            prompt = format_chat_history(history)
            history[-1][1] = ""
            kwargs = dict(text_inputs=prompt, max_new_tokens=2048,
                          do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
            # Run generation in a background thread; the streamer yields
            # tokens here as they are produced.
            thread = Thread(target=pipeline, kwargs=kwargs)
            thread.start()
            for token in streamer:
                history[-1][1] += token
                yield history

        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
            bot, chatbot, chatbot)
        clear.click(lambda: None, None, chatbot, queue=False)

    demo.queue()
    demo.launch(share=True, debug=True)
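# The Thread + TextIteratorStreamer handshake in bot() also works outside
# Gradio; a minimal console sketch (assuming the module-level `pipeline` and
# `streamer` created in __main__ below):
#
#   def stream_to_console(prompt):
#       Thread(target=pipeline, kwargs=dict(text_inputs=prompt,
#                                           max_new_tokens=256)).start()
#       for token in streamer:
#           print(token, end="", flush=True)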
if __name__ == '__main__':
    pipeline, streamer = model_loading_pipeline()
    launch_gradio_app(pipeline, streamer)