# https://raw.githubusercontent.com/rohan-paul/LLM-FineTuning-Large-Language-Models/refs/heads/main/Mixtral_Chatbot_with_Gradio/Mixtral_Chatbot_with_Gradio.py

from transformers import AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
from threading import Thread
import gradio as gr
import transformers
import torch

# Run the entire app with `python run_mixtral.py`
| """ The messages list should be of the following format: | |
| messages = | |
| [ | |
| {"role": "user", "content": "User's first message"}, | |
| {"role": "assistant", "content": "Assistant's first response"}, | |
| {"role": "user", "content": "User's second message"}, | |
| {"role": "assistant", "content": "Assistant's second response"}, | |
| {"role": "user", "content": "User's third message"} | |
| ] | |
| """ | |
| """ The `format_chat_history` function below is designed to format the dialogue history into a prompt that can be fed into the Mixtral model. This will help understand the context of the conversation and generate appropriate responses by the Model. | |
| The function takes a history of dialogues as input, which is a list of lists where each sublist represents a pair of user and assistant messages. | |
| """ | |
def format_chat_history(history) -> str:
    messages = []
    # Add a system message to set the context.
    # Note: some versions of the Mixtral chat template reject a "system" role;
    # if apply_chat_template raises, fold this text into the first user message.
    messages.append({"role": "system", "content": "You are a helpful assistant."})
    for user_msg, assistant_msg in history:
        if user_msg:  # User message
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:  # Assistant message (None for the turn being generated)
            messages.append({"role": "assistant", "content": assistant_msg})
    # `pipeline` is the module-level global created in __main__ below.
    return pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False,
        add_generation_prompt=True)
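# Illustrative only: the exact prompt depends on the model's chat template.
# For a history like [["Hi", "Hello!"], ["How are you?", None]], the returned
# prompt should look roughly like
#   <s>[INST] Hi [/INST] Hello!</s>[INST] How are you? [/INST]
# with the last [INST] block left open so the model generates the next
# assistant reply (that is what add_generation_prompt=True is for).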
def model_loading_pipeline():
    model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # `timeout` is the streamer's queue-read timeout in seconds; raise it if
    # the first token routinely takes longer than 5 s to arrive.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, timeout=5)
    pipeline = transformers.pipeline(
        "text-generation",
        model=model_id,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        # Quantize to 4-bit (or 8-bit, depending on your preference) so the
        # 8x7B model fits in GPU memory; quantization options go through
        # model_kwargs rather than directly to pipeline().
        model_kwargs={"quantization_config": BitsAndBytesConfig(load_in_4bit=True)},
        device_map="auto",  # automatically determine the best device setup
        streamer=streamer,
    )
    return pipeline, streamer
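# Optional smoke test (a sketch, not part of the original app): generate a
# short completion to confirm the 4-bit model runs before wiring up the UI.
# The streamer must be drained while generation runs, exactly as bot() does
# below; note that the 5 s streamer timeout applies here too.
#
#   pipe, stream = model_loading_pipeline()
#   Thread(target=pipe, kwargs=dict(text_inputs="[INST] Say hi [/INST]",
#                                   max_new_tokens=16)).start()
#   print("".join(stream))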
def launch_gradio_app(pipeline, streamer):
    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        clear = gr.Button("Clear")

        def user(user_message, history):
            # Append the new user message with an empty assistant slot and
            # clear the textbox.
            return "", history + [[user_message, None]]

        def bot(history):
            prompt = format_chat_history(history)
            history[-1][1] = ""
            kwargs = dict(text_inputs=prompt, max_new_tokens=2048,
                          do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
            # Run generation in a background thread; the streamer yields
            # tokens here as they are produced.
            thread = Thread(target=pipeline, kwargs=kwargs)
            thread.start()
            for token in streamer:
                history[-1][1] += token
                yield history

        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
            bot, chatbot, chatbot)
        clear.click(lambda: None, None, chatbot, queue=False)

    demo.queue()
    demo.launch(share=True, debug=True)
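# The Thread + TextIteratorStreamer handshake in bot() also works outside
# Gradio; a minimal console sketch (assuming the module-level `pipeline` and
# `streamer` created in __main__ below):
#
#   def stream_to_console(prompt):
#       Thread(target=pipeline, kwargs=dict(text_inputs=prompt,
#                                           max_new_tokens=256)).start()
#       for token in streamer:
#           print(token, end="", flush=True)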
if __name__ == '__main__':
    pipeline, streamer = model_loading_pipeline()
    launch_gradio_app(pipeline, streamer)