"""Gradio chat interface for the Dolphin-2.5-Mixtral-8x7b model.

Importing this module loads the tokenizer and model and builds the
``demo`` Blocks app; running it as a script launches the app.
"""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load the model and tokenizer
model_name = "cognitivecomputations/dolphin-2.5-mixtral-8x7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Create a text generation pipeline. The model object above is already
# loaded in float16 with device_map="auto", so dtype/placement options are
# not repeated here.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Default UI values, shared by the widgets and the "Clear All" handler so the
# two cannot drift apart.
DEFAULT_SYSTEM_MESSAGE = (
    "You are Dolphin, an uncensored and unbiased AI assistant. "
    "You always comply with the user's request."
)
DEFAULT_MAX_LENGTH = 512
DEFAULT_TEMPERATURE = 0.7
DEFAULT_TOP_P = 0.95
DEFAULT_TOP_K = 50
DEFAULT_REPETITION_PENALTY = 1.1


def generate_text(system_message, user_message, max_length, temperature, top_p, top_k, repetition_penalty):
    """Generate one assistant reply for a system/user message pair.

    Args:
        system_message: Text placed in the ChatML ``system`` turn.
        user_message: Text placed in the ChatML ``user`` turn.
        max_length: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Number of highest-probability tokens kept when sampling.
        repetition_penalty: Penalty applied to repeated tokens.

    Returns:
        The generated reply with surrounding whitespace stripped, or an
        empty string when ``user_message`` is empty or whitespace-only.
    """
    # Nothing to answer: skip the (expensive) generation call entirely.
    if not user_message or not user_message.strip():
        return ""

    # Format the prompt with the custom system message (ChatML layout).
    formatted_prompt = (
        f"<|im_start|>system\n{system_message}<|im_end|>\n"
        f"<|im_start|>user\n{user_message}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    # Generate the response. Slider values may arrive as floats, while
    # max_new_tokens and top_k must be integers, hence the int() casts.
    outputs = pipe(
        formatted_prompt,
        max_new_tokens=int(max_length),
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=int(top_k),
        repetition_penalty=repetition_penalty,
        pad_token_id=tokenizer.eos_token_id,
        # Ask the pipeline for the completion only instead of slicing the
        # prompt off the returned text by length.
        return_full_text=False,
    )

    return outputs[0]["generated_text"].strip()


def _record_exchange(user_message, assistant_reply, history):
    """Append the latest exchange to the chat history and clear the input.

    Args:
        user_message: The message that was just submitted.
        assistant_reply: The reply produced for it.
        history: Current Chatbot value (list of ``[user, assistant]`` pairs),
            or ``None`` when the chat is empty.

    Returns:
        A ``(history, user_message)`` tuple: the updated history and an empty
        string that resets the user-message textbox.
    """
    updated = list(history or [])
    # Do not log blank submissions, for which no generation was run.
    if user_message and user_message.strip():
        # NOTE(review): uses the pair-style Chatbot format the original code
        # targeted - confirm against the installed Gradio version.
        updated.append([user_message, assistant_reply])
    return updated, ""


def _reset_interface():
    """Return cleared text fields, default slider values and empty history.

    The order matches the ``outputs`` list of the "Clear All" button.
    """
    return (
        "",  # system message
        "",  # user message
        "",  # assistant response
        DEFAULT_MAX_LENGTH,
        DEFAULT_TEMPERATURE,
        DEFAULT_TOP_P,
        DEFAULT_TOP_K,
        DEFAULT_REPETITION_PENALTY,
        [],  # chat history
    )


# CSS for better appearance
css = """
.gradio-container {
    max-width: 900px !important;
}
.message-box {
    border-radius: 8px;
    padding: 12px;
    margin-bottom: 12px;
}
.system-box {
    background-color: #f0f7ff;
}
.user-box {
    background-color: #f5f5f5;
}
.assistant-box {
    background-color: #f0fff0;
}
.param-box {
    background-color: #fff8f0;
    border-radius: 8px;
    padding: 12px;
    margin-bottom: 12px;
}
"""

with gr.Blocks(title="Dolphin-2.5-Mixtral-8x7b Chat", css=css) as demo:
    gr.Markdown("""# 🐬 Dolphin-2.5-Mixtral-8x7b Chat Interface
Chat with the powerful Dolphin-2.5-Mixtral-8x7b model from Hugging Face
""")

    with gr.Row():
        with gr.Column(scale=2):
            # System Message
            with gr.Group():
                gr.Markdown("### System Message (AI's Personality/Instructions)")
                system_message = gr.Textbox(
                    value=DEFAULT_SYSTEM_MESSAGE,
                    label="System Message",
                    lines=3,
                    elem_classes=["message-box", "system-box"],
                )

            # User Message
            with gr.Group():
                gr.Markdown("### Your Message")
                user_message = gr.Textbox(
                    placeholder="Type your message here...",
                    label="User Message",
                    lines=5,
                    elem_classes=["message-box", "user-box"],
                )

            # Generation Parameters
            with gr.Group(elem_classes=["param-box"]):
                gr.Markdown("### Generation Parameters")
                with gr.Row():
                    max_length = gr.Slider(128, 2048, value=DEFAULT_MAX_LENGTH, step=32, label="Max Length")
                    temperature = gr.Slider(0.1, 2.0, value=DEFAULT_TEMPERATURE, step=0.1, label="Temperature")
                with gr.Row():
                    top_p = gr.Slider(0.1, 1.0, value=DEFAULT_TOP_P, step=0.05, label="Top-p (nucleus sampling)")
                    top_k = gr.Slider(1, 100, value=DEFAULT_TOP_K, step=1, label="Top-k")
                with gr.Row():
                    repetition_penalty = gr.Slider(
                        1.0, 2.0, value=DEFAULT_REPETITION_PENALTY, step=0.1, label="Repetition Penalty"
                    )

            # Buttons
            with gr.Row():
                submit_btn = gr.Button("Generate Response", variant="primary")
                clear_btn = gr.Button("Clear All")

        with gr.Column(scale=3):
            # Assistant Response
            with gr.Group():
                gr.Markdown("### Assistant Response")
                assistant_response = gr.Textbox(
                    label="Response",
                    lines=10,
                    interactive=False,
                    elem_classes=["message-box", "assistant-box"],
                )

            # Chat History
            with gr.Group():
                gr.Markdown("### Conversation History")
                chat_history = gr.Chatbot(
                    label="Chat History",
                    height=400,
                    elem_classes=["message-box"],
                )

    # Button actions
    generation_inputs = [
        system_message,
        user_message,
        max_length,
        temperature,
        top_p,
        top_k,
        repetition_penalty,
    ]

    # The button click and the textbox submit event run the same two-step
    # chain: generate the reply, then log the exchange and clear the input.
    # NOTE(review): for multi-line textboxes Gradio may bind submit to
    # Shift+Enter rather than Enter - confirm for the installed version.
    for trigger in (submit_btn.click, user_message.submit):
        trigger(
            fn=generate_text,
            inputs=generation_inputs,
            outputs=assistant_response,
        ).then(
            fn=_record_exchange,
            inputs=[user_message, assistant_response, chat_history],
            outputs=[chat_history, user_message],
        )

    # Each component appears exactly once, in the order _reset_interface
    # returns its values.
    clear_btn.click(
        fn=_reset_interface,
        outputs=[
            system_message,
            user_message,
            assistant_response,
            max_length,
            temperature,
            top_p,
            top_k,
            repetition_penalty,
            chat_history,
        ],
    )

# Run the app
if __name__ == "__main__":
    demo.launch()