from huggingface_hub import InferenceClient
import gradio as gr
import os


# Hosted Mixtral instruct model, served via the Hugging Face Inference API.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")


# Hidden system prompt stored as a Space secret; default to an empty prefix
# so the app still starts if the secret is missing.
secret_prompt = os.getenv("SECRET_PROMPT", "")


def format_prompt(new_message, history):
    """Build the Mixtral [INST] prompt from the secret prefix and the chat history."""
    prompt = secret_prompt
    for user_msg, bot_msg in history:
        prompt += f"[INST] {user_msg} [/INST]"
        prompt += f" {bot_msg}</s> "
    prompt += f"[INST] {new_message} [/INST]"
    return prompt


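# For example, with history [("Hi", "Hello!")] and new_message "How are you?",
# format_prompt returns (secret prefix omitted):
#   "[INST] Hi [/INST] Hello!</s> [INST] How are you? [/INST]"

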
def generate(prompt, history,
             temperature=0.25,
             max_new_tokens=512,
             top_p=0.95,
             repetition_penalty=1.0):
    # Clamp the sampling parameters to safe values.
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=727,
    )

    formatted_prompt = format_prompt(prompt, history)

    # Stream tokens from the Inference API, yielding the growing reply so the
    # chat window updates as the model generates.
    stream = client.text_generation(formatted_prompt,
                                    **generate_kwargs,
                                    stream=True,
                                    details=True,
                                    return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        yield output
    return output


ailexchatbot = gr.Chatbot(avatar_images=["./user.png", "./bot.png"],
                          bubble_full_width=False,
                          show_label=False,
                          show_copy_button=True,
                          likeable=True)


theme = 'syddharth/gray-minimal'
demo = gr.ChatInterface(fn=generate,
                        chatbot=ailexchatbot,
                        title="Ailexs Mixtral 8x7b Chat",
                        theme=theme)


# Queue requests so the streamed (generator) replies render incrementally.
demo.queue().launch(show_api=False)