Ctaake commited on
Commit
194241b
·
verified ·
1 Parent(s): 798d29f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -0
app.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library
import random

# Third-party
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

# Model repository used for both prompt formatting and remote inference.
checkpoint = "mistralai/Mistral-7B-Instruct-v0.2"
# Tokenizer renders chat messages into this model's prompt template.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# Inference client pointed at the model (pass an HF token here if needed).
client = InferenceClient(checkpoint)
def format_prompt(message, history, systemPrompt):
    """Render the chat state into the model's raw prompt string.

    The system prompt is injected as a leading user turn followed by an
    empty assistant reply (the Mistral chat template has no system role),
    then the prior history and the current message are appended, and the
    tokenizer's chat template produces the final untokenized string.
    """
    conversation = [
        {"role": "user", "content": systemPrompt},
        {"role": "assistant", "content": ""},
    ]
    # Replay every completed turn from the history.
    for user_turn, bot_turn in history:
        conversation.append({"role": "user", "content": user_turn})
        conversation.append({"role": "assistant", "content": bot_turn})
    # Finish with the message currently being answered.
    conversation.append({"role": "user", "content": message})
    # tokenize=False: we want the template text, not token ids.
    return tokenizer.apply_chat_template(conversation, tokenize=False)
def inference(message, history, systemPrompt, temperature, maxTokens, topP, repPenalty):
    """Stream a model response for the given message and chat history.

    Yields the accumulated partial response after every received token so
    the Gradio chat UI can render the answer incrementally.

    Parameters mirror the additional inputs of the ChatInterface:
    systemPrompt, temperature, maxTokens, topP, repPenalty.
    """
    # Generation settings; a fresh random seed keeps retries from
    # reproducing the exact same answer.
    client_settings = dict(
        temperature=temperature,
        max_new_tokens=maxTokens,
        top_p=topP,
        repetition_penalty=repPenalty,
        do_sample=True,
        stream=True,
        details=True,
        return_full_text=False,
        seed=random.randint(0, 999999999),
    )
    # Generate by passing the prompt in the model's format plus the settings.
    stream = client.text_generation(format_prompt(message, history, systemPrompt),
                                    **client_settings)
    # Read the stream token by token.
    partial_response = ""
    for stream_part in stream:
        # Fix: with details=True each part flags special tokens (e.g. </s>);
        # skip them so they never leak into the text shown to the user.
        if stream_part.token.special:
            continue
        partial_response += stream_part.token.text
        yield partial_response
# Extra controls shown under the chat box. Their order must match the extra
# parameters of inference(): systemPrompt, temperature, maxTokens, topP,
# repPenalty.
myAdditionalInputs = [
    gr.Textbox(
        label="System Prompt",
        max_lines=500,
        lines=10,
        interactive=True,
        value="You are a friendly girl who doesn't answer unnecessarily long."
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        # Fix: 1048 was a typo for 1024 — unlike 1048, it is the intended
        # power-of-two cap and a multiple of the 64-token step.
        maximum=1024,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.1,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]
# Chat display: custom avatars, compact bubbles, no label/copy/like chrome.
myChatbot = gr.Chatbot(
    avatar_images=["./ava_m.png", "./ava_f.png"],
    bubble_full_width=False,
    show_label=False,
    show_copy_button=False,
    likeable=False,
)
# Two-line message box; scale=7 gives it most of the row next to the button.
myTextInput = gr.Textbox(
    lines=2,
    max_lines=2,
    placeholder="Send a message",
    container=False,
    scale=7,
)
# Soft theme in fuchsia with tightened spacing and medium corner radii.
myTheme = gr.themes.Soft(
    primary_hue=gr.themes.colors.fuchsia,
    secondary_hue=gr.themes.colors.fuchsia,
    spacing_size="sm",
    radius_size="md",
)
# Action buttons for the chat interface; SEND is the primary action, the
# rest are small secondary controls.
mySubmitButton = gr.Button(value="SEND", variant="primary")
myRetryButton = gr.Button(value="RETRY", variant="secondary", size="sm")
myUndoButton = gr.Button(value="UNDO", variant="secondary", size="sm")
myClearButton = gr.Button(value="CLEAR", variant="secondary", size="sm")
# Wire everything together and launch the app; queue() is required for the
# streaming (generator) inference function, and the API view is hidden.
gr.ChatInterface(
    inference,
    chatbot=myChatbot,
    textbox=myTextInput,
    title="My chat bot",
    theme=myTheme,
    additional_inputs=myAdditionalInputs,
    submit_btn=mySubmitButton,
    stop_btn="STOP",
    retry_btn=myRetryButton,
    undo_btn=myUndoButton,
    clear_btn=myClearButton,
).queue().launch(show_api=False)