app.py
CHANGED
@@ -1,9 +1,137 @@
-import os
 import gradio as gr
+from huggingface_hub import InferenceClient
+import random
+from transformers import AutoTokenizer
+from mySystemPrompt import SYSTEM_PROMPT
 
-
-
-
+# Model to use
+checkpoint = "mistralai/Mistral-7B-Instruct-v0.2"
+# Inference client for the model (pass an HF token here if the model requires one)
+client = InferenceClient(checkpoint)
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+# Fix the tokenizer's chat template (only works for Mistral models):
+# strip the template file's indentation and newlines so they don't end up in the prompt
+chat_template = open("mistral-instruct.jinja").read()
+chat_template = chat_template.replace('    ', '').replace('\n', '')
+tokenizer.chat_template = chat_template
 
-
-
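+# Build one prompt string in the model's chat format from the system prompt,
+# the chat history, and the new user message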
+def format_prompt(message, chatbot, system_prompt):
+    messages = [{"role": "system", "content": system_prompt}]
+    for user_message, bot_message in chatbot:
+        messages.append({"role": "user", "content": user_message})
+        messages.append({"role": "assistant", "content": bot_message})
+    messages.append({"role": "user", "content": message})
+    # With tokenize=False this returns the formatted prompt as a plain string
+    tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    return tokenized_chat
+
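+# The response is streamed: gr.ChatInterface renders partial text as this generator yields it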
+def inference(message, history, systemPrompt=SYSTEM_PROMPT, temperature=0.9, maxTokens=512, topP=0.9, repPenalty=1.1):
+    # Settings for this generation run
+    client_settings = dict(
+        temperature=temperature,
+        max_new_tokens=maxTokens,
+        top_p=topP,
+        repetition_penalty=repPenalty,
+        do_sample=True,
+        stream=True,
+        details=True,
+        return_full_text=False,
+        seed=random.randint(0, 999999999),
+    )
+    # Generate the response by passing the formatted prompt plus the client settings
+    stream = client.text_generation(format_prompt(message, history, systemPrompt),
+                                    **client_settings)
+    # Read the stream and yield the growing partial response
+    partial_response = ""
+    for stream_part in stream:
+        partial_response += stream_part.token.text
+        yield partial_response
+
+
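+# UI controls for the generation settings (currently inactive: additional_inputs
+# is commented out in the ChatInterface below)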
+myAdditionalInputs = [
+    gr.Textbox(
+        label="System Prompt",
+        max_lines=500,
+        lines=10,
+        interactive=True,
+        value="You are a friendly girl who doesn't give unnecessarily long answers."
+    ),
+    gr.Slider(
+        label="Temperature",
+        value=0.9,
+        minimum=0.0,
+        maximum=1.0,
+        step=0.05,
+        interactive=True,
+        info="Higher values produce more diverse outputs",
+    ),
+    gr.Slider(
+        label="Max new tokens",
+        value=256,
+        minimum=0,
+        maximum=1024,
+        step=64,
+        interactive=True,
+        info="The maximum number of new tokens",
+    ),
+    gr.Slider(
+        label="Top-p (nucleus sampling)",
+        value=0.9,
+        minimum=0.0,
+        maximum=1.0,
+        step=0.05,
+        interactive=True,
+        info="Higher values sample more low-probability tokens",
+    ),
+    gr.Slider(
+        label="Repetition penalty",
+        value=1.1,
+        minimum=1.0,
+        maximum=2.0,
+        step=0.05,
+        interactive=True,
+        info="Penalize repeated tokens",
+    )
+]
+
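+# Chat window with custom avatars for the user and the bot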
+myChatbot = gr.Chatbot(avatar_images=["./ava_m.png", "./avatar_franzi.jpg"],
+                       bubble_full_width=False,
+                       show_label=False,
+                       show_copy_button=False,
+                       likeable=False)
+
+myTextInput = gr.Textbox(lines=2,
+                         max_lines=2,
+                         placeholder="Send a message",
+                         container=False,
+                         scale=7)
+
+myTheme = gr.themes.Soft(primary_hue=gr.themes.colors.fuchsia,
+                         secondary_hue=gr.themes.colors.fuchsia,
+                         spacing_size="sm",
+                         radius_size="md")
+
+mySubmitButton = gr.Button(value="SEND",
+                           variant='primary')
+myRetryButton = gr.Button(value="RETRY",
+                          variant='secondary',
+                          size="sm")
+myUndoButton = gr.Button(value="UNDO",
+                         variant='secondary',
+                         size="sm")
+myClearButton = gr.Button(value="CLEAR",
+                          variant='secondary',
+                          size="sm")
+
+
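+# Assemble the chat UI and launch the app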
+gr.ChatInterface(
+    inference,
+    chatbot=myChatbot,
+    textbox=myTextInput,
+    title="FRANZI-Bot",
+    theme=myTheme,
+    #additional_inputs=myAdditionalInputs,
+    submit_btn=mySubmitButton,
+    stop_btn="STOP",
+    retry_btn=myRetryButton,
+    undo_btn=myUndoButton,
+    clear_btn=myClearButton,
+).queue().launch(show_api=False)
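
Note: app.py imports SYSTEM_PROMPT from a local mySystemPrompt module that is not part of this diff. A minimal sketch of what that file could contain; the exact prompt text is an assumption, mirrored from the System Prompt textbox default above:

# mySystemPrompt.py (hypothetical sketch: the real file is not shown in this diff)
SYSTEM_PROMPT = "You are a friendly girl who doesn't give unnecessarily long answers."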