Ctaake committed on
Commit
5617fe1
·
verified ·
1 Parent(s): 5f0053e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -133
app.py CHANGED
@@ -1,137 +1,10 @@
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
- import random
4
- from transformers import AutoTokenizer
5
- from mySystemPrompt import SYSTEM_PROMPT
6
 
7
- # Model which is used
8
- checkpoint = "mistralai/Mistral-7B-Instruct-v0.2"
9
- # Inference client with the model (And HF-token if needed)
10
- client = InferenceClient(checkpoint)
11
- tokenizer = AutoTokenizer.from_pretrained(checkpoint)
12
- # Tokenizer chat template correction(Only works for mistral models)
13
- chat_template = open("mistral-instruct.jinja").read()
14
- chat_template = chat_template.replace(' ', '').replace('\n', '')
15
- tokenizer.chat_template = chat_template
16
 
17
def format_prompt(message, chatbot, system_prompt):
    """Render the full conversation into a model-ready prompt string.

    Prepends the system prompt, replays each (user, assistant) turn from
    the Gradio chatbot history, appends the new user message, and renders
    everything through the tokenizer's chat template.
    """
    conversation = [{"role": "system", "content": system_prompt}]
    for user_turn, assistant_turn in chatbot:
        conversation.append({"role": "user", "content": user_turn})
        conversation.append({"role": "assistant", "content": assistant_turn})
    conversation.append({"role": "user", "content": message})
    # tokenize=False -> the rendered template comes back as a plain string.
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
        return_tensors="pt",
    )
25
 
26
def inference(message, history, systemPrompt=SYSTEM_PROMPT, temperature=0.9, maxTokens=512, topP=0.9, repPenalty=1.1):
    """Stream a chat completion for *message* given the Gradio *history*.

    Yields the partial response after every received token so Gradio can
    render the answer incrementally.
    """
    # Generation settings forwarded verbatim to the inference endpoint.
    generation_kwargs = {
        "temperature": temperature,
        "max_new_tokens": maxTokens,
        "top_p": topP,
        "repetition_penalty": repPenalty,
        "do_sample": True,
        "stream": True,
        "details": True,
        "return_full_text": False,
        # Fresh seed per request so retries can produce different samples.
        "seed": random.randint(0, 999999999),
    }

    # Generate the response by passing the prompt (in the right format)
    # plus the generation settings to the inference client.
    prompt = format_prompt(message, history, systemPrompt)
    token_stream = client.text_generation(prompt, **generation_kwargs)

    # Accumulate the streamed tokens and re-yield the growing answer.
    answer_so_far = ""
    for chunk in token_stream:
        answer_so_far += chunk.token.text
        yield answer_so_far
47
-
48
-
49
# --- UI components ---------------------------------------------------------

# Extra generation controls (currently not wired up — the corresponding
# ChatInterface argument below is commented out).
myAdditionalInputs = [
    gr.Textbox(
        label="System Prompt",
        max_lines=500,
        lines=10,
        interactive=True,
        value="You are a friendly girl who doesn't answer unnecessarily long.",
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=1048,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.9,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.1,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]

# Chat history display with custom avatars.
myChatbot = gr.Chatbot(
    avatar_images=["./ava_m.png", "./avatar_franzi.jpg"],
    bubble_full_width=False,
    show_label=False,
    show_copy_button=False,
    likeable=False,
)

# Message entry box.
myTextInput = gr.Textbox(
    lines=2,
    max_lines=2,
    placeholder="Send a message",
    container=False,
    scale=7,
)

# App-wide theme.
myTheme = gr.themes.Soft(
    primary_hue=gr.themes.colors.fuchsia,
    secondary_hue=gr.themes.colors.fuchsia,
    spacing_size="sm",
    radius_size="md",
)

# Action buttons for the chat interface.
mySubmitButton = gr.Button(value="SEND", variant='primary')
myRetryButton = gr.Button(value="RETRY", variant='secondary', size="sm")
myUndoButton = gr.Button(value="UNDO", variant='secondary', size="sm")
myClearButton = gr.Button(value="CLEAR", variant='secondary', size="sm")

# Assemble the chat UI around the streaming `inference` generator and
# start serving (API surface disabled).
gr.ChatInterface(
    inference,
    chatbot=myChatbot,
    textbox=myTextInput,
    title="FRANZI-Bot",
    theme=myTheme,
    #additional_inputs=myAdditionalInputs,
    submit_btn=mySubmitButton,
    stop_btn="STOP",
    retry_btn=myRetryButton,
    undo_btn=myUndoButton,
    clear_btn=myClearButton,
).queue().launch(show_api=False)
 
1
import os
import gradio as gr

# HF access token used to load the private Space (None when not configured).
read_key = os.environ.get('HF_TOKEN', None)

with gr.Blocks() as demo:
    # Mirror the private Space's UI into this public app.
    gr.load("Ctaake/FranziBotPrivate", hf_token=read_key, src="spaces")

# NOTE(review): `concurrency_count` was removed from `queue()` in Gradio 4 —
# confirm this Space pins a Gradio 3.x runtime.
demo.queue(concurrency_count=10, max_size=20)
demo.launch()