Spaces:

xsa-dev
/

falcon-7b-instruct-chat

Running

App Files Files Community

xsa-dev committed on Jun 18, 2023

Commit

f8615ed

1 Parent(s): 90bf432

files added

Browse files

Files changed (2) hide show

app.py +236 -4
home-banner.jpg +0 -0

app.py CHANGED Viewed

@@ -1,7 +1,239 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()

+import argparse
+import os
 import gradio as gr
+from text_generation import Client
+# HTML heading rendered at the top of the page (passed to gr.HTML in get_demo).
+TITLE = """<h2 align="center">🦅 Falcon-7B-instruct Chat demo 🚀🤘</h2>"""
+# Speaker labels used by format_chat_prompt when it lays out the transcript.
+USER_NAME = "User"
+BOT_NAME = "Falcon7B-instruct"
+# Default value of the "Instructions" textbox; format_chat_prompt places the
+# (stripped) instructions at the start of every prompt.
+# NOTE(review): the string below has no placeholders, so its f prefix is redundant.
+DEFAULT_INSTRUCTIONS = f"""
+The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, and a human user, called User. In the following interactions, User and Falcon will converse in natural language, and Falcon will answer User's questions.
+Falcon was built to be respectful, polite and inclusive.
+Falcon was built by the Technology Innovation Institute in Abu Dhabi.
+Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied with.
+Falcon knows a lot, and always tells the truth. The conversation begins.
+"""
+# Sentinel message: run_retry passes this to run_chat to re-run the last turn.
+RETRY_COMMAND = "/retry"
+# Stop sequence handed to the generation call so the model stops before
+# writing the next user turn.
+STOP_STR = f"\n{USER_NAME}:"
+# Token texts that run_chat holds back instead of displaying immediately.
+# Presumably these are the pieces STOP_STR can be tokenized into -- TODO confirm.
+STOP_SUSPECT_LIST = [":", "\n", "User"]
+# Deployment settings read from the environment; each is None when unset.
+# INFERENCE_ENDPOINT is the default for --addr; INFERENCE_AUTH is used as the
+# bearer token in the Authorization header.
+INFERENCE_ENDPOINT = os.environ.get("INFERENCE_ENDPOINT")
+INFERENCE_AUTH = os.environ.get("INFERENCE_AUTH")
+def chat_accordion():
+    """Create the collapsed "Parameters" accordion with the sampling sliders.
+
+    Must be called inside an active Gradio layout context.
+
+    Returns:
+        A ``(temperature, top_p)`` tuple of ``gr.Slider`` components.
+    """
+    temperature_settings = {
+        "label": "Temperature",
+        "minimum": 0.1,
+        "maximum": 2.0,
+        "value": 0.8,
+        "step": 0.1,
+    }
+    top_p_settings = {
+        "label": "p (nucleus sampling)",
+        "minimum": 0.1,
+        "maximum": 0.99,
+        "value": 0.9,
+        "step": 0.01,
+    }
+    parameters_panel = gr.Accordion("Parameters", open=False)
+    with parameters_panel:
+        # Creation order fixes the on-screen order: temperature first, then top-p.
+        temperature = gr.Slider(interactive=True, **temperature_settings)
+        top_p = gr.Slider(interactive=True, **top_p_settings)
+    return temperature, top_p
+def format_chat_prompt(message: str, chat_history, instructions: str) -> str:
+    """Render the instructions, past turns and the new message as one prompt.
+
+    Args:
+        message: The new user message to append as the final user turn.
+        chat_history: Iterable of ``(user_message, bot_message)`` pairs.
+        instructions: System-style preamble; surrounding spaces and then
+            surrounding newlines are stripped before use.
+
+    Returns:
+        Newline-separated transcript ending with an open ``BOT_NAME:`` line
+        for the model to complete.
+    """
+    preamble = instructions.strip(" ").strip("\n")
+    completed_turns = [
+        f"{USER_NAME}: {user_message}\n{BOT_NAME}: {bot_message}"
+        for user_message, bot_message in chat_history
+    ]
+    open_turn = f"{USER_NAME}: {message}\n{BOT_NAME}:"
+    return "\n".join([preamble, *completed_turns, open_turn])
+# Build the chat section of the page and wire up its event handlers.
+#
+# Creates the chatbot display, the input textbox, clickable examples, the
+# retry / delete / clear buttons, the sampling sliders and the instructions
+# box, then registers the callbacks defined below. It is called from get_demo
+# inside a `with gr.Blocks(...)` context.
+#
+# client: text-generation client whose generate_stream() produces the reply.
+def chat(client: Client):
+    with gr.Column(elem_id="chat_container"):
+        with gr.Row():
+            chatbot = gr.Chatbot(elem_id="chatbot")
+        with gr.Row():
+            inputs = gr.Textbox(
+                placeholder=f"Hello {BOT_NAME} !!",
+                label="Type an input and press Enter",
+                max_lines=3,
+            )
+    # Example prompts that fill the input textbox when clicked.
+    gr.Examples(
+        [
+            ["Hey Falcon! Any recommendations for my holidays in Abu Dhabi?"],
+            ["What's the Everett interpretation of quantum mechanics?"],
+            [
+                "Give me a list of the top 10 dive sites you would recommend around the world."
+            ],
+            ["Can you tell me more about deep-water soloing?"],
+            [
+                "Can you write a short tweet about the Apache 2.0 release of our latest AI model, Falcon LLM?"
+            ],
+        ],
+        inputs=inputs,
+        label="Click on any example and press Enter in the input textbox!",
+    )
+    with gr.Row(elem_id="button_container"):
+        with gr.Column():
+            retry_button = gr.Button("♻️ Retry last turn")
+        with gr.Column():
+            delete_turn_button = gr.Button("🧽 Delete last turn")
+        with gr.Column():
+            clear_chat_button = gr.Button("✨ Delete all history")
+    with gr.Row(elem_id="param_container"):
+        with gr.Column():
+            temperature, top_p = chat_accordion()
+        with gr.Column():
+            with gr.Accordion("Instructions", open=False):
+                instructions = gr.Textbox(
+                    placeholder="LLM instructions",
+                    value=DEFAULT_INSTRUCTIONS,
+                    lines=10,
+                    interactive=True,
+                    label="Instructions",
+                    max_lines=16,
+                    show_label=False,
+                )
+    # Generator callback: streams the model reply for `message` and yields the
+    # updated chat history each time new visible text is appended.
+    def run_chat(
+        message: str, chat_history, instructions: str, temperature: float, top_p: float
+    ):
+        # Nothing to do for an empty message, or a retry with no history.
+        if not message or (message == RETRY_COMMAND and len(chat_history) == 0):
+            yield chat_history
+            return
+        # Retry: drop the last turn (in place) and resend its user message.
+        if message == RETRY_COMMAND and chat_history:
+            prev_turn = chat_history.pop(-1)
+            user_message, _ = prev_turn
+            message = user_message
+        prompt = format_chat_prompt(message, chat_history, instructions)
+        # Append the new turn with an empty reply that is filled in below.
+        chat_history = chat_history + [[message, ""]]
+        stream = client.generate_stream(
+            prompt,
+            do_sample=True,
+            max_new_tokens=1024,
+            stop_sequences=[STOP_STR, "<|endoftext|>"],
+            temperature=temperature,
+            top_p=top_p,
+        )
+        # Text held back because it might be the start of the stop sequence.
+        acc_text = ""
+        for idx, response in enumerate(stream):
+            text_token = response.token.text
+            # Presumably `details` is only set on the final stream message --
+            # verify against the text_generation client.
+            # NOTE(review): returning here discards this token and anything
+            # still held in acc_text, whatever the reason generation stopped.
+            if response.details:
+                return
+            # Hold back suspect tokens; they are shown together with the next
+            # non-suspect token.
+            if text_token in STOP_SUSPECT_LIST:
+                acc_text += text_token
+                continue
+            # Drop the leading space of the very first token.
+            # NOTE(review): skipped when the first token was a held-back one.
+            if idx == 0 and text_token.startswith(" "):
+                text_token = text_token[1:]
+            acc_text += text_token
+            # Rebuild the last turn with the newly visible text appended.
+            last_turn = list(chat_history.pop(-1))
+            last_turn[-1] += acc_text
+            chat_history = chat_history + [last_turn]
+            yield chat_history
+            acc_text = ""
+    # Remove the most recent turn (if any) and push the result to the chatbot.
+    def delete_last_turn(chat_history):
+        if chat_history:
+            chat_history.pop(-1)
+        return {chatbot: gr.update(value=chat_history)}
+    # Retry callback: ignores the textbox content (`message`) and replays the
+    # last turn by sending RETRY_COMMAND to run_chat.
+    def run_retry(
+        message: str, chat_history, instructions: str, temperature: float, top_p: float
+    ):
+        yield from run_chat(
+            RETRY_COMMAND, chat_history, instructions, temperature, top_p
+        )
+    # Reset the chatbot to an empty history.
+    def clear_chat():
+        return []
+    inputs.submit(
+        run_chat,
+        [inputs, chatbot, instructions, temperature, top_p],
+        outputs=[chatbot],
+        show_progress=False,
+    )
+    # Second submit handler: clears the input textbox.
+    inputs.submit(lambda: "", inputs=None, outputs=inputs)
+    delete_turn_button.click(delete_last_turn, inputs=[chatbot], outputs=[chatbot])
+    retry_button.click(
+        run_retry,
+        [inputs, chatbot, instructions, temperature, top_p],
+        outputs=[chatbot],
+        show_progress=False,
+    )
+    clear_chat_button.click(clear_chat, [], chatbot)
+def get_demo(client: Client):
+    """Assemble the complete demo page and return it without launching it.
+
+    The page has a title, a two-column header (model description next to the
+    banner image) and the chat section built by ``chat``.
+
+    Args:
+        client: Text-generation client forwarded to ``chat``.
+
+    Returns:
+        The ``gr.Blocks`` application.
+    """
+    chatbot_css = """#chatbot {
+    font-size: 14px;
+    min-height: 300px;
+}"""
+    # Continuation lines keep their original indentation so the string passed
+    # to gr.Markdown is unchanged.
+    intro_markdown = """**Chat with [Falcon-7B-Instruct](https://huggingface.co/tiiuae/falcon-7b-instruct)!**
+                    ✨ This demo is powered by [Falcon-7B-Instruct](https://huggingface.co/tiiuae/falcon-7b-instruct) and running with [Text Generation Inference](https://github.com/huggingface/text-generation-inference) ✨
+                    👀 **Learn more about Falcon LLM:** [falconllm.tii.ae](https://falconllm.tii.ae/)
+                    Why use Falcon-7B-Instruct?
+                    You are looking for a ready-to-use chat/instruct model based on Falcon-7B?
+                    Falcon-7B is a strong base model, outperforming comparable open-source models (e.g., MPT-7B, StableLM, RedPajama etc.), thanks to being trained on 1,500B tokens of RefinedWeb enhanced with curated corpora. See the OpenLLM Leaderboard.
+                    It features an architecture optimized for inference, with FlashAttention (Dao et al., 2022) and multiquery (Shazeer et al., 2019).
+                    💬 This is an instruct model, which may not be ideal for further finetuning. If you are interested in building your own instruct/chat model, we recommend starting from Falcon-7B.
+                    🔥 Looking for an even more powerful model? Falcon-40B-Instruct is Falcon-7B-Instruct's big brother!
+                    🚜 **Limitations**: the model can and will produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so. Finally, this demo is limited to a session length of about 1,000 words.
+                    🚜 **Recomendation**: We recommend users of Falcon-7B-Instruct to develop guardrails and to take appropriate precautions for any production use.
+                    """
+    demo = gr.Blocks(css=chatbot_css)
+    with demo:
+        gr.HTML(TITLE)
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown(intro_markdown)
+            with gr.Column():
+                gr.Image("home-banner.jpg", elem_id="banner-image", show_label=False)
+        chat(client)
+    return demo
+if __name__ == "__main__":
+    # Script entry point: resolve the inference endpoint, build the client,
+    # then queue and launch the Gradio app.
+    parser = argparse.ArgumentParser("Playground Demo")
+    parser.add_argument(
+        "--addr",
+        type=str,
+        required=False,
+        default=INFERENCE_ENDPOINT,
+        help="URL of the text-generation endpoint (default: $INFERENCE_ENDPOINT)",
+    )
+    args = parser.parse_args()
+    # Fail fast with a clear message instead of building a client around None
+    # and failing on the first chat request.
+    if not args.addr:
+        parser.error(
+            "no inference endpoint given: pass --addr or set INFERENCE_ENDPOINT"
+        )
+    # Send the Authorization header only when a token is configured; otherwise
+    # the literal value "Bearer None" would be sent.
+    headers = {"Authorization": f"Bearer {INFERENCE_AUTH}"} if INFERENCE_AUTH else None
+    client = Client(args.addr, headers=headers)
+    demo = get_demo(client)
+    demo.queue(max_size=128, concurrency_count=16)
+    demo.launch()

home-banner.jpg ADDED Viewed