Spaces:

fxmarty
/

tgi-mi300-demo-chat

Runtime error

App Files Files Community

fxmarty committed on May 7, 2024

Commit

d6e5fcb

1 Parent(s): 384f0b8

add app

Browse files

Files changed (1) hide show

app.py +56 -0

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import gradio as gr
+import random
+import time
+from huggingface_hub import InferenceClient
+from transformers import AutoTokenizer
+# Tokenizer used below to render the conversation with its chat template.
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+# Client for the remote text-generation endpoint.
+client = InferenceClient(model="https://770c-20-63-4-233.ngrok-free.app")
+# System message that seeds every conversation history.
+SYSTEM_COMMAND = {
+    "role": "system",
+    "content": "Context: date: Monday 20th May 2024; location: NYC; running on: 8 AMD Instinct MI300 GPU; model name: Llama 70B. Only provide these information if asked. You are a knowledgeable assistant trained to provide accurate and helpful information. Please respond to the user's queries promptly and politely.",
+}
+# Markers passed as stop_sequences to the generation call.
+STOP_TOKENS = ["<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>", "<|reserved_special_token"]
+# Stream items that are never shown to the user: the stop markers plus None.
+IGNORED_TOKENS = {None, *STOP_TOKENS}
+# Chat UI definition: the state object and the widgets referenced by the handlers below.
+with gr.Blocks() as demo:
+    # Conversation as a list of {"role", "content"} dicts, seeded with the system message.
+    tfs_history = gr.State([SYSTEM_COMMAND])
+    # Transcript widget; handlers treat its value as a list of [user_text, bot_text] rows.
+    chatbot = gr.Chatbot()
+    # Input box whose submit event triggers the user -> bot handler chain.
+    msg = gr.Textbox()
+    # Button used further down to clear the transcript.
+    clear = gr.Button("Clear")
+    def user(user_message, history, dict_history):
+        """Record a newly submitted user message.
+
+        Appends a ``{"role": "user", "content": user_message}`` entry to
+        ``dict_history`` (mutated in place) and builds a new transcript list
+        equal to ``history`` plus a ``[user_message, None]`` row; the ``None``
+        slot is filled in later by ``bot``.
+
+        Returns a 3-tuple: an empty string, the extended transcript and the
+        same ``dict_history`` object.
+        """
+        dict_history.append({"role": "user", "content": user_message})
+        pending_turn = [user_message, None]
+        return "", history + [pending_turn], dict_history
+    def bot(history, dict_history):
+        """Stream the assistant's reply into the last row of ``history``.
+
+        Renders ``dict_history`` with the tokenizer's chat template, sends the
+        resulting prompt to the inference endpoint and yields ``history`` after
+        every streamed item so the transcript can update incrementally.
+        Whatever text was received is appended to ``dict_history`` as an
+        assistant message, even if streaming stops early or raises.
+
+        Args:
+            history: transcript rows ``[user_text, bot_text]``; the last row is
+                filled in place.
+            dict_history: list of ``{"role", "content"}`` messages; mutated in
+                place.
+
+        Yields:
+            The (mutated) ``history`` list after each streamed item.
+        """
+        history[-1][1] = ""
+        response = {"role": "assistant", "content": ""}
+        text_input = tokenizer.apply_chat_template(dict_history, tokenize=False, add_generation_prompt=True)
+        # Some IGNORED_TOKENS entries are prefixes of a whole token family
+        # (e.g. "<|reserved_special_token"), so equality alone never matches
+        # them; also filter by prefix.
+        ignored_prefixes = tuple(tok for tok in IGNORED_TOKENS if tok is not None)
+        try:
+            for token in client.text_generation(prompt=text_input, max_new_tokens=100, stop_sequences=STOP_TOKENS, stream=True):
+                # The membership test comes first so a None item never reaches startswith().
+                if not (token in IGNORED_TOKENS or token.startswith(ignored_prefixes)):
+                    history[-1][1] += token
+                    response["content"] += token
+                yield history
+        finally:
+            # Runs on normal completion, on errors and when the generator is closed early.
+            dict_history.append(response)
+    # On submit: first record the user message (unqueued), then stream the reply.
+    submit_event = msg.submit(
+        user,
+        inputs=[msg, chatbot, tfs_history],
+        outputs=[msg, chatbot, tfs_history],
+        queue=False,
+    )
+    submit_event.then(bot, inputs=[chatbot, tfs_history], outputs=chatbot)
+    # Clearing must also reset the message history sent to the model; otherwise
+    # the next prompt would still contain the conversation the user just cleared.
+    clear.click(lambda: (None, [SYSTEM_COMMAND]), None, [chatbot, tfs_history], queue=False)
+# Enable the request queue, then start the app.
+demo.queue().launch()