Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import random | |
| import time | |
| from huggingface_hub import InferenceClient | |
| from transformers import AutoTokenizer | |
# Tokenizer is used only for its chat template (turning the message list into
# a single prompt string); generation itself happens on the remote endpoint.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")

# Remote text-generation endpoint.
# NOTE(review): this looks like a temporary ngrok tunnel URL -- confirm it is
# still reachable before deploying.
client = InferenceClient(model="https://770c-20-63-4-233.ngrok-free.app")

# First message of every conversation: fixed context plus behaviour guidance.
SYSTEM_COMMAND = {
    "role": "system",
    "content": (
        "Context: date: Monday 20th May 2024; location: NYC; "
        "running on: 8 AMD Instinct MI300 GPU; model name: Llama 70B. "
        "Only provide these information if asked. "
        "You are a knowledgeable assistant trained to provide accurate and helpful information. "
        "Please respond to the user's queries promptly and politely."
    ),
}

# Special-token markers passed to the endpoint as stop sequences.
STOP_TOKENS = [
    "<|start_header_id|>",
    "<|end_header_id|>",
    "<|eot_id|>",
    "<|reserved_special_token",
]

# Stream items that must never be shown to the user: the stop markers above,
# plus None.
IGNORED_TOKENS = {None, *STOP_TOKENS}
with gr.Blocks() as demo:
    # Conversation in chat-template form (list of {"role", "content"} dicts),
    # kept alongside the [user, bot] pairs that the Chatbot widget displays.
    tfs_history = gr.State([SYSTEM_COMMAND])
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    # The non-None entries of IGNORED_TOKENS are matched as prefixes, because
    # "<|reserved_special_token" is only the leading part of the real tokens
    # (e.g. "<|reserved_special_token_0|>"); an exact-match test never hits it.
    ignored_prefixes = tuple(t for t in IGNORED_TOKENS if t is not None)

    def user(user_message, history, dict_history):
        """Record a submitted user message in both histories.

        Args:
            user_message: Text taken from the input textbox.
            history: Chatbot display history, a list of [user, bot] pairs.
            dict_history: Chat-template message list; appended to in place.

        Returns:
            A tuple of (empty string to clear the textbox, display history
            with a new [user_message, None] pair, updated message list).
        """
        dict_history.append({"role": "user", "content": user_message})
        return "", history + [[user_message, None]], dict_history

    def bot(history, dict_history):
        """Stream the assistant's reply into the last chatbot entry.

        Args:
            history: Chatbot display history; the last pair's bot slot is
                filled in incrementally.
            dict_history: Chat-template message list used to build the
                prompt; the assistant reply is appended to it in place.

        Yields:
            The display history after each visible token is appended.
        """
        history[-1][1] = ""
        response = {"role": "assistant", "content": ""}
        text_input = tokenizer.apply_chat_template(
            dict_history, tokenize=False, add_generation_prompt=True
        )
        try:
            stream = client.text_generation(
                prompt=text_input,
                max_new_tokens=100,
                stop_sequences=STOP_TOKENS,
                stream=True,
            )
            for token in stream:
                # Skip empty stream items and special/control tokens.
                if token is None or token.startswith(ignored_prefixes):
                    continue
                history[-1][1] += token
                response["content"] += token
                yield history
        finally:
            # Always record the (possibly partial) reply so the message list
            # keeps alternating user/assistant turns even if streaming fails.
            dict_history.append(response)

    def reset():
        """Clear the visible chat and restart the model-side conversation."""
        return None, [SYSTEM_COMMAND]

    msg.submit(
        user,
        inputs=[msg, chatbot, tfs_history],
        outputs=[msg, chatbot, tfs_history],
        queue=False,
    ).then(bot, [chatbot, tfs_history], chatbot)

    # Reset BOTH histories: clearing only the widget would leave the old
    # conversation in the prompt sent to the model.
    clear.click(reset, None, [chatbot, tfs_history], queue=False)
# Enable the request queue, then start the web server.
demo.queue().launch()