Spaces:

AdamF92
/

RxT-Beta-Demo

Runtime error

App Files Files Community

AdamF92 committed on Nov 4

Commit

5729414

verified ·

1 Parent(s): cd07cbb

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -57

app.py CHANGED Viewed

@@ -1,70 +1,51 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
-    """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-    """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-    messages = [{"role": "system", "content": system_message}]
-    messages.extend(history)
-    messages.append({"role": "user", "content": message})
     response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
 if __name__ == "__main__":
-    demo.launch()

+# app.py
 import gradio as gr
+import torch
+import spaces
+from rxlm.rxt.models import RxTBeta
+from rxlm.training.tokenizer import load_tokenizer_from_hf_hub
+# Load the tokenizer and the model weights from the same Hub repository.
+tokenizer = load_tokenizer_from_hf_hub('ReactiveAI/RxT-Beta-Micro-Supervised-AI')
+# Use the fully qualified `<org>/<name>` repo id here as well: the bare name
+# 'RxT-Beta-Micro-Supervised-AI' does not point at the ReactiveAI repository
+# that the tokenizer above is fetched from.
+model = RxTBeta.from_pretrained('ReactiveAI/RxT-Beta-Micro-Supervised-AI', tokenizer=tokenizer)
+# NOTE(review): presumably links components shared between the model's
+# sub-modules after loading - confirm against the RxLM documentation.
+model.share_components()
+# Prefer the GPU when one is visible to torch, otherwise fall back to CPU.
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+model.to(device)
+# Maximum sequence length passed to both tokenization and generation in chat().
+seq_len = 1024
+@spaces.GPU
+def chat(message: str, history: list):
+    """Stream the model's answer to `message` as a growing chat transcript.
+
+    Args:
+        message: The user's query text from the textbox.
+        history: Previous chat turns as `[user, assistant]` pairs; `None` is
+            treated as an empty conversation.
+
+    Yields:
+        `history` extended with the current `[message, partial_response]`
+        pair, once before generation starts and once per generated token.
+    """
+    history = history or []
+    # Show the user's turn immediately, so the chat is updated even while the
+    # first token is pending or when generation produces no tokens at all.
+    yield history + [[message, ""]]
+    tokenized_query = model.tokenize_query(message, max_seq_len=seq_len, device=device)
     response = ""
+    for token_id in model.interact(**tokenized_query, max_seq_len=seq_len, temperature=1.0):
+        # NOTE(review): show_memory_update=True presumably renders the
+        # memory-update marker tokens as text - confirm in RxLM docs.
+        response += model.stringify_token(token_id, show_memory_update=True)
+        yield history + [[message, response]]
+    # Kept for direct callers; the streamed UI state comes from the yields above.
+    return history + [[message, response]]
+# Build the demo page: description, chat window, query box and a clear button.
+with gr.Blocks(title="RxT-Beta-Micro-AI 270M (Supervised) Demo") as demo:
+    gr.Markdown("""
+    Experimental Reactive Transformer model fine-tuned for AI/Data Science knowledge based chats
+    and interactive Reactive AI documentation.
+    Supervised version of the model is still in intermediate stage and will be further improved
+    in Reinforcement Learning stages (demo will be constantly updated), so model could generate
+    inaccurate answers and memory is weak. However, it should still demonstrate the architecture
+    advantages, especially infinite context and no delays.
+    """)
+    chatbot = gr.Chatbot(height=600)
+    msg = gr.Textbox(placeholder="Ask RxT...", label="Query")
+    clear = gr.Button("Clear")
+    # On submit, stream chat() output into the chatbot, then empty the textbox.
+    msg.submit(chat, [msg, chatbot], chatbot, queue=True).then(
+        lambda: gr.update(value=""), outputs=msg
+    )
+    # Reset the visible conversation to an empty list.
+    clear.click(lambda: [], None, chatbot)
 if __name__ == "__main__":
+    # Enable the event queue first, then start the app.
+    demo.queue()
+    demo.launch()