Spaces:

NoesisLab
/

Kai-30B-Instruct

Sleeping

App Files Files Community

OzTianlu committed on Mar 3

Commit

54720a6

verified ·

1 Parent(s): 43e46d0

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -7

app.py CHANGED Viewed

@@ -3,16 +3,30 @@ import torch
 from threading import Thread
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 import gradio as gr
-MODEL_ID = "NoesisLab/Kai-30B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
-    "NoesisLab/Kai-30B-Instruct",
 )
 @spaces.GPU
 def respond(message, history):
-    messages = [{"role": "system", "content": "You are Spartacus, a helpful assistant."}]
     for msg in history:
         messages.append({"role": msg["role"], "content": msg["content"]})
     messages.append({"role": "user", "content": message})
@@ -26,8 +40,8 @@ def respond(message, history):
     generate_kwargs = dict(
         input_ids=input_ids,
         streamer=streamer,
-        temperature=0.5,
-        top_p=0.9,
         do_sample=True,
     )
@@ -42,8 +56,8 @@ def respond(message, history):
 demo = gr.ChatInterface(
     fn=respond,
-    title="Chat with Kai-30B-Instruct",
-    description="Chat with NoesisLab/Kai-30B-Instruct",
 )
 if __name__ == "__main__":

 from threading import Thread
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 import gradio as gr
+MODEL_ID = "NoesisLab/Kai-3B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
+    "NoesisLab/Kai-3B-Instruct",
 )
 @spaces.GPU
 def respond(message, history):
+    msg = """You are Kai, a helpful assistant.
+    You are a logical assistant that follows a strict "Reason-then-Act" process. For every query, you must structure your response into two distinct sections:
+        1. ### Reasoning Process
+        - Break down the user's request into smaller parts.
+        - Check for potential pitfalls or edge cases.
+        - Draft a step-by-step plan to solve the problem.
+        - Verify your logic before moving to the final answer.
+        2. ### Final Answer
+        - Provide the concise and direct result based on the reasoning above.
+        - Do not repeat the reasoning; just provide the output.
+        Strictly follow this format for every response. Begin your thought process now."""
+    messages = [{"role": "system", "content": msg}]
     for msg in history:
         messages.append({"role": msg["role"], "content": msg["content"]})
     messages.append({"role": "user", "content": message})
     generate_kwargs = dict(
         input_ids=input_ids,
         streamer=streamer,
+        temperature=0.6,
+        top_p=0.95,
         do_sample=True,
     )
 demo = gr.ChatInterface(
     fn=respond,
+    title="Chat with Kai-3B-Instruct",
+    description="Chat with NoesisLab/Kai-3B-Instruct",
 )
 if __name__ == "__main__":