Spaces:
Running on Zero
| import spaces | |
| import torch | |
| from threading import Thread | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer | |
| import gradio as gr | |
# Hub repository that both the tokenizer and the model weights are loaded from.
MODEL_ID = "NoesisLab/Kai-30B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Load the weights from the same MODEL_ID constant (previously the repo name
# was repeated as a literal) and pass the same trust_remote_code setting as the
# tokenizer, so both halves of the checkpoint are loaded consistently.
# NOTE(review): no dtype or device is requested here, so the library defaults
# apply; `torch` is imported by this file but unused -- confirm whether a
# reduced-precision dtype and/or GPU placement was intended for a 30B model.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
)
# System prompt sent as the first message of every conversation.
_SYSTEM_PROMPT = (
    "You are Kai, a helpful assistant.\n"
    "You are a logical assistant that follows a strict \"Reason-then-Act\" process. "
    "For every query, you must structure your response into two distinct sections:\n"
    "1. ### Reasoning Process\n"
    "- Break down the user's request into smaller parts.\n"
    "- Check for potential pitfalls or edge cases.\n"
    "- Draft a step-by-step plan to solve the problem.\n"
    "- Verify your logic before moving to the final answer.\n"
    "2. ### Final Answer\n"
    "- Provide the concise and direct result based on the reasoning above.\n"
    "- Do not repeat the reasoning; just provide the output.\n"
    "Strictly follow this format for every response. Begin your thought process now."
)


def _history_to_messages(history):
    """Convert chat history into a list of ``{"role", "content"}`` dicts.

    Accepts either message-style entries (dicts carrying ``role`` and
    ``content``) or pair-style entries (``(user_text, assistant_text)``),
    so the caller works regardless of which history format the UI supplies.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Copy only the two keys the chat template needs; any extra
            # keys on the entry are dropped.
            messages.append({"role": turn["role"], "content": turn["content"]})
            continue
        # Pair format: one user/assistant exchange; either side may be None.
        user_text, assistant_text = turn
        if user_text is not None:
            messages.append({"role": "user", "content": user_text})
        if assistant_text is not None:
            messages.append({"role": "assistant", "content": assistant_text})
    return messages


# The hosting page reports this Space as "Running on Zero" (ZeroGPU), where
# GPU work is expected to live inside a function decorated with spaces.GPU.
# NOTE(review): the model is not moved to a GPU anywhere in this file --
# confirm the intended device placement.
@spaces.GPU
def respond(message, history, max_new_tokens=2048):
    """Stream the model's reply to ``message`` given the prior ``history``.

    Args:
        message: The latest user message.
        history: Previous turns, as role/content dicts or as
            ``(user, assistant)`` pairs; may be empty or None.
        max_new_tokens: Upper bound on generated tokens. Passed explicitly so
            the reply length does not depend on the generation config's
            default limit. The default value is a judgment call -- tune it
            for the deployment.

    Yields:
        The accumulated response text after each streamed chunk.

    Raises:
        Exception: Re-raises whatever ``model.generate`` raised in the
            background thread, once the stream has been drained.
    """
    messages = [{"role": "system", "content": _SYSTEM_PROMPT}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    # return_dict=True yields input_ids together with the attention mask and
    # gives a mapping that can be splatted straight into generate().
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=0.6,
        top_p=0.95,
        do_sample=True,
    )

    failures = []

    def _generate():
        # Runs in the worker thread. If generation fails, close the stream so
        # the consuming loop below terminates instead of waiting forever.
        try:
            model.generate(**generate_kwargs)
        except Exception as exc:  # thread boundary: re-raised by the caller
            failures.append(exc)
            streamer.end()

    thread = Thread(target=_generate)
    thread.start()

    response = ""
    for token in streamer:
        response += token
        yield response

    thread.join()
    if failures:
        raise failures[0]
# Chat UI: Gradio calls `respond` with the new message and the chat history.
demo = gr.ChatInterface(
    respond,
    description="Chat with NoesisLab/Kai-30B-Instruct",
    title="Chat with Kai-30B-Instruct",
)

# Launch the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    demo.launch()