# Source: Hugging Face Space app.py by OzTianlu (commit 1064c57, verified)
import spaces
import torch
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import gradio as gr

# Hugging Face model repo served by this Space — single source of truth for
# both the tokenizer and the model load below.
MODEL_ID = "NoesisLab/Kai-30B-Instruct"

# trust_remote_code allows any custom tokenizer code shipped in the repo to run.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
# Reuse MODEL_ID instead of repeating the repo string (the original duplicated
# the literal, which invites drift if the repo name ever changes).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
)
@spaces.GPU
def respond(message, history):
    """Stream a chat completion for *message* given the prior *history*.

    Parameters
    ----------
    message : str
        The latest user turn.
    history : list[dict]
        Previous turns as ``{"role": ..., "content": ...}`` dicts
        (Gradio "messages" history format — the dict indexing below
        requires it).

    Yields
    ------
    str
        The accumulated assistant text so far (Gradio streaming
        convention: yield the full response each step, not the delta).
    """
    system_prompt = """You are Kai, a helpful assistant.
You are a logical assistant that follows a strict "Reason-then-Act" process. For every query, you must structure your response into two distinct sections:
1. ### Reasoning Process
- Break down the user's request into smaller parts.
- Check for potential pitfalls or edge cases.
- Draft a step-by-step plan to solve the problem.
- Verify your logic before moving to the final answer.
2. ### Final Answer
- Provide the concise and direct result based on the reasoning above.
- Do not repeat the reasoning; just provide the output.
Strictly follow this format for every response. Begin your thought process now."""
    messages = [{"role": "system", "content": system_prompt}]
    # Renamed the loop variable (the original reused `msg` for both the
    # system prompt and each history turn, shadowing the former).
    for turn in history:
        messages.append({"role": turn["role"], "content": turn["content"]})
    messages.append({"role": "user", "content": message})
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    # skip_prompt drops the echoed input; skip_special_tokens strips EOS etc.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        # Without max_new_tokens, generate() falls back to a tiny default
        # generation length and truncates answers mid-sentence.
        max_new_tokens=2048,
        temperature=0.6,
        top_p=0.95,
        do_sample=True,
    )
    # Run generation on a worker thread so we can consume the streamer here.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()
    response = ""
    for token in streamer:
        response += token
        yield response
    # Reap the worker once the streamer is exhausted (generation finished).
    thread.join()
# Gradio chat UI wired to the streaming `respond` generator.
demo = gr.ChatInterface(
    fn=respond,
    # respond() indexes each history entry as a dict ({"role": ..., "content": ...}),
    # which requires Gradio's "messages" history format; the legacy tuple
    # format would raise a TypeError inside respond().
    type="messages",
    title="Chat with Kai-30B-Instruct",
    description="Chat with NoesisLab/Kai-30B-Instruct",
)

if __name__ == "__main__":
    demo.launch()