# app.py
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the tokenizer and the weights a single time at import, so every chat
# request reuses the same in-memory objects instead of reloading them.
MODEL_ID = "BrainChip-AI/tenns-llm-1b"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# trust_remote_code=True allows transformers to run the modelling code that
# ships inside the model repository.
# NOTE(review): that is arbitrary Python from the Hub — consider pinning a
# `revision` if supply-chain risk matters for this deployment.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)

# Speaker labels used in the plain-text prompt, keyed by message role.
_ROLE_LABELS = {"user": "User", "assistant": "Assistant"}


def _content_to_text(content):
    """Return the plain text carried by one history message's ``content``.

    Handles a bare string as well as a list of content parts, where only
    parts shaped like ``{"text": "..."}`` contribute. Anything else (files,
    components, ``None``) yields an empty string.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return "\n".join(
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        )
    return ""


def chat(message, history):
    """Generate the assistant's reply to ``message`` given the chat so far.

    Args:
        message: The new user message.
        history: Previous turns, in either shape a chat UI may hand over:
            a sequence of ``(user_msg, bot_msg)`` pairs, or a sequence of
            ``{"role": ..., "content": ...}`` dicts. ``None`` or an empty
            sequence means there is no prior conversation.

    Returns:
        The generated text with the prompt prefix removed and surrounding
        whitespace stripped.
    """
    pieces = []
    for entry in history or []:
        if isinstance(entry, dict):
            # Role/content message: emit one labelled line per message.
            # Roles without a label and messages without any text are
            # skipped so they do not inject empty or mislabelled turns.
            label = _ROLE_LABELS.get(entry.get("role"))
            text = _content_to_text(entry.get("content"))
            if label and text:
                pieces.append(f"{label}: {text}\n")
        else:
            # Pair-style history: one (user, assistant) exchange per entry.
            user_msg, bot_msg = entry
            pieces.append(f"User: {user_msg}\nAssistant: {bot_msg}\n")

    # Leave the final "Assistant:" open so the model continues from there.
    pieces.append(f"User: {message}\nAssistant:")
    prompt = "".join(pieces)

    output = model.generate_text(
        prompt,
        tokenizer,
        max_new_tokens=256,
        temperature=0.8,
        top_k=50,
    )

    # Strip the prompt from the output (model returns full text).
    # NOTE(review): this slice assumes generate_text echoes the prompt
    # verbatim at the start of its return value — confirm against the
    # model's remote code.
    return output[len(prompt):].strip()

# Wrap `chat` in Gradio's chat UI, then start serving it; this runs as soon
# as the module is executed.
demo = gr.ChatInterface(
    fn=chat,
    title="TENNs LLM 1B",
    description="Chat with BrainChip's 1B parameter SSM language model",
)
demo.launch()