# app.py
"""Gradio chat demo for the BrainChip TENNs 1B language model.

Loads the tokenizer and model once at import time and exposes a
``gr.ChatInterface`` (``demo``) whose callback turns the conversation into a
plain-text ``User:`` / ``Assistant:`` transcript and asks the model to
continue it.
"""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "BrainChip-AI/tenns-llm-1b"

# Load model once at startup so every chat request reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
)


def _history_turns(history):
    """Yield ``(speaker, text)`` pairs from a Gradio chat history.

    Accepts both history shapes Gradio can hand to the callback:
    a list of ``(user_msg, bot_msg)`` pairs, or a list of
    ``{"role": ..., "content": ...}`` dicts ("messages" format).
    Entries with an unknown role or with ``None`` text are skipped so they
    never end up in the prompt as the literal string ``"None"``.
    """
    role_labels = {"user": "User", "assistant": "Assistant"}
    for entry in history or ():
        if isinstance(entry, dict):
            speaker = role_labels.get(entry.get("role"))
            text = entry.get("content")
            if speaker is not None and text is not None:
                yield speaker, text
        else:
            user_msg, bot_msg = entry
            if user_msg is not None:
                yield "User", user_msg
            if bot_msg is not None:
                yield "Assistant", bot_msg


def _build_prompt(message, history):
    """Return the transcript prompt ending with an open ``Assistant:`` turn."""
    lines = [f"{speaker}: {text}" for speaker, text in _history_turns(history)]
    lines.append(f"User: {message}")
    lines.append("Assistant:")
    return "\n".join(lines)


def chat(message, history):
    """Generate the assistant's reply to ``message`` given prior ``history``.

    Args:
        message: The new user message.
        history: Previous turns as supplied by ``gr.ChatInterface`` (either
            pair-style or "messages"-style; see ``_history_turns``).

    Returns:
        The generated reply text with the prompt removed and surrounding
        whitespace stripped.
    """
    prompt = _build_prompt(message, history)

    # NOTE(review): ``generate_text`` is not a stock transformers method; it
    # presumably comes from the model's remote code (trust_remote_code=True).
    output = model.generate_text(
        prompt,
        tokenizer,
        max_new_tokens=256,
        temperature=0.8,
        top_k=50,
    )

    # Strip the prompt from the output (model returns full text).
    return output[len(prompt):].strip()


demo = gr.ChatInterface(
    fn=chat,
    title="TENNs LLM 1B",
    description="Chat with BrainChip's 1B parameter SSM language model",
)

if __name__ == "__main__":
    # Only start the web server when run as a script, not when imported.
    demo.launch()