Spaces:
Running
Running
| # app.py | |
| import gradio as gr | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
# Instantiate the tokenizer and the model a single time, at import, so that
# each call to chat() reuses the same objects instead of reloading them.
MODEL_ID = "BrainChip-AI/tenns-llm-1b"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# trust_remote_code=True is passed for the model only, as in the original.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
def _content_text(content):
    """Return the plain text of a chat message's ``content`` field.

    A string is returned unchanged. A list of content parts is reduced to
    the concatenation of each part's ``"text"`` entry (parts without one are
    skipped). Anything else is converted with ``str``; ``None`` becomes "".
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        # NOTE(review): assumes structured content parts look like
        # {"type": "text", "text": ...} -- confirm against the Gradio version
        # the Space is pinned to.
        return "".join(
            str(part.get("text", "")) if isinstance(part, dict) else str(part)
            for part in content
        )
    return str(content)


def chat(message, history):
    """Generate the assistant's reply to ``message`` given the earlier turns.

    Args:
        message: The new user message.
        history: The earlier conversation as handed over by
            ``gr.ChatInterface``. Two layouts are accepted: a list of
            ``(user_msg, bot_msg)`` pairs, and a list of
            ``{"role": ..., "content": ...}`` dicts ("messages" format).
            ``None`` is treated as an empty history.

    Returns:
        The generated text with the prompt-length prefix removed and the
        surrounding whitespace stripped.
    """
    # Build a simple "User:/Assistant:" transcript from history + new message.
    parts = []
    for turn in history or []:
        if isinstance(turn, dict):
            # "messages" format: one dict per utterance. Unpacking such a
            # dict as a pair would yield its *keys*, not the conversation.
            speaker = "Assistant" if turn.get("role") == "assistant" else "User"
            parts.append(f"{speaker}: {_content_text(turn.get('content'))}\n")
        else:
            # Legacy format: one (user, assistant) pair per exchange.
            user_msg, bot_msg = turn
            parts.append(f"User: {user_msg}\nAssistant: {bot_msg}\n")
    parts.append(f"User: {message}\nAssistant:")
    prompt = "".join(parts)

    output = model.generate_text(
        prompt,
        tokenizer,
        max_new_tokens=256,
        temperature=0.8,
        top_k=50,
    )
    # Strip the prompt from the output. Per the original note, the model
    # returns the full text, i.e. the prompt followed by the completion.
    response = output[len(prompt):].strip()
    return response
# Wrap chat() in Gradio's ready-made chat UI, then start serving it.
demo = gr.ChatInterface(
    fn=chat,
    title="TENNs LLM 1B",
    description="Chat with BrainChip's 1B parameter SSM language model",
)
demo.launch()