Instructions to use N8Programs/talkie-box with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- MLX
How to use N8Programs/talkie-box with MLX:
# Make sure mlx-lm is installed # pip install --upgrade mlx-lm # Generate text with mlx-lm from mlx_lm import load, generate model, tokenizer = load("N8Programs/talkie-box") prompt = "Write a story about Einstein" messages = [{"role": "user", "content": prompt}] prompt = tokenizer.apply_chat_template( messages, add_generation_prompt=True ) text = generate(model, tokenizer, prompt=prompt, verbose=True) - Notebooks
- Google Colab
- Kaggle
- Local Apps
- LM Studio
- MLX LM
How to use N8Programs/talkie-box with MLX LM:
Generate or start a chat session
# Install MLX LM uv tool install mlx-lm # Interactive chat REPL mlx_lm.chat --model "N8Programs/talkie-box"
Run an OpenAI-compatible server
# Install MLX LM uv tool install mlx-lm # Start the server mlx_lm.server --model "N8Programs/talkie-box" # Calling the OpenAI-compatible server with curl curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "N8Programs/talkie-box", "messages": [ {"role": "user", "content": "Hello"} ] }'
| # Copyright 2026 | |
| # | |
| # MLX implementation of talkie-lm/talkie-1930-13b-base. | |
| # This file is intentionally self-contained so an MLX model directory can load it | |
| # through config.json: {"model_file": "talkie_mlx.py"}. | |
| import math | |
| from dataclasses import dataclass | |
| from typing import Any, Optional | |
| import mlx.core as mx | |
| import mlx.nn as nn | |
| from mlx_lm.models.base import BaseModelArgs, create_attention_mask | |
| from mlx_lm.models.base import scaled_dot_product_attention | |
| class ModelArgs(BaseModelArgs): | |
| model_type: str = "talkie" | |
| vocab_size: int = 65536 | |
| hidden_size: int = 5120 | |
| num_hidden_layers: int = 40 | |
| num_attention_heads: int = 40 | |
| intermediate_size: int = 13696 | |
| head_dim: int = 128 | |
| max_position_embeddings: int = 2048 | |
| rope_theta: float = 1_000_000.0 | |
| tie_word_embeddings: bool = False | |
| rms_norm_eps: Optional[float] = 1.1920928955078125e-7 | |
| def rms_norm(x: mx.array, eps: Optional[float] = None) -> mx.array: | |
| if eps is None: | |
| eps = mx.finfo(x.dtype).eps | |
| return mx.fast.rms_norm(x, None, eps) | |
| def apply_talkie_rope(x: mx.array, offset: int, base: float) -> mx.array: | |
| """Apply Talkie's split-half RoPE to tensors shaped [B, H, T, D].""" | |
| head_dim = x.shape[-1] | |
| half_dim = head_dim // 2 | |
| freqs = -mx.exp( | |
| mx.arange(0.0, half_dim, dtype=mx.float32) * (math.log(base) / half_dim) | |
| ) | |
| return mx.fast.rope( | |
| x, | |
| dims=head_dim, | |
| traditional=False, | |
| base=None, | |
| freqs=freqs, | |
| scale=1.0, | |
| offset=offset, | |
| ) | |
| class HeadGain(nn.Module): | |
| def __init__(self, num_heads: int): | |
| super().__init__() | |
| self.head_g = mx.ones((num_heads,), dtype=mx.float32) | |
| def __call__(self, x: mx.array) -> mx.array: | |
| return x * self.head_g.astype(x.dtype).reshape(1, -1, 1, 1) | |
| class WeightGain(nn.Module): | |
| def __init__(self): | |
| super().__init__() | |
| self.w_g = mx.ones((1,), dtype=mx.float32) | |
| def __call__(self, w: mx.array) -> mx.array: | |
| return w * self.w_g.astype(w.dtype) | |
| class ActGain(nn.Module): | |
| def __init__(self, init_value: float): | |
| super().__init__() | |
| self.a_g = mx.array([init_value], dtype=mx.float32) | |
| def __call__(self, x: mx.array) -> mx.array: | |
| return x * self.a_g.astype(x.dtype) | |
| class CausalSelfAttention(nn.Module): | |
| def __init__(self, args: ModelArgs): | |
| super().__init__() | |
| self.n_head = args.num_attention_heads | |
| self.head_dim = args.head_dim | |
| self.rope_theta = args.rope_theta | |
| self.rms_norm_eps = args.rms_norm_eps | |
| self.scale = self.head_dim**-0.5 | |
| n_state = args.hidden_size | |
| self.attn_query = nn.Linear(n_state, n_state, bias=False) | |
| self.attn_key = nn.Linear(n_state, n_state, bias=False) | |
| self.attn_value = nn.Linear(n_state, n_state, bias=False) | |
| self.attn_resid = nn.Linear(n_state, n_state, bias=False) | |
| self.head_gain = HeadGain(self.n_head) | |
| def __call__( | |
| self, | |
| x: mx.array, | |
| mask: Optional[mx.array] = None, | |
| cache: Optional[Any] = None, | |
| ) -> mx.array: | |
| bsz, seq_len, _ = x.shape | |
| q = self.attn_query(x).reshape(bsz, seq_len, self.n_head, self.head_dim) | |
| k = self.attn_key(x).reshape(bsz, seq_len, self.n_head, self.head_dim) | |
| v = self.attn_value(x).reshape(bsz, seq_len, self.n_head, self.head_dim) | |
| q = q.transpose(0, 2, 1, 3) | |
| k = k.transpose(0, 2, 1, 3) | |
| v = v.transpose(0, 2, 1, 3) | |
| offset = cache.offset if cache is not None else 0 | |
| q = apply_talkie_rope(q, offset=offset, base=self.rope_theta) | |
| k = apply_talkie_rope(k, offset=offset, base=self.rope_theta) | |
| q = rms_norm(q, self.rms_norm_eps) | |
| k = rms_norm(k, self.rms_norm_eps) | |
| q = self.head_gain(q) | |
| if cache is not None: | |
| k, v = cache.update_and_fetch(k, v) | |
| y = scaled_dot_product_attention( | |
| q, k, v, cache=cache, scale=self.scale, mask=mask | |
| ) | |
| y = y.transpose(0, 2, 1, 3).reshape(bsz, seq_len, -1) | |
| return self.attn_resid(y) | |
| class MLP(nn.Module): | |
| def __init__(self, args: ModelArgs): | |
| super().__init__() | |
| n_state = args.hidden_size | |
| n_mlp = args.intermediate_size | |
| self.mlp_gate = nn.Linear(n_state, n_mlp, bias=False) | |
| self.mlp_linear = nn.Linear(n_state, n_mlp, bias=False) | |
| self.mlp_resid = nn.Linear(n_mlp, n_state, bias=False) | |
| def __call__(self, x: mx.array) -> mx.array: | |
| gate = self.mlp_gate(x) | |
| x = gate * mx.sigmoid(gate) * self.mlp_linear(x) | |
| return self.mlp_resid(x) | |
| class Block(nn.Module): | |
| def __init__(self, args: ModelArgs): | |
| super().__init__() | |
| init_gain = (2 * args.num_hidden_layers) ** -0.5 | |
| self.attn = CausalSelfAttention(args) | |
| self.attn_gain = ActGain(init_gain) | |
| self.mlp = MLP(args) | |
| self.mlp_gain = ActGain(init_gain) | |
| self.embed_skip = ActGain(0.0) | |
| self.rms_norm_eps = args.rms_norm_eps | |
| def __call__( | |
| self, | |
| e_x: mx.array, | |
| x: mx.array, | |
| mask: Optional[mx.array] = None, | |
| cache: Optional[Any] = None, | |
| ) -> mx.array: | |
| x = x + self.attn_gain(self.attn(rms_norm(x, self.rms_norm_eps), mask, cache)) | |
| x = x + self.mlp_gain(self.mlp(rms_norm(x, self.rms_norm_eps))) | |
| x = x + self.embed_skip(e_x) | |
| return x | |
| class Model(nn.Module): | |
| def __init__(self, args: ModelArgs): | |
| super().__init__() | |
| self.args = args | |
| self.model_type = args.model_type | |
| self.embed = nn.Embedding(args.vocab_size, args.hidden_size) | |
| self.blocks = [Block(args) for _ in range(args.num_hidden_layers)] | |
| self.lm_head = mx.zeros((args.vocab_size, args.hidden_size), dtype=mx.float32) | |
| self.lm_head_gain = WeightGain() | |
| def __call__( | |
| self, | |
| input_ids: mx.array, | |
| cache: Optional[Any] = None, | |
| input_embeddings: Optional[mx.array] = None, | |
| ) -> mx.array: | |
| if input_embeddings is not None: | |
| x = input_embeddings | |
| else: | |
| x = self.embed(input_ids) | |
| x = rms_norm(x, self.args.rms_norm_eps) | |
| e_x = x | |
| if cache is None: | |
| cache = [None] * len(self.blocks) | |
| mask = create_attention_mask(x, cache[0]) | |
| for block, c in zip(self.blocks, cache): | |
| x = block(e_x, x, mask=mask, cache=c) | |
| x = rms_norm(x, self.args.rms_norm_eps) | |
| return x @ self.lm_head_gain(self.lm_head).T | |
| def layers(self): | |
| return self.blocks | |