""" Sentinel Tiny Text Space — Interactive text generation with Sentinel transformer """ import gradio as gr import torch import torch.nn as nn import numpy as np from transformers import AutoTokenizer import json # ─── Sentinel Components ───────────────────────────────────────────────────── class SentinelAct(nn.Module): def __init__(self): super().__init__() self.inv_e = 1.0 / np.e def forward(self, x): return x * (1.0 / torch.cosh(self.inv_e * x)) class SentinelAttn(nn.Module): def __init__(self, d, h=4): super().__init__() self.d, self.h, self.hd = d, h, d // h self.Wq = nn.Linear(d, d, bias=False) self.Wk = nn.Linear(d, d, bias=False) self.Wv = nn.Linear(d, d, bias=False) self.Wo = nn.Linear(d, d, bias=False) def forward(self, x, mask): B, S, _ = x.shape Q = self.Wq(x).view(B, S, self.h, self.hd).transpose(1, 2) K = self.Wk(x).view(B, S, self.h, self.hd).transpose(1, 2) V = self.Wv(x).view(B, S, self.h, self.hd).transpose(1, 2) scores = torch.matmul(Q, K.transpose(-2, -1)) / np.sqrt(self.hd) scores = scores.masked_fill(mask == 0, float('-inf')) attn = torch.where(scores == float('-inf'), torch.zeros_like(scores), 1.0 / torch.cosh(scores)) attn = attn / (attn.sum(dim=-1, keepdim=True) + 1e-8) out = torch.matmul(attn, V) out = out.transpose(1, 2).contiguous().view(B, S, self.d) return self.Wo(out) class TinyTrans(nn.Module): def __init__(self, v=50257, d=128, h=4, l=4, ff=256, s=128): super().__init__() self.tok = nn.Embedding(v, d) self.pos = nn.Embedding(s, d) self.layers = nn.ModuleList([nn.ModuleDict({ 'attn': SentinelAttn(d, h), 'ffn': nn.Sequential(nn.Linear(d, ff), SentinelAct(), nn.Linear(ff, d)), 'n1': nn.LayerNorm(d), 'n2': nn.LayerNorm(d), }) for _ in range(l)]) self.norm = nn.LayerNorm(d) self.head = nn.Linear(d, v, bias=False) self.seq = s def forward(self, ids): B, S = ids.shape pos = torch.arange(S, device=ids.device).unsqueeze(0).expand(B, -1) x = self.tok(ids) + self.pos(pos) mask = torch.tril(torch.ones(S, S, device=ids.device)).view(1, 1, S, S) for L in self.layers: x = x + L['attn'](L['n1'](x), mask) x = x + L['ffn'](L['n2'](x)) x = self.norm(x) return self.head(x) # ─── Load Model ────────────────────────────────────────────────────────────── MODEL_URL = "https://huggingface.co/5dimension/sentinel-tiny-text/resolve/main/model.pt" TOKENIZER = "gpt2" tokenizer = AutoTokenizer.from_pretrained(TOKENIZER) tokenizer.pad_token = tokenizer.eos_token model = TinyTrans(v=tokenizer.vocab_size, d=128, h=4, l=4, ff=256, s=128) # Try to load weights from local or download try: import os from huggingface_hub import hf_hub_download model_path = hf_hub_download(repo_id="5dimension/sentinel-tiny-text", filename="model.pt") state_dict = torch.load(model_path, map_location="cpu", weights_only=True) model.load_state_dict(state_dict) model_status = "✅ Model loaded from HF Hub" except Exception as e: model_status = f"⚠️ Using random weights: {str(e)[:100]}" model = model.cpu().eval() # ─── Generation Function ──────────────────────────────────────────────────── def generate_text(prompt, max_tokens=50, temperature=0.8, top_p=0.9): with torch.no_grad(): ids = torch.tensor([tokenizer.encode(prompt)], device="cpu") for _ in range(max_tokens): logits = model(ids)[:, -1, :] / temperature probs = torch.softmax(logits, dim=-1) # Top-p sampling sorted_probs, sorted_indices = torch.sort(probs, descending=True) cumsum = torch.cumsum(sorted_probs, dim=-1) mask = cumsum <= top_p mask[0, 0] = True # At least one token filtered_probs = sorted_probs * mask.float() filtered_probs = filtered_probs / filtered_probs.sum() idx = torch.multinomial(filtered_probs, 1) tok = sorted_indices[0, idx].unsqueeze(1) ids = torch.cat([ids, tok], dim=1) if ids.size(1) >= 128: break return tokenizer.decode(ids[0], skip_special_tokens=True) # ─── UI ──────────────────────────────────────────────────────────────────────── with gr.Blocks(title="🦴 Sentinel Tiny Text", css=""" .gradio-container { max-width: 800px; margin: 0 auto; } .title { text-align: center; font-size: 2em; font-weight: bold; color: #6b4c9a; } .subtitle { text-align: center; color: #888; margin-bottom: 1em; } """) as demo: gr.Markdown("""
🦴 Sentinel Tiny Text
13.4M parameter transformer with Sentinel activation σ(x) = x·sech(x/e)
""") gr.Markdown(f"**Status**: {model_status}") with gr.Row(): with gr.Column(scale=2): prompt = gr.Textbox( label="Prompt", placeholder="Once upon a time, a little cat...", value="Once upon a time", lines=2 ) with gr.Column(scale=1): max_tokens = gr.Slider(10, 100, value=50, step=5, label="Max Tokens") temperature = gr.Slider(0.3, 1.5, value=0.8, step=0.1, label="Temperature") generate_btn = gr.Button("🚀 Generate", variant="primary") output = gr.Textbox(label="Generated Text", lines=8, interactive=False) with gr.Row(): gr.Markdown(""" ### About - **Activation**: Sentinel sech: σ(x) = x·sech(x/e) - **Attention**: Sentinel sech (no softmax) - **Architecture**: 4 layers, 128 hidden, 4 heads - **Dataset**: TinyStories (1K samples demo) - **Parameters**: 13.4M | **Quantized INT8**: [13 MB](https://huggingface.co/5dimension/sentinel-tiny-text-int8) | **INT4**: [6.4 MB](https://huggingface.co/5dimension/sentinel-tiny-text-int4) """) generate_btn.click(generate_text, [prompt, max_tokens, temperature], output) demo.launch()