Spaces:
Sleeping
Sleeping
| """ | |
| Sentinel Tiny Text Space — Interactive text generation with Sentinel transformer | |
| """ | |
| import gradio as gr | |
| import torch | |
| import torch.nn as nn | |
| import numpy as np | |
| from transformers import AutoTokenizer | |
| import json | |
| # ─── Sentinel Components ───────────────────────────────────────────────────── | |
class SentinelAct(nn.Module):
    """Element-wise Sentinel activation: ``x * sech(x / e)``.

    ``sech`` is computed as the reciprocal of ``cosh``; the module has no
    learnable parameters.
    """

    def __init__(self):
        super().__init__()
        # Cache 1/e once so forward() only needs a multiplication.
        self.inv_e = 1.0 / np.e

    def forward(self, x):
        """Return ``x * sech(x / e)`` with the same shape as ``x``."""
        scaled = x * self.inv_e
        sech = torch.cosh(scaled).reciprocal()
        return x * sech
class SentinelAttn(nn.Module):
    """Multi-head self-attention that weights keys with sech instead of softmax.

    Scaled dot-product scores are mapped through ``sech`` (``1 / cosh``),
    masked positions get weight zero, and each row of weights is rescaled to
    sum to (approximately) one before being applied to the values.

    Args:
        d: Model width; must be a positive multiple of ``h``.
        h: Number of attention heads.

    Raises:
        ValueError: If ``h`` is not positive or ``d`` is not divisible by ``h``.
    """

    def __init__(self, d, h=4):
        super().__init__()
        # Fail early: with a non-divisible width the per-head reshape in
        # forward() would otherwise fail later with an opaque view() error.
        if h <= 0 or d % h != 0:
            raise ValueError(
                f"model width d={d} must be a positive multiple of head count h={h}"
            )
        self.d, self.h, self.hd = d, h, d // h
        self.Wq = nn.Linear(d, d, bias=False)
        self.Wk = nn.Linear(d, d, bias=False)
        self.Wv = nn.Linear(d, d, bias=False)
        self.Wo = nn.Linear(d, d, bias=False)

    def _split_heads(self, t, B, S):
        """Reshape ``(B, S, d)`` into ``(B, h, S, hd)``."""
        return t.view(B, S, self.h, self.hd).transpose(1, 2)

    def forward(self, x, mask):
        """Apply sech-weighted attention.

        Args:
            x: Input of shape ``(B, S, d)``.
            mask: Tensor broadcastable to ``(B, h, S, S)``; entries equal to
                zero mark key positions that must not be attended to.

        Returns:
            Tensor of shape ``(B, S, d)``.
        """
        B, S, _ = x.shape
        Q = self._split_heads(self.Wq(x), B, S)
        K = self._split_heads(self.Wk(x), B, S)
        V = self._split_heads(self.Wv(x), B, S)

        scores = torch.matmul(Q, K.transpose(-2, -1)) / np.sqrt(self.hd)

        # sech(score) as the unnormalised weight; masked positions contribute
        # exactly zero (no need to round-trip through -inf).
        weights = torch.where(
            mask == 0, torch.zeros_like(scores), 1.0 / torch.cosh(scores)
        )
        # Row-normalise; the epsilon keeps an all-zero row from dividing by 0.
        attn = weights / (weights.sum(dim=-1, keepdim=True) + 1e-8)

        out = torch.matmul(attn, V)
        out = out.transpose(1, 2).contiguous().view(B, S, self.d)
        return self.Wo(out)
class TinyTrans(nn.Module):
    """Small causal transformer language model built from Sentinel blocks.

    Token and learned position embeddings are summed, passed through ``l``
    pre-norm residual layers (attention then feed-forward), normalised, and
    projected to vocabulary logits.

    Args:
        v: Vocabulary size (embedding rows and output logits).
        d: Model width.
        h: Attention heads per layer.
        l: Number of layers.
        ff: Hidden width of each feed-forward block.
        s: Number of position embeddings, i.e. the longest accepted sequence.
    """

    def __init__(self, v=50257, d=128, h=4, l=4, ff=256, s=128):
        super().__init__()
        self.tok = nn.Embedding(v, d)
        self.pos = nn.Embedding(s, d)
        self.layers = nn.ModuleList([nn.ModuleDict({
            'attn': SentinelAttn(d, h),
            'ffn': nn.Sequential(nn.Linear(d, ff), SentinelAct(), nn.Linear(ff, d)),
            'n1': nn.LayerNorm(d), 'n2': nn.LayerNorm(d),
        }) for _ in range(l)])
        self.norm = nn.LayerNorm(d)
        self.head = nn.Linear(d, v, bias=False)
        self.seq = s

    def forward(self, ids):
        """Return next-token logits for every position.

        Args:
            ids: Integer token ids of shape ``(B, S)`` with ``S <= self.seq``.

        Returns:
            Logits of shape ``(B, S, v)``.

        Raises:
            IndexError: If ``S`` exceeds the number of position embeddings.
        """
        B, S = ids.shape
        # Check explicitly instead of letting the position-embedding lookup
        # fail with a generic out-of-range error.
        if S > self.seq:
            raise IndexError(
                f"sequence length {S} exceeds the model's maximum of {self.seq}"
            )
        positions = torch.arange(S, device=ids.device).unsqueeze(0).expand(B, -1)
        x = self.tok(ids) + self.pos(positions)
        # Lower-triangular mask: each position attends to itself and earlier ones.
        mask = torch.tril(torch.ones(S, S, device=ids.device)).view(1, 1, S, S)
        for layer in self.layers:
            x = x + layer['attn'](layer['n1'](x), mask)
            x = x + layer['ffn'](layer['n2'](x))
        x = self.norm(x)
        return self.head(x)
| # ─── Load Model ────────────────────────────────────────────────────────────── | |
# Checkpoint URL (kept for reference; the download below goes through
# hf_hub_download) and the name of the tokenizer to load.
MODEL_URL = "https://huggingface.co/5dimension/sentinel-tiny-text/resolve/main/model.pt"
TOKENIZER = "gpt2"

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

model = TinyTrans(v=tokenizer.vocab_size, d=128, h=4, l=4, ff=256, s=128)

# Best-effort weight loading: on any failure the app still starts with the
# randomly initialised model and reports the (truncated) reason in the UI.
try:
    import os
    from huggingface_hub import hf_hub_download

    model_path = hf_hub_download(repo_id="5dimension/sentinel-tiny-text", filename="model.pt")
    state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
    model.load_state_dict(state_dict)
except Exception as e:
    model_status = f"β οΈ Using random weights: {str(e)[:100]}"
else:
    model_status = "β Model loaded from HF Hub"

# Inference only: keep the model on CPU and in eval mode.
model = model.cpu().eval()
| # ─── Generation Function ───────────────────────────────────────────────────── | |
def generate_text(prompt, max_tokens=50, temperature=0.8, top_p=0.9):
    """Continue ``prompt`` with tokens sampled by nucleus (top-p) sampling.

    Uses the module-level ``model`` and ``tokenizer``. Generation stops after
    ``max_tokens`` new tokens or once the total sequence length reaches the
    model's context size, whichever comes first.

    Args:
        prompt: Text to continue. An empty prompt is seeded with the
            tokenizer's EOS token so the model has something to condition on.
        max_tokens: Maximum number of new tokens. Coerced to ``int`` so a
            float value (e.g. from a UI slider) still works with ``range``.
        temperature: Softmax temperature. Clamped to a tiny positive value so
            ``0`` behaves like near-greedy decoding instead of dividing by zero.
        top_p: Cumulative probability mass of the nucleus to sample from.

    Returns:
        The decoded prompt plus continuation, with special tokens removed.
    """
    context_len = getattr(model, "seq", 128)
    steps = max(int(max_tokens), 0)
    temperature = max(float(temperature), 1e-6)

    prompt_ids = tokenizer.encode(prompt or "")
    if not prompt_ids:
        # An empty id list would build an empty float tensor and crash the model.
        prompt_ids = [tokenizer.eos_token_id]

    with torch.no_grad():
        ids = torch.tensor([prompt_ids], device="cpu")
        for _ in range(steps):
            # Feed at most `context_len` trailing tokens so an over-long
            # prompt cannot overflow the position-embedding table.
            window = ids[:, -context_len:]
            logits = model(window)[:, -1, :] / temperature
            probs = torch.softmax(logits, dim=-1)

            # Top-p sampling: keep the smallest prefix of the sorted
            # distribution whose mass reaches top_p. A token is kept while the
            # mass *before* it is below top_p, so the token that crosses the
            # threshold is included.
            sorted_probs, sorted_indices = torch.sort(probs, descending=True)
            cumulative = torch.cumsum(sorted_probs, dim=-1)
            keep = (cumulative - sorted_probs) < top_p
            keep[..., 0] = True  # At least one token, even if top_p <= 0
            filtered_probs = sorted_probs * keep
            filtered_probs = filtered_probs / filtered_probs.sum(dim=-1, keepdim=True)

            choice = torch.multinomial(filtered_probs, 1)       # (1, 1) rank in sorted order
            next_token = torch.gather(sorted_indices, -1, choice)  # (1, 1) vocabulary id
            ids = torch.cat([ids, next_token], dim=1)
            if ids.size(1) >= context_len:
                break
    return tokenizer.decode(ids[0], skip_special_tokens=True)
| # ─── UI ────────────────────────────────────────────────────────────────────── | |
# NOTE(review): several user-facing strings below (title glyph, button label,
# the activation formula) look mis-encoded; they are reproduced as found —
# confirm the intended characters against the original file.

# Page-level stylesheet passed to gr.Blocks.
_PAGE_CSS = """
.gradio-container { max-width: 800px; margin: 0 auto; }
.title { text-align: center; font-size: 2em; font-weight: bold; color: #6b4c9a; }
.subtitle { text-align: center; color: #888; margin-bottom: 1em; }
"""

# Title/subtitle banner rendered at the top of the page.
_HEADER_MD = """
<div class="title">𦴠Sentinel Tiny Text</div>
<div class="subtitle">13.4M parameter transformer with Sentinel activation Ο(x) = xΒ·sech(x/e)</div>
"""

# Static "About" panel rendered below the output box.
_ABOUT_MD = """
### About
- **Activation**: Sentinel sech: Ο(x) = xΒ·sech(x/e)
- **Attention**: Sentinel sech (no softmax)
- **Architecture**: 4 layers, 128 hidden, 4 heads
- **Dataset**: TinyStories (1K samples demo)
- **Parameters**: 13.4M | **Quantized INT8**: [13 MB](https://huggingface.co/5dimension/sentinel-tiny-text-int8) | **INT4**: [6.4 MB](https://huggingface.co/5dimension/sentinel-tiny-text-int4)
"""

with gr.Blocks(title="𦴠Sentinel Tiny Text", css=_PAGE_CSS) as demo:
    gr.Markdown(_HEADER_MD)
    # Shows whether trained weights were loaded or random ones are in use.
    gr.Markdown(f"**Status**: {model_status}")

    # Inputs: prompt on the left, sampling controls on the right.
    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Once upon a time, a little cat...",
                value="Once upon a time",
                lines=2,
            )
        with gr.Column(scale=1):
            max_tokens = gr.Slider(
                minimum=10, maximum=100, value=50, step=5, label="Max Tokens"
            )
            temperature = gr.Slider(
                minimum=0.3, maximum=1.5, value=0.8, step=0.1, label="Temperature"
            )

    generate_btn = gr.Button("π Generate", variant="primary")
    output = gr.Textbox(label="Generated Text", lines=8, interactive=False)

    with gr.Row():
        gr.Markdown(_ABOUT_MD)

    # top_p is not exposed in the UI, so generate_text uses its default.
    generate_btn.click(
        fn=generate_text,
        inputs=[prompt, max_tokens, temperature],
        outputs=output,
    )

demo.launch()