# 5dimension's picture
# Upload app.py with huggingface_hub
# 5efd475 verified
"""
Sentinel Tiny Text Space β€” Interactive text generation with Sentinel transformer
"""
import gradio as gr
import torch
import torch.nn as nn
import numpy as np
from transformers import AutoTokenizer
import json
# ─── Sentinel Components ─────────────────────────────────────────────────────
class SentinelAct(nn.Module):
    """Sentinel activation: sigma(x) = x * sech(x / e).

    sech is computed as 1/cosh; the 1/e scale is fixed at construction.
    """

    def __init__(self):
        super().__init__()
        # Constant scale applied inside sech.
        self.inv_e = 1.0 / np.e

    def forward(self, x):
        # sech(z) = 1 / cosh(z); gate the input by its own sech response.
        gate = 1.0 / torch.cosh(self.inv_e * x)
        return x * gate
class SentinelAttn(nn.Module):
    """Multi-head self-attention that weights scores with sech instead of softmax.

    Masked positions are filled with -inf and then explicitly zeroed before the
    per-row normalization, so they contribute no attention mass.
    """

    def __init__(self, d, h=4):
        super().__init__()
        # Keep attribute names (Wq/Wk/Wv/Wo, d/h/hd) stable: checkpoints are
        # loaded by state_dict key.
        self.d, self.h, self.hd = d, h, d // h
        self.Wq = nn.Linear(d, d, bias=False)
        self.Wk = nn.Linear(d, d, bias=False)
        self.Wv = nn.Linear(d, d, bias=False)
        self.Wo = nn.Linear(d, d, bias=False)

    def forward(self, x, mask):
        batch, seq, _ = x.shape

        def split_heads(t):
            # (B, S, d) -> (B, h, S, hd)
            return t.view(batch, seq, self.h, self.hd).transpose(1, 2)

        q = split_heads(self.Wq(x))
        k = split_heads(self.Wk(x))
        v = split_heads(self.Wv(x))

        # Scaled dot-product scores, then block future positions with -inf.
        scores = torch.matmul(q, k.transpose(-2, -1)) / np.sqrt(self.hd)
        scores = scores.masked_fill(mask == 0, float('-inf'))

        # sech weighting; masked (-inf) entries are forced to exactly zero.
        neg_inf = float('-inf')
        weights = torch.where(
            scores == neg_inf,
            torch.zeros_like(scores),
            1.0 / torch.cosh(scores),
        )
        # Row-normalize; epsilon guards an all-zero row.
        weights = weights / (weights.sum(dim=-1, keepdim=True) + 1e-8)

        # Weighted sum of values, merge heads, project out.
        ctx = torch.matmul(weights, v)
        ctx = ctx.transpose(1, 2).contiguous().view(batch, seq, self.d)
        return self.Wo(ctx)
class TinyTrans(nn.Module):
    """Tiny pre-norm causal transformer with Sentinel attention and activation.

    Args (constructor):
        v: vocabulary size, d: model width, h: attention heads,
        l: number of layers, ff: feed-forward hidden width,
        s: maximum sequence length (positional-embedding table size).
    """

    def __init__(self, v=50257, d=128, h=4, l=4, ff=256, s=128):
        super().__init__()
        # Token + learned absolute position embeddings.
        self.tok = nn.Embedding(v, d)
        self.pos = nn.Embedding(s, d)

        def build_block():
            # Attribute keys ('attn'/'ffn'/'n1'/'n2') are part of the
            # checkpoint's state_dict layout — do not rename.
            return nn.ModuleDict({
                'attn': SentinelAttn(d, h),
                'ffn': nn.Sequential(nn.Linear(d, ff), SentinelAct(), nn.Linear(ff, d)),
                'n1': nn.LayerNorm(d),
                'n2': nn.LayerNorm(d),
            })

        self.layers = nn.ModuleList([build_block() for _ in range(l)])
        self.norm = nn.LayerNorm(d)
        self.head = nn.Linear(d, v, bias=False)
        # Context window; callers use this to clip inputs.
        self.seq = s

    def forward(self, ids):
        """Map token ids (B, S) to logits (B, S, vocab)."""
        batch, length = ids.shape
        positions = torch.arange(length, device=ids.device).unsqueeze(0).expand(batch, -1)
        hidden = self.tok(ids) + self.pos(positions)
        # Lower-triangular causal mask shared by every layer.
        causal = torch.tril(torch.ones(length, length, device=ids.device)).view(1, 1, length, length)
        for block in self.layers:
            # Pre-norm residual: attention, then feed-forward.
            hidden = hidden + block['attn'](block['n1'](hidden), causal)
            hidden = hidden + block['ffn'](block['n2'](hidden))
        return self.head(self.norm(hidden))
# ─── Load Model ──────────────────────────────────────────────────────────────
# NOTE(review): MODEL_URL is never used — weights come from hf_hub_download below.
MODEL_URL = "https://huggingface.co/5dimension/sentinel-tiny-text/resolve/main/model.pt"
TOKENIZER = "gpt2"
# Reuse GPT-2's BPE tokenizer; GPT-2 ships no pad token, so alias it to EOS.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)
tokenizer.pad_token = tokenizer.eos_token
# Build the model sized to the GPT-2 vocabulary (4 layers, width 128, 128-token window).
model = TinyTrans(v=tokenizer.vocab_size, d=128, h=4, l=4, ff=256, s=128)
# Try to load weights from local or download
try:
    import os
    from huggingface_hub import hf_hub_download
    model_path = hf_hub_download(repo_id="5dimension/sentinel-tiny-text", filename="model.pt")
    # weights_only=True refuses arbitrary pickled objects in the checkpoint.
    state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
    model.load_state_dict(state_dict)
    model_status = "βœ… Model loaded from HF Hub"
except Exception as e:
    # Best-effort fallback: keep the demo UI alive on random init and show why.
    model_status = f"⚠️ Using random weights: {str(e)[:100]}"
# CPU inference only; eval() disables dropout/batch-norm training behavior.
model = model.cpu().eval()
# ─── Generation Function ────────────────────────────────────────────────────
def generate_text(prompt, max_tokens=50, temperature=0.8, top_p=0.9):
    """Autoregressively sample a continuation of `prompt` from the model.

    Args:
        prompt: Seed text; encoded with the module-level GPT-2 tokenizer.
        max_tokens: Maximum number of new tokens to sample.
        temperature: Logit divisor; lower values sample greedier.
        top_p: Nucleus-sampling cumulative-probability cutoff.

    Returns:
        Decoded prompt plus continuation (special tokens stripped).
    """
    with torch.no_grad():
        ids = torch.tensor([tokenizer.encode(prompt)], device="cpu")
        for _ in range(max_tokens):
            # Clip the context to the model's positional window: the pos
            # embedding table only covers `model.seq` positions, so longer
            # prompts/contexts would raise an index error.
            window = ids[:, -model.seq:]
            logits = model(window)[:, -1, :] / temperature
            probs = torch.softmax(logits, dim=-1)
            # Nucleus (top-p) sampling: keep the smallest sorted prefix whose
            # cumulative mass reaches top_p, INCLUDING the token that crosses
            # the threshold (cumsum - p < top_p is the standard shifted mask).
            sorted_probs, sorted_indices = torch.sort(probs, descending=True)
            cumsum = torch.cumsum(sorted_probs, dim=-1)
            keep = (cumsum - sorted_probs) < top_p
            keep[..., 0] = True  # always retain at least the top token
            filtered = sorted_probs * keep.float()
            filtered = filtered / filtered.sum(dim=-1, keepdim=True)
            idx = torch.multinomial(filtered, 1)  # (1, 1) position in sorted order
            # gather keeps the (1, 1) shape, so it concatenates cleanly onto
            # `ids`; the original fancy-indexing produced a (1, 1, 1) tensor
            # that made torch.cat fail.
            tok = sorted_indices.gather(-1, idx)
            ids = torch.cat([ids, tok], dim=1)
            # Stop early once the model emits end-of-sequence.
            if tok.item() == tokenizer.eos_token_id:
                break
    return tokenizer.decode(ids[0], skip_special_tokens=True)
# ─── UI ────────────────────────────────────────────────────────────────────────
# Gradio front-end: prompt box, sampling controls, generate button, output pane.
with gr.Blocks(title="🦴 Sentinel Tiny Text", css="""
.gradio-container { max-width: 800px; margin: 0 auto; }
.title { text-align: center; font-size: 2em; font-weight: bold; color: #6b4c9a; }
.subtitle { text-align: center; color: #888; margin-bottom: 1em; }
""") as demo:
    # Header banner, styled by the css block above.
    gr.Markdown("""
<div class="title">🦴 Sentinel Tiny Text</div>
<div class="subtitle">13.4M parameter transformer with Sentinel activation Οƒ(x) = xΒ·sech(x/e)</div>
""")
    # Surface whether real weights or the random-init fallback are in use.
    gr.Markdown(f"**Status**: {model_status}")
    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Once upon a time, a little cat...",
                value="Once upon a time",
                lines=2
            )
        with gr.Column(scale=1):
            max_tokens = gr.Slider(10, 100, value=50, step=5, label="Max Tokens")
            temperature = gr.Slider(0.3, 1.5, value=0.8, step=0.1, label="Temperature")
    generate_btn = gr.Button("πŸš€ Generate", variant="primary")
    output = gr.Textbox(label="Generated Text", lines=8, interactive=False)
    with gr.Row():
        gr.Markdown("""
### About
- **Activation**: Sentinel sech: Οƒ(x) = xΒ·sech(x/e)
- **Attention**: Sentinel sech (no softmax)
- **Architecture**: 4 layers, 128 hidden, 4 heads
- **Dataset**: TinyStories (1K samples demo)
- **Parameters**: 13.4M | **Quantized INT8**: [13 MB](https://huggingface.co/5dimension/sentinel-tiny-text-int8) | **INT4**: [6.4 MB](https://huggingface.co/5dimension/sentinel-tiny-text-int4)
""")
    # NOTE(review): top_p is not wired to any control — generate_text's default
    # (0.9) is always used.
    generate_btn.click(generate_text, [prompt, max_tokens, temperature], output)
# Start the Space's web server.
demo.launch()