File size: 6,720 Bytes
5efd475
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""
Sentinel Tiny Text Space — Interactive text generation with Sentinel transformer
"""
import gradio as gr
import torch
import torch.nn as nn
import numpy as np
from transformers import AutoTokenizer
import json

# ─── Sentinel Components ─────────────────────────────────────────────────────
class SentinelAct(nn.Module):
    """Sentinel activation: ``f(x) = x * sech(x / e)``, applied element-wise."""

    def __init__(self):
        super().__init__()
        # Input scale 1/e, computed once at construction time.
        self.inv_e = 1.0 / np.e

    def forward(self, x):
        """Return ``x`` gated by the hyperbolic secant of ``x / e``."""
        scaled = self.inv_e * x
        sech = 1.0 / torch.cosh(scaled)
        return x * sech

class SentinelAttn(nn.Module):
    """Multi-head self-attention that weights keys with sech instead of softmax.

    Each raw score ``s`` becomes the non-negative weight ``sech(s) = 1 / cosh(s)``;
    blocked positions get weight zero and every row is then rescaled to sum to
    (almost exactly) one.

    Args:
        d: Model width. Must be a positive multiple of ``h``.
        h: Number of attention heads.

    Raises:
        ValueError: If ``d`` cannot be split evenly across ``h`` heads.
    """

    def __init__(self, d, h=4):
        super().__init__()
        # Fail here rather than with an opaque ``view`` error inside forward().
        if h <= 0 or d % h != 0:
            raise ValueError(
                f"model width d={d} must be a positive multiple of the head count h={h}"
            )
        self.d, self.h, self.hd = d, h, d // h
        self.Wq = nn.Linear(d, d, bias=False)
        self.Wk = nn.Linear(d, d, bias=False)
        self.Wv = nn.Linear(d, d, bias=False)
        self.Wo = nn.Linear(d, d, bias=False)

    def forward(self, x, mask):
        """Attend over ``x`` under ``mask``.

        Args:
            x: Tensor of shape ``(B, S, d)``.
            mask: Tensor broadcastable to ``(B, h, S, S)``; entries equal to 0
                mark key positions a query must not attend to.

        Returns:
            Tensor of shape ``(B, S, d)``.
        """
        B, S, _ = x.shape

        def split_heads(t):
            # (B, S, d) -> (B, h, S, hd)
            return t.view(B, S, self.h, self.hd).transpose(1, 2)

        Q = split_heads(self.Wq(x))
        K = split_heads(self.Wk(x))
        V = split_heads(self.Wv(x))

        scores = torch.matmul(Q, K.transpose(-2, -1)) / np.sqrt(self.hd)
        # sech is largest at a score of 0 and decays symmetrically on both sides.
        weights = 1.0 / torch.cosh(scores)
        # Zero blocked positions directly; no -inf sentinel round-trip is needed.
        weights = weights.masked_fill(mask == 0, 0.0)
        # The epsilon keeps an all-zero row from dividing by zero.
        weights = weights / (weights.sum(dim=-1, keepdim=True) + 1e-8)

        out = torch.matmul(weights, V)
        out = out.transpose(1, 2).contiguous().view(B, S, self.d)
        return self.Wo(out)

class TinyTrans(nn.Module):
    """Small causal transformer language model built from Sentinel blocks.

    Token and learned position embeddings are summed, passed through ``l``
    pre-norm residual layers (Sentinel attention, then a Sentinel-activated
    feed-forward network), normalised once more and projected to vocabulary
    logits.

    Args:
        v: Vocabulary size.
        d: Embedding / model width.
        h: Attention heads per layer.
        l: Number of layers.
        ff: Hidden width of each feed-forward network.
        s: Maximum sequence length (size of the position-embedding table).
    """

    def __init__(self, v=50257, d=128, h=4, l=4, ff=256, s=128):
        super().__init__()
        self.tok = nn.Embedding(v, d)
        self.pos = nn.Embedding(s, d)
        # Submodule names and nesting define the state_dict keys of saved
        # checkpoints, so they must stay exactly as they are.
        self.layers = nn.ModuleList([nn.ModuleDict({
            'attn': SentinelAttn(d, h),
            'ffn': nn.Sequential(nn.Linear(d, ff), SentinelAct(), nn.Linear(ff, d)),
            'n1': nn.LayerNorm(d), 'n2': nn.LayerNorm(d),
        }) for _ in range(l)])
        self.norm = nn.LayerNorm(d)
        self.head = nn.Linear(d, v, bias=False)
        self.seq = s

    def forward(self, ids):
        """Compute next-token logits for every position.

        Args:
            ids: Integer tensor of token ids with shape ``(B, S)``, ``S <= s``.

        Returns:
            Tensor of shape ``(B, S, v)`` holding unnormalised logits.

        Raises:
            IndexError: If ``S`` exceeds the position-embedding table.
        """
        B, S = ids.shape
        # Fail with a clear message instead of an out-of-range embedding lookup.
        if S > self.seq:
            raise IndexError(
                f"sequence length {S} exceeds the model's maximum of {self.seq}"
            )
        pos = torch.arange(S, device=ids.device).unsqueeze(0).expand(B, -1)
        x = self.tok(ids) + self.pos(pos)
        # Lower-triangular mask: position i may attend only to positions <= i.
        mask = torch.tril(torch.ones(S, S, device=ids.device)).view(1, 1, S, S)
        for layer in self.layers:
            x = x + layer['attn'](layer['n1'](x), mask)
            x = x + layer['ffn'](layer['n2'](x))
        x = self.norm(x)
        return self.head(x)

# ─── Load Model ──────────────────────────────────────────────────────────────
# Direct URL of the checkpoint (informational; the download below goes through
# huggingface_hub using the repo id and filename).
MODEL_URL = "https://huggingface.co/5dimension/sentinel-tiny-text/resolve/main/model.pt"
TOKENIZER = "gpt2"

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)
# Reuse the end-of-text token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Hyper-parameters must match the checkpoint for load_state_dict to succeed.
model = TinyTrans(v=tokenizer.vocab_size, d=128, h=4, l=4, ff=256, s=128)

# Best effort: fetch the trained weights from the Hub. Any failure (missing
# package, no network, incompatible checkpoint) leaves the randomly initialised
# model in place and is reported in the UI through `model_status`.
try:
    import os  # not used in this block; kept so the module-level name stays available
    from huggingface_hub import hf_hub_download
    model_path = hf_hub_download(repo_id="5dimension/sentinel-tiny-text", filename="model.pt")
    # weights_only=True avoids unpickling arbitrary objects from the download.
    state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
    model.load_state_dict(state_dict)
    model_status = "✅ Model loaded from HF Hub"
except Exception as e:
    # Deliberately broad: the demo should still start without trained weights.
    model_status = f"⚠️ Using random weights: {str(e)[:100]}"

# Inference only: keep the model on CPU and switch it to eval mode.
model = model.cpu().eval()

# ─── Generation Function ─────────────────────────────────────────────────────
def _sample_top_p(probs, top_p):
    """Sample one token id per row from the top-p (nucleus) part of ``probs``.

    Args:
        probs: Tensor of shape ``(batch, vocab)`` whose rows are probabilities.
        top_p: Cumulative probability mass to keep.

    Returns:
        Integer tensor of shape ``(batch, 1)`` with the sampled token ids.
    """
    sorted_probs, sorted_indices = torch.sort(probs, dim=-1, descending=True)
    # Probability mass that precedes each token in descending order.
    mass_before = torch.cumsum(sorted_probs, dim=-1) - sorted_probs
    # Keep a token while the mass before it is still below top_p, so the token
    # that crosses the threshold is part of the nucleus.
    keep = mass_before < top_p
    keep[..., 0] = True  # never filter out the most likely token
    nucleus = sorted_probs.masked_fill(~keep, 0.0)
    nucleus = nucleus / nucleus.sum(dim=-1, keepdim=True)
    picked = torch.multinomial(nucleus, 1)
    # Map positions in the sorted order back to vocabulary ids; stays (batch, 1).
    return sorted_indices.gather(-1, picked)


def generate_text(prompt, max_tokens=50, temperature=0.8, top_p=0.9):
    """Continue ``prompt`` with tokens sampled from the module-level model.

    Args:
        prompt: Text to continue. An empty prompt starts from the tokenizer's
            end-of-text token.
        max_tokens: Upper bound on the number of new tokens; converted to int,
            so float values coming from a UI slider are accepted.
        temperature: Softmax temperature. Values <= 0 select greedy decoding.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The decoded prompt plus continuation, with special tokens removed.
        Generation stops early once the sequence reaches the model's context
        length.
    """
    # Context length of the model; 128 matches the configuration used here.
    context = getattr(model, "seq", 128)

    token_ids = tokenizer.encode(prompt or "")
    if not token_ids:
        # The model needs at least one position to predict from.
        token_ids = [tokenizer.eos_token_id]

    with torch.no_grad():
        ids = torch.tensor([token_ids], device="cpu")
        for _ in range(int(max_tokens)):
            # Feed at most `context` trailing tokens so an over-long prompt
            # cannot overflow the position embeddings.
            logits = model(ids[:, -context:])[:, -1, :]
            if temperature <= 0:
                tok = logits.argmax(dim=-1, keepdim=True)
            else:
                probs = torch.softmax(logits / temperature, dim=-1)
                tok = _sample_top_p(probs, top_p)
            ids = torch.cat([ids, tok], dim=1)
            if ids.size(1) >= context:
                break
        return tokenizer.decode(ids[0], skip_special_tokens=True)

# ─── UI ──────────────────────────────────────────────────────────────────────
# Build the single-page demo: header, prompt and sampling controls, output box
# and a short description of the model.
with gr.Blocks(title="🦴 Sentinel Tiny Text", css="""
    .gradio-container { max-width: 800px; margin: 0 auto; }
    .title { text-align: center; font-size: 2em; font-weight: bold; color: #6b4c9a; }
    .subtitle { text-align: center; color: #888; margin-bottom: 1em; }
""") as demo:
    gr.Markdown("""
    <div class="title">🦴 Sentinel Tiny Text</div>
    <div class="subtitle">13.4M parameter transformer with Sentinel activation σ(x) = x·sech(x/e)</div>
    """)

    # Shows whether the trained weights were loaded or random ones are in use.
    gr.Markdown(f"**Status**: {model_status}")

    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Once upon a time, a little cat...",
                value="Once upon a time",
                lines=2
            )
        with gr.Column(scale=1):
            max_tokens = gr.Slider(10, 100, value=50, step=5, label="Max Tokens")
            temperature = gr.Slider(0.3, 1.5, value=0.8, step=0.1, label="Temperature")

    generate_btn = gr.Button("🚀 Generate", variant="primary")
    output = gr.Textbox(label="Generated Text", lines=8, interactive=False)

    with gr.Row():
        gr.Markdown("""
        ### About
        - **Activation**: Sentinel sech: σ(x) = x·sech(x/e)
        - **Attention**: Sentinel sech (no softmax)
        - **Architecture**: 4 layers, 128 hidden, 4 heads
        - **Dataset**: TinyStories (1K samples demo)
        - **Parameters**: 13.4M | **Quantized INT8**: [13 MB](https://huggingface.co/5dimension/sentinel-tiny-text-int8) | **INT4**: [6.4 MB](https://huggingface.co/5dimension/sentinel-tiny-text-int4)
        """)

    # Only three inputs are wired up, so generate_text uses its default top_p.
    generate_btn.click(generate_text, [prompt, max_tokens, temperature], output)

demo.launch()