Spaces:

5dimension
/

sentinel-tiny-text-space

Sleeping

App Files Files Community

sentinel-tiny-text-space / app.py

5dimension

Upload app.py with huggingface_hub

5efd475 verified 17 days ago

raw

history blame contribute delete

6.72 kB

	"""
	Sentinel Tiny Text Space — Interactive text generation with Sentinel transformer
	"""
	import gradio as gr
	import torch
	import torch.nn as nn
	import numpy as np
	from transformers import AutoTokenizer
	import json

	# ─── Sentinel Components ─────────────────────────────────────────────────────
	class SentinelAct(nn.Module):
	    """Sentinel activation: ``f(x) = x * sech(x / e)``.

	    The input is multiplied by the hyperbolic secant of itself scaled by
	    ``1 / e``. The module has no learnable parameters.
	    """

	    def __init__(self):
	        super().__init__()
	        # Constant 1/e that scales the argument of sech.
	        self.inv_e = 1.0 / np.e

	    def forward(self, x):
	        """Apply the activation element-wise; the result has the shape of ``x``."""
	        # sech(z) is computed as 1 / cosh(z).
	        sech = 1.0 / torch.cosh(self.inv_e * x)
	        return x * sech

	class SentinelAttn(nn.Module):
	    """Multi-head self-attention that weights keys with sech instead of softmax.

	    Attention weights are ``sech(score)`` for permitted positions and zero for
	    masked ones, then divided by their sum over the key axis.

	    Args:
	        d: Model width (size of the last dimension of the input).
	        h: Number of attention heads; must divide ``d`` evenly.

	    Raises:
	        ValueError: If ``h`` is not positive or ``d`` is not a multiple of ``h``.
	    """

	    def __init__(self, d, h=4):
	        super().__init__()
	        # Fail fast: an uneven split would otherwise only surface as an opaque
	        # shape error from ``view`` during the first forward pass.
	        if h <= 0 or d % h != 0:
	            raise ValueError(f"d ({d}) must be a positive multiple of h ({h})")
	        self.d, self.h, self.hd = d, h, d // h
	        self.Wq = nn.Linear(d, d, bias=False)
	        self.Wk = nn.Linear(d, d, bias=False)
	        self.Wv = nn.Linear(d, d, bias=False)
	        self.Wo = nn.Linear(d, d, bias=False)

	    def _split_heads(self, t):
	        """Reshape ``(B, S, d)`` into ``(B, h, S, hd)``."""
	        batch, length, _ = t.shape
	        return t.view(batch, length, self.h, self.hd).transpose(1, 2)

	    def forward(self, x, mask):
	        """Run attention over ``x``.

	        Args:
	            x: Tensor of shape ``(B, S, d)``.
	            mask: Tensor broadcastable to ``(B, h, S, S)``; entries equal to 0
	                mark key positions that must not be attended to.

	        Returns:
	            Tensor of shape ``(B, S, d)``.
	        """
	        B, S, _ = x.shape
	        q = self._split_heads(self.Wq(x))
	        k = self._split_heads(self.Wk(x))
	        v = self._split_heads(self.Wv(x))

	        scores = torch.matmul(q, k.transpose(-2, -1)) / np.sqrt(self.hd)
	        # Zero the weights of masked positions directly rather than routing
	        # them through -inf scores; the resulting weights are the same.
	        weights = (1.0 / torch.cosh(scores)).masked_fill(mask == 0, 0.0)
	        # The epsilon keeps the division finite if every weight in a row is 0.
	        weights = weights / (weights.sum(dim=-1, keepdim=True) + 1e-8)

	        merged = torch.matmul(weights, v).transpose(1, 2).contiguous().view(B, S, self.d)
	        return self.Wo(merged)

	class TinyTrans(nn.Module):
	    """Small causal transformer language model built from Sentinel blocks.

	    Each layer normalises its input, applies ``SentinelAttn`` with a residual
	    connection, then normalises again and applies a two-layer feed-forward
	    network (with ``SentinelAct``) with a second residual connection.

	    Args:
	        v: Vocabulary size (rows of the token embedding and output head).
	        d: Model width.
	        h: Number of attention heads per layer.
	        l: Number of layers.
	        ff: Hidden width of the feed-forward network.
	        s: Number of learned position embeddings.
	    """

	    def __init__(self, v=50257, d=128, h=4, l=4, ff=256, s=128):
	        super().__init__()
	        self.tok = nn.Embedding(v, d)
	        self.pos = nn.Embedding(s, d)

	        blocks = []
	        for _ in range(l):
	            blocks.append(nn.ModuleDict({
	                'attn': SentinelAttn(d, h),
	                'ffn': nn.Sequential(nn.Linear(d, ff), SentinelAct(), nn.Linear(ff, d)),
	                'n1': nn.LayerNorm(d),
	                'n2': nn.LayerNorm(d),
	            }))
	        self.layers = nn.ModuleList(blocks)

	        self.norm = nn.LayerNorm(d)
	        self.head = nn.Linear(d, v, bias=False)
	        self.seq = s

	    def forward(self, ids):
	        """Map token ids of shape ``(B, S)`` to logits of shape ``(B, S, v)``."""
	        batch, length = ids.shape
	        device = ids.device

	        # Token embedding plus learned embedding of positions 0..S-1.
	        positions = torch.arange(length, device=device).unsqueeze(0).expand(batch, -1)
	        hidden = self.tok(ids) + self.pos(positions)

	        # Lower-triangular mask: position i may only attend to positions <= i.
	        causal = torch.tril(torch.ones(length, length, device=device)).view(1, 1, length, length)

	        for block in self.layers:
	            hidden = hidden + block['attn'](block['n1'](hidden), causal)
	            hidden = hidden + block['ffn'](block['n2'](hidden))

	        return self.head(self.norm(hidden))

	# ─── Load Model ──────────────────────────────────────────────────────────────
	MODEL_URL = "https://huggingface.co/5dimension/sentinel-tiny-text/resolve/main/model.pt"
	TOKENIZER = "gpt2"

	# Reuse the EOS token as the padding token.
	tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)
	tokenizer.pad_token = tokenizer.eos_token

	# These hyper-parameters define the parameter shapes that load_state_dict
	# below will try to fill from the checkpoint.
	model = TinyTrans(v=tokenizer.vocab_size, d=128, h=4, l=4, ff=256, s=128)

	# Best-effort weight loading: any failure (missing package, download error,
	# mismatched checkpoint) is reported through `model_status` instead of
	# stopping the app; the model then keeps whatever weights it currently has.
	try:
	    from huggingface_hub import hf_hub_download
	    model_path = hf_hub_download(repo_id="5dimension/sentinel-tiny-text", filename="model.pt")
	    # weights_only=True restricts unpickling to tensor data.
	    state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
	    model.load_state_dict(state_dict)
	except Exception as e:
	    model_status = f"⚠️ Using random weights: {str(e)[:100]}"
	else:
	    model_status = "✅ Model loaded from HF Hub"

	# Inference only: keep the model on CPU and switch it to eval mode.
	model = model.cpu().eval()

	# ─── Generation Function ────────────────────────────────────────────────────
	def generate_text(prompt, max_tokens=50, temperature=0.8, top_p=0.9):
	    """Continue ``prompt`` by sampling from the module-level ``model``.

	    Tokens are drawn one at a time with temperature scaling followed by
	    nucleus (top-p) filtering. Generation stops after ``max_tokens`` new
	    tokens or once the sequence reaches the model's context length,
	    whichever comes first.

	    Args:
	        prompt: Text to continue. ``None`` or an empty string starts from the
	            tokenizer's EOS token.
	        max_tokens: Maximum number of tokens to append; coerced to ``int``.
	        temperature: Divisor applied to the logits; values at or below zero
	            are clamped to a tiny positive number.
	        top_p: Cumulative-probability threshold of the nucleus. At least one
	            token is always kept.

	    Returns:
	        The decoded prompt plus generated continuation, with special tokens
	        skipped by the tokenizer.
	    """
	    # UI sliders may deliver floats; range() requires an int.
	    max_tokens = int(max_tokens)
	    # Guard against division by zero when scaling the logits.
	    temperature = max(float(temperature), 1e-6)
	    # Number of positions the model can embed (128 if the attribute is absent).
	    context = getattr(model, "seq", 128)

	    token_ids = tokenizer.encode(prompt or "")
	    if not token_ids:
	        # With no tokens there is no "last position" to read logits from.
	        token_ids = [tokenizer.eos_token_id]

	    with torch.no_grad():
	        ids = torch.tensor([token_ids], device="cpu")
	        for _ in range(max_tokens):
	            # Feed only the trailing `context` tokens: a longer input would
	            # index past the end of the position embedding table.
	            logits = model(ids[:, -context:])[:, -1, :] / temperature
	            probs = torch.softmax(logits, dim=-1)

	            # Nucleus filtering: keep a token while the probability mass
	            # ranked strictly before it is still below top_p, so the token
	            # that crosses the threshold is included.
	            sorted_probs, sorted_indices = torch.sort(probs, descending=True)
	            mass_before = torch.cumsum(sorted_probs, dim=-1) - sorted_probs
	            keep = mass_before < top_p
	            keep[:, 0] = True  # At least one token
	            filtered = sorted_probs * keep.to(sorted_probs.dtype)
	            filtered = filtered / filtered.sum(dim=-1, keepdim=True)

	            # `choice` indexes the sorted order; gather maps it back to a
	            # vocabulary id and keeps the (1, 1) shape needed by torch.cat.
	            choice = torch.multinomial(filtered, 1)
	            tok = sorted_indices.gather(-1, choice)
	            ids = torch.cat([ids, tok], dim=1)
	            if ids.size(1) >= context:
	                break
	    return tokenizer.decode(ids[0], skip_special_tokens=True)

	# ─── UI ────────────────────────────────────────────────────────────────────────
	# Build the page: header, load status, prompt + sampling controls, output box
	# and a short "About" section. The button is wired to generate_text.
	with gr.Blocks(title="🦴 Sentinel Tiny Text", css="""
	.gradio-container { max-width: 800px; margin: 0 auto; }
	.title { text-align: center; font-size: 2em; font-weight: bold; color: #6b4c9a; }
	.subtitle { text-align: center; color: #888; margin-bottom: 1em; }
	""") as demo:
	    gr.Markdown("""
	<div class="title">🦴 Sentinel Tiny Text</div>
	<div class="subtitle">13.4M parameter transformer with Sentinel activation σ(x) = x·sech(x/e)</div>
	""")

	    # Shows whether the checkpoint was loaded or random weights are in use.
	    gr.Markdown(f"Status: {model_status}")

	    with gr.Row():
	        with gr.Column(scale=2):
	            prompt = gr.Textbox(
	                label="Prompt",
	                placeholder="Once upon a time, a little cat...",
	                value="Once upon a time",
	                lines=2
	            )
	        with gr.Column(scale=1):
	            max_tokens = gr.Slider(10, 100, value=50, step=5, label="Max Tokens")
	            temperature = gr.Slider(0.3, 1.5, value=0.8, step=0.1, label="Temperature")

	    generate_btn = gr.Button("🚀 Generate", variant="primary")
	    output = gr.Textbox(label="Generated Text", lines=8, interactive=False)

	    with gr.Row():
	        # The backslashes before "|" are doubled so the literal contains the
	        # same "\|" text without relying on an invalid escape sequence.
	        gr.Markdown("""
	### About
	- Activation: Sentinel sech: σ(x) = x·sech(x/e)
	- Attention: Sentinel sech (no softmax)
	- Architecture: 4 layers, 128 hidden, 4 heads
	- Dataset: TinyStories (1K samples demo)
	- Parameters: 13.4M \\| Quantized INT8: [13 MB](https://huggingface.co/5dimension/sentinel-tiny-text-int8) \\| INT4: [6.4 MB](https://huggingface.co/5dimension/sentinel-tiny-text-int4)
	""")

	    # Only prompt, max_tokens and temperature are passed; top_p keeps the
	    # default declared by generate_text.
	    generate_btn.click(generate_text, [prompt, max_tokens, temperature], output)

	demo.launch()