Spaces:

itriedcoding
/

sage-1b-space

Sleeping

App Files Files Community

sage-1b-space / app.py

itriedcoding

Upload app.py with huggingface_hub

dc28463 verified 3 days ago

raw

history blame contribute delete

5.95 kB

	# Sage 1B Space - rebuilt
	import gradio as gr
	import torch
	import torch.nn as nn
	import math
	import json
	from huggingface_hub import hf_hub_download

	# Hugging Face Hub repository from which config.json, tokenizer.json and
	# pytorch_model_state.bin are downloaded at start-up.
	REPO_ID = "itriedcoding/Sage-1B"

	class RotaryEmbedding(nn.Module):
	    """Precompute and cache the cos/sin tables for rotary position embeddings.

	    Args:
	        dim: Size of the feature dimension to rotate; one inverse frequency
	            is created per pair of features.
	        max_seq_len: Minimum number of positions held in the cache. The
	            cache grows on demand when a longer sequence is requested.
	    """

	    def __init__(self, dim, max_seq_len=128):
	        super().__init__()
	        inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim))
	        self.register_buffer("inv_freq", inv_freq)
	        self.max_seq_len = max_seq_len
	        # Lazily built tables of shape (1, 1, positions, 2 * len(inv_freq)).
	        self._cos = None
	        self._sin = None

	    def get_cos_sin(self, x, seq_len=None):
	        """Return the ``(cos, sin)`` tables for the first ``seq_len`` positions.

	        Args:
	            x: Tensor whose device selects where the tables live and whose
	                second dimension supplies ``seq_len`` when it is omitted.
	            seq_len: Number of positions to return; defaults to ``x.size(1)``.

	        Returns:
	            Two tensors of shape ``(1, 1, seq_len, 2 * len(inv_freq))``.
	        """
	        if seq_len is None:
	            seq_len = x.size(1)

	        needs_rebuild = (
	            self._cos is None
	            or self._cos.size(-2) < seq_len
	            or self._cos.device != x.device
	        )
	        if needs_rebuild:
	            # Cover the requested length even when it exceeds max_seq_len;
	            # otherwise the slice below would silently come back too short.
	            positions = max(seq_len, self.max_seq_len)
	            t = torch.arange(
	                positions,
	                device=self.inv_freq.device,
	                dtype=self.inv_freq.dtype,
	            )
	            freqs = torch.einsum("i,j->ij", t, self.inv_freq)
	            emb = torch.cat((freqs, freqs), dim=-1)[None, None]
	            # Keep the cache on the caller's device so it can be multiplied
	            # with the activations directly.
	            self._cos = emb.cos().to(x.device)
	            self._sin = emb.sin().to(x.device)
	        return self._cos[..., :seq_len, :], self._sin[..., :seq_len, :]

	def rotate_half(x):
	    """Split the last dimension in two and return ``[-second, first]`` joined again."""
	    first, second = torch.chunk(x, 2, dim=-1)
	    return torch.cat([second.neg(), first], dim=-1)

	def apply_rotary(x, c, s):
	    """Combine ``x`` scaled by ``c`` with its half-rotated copy scaled by ``s``."""
	    scaled = x * c
	    rotated = rotate_half(x) * s
	    return scaled + rotated

	class Attention(nn.Module):
	    """Multi-head self-attention with rotary embeddings and bias-free projections.

	    Args:
	        h: Width of the input and output features.
	        nh: Number of attention heads.
	        hd: Features per head; the projections are reshaped to ``(nh, hd)``.
	    """

	    def __init__(self, h, nh, hd):
	        super().__init__()
	        self.h = h
	        self.nh = nh
	        self.hd = hd
	        # Attribute names and creation order (q, k, v, o) are kept so that
	        # checkpoint keys and seeded initialisation stay the same.
	        self.q = nn.Linear(h, h, bias=False)
	        self.k = nn.Linear(h, h, bias=False)
	        self.v = nn.Linear(h, h, bias=False)
	        self.o = nn.Linear(h, h, bias=False)

	    def forward(self, x, cos, sin, mask):
	        """Attend over ``x`` (batch, steps, h) and return a tensor of the same shape.

	        ``cos``/``sin`` are applied to queries and keys; ``mask`` is added to
	        the scaled scores before the softmax.
	        """
	        batch, steps, _ = x.shape

	        def split_heads(projected):
	            # (batch, steps, h) -> (batch, nh, steps, hd)
	            return projected.reshape(batch, steps, self.nh, self.hd).transpose(1, 2)

	        queries = split_heads(self.q(x))
	        keys = split_heads(self.k(x))
	        values = split_heads(self.v(x))

	        queries = apply_rotary(queries, cos, sin)
	        keys = apply_rotary(keys, cos, sin)

	        scores = torch.matmul(queries, keys.transpose(-2, -1)) / math.sqrt(self.hd)
	        scores = scores + mask[:, :, :steps, :steps]
	        weights = torch.nn.functional.softmax(scores, dim=-1)

	        # (batch, nh, steps, hd) -> (batch, steps, h)
	        merged = weights.matmul(values).transpose(1, 2).reshape(batch, steps, self.h)
	        return self.o(merged)

	class FF(nn.Module):
	    """Gated feed-forward layer computing ``d(silu(g(x)) * u(x))`` without biases.

	    Args:
	        h: Width of the input and output features.
	        i: Width of the inner (gated) projection.
	    """

	    def __init__(self, h, i):
	        super().__init__()
	        self.g = nn.Linear(h, i, bias=False)
	        self.u = nn.Linear(h, i, bias=False)
	        self.d = nn.Linear(i, h, bias=False)

	    def forward(self, x):
	        """Apply the gated projection to ``x`` and map it back to width ``h``."""
	        gate = torch.nn.functional.silu(self.g(x))
	        up = self.u(x)
	        return self.d(gate * up)

	class Block(nn.Module):
	    """One transformer layer: RMS-normed attention and feed-forward, each added residually.

	    Args:
	        h: Feature width.
	        nh: Number of attention heads.
	        hd: Features per attention head.
	        i: Inner width of the feed-forward layer.
	    """

	    def __init__(self, h, nh, hd, i):
	        super().__init__()
	        self.an = nn.RMSNorm(h, eps=1e-6)
	        self.fn = nn.RMSNorm(h, eps=1e-6)
	        self.attn = Attention(h, nh, hd)
	        self.ff = FF(h, i)

	    def forward(self, x, c, s, m):
	        """Run the layer on ``x`` with rotary tables ``c``/``s`` and additive mask ``m``."""
	        attended = self.attn(self.an(x), c, s, m)
	        x = x + attended
	        transformed = self.ff(self.fn(x))
	        return x + transformed

	class Sage1B(nn.Module):
	    """Decoder-only transformer language model assembled from ``Block`` layers.

	    Args:
	        cfg: Mapping providing ``vocab_size``, ``hidden_size``,
	            ``num_attention_heads``, ``head_dim``, ``intermediate_size``,
	            ``num_hidden_layers`` and ``max_position_embeddings``.
	    """

	    def __init__(self, cfg):
	        super().__init__()
	        self.embed = nn.Embedding(cfg["vocab_size"], cfg["hidden_size"])
	        self.layers = nn.ModuleList([
	            Block(cfg["hidden_size"], cfg["num_attention_heads"],
	                  cfg["head_dim"], cfg["intermediate_size"])
	            for _ in range(cfg["num_hidden_layers"])
	        ])
	        self.norm = nn.RMSNorm(cfg["hidden_size"], eps=1e-6)
	        self.head = nn.Linear(cfg["hidden_size"], cfg["vocab_size"], bias=False)
	        # Size the rotary tables to the model's context window; the helper's
	        # default of 128 positions is shorter than sequences generate() may
	        # feed in (it only truncates at max_position_embeddings).
	        self.rotary = RotaryEmbedding(
	            cfg["head_dim"], max_seq_len=cfg["max_position_embeddings"]
	        )
	        self.max_seq_len = cfg["max_position_embeddings"]
	        self.vocab_size = cfg["vocab_size"]
	        self.hidden_size = cfg["hidden_size"]

	    def forward(self, inp):
	        """Return logits of shape (B, T, vocab_size) for token ids ``inp`` of shape (B, T)."""
	        _, T = inp.shape
	        x = self.embed(inp) * math.sqrt(self.hidden_size)
	        cos, sin = self.rotary.get_cos_sin(x, T)
	        # Causal mask: -inf above the diagonal. It is built in the
	        # activations' dtype so adding it does not promote the scores.
	        mask = torch.triu(
	            torch.full((T, T), float("-inf"), device=x.device, dtype=x.dtype),
	            diagonal=1,
	        )[None, None]
	        for layer in self.layers:
	            x = layer(x, cos, sin, mask)
	        x = self.norm(x)
	        return self.head(x)

	    @torch.no_grad()
	    def generate(self, inp, max_new=50, temp=0.8, top_k=40, eos_id=3):
	        """Autoregressively extend ``inp`` by sampling up to ``max_new`` tokens.

	        Args:
	            inp: Long tensor of token ids with shape (B, T).
	            max_new: Maximum number of tokens to append.
	            temp: Sampling temperature; values <= 0 select the arg-max token.
	            top_k: Keep only the ``top_k`` largest logits before sampling
	                (clamped to the vocabulary size); 0 or less disables it.
	            eos_id: Token id that ends generation once every row has
	                produced it; ``None`` disables early stopping.

	        Returns:
	            The token ids including the appended tokens. The context is cut
	            to the last ``max_seq_len`` tokens before each step, so very
	            long outputs lose their earliest tokens.
	        """
	        self.eval()
	        # Rows that have already emitted eos_id. Finished rows keep receiving
	        # tokens until every row is finished, so the batch stays rectangular.
	        finished = torch.zeros(inp.size(0), dtype=torch.bool, device=inp.device)
	        for _ in range(max_new):
	            if inp.size(1) > self.max_seq_len:
	                inp = inp[:, -self.max_seq_len:]
	            logits = self.forward(inp)[:, -1, :]
	            if temp <= 0:
	                # Greedy decoding avoids dividing by a zero temperature.
	                nxt = logits.argmax(dim=-1, keepdim=True)
	            else:
	                logits = logits / temp
	                if top_k > 0:
	                    # torch.topk raises when k exceeds the vocabulary size.
	                    k = min(top_k, logits.size(-1))
	                    kth = torch.topk(logits, k).values[:, -1:]
	                    logits = logits.masked_fill(logits < kth, float("-inf"))
	                probs = torch.nn.functional.softmax(logits, dim=-1)
	                nxt = torch.multinomial(probs, 1)
	            inp = torch.cat([inp, nxt], dim=1)
	            if eos_id is not None:
	                finished |= nxt.squeeze(1) == eos_id
	                if finished.all():
	                    break
	        return inp

	from tokenizers import Tokenizer as Tk
	print("Loading Sage 1B...")

	# Model hyper-parameters are read from the repository's config.json.
	cfg_p = hf_hub_download(REPO_ID, "config.json")
	with open(cfg_p, encoding="utf-8") as f:
	    cfg = json.load(f)

	tok = Tk.from_file(hf_hub_download(REPO_ID, "tokenizer.json"))

	model = Sage1B(cfg)
	sd = torch.load(hf_hub_download(REPO_ID, "pytorch_model_state.bin"),
	                map_location="cpu", weights_only=True)
	# Checkpoint entries whose key contains "rotary" are skipped; the model
	# creates its own rotary buffer in its constructor.
	_load_result = model.load_state_dict(
	    {k: v for k, v in sd.items() if "rotary" not in k}, strict=False
	)
	# strict=False never raises on a key mismatch, so report any mismatch
	# explicitly: otherwise part of the model would silently keep its random
	# initialisation. The deliberately skipped rotary keys are not reported.
	_missing = [k for k in _load_result.missing_keys if "rotary" not in k]
	_unexpected = list(_load_result.unexpected_keys)
	if _missing or _unexpected:
	    print(
	        "Warning: checkpoint does not match the model - "
	        f"{len(_missing)} missing key(s) {_missing[:5]}, "
	        f"{len(_unexpected)} unexpected key(s) {_unexpected[:5]}"
	    )
	model.eval()
	print(f"Sage 1B loaded - {sum(p.numel() for p in model.parameters()):,} params")

	def generate_text(prompt, max_length, temperature):
	    """Gradio callback: continue ``prompt`` with the model and return the decoded text.

	    Args:
	        prompt: Text to continue.
	        max_length: Number of new tokens to request (converted with ``int``).
	        temperature: Sampling temperature passed through to ``model.generate``.
	    """
	    prompt_ids = tok.encode(prompt).ids
	    # Only the first 50 prompt tokens are used, preceded by token id 2
	    # (presumably the tokenizer's BOS id - confirm against tokenizer.json).
	    model_input = torch.tensor([[2, *prompt_ids[:50]]], dtype=torch.long)
	    generated = model.generate(
	        model_input,
	        max_new=int(max_length),
	        temp=temperature,
	        top_k=40,
	    )
	    first_row = generated[0].tolist()
	    return tok.decode(first_row, skip_special_tokens=True)

	# Gradio front end: a prompt box plus two sliders feed generate_text, and
	# the generated text is shown in a single output box.
	demo = gr.Interface(
	    title="Sage 1B",
	    description="Custom 1.286B parameter language model from scratch.",
	    fn=generate_text,
	    inputs=[
	        gr.Textbox(value="Once upon a time", label="Prompt"),
	        gr.Slider(minimum=10, maximum=100, value=30, step=1, label="Max Length"),
	        gr.Slider(minimum=0.1, maximum=2.0, value=0.8, step=0.1, label="Temperature"),
	    ],
	    outputs=gr.Textbox(label="Generated Text"),
	    # Every example uses the same slider settings; only the prompt varies.
	    examples=[
	        [example_prompt, 30, 0.8]
	        for example_prompt in ("Once upon a time", "The story begins", "In a world")
	    ],
	)

	# Start the web server only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()