dpe1's picture
download
raw
1.5 kB
import torch
import torch.nn as nn
class TinyReasonerModel(nn.Module):
    """Small LSTM language model: embedding -> stacked LSTM -> linear head.

    Args:
        vocab_size: Number of distinct token ids. Sizes both the embedding
            table and the last dimension of the output logits.
        embed_dim: Width of each token embedding vector.
        hidden_dim: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(
        self,
        vocab_size: int,
        embed_dim: int = 128,
        hidden_dim: int = 256,
        num_layers: int = 2,
    ):
        super().__init__()
        # Submodules are created in this fixed order so that a given RNG seed
        # always yields the same initial weights.
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        # batch_first=True: tensors flow through as (batch, seq_len, features).
        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x: torch.Tensor, hidden=None):
        """Compute per-position vocabulary logits for a batch of token ids.

        Args:
            x: Integer token ids of shape (batch, seq_len).
            hidden: Optional recurrent state previously returned by this
                method. ``None`` lets the LSTM start from its default
                (zero) state.

        Returns:
            A ``(logits, hidden)`` pair: ``logits`` has shape
            (batch, seq_len, vocab_size) and ``hidden`` is the state returned
            by the LSTM, suitable for passing back in on the next call.
        """
        embeds = self.embedding(x)  # (batch, seq_len, embed_dim)
        lstm_out, hidden = self.lstm(embeds, hidden)  # (batch, seq_len, hidden_dim)
        logits = self.fc(lstm_out)  # (batch, seq_len, vocab_size)
        return logits, hidden
def count_parameters(model: nn.Module) -> int:
    """Return the number of trainable scalar parameters in ``model``.

    Only parameters with ``requires_grad`` set are counted, so frozen
    weights are excluded from the total.

    Args:
        model: Module whose ``parameters()`` are inspected.

    Returns:
        Sum of ``numel()`` over every parameter that requires gradients.
    """
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
if __name__ == "__main__":
    # Smoke test: build the model for the project's character tokenizer,
    # report its size against the 1M-parameter budget, and check that one
    # forward pass yields logits of the expected shape.
    from src.tokenizer import CharTokenizer

    tokenizer = CharTokenizer()
    model = TinyReasonerModel(tokenizer.vocab_size)
    params = count_parameters(model)
    print(f"Vocab size: {tokenizer.vocab_size}")
    print(f"Total parameters: {params}")
    if params < 1_000_000:
        print("Model is under 1 million parameters.")
    else:
        print("Model is TOO BIG!")

    # Test forward pass
    test_input = torch.tensor([tokenizer.encode("test")]).long()
    # No gradients are needed for a shape check, so skip building the graph.
    with torch.no_grad():
        logits, _ = model(test_input)
    print(f"Input shape: {test_input.shape}")
    print(f"Logits shape: {logits.shape}")

    # Derive the expected sequence length from the input itself so the check
    # does not depend on how many ids the tokenizer emits for "test".
    expected_shape = (1, test_input.shape[1], tokenizer.vocab_size)
    # Raise explicitly: a bare `assert` is stripped under `python -O`, which
    # would let the success message print without any check having run.
    if tuple(logits.shape) != expected_shape:
        raise AssertionError(
            f"unexpected logits shape {tuple(logits.shape)}, expected {expected_shape}"
        )
    print("Model test passed!")

Xet Storage Details

Size:
1.5 kB
·
Xet hash:
7862f6a26c5acca718914a288d875650fe3ff3b5d0f0c2129e207656bfa88fbc

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.