Upload folder using huggingface_hub
- README.md +66 -0
- __pycache__/modeling_tinylm.cpython-312.pyc +0 -0
- config.json +11 -0
- modeling_tinylm.py +117 -0
- pytorch_model.bin +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +12 -0
README.md
ADDED
---
language: en
license: mit
tags:
- tiny
- language-model
- causal-lm
- from-scratch
- pytorch
---

# TinyLM

A ~1M-parameter causal language model trained from scratch, for fun and experimentation.

## Architecture

| Hyperparameter | Value |
|---|---|
| Parameters | ~1M |
| Layers | 4 |
| Hidden size | 64 |
| Attention heads | 4 |
| FFN dim | 192 |
| Embedding rank | 32 |
| Context length | 256 |
| Tokenizer | GPT-2 (50,257 vocab) |

The model uses a **factored (low-rank) embedding** so that the vocabulary projection does not consume the entire parameter budget, and ties the output head to the input embedding table.
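
To see why the factoring matters, here is a quick back-of-the-envelope check (plain Python, numbers taken from the table above):

```python
# Embedding parameter count: full vocab x d_model table vs. the factored version.
vocab_size, d_model, rank = 50257, 64, 32

full_embedding = vocab_size * d_model               # 3,216,448 parameters
factored = vocab_size * rank + rank * d_model       # 1,610,272 parameters

print(f"full embedding      : {full_embedding:,}")
print(f"factored (rank {rank}) : {factored:,}")
# Weight tying reuses the same rank-32 table for the output head,
# so that cost is not paid a second time.
```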

## Training

| Setting | Value |
|---|---|
| Datasets | Skylion007/openwebtext (10k samples), roneneldan/TinyStories (10k samples) |
| Optimizer | AdamW (lr=3e-3, weight_decay=0.01) |
| Scheduler | Cosine annealing with warm restarts |
| Mixed precision | fp16 (torch.cuda.amp) |
| Hardware | NVIDIA P100 (Kaggle) |
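
The training script itself is not included in this repo. As a rough sketch of the recipe in the table (AdamW, cosine annealing with warm restarts, fp16 autocast), a minimal loop could look like the following; the dataloader, batch shape, and `T_0` are illustrative assumptions, not the actual settings:

```python
import torch
from torch.cuda.amp import GradScaler, autocast

# Optimizer, scheduler, and precision follow the table above; everything else is assumed.
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-3, weight_decay=0.01)
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=1000)
scaler = GradScaler()
loss_fn = torch.nn.CrossEntropyLoss()

for input_ids in train_loader:                     # (batch, seq_len) token ids, assumed
    input_ids = input_ids.to("cuda")
    optimizer.zero_grad(set_to_none=True)
    with autocast():                               # fp16 mixed precision
        logits = model(input_ids[:, :-1])          # predict the next token at each position
        loss = loss_fn(logits.reshape(-1, logits.size(-1)),
                       input_ids[:, 1:].reshape(-1))
    scaler.scale(loss).backward()                  # scale to avoid fp16 underflow
    scaler.step(optimizer)
    scaler.update()
    scheduler.step()
```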

## Usage

```python
from huggingface_hub import snapshot_download
import importlib.util
import torch

# Download all files
snapshot_download(repo_id="Fu01978/TinyLM", local_dir="./tinylm")

# Load via included script
spec = importlib.util.spec_from_file_location("modeling_tinylm", "./tinylm/modeling_tinylm.py")
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)

model, tokenizer, config = module.load_tinylm("./tinylm")
model.eval()

# Generate
output = module.generate(model, tokenizer, "Once upon a time")
print(output)
```
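
Both `load_tinylm` and `generate` accept a `device` argument, so on a GPU machine the same flow can run on CUDA:

```python
model, tokenizer, config = module.load_tinylm("./tinylm", device="cuda")
print(module.generate(model, tokenizer, "Once upon a time", device="cuda"))
```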

## Example Output

**Prompt:** Once upon a time

**Output:** Once upon a time there was a little girl named Mrs. She decided to go and be a little girl in the park. One day she had to go on a bed. From then on a lot of bread. She said, "What are you doing?" ...
__pycache__/modeling_tinylm.cpython-312.pyc
ADDED
Binary file (8.23 kB).
config.json
ADDED
{
  "model_type": "TinyLM",
  "vocab_size": 50257,
  "embed_rank": 32,
  "d_model": 64,
  "n_heads": 4,
  "ffn_dim": 192,
  "n_layers": 4,
  "max_seq_len": 256,
  "dropout": 0.0
}
modeling_tinylm.py
ADDED
import json
import torch
import torch.nn as nn
from transformers import GPT2Tokenizer


def load_tinylm(model_dir, device="cpu"):
    # Load config
    with open(f"{model_dir}/config.json") as f:
        config = json.load(f)

    VOCAB_SIZE = config["vocab_size"]
    EMBED_RANK = config["embed_rank"]
    D_MODEL = config["d_model"]
    N_HEADS = config["n_heads"]
    FFN_DIM = config["ffn_dim"]
    N_LAYERS = config["n_layers"]
    MAX_SEQ_LEN = config["max_seq_len"]
    DROPOUT = config["dropout"]

    class FactoredEmbedding(nn.Module):
        # Low-rank token embedding: vocab -> rank -> d_model
        def __init__(self, vocab_size, rank, d_model):
            super().__init__()
            self.in_proj = nn.Embedding(vocab_size, rank)
            self.out_proj = nn.Linear(rank, d_model, bias=False)

        def forward(self, x):
            return self.out_proj(self.in_proj(x))

    class TransformerBlock(nn.Module):
        def __init__(self):
            super().__init__()
            self.ln1 = nn.LayerNorm(D_MODEL)
            self.attn = nn.MultiheadAttention(D_MODEL, N_HEADS, dropout=DROPOUT, batch_first=True)
            self.ln2 = nn.LayerNorm(D_MODEL)
            self.ffn = nn.Sequential(
                nn.Linear(D_MODEL, FFN_DIM),
                nn.GELU(),
                nn.Linear(FFN_DIM, D_MODEL),
                nn.Dropout(DROPOUT),
            )

        def forward(self, x, attn_mask=None, key_padding_mask=None):
            x_norm = self.ln1(x)
            attn_out, _ = self.attn(x_norm, x_norm, x_norm,
                                    attn_mask=attn_mask,
                                    key_padding_mask=key_padding_mask,
                                    is_causal=True)
            x = x + attn_out
            x = x + self.ffn(self.ln2(x))
            return x

    class TinyLM(nn.Module):
        def __init__(self):
            super().__init__()
            self.tok_emb = FactoredEmbedding(VOCAB_SIZE, EMBED_RANK, D_MODEL)
            self.pos_emb = nn.Embedding(MAX_SEQ_LEN, D_MODEL)
            self.drop = nn.Dropout(DROPOUT)
            self.blocks = nn.ModuleList([TransformerBlock() for _ in range(N_LAYERS)])
            self.ln_final = nn.LayerNorm(D_MODEL)
            self.head_down = nn.Linear(D_MODEL, EMBED_RANK, bias=False)
            self.head_vocab = nn.Linear(EMBED_RANK, VOCAB_SIZE, bias=False)
            # Weight tying: the output head reuses the rank-r embedding table
            self.head_vocab.weight = nn.Parameter(self.tok_emb.in_proj.weight)

        def forward(self, idx):
            B, T = idx.shape
            if T > MAX_SEQ_LEN:
                idx = idx[:, :MAX_SEQ_LEN]
                T = idx.shape[1]
            positions = torch.arange(T, device=idx.device).unsqueeze(0)
            x = self.drop(self.tok_emb(idx) + self.pos_emb(positions))
            # Causal mask: each position attends only to earlier positions
            mask = nn.Transformer.generate_square_subsequent_mask(T, device=idx.device)
            for block in self.blocks:
                x = block(x, attn_mask=mask)
            x = self.ln_final(x)
            x = self.head_down(x)
            return self.head_vocab(x)

    # Build and load weights
    model = TinyLM().to(device)
    state_dict = torch.load(f"{model_dir}/pytorch_model.bin", map_location=device)
    model.load_state_dict(state_dict)
    model.eval()

    # Load tokenizer
    tokenizer = GPT2Tokenizer.from_pretrained(model_dir)
    tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer, config


def generate(model, tokenizer, prompt, max_new_tokens=100, temperature=0.8, top_k=40, device="cpu"):
    MAX_SEQ_LEN = model.pos_emb.num_embeddings
    model.eval()
    ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    with torch.no_grad():
        for _ in range(max_new_tokens):
            idx_cond = ids[:, -MAX_SEQ_LEN:]
            logits = model(idx_cond)
            logits = logits[:, -1, :] / temperature
            if top_k is not None:
                # Top-k filtering: drop everything below the k-th largest logit
                values, _ = torch.topk(logits, top_k)
                logits[logits < values[:, -1:]] = -float("inf")
            probs = torch.softmax(logits, dim=-1)
            next_id = torch.multinomial(probs, num_samples=1)
            if next_id.item() == tokenizer.eos_token_id:
                break
            ids = torch.cat([ids, next_id], dim=1)

    return tokenizer.decode(ids[0], skip_special_tokens=True)


if __name__ == "__main__":
    model, tokenizer, config = load_tinylm("./tinylm")
    print("Model loaded!")
    print(generate(model, tokenizer, "Once upon a time"))
pytorch_model.bin
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:c11755f9b669393914fd960a75a855cd4bb5fa80c39853878b6de735fc975794
size 13635564
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
{
  "add_prefix_space": false,
  "backend": "tokenizers",
  "bos_token": "<|endoftext|>",
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "is_local": false,
  "model_max_length": 1024,
  "pad_token": "<|endoftext|>",
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>"
}