Update README.md
Browse files
README.md
CHANGED
|
@@ -41,7 +41,7 @@ tokenizer = AutoTokenizer.from_pretrained("gpt2")
|
|
| 41 |
print(generate(model, tokenizer, "Once upon a time"))
|
| 42 |
|
| 43 |
Performance
|
| 44 |
-
FineWeb, 187M params: 3.73 val loss / 41.6 PPL (75k steps)
|
| 45 |
Architecture: 21 layers, 768d, 12 heads, 16 slots
|
| 46 |
Links
|
| 47 |
Code: https://github.com/DigitalDaimyo/AddressedStateAttention
|
|
|
|
| 41 |
print(generate(model, tokenizer, "Once upon a time"))
|
| 42 |
|
| 43 |
Performance
|
| 44 |
+
FineWeb, 187M params: 3.73 val loss / 41.6 PPL (75k steps, batch size 32, sequence length 1024)
|
| 45 |
Architecture: 21 layers, 768d, 12 heads, 16 slots
|
| 46 |
Links
|
| 47 |
Code: https://github.com/DigitalDaimyo/AddressedStateAttention
|