{ "metadata": { "format": "torch", "version": "1.0", "model": "micro-distill-grpo-vae", "hidden_size": 512, "num_layers": 8, "num_heads": 8, "vocab_size": 50257, "training_steps": 100 }, "tensors": { "transformer.wte.weight": { "shape": [ 50257, 512 ], "dtype": "float32", "size": "98.2 MB" }, "transformer.wpe.weight": { "shape": [ 1024, 512 ], "dtype": "float32", "size": "2.0 MB" }, "transformer.h.0.ln_1.weight": { "shape": [ 512 ], "dtype": "float32", "size": "2.0 KB" }, "transformer.h.0.attn.c_attn.weight": { "shape": [ 512, 1536 ], "dtype": "float32", "size": "3.0 MB" }, "transformer.h.0.mlp.c_fc.weight": { "shape": [ 512, 2048 ], "dtype": "float32", "size": "4.0 MB" }, "transformer.h.1.ln_1.weight": { "shape": [ 512 ], "dtype": "float32", "size": "2.0 KB" }, "transformer.h.1.attn.c_attn.weight": { "shape": [ 512, 1536 ], "dtype": "float32", "size": "3.0 MB" }, "transformer.h.1.mlp.c_fc.weight": { "shape": [ 512, 2048 ], "dtype": "float32", "size": "4.0 MB" }, "transformer.h.2.ln_1.weight": { "shape": [ 512 ], "dtype": "float32", "size": "2.0 KB" }, "transformer.h.2.attn.c_attn.weight": { "shape": [ 512, 1536 ], "dtype": "float32", "size": "3.0 MB" }, "transformer.h.2.mlp.c_fc.weight": { "shape": [ 512, 2048 ], "dtype": "float32", "size": "4.0 MB" }, "transformer.h.3.ln_1.weight": { "shape": [ 512 ], "dtype": "float32", "size": "2.0 KB" }, "transformer.h.3.attn.c_attn.weight": { "shape": [ 512, 1536 ], "dtype": "float32", "size": "3.0 MB" }, "transformer.h.3.mlp.c_fc.weight": { "shape": [ 512, 2048 ], "dtype": "float32", "size": "4.0 MB" }, "transformer.h.4.ln_1.weight": { "shape": [ 512 ], "dtype": "float32", "size": "2.0 KB" }, "transformer.h.4.attn.c_attn.weight": { "shape": [ 512, 1536 ], "dtype": "float32", "size": "3.0 MB" }, "transformer.h.4.mlp.c_fc.weight": { "shape": [ 512, 2048 ], "dtype": "float32", "size": "4.0 MB" }, "transformer.h.5.ln_1.weight": { "shape": [ 512 ], "dtype": "float32", "size": "2.0 KB" }, "transformer.h.5.attn.c_attn.weight": { "shape": [ 512, 1536 ], "dtype": "float32", "size": "3.0 MB" }, "transformer.h.5.mlp.c_fc.weight": { "shape": [ 512, 2048 ], "dtype": "float32", "size": "4.0 MB" }, "transformer.h.6.ln_1.weight": { "shape": [ 512 ], "dtype": "float32", "size": "2.0 KB" }, "transformer.h.6.attn.c_attn.weight": { "shape": [ 512, 1536 ], "dtype": "float32", "size": "3.0 MB" }, "transformer.h.6.mlp.c_fc.weight": { "shape": [ 512, 2048 ], "dtype": "float32", "size": "4.0 MB" }, "transformer.h.7.ln_1.weight": { "shape": [ 512 ], "dtype": "float32", "size": "2.0 KB" }, "transformer.h.7.attn.c_attn.weight": { "shape": [ 512, 1536 ], "dtype": "float32", "size": "3.0 MB" }, "transformer.h.7.mlp.c_fc.weight": { "shape": [ 512, 2048 ], "dtype": "float32", "size": "4.0 MB" }, "lm_head.weight": { "shape": [ 50257, 512 ], "dtype": "float32", "size": "98.2 MB" } } }