Justin Brown
Add checkpoint + card (ASA_ASM_wt103-rawv1_gpt2_T1024_L21_D384_H8_K16_M32_ropek1_alibi1_gamma1_step75000_best.pt)
6084694
verified
| { | |
| "repo_id": "DigitalShogun/ASA-ASM-wikitext103-raw", | |
| "exported_at_utc": "2026-01-25T14:38:05.116518+00:00", | |
| "checkpoint_filename": "ASA_ASM_wt103-rawv1_gpt2_T1024_L21_D384_H8_K16_M32_ropek1_alibi1_gamma1_step75000_best.pt", | |
| "checkpoint_sha256": "3ebad7be0e98fa7c50aa7cb6c5b709f0b4cf218b2a021a6f8aab21e5ecd94165", | |
| "checkpoint_bytes": 680885861, | |
| "github_repo": "https://github.com/digitaldaimyo/ASA", | |
| "github_revision": "codex/implement-runnable-and-releasable-setup", | |
| "train_config_in_ckpt": { | |
| "dataset_name": "wikitext", | |
| "dataset_config": "wikitext-103-raw-v1", | |
| "tokenizer_name": "gpt2", | |
| "max_seq_len": 1024, | |
| "stride_frac_val": 0.5, | |
| "seed": 1337, | |
| "train_samples_target": 100000000, | |
| "val_samples_target": 25000, | |
| "batch_size": 32, | |
| "learning_rate": 0.0003, | |
| "weight_decay": 0.01, | |
| "betas": [ | |
| 0.9, | |
| 0.95 | |
| ], | |
| "grad_clip": 1.0, | |
| "warmup_steps": 1000, | |
| "total_steps": 75000, | |
| "eval_interval": 1000, | |
| "log_interval": 100, | |
| "vocab_size": 50257, | |
| "embed_dim": 384, | |
| "num_layers": 21, | |
| "num_heads": 8, | |
| "num_slots": 16, | |
| "mlp_ratio": 4.0, | |
| "dropout": 0.1, | |
| "tie_weights": true, | |
| "read_temperature": 1.0, | |
| "write_temperature": 1.0, | |
| "slot_dropout": 0.05, | |
| "state_fp32": true, | |
| "normalize_k": false, | |
| "use_abs_pos": false, | |
| "use_rope_keys": true, | |
| "rope_base": 10000.0, | |
| "use_alibi_write": true, | |
| "alibi_strength_init": 0.1, | |
| "learn_alibi_strength": true, | |
| "min_strength": 0.0, | |
| "use_content_read": true, | |
| "content_read_init": -4.0, | |
| "content_read_max_gamma": 3.0, | |
| "use_slotspace_refine": true, | |
| "slotspace_dim": 32, | |
| "slotspace_gate_init": -4.0, | |
| "slotspace_dropout": 0.05, | |
| "slotspace_signed_weights": true, | |
| "use_rope_slotspace": true, | |
| "rope_base_slotspace": 100000.0, | |
| "write_chunk_size": 128, | |
| "slotspace_chunk_size": 128, | |
| "eval_max_batches": 150, | |
| "analytics_last_k": 32, | |
| "output_dir": "./drive/MyDrive/asm_outputs", | |
| "tag": "asm_wikitext_1024t_384d_32sd_16s_35l", | |
| "cache_dir": "./drive/MyDrive/asm_caches", | |
| "val_windows_cache": "./drive/MyDrive/asm_nlp/val_cache_wikitext_windows_1024.pkl" | |
| }, | |
| "ckpt_step_fields": { | |
| "global_step": null, | |
| "step": 75000, | |
| "train_step": null, | |
| "epoch": null | |
| } | |
| } |