Release Echo-Memory Wan 2.1 1.3B memory baseline checkpoints (epoch-0)
Browse files- README.md +42 -0
- block_wise_ssm_two_chunk/epoch-0.safetensors +3 -0
- checkpoints.json +116 -0
- context_k1/epoch-0.safetensors +3 -0
- context_k20/epoch-0.safetensors +3 -0
- spatial_concat_text_two_chunk/epoch-0.safetensors +3 -0
- spatial_cross_attn_readout_t32_g4_two_chunk/epoch-0.safetensors +3 -0
- spatial_inject_none_two_chunk/epoch-0.safetensors +3 -0
- spatial_mem/epoch-0.safetensors +3 -0
- ssm_ablation_ctx1_every4_hint21/epoch-0.safetensors +3 -0
- ssm_ablation_ctx5_every1_hint21/epoch-0.safetensors +3 -0
- ssm_ablation_ctx5_every4_hint81/epoch-0.safetensors +3 -0
- videossm_hybrid/epoch-0.safetensors +3 -0
README.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: cc-by-4.0
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
tags:
|
| 6 |
+
- video-generation
|
| 7 |
+
- world-model
|
| 8 |
+
- memory
|
| 9 |
+
- action-conditioned
|
| 10 |
+
- wan
|
| 11 |
+
library_name: diffsynth
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Echo-Memory — Wan 2.1 1.3B memory baseline checkpoints
|
| 15 |
+
|
| 16 |
+
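Fine-tuned **epoch-0** checkpoints (saved at the end of the single training epoch) for the memory baselines in the Echo-Memory controlled memory study; each checkpoint corresponds to one row reported in the paper ([GitHub](https://github.com/Echo-Team-Joy-Future-Academy-JD/Echo-Memory) · [project page](https://echo-team-joy-future-academy-jd.github.io/Echo-Memory/)).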
|
| 17 |
+
|
| 18 |
+
**Backbone:** [Wan-AI/Wan2.1-T2V-1.3B](https://huggingface.co/Wan-AI/Wan2.1-T2V-1.3B)<br>
|
| 19 |
+
**Training:** static in-domain pool, 1 epoch, **30,000 steps**, 640×352, 81-frame chunks<br>
|
| 20 |
+
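**File layout:** `{row_id}/epoch-0.safetensors`, where `{row_id}` is the `id` of an entry in `checkpoints.json` (e.g. `context_k1`) — see `checkpoints.json` for per-checkpoint metadata (paper row, training recipe, step count).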
|
| 21 |
+
|
| 22 |
+
## Download
|
| 23 |
+
|
| 24 |
+
```bash
|
| 25 |
+
pip install -U "huggingface_hub[cli]"
|
| 26 |
+
huggingface-cli download Echo-Team/Echo-Memory context_k1/epoch-0.safetensors --local-dir ./ckpts
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
## Usage with Echo-Memory eval
|
| 30 |
+
|
| 31 |
+
```bash
|
| 32 |
+
export WAN_BASE_MODEL=/path/to/Wan2.1-T2V-1.3B
|
| 33 |
+
export DATASET_BASE_PATH=data/Context-as-Memory-Dataset
|
| 34 |
+
export CKPT=./ckpts/context_k1/epoch-0.safetensors
|
| 35 |
+
bash eval/v2/run_static_consistency_loop_and_revisit.sh
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
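Runtime memory flags are inferred from the checkpoint path via `env/memory_baseline_runtime.py`. Because every file is named `epoch-0.safetensors`, keep the `{row_id}/` directory name (e.g. `context_k1/`) intact when moving or copying a checkpoint.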
|
| 39 |
+
|
| 40 |
+
## Citation
|
| 41 |
+
|
| 42 |
+
If you use these checkpoints, please cite: *Echo-Memory: A Controlled Study of Memory in Action World Models* — Echo Team @ Joy Future Academy, JD. ResearchGate, DOI: [10.13140/RG.2.2.19906.34248](https://doi.org/10.13140/RG.2.2.19906.34248).
|
block_wise_ssm_two_chunk/epoch-0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb4fed0db344738a7a27983d2573c0b395975c0d76a1fd506ef4a652eb2adae0
|
| 3 |
+
size 3406100392
|
checkpoints.json
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"repo_id": "Echo-Team/Echo-Memory",
|
| 3 |
+
"backbone": "Wan2.1-T2V-1.3B",
|
| 4 |
+
"checkpoints": [
|
| 5 |
+
{
|
| 6 |
+
"id": "context_k1",
|
| 7 |
+
"family": "Raw context",
|
| 8 |
+
"paper_row": "Context K=1",
|
| 9 |
+
"train_recipe": "train/context_learning/run_pre_qkv_ctx1.sh",
|
| 10 |
+
"hf_path": "context_k1/epoch-0.safetensors",
|
| 11 |
+
"source_subdir": "exp1_4_4_cam_rt_paper_style_from_scratch_merged_cam_ctx_1_noise_15_atomic_cam_inject_pre_qkv_single_rt",
|
| 12 |
+
"training_steps": 30000,
|
| 13 |
+
"backbone": "Wan2.1-T2V-1.3B"
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"id": "context_k20",
|
| 17 |
+
"family": "Raw context",
|
| 18 |
+
"paper_row": "Context K=20",
|
| 19 |
+
"train_recipe": "train/context_learning/run_pre_qkv_ctx20.sh",
|
| 20 |
+
"hf_path": "context_k20/epoch-0.safetensors",
|
| 21 |
+
"source_subdir": "exp1_4_4_cam_rt_paper_style_from_scratch_merged_cam_ctx_20_noise_15_atomic_cam_inject_pre_qkv_per_frame_vae",
|
| 22 |
+
"training_steps": 30000,
|
| 23 |
+
"backbone": "Wan2.1-T2V-1.3B"
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"id": "spatial_mem",
|
| 27 |
+
"family": "Spatial",
|
| 28 |
+
"paper_row": "Spatial Memory",
|
| 29 |
+
"train_recipe": "train/memory_baselines_basic/run_spatial_memory_baseline.sh",
|
| 30 |
+
"hf_path": "spatial_mem/epoch-0.safetensors",
|
| 31 |
+
"source_subdir": "exp1_4_4_cam_rt_paper_style_local/exp1_4_4_cam_rt_paper_style_memory_baselines_basic_spatial_mem",
|
| 32 |
+
"training_steps": 30000,
|
| 33 |
+
"backbone": "Wan2.1-T2V-1.3B"
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"id": "block_wise_ssm_two_chunk",
|
| 37 |
+
"family": "State-space",
|
| 38 |
+
"paper_row": "Block-wise SSM",
|
| 39 |
+
"train_recipe": "train/memory_baselines_basic/run_ablation_block_wise_ssm_two_chunk.sh",
|
| 40 |
+
"hf_path": "block_wise_ssm_two_chunk/epoch-0.safetensors",
|
| 41 |
+
"source_subdir": "exp1_4_4_cam_rt_paper_style_local/exp1_4_4_cam_rt_paper_style_memory_baselines_basic_abl_block_wise_ssm_two_chunk",
|
| 42 |
+
"training_steps": 30000,
|
| 43 |
+
"backbone": "Wan2.1-T2V-1.3B"
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"id": "videossm_hybrid",
|
| 47 |
+
"family": "State-space",
|
| 48 |
+
"paper_row": "Legacy Hybrid (VideoSSM)",
|
| 49 |
+
"train_recipe": "train/memory_baselines_basic/run_videossm_hybrid_baseline.sh",
|
| 50 |
+
"hf_path": "videossm_hybrid/epoch-0.safetensors",
|
| 51 |
+
"source_subdir": "exp1_4_4_cam_rt_paper_style_local/exp1_4_4_cam_rt_paper_style_memory_baselines_basic_videossm_hybrid",
|
| 52 |
+
"training_steps": 30000,
|
| 53 |
+
"backbone": "Wan2.1-T2V-1.3B"
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"id": "spatial_concat_text_two_chunk",
|
| 57 |
+
"family": "Spatial",
|
| 58 |
+
"paper_row": "Spatial \u2014 concat text readout (ablation)",
|
| 59 |
+
"train_recipe": "train/memory_baselines_basic/run_ablation_spatial_concat_text_two_chunk.sh",
|
| 60 |
+
"hf_path": "spatial_concat_text_two_chunk/epoch-0.safetensors",
|
| 61 |
+
"source_subdir": "exp1_4_4_cam_rt_paper_style_local/exp1_4_4_cam_rt_paper_style_memory_baselines_basic_abl_spatial_concat_text_two_chunk",
|
| 62 |
+
"training_steps": 30000,
|
| 63 |
+
"backbone": "Wan2.1-T2V-1.3B"
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"id": "spatial_inject_none_two_chunk",
|
| 67 |
+
"family": "Spatial",
|
| 68 |
+
"paper_row": "Spatial \u2014 inject none (ablation)",
|
| 69 |
+
"train_recipe": "train/memory_baselines_basic/run_ablation_spatial_inject_none_two_chunk.sh",
|
| 70 |
+
"hf_path": "spatial_inject_none_two_chunk/epoch-0.safetensors",
|
| 71 |
+
"source_subdir": "exp1_4_4_cam_rt_paper_style_local/exp1_4_4_cam_rt_paper_style_memory_baselines_basic_abl_spatial_inject_none_two_chunk",
|
| 72 |
+
"training_steps": 30000,
|
| 73 |
+
"backbone": "Wan2.1-T2V-1.3B"
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"id": "spatial_cross_attn_readout_t32_g4_two_chunk",
|
| 77 |
+
"family": "Spatial",
|
| 78 |
+
"paper_row": "Spatial \u2014 cross-attn readout t32 (ablation)",
|
| 79 |
+
"train_recipe": "train/memory_baselines_basic/run_ablation_spatial_cross_attn_readout_two_chunk.sh",
|
| 80 |
+
"hf_path": "spatial_cross_attn_readout_t32_g4_two_chunk/epoch-0.safetensors",
|
| 81 |
+
"source_subdir": "exp1_4_4_cam_rt_paper_style_local/exp1_4_4_cam_rt_paper_style_memory_baselines_basic_abl_spatial_cross_attn_readout_t32_g4_two_chunk",
|
| 82 |
+
"training_steps": 30000,
|
| 83 |
+
"backbone": "Wan2.1-T2V-1.3B"
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"id": "ssm_ablation_ctx1_every4_hint21",
|
| 87 |
+
"family": "State-space",
|
| 88 |
+
"paper_row": "SSM ablation \u2014 ctx=1, every 4 blocks, hint 21",
|
| 89 |
+
"train_recipe": "internal SSM ablation recipe",
|
| 90 |
+
"hf_path": "ssm_ablation_ctx1_every4_hint21/epoch-0.safetensors",
|
| 91 |
+
"source_subdir": "exp1_4_4_cam_rt_paper_style_from_scratch_ssm_ablation_ctx1_every4_hint21",
|
| 92 |
+
"training_steps": 30000,
|
| 93 |
+
"backbone": "Wan2.1-T2V-1.3B"
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"id": "ssm_ablation_ctx5_every1_hint21",
|
| 97 |
+
"family": "State-space",
|
| 98 |
+
"paper_row": "SSM ablation \u2014 ctx=5, every 1 block, hint 21",
|
| 99 |
+
"train_recipe": "internal SSM ablation recipe",
|
| 100 |
+
"hf_path": "ssm_ablation_ctx5_every1_hint21/epoch-0.safetensors",
|
| 101 |
+
"source_subdir": "exp1_4_4_cam_rt_paper_style_from_scratch_ssm_ablation_ctx5_every1_hint21",
|
| 102 |
+
"training_steps": 30000,
|
| 103 |
+
"backbone": "Wan2.1-T2V-1.3B"
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"id": "ssm_ablation_ctx5_every4_hint81",
|
| 107 |
+
"family": "State-space",
|
| 108 |
+
"paper_row": "SSM ablation \u2014 ctx=5, every 4 blocks, hint 81",
|
| 109 |
+
"train_recipe": "internal SSM ablation recipe",
|
| 110 |
+
"hf_path": "ssm_ablation_ctx5_every4_hint81/epoch-0.safetensors",
|
| 111 |
+
"source_subdir": "exp1_4_4_cam_rt_paper_style_from_scratch_ssm_ablation_ctx5_every4_hint81",
|
| 112 |
+
"training_steps": 30000,
|
| 113 |
+
"backbone": "Wan2.1-T2V-1.3B"
|
| 114 |
+
}
|
| 115 |
+
]
|
| 116 |
+
}
|
context_k1/epoch-0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:adf323dc903c96b8ff6e73e284f55aeec45ec16eaa723c25ad03d29e269edbfa
|
| 3 |
+
size 3406100392
|
context_k20/epoch-0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46578700bd62a2a940d6da887ad2e69fd76ff0c642cec538f7821a4e8d3b40fd
|
| 3 |
+
size 3406100392
|
spatial_concat_text_two_chunk/epoch-0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:239196a6739bba214ca947fc88d9fabc0e75b9d586fc37cb4fe752b97d699630
|
| 3 |
+
size 3406100392
|
spatial_cross_attn_readout_t32_g4_two_chunk/epoch-0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00521a91a4d4cb4b799f1bcc754809018d49aa4d41cfab2c75d55b2c68037029
|
| 3 |
+
size 3406100392
|
spatial_inject_none_two_chunk/epoch-0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf6e4d30860807fc158fbd4b3298d952dfaa429e90b18f078107e93ef83771ea
|
| 3 |
+
size 3406100392
|
spatial_mem/epoch-0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:741d3fd26ca344d05743f80cd51f79f8251cb3dc1e0159378d79aff0735f5976
|
| 3 |
+
size 3406100392
|
ssm_ablation_ctx1_every4_hint21/epoch-0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4b0ba18ed76cb027ccd61e0e6d42596ba339fec76a6fe7c1a51921d81e5aea6
|
| 3 |
+
size 3406100392
|
ssm_ablation_ctx5_every1_hint21/epoch-0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a7f55f099358ca436e9f337dba53f76e31fa22afb27cdacdbaf3b290ff9036c
|
| 3 |
+
size 3406100392
|
ssm_ablation_ctx5_every4_hint81/epoch-0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:211e428409e167201d33d598945d0b2660607f2dec98b0bf218fb8f0c5481740
|
| 3 |
+
size 3406100392
|
videossm_hybrid/epoch-0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:748fa086896603a866ee93a7e9612262193e96f2e150ff5fabf42dc1bd141939
|
| 3 |
+
size 3406100392
|