diff --git a/.gitattributes b/.gitattributes index a676daa012036b28b9270480846155e6baf78e2e..34049f7a249a6e9be6eaeab69116449e935bb78d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -54,3 +54,5 @@ ms-swift-data/video_sft_full_v800k_sharegpt.json filter=lfs diff=lfs merge=lfs - ms-swift-data/video_sft_small_10pct.json filter=lfs diff=lfs merge=lfs -text ms-swift-data/video_sft_small_10pct_sharegpt.json filter=lfs diff=lfs merge=lfs -text llava_video/llava_video_178k_v9_decord_good_manifest.json filter=lfs diff=lfs merge=lfs -text +legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/tokenizer.json filter=lfs diff=lfs merge=lfs -text +legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/declip_siglip2/spatial_align/latest b/legacy/declip_siglip2/spatial_align/latest similarity index 100% rename from declip_siglip2/spatial_align/latest rename to legacy/declip_siglip2/spatial_align/latest diff --git a/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt similarity index 100% rename from declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt rename to legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt diff --git a/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt similarity index 100% rename from declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt rename to legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt diff --git a/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt similarity index 100% rename from declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt rename to legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt diff --git a/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt similarity index 100% rename from declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt rename to legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt diff --git a/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt similarity index 100% rename from declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt rename to legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt diff --git a/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt similarity index 100% rename from declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt rename to legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt diff --git a/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt similarity index 100% rename from declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt rename to legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt diff --git a/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt similarity index 100% rename from declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt rename to legacy/declip_siglip2/spatial_align/step011088/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt diff --git a/declip_siglip2/spatial_align/step011088/mp_rank_00_model_states.pt b/legacy/declip_siglip2/spatial_align/step011088/mp_rank_00_model_states.pt similarity index 100% rename from declip_siglip2/spatial_align/step011088/mp_rank_00_model_states.pt rename to legacy/declip_siglip2/spatial_align/step011088/mp_rank_00_model_states.pt diff --git a/declip_siglip2/spatial_align/zero_to_fp32.py b/legacy/declip_siglip2/spatial_align/zero_to_fp32.py similarity index 100% rename from declip_siglip2/spatial_align/zero_to_fp32.py rename to legacy/declip_siglip2/spatial_align/zero_to_fp32.py diff --git a/kd_mllm/s1_kd_pretrain/args.json b/legacy/kd_mllm/s1_kd_pretrain/args.json similarity index 100% rename from kd_mllm/s1_kd_pretrain/args.json rename to legacy/kd_mllm/s1_kd_pretrain/args.json diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/args.json b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/args.json similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/args.json rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/args.json diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/chat_template.jinja b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/chat_template.jinja similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/chat_template.jinja rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/chat_template.jinja diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/config.json b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/config.json similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/config.json rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/config.json diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/generation_config.json b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/generation_config.json similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/generation_config.json rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/generation_config.json diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/mp_rank_00_model_states.pt b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/mp_rank_00_model_states.pt similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/mp_rank_00_model_states.pt rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/global_step2181/mp_rank_00_model_states.pt diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/latest b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/latest similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/latest rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/latest diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/model.safetensors b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/model.safetensors similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/model.safetensors rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/model.safetensors diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/processor_config.json b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/processor_config.json similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/processor_config.json rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/processor_config.json diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_0.pth b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_0.pth similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_0.pth rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_0.pth diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_1.pth b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_1.pth similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_1.pth rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_1.pth diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_2.pth b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_2.pth similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_2.pth rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_2.pth diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_3.pth b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_3.pth similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_3.pth rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_3.pth diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_4.pth b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_4.pth similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_4.pth rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_4.pth diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_5.pth b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_5.pth similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_5.pth rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_5.pth diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_6.pth b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_6.pth similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_6.pth rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_6.pth diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_7.pth b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_7.pth similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_7.pth rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/rng_state_7.pth diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/scheduler.pt b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/scheduler.pt similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/scheduler.pt rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/scheduler.pt diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/tokenizer.json b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/tokenizer.json similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/tokenizer.json rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/tokenizer.json diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/tokenizer_config.json b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/tokenizer_config.json similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/tokenizer_config.json rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/tokenizer_config.json diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/trainer_state.json b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/trainer_state.json similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/trainer_state.json rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/trainer_state.json diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/training_args.bin b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/training_args.bin similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/training_args.bin rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/training_args.bin diff --git a/kd_mllm/s1_kd_pretrain/checkpoint-2181/zero_to_fp32.py b/legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/zero_to_fp32.py similarity index 100% rename from kd_mllm/s1_kd_pretrain/checkpoint-2181/zero_to_fp32.py rename to legacy/kd_mllm/s1_kd_pretrain/checkpoint-2181/zero_to_fp32.py diff --git a/kd_mllm/s1_kd_pretrain/logging.jsonl b/legacy/kd_mllm/s1_kd_pretrain/logging.jsonl similarity index 100% rename from kd_mllm/s1_kd_pretrain/logging.jsonl rename to legacy/kd_mllm/s1_kd_pretrain/logging.jsonl diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/args.json b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/args.json similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/args.json rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/args.json diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/args.json b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/args.json similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/args.json rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/args.json diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/chat_template.jinja b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/chat_template.jinja similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/chat_template.jinja rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/chat_template.jinja diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/config.json b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/config.json similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/config.json rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/config.json diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/generation_config.json b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/generation_config.json similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/generation_config.json rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/generation_config.json diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/mp_rank_00_model_states.pt b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/mp_rank_00_model_states.pt similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/mp_rank_00_model_states.pt rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/global_step2181/mp_rank_00_model_states.pt diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/latest b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/latest similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/latest rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/latest diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/model-00001-of-00002.safetensors b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/model-00001-of-00002.safetensors similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/model-00001-of-00002.safetensors rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/model-00001-of-00002.safetensors diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/model-00002-of-00002.safetensors b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/model-00002-of-00002.safetensors similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/model-00002-of-00002.safetensors rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/model-00002-of-00002.safetensors diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/model.safetensors.index.json b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/model.safetensors.index.json similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/model.safetensors.index.json rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/model.safetensors.index.json diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/processor_config.json b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/processor_config.json similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/processor_config.json rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/processor_config.json diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_0.pth b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_0.pth similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_0.pth rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_0.pth diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_1.pth b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_1.pth similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_1.pth rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_1.pth diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_2.pth b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_2.pth similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_2.pth rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_2.pth diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_3.pth b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_3.pth similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_3.pth rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_3.pth diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_4.pth b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_4.pth similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_4.pth rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_4.pth diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_5.pth b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_5.pth similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_5.pth rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_5.pth diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_6.pth b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_6.pth similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_6.pth rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_6.pth diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_7.pth b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_7.pth similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_7.pth rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/rng_state_7.pth diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/scheduler.pt b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/scheduler.pt similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/scheduler.pt rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/scheduler.pt diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/tokenizer.json b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/tokenizer.json similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/tokenizer.json rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/tokenizer.json diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/tokenizer_config.json b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/tokenizer_config.json similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/tokenizer_config.json rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/tokenizer_config.json diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/trainer_state.json b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/trainer_state.json similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/trainer_state.json rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/trainer_state.json diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/training_args.bin b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/training_args.bin similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/training_args.bin rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/training_args.bin diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/zero_to_fp32.py b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/zero_to_fp32.py similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/zero_to_fp32.py rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/checkpoint-2181/zero_to_fp32.py diff --git a/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/logging.jsonl b/legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/logging.jsonl similarity index 100% rename from kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/logging.jsonl rename to legacy/kd_mllm/s1_siglip2_qwen3_4b/v1-20260320-102316/logging.jsonl diff --git a/kd_mllm/s2_siglip2_qwen3_4b_10pct/args.json b/legacy/kd_mllm/s2_siglip2_qwen3_4b_10pct/args.json similarity index 100% rename from kd_mllm/s2_siglip2_qwen3_4b_10pct/args.json rename to legacy/kd_mllm/s2_siglip2_qwen3_4b_10pct/args.json diff --git a/kd_mllm/s2_siglip2_qwen3_4b_10pct/checkpoint-1000/args.json b/legacy/kd_mllm/s2_siglip2_qwen3_4b_10pct/checkpoint-1000/args.json similarity index 100% rename from kd_mllm/s2_siglip2_qwen3_4b_10pct/checkpoint-1000/args.json rename to legacy/kd_mllm/s2_siglip2_qwen3_4b_10pct/checkpoint-1000/args.json diff --git a/kd_mllm/s2_siglip2_qwen3_4b_10pct/checkpoint-1000/chat_template.jinja b/legacy/kd_mllm/s2_siglip2_qwen3_4b_10pct/checkpoint-1000/chat_template.jinja similarity index 100% rename from kd_mllm/s2_siglip2_qwen3_4b_10pct/checkpoint-1000/chat_template.jinja rename to legacy/kd_mllm/s2_siglip2_qwen3_4b_10pct/checkpoint-1000/chat_template.jinja