Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes. See raw diff.
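The commit title above says the files were pushed with the `huggingface_hub` client. Below is a minimal, illustrative sketch of how such a commit is typically produced; the repository id and local folder path are placeholders, not values recorded in this diff.

```python
from huggingface_hub import HfApi

# Placeholder repo id and folder path; the actual target repository is not shown in this diff.
api = HfApi()
api.upload_folder(
    folder_path="ttt_models_no_demonstration_loss",
    repo_id="your-username/your-repo",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```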
- lora/log_1740621679.txt +0 -0
- lora/log_1740628789.txt +0 -0
- lora/log_1740721863.txt +0 -0
- lora/log_1740722398.txt +0 -0
- lora/log_1740723317.txt +0 -0
- lora/log_1740725990.txt +0 -0
- lora/log_1740726090.txt +0 -0
- lora/log_1740726165.txt +0 -0
- lora/log_1740728003.txt +0 -0
- lora/model_weights/config.json +1 -0
- ttt_models_no_demonstration_loss/00576224/adapter_config.json +1 -0
- ttt_models_no_demonstration_loss/00576224/adapter_model.bin +3 -0
- ttt_models_no_demonstration_loss/00576224/config.json +1 -0
- ttt_models_no_demonstration_loss/00576224/log_1740726167.txt +96 -0
- ttt_models_no_demonstration_loss/00576224/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
- ttt_models_no_demonstration_loss/00576224/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
- ttt_models_no_demonstration_loss/009d5c81/adapter_config.json +1 -0
- ttt_models_no_demonstration_loss/009d5c81/adapter_model.bin +3 -0
- ttt_models_no_demonstration_loss/009d5c81/config.json +1 -0
- ttt_models_no_demonstration_loss/009d5c81/log_1740726174.txt +125 -0
- ttt_models_no_demonstration_loss/009d5c81/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
- ttt_models_no_demonstration_loss/009d5c81/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
- ttt_models_no_demonstration_loss/00dbd492/adapter_config.json +1 -0
- ttt_models_no_demonstration_loss/00dbd492/adapter_model.bin +3 -0
- ttt_models_no_demonstration_loss/00dbd492/config.json +1 -0
- ttt_models_no_demonstration_loss/00dbd492/log_1740726247.txt +125 -0
- ttt_models_no_demonstration_loss/00dbd492/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
- ttt_models_no_demonstration_loss/00dbd492/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
- ttt_models_no_demonstration_loss/03560426/adapter_config.json +1 -0
- ttt_models_no_demonstration_loss/03560426/adapter_model.bin +3 -0
- ttt_models_no_demonstration_loss/03560426/config.json +1 -0
- ttt_models_no_demonstration_loss/03560426/log_1740726294.txt +125 -0
- ttt_models_no_demonstration_loss/03560426/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
- ttt_models_no_demonstration_loss/03560426/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
- ttt_models_no_demonstration_loss/05a7bcf2/adapter_config.json +1 -0
- ttt_models_no_demonstration_loss/05a7bcf2/adapter_model.bin +3 -0
- ttt_models_no_demonstration_loss/05a7bcf2/config.json +1 -0
- ttt_models_no_demonstration_loss/05a7bcf2/log_1740726329.txt +48 -0
- ttt_models_no_demonstration_loss/05a7bcf2/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
- ttt_models_no_demonstration_loss/05a7bcf2/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
- ttt_models_no_demonstration_loss/0607ce86/adapter_config.json +1 -0
- ttt_models_no_demonstration_loss/0607ce86/adapter_model.bin +3 -0
- ttt_models_no_demonstration_loss/0607ce86/config.json +1 -0
- ttt_models_no_demonstration_loss/0607ce86/log_1740726360.txt +72 -0
- ttt_models_no_demonstration_loss/0607ce86/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
- ttt_models_no_demonstration_loss/0607ce86/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
- ttt_models_no_demonstration_loss/0692e18c/adapter_config.json +1 -0
- ttt_models_no_demonstration_loss/0692e18c/adapter_model.bin +3 -0
- ttt_models_no_demonstration_loss/0692e18c/config.json +1 -0
- ttt_models_no_demonstration_loss/0692e18c/log_1740726391.txt +125 -0
lora/log_1740621679.txt
ADDED
File without changes
lora/log_1740628789.txt
ADDED
File without changes
lora/log_1740721863.txt
ADDED
File without changes
lora/log_1740722398.txt
ADDED
File without changes
lora/log_1740723317.txt
ADDED
File without changes
lora/log_1740725990.txt
ADDED
File without changes
lora/log_1740726090.txt
ADDED
File without changes
lora/log_1740726165.txt
ADDED
File without changes
lora/log_1740728003.txt
ADDED
File without changes
lora/model_weights/config.json
ADDED
@@ -0,0 +1 @@
{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": [128001, 128008, 128009], "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 16, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": {"factor": 32.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "rope_theta": 500000.0, "tie_word_embeddings": true, "torch_dtype": "bfloat16", "transformers_version": "4.45.0.dev0", "use_cache": true, "vocab_size": 128256}
ttt_models_no_demonstration_loss/00576224/adapter_config.json
ADDED
@@ -0,0 +1 @@
{"base_model_name_or_path": "checkpoints/finetuned_1B_model", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
ttt_models_no_demonstration_loss/00576224/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:128ac7a9576163e2fefd2ec98ef96d01114bdd1d01d7989b055b5774637e04cc
size 153153674
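adapter_model.bin is tracked with Git LFS, so the diff only shows the pointer (a sha256 oid and a byte size of about 153 MB). A small illustrative check of a downloaded copy against the pointer values copied from above:

```python
import hashlib
import os

path = "ttt_models_no_demonstration_loss/00576224/adapter_model.bin"
expected_oid = "128ac7a9576163e2fefd2ec98ef96d01114bdd1d01d7989b055b5774637e04cc"
expected_size = 153153674

# Hash the file in chunks so the ~153 MB adapter is never loaded at once.
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

assert os.path.getsize(path) == expected_size
assert sha.hexdigest() == expected_oid
```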
ttt_models_no_demonstration_loss/00576224/config.json
ADDED
@@ -0,0 +1 @@
{"_name_or_path": "unsloth/Llama-3.2-1B-Instruct", "architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 16, "num_key_value_heads": 8, "pad_token_id": 128004, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": {"factor": 32.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "rope_theta": 500000.0, "tie_word_embeddings": true, "torch_dtype": "bfloat16", "transformers_version": "4.49.0", "unsloth_fixed": true, "use_cache": true, "vocab_size": 128256}
ttt_models_no_demonstration_loss/00576224/log_1740726167.txt
ADDED
@@ -0,0 +1,96 @@
Step 1 | loss:0.14904449880123138 lr:1.1111111111111112e-05 tokens_per_second_per_gpu:2851.4719552986166
|
| 2 |
+
Step 2 | loss:0.14729571342468262 lr:2.2222222222222223e-05 tokens_per_second_per_gpu:20949.5525303565
|
| 3 |
+
Step 3 | loss:0.35557758808135986 lr:3.3333333333333335e-05 tokens_per_second_per_gpu:7236.828799693814
|
| 4 |
+
Step 4 | loss:0.34570375084877014 lr:4.4444444444444447e-05 tokens_per_second_per_gpu:7594.38955991631
|
| 5 |
+
Step 5 | loss:0.03538966551423073 lr:5.555555555555556e-05 tokens_per_second_per_gpu:21994.868180770784
|
| 6 |
+
Step 6 | loss:0.08201596140861511 lr:6.666666666666667e-05 tokens_per_second_per_gpu:12128.844613759138
|
| 7 |
+
Step 7 | loss:0.24148191511631012 lr:7.777777777777778e-05 tokens_per_second_per_gpu:12229.866597940601
|
| 8 |
+
Step 8 | loss:0.05400775000452995 lr:8.888888888888889e-05 tokens_per_second_per_gpu:22263.251204378896
|
| 9 |
+
Step 9 | loss:0.21825583279132843 lr:0.0001 tokens_per_second_per_gpu:12289.065878814343
|
| 10 |
+
Step 10 | loss:0.0315493680536747 lr:9.996740476948385e-05 tokens_per_second_per_gpu:21932.04898159818
|
| 11 |
+
Step 11 | loss:0.0808226466178894 lr:9.98696615758975e-05 tokens_per_second_per_gpu:12774.465983156848
|
| 12 |
+
Step 12 | loss:0.1154557317495346 lr:9.970689785771798e-05 tokens_per_second_per_gpu:12904.308849099121
|
| 13 |
+
Step 13 | loss:0.18417231738567352 lr:9.947932582778188e-05 tokens_per_second_per_gpu:12859.261264706298
|
| 14 |
+
Step 14 | loss:0.07041065394878387 lr:9.918724219660013e-05 tokens_per_second_per_gpu:12242.823406366608
|
| 15 |
+
Step 15 | loss:0.18801505863666534 lr:9.883102778550434e-05 tokens_per_second_per_gpu:4790.9550672063915
|
| 16 |
+
Step 16 | loss:0.05552726984024048 lr:9.841114703012817e-05 tokens_per_second_per_gpu:12275.30492705435
|
| 17 |
+
Step 17 | loss:0.10469891875982285 lr:9.792814737487207e-05 tokens_per_second_per_gpu:7513.275382190046
|
| 18 |
+
Step 18 | loss:0.10473458468914032 lr:9.738265855914013e-05 tokens_per_second_per_gpu:12815.050966893854
|
| 19 |
+
Step 19 | loss:0.13106568157672882 lr:9.677539179628005e-05 tokens_per_second_per_gpu:7795.958671356599
|
| 20 |
+
Step 20 | loss:0.07537608593702316 lr:9.610713884629666e-05 tokens_per_second_per_gpu:12997.411048408178
|
| 21 |
+
Step 21 | loss:0.16871023178100586 lr:9.537877098354786e-05 tokens_per_second_per_gpu:7736.496830944336
|
| 22 |
+
Step 22 | loss:0.02240842953324318 lr:9.459123786076912e-05 tokens_per_second_per_gpu:22309.595368741993
|
| 23 |
+
Step 23 | loss:0.1097758412361145 lr:9.374556627090749e-05 tokens_per_second_per_gpu:7720.95100406919
|
| 24 |
+
Step 24 | loss:0.10470158606767654 lr:9.284285880837946e-05 tokens_per_second_per_gpu:7673.750901461417
|
| 25 |
+
Step 25 | loss:0.03743935376405716 lr:9.188429243149824e-05 tokens_per_second_per_gpu:11915.345620954722
|
| 26 |
+
Step 26 | loss:0.013439328409731388 lr:9.087111692794459e-05 tokens_per_second_per_gpu:22234.625080229656
|
| 27 |
+
Step 27 | loss:0.06791043281555176 lr:8.980465328528219e-05 tokens_per_second_per_gpu:7511.31551553852
|
| 28 |
+
Step 28 | loss:0.002402265788987279 lr:8.868629196864182e-05 tokens_per_second_per_gpu:21930.553379220197
|
| 29 |
+
Step 29 | loss:0.010298206470906734 lr:8.751749110782012e-05 tokens_per_second_per_gpu:7723.2228863835935
|
| 30 |
+
Step 30 | loss:0.02463030256330967 lr:8.629977459615655e-05 tokens_per_second_per_gpu:12504.128790519273
|
| 31 |
+
Step 31 | loss:0.02505524270236492 lr:8.503473010366713e-05 tokens_per_second_per_gpu:12592.387357263073
|
| 32 |
+
Step 32 | loss:0.015597482211887836 lr:8.37240070070257e-05 tokens_per_second_per_gpu:7726.0327550968395
|
| 33 |
+
Step 33 | loss:0.016075199469923973 lr:8.236931423909138e-05 tokens_per_second_per_gpu:22304.422623121394
|
| 34 |
+
Step 34 | loss:0.005453611258417368 lr:8.097241806078615e-05 tokens_per_second_per_gpu:7718.068853603704
|
| 35 |
+
Step 35 | loss:0.007494851481169462 lr:7.953513975822755e-05 tokens_per_second_per_gpu:21997.84343822955
|
| 36 |
+
Step 36 | loss:0.066264308989048 lr:7.805935326811912e-05 tokens_per_second_per_gpu:12451.343124589901
|
| 37 |
+
Step 37 | loss:0.06131032481789589 lr:7.654698273449435e-05 tokens_per_second_per_gpu:12248.336290659063
|
| 38 |
+
Step 38 | loss:0.034204430878162384 lr:7.500000000000001e-05 tokens_per_second_per_gpu:12273.809053213246
|
| 39 |
+
Step 39 | loss:0.006211467087268829 lr:7.342042203498951e-05 tokens_per_second_per_gpu:12492.643951618329
|
| 40 |
+
Step 40 | loss:0.05282324180006981 lr:7.181030830777837e-05 tokens_per_second_per_gpu:7689.15595580658
|
| 41 |
+
Step 41 | loss:0.022629745304584503 lr:7.017175809949044e-05 tokens_per_second_per_gpu:22108.710351302314
|
| 42 |
+
Step 42 | loss:0.019897570833563805 lr:6.850690776699573e-05 tokens_per_second_per_gpu:22107.89933575055
|
| 43 |
+
Step 43 | loss:0.006202423945069313 lr:6.681792795750875e-05 tokens_per_second_per_gpu:19972.84156595078
|
| 44 |
+
Step 44 | loss:0.06745807826519012 lr:6.510702077847863e-05 tokens_per_second_per_gpu:7601.391839354373
|
| 45 |
+
Step 45 | loss:0.020223373547196388 lr:6.337641692646106e-05 tokens_per_second_per_gpu:12800.420347482073
|
| 46 |
+
Step 46 | loss:0.02760479785501957 lr:6.162837277871553e-05 tokens_per_second_per_gpu:7531.4065269653365
|
| 47 |
+
Step 47 | loss:0.0552951842546463 lr:5.9865167451320005e-05 tokens_per_second_per_gpu:4831.4160184401735
|
| 48 |
+
Step 48 | loss:0.010314182378351688 lr:5.808909982763825e-05 tokens_per_second_per_gpu:22103.384562644675
|
| 49 |
+
Step 49 | loss:0.0036271591670811176 lr:5.6302485561014475e-05 tokens_per_second_per_gpu:22155.52069296788
|
| 50 |
+
Step 50 | loss:0.01667805202305317 lr:5.4507654055603275e-05 tokens_per_second_per_gpu:22066.98588216636
|
| 51 |
+
Step 51 | loss:0.0010930394055321813 lr:5.270694542927088e-05 tokens_per_second_per_gpu:12333.48050600657
|
| 52 |
+
Step 52 | loss:0.001210671034641564 lr:5.090270746252802e-05 tokens_per_second_per_gpu:22163.56799860346
|
| 53 |
+
Step 53 | loss:0.015023387037217617 lr:4.909729253747197e-05 tokens_per_second_per_gpu:12839.945001819797
|
| 54 |
+
Step 54 | loss:0.031054478138685226 lr:4.729305457072913e-05 tokens_per_second_per_gpu:7542.734441114701
|
| 55 |
+
Step 55 | loss:0.008544590324163437 lr:4.549234594439674e-05 tokens_per_second_per_gpu:12230.85027257788
|
| 56 |
+
Step 56 | loss:0.0020282124169170856 lr:4.3697514438985536e-05 tokens_per_second_per_gpu:12322.18317397805
|
| 57 |
+
Step 57 | loss:0.003092979546636343 lr:4.1910900172361764e-05 tokens_per_second_per_gpu:22303.08253941034
|
| 58 |
+
Step 58 | loss:0.012704712338745594 lr:4.0134832548680006e-05 tokens_per_second_per_gpu:12429.823748296505
|
| 59 |
+
Step 59 | loss:0.01068410649895668 lr:3.8371627221284495e-05 tokens_per_second_per_gpu:6993.404182355678
|
| 60 |
+
Step 60 | loss:0.0002961690188385546 lr:3.6623583073538966e-05 tokens_per_second_per_gpu:12337.96544578768
|
| 61 |
+
Step 61 | loss:0.05859158933162689 lr:3.489297922152136e-05 tokens_per_second_per_gpu:12830.842179867788
|
| 62 |
+
Step 62 | loss:0.0016673931386321783 lr:3.3182072042491244e-05 tokens_per_second_per_gpu:22160.333689821073
|
| 63 |
+
Step 63 | loss:0.004928383976221085 lr:3.149309223300428e-05 tokens_per_second_per_gpu:22076.64615453516
|
| 64 |
+
Step 64 | loss:0.020266426727175713 lr:2.982824190050958e-05 tokens_per_second_per_gpu:7544.0406390773005
|
| 65 |
+
Step 65 | loss:0.045180853456258774 lr:2.8189691692221627e-05 tokens_per_second_per_gpu:7582.233434303501
|
| 66 |
+
Step 66 | loss:0.0017555938102304935 lr:2.65795779650105e-05 tokens_per_second_per_gpu:21957.43429795542
|
| 67 |
+
Step 67 | loss:0.014189746230840683 lr:2.500000000000001e-05 tokens_per_second_per_gpu:12214.115931944754
|
| 68 |
+
Step 68 | loss:0.009678625501692295 lr:2.3453017265505673e-05 tokens_per_second_per_gpu:12334.970975936598
|
| 69 |
+
Step 69 | loss:0.0020698723383247852 lr:2.194064673188089e-05 tokens_per_second_per_gpu:22469.415663691943
|
| 70 |
+
Step 70 | loss:0.012371936812996864 lr:2.0464860241772455e-05 tokens_per_second_per_gpu:12780.459541479
|
| 71 |
+
Step 71 | loss:0.009074215777218342 lr:1.902758193921385e-05 tokens_per_second_per_gpu:7592.038792059456
|
| 72 |
+
Step 72 | loss:0.00711387675255537 lr:1.7630685760908622e-05 tokens_per_second_per_gpu:12372.29735180471
|
| 73 |
+
Step 73 | loss:0.005709199700504541 lr:1.6275992992974308e-05 tokens_per_second_per_gpu:22181.641042105657
|
| 74 |
+
Step 74 | loss:0.00010057680628960952 lr:1.4965269896332885e-05 tokens_per_second_per_gpu:7650.383047449945
|
| 75 |
+
Step 75 | loss:0.011214444413781166 lr:1.3700225403843469e-05 tokens_per_second_per_gpu:12230.746130667338
|
| 76 |
+
Step 76 | loss:0.013415924273431301 lr:1.2482508892179884e-05 tokens_per_second_per_gpu:7344.770015293763
|
| 77 |
+
Step 77 | loss:0.0032110661268234253 lr:1.1313708031358183e-05 tokens_per_second_per_gpu:7599.727719066029
|
| 78 |
+
Step 78 | loss:0.03114166297018528 lr:1.0195346714717813e-05 tokens_per_second_per_gpu:7599.802552249471
|
| 79 |
+
Step 79 | loss:0.005268631968647242 lr:9.12888307205541e-06 tokens_per_second_per_gpu:22150.419451407924
|
| 80 |
+
Step 80 | loss:0.01586207188665867 lr:8.115707568501768e-06 tokens_per_second_per_gpu:22266.21251093014
|
| 81 |
+
Step 81 | loss:0.01174659002572298 lr:7.157141191620548e-06 tokens_per_second_per_gpu:22009.104036894296
|
| 82 |
+
Step 82 | loss:0.007050564046949148 lr:6.2544337290925185e-06 tokens_per_second_per_gpu:12837.202740177849
|
| 83 |
+
Step 83 | loss:0.01672087423503399 lr:5.408762139230888e-06 tokens_per_second_per_gpu:12355.455375359465
|
| 84 |
+
Step 84 | loss:0.002478977432474494 lr:4.621229016452156e-06 tokens_per_second_per_gpu:21946.787056566478
|
| 85 |
+
Step 85 | loss:0.012155639939010143 lr:3.892861153703342e-06 tokens_per_second_per_gpu:12255.945826699046
|
| 86 |
+
Step 86 | loss:0.010689085349440575 lr:3.2246082037199532e-06 tokens_per_second_per_gpu:12812.527102375288
|
| 87 |
+
Step 87 | loss:0.0005194115801714361 lr:2.6173414408598827e-06 tokens_per_second_per_gpu:12390.882794525154
|
| 88 |
+
Step 88 | loss:0.0049830237403512 lr:2.0718526251279346e-06 tokens_per_second_per_gpu:12345.319708643035
|
| 89 |
+
Step 89 | loss:0.03666767477989197 lr:1.5888529698718346e-06 tokens_per_second_per_gpu:11623.1015022011
|
| 90 |
+
Step 90 | loss:0.008715181611478329 lr:1.1689722144956671e-06 tokens_per_second_per_gpu:12282.203400856055
|
| 91 |
+
Step 91 | loss:0.0031739503610879183 lr:8.127578033998662e-07 tokens_per_second_per_gpu:22196.150053072324
|
| 92 |
+
Step 92 | loss:0.010224398225545883 lr:5.206741722181386e-07 tokens_per_second_per_gpu:12854.984899816864
|
| 93 |
+
Step 93 | loss:0.011694942601025105 lr:2.9310214228202013e-07 tokens_per_second_per_gpu:4843.402025371898
|
| 94 |
+
Step 94 | loss:0.07014063745737076 lr:1.3033842410251075e-07 tokens_per_second_per_gpu:4853.051032488962
|
| 95 |
+
Step 95 | loss:0.00699853990226984 lr:3.259523051615254e-08 tokens_per_second_per_gpu:22167.85614200547
|
| 96 |
+
Step 96 | loss:0.038939666002988815 lr:0.0 tokens_per_second_per_gpu:7643.401694527072
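Every line in these per-task training logs has the fixed form `Step N | loss:<float> lr:<float> tokens_per_second_per_gpu:<float>`. A short illustrative parser (the regex and helper name are mine, not part of the repository):

```python
import re

LINE = re.compile(
    r"Step (\d+) \| loss:([\d.eE+-]+) lr:([\d.eE+-]+) tokens_per_second_per_gpu:([\d.eE+-]+)"
)

def parse_log(path):
    """Return one dict per training step from a log_*.txt file."""
    steps = []
    with open(path) as f:
        for line in f:
            m = LINE.match(line.strip())
            if m:
                steps.append({
                    "step": int(m.group(1)),
                    "loss": float(m.group(2)),
                    "lr": float(m.group(3)),
                    "tokens_per_second_per_gpu": float(m.group(4)),
                })
    return steps

steps = parse_log("ttt_models_no_demonstration_loss/00576224/log_1740726167.txt")
print(len(steps), steps[-1]["loss"])  # 96 steps for this task
```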
ttt_models_no_demonstration_loss/00576224/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl
ADDED
The diff for this file is too large to render. See raw diff.
ttt_models_no_demonstration_loss/00576224/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl
ADDED
@@ -0,0 +1 @@
{"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[6 8]\n [4 6]] -> [[6 4 6 4 6 4]\n [8 6 8 6 8 6]\n [6 4 6 4 6 4]\n [6 8 6 8 6 8]\n [4 6 4 6 4 6]\n [8 6 8 6 8 6]]#\n"}, {"role": "user", "content": "[[9 7]\n [3 4]] -> "}], "output": {"role": "assistant", "content": "[[4 3 4 3 4 3]\n [7 9 7 9 7 9]\n [4 3 4 3 4 3]\n [9 7 9 7 9 7]\n [3 4 3 4 3 4]\n [7 9 7 9 7 9]]#"}, "total_tokens": 239}
ttt_models_no_demonstration_loss/009d5c81/adapter_config.json
ADDED
@@ -0,0 +1 @@
{"base_model_name_or_path": "checkpoints/finetuned_1B_model", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
ttt_models_no_demonstration_loss/009d5c81/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ba49a4c0a183cd678ddae3c87493b711b44d51994d98662d92e9beaee8c37a46
size 153153674
ttt_models_no_demonstration_loss/009d5c81/config.json
ADDED
@@ -0,0 +1 @@
{"_name_or_path": "unsloth/Llama-3.2-1B-Instruct", "architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 16, "num_key_value_heads": 8, "pad_token_id": 128004, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": {"factor": 32.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "rope_theta": 500000.0, "tie_word_embeddings": true, "torch_dtype": "bfloat16", "transformers_version": "4.49.0", "unsloth_fixed": true, "use_cache": true, "vocab_size": 128256}
ttt_models_no_demonstration_loss/009d5c81/log_1740726174.txt
ADDED
@@ -0,0 +1,125 @@
Step 1 | loss:0.010432140901684761 lr:8.333333333333334e-06 tokens_per_second_per_gpu:22954.58404863369
|
| 2 |
+
Step 2 | loss:0.008794802241027355 lr:1.6666666666666667e-05 tokens_per_second_per_gpu:16701.298570132414
|
| 3 |
+
Step 3 | loss:0.01694098673760891 lr:2.5e-05 tokens_per_second_per_gpu:24492.991090808995
|
| 4 |
+
Step 4 | loss:0.013064547441899776 lr:3.3333333333333335e-05 tokens_per_second_per_gpu:23054.128366696663
|
| 5 |
+
Step 5 | loss:0.012506375089287758 lr:4.166666666666667e-05 tokens_per_second_per_gpu:23408.390119519896
|
| 6 |
+
Step 6 | loss:0.012613199651241302 lr:5e-05 tokens_per_second_per_gpu:22946.670020024107
|
| 7 |
+
Step 7 | loss:0.008189075626432896 lr:5.833333333333334e-05 tokens_per_second_per_gpu:22412.7313101541
|
| 8 |
+
Step 8 | loss:0.011700471863150597 lr:6.666666666666667e-05 tokens_per_second_per_gpu:22811.128694581646
|
| 9 |
+
Step 9 | loss:0.007529854774475098 lr:7.500000000000001e-05 tokens_per_second_per_gpu:22810.43130050475
|
| 10 |
+
Step 10 | loss:0.012670820578932762 lr:8.333333333333334e-05 tokens_per_second_per_gpu:24527.46549156332
|
| 11 |
+
Step 11 | loss:0.007261978462338448 lr:9.166666666666667e-05 tokens_per_second_per_gpu:23660.919604485567
|
| 12 |
+
Step 12 | loss:0.031069112941622734 lr:0.0001 tokens_per_second_per_gpu:22821.11766445037
|
| 13 |
+
Step 13 | loss:0.0036376703064888716 lr:9.998067787472772e-05 tokens_per_second_per_gpu:23457.097900853245
|
| 14 |
+
Step 14 | loss:0.007268179673701525 lr:9.99227264326918e-05 tokens_per_second_per_gpu:23447.549674741007
|
| 15 |
+
Step 15 | loss:0.003887709230184555 lr:9.982619046369321e-05 tokens_per_second_per_gpu:22359.896730548397
|
| 16 |
+
Step 16 | loss:0.04504425823688507 lr:9.96911445789354e-05 tokens_per_second_per_gpu:24205.20658716209
|
| 17 |
+
Step 17 | loss:0.0047857859171926975 lr:9.951769315335844e-05 tokens_per_second_per_gpu:22918.665389053735
|
| 18 |
+
Step 18 | loss:0.006646800320595503 lr:9.930597024496931e-05 tokens_per_second_per_gpu:24440.599594677275
|
| 19 |
+
Step 19 | loss:0.01939203031361103 lr:9.905613949123036e-05 tokens_per_second_per_gpu:22956.888570065792
|
| 20 |
+
Step 20 | loss:0.004182838834822178 lr:9.876839398258641e-05 tokens_per_second_per_gpu:22958.370438425885
|
| 21 |
+
Step 21 | loss:0.00993522722274065 lr:9.844295611322804e-05 tokens_per_second_per_gpu:24426.40338851473
|
| 22 |
+
Step 22 | loss:0.03339752182364464 lr:9.808007740920646e-05 tokens_per_second_per_gpu:24422.751620285562
|
| 23 |
+
Step 23 | loss:0.005310469772666693 lr:9.768003833403278e-05 tokens_per_second_per_gpu:22416.95644617719
|
| 24 |
+
Step 24 | loss:0.006643536034971476 lr:9.724314807191195e-05 tokens_per_second_per_gpu:23359.620814999605
|
| 25 |
+
Step 25 | loss:0.006281149107962847 lr:9.676974428877901e-05 tokens_per_second_per_gpu:24570.757929950716
|
| 26 |
+
Step 26 | loss:0.005074056796729565 lr:9.626019287132203e-05 tokens_per_second_per_gpu:22570.186540039256
|
| 27 |
+
Step 27 | loss:0.017168261110782623 lr:9.571488764419381e-05 tokens_per_second_per_gpu:22813.45940547367
|
| 28 |
+
Step 28 | loss:0.004192687105387449 lr:9.513425006563079e-05 tokens_per_second_per_gpu:22803.44354909576
|
| 29 |
+
Step 29 | loss:0.0045415726490318775 lr:9.451872890171419e-05 tokens_per_second_per_gpu:23458.83720011921
|
| 30 |
+
Step 30 | loss:0.00395458796992898 lr:9.386879987952549e-05 tokens_per_second_per_gpu:23425.78672860484
|
| 31 |
+
Step 31 | loss:0.0061028930358588696 lr:9.31849653194641e-05 tokens_per_second_per_gpu:25053.06245792037
|
| 32 |
+
Step 32 | loss:0.0048234593123197556 lr:9.246775374701139e-05 tokens_per_second_per_gpu:23494.429274806444
|
| 33 |
+
Step 33 | loss:0.002704465761780739 lr:9.171771948424137e-05 tokens_per_second_per_gpu:23158.20320078271
|
| 34 |
+
Step 34 | loss:0.015400098636746407 lr:9.093544222139337e-05 tokens_per_second_per_gpu:22737.843702913982
|
| 35 |
+
Step 35 | loss:0.004418112337589264 lr:9.012152656883823e-05 tokens_per_second_per_gpu:24616.887534222577
|
| 36 |
+
Step 36 | loss:0.003136880462989211 lr:8.927660158978392e-05 tokens_per_second_per_gpu:22875.550489133904
|
| 37 |
+
Step 37 | loss:0.022687217220664024 lr:8.84013203140821e-05 tokens_per_second_per_gpu:24287.61752339216
|
| 38 |
+
Step 38 | loss:0.004061760380864143 lr:8.749635923351107e-05 tokens_per_second_per_gpu:23455.577658792045
|
| 39 |
+
Step 39 | loss:0.0031959109473973513 lr:8.656241777892543e-05 tokens_per_second_per_gpu:22426.55320561869
|
| 40 |
+
Step 40 | loss:0.004088667221367359 lr:8.560021777967649e-05 tokens_per_second_per_gpu:22823.49904243706
|
| 41 |
+
Step 41 | loss:0.002563514281064272 lr:8.461050290572114e-05 tokens_per_second_per_gpu:22813.20839659465
|
| 42 |
+
Step 42 | loss:0.00410807179287076 lr:8.359403809285053e-05 tokens_per_second_per_gpu:23588.920512917386
|
| 43 |
+
Step 43 | loss:0.0026001236401498318 lr:8.255160895148263e-05 tokens_per_second_per_gpu:22662.244681864086
|
| 44 |
+
Step 44 | loss:0.0029869298450648785 lr:8.148402115947571e-05 tokens_per_second_per_gpu:22807.291402918167
|
| 45 |
+
Step 45 | loss:0.0028475383296608925 lr:8.039209983943201e-05 tokens_per_second_per_gpu:22809.7554706589
|
| 46 |
+
Step 46 | loss:0.0019230066100135446 lr:7.927668892097289e-05 tokens_per_second_per_gpu:22498.223004632313
|
| 47 |
+
Step 47 | loss:0.009930501691997051 lr:7.813865048847819e-05 tokens_per_second_per_gpu:22780.52185953639
|
| 48 |
+
Step 48 | loss:0.004407620523124933 lr:7.697886411479423e-05 tokens_per_second_per_gpu:24484.046663826906
|
| 49 |
+
Step 49 | loss:0.002827950520440936 lr:7.579822618142505e-05 tokens_per_second_per_gpu:23495.723020767346
|
| 50 |
+
Step 50 | loss:0.0026968042366206646 lr:7.459764918573264e-05 tokens_per_second_per_gpu:22747.06968767957
|
| 51 |
+
Step 51 | loss:0.0016967440024018288 lr:7.33780610356814e-05 tokens_per_second_per_gpu:22796.795128326186
|
| 52 |
+
Step 52 | loss:0.003814347553998232 lr:7.214040433267198e-05 tokens_per_second_per_gpu:22777.993321067544
|
| 53 |
+
Step 53 | loss:0.0028922681231051683 lr:7.088563564301873e-05 tokens_per_second_per_gpu:23453.00877130853
|
| 54 |
+
Step 54 | loss:0.003019278636202216 lr:6.961472475863405e-05 tokens_per_second_per_gpu:24377.825388395468
|
| 55 |
+
Step 55 | loss:0.005399859510362148 lr:6.832865394749065e-05 tokens_per_second_per_gpu:24486.159970530978
|
| 56 |
+
Step 56 | loss:0.004557427484542131 lr:6.702841719444141e-05 tokens_per_second_per_gpu:24365.50847622352
|
| 57 |
+
Step 57 | loss:0.0033511051442474127 lr:6.571501943298334e-05 tokens_per_second_per_gpu:23436.108251963964
|
| 58 |
+
Step 58 | loss:0.0023917832877486944 lr:6.438947576855968e-05 tokens_per_second_per_gpu:25090.43021964884
|
| 59 |
+
Step 59 | loss:0.0029730135574936867 lr:6.305281069399989e-05 tokens_per_second_per_gpu:23388.67556891684
|
| 60 |
+
Step 60 | loss:0.004620478954166174 lr:6.17060572977047e-05 tokens_per_second_per_gpu:23317.500288188523
|
| 61 |
+
Step 61 | loss:0.005031143315136433 lr:6.035025646518746e-05 tokens_per_second_per_gpu:25030.471160897196
|
| 62 |
+
Step 62 | loss:0.0016502024373039603 lr:5.8986456074589404e-05 tokens_per_second_per_gpu:22799.278971627667
|
| 63 |
+
Step 63 | loss:0.0017189333448186517 lr:5.761571018679025e-05 tokens_per_second_per_gpu:22492.45041964418
|
| 64 |
+
Step 64 | loss:0.003847371554002166 lr:5.6239078230740436e-05 tokens_per_second_per_gpu:23227.552328727765
|
| 65 |
+
Step 65 | loss:0.001996465492993593 lr:5.48576241846443e-05 tokens_per_second_per_gpu:22778.1775368249
|
| 66 |
+
Step 66 | loss:0.0020511718466877937 lr:5.347241575362729e-05 tokens_per_second_per_gpu:22341.828330111817
|
| 67 |
+
Step 67 | loss:0.0026627436745911837 lr:5.208452354452274e-05 tokens_per_second_per_gpu:24270.36175826407
|
| 68 |
+
Step 68 | loss:0.0035100297536700964 lr:5.0695020238415756e-05 tokens_per_second_per_gpu:24340.550535206432
|
| 69 |
+
Step 69 | loss:0.0014067479642108083 lr:4.9304979761584256e-05 tokens_per_second_per_gpu:22728.29087880062
|
| 70 |
+
Step 70 | loss:0.004003740381449461 lr:4.791547645547726e-05 tokens_per_second_per_gpu:22742.938223736244
|
| 71 |
+
Step 71 | loss:0.001960608409717679 lr:4.652758424637271e-05 tokens_per_second_per_gpu:23253.029518853622
|
| 72 |
+
Step 72 | loss:0.0021944893524050713 lr:4.5142375815355706e-05 tokens_per_second_per_gpu:23049.822869087842
|
| 73 |
+
Step 73 | loss:0.002225143602117896 lr:4.376092176925958e-05 tokens_per_second_per_gpu:22780.965514444695
|
| 74 |
+
Step 74 | loss:0.0009351027547381818 lr:4.238428981320975e-05 tokens_per_second_per_gpu:22363.584129409035
|
| 75 |
+
Step 75 | loss:0.0008425263222306967 lr:4.101354392541061e-05 tokens_per_second_per_gpu:22346.879589057437
|
| 76 |
+
Step 76 | loss:0.0015157210873439908 lr:3.964974353481254e-05 tokens_per_second_per_gpu:22258.987868826625
|
| 77 |
+
Step 77 | loss:0.0038348797243088484 lr:3.829394270229531e-05 tokens_per_second_per_gpu:23219.074093208492
|
| 78 |
+
Step 78 | loss:0.0015734429471194744 lr:3.694718930600012e-05 tokens_per_second_per_gpu:22389.447148449457
|
| 79 |
+
Step 79 | loss:0.002502255840227008 lr:3.561052423144032e-05 tokens_per_second_per_gpu:22768.635778603857
|
| 80 |
+
Step 80 | loss:0.0019395934650674462 lr:3.428498056701665e-05 tokens_per_second_per_gpu:22558.30914840681
|
| 81 |
+
Step 81 | loss:0.0008147542830556631 lr:3.297158280555862e-05 tokens_per_second_per_gpu:22414.827322728954
|
| 82 |
+
Step 82 | loss:0.0015751593746244907 lr:3.167134605250938e-05 tokens_per_second_per_gpu:22581.37512344514
|
| 83 |
+
Step 83 | loss:0.008450040593743324 lr:3.0385275241365962e-05 tokens_per_second_per_gpu:22683.183398635374
|
| 84 |
+
Step 84 | loss:0.0007072880398482084 lr:2.9114364356981272e-05 tokens_per_second_per_gpu:22190.206672997825
|
| 85 |
+
Step 85 | loss:0.01250297948718071 lr:2.7859595667328026e-05 tokens_per_second_per_gpu:24292.708936916177
|
| 86 |
+
Step 86 | loss:0.0015557208098471165 lr:2.6621938964318595e-05 tokens_per_second_per_gpu:23049.068055813274
|
| 87 |
+
Step 87 | loss:0.0036120519507676363 lr:2.540235081426736e-05 tokens_per_second_per_gpu:24332.60319440471
|
| 88 |
+
Step 88 | loss:0.0014258506707847118 lr:2.4201773818574956e-05 tokens_per_second_per_gpu:22287.419294402614
|
| 89 |
+
Step 89 | loss:0.0037643308751285076 lr:2.302113588520578e-05 tokens_per_second_per_gpu:23434.12593358025
|
| 90 |
+
Step 90 | loss:0.0008397825877182186 lr:2.1861349511521815e-05 tokens_per_second_per_gpu:24910.432886545193
|
| 91 |
+
Step 91 | loss:0.001311503117904067 lr:2.072331107902713e-05 tokens_per_second_per_gpu:22522.938695560464
|
| 92 |
+
Step 92 | loss:0.003427803749218583 lr:1.960790016056801e-05 tokens_per_second_per_gpu:23241.417586927862
|
| 93 |
+
Step 93 | loss:0.0004121843376196921 lr:1.8515978840524302e-05 tokens_per_second_per_gpu:22531.246279263618
|
| 94 |
+
Step 94 | loss:0.00312836654484272 lr:1.7448391048517376e-05 tokens_per_second_per_gpu:24239.72819001614
|
| 95 |
+
Step 95 | loss:0.001992614474147558 lr:1.640596190714947e-05 tokens_per_second_per_gpu:22773.02797758673
|
| 96 |
+
Step 96 | loss:0.0015763831324875355 lr:1.538949709427886e-05 tokens_per_second_per_gpu:24361.752804654298
|
| 97 |
+
Step 97 | loss:0.0016665292205289006 lr:1.4399782220323515e-05 tokens_per_second_per_gpu:24357.405592574567
|
| 98 |
+
Step 98 | loss:0.0023872442543506622 lr:1.3437582221074573e-05 tokens_per_second_per_gpu:22263.492354123417
|
| 99 |
+
Step 99 | loss:0.0014657737920060754 lr:1.250364076648894e-05 tokens_per_second_per_gpu:22560.03162760802
|
| 100 |
+
Step 100 | loss:0.008913733065128326 lr:1.1598679685917901e-05 tokens_per_second_per_gpu:23386.22426750836
|
| 101 |
+
Step 101 | loss:0.0012886931654065847 lr:1.0723398410216084e-05 tokens_per_second_per_gpu:24349.42564559428
|
| 102 |
+
Step 102 | loss:0.0029927073046565056 lr:9.878473431161767e-06 tokens_per_second_per_gpu:23413.838428964773
|
| 103 |
+
Step 103 | loss:0.0016142845852300525 lr:9.064557778606631e-06 tokens_per_second_per_gpu:22892.965503351814
|
| 104 |
+
Step 104 | loss:0.004492833279073238 lr:8.28228051575864e-06 tokens_per_second_per_gpu:24565.80977166718
|
| 105 |
+
Step 105 | loss:0.00285129202529788 lr:7.532246252988617e-06 tokens_per_second_per_gpu:24391.608473607652
|
| 106 |
+
Step 106 | loss:0.0028387606143951416 lr:6.815034680535915e-06 tokens_per_second_per_gpu:24264.536267597356
|
| 107 |
+
Step 107 | loss:0.002112480578944087 lr:6.1312001204745115e-06 tokens_per_second_per_gpu:23272.471595819847
|
| 108 |
+
Step 108 | loss:0.0033439788967370987 lr:5.481271098285817e-06 tokens_per_second_per_gpu:24999.961873144744
|
| 109 |
+
Step 109 | loss:0.0034719647374004126 lr:4.865749934369223e-06 tokens_per_second_per_gpu:22803.56313183214
|
| 110 |
+
Step 110 | loss:0.013906840234994888 lr:4.285112355806192e-06 tokens_per_second_per_gpu:24351.93657374786
|
| 111 |
+
Step 111 | loss:0.0009324107668362558 lr:3.7398071286779857e-06 tokens_per_second_per_gpu:22609.227655197963
|
| 112 |
+
Step 112 | loss:0.003379145637154579 lr:3.230255711220992e-06 tokens_per_second_per_gpu:22762.3291006239
|
| 113 |
+
Step 113 | loss:0.0020893726032227278 lr:2.7568519280880558e-06 tokens_per_second_per_gpu:23286.1557654273
|
| 114 |
+
Step 114 | loss:0.0022421840112656355 lr:2.3199616659672354e-06 tokens_per_second_per_gpu:22808.22343754297
|
| 115 |
+
Step 115 | loss:0.002479110611602664 lr:1.919922590793549e-06 tokens_per_second_per_gpu:22386.250672880793
|
| 116 |
+
Step 116 | loss:0.002459143754094839 lr:1.5570438867719694e-06 tokens_per_second_per_gpu:22408.597454631134
|
| 117 |
+
Step 117 | loss:0.0026496825739741325 lr:1.2316060174136002e-06 tokens_per_second_per_gpu:22475.49557707404
|
| 118 |
+
Step 118 | loss:0.00342993950471282 lr:9.43860508769645e-07 tokens_per_second_per_gpu:24343.875662573646
|
| 119 |
+
Step 119 | loss:0.0021067785564810038 lr:6.940297550306896e-07 tokens_per_second_per_gpu:22663.87172626564
|
| 120 |
+
Step 120 | loss:0.0012191813439130783 lr:4.823068466415615e-07 tokens_per_second_per_gpu:22226.4704019978
|
| 121 |
+
Step 121 | loss:0.0004489855782594532 lr:3.088554210646133e-07 tokens_per_second_per_gpu:22301.993823898018
|
| 122 |
+
Step 122 | loss:0.002996098482981324 lr:1.7380953630678488e-07 tokens_per_second_per_gpu:22417.34729082245
|
| 123 |
+
Step 123 | loss:0.005473646800965071 lr:7.727356730820035e-08 tokens_per_second_per_gpu:24406.949552256385
|
| 124 |
+
Step 124 | loss:0.0032128365710377693 lr:1.9322125272297488e-08 tokens_per_second_per_gpu:22670.6159905944
|
| 125 |
+
Step 125 | loss:0.00292785232886672 lr:0.0 tokens_per_second_per_gpu:23210.10521482543
ttt_models_no_demonstration_loss/009d5c81/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl
ADDED
The diff for this file is too large to render. See raw diff.
ttt_models_no_demonstration_loss/009d5c81/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl
ADDED
@@ -0,0 +1 @@
{"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 3 3 0 3 0 3 3 0 0 0 0 0 0 3 3 0 3 0 3 3 0 0 0 0]\n [0 0 0 0 3 0 3 0 3 0 3 0 0 0 0 0 0 3 0 3 0 3 0 3 0 0 0 0]\n [0 0 0 0 3 3 0 3 0 3 3 0 0 0 0 0 0 3 3 0 3 0 3 3 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 6 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 6 0 0 0]\n [0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0]\n [0 0 0 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 8 8 0 8 0 8 8 0 0 0 0 0 0 8 8 0 8 0 8 8 0 0 0 0]\n [0 0 0 0 8 0 8 0 8 0 8 0 0 0 0 0 0 8 0 8 0 8 0 8 0 0 0 0]\n [0 0 0 0 8 8 0 8 0 8 8 0 0 0 0 0 0 8 8 0 8 0 8 8 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n[[0 0 0 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 3 3 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 3 3 3 0 0 0 0 0 0 3 3 3 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 3 0 3 0 0 3 0 3 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 3 3 3 3 0 3 3 3 0 0 3 3 3 0 3 3 3 3 0 0 0 0 0]\n [0 0 0 0 3 3 0 0 3 3 3 0 3 3 3 3 0 3 3 3 0 0 3 3 0 0 0 0]\n [0 0 0 0 0 0 0 3 3 0 0 0 3 0 0 3 0 0 0 3 3 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 3 3 0 0 0 3 3 0 0 3 3 0 0 0 3 3 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 3 3 3 0 0 0 0 3 3 3 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 0 0 0 0]\n [0 0 0 0 6 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 6 0 0 0 0]\n [0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 0 0 0 9 9 0 0 0 0 0 0 0 0 0 0 9 9 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 9 9 9 0 0 0 0 0 0 9 9 9 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 9 0 9 0 0 9 0 9 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 9 9 9 9 0 9 9 9 0 0 9 9 9 0 9 9 9 9 0 0 0 0 0]\n [0 0 0 0 9 9 0 0 9 9 9 0 9 9 9 9 0 9 9 9 0 0 9 9 0 0 0 0]\n [0 0 0 0 0 0 0 9 9 0 0 0 9 0 0 9 0 0 0 9 9 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 9 9 0 0 0 9 9 0 0 9 9 0 0 0 9 9 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 9 9 9 0 0 0 0 9 9 9 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n[[0 0 0 3 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 3 0 0 0]\n [0 3 0 3 0 3 0 3 0 3 0 0 0 0 0 0 0 0 3 0 3 0 3 0 3 0 3 0]\n [0 3 3 3 3 3 3 3 3 3 0 0 0 0 0 0 0 0 3 3 3 3 3 3 3 3 3 0]\n [0 3 0 3 0 3 0 3 0 3 0 0 0 0 0 0 0 0 3 0 3 0 3 0 3 0 3 0]\n [0 3 0 0 0 3 0 0 0 3 0 0 0 0 0 0 0 0 3 0 0 0 3 0 0 0 3 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 6 6 6 0 0 0 0 0 0 0 0 6 6 6 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 4 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 4 0 0 0]\n [0 4 0 4 0 4 0 4 0 4 0 0 0 0 0 0 0 0 4 0 4 0 4 0 4 0 4 0]\n [0 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 0 0 4 4 4 4 4 4 4 4 4 0]\n [0 4 0 4 0 4 0 4 0 4 0 0 0 0 0 0 0 0 4 0 4 0 4 0 4 0 4 0]\n [0 4 0 0 0 4 0 0 0 4 0 0 0 0 0 0 0 0 4 0 0 0 4 0 0 0 4 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 3 3 0 3 3 0 0 0 0 3 3 0 3 3 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 3 3 3 0 0 0 0 0 0 3 3 3 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 3 3 3 3 0 0 0 0 0 0 0 0 0 0 3 3 3 3 0 0 0 0 0]\n [0 0 0 0 3 3 0 3 0 0 3 3 0 0 0 0 3 3 0 0 3 0 3 3 0 0 0 0]\n [0 0 0 0 0 0 0 3 3 3 3 0 0 0 0 0 0 3 3 3 3 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 3 0 3 0 0 0 0 0 0 3 0 3 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 3 3 3 0 3 3 3 0 0 3 3 3 0 3 3 3 0 0 0 0 0 0]\n [0 0 0 0 0 0 3 0 0 0 0 0 3 0 0 3 0 0 0 0 0 3 0 0 0 0 0 0]\n [0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0]\n [0 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 0]\n [0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 4 4 0 4 4 0 0 0 0 4 4 0 4 4 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 4 4 4 0 0 0 0 0 0 4 4 4 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 4 4 4 4 0 0 0 0 0 0 0 0 0 0 4 4 4 4 0 0 0 0 0]\n [0 0 0 0 4 4 0 4 0 0 4 4 0 0 0 0 4 4 0 0 4 0 4 4 0 0 0 0]\n [0 0 0 0 0 0 0 4 4 4 4 0 0 0 0 0 0 4 4 4 4 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 4 0 4 0 0 0 0 0 0 4 0 4 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 4 4 4 0 4 4 4 0 0 4 4 4 0 4 4 4 0 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 0 0 4 0 0 4 0 0 0 0 0 4 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n"}, {"role": "user", "content": "[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0]\n [0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 3 3 3 0 3 3 0 0 0 0 0 0 3 3 0 3 3 3 0 0 0 0 0]\n [0 0 0 0 0 0 3 0 3 3 0 0 0 0 0 0 0 0 3 3 0 3 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 3 0 0 3 3 0 0 0 0 3 3 0 0 3 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 3 3 0 3 0 0 0 0 3 0 3 3 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 6 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 6 0 0]\n [0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0]\n [0 0 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> "}], "output": {"role": "assistant", "content": "[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 8 8 8 0 8 8 0 0 0 0 0 0 8 8 0 8 8 8 0 0 0 0 0]\n [0 0 0 0 0 0 8 0 8 8 0 0 0 0 0 0 0 0 8 8 0 8 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 8 0 0 8 8 0 0 0 0 8 8 0 0 8 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 8 8 0 8 0 0 0 0 8 0 8 8 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#"}, "total_tokens": 8049}
ttt_models_no_demonstration_loss/00dbd492/adapter_config.json
ADDED
@@ -0,0 +1 @@
{"base_model_name_or_path": "checkpoints/finetuned_1B_model", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
ttt_models_no_demonstration_loss/00dbd492/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:11eb576939542bb3986eb7721b096b5ca13e28e4b0c121f8e04d53a4a4fb710c
size 153153674
ttt_models_no_demonstration_loss/00dbd492/config.json
ADDED
@@ -0,0 +1 @@
{"_name_or_path": "unsloth/Llama-3.2-1B-Instruct", "architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 16, "num_key_value_heads": 8, "pad_token_id": 128004, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": {"factor": 32.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "rope_theta": 500000.0, "tie_word_embeddings": true, "torch_dtype": "bfloat16", "transformers_version": "4.49.0", "unsloth_fixed": true, "use_cache": true, "vocab_size": 128256}
ttt_models_no_demonstration_loss/00dbd492/log_1740726247.txt
ADDED
@@ -0,0 +1,125 @@
Step 1 | loss:0.02171466499567032 lr:8.333333333333334e-06 tokens_per_second_per_gpu:24799.53648661074
|
| 2 |
+
Step 2 | loss:0.016681579872965813 lr:1.6666666666666667e-05 tokens_per_second_per_gpu:24843.69614211704
|
| 3 |
+
Step 3 | loss:0.014237483963370323 lr:2.5e-05 tokens_per_second_per_gpu:24447.52434492213
|
| 4 |
+
Step 4 | loss:0.012818340212106705 lr:3.3333333333333335e-05 tokens_per_second_per_gpu:23933.17179712239
|
| 5 |
+
Step 5 | loss:0.013356140814721584 lr:4.166666666666667e-05 tokens_per_second_per_gpu:24551.43025577372
|
| 6 |
+
Step 6 | loss:0.009371860884130001 lr:5e-05 tokens_per_second_per_gpu:24747.23366238875
|
| 7 |
+
Step 7 | loss:0.01595083624124527 lr:5.833333333333334e-05 tokens_per_second_per_gpu:24736.452211236126
|
| 8 |
+
Step 8 | loss:0.013696957379579544 lr:6.666666666666667e-05 tokens_per_second_per_gpu:24838.462896804176
|
| 9 |
+
Step 9 | loss:0.019859785214066505 lr:7.500000000000001e-05 tokens_per_second_per_gpu:24690.537326359732
|
| 10 |
+
Step 10 | loss:0.01109483651816845 lr:8.333333333333334e-05 tokens_per_second_per_gpu:24683.851412296608
|
| 11 |
+
Step 11 | loss:0.009253481402993202 lr:9.166666666666667e-05 tokens_per_second_per_gpu:24586.638675449078
|
| 12 |
+
Step 12 | loss:0.005993467755615711 lr:0.0001 tokens_per_second_per_gpu:24313.61179722891
|
| 13 |
+
Step 13 | loss:0.01616750843822956 lr:9.998067787472772e-05 tokens_per_second_per_gpu:24678.971638936684
|
| 14 |
+
Step 14 | loss:0.010712800547480583 lr:9.99227264326918e-05 tokens_per_second_per_gpu:25206.11307232062
|
| 15 |
+
Step 15 | loss:0.048287104815244675 lr:9.982619046369321e-05 tokens_per_second_per_gpu:24787.23636285909
|
| 16 |
+
Step 16 | loss:0.011139166541397572 lr:9.96911445789354e-05 tokens_per_second_per_gpu:24857.834858423954
|
| 17 |
+
Step 17 | loss:0.026212723925709724 lr:9.951769315335844e-05 tokens_per_second_per_gpu:24145.862207273138
|
| 18 |
+
Step 18 | loss:0.004827597178518772 lr:9.930597024496931e-05 tokens_per_second_per_gpu:24454.49683916025
|
| 19 |
+
Step 19 | loss:0.00912852305918932 lr:9.905613949123036e-05 tokens_per_second_per_gpu:24743.823161291446
|
| 20 |
+
Step 20 | loss:0.0053284624591469765 lr:9.876839398258641e-05 tokens_per_second_per_gpu:23755.991273910775
|
| 21 |
+
Step 21 | loss:0.005577064584940672 lr:9.844295611322804e-05 tokens_per_second_per_gpu:23527.83733691267
|
| 22 |
+
Step 22 | loss:0.010724058374762535 lr:9.808007740920646e-05 tokens_per_second_per_gpu:24893.51677410872
|
| 23 |
+
Step 23 | loss:0.006071750074625015 lr:9.768003833403278e-05 tokens_per_second_per_gpu:23904.260074057456
|
| 24 |
+
Step 24 | loss:0.0048974622040987015 lr:9.724314807191195e-05 tokens_per_second_per_gpu:24542.854684088925
|
| 25 |
+
Step 25 | loss:0.0051076156087219715 lr:9.676974428877901e-05 tokens_per_second_per_gpu:24661.327385759654
|
| 26 |
+
Step 26 | loss:0.007608518004417419 lr:9.626019287132203e-05 tokens_per_second_per_gpu:25396.40576388213
|
| 27 |
+
Step 27 | loss:0.0077490974217653275 lr:9.571488764419381e-05 tokens_per_second_per_gpu:24651.160905845383
|
| 28 |
+
Step 28 | loss:0.0056304591707885265 lr:9.513425006563079e-05 tokens_per_second_per_gpu:24398.972457588774
|
| 29 |
+
Step 29 | loss:0.0037231314927339554 lr:9.451872890171419e-05 tokens_per_second_per_gpu:23368.15754322512
|
| 30 |
+
Step 30 | loss:0.0036683056969195604 lr:9.386879987952549e-05 tokens_per_second_per_gpu:23740.102364248192
|
| 31 |
+
Step 31 | loss:0.005371502134948969 lr:9.31849653194641e-05 tokens_per_second_per_gpu:23540.827641282893
|
| 32 |
+
Step 32 | loss:0.002613919321447611 lr:9.246775374701139e-05 tokens_per_second_per_gpu:23648.5529613043
|
| 33 |
+
Step 33 | loss:0.004338048864156008 lr:9.171771948424137e-05 tokens_per_second_per_gpu:23446.788767572474
|
| 34 |
+
Step 34 | loss:0.02053719572722912 lr:9.093544222139337e-05 tokens_per_second_per_gpu:24850.376875641334
|
| 35 |
+
Step 35 | loss:0.005994304083287716 lr:9.012152656883823e-05 tokens_per_second_per_gpu:24769.784522466707
|
| 36 |
+
Step 36 | loss:0.040789030492305756 lr:8.927660158978392e-05 tokens_per_second_per_gpu:24723.23925007207
|
| 37 |
+
Step 37 | loss:0.0066206990741193295 lr:8.84013203140821e-05 tokens_per_second_per_gpu:24845.192210551755
|
| 38 |
+
Step 38 | loss:0.0023146590683609247 lr:8.749635923351107e-05 tokens_per_second_per_gpu:22734.96259290467
|
| 39 |
+
Step 39 | loss:0.008204630576074123 lr:8.656241777892543e-05 tokens_per_second_per_gpu:24757.0390919208
|
| 40 |
+
Step 40 | loss:0.004942636471241713 lr:8.560021777967649e-05 tokens_per_second_per_gpu:24135.156379932094
|
| 41 |
+
Step 41 | loss:0.006699174176901579 lr:8.461050290572114e-05 tokens_per_second_per_gpu:24770.778195488692
|
| 42 |
+
Step 42 | loss:0.013007741421461105 lr:8.359403809285053e-05 tokens_per_second_per_gpu:23378.419320526413
|
| 43 |
+
Step 43 | loss:0.0024298480711877346 lr:8.255160895148263e-05 tokens_per_second_per_gpu:23418.361003498532
|
| 44 |
+
Step 44 | loss:0.007806473411619663 lr:8.148402115947571e-05 tokens_per_second_per_gpu:24907.374674754657
|
| 45 |
+
Step 45 | loss:0.006131949834525585 lr:8.039209983943201e-05 tokens_per_second_per_gpu:24741.19646038393
|
| 46 |
+
Step 46 | loss:0.005548450630158186 lr:7.927668892097289e-05 tokens_per_second_per_gpu:24839.190785200037
|
| 47 |
+
Step 47 | loss:0.0040435497649014 lr:7.813865048847819e-05 tokens_per_second_per_gpu:24774.678918304246
|
| 48 |
+
Step 48 | loss:0.0040214089676737785 lr:7.697886411479423e-05 tokens_per_second_per_gpu:24822.724236543418
|
| 49 |
+
Step 49 | loss:0.003595452755689621 lr:7.579822618142505e-05 tokens_per_second_per_gpu:22838.920704094915
|
| 50 |
+
Step 50 | loss:0.0034062040504068136 lr:7.459764918573264e-05 tokens_per_second_per_gpu:24347.878264611812
|
| 51 |
+
Step 51 | loss:0.0065414332784712315 lr:7.33780610356814e-05 tokens_per_second_per_gpu:24712.145178316037
|
| 52 |
+
Step 52 | loss:0.006530676037073135 lr:7.214040433267198e-05 tokens_per_second_per_gpu:24867.837606345944
|
| 53 |
+
Step 53 | loss:0.003939696121960878 lr:7.088563564301873e-05 tokens_per_second_per_gpu:24825.77595107384
|
| 54 |
+
Step 54 | loss:0.0020872438326478004 lr:6.961472475863405e-05 tokens_per_second_per_gpu:24434.82682997351
|
| 55 |
+
Step 55 | loss:0.003674750216305256 lr:6.832865394749065e-05 tokens_per_second_per_gpu:24346.452789845094
|
| 56 |
+
Step 56 | loss:0.0031817385461181402 lr:6.702841719444141e-05 tokens_per_second_per_gpu:24237.766607105114
|
| 57 |
+
Step 57 | loss:0.00376288965344429 lr:6.571501943298334e-05 tokens_per_second_per_gpu:24808.461139115356
|
| 58 |
+
Step 58 | loss:0.0033553314860910177 lr:6.438947576855968e-05 tokens_per_second_per_gpu:22869.92222592007
|
| 59 |
+
Step 59 | loss:0.0027935828547924757 lr:6.305281069399989e-05 tokens_per_second_per_gpu:25175.441961970097
|
| 60 |
+
Step 60 | loss:0.002476707799360156 lr:6.17060572977047e-05 tokens_per_second_per_gpu:24321.013163182233
|
| 61 |
+
Step 61 | loss:0.014254859648644924 lr:6.035025646518746e-05 tokens_per_second_per_gpu:24685.316280735762
|
| 62 |
+
Step 62 | loss:0.001233600778505206 lr:5.8986456074589404e-05 tokens_per_second_per_gpu:24480.963720607233
|
| 63 |
+
Step 63 | loss:0.0036471583880484104 lr:5.761571018679025e-05 tokens_per_second_per_gpu:23819.61885895252
|
| 64 |
+
Step 64 | loss:0.0021924255415797234 lr:5.6239078230740436e-05 tokens_per_second_per_gpu:22472.849453821615
|
| 65 |
+
Step 65 | loss:0.031539108604192734 lr:5.48576241846443e-05 tokens_per_second_per_gpu:24944.950226126333
|
| 66 |
+
Step 66 | loss:0.026242490857839584 lr:5.347241575362729e-05 tokens_per_second_per_gpu:25282.960280125233
|
| 67 |
+
Step 67 | loss:0.0014482818078249693 lr:5.208452354452274e-05 tokens_per_second_per_gpu:22764.00620512377
|
| 68 |
+
Step 68 | loss:0.0021937191486358643 lr:5.0695020238415756e-05 tokens_per_second_per_gpu:24740.012104045236
|
| 69 |
+
Step 69 | loss:0.0012303405674174428 lr:4.9304979761584256e-05 tokens_per_second_per_gpu:24414.832470167934
|
| 70 |
+
Step 70 | loss:0.0036467849276959896 lr:4.791547645547726e-05 tokens_per_second_per_gpu:24829.54052879216
|
| 71 |
+
Step 71 | loss:0.006036479491740465 lr:4.652758424637271e-05 tokens_per_second_per_gpu:24046.16088589868
|
| 72 |
+
Step 72 | loss:0.0013190842000767589 lr:4.5142375815355706e-05 tokens_per_second_per_gpu:24779.20218009019
|
| 73 |
+
Step 73 | loss:0.0047671315260231495 lr:4.376092176925958e-05 tokens_per_second_per_gpu:24822.18419221936
|
| 74 |
+
Step 74 | loss:0.00270656100474298 lr:4.238428981320975e-05 tokens_per_second_per_gpu:24808.631386936646
|
| 75 |
+
Step 75 | loss:0.0017343678046017885 lr:4.101354392541061e-05 tokens_per_second_per_gpu:24928.522135574483
|
| 76 |
+
Step 76 | loss:0.01889096572995186 lr:3.964974353481254e-05 tokens_per_second_per_gpu:24665.043390863226
|
| 77 |
+
Step 77 | loss:0.001265051425434649 lr:3.829394270229531e-05 tokens_per_second_per_gpu:23702.27031698839
|
| 78 |
+
Step 78 | loss:0.005145639646798372 lr:3.694718930600012e-05 tokens_per_second_per_gpu:25055.08554459717
|
| 79 |
+
Step 79 | loss:0.006528004072606564 lr:3.561052423144032e-05 tokens_per_second_per_gpu:24867.559828399848
|
| 80 |
+
Step 80 | loss:0.001696317340247333 lr:3.428498056701665e-05 tokens_per_second_per_gpu:25385.95831545221
|
| 81 |
+
Step 81 | loss:0.007679549045860767 lr:3.297158280555862e-05 tokens_per_second_per_gpu:24702.51492709328
|
| 82 |
+
Step 82 | loss:0.002695110160857439 lr:3.167134605250938e-05 tokens_per_second_per_gpu:23737.02409777984
|
| 83 |
+
Step 83 | loss:0.012815081514418125 lr:3.0385275241365962e-05 tokens_per_second_per_gpu:24849.901049609758
|
| 84 |
+
Step 84 | loss:0.003646001685410738 lr:2.9114364356981272e-05 tokens_per_second_per_gpu:24182.44061191535
|
| 85 |
+
Step 85 | loss:0.002074805786833167 lr:2.7859595667328026e-05 tokens_per_second_per_gpu:24194.231726138645
|
| 86 |
+
Step 86 | loss:0.0046462249010801315 lr:2.6621938964318595e-05 tokens_per_second_per_gpu:24767.480421765955
|
| 87 |
+
Step 87 | loss:0.0017079500248655677 lr:2.540235081426736e-05 tokens_per_second_per_gpu:24596.542909429492
|
| 88 |
+
Step 88 | loss:0.005164774600416422 lr:2.4201773818574956e-05 tokens_per_second_per_gpu:24782.98783041469
|
| 89 |
+
Step 89 | loss:0.006911298725754023 lr:2.302113588520578e-05 tokens_per_second_per_gpu:24636.292643475033
|
| 90 |
+
Step 90 | loss:0.0024441727437078953 lr:2.1861349511521815e-05 tokens_per_second_per_gpu:22945.55081018147
|
| 91 |
+
Step 91 | loss:0.004577205516397953 lr:2.072331107902713e-05 tokens_per_second_per_gpu:24756.125856878443
|
| 92 |
+
Step 92 | loss:0.0031550955027341843 lr:1.960790016056801e-05 tokens_per_second_per_gpu:24744.97137549072
|
| 93 |
+
Step 93 | loss:0.00976309459656477 lr:1.8515978840524302e-05 tokens_per_second_per_gpu:24918.018839511024
|
| 94 |
+
Step 94 | loss:0.026600737124681473 lr:1.7448391048517376e-05 tokens_per_second_per_gpu:25057.581895695243
|
| 95 |
+
Step 95 | loss:0.0030940358992666006 lr:1.640596190714947e-05 tokens_per_second_per_gpu:24992.012888661295
|
| 96 |
+
Step 96 | loss:0.009170479141175747 lr:1.538949709427886e-05 tokens_per_second_per_gpu:24200.972070945496
|
| 97 |
+
Step 97 | loss:0.00234222412109375 lr:1.4399782220323515e-05 tokens_per_second_per_gpu:24109.775064364134
|
| 98 |
+
Step 98 | loss:0.0032506100833415985 lr:1.3437582221074573e-05 tokens_per_second_per_gpu:24740.176579237974
|
| 99 |
+
Step 99 | loss:0.0017990602646023035 lr:1.250364076648894e-05 tokens_per_second_per_gpu:22926.49619640813
|
| 100 |
+
Step 100 | loss:0.0018816280644387007 lr:1.1598679685917901e-05 tokens_per_second_per_gpu:23431.182921574138
|
| 101 |
+
Step 101 | loss:0.003052518703043461 lr:1.0723398410216084e-05 tokens_per_second_per_gpu:24928.00164160214
|
| 102 |
+
Step 102 | loss:0.003914476837962866 lr:9.878473431161767e-06 tokens_per_second_per_gpu:24702.2027775798
|
| 103 |
+
Step 103 | loss:0.0030484492890536785 lr:9.064557778606631e-06 tokens_per_second_per_gpu:24967.743618136228
|
| 104 |
+
Step 104 | loss:0.0038628869224339724 lr:8.28228051575864e-06 tokens_per_second_per_gpu:24683.686236745187
|
| 105 |
+
Step 105 | loss:0.0020495797507464886 lr:7.532246252988617e-06 tokens_per_second_per_gpu:25335.797562707747
|
| 106 |
+
Step 106 | loss:0.004651036579161882 lr:6.815034680535915e-06 tokens_per_second_per_gpu:24755.092554198167
|
| 107 |
+
Step 107 | loss:0.004140360746532679 lr:6.1312001204745115e-06 tokens_per_second_per_gpu:24457.295854272958
|
| 108 |
+
Step 108 | loss:0.001776965567842126 lr:5.481271098285817e-06 tokens_per_second_per_gpu:23053.14432297976
|
| 109 |
+
Step 109 | loss:0.0020851739682257175 lr:4.865749934369223e-06 tokens_per_second_per_gpu:24908.98976731652
|
| 110 |
+
Step 110 | loss:0.00264181662350893 lr:4.285112355806192e-06 tokens_per_second_per_gpu:24739.941303199477
|
| 111 |
+
Step 111 | loss:0.0019281005952507257 lr:3.7398071286779857e-06 tokens_per_second_per_gpu:24724.303140923836
|
| 112 |
+
Step 112 | loss:0.0016568166902288795 lr:3.230255711220992e-06 tokens_per_second_per_gpu:24763.754684463398
|
| 113 |
+
Step 113 | loss:0.0018343753181397915 lr:2.7568519280880558e-06 tokens_per_second_per_gpu:23258.92924780578
|
| 114 |
+
Step 114 | loss:0.001523887855000794 lr:2.3199616659672354e-06 tokens_per_second_per_gpu:24547.10666769586
|
| 115 |
+
Step 115 | loss:0.006373467389494181 lr:1.919922590793549e-06 tokens_per_second_per_gpu:24839.296903331528
|
| 116 |
+
Step 116 | loss:0.003422510577365756 lr:1.5570438867719694e-06 tokens_per_second_per_gpu:24469.335808907184
|
| 117 |
+
Step 117 | loss:0.0023483678232878447 lr:1.2316060174136002e-06 tokens_per_second_per_gpu:24612.036459527175
|
| 118 |
+
Step 118 | loss:0.0014304585056379437 lr:9.43860508769645e-07 tokens_per_second_per_gpu:23255.143889465333
|
| 119 |
+
Step 119 | loss:0.012361050583422184 lr:6.940297550306896e-07 tokens_per_second_per_gpu:24654.048428441318
|
| 120 |
+
Step 120 | loss:0.0017032718751579523 lr:4.823068466415615e-07 tokens_per_second_per_gpu:24226.177081959457
|
| 121 |
+
Step 121 | loss:0.0029196201357990503 lr:3.088554210646133e-07 tokens_per_second_per_gpu:22726.47404388298
|
| 122 |
+
Step 122 | loss:0.002311995718628168 lr:1.7380953630678488e-07 tokens_per_second_per_gpu:24098.328835806235
|
| 123 |
+
Step 123 | loss:0.0015191853744909167 lr:7.727356730820035e-08 tokens_per_second_per_gpu:22466.787294846526
|
| 124 |
+
Step 124 | loss:0.0008380922954529524 lr:1.9322125272297488e-08 tokens_per_second_per_gpu:22552.2208123924
|
| 125 |
+
Step 125 | loss:0.012475581839680672 lr:0.0 tokens_per_second_per_gpu:25215.189269823266
|
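Each line of these per-task training logs records one optimizer step: the step index, the training loss, the learning rate applied at that step, and token throughput per GPU. The learning-rate column of the 125-step runs is consistent with a linear warmup over the first 12 steps to a peak of 1e-4 followed by cosine decay to zero; the sketch below reproduces those values under that assumption (the schedule is inferred from the logged numbers, not read from any config file in this upload).

import math

def lr_at_step(step, total_steps=125, warmup_steps=12, peak_lr=1e-4):
    # Assumed schedule: linear warmup to peak_lr, then cosine decay to zero.
    if step <= warmup_steps:
        return peak_lr * step / warmup_steps
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return 0.5 * peak_lr * (1.0 + math.cos(math.pi * progress))

# lr_at_step(13) ~= 9.998e-05 and lr_at_step(124) ~= 1.93e-08, matching the lr
# values logged at those steps above. The shorter runs in this upload (48 and
# 72 steps) show the same shape with warmups of 4 and 7 steps respectively.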
ttt_models_no_demonstration_loss/00dbd492/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
ttt_models_no_demonstration_loss/00dbd492/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl
ADDED
|
@@ -0,0 +1 @@
| 1 |
+
{"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[2 2 2 2 2 0 0]\n [2 0 0 0 2 0 0]\n [2 0 2 0 2 0 0]\n [2 0 0 0 2 0 0]\n [2 2 2 2 2 0 0]\n [0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0]\n [2 2 2 2 2 0 0]\n [2 0 0 0 2 0 0]\n [2 0 2 0 2 0 0]\n [2 0 0 0 2 0 0]\n [2 2 2 2 2 0 0]\n [0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0]] -> [[2 2 2 2 2 0 0]\n [2 8 8 8 2 0 0]\n [2 8 2 8 2 0 0]\n [2 8 8 8 2 0 0]\n [2 2 2 2 2 0 0]\n [0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0]\n [2 2 2 2 2 0 0]\n [2 8 8 8 2 0 0]\n [2 8 2 8 2 0 0]\n [2 8 8 8 2 0 0]\n [2 2 2 2 2 0 0]\n [0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0]]#\n[[2 2 2 2 2 2 2 0 0]\n [2 0 0 0 0 0 2 0 0]\n [2 0 0 0 0 0 2 0 0]\n [2 0 0 2 0 0 2 0 0]\n [2 0 0 0 0 0 2 0 0]\n [2 0 0 0 0 0 2 0 0]\n [2 2 2 2 2 2 2 0 0]\n [0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0]\n [2 2 2 2 2 2 2 0 0]\n [2 0 0 0 0 0 2 0 0]\n [2 0 0 0 0 0 2 0 0]\n [2 0 0 2 0 0 2 0 0]\n [2 0 0 0 0 0 2 0 0]\n [2 0 0 0 0 0 2 0 0]\n [2 2 2 2 2 2 2 0 0]\n [0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0]] -> [[2 2 2 2 2 2 2 0 0]\n [2 4 4 4 4 4 2 0 0]\n [2 4 4 4 4 4 2 0 0]\n [2 4 4 2 4 4 2 0 0]\n [2 4 4 4 4 4 2 0 0]\n [2 4 4 4 4 4 2 0 0]\n [2 2 2 2 2 2 2 0 0]\n [0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0]\n [2 2 2 2 2 2 2 0 0]\n [2 4 4 4 4 4 2 0 0]\n [2 4 4 4 4 4 2 0 0]\n [2 4 4 2 4 4 2 0 0]\n [2 4 4 4 4 4 2 0 0]\n [2 4 4 4 4 4 2 0 0]\n [2 2 2 2 2 2 2 0 0]\n [0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0]]#\n[[0 0 0 0 0 0 2 2 2 2 2 2 2 2 2]\n [0 0 0 0 0 0 2 0 0 0 0 0 0 0 2]\n [0 0 0 0 0 0 2 0 0 0 0 0 0 0 2]\n [0 0 0 0 0 0 2 0 0 0 0 0 0 0 2]\n [0 0 0 0 0 0 2 0 0 0 2 0 0 0 2]\n [0 0 0 0 0 0 2 0 0 0 0 0 0 0 2]\n [0 0 0 0 0 0 2 0 0 0 0 0 0 0 2]\n [0 0 0 0 0 0 2 0 0 0 0 0 0 0 2]\n [0 0 0 0 0 0 2 2 2 2 2 2 2 2 2]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0 0 0]\n [0 2 0 0 0 2 0 0 0 0 0 0 0 0 0]\n [0 2 0 2 0 2 0 0 0 0 0 0 0 0 0]\n [0 2 0 0 0 2 0 0 0 0 0 0 0 0 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 2 2 2 2 2 2 2 2 2]\n [0 0 0 0 0 0 2 0 0 0 0 0 0 0 2]\n [0 0 0 0 0 0 2 0 0 0 0 0 0 0 2]\n [0 0 0 0 0 0 2 0 0 0 0 0 0 0 2]\n [0 0 0 0 0 0 2 0 0 0 2 0 0 0 2]\n [0 0 0 0 0 0 2 0 0 0 0 0 0 0 2]\n [0 0 0 0 0 0 2 0 0 0 0 0 0 0 2]\n [0 0 0 0 0 0 2 0 0 0 0 0 0 0 2]\n [0 0 0 0 0 0 2 2 2 2 2 2 2 2 2]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0 0 0]\n [0 2 0 0 0 2 0 0 0 0 0 0 0 0 0]\n [0 2 0 2 0 2 0 0 0 0 0 0 0 0 0]\n [0 2 0 0 0 2 0 0 0 0 0 0 0 0 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 0 0 2 2 2 2 2 2 2 2 2]\n [0 0 0 0 0 0 2 3 3 3 3 3 3 3 2]\n [0 0 0 0 0 0 2 3 3 3 3 3 3 3 2]\n [0 0 0 0 0 0 2 3 3 3 3 3 3 3 2]\n [0 0 0 0 0 0 2 3 3 3 2 3 3 3 2]\n [0 0 0 0 0 0 2 3 3 3 3 3 3 3 2]\n [0 0 0 0 0 0 2 3 3 3 3 3 3 3 2]\n [0 0 0 0 0 0 2 3 3 3 3 3 3 3 2]\n [0 0 0 0 0 0 2 2 2 2 2 2 2 2 2]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0 0 0]\n [0 2 8 8 8 2 0 0 0 0 0 0 0 0 0]\n [0 2 8 2 8 2 0 0 0 0 0 0 0 0 0]\n [0 2 8 8 8 2 0 0 0 0 0 0 0 0 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 2 2 2 2 2 2 2 2 2]\n [0 0 0 0 0 0 2 3 3 3 3 3 3 3 2]\n [0 0 0 0 0 0 2 3 3 3 3 3 3 3 2]\n [0 0 0 0 0 0 2 3 3 3 3 3 3 3 2]\n [0 0 0 0 0 0 2 3 3 3 2 3 3 3 2]\n [0 0 0 0 0 0 2 3 3 3 3 3 3 3 2]\n [0 0 0 0 0 0 2 3 3 3 3 3 3 3 2]\n [0 0 0 0 0 0 2 3 3 3 3 3 3 3 2]\n [0 0 0 0 0 0 2 2 2 2 2 2 2 2 2]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0 0 0]\n [0 2 8 8 8 2 0 0 0 0 0 0 0 0 0]\n [0 2 8 2 8 2 0 0 0 0 0 0 
0 0 0]\n [0 2 8 8 8 2 0 0 0 0 0 0 0 0 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0 0 0]]#\n"}, {"role": "user", "content": "[[0 2 2 2 2 2 0 0 0 0 0 0 0]\n [0 2 0 0 0 2 0 0 0 0 0 0 0]\n [0 2 0 2 0 2 0 0 0 0 0 0 0]\n [0 2 0 0 0 2 0 0 0 0 0 0 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 2 2 2 2 2 2 2 0]\n [0 0 0 0 0 2 0 0 0 0 0 2 0]\n [0 0 0 0 0 2 0 0 0 0 0 2 0]\n [0 0 0 0 0 2 0 0 2 0 0 2 0]\n [0 0 0 0 0 2 0 0 0 0 0 2 0]\n [0 0 0 0 0 2 0 0 0 0 0 2 0]\n [0 0 0 0 0 2 2 2 2 2 2 2 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0]\n [0 2 0 0 0 2 0 0 0 0 0 0 0]\n [0 2 0 2 0 2 0 0 0 0 0 0 0]\n [0 2 0 0 0 2 0 0 0 0 0 0 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 2 2 2 2 2 2 2 0]\n [0 0 0 0 0 2 0 0 0 0 0 2 0]\n [0 0 0 0 0 2 0 0 0 0 0 2 0]\n [0 0 0 0 0 2 0 0 2 0 0 2 0]\n [0 0 0 0 0 2 0 0 0 0 0 2 0]\n [0 0 0 0 0 2 0 0 0 0 0 2 0]\n [0 0 0 0 0 2 2 2 2 2 2 2 0]] -> "}], "output": {"role": "assistant", "content": "[[0 2 2 2 2 2 0 0 0 0 0 0 0]\n [0 2 8 8 8 2 0 0 0 0 0 0 0]\n [0 2 8 2 8 2 0 0 0 0 0 0 0]\n [0 2 8 8 8 2 0 0 0 0 0 0 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 2 2 2 2 2 2 2 0]\n [0 0 0 0 0 2 4 4 4 4 4 2 0]\n [0 0 0 0 0 2 4 4 4 4 4 2 0]\n [0 0 0 0 0 2 4 4 2 4 4 2 0]\n [0 0 0 0 0 2 4 4 4 4 4 2 0]\n [0 0 0 0 0 2 4 4 4 4 4 2 0]\n [0 0 0 0 0 2 2 2 2 2 2 2 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0]\n [0 2 8 8 8 2 0 0 0 0 0 0 0]\n [0 2 8 2 8 2 0 0 0 0 0 0 0]\n [0 2 8 8 8 2 0 0 0 0 0 0 0]\n [0 2 2 2 2 2 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 2 2 2 2 2 2 2 0]\n [0 0 0 0 0 2 4 4 4 4 4 2 0]\n [0 0 0 0 0 2 4 4 4 4 4 2 0]\n [0 0 0 0 0 2 4 4 2 4 4 2 0]\n [0 0 0 0 0 2 4 4 4 4 4 2 0]\n [0 0 0 0 0 2 4 4 4 4 4 2 0]\n [0 0 0 0 0 2 2 2 2 2 2 2 0]]#"}, "total_tokens": 4435}
|
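Each record in these td_True_*.jsonl files is one chat-formatted evaluation example: "input" holds a system message with the demonstration input/output grids (rendered as numpy-style arrays) plus a user message with the test input grid, "output" holds the expected assistant reply, and "total_tokens" gives the tokenized length. A minimal sketch for reading one record and recovering the grids as lists of ints; parse_grids is an illustrative helper, not something shipped in this repository.

import json
import re

def parse_grids(text):
    # Pull out every "[[ ... ]]" block and split it into rows of ints.
    grids = []
    for block in re.findall(r"\[\[.*?\]\]", text, flags=re.DOTALL):
        rows = re.findall(r"\[([0-9 ]+)\]", block)
        grids.append([[int(v) for v in row.split()] for row in rows])
    return grids

path = "ttt_models_no_demonstration_loss/00dbd492/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl"
with open(path) as f:
    record = json.loads(f.readline())

demos      = parse_grids(record["input"][0]["content"])   # system message: example grids
test_input = parse_grids(record["input"][-1]["content"])  # user message: test grid
expected   = parse_grids(record["output"]["content"])     # assistant message: target grid
print(len(test_input[0]), "rows in the test grid;", record["total_tokens"], "total tokens")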
ttt_models_no_demonstration_loss/03560426/adapter_config.json
ADDED
|
@@ -0,0 +1 @@
| 1 |
+
{"base_model_name_or_path": "checkpoints/finetuned_1B_model", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
|
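Every task directory in ttt_models_no_demonstration_loss/ carries the same PEFT LoRA configuration: rank 128, alpha 16, no dropout, adapters on the attention projections (q_proj, v_proj), the MLP projections (gate_proj, up_proj, down_proj) and lm_head, trained on top of checkpoints/finetuned_1B_model. Assuming a compatible base checkpoint is available locally (the path stored in adapter_config.json refers to the training machine, not to a loadable model in this upload), one plausible way to attach a single adapter with the peft library is sketched below; this is not a script included in the repository.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_path   = "checkpoints/finetuned_1B_model"             # path recorded in adapter_config.json
adapter_dir = "ttt_models_no_demonstration_loss/03560426"  # contains adapter_config.json + adapter_model.bin

model = AutoModelForCausalLM.from_pretrained(base_path, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(model, adapter_dir)      # loads the rank-128 LoRA weights onto the base model
tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
model.eval()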
ttt_models_no_demonstration_loss/03560426/adapter_model.bin
ADDED
|
@@ -0,0 +1,3 @@
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65052241959e040598f024da69a76be0f09895fd09c1901cae166e6cad66b972
|
| 3 |
+
size 153153674
|
ttt_models_no_demonstration_loss/03560426/config.json
ADDED
|
@@ -0,0 +1 @@
| 1 |
+
{"_name_or_path": "unsloth/Llama-3.2-1B-Instruct", "architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 16, "num_key_value_heads": 8, "pad_token_id": 128004, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": {"factor": 32.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "rope_theta": 500000.0, "tie_word_embeddings": true, "torch_dtype": "bfloat16", "transformers_version": "4.49.0", "unsloth_fixed": true, "use_cache": true, "vocab_size": 128256}
|
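config.json pins the base architecture shared by every adapter: Llama-3.2-1B-Instruct with 16 hidden layers, hidden size 2048, grouped-query attention (32 query heads, 8 KV heads), tied word embeddings, and llama3-style rope scaling (factor 32 over an original 8192-token context). If needed, the file can be inspected directly with transformers; a small sketch, assuming the relative path from this listing:

from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("ttt_models_no_demonstration_loss/03560426")
print(cfg.model_type, cfg.num_hidden_layers, cfg.hidden_size)  # llama 16 2048
print(cfg.rope_scaling)                                        # llama3 rope scaling, factor 32.0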
ttt_models_no_demonstration_loss/03560426/log_1740726294.txt
ADDED
|
@@ -0,0 +1,125 @@
| 1 |
+
Step 1 | loss:0.06873892992734909 lr:8.333333333333334e-06 tokens_per_second_per_gpu:25355.50899635703
|
| 2 |
+
Step 2 | loss:0.028789404779672623 lr:1.6666666666666667e-05 tokens_per_second_per_gpu:24266.58285108669
|
| 3 |
+
Step 3 | loss:0.0661688894033432 lr:2.5e-05 tokens_per_second_per_gpu:25592.361645091976
|
| 4 |
+
Step 4 | loss:0.053837694227695465 lr:3.3333333333333335e-05 tokens_per_second_per_gpu:25801.02958189746
|
| 5 |
+
Step 5 | loss:0.034431301057338715 lr:4.166666666666667e-05 tokens_per_second_per_gpu:24824.558690862235
|
| 6 |
+
Step 6 | loss:0.06394153833389282 lr:5e-05 tokens_per_second_per_gpu:25567.859405343628
|
| 7 |
+
Step 7 | loss:0.04607772082090378 lr:5.833333333333334e-05 tokens_per_second_per_gpu:24734.493343454385
|
| 8 |
+
Step 8 | loss:0.04575362429022789 lr:6.666666666666667e-05 tokens_per_second_per_gpu:25056.655646919466
|
| 9 |
+
Step 9 | loss:0.049287743866443634 lr:7.500000000000001e-05 tokens_per_second_per_gpu:24486.058803654087
|
| 10 |
+
Step 10 | loss:0.041416436433792114 lr:8.333333333333334e-05 tokens_per_second_per_gpu:24849.92995677606
|
| 11 |
+
Step 11 | loss:0.055559054017066956 lr:9.166666666666667e-05 tokens_per_second_per_gpu:25751.542430915873
|
| 12 |
+
Step 12 | loss:0.07937239110469818 lr:0.0001 tokens_per_second_per_gpu:25307.798211216283
|
| 13 |
+
Step 13 | loss:0.042324408888816833 lr:9.998067787472772e-05 tokens_per_second_per_gpu:25484.56373951837
|
| 14 |
+
Step 14 | loss:0.06463264673948288 lr:9.99227264326918e-05 tokens_per_second_per_gpu:24896.783661029265
|
| 15 |
+
Step 15 | loss:0.024323944002389908 lr:9.982619046369321e-05 tokens_per_second_per_gpu:24021.49813216531
|
| 16 |
+
Step 16 | loss:0.020875846967101097 lr:9.96911445789354e-05 tokens_per_second_per_gpu:24567.698097500528
|
| 17 |
+
Step 17 | loss:0.0583779513835907 lr:9.951769315335844e-05 tokens_per_second_per_gpu:25073.753644686985
|
| 18 |
+
Step 18 | loss:0.06957133859395981 lr:9.930597024496931e-05 tokens_per_second_per_gpu:25705.467782028783
|
| 19 |
+
Step 19 | loss:0.04471553862094879 lr:9.905613949123036e-05 tokens_per_second_per_gpu:24979.025541386993
|
| 20 |
+
Step 20 | loss:0.03664964437484741 lr:9.876839398258641e-05 tokens_per_second_per_gpu:24652.892082276263
|
| 21 |
+
Step 21 | loss:0.026253795251250267 lr:9.844295611322804e-05 tokens_per_second_per_gpu:24689.4291257217
|
| 22 |
+
Step 22 | loss:0.024590816348791122 lr:9.808007740920646e-05 tokens_per_second_per_gpu:24502.964211730065
|
| 23 |
+
Step 23 | loss:0.03540327027440071 lr:9.768003833403278e-05 tokens_per_second_per_gpu:24705.88985338473
|
| 24 |
+
Step 24 | loss:0.022299112752079964 lr:9.724314807191195e-05 tokens_per_second_per_gpu:24746.099464504332
|
| 25 |
+
Step 25 | loss:0.05612886697053909 lr:9.676974428877901e-05 tokens_per_second_per_gpu:26018.75390028042
|
| 26 |
+
Step 26 | loss:0.06222271919250488 lr:9.626019287132203e-05 tokens_per_second_per_gpu:25527.600675114885
|
| 27 |
+
Step 27 | loss:0.023514259606599808 lr:9.571488764419381e-05 tokens_per_second_per_gpu:24404.740585790867
|
| 28 |
+
Step 28 | loss:0.024314450100064278 lr:9.513425006563079e-05 tokens_per_second_per_gpu:24644.70180019579
|
| 29 |
+
Step 29 | loss:0.05481883883476257 lr:9.451872890171419e-05 tokens_per_second_per_gpu:25809.458654336864
|
| 30 |
+
Step 30 | loss:0.02623671106994152 lr:9.386879987952549e-05 tokens_per_second_per_gpu:24829.58202820401
|
| 31 |
+
Step 31 | loss:0.016204971820116043 lr:9.31849653194641e-05 tokens_per_second_per_gpu:25072.927153695553
|
| 32 |
+
Step 32 | loss:0.017854133620858192 lr:9.246775374701139e-05 tokens_per_second_per_gpu:24749.557483809076
|
| 33 |
+
Step 33 | loss:0.051328662782907486 lr:9.171771948424137e-05 tokens_per_second_per_gpu:25596.968287991127
|
| 34 |
+
Step 34 | loss:0.009308368898928165 lr:9.093544222139337e-05 tokens_per_second_per_gpu:24539.867900661986
|
| 35 |
+
Step 35 | loss:0.01959335058927536 lr:9.012152656883823e-05 tokens_per_second_per_gpu:24756.59167711336
|
| 36 |
+
Step 36 | loss:0.015089204534888268 lr:8.927660158978392e-05 tokens_per_second_per_gpu:24639.88655218133
|
| 37 |
+
Step 37 | loss:0.033478204160928726 lr:8.84013203140821e-05 tokens_per_second_per_gpu:25514.52953603085
|
| 38 |
+
Step 38 | loss:0.0165327750146389 lr:8.749635923351107e-05 tokens_per_second_per_gpu:24434.648710750826
|
| 39 |
+
Step 39 | loss:0.04327801987528801 lr:8.656241777892543e-05 tokens_per_second_per_gpu:25089.7841270633
|
| 40 |
+
Step 40 | loss:0.03611057251691818 lr:8.560021777967649e-05 tokens_per_second_per_gpu:25779.691108408035
|
| 41 |
+
Step 41 | loss:0.017709212377667427 lr:8.461050290572114e-05 tokens_per_second_per_gpu:24406.08715469676
|
| 42 |
+
Step 42 | loss:0.02003944292664528 lr:8.359403809285053e-05 tokens_per_second_per_gpu:24735.18029928487
|
| 43 |
+
Step 43 | loss:0.022438379004597664 lr:8.255160895148263e-05 tokens_per_second_per_gpu:24266.50490565329
|
| 44 |
+
Step 44 | loss:0.023830177262425423 lr:8.148402115947571e-05 tokens_per_second_per_gpu:25099.49448197906
|
| 45 |
+
Step 45 | loss:0.0480225645005703 lr:8.039209983943201e-05 tokens_per_second_per_gpu:25122.511146562374
|
| 46 |
+
Step 46 | loss:0.0073935952968895435 lr:7.927668892097289e-05 tokens_per_second_per_gpu:24515.909947524367
|
| 47 |
+
Step 47 | loss:0.03599407896399498 lr:7.813865048847819e-05 tokens_per_second_per_gpu:25549.143042016985
|
| 48 |
+
Step 48 | loss:0.04345555603504181 lr:7.697886411479423e-05 tokens_per_second_per_gpu:25274.33130595374
|
| 49 |
+
Step 49 | loss:0.03225249424576759 lr:7.579822618142505e-05 tokens_per_second_per_gpu:25794.72849847497
|
| 50 |
+
Step 50 | loss:0.021313896402716637 lr:7.459764918573264e-05 tokens_per_second_per_gpu:24891.85482558273
|
| 51 |
+
Step 51 | loss:0.015373020432889462 lr:7.33780610356814e-05 tokens_per_second_per_gpu:24545.303415762108
|
| 52 |
+
Step 52 | loss:0.02718484029173851 lr:7.214040433267198e-05 tokens_per_second_per_gpu:25720.786905623325
|
| 53 |
+
Step 53 | loss:0.030067026615142822 lr:7.088563564301873e-05 tokens_per_second_per_gpu:25858.290268307526
|
| 54 |
+
Step 54 | loss:0.02806292288005352 lr:6.961472475863405e-05 tokens_per_second_per_gpu:24432.160802399805
|
| 55 |
+
Step 55 | loss:0.033236317336559296 lr:6.832865394749065e-05 tokens_per_second_per_gpu:24737.441764854277
|
| 56 |
+
Step 56 | loss:0.03393825888633728 lr:6.702841719444141e-05 tokens_per_second_per_gpu:25157.059629558335
|
| 57 |
+
Step 57 | loss:0.041858695447444916 lr:6.571501943298334e-05 tokens_per_second_per_gpu:25874.21947817479
|
| 58 |
+
Step 58 | loss:0.020077107474207878 lr:6.438947576855968e-05 tokens_per_second_per_gpu:25598.43250673414
|
| 59 |
+
Step 59 | loss:0.034672707319259644 lr:6.305281069399989e-05 tokens_per_second_per_gpu:25334.913860837307
|
| 60 |
+
Step 60 | loss:0.020956991240382195 lr:6.17060572977047e-05 tokens_per_second_per_gpu:25748.462953548187
|
| 61 |
+
Step 61 | loss:0.014053513295948505 lr:6.035025646518746e-05 tokens_per_second_per_gpu:25149.435907375217
|
| 62 |
+
Step 62 | loss:0.011400323361158371 lr:5.8986456074589404e-05 tokens_per_second_per_gpu:25350.590942819028
|
| 63 |
+
Step 63 | loss:0.009925710037350655 lr:5.761571018679025e-05 tokens_per_second_per_gpu:24740.09027026493
|
| 64 |
+
Step 64 | loss:0.007297604810446501 lr:5.6239078230740436e-05 tokens_per_second_per_gpu:25094.702588859553
|
| 65 |
+
Step 65 | loss:0.031133251264691353 lr:5.48576241846443e-05 tokens_per_second_per_gpu:25861.302847600233
|
| 66 |
+
Step 66 | loss:0.028682895004749298 lr:5.347241575362729e-05 tokens_per_second_per_gpu:25640.08141431363
|
| 67 |
+
Step 67 | loss:0.016100572422146797 lr:5.208452354452274e-05 tokens_per_second_per_gpu:24446.017232925362
|
| 68 |
+
Step 68 | loss:0.013851425610482693 lr:5.0695020238415756e-05 tokens_per_second_per_gpu:25345.63534214957
|
| 69 |
+
Step 69 | loss:0.017564577981829643 lr:4.9304979761584256e-05 tokens_per_second_per_gpu:25044.935498184015
|
| 70 |
+
Step 70 | loss:0.020642656832933426 lr:4.791547645547726e-05 tokens_per_second_per_gpu:24663.80683816403
|
| 71 |
+
Step 71 | loss:0.015640761703252792 lr:4.652758424637271e-05 tokens_per_second_per_gpu:25385.99692472134
|
| 72 |
+
Step 72 | loss:0.020120615139603615 lr:4.5142375815355706e-05 tokens_per_second_per_gpu:25332.13346644954
|
| 73 |
+
Step 73 | loss:0.014049666933715343 lr:4.376092176925958e-05 tokens_per_second_per_gpu:24555.93861866578
|
| 74 |
+
Step 74 | loss:0.0038393745198845863 lr:4.238428981320975e-05 tokens_per_second_per_gpu:25854.39016615491
|
| 75 |
+
Step 75 | loss:0.009305994026362896 lr:4.101354392541061e-05 tokens_per_second_per_gpu:25774.829428800927
|
| 76 |
+
Step 76 | loss:0.014746712520718575 lr:3.964974353481254e-05 tokens_per_second_per_gpu:24391.592117449876
|
| 77 |
+
Step 77 | loss:0.007592944428324699 lr:3.829394270229531e-05 tokens_per_second_per_gpu:24639.16189518319
|
| 78 |
+
Step 78 | loss:0.016119051724672318 lr:3.694718930600012e-05 tokens_per_second_per_gpu:24675.461597098696
|
| 79 |
+
Step 79 | loss:0.03586229309439659 lr:3.561052423144032e-05 tokens_per_second_per_gpu:25615.43807052638
|
| 80 |
+
Step 80 | loss:0.02263660542666912 lr:3.428498056701665e-05 tokens_per_second_per_gpu:25547.558320444867
|
| 81 |
+
Step 81 | loss:0.017611002549529076 lr:3.297158280555862e-05 tokens_per_second_per_gpu:24699.865287796358
|
| 82 |
+
Step 82 | loss:0.0182948037981987 lr:3.167134605250938e-05 tokens_per_second_per_gpu:25254.316327447534
|
| 83 |
+
Step 83 | loss:0.009814860299229622 lr:3.0385275241365962e-05 tokens_per_second_per_gpu:25192.085517524298
|
| 84 |
+
Step 84 | loss:0.01945209503173828 lr:2.9114364356981272e-05 tokens_per_second_per_gpu:24826.639231734553
|
| 85 |
+
Step 85 | loss:0.01448115799576044 lr:2.7859595667328026e-05 tokens_per_second_per_gpu:25283.100427963775
|
| 86 |
+
Step 86 | loss:0.019683491438627243 lr:2.6621938964318595e-05 tokens_per_second_per_gpu:24939.65084535261
|
| 87 |
+
Step 87 | loss:0.013437245041131973 lr:2.540235081426736e-05 tokens_per_second_per_gpu:25532.69717401726
|
| 88 |
+
Step 88 | loss:0.00560104101896286 lr:2.4201773818574956e-05 tokens_per_second_per_gpu:24578.930126139396
|
| 89 |
+
Step 89 | loss:0.018941359594464302 lr:2.302113588520578e-05 tokens_per_second_per_gpu:24875.171786909723
|
| 90 |
+
Step 90 | loss:0.02160125970840454 lr:2.1861349511521815e-05 tokens_per_second_per_gpu:23106.728312134685
|
| 91 |
+
Step 91 | loss:0.018141847103834152 lr:2.072331107902713e-05 tokens_per_second_per_gpu:25113.610978461253
|
| 92 |
+
Step 92 | loss:0.01967688463628292 lr:1.960790016056801e-05 tokens_per_second_per_gpu:25066.7569025494
|
| 93 |
+
Step 93 | loss:0.006617952138185501 lr:1.8515978840524302e-05 tokens_per_second_per_gpu:24737.37229446754
|
| 94 |
+
Step 94 | loss:0.017355797812342644 lr:1.7448391048517376e-05 tokens_per_second_per_gpu:24534.340553668015
|
| 95 |
+
Step 95 | loss:0.014681623317301273 lr:1.640596190714947e-05 tokens_per_second_per_gpu:24059.87101163619
|
| 96 |
+
Step 96 | loss:0.0055124047212302685 lr:1.538949709427886e-05 tokens_per_second_per_gpu:25198.841063228097
|
| 97 |
+
Step 97 | loss:0.009426341392099857 lr:1.4399782220323515e-05 tokens_per_second_per_gpu:24392.055127745443
|
| 98 |
+
Step 98 | loss:0.012135070748627186 lr:1.3437582221074573e-05 tokens_per_second_per_gpu:25106.11185059789
|
| 99 |
+
Step 99 | loss:0.008698921650648117 lr:1.250364076648894e-05 tokens_per_second_per_gpu:25705.315907601827
|
| 100 |
+
Step 100 | loss:0.003103363560512662 lr:1.1598679685917901e-05 tokens_per_second_per_gpu:25836.728819197193
|
| 101 |
+
Step 101 | loss:0.0056654345244169235 lr:1.0723398410216084e-05 tokens_per_second_per_gpu:25188.272111180395
|
| 102 |
+
Step 102 | loss:0.008745436556637287 lr:9.878473431161767e-06 tokens_per_second_per_gpu:24645.069279156738
|
| 103 |
+
Step 103 | loss:0.005034927744418383 lr:9.064557778606631e-06 tokens_per_second_per_gpu:24814.393738297196
|
| 104 |
+
Step 104 | loss:0.010717733763158321 lr:8.28228051575864e-06 tokens_per_second_per_gpu:24736.406278872426
|
| 105 |
+
Step 105 | loss:0.012174836359918118 lr:7.532246252988617e-06 tokens_per_second_per_gpu:25019.475324133193
|
| 106 |
+
Step 106 | loss:0.01452668383717537 lr:6.815034680535915e-06 tokens_per_second_per_gpu:24471.552725735855
|
| 107 |
+
Step 107 | loss:0.00781740341335535 lr:6.1312001204745115e-06 tokens_per_second_per_gpu:25280.938263101023
|
| 108 |
+
Step 108 | loss:0.007270528003573418 lr:5.481271098285817e-06 tokens_per_second_per_gpu:24728.076756096845
|
| 109 |
+
Step 109 | loss:0.011511465534567833 lr:4.865749934369223e-06 tokens_per_second_per_gpu:24606.183508208676
|
| 110 |
+
Step 110 | loss:0.01905427686870098 lr:4.285112355806192e-06 tokens_per_second_per_gpu:24610.60107941008
|
| 111 |
+
Step 111 | loss:0.012217280454933643 lr:3.7398071286779857e-06 tokens_per_second_per_gpu:24619.868405483267
|
| 112 |
+
Step 112 | loss:0.013992072083055973 lr:3.230255711220992e-06 tokens_per_second_per_gpu:24515.808047137965
|
| 113 |
+
Step 113 | loss:0.013453691266477108 lr:2.7568519280880558e-06 tokens_per_second_per_gpu:24558.167525460245
|
| 114 |
+
Step 114 | loss:0.015376430004835129 lr:2.3199616659672354e-06 tokens_per_second_per_gpu:24501.681067427136
|
| 115 |
+
Step 115 | loss:0.01376098208129406 lr:1.919922590793549e-06 tokens_per_second_per_gpu:24711.17391301839
|
| 116 |
+
Step 116 | loss:0.013566456735134125 lr:1.5570438867719694e-06 tokens_per_second_per_gpu:25540.124469807546
|
| 117 |
+
Step 117 | loss:0.014363102614879608 lr:1.2316060174136002e-06 tokens_per_second_per_gpu:25402.452521743522
|
| 118 |
+
Step 118 | loss:0.007248923182487488 lr:9.43860508769645e-07 tokens_per_second_per_gpu:24716.135988606573
|
| 119 |
+
Step 119 | loss:0.023749740794301033 lr:6.940297550306896e-07 tokens_per_second_per_gpu:24647.660438849583
|
| 120 |
+
Step 120 | loss:0.011144503019750118 lr:4.823068466415615e-07 tokens_per_second_per_gpu:25448.159627312372
|
| 121 |
+
Step 121 | loss:0.013236049562692642 lr:3.088554210646133e-07 tokens_per_second_per_gpu:24401.10127579883
|
| 122 |
+
Step 122 | loss:0.015846682712435722 lr:1.7380953630678488e-07 tokens_per_second_per_gpu:25644.55822322529
|
| 123 |
+
Step 123 | loss:0.014041547663509846 lr:7.727356730820035e-08 tokens_per_second_per_gpu:25729.264034301028
|
| 124 |
+
Step 124 | loss:0.02395801991224289 lr:1.9322125272297488e-08 tokens_per_second_per_gpu:25669.79101546797
|
| 125 |
+
Step 125 | loss:0.009783685207366943 lr:0.0 tokens_per_second_per_gpu:25124.043528920585
|
ttt_models_no_demonstration_loss/03560426/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
ttt_models_no_demonstration_loss/03560426/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl
ADDED
|
@@ -0,0 +1 @@
| 1 |
+
{"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 8 0 0 0 0]\n [0 0 0 0 0 8 0 0 0 0]\n [0 0 0 0 0 8 0 6 6 6]\n [1 1 1 1 0 8 0 6 6 6]\n [1 1 1 1 0 8 0 6 6 6]\n [1 1 1 1 0 8 0 6 6 6]\n [1 1 1 1 0 8 0 6 6 6]\n [0 0 0 0 0 8 0 6 6 6]\n [0 0 0 0 0 8 0 0 0 0]\n [0 0 0 0 0 8 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]] -> [[1 1 1 1 0 0 0 0 0 0]\n [1 1 1 8 0 0 0 0 0 0]\n [0 0 0 8 0 0 0 0 0 0]\n [0 0 0 8 0 0 0 0 0 0]\n [0 0 0 8 0 0 0 0 0 0]\n [0 0 0 6 6 6 0 0 0 0]\n [0 0 0 6 6 6 0 0 0 0]\n [0 0 0 6 6 6 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 6 6 6 0 0 0 0]\n [0 0 0 6 6 6 0 0 0 0]\n [0 0 0 6 6 6 0 0 0 0]\n [0 0 0 8 0 0 0 0 0 0]\n [0 0 0 8 0 0 0 0 0 0]\n [0 0 0 8 0 0 0 0 0 0]\n [1 1 1 8 0 0 0 0 0 0]\n [1 1 1 1 0 0 0 0 0 0]]#\n[[0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [2 2 2 0 0 0 0 0 0 0]\n [2 2 2 0 0 0 0 0 0 0]\n [2 2 2 0 7 7 0 8 8 8]\n [2 2 2 0 7 7 0 8 8 8]\n [2 2 2 0 7 7 0 8 8 8]\n [2 2 2 0 7 7 0 8 8 8]\n [2 2 2 0 0 0 0 0 0 0]\n [2 2 2 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]] -> [[2 2 2 0 0 0 0 0 0 0]\n [2 2 2 0 0 0 0 0 0 0]\n [2 2 2 0 0 0 0 0 0 0]\n [2 2 7 7 0 0 0 0 0 0]\n [0 0 7 8 8 8 0 0 0 0]\n [0 0 0 8 8 8 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 8 8 8 0 0 0 0]\n [0 0 7 8 8 8 0 0 0 0]\n [2 2 7 7 0 0 0 0 0 0]\n [2 2 2 0 0 0 0 0 0 0]\n [2 2 2 0 0 0 0 0 0 0]\n [2 2 2 0 0 0 0 0 0 0]]#\n"}, {"role": "user", "content": "[[0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 1]\n [4 4 0 0 0 0 0 0 0 1]\n [4 4 0 8 8 0 6 6 0 1]\n [4 4 0 8 8 0 6 6 0 1]\n [4 4 0 8 8 0 6 6 0 1]\n [4 4 0 8 8 0 6 6 0 1]\n [4 4 0 0 0 0 0 0 0 1]\n [0 0 0 0 0 0 0 0 0 1]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]] -> "}], "output": {"role": "assistant", "content": "[[4 4 0 0 0 0 0 0 0 0]\n [4 4 0 0 0 0 0 0 0 0]\n [4 8 8 0 0 0 0 0 0 0]\n [0 8 6 6 0 0 0 0 0 0]\n [0 0 6 1 0 0 0 0 0 0]\n [0 0 0 1 0 0 0 0 0 0]\n [0 0 0 1 0 0 0 0 0 0]\n [0 0 0 1 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 1 0 0 0 0 0 0]\n [0 0 0 1 0 0 0 0 0 0]\n [0 0 0 1 0 0 0 0 0 0]\n [0 0 6 1 0 0 0 0 0 0]\n [0 8 6 6 0 0 0 0 0 0]\n [4 8 8 0 0 0 0 0 0 0]\n [4 4 0 0 0 0 0 0 0 0]\n [4 4 0 0 0 0 0 0 0 0]]#"}, "total_tokens": 2585}
|
ttt_models_no_demonstration_loss/05a7bcf2/adapter_config.json
ADDED
|
@@ -0,0 +1 @@
| 1 |
+
{"base_model_name_or_path": "checkpoints/finetuned_1B_model", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
|
ttt_models_no_demonstration_loss/05a7bcf2/adapter_model.bin
ADDED
|
@@ -0,0 +1,3 @@
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af462263beb16ca3b3ae3f063ab5325e9c3e8392cfac700bbee597e3e9792eb7
|
| 3 |
+
size 153153674
|
ttt_models_no_demonstration_loss/05a7bcf2/config.json
ADDED
|
@@ -0,0 +1 @@
| 1 |
+
{"_name_or_path": "unsloth/Llama-3.2-1B-Instruct", "architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 16, "num_key_value_heads": 8, "pad_token_id": 128004, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": {"factor": 32.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "rope_theta": 500000.0, "tie_word_embeddings": true, "torch_dtype": "bfloat16", "transformers_version": "4.49.0", "unsloth_fixed": true, "use_cache": true, "vocab_size": 128256}
|
ttt_models_no_demonstration_loss/05a7bcf2/log_1740726329.txt
ADDED
|
@@ -0,0 +1,48 @@
| 1 |
+
Step 1 | loss:0.039678905159235 lr:2.5e-05 tokens_per_second_per_gpu:22681.006280606685
|
| 2 |
+
Step 2 | loss:0.03408001735806465 lr:5e-05 tokens_per_second_per_gpu:22669.98244832154
|
| 3 |
+
Step 3 | loss:0.041948869824409485 lr:7.500000000000001e-05 tokens_per_second_per_gpu:22642.907432672237
|
| 4 |
+
Step 4 | loss:0.055083926767110825 lr:0.0001 tokens_per_second_per_gpu:22593.21284847233
|
| 5 |
+
Step 5 | loss:0.05746036022901535 lr:9.987260573051269e-05 tokens_per_second_per_gpu:22602.94908726301
|
| 6 |
+
Step 6 | loss:0.04072895273566246 lr:9.949107209404665e-05 tokens_per_second_per_gpu:22607.773272067498
|
| 7 |
+
Step 7 | loss:0.037188395857810974 lr:9.885734329855798e-05 tokens_per_second_per_gpu:22527.163045473946
|
| 8 |
+
Step 8 | loss:0.03904883563518524 lr:9.797464868072488e-05 tokens_per_second_per_gpu:22602.153333307426
|
| 9 |
+
Step 9 | loss:0.03491893410682678 lr:9.68474862499881e-05 tokens_per_second_per_gpu:22602.25605382197
|
| 10 |
+
Step 10 | loss:0.025022652000188828 lr:9.548159976772592e-05 tokens_per_second_per_gpu:22525.012546176862
|
| 11 |
+
Step 11 | loss:0.039401911199092865 lr:9.388394947836279e-05 tokens_per_second_per_gpu:22616.856143533507
|
| 12 |
+
Step 12 | loss:0.030871747061610222 lr:9.206267664155907e-05 tokens_per_second_per_gpu:22579.506429251363
|
| 13 |
+
Step 13 | loss:0.02883242815732956 lr:9.002706204621803e-05 tokens_per_second_per_gpu:22523.941930242425
|
| 14 |
+
Step 14 | loss:0.024800028651952744 lr:8.778747871771292e-05 tokens_per_second_per_gpu:22623.957215577673
|
| 15 |
+
Step 15 | loss:0.021133283153176308 lr:8.535533905932738e-05 tokens_per_second_per_gpu:22596.691061998896
|
| 16 |
+
Step 16 | loss:0.022129565477371216 lr:8.274303669726426e-05 tokens_per_second_per_gpu:22593.4123648654
|
| 17 |
+
Step 17 | loss:0.05008659139275551 lr:7.996388332556735e-05 tokens_per_second_per_gpu:22533.729226539173
|
| 18 |
+
Step 18 | loss:0.045507658272981644 lr:7.703204087277988e-05 tokens_per_second_per_gpu:22576.28585580194
|
| 19 |
+
Step 19 | loss:0.023913830518722534 lr:7.396244933600285e-05 tokens_per_second_per_gpu:22512.557338650186
|
| 20 |
+
Step 20 | loss:0.020740512758493423 lr:7.077075065009433e-05 tokens_per_second_per_gpu:22502.664661612645
|
| 21 |
+
Step 21 | loss:0.04162505269050598 lr:6.747320897995493e-05 tokens_per_second_per_gpu:22534.95861362608
|
| 22 |
+
Step 22 | loss:0.07619903236627579 lr:6.408662784207149e-05 tokens_per_second_per_gpu:22566.411523033807
|
| 23 |
+
Step 23 | loss:0.0150748947635293 lr:6.062826447764883e-05 tokens_per_second_per_gpu:22556.475545145553
|
| 24 |
+
Step 24 | loss:0.04363136366009712 lr:5.7115741913664264e-05 tokens_per_second_per_gpu:22526.117316544394
|
| 25 |
+
Step 25 | loss:0.015474126674234867 lr:5.3566959159961615e-05 tokens_per_second_per_gpu:22626.17805466545
|
| 26 |
+
Step 26 | loss:0.01401583757251501 lr:5e-05 tokens_per_second_per_gpu:22543.922892318013
|
| 27 |
+
Step 27 | loss:0.018474383279681206 lr:4.643304084003839e-05 tokens_per_second_per_gpu:22599.81102411733
|
| 28 |
+
Step 28 | loss:0.01915852539241314 lr:4.288425808633575e-05 tokens_per_second_per_gpu:22533.618865303404
|
| 29 |
+
Step 29 | loss:0.015565263107419014 lr:3.937173552235117e-05 tokens_per_second_per_gpu:22566.992635290993
|
| 30 |
+
Step 30 | loss:0.02235231176018715 lr:3.591337215792852e-05 tokens_per_second_per_gpu:22594.521762171935
|
| 31 |
+
Step 31 | loss:0.013161136768758297 lr:3.2526791020045086e-05 tokens_per_second_per_gpu:22575.52168000625
|
| 32 |
+
Step 32 | loss:0.016177590936422348 lr:2.9229249349905684e-05 tokens_per_second_per_gpu:22528.853770725815
|
| 33 |
+
Step 33 | loss:0.01170271821320057 lr:2.603755066399718e-05 tokens_per_second_per_gpu:22597.990716057455
|
| 34 |
+
Step 34 | loss:0.014407655224204063 lr:2.296795912722014e-05 tokens_per_second_per_gpu:22579.39597288011
|
| 35 |
+
Step 35 | loss:0.040084414184093475 lr:2.0036116674432654e-05 tokens_per_second_per_gpu:22552.373489989033
|
| 36 |
+
Step 36 | loss:0.014151111245155334 lr:1.725696330273575e-05 tokens_per_second_per_gpu:22592.03137282949
|
| 37 |
+
Step 37 | loss:0.011484266258776188 lr:1.4644660940672627e-05 tokens_per_second_per_gpu:22539.979915306623
|
| 38 |
+
Step 38 | loss:0.014496506191790104 lr:1.2212521282287092e-05 tokens_per_second_per_gpu:22611.106088385688
|
| 39 |
+
Step 39 | loss:0.04313400015234947 lr:9.972937953781986e-06 tokens_per_second_per_gpu:22555.621771198515
|
| 40 |
+
Step 40 | loss:0.014026709832251072 lr:7.937323358440935e-06 tokens_per_second_per_gpu:22577.851690544736
|
| 41 |
+
Step 41 | loss:0.0685325637459755 lr:6.116050521637218e-06 tokens_per_second_per_gpu:22543.36671913842
|
| 42 |
+
Step 42 | loss:0.0095054404810071 lr:4.5184002322740785e-06 tokens_per_second_per_gpu:22498.51389506046
|
| 43 |
+
Step 43 | loss:0.012865112163126469 lr:3.1525137500119207e-06 tokens_per_second_per_gpu:22488.31534375868
|
| 44 |
+
Step 44 | loss:0.014397703111171722 lr:2.0253513192751373e-06 tokens_per_second_per_gpu:22604.95328473472
|
| 45 |
+
Step 45 | loss:0.013751344755291939 lr:1.1426567014420297e-06 tokens_per_second_per_gpu:22586.627952209405
|
| 46 |
+
Step 46 | loss:0.01800311729311943 lr:5.089279059533658e-07 tokens_per_second_per_gpu:22543.67238500819
|
| 47 |
+
Step 47 | loss:0.007201721891760826 lr:1.2739426948732424e-07 tokens_per_second_per_gpu:22610.833900022742
|
| 48 |
+
Step 48 | loss:0.049706362187862396 lr:0.0 tokens_per_second_per_gpu:22553.470326752995
|
ttt_models_no_demonstration_loss/05a7bcf2/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
ttt_models_no_demonstration_loss/05a7bcf2/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl
ADDED
|
@@ -0,0 +1 @@
| 1 |
+
{"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6]\n [0 0 6 0 0 6 6 6 0 0 6 0 6 0 0 0 6 6 0 0 6 0 6 0 0 6 6 0 0 6]\n [0 0 0 0 0 0 6 6 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 5 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 5 5 5 0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 5 5 5 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 6 6 6 0 0 0 6 6 6 0 0 0 0 0 0 6 6 0 0 0 0 6 6 0 0 0]\n [0 0 0 0 7 6 6 0 0 0 6 7 6 0 0 0 0 0 0 7 6 0 0 0 0 6 6 0 0 0]\n [0 0 0 0 7 7 6 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [0 0 0 0 7 7 7 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [0 0 0 0 7 7 7 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [0 0 0 0 7 7 7 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [0 0 0 0 7 7 7 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [0 0 0 0 7 7 7 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [6 6 6 6 7 7 7 6 6 6 7 7 7 6 6 6 6 6 6 7 7 6 6 6 6 7 7 6 6 6]\n [0 0 6 0 7 7 7 6 0 0 7 7 7 0 0 0 6 6 0 7 7 0 6 0 0 7 7 0 0 6]\n [0 0 0 0 7 7 7 6 0 0 7 7 7 0 0 0 6 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [0 0 0 0 7 7 7 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [0 0 0 0 7 7 7 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [0 0 0 0 7 7 7 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [0 0 0 0 7 7 7 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [0 0 0 0 7 7 7 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [0 0 0 0 7 7 7 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [0 0 0 0 7 7 7 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [0 0 0 0 7 7 7 0 0 0 7 7 7 0 0 0 0 0 0 7 7 0 0 0 0 7 7 0 0 0]\n [7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7]\n [0 0 0 0 5 5 5 0 0 0 5 5 5 0 0 0 0 0 0 5 5 0 0 0 0 5 5 0 0 0]\n [0 0 
0 0 5 5 5 0 0 0 5 5 5 0 0 0 0 0 0 5 5 0 0 0 0 5 5 0 0 0]\n [0 0 0 0 1 1 1 0 0 0 5 5 5 0 0 0 0 0 0 5 5 0 0 0 0 5 5 0 0 0]\n [0 0 0 0 1 1 0 0 0 0 5 5 5 0 0 0 0 0 0 5 5 0 0 0 0 5 5 0 0 0]\n [0 0 0 0 1 1 0 0 0 0 5 5 5 0 0 0 0 0 0 5 5 0 0 0 0 1 1 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n"}, {"role": "user", "content": "[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6]\n [0 0 0 6 6 0 6 0 6 6 0 6 0 6 6 6 0 6 0 6 6 6 6 6 6 0 6 6 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 0 0 0 0 0 6 6 6 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 6 6 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 5 5 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 5 5 0 0 0 0 0 5 5 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> "}], "output": {"role": "assistant", "content": "[[0 0 0 6 6 0 0 0 0 0 6 6 6 6 0 0 0 0 0 6 6 6 6 6 0 0 0 0 0 0]\n [0 0 0 6 6 0 0 0 0 0 7 6 7 6 0 0 0 0 0 6 6 6 6 6 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 6 0 0 0 0 0 7 6 6 6 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 0 0 0 0 0 7 6 6 7 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 0 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 0 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 0 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [6 6 6 7 7 6 6 6 6 6 7 7 7 7 6 6 6 6 6 7 7 7 7 7 6 6 6 6 6 6]\n [0 0 0 7 7 0 6 0 6 6 7 7 7 7 6 6 0 6 0 7 7 7 7 7 6 0 6 6 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 6 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 6 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 0 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 0 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 0 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 0 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 
0 0 7 7 7 7 0 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 0 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 0 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 0 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [0 0 0 7 7 0 0 0 0 0 7 7 7 7 0 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0]\n [7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7]\n [0 0 0 5 5 0 0 0 0 0 5 5 5 5 0 0 0 0 0 5 5 5 5 5 0 0 0 0 0 0]\n [0 0 0 5 5 0 0 0 0 0 5 5 5 5 0 0 0 0 0 5 5 5 5 5 0 0 0 0 0 0]\n [0 0 0 5 5 0 0 0 0 0 5 5 5 5 0 0 0 0 0 5 5 5 5 5 0 0 0 0 0 0]\n [0 0 0 5 5 0 0 0 0 0 1 1 1 1 0 0 0 0 0 5 5 5 5 5 0 0 0 0 0 0]\n [0 0 0 1 1 0 0 0 0 0 1 1 1 1 0 0 0 0 0 5 5 5 5 5 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#"}, "total_tokens": 7383}
|
ttt_models_no_demonstration_loss/0607ce86/adapter_config.json
ADDED
|
@@ -0,0 +1 @@
| 1 |
+
{"base_model_name_or_path": "checkpoints/finetuned_1B_model", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
|
ttt_models_no_demonstration_loss/0607ce86/adapter_model.bin
ADDED
|
@@ -0,0 +1,3 @@
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca834120c09270f48b5f81d8c6e8ab036ec61aa9fb36fe7d1e040ba31eabc1bf
|
| 3 |
+
size 153153674
|
ttt_models_no_demonstration_loss/0607ce86/config.json
ADDED
|
@@ -0,0 +1 @@
| 1 |
+
{"_name_or_path": "unsloth/Llama-3.2-1B-Instruct", "architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 16, "num_key_value_heads": 8, "pad_token_id": 128004, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": {"factor": 32.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "rope_theta": 500000.0, "tie_word_embeddings": true, "torch_dtype": "bfloat16", "transformers_version": "4.49.0", "unsloth_fixed": true, "use_cache": true, "vocab_size": 128256}
|
ttt_models_no_demonstration_loss/0607ce86/log_1740726360.txt
ADDED
|
@@ -0,0 +1,72 @@
| 1 |
+
Step 1 | loss:0.011402091011404991 lr:1.4285714285714285e-05 tokens_per_second_per_gpu:23836.31589556024
|
| 2 |
+
Step 2 | loss:0.014917513355612755 lr:2.857142857142857e-05 tokens_per_second_per_gpu:23681.314071150206
|
| 3 |
+
Step 3 | loss:0.007054246962070465 lr:4.2857142857142856e-05 tokens_per_second_per_gpu:23684.428520767422
|
| 4 |
+
Step 4 | loss:0.00941946916282177 lr:5.714285714285714e-05 tokens_per_second_per_gpu:24240.202603436785
|
| 5 |
+
Step 5 | loss:0.011502179317176342 lr:7.142857142857143e-05 tokens_per_second_per_gpu:24265.963136243037
|
| 6 |
+
Step 6 | loss:0.016797248274087906 lr:8.571428571428571e-05 tokens_per_second_per_gpu:24315.884903548274
|
| 7 |
+
Step 7 | loss:0.020493533462285995 lr:0.0001 tokens_per_second_per_gpu:23772.406557666825
|
| 8 |
+
Step 8 | loss:0.006338887847959995 lr:9.994161134161634e-05 tokens_per_second_per_gpu:24289.289216630845
|
| 9 |
+
Step 9 | loss:0.008962158113718033 lr:9.976658173588244e-05 tokens_per_second_per_gpu:23837.841230264876
|
| 10 |
+
Step 10 | loss:0.00411253422498703 lr:9.947531997255256e-05 tokens_per_second_per_gpu:24194.303235911786
|
| 11 |
+
Step 11 | loss:0.0039035645313560963 lr:9.906850630697068e-05 tokens_per_second_per_gpu:24177.247330449543
|
| 12 |
+
Step 12 | loss:0.004067366011440754 lr:9.85470908713026e-05 tokens_per_second_per_gpu:24246.286805943884
|
| 13 |
+
Step 13 | loss:0.0033290127757936716 lr:9.791229145545831e-05 tokens_per_second_per_gpu:24263.911120410914
|
| 14 |
+
Step 14 | loss:0.002931546187028289 lr:9.716559066288715e-05 tokens_per_second_per_gpu:23536.43310458837
|
| 15 |
+
Step 15 | loss:0.004406672902405262 lr:9.630873244788883e-05 tokens_per_second_per_gpu:24253.432172753593
|
| 16 |
+
Step 16 | loss:0.005373126361519098 lr:9.534371804252728e-05 tokens_per_second_per_gpu:24185.503756718426
|
| 17 |
+
Step 17 | loss:0.0007308337953872979 lr:9.42728012826605e-05 tokens_per_second_per_gpu:24201.363570929592
|
| 18 |
+
Step 18 | loss:0.008504933677613735 lr:9.309848334400246e-05 tokens_per_second_per_gpu:23789.40827962156
|
| 19 |
+
Step 19 | loss:0.001460474100895226 lr:9.182350690051133e-05 tokens_per_second_per_gpu:23739.841384153347
|
| 20 |
+
Step 20 | loss:0.00033094437094405293 lr:9.045084971874738e-05 tokens_per_second_per_gpu:24261.05693148491
|
| 21 |
+
Step 21 | loss:0.005942088086158037 lr:8.898371770316111e-05 tokens_per_second_per_gpu:24279.639643586906
|
| 22 |
+
Step 22 | loss:0.001386251300573349 lr:8.742553740855506e-05 tokens_per_second_per_gpu:24166.21180044905
|
| 23 |
+
Step 23 | loss:0.002373190363869071 lr:8.577994803720606e-05 tokens_per_second_per_gpu:23469.087974777245
|
| 24 |
+
Step 24 | loss:0.002791325096040964 lr:8.405079293933986e-05 tokens_per_second_per_gpu:24352.78279218008
|
| 25 |
+
Step 25 | loss:0.003450458636507392 lr:8.224211063680853e-05 tokens_per_second_per_gpu:23675.835729365488
|
| 26 |
+
Step 26 | loss:0.0003748237213585526 lr:8.035812539093557e-05 tokens_per_second_per_gpu:23791.67980807843
Step 27 | loss:0.0008314086007885635 lr:7.840323733655778e-05 tokens_per_second_per_gpu:23524.11210354127
Step 28 | loss:0.00020886929996777326 lr:7.638201220530665e-05 tokens_per_second_per_gpu:24102.350628036587
Step 29 | loss:0.0010381933534517884 lr:7.42991706621303e-05 tokens_per_second_per_gpu:24156.545709099642
Step 30 | loss:0.00023220972798299044 lr:7.215957727996207e-05 tokens_per_second_per_gpu:23981.281352044436
Step 31 | loss:0.0030444117728620768 lr:6.996822917828477e-05 tokens_per_second_per_gpu:24121.91272201948
Step 32 | loss:0.0007726883632130921 lr:6.773024435212678e-05 tokens_per_second_per_gpu:24116.300967213254
Step 33 | loss:0.0001977419451577589 lr:6.545084971874738e-05 tokens_per_second_per_gpu:23765.74739502864
Step 34 | loss:0.00023761519696563482 lr:6.313536890992935e-05 tokens_per_second_per_gpu:24305.33143240327
Step 35 | loss:0.002131509594619274 lr:6.078920983839031e-05 tokens_per_second_per_gpu:24182.642387379612
Step 36 | loss:0.00031443036277778447 lr:5.841785206735192e-05 tokens_per_second_per_gpu:23650.868096400533
Step 37 | loss:0.0004642090352717787 lr:5.602683401276615e-05 tokens_per_second_per_gpu:23586.196989456355
Step 38 | loss:0.0024236671160906553 lr:5.3621740008088126e-05 tokens_per_second_per_gpu:24279.498398810912
Step 39 | loss:0.0008715963340364397 lr:5.1208187261806615e-05 tokens_per_second_per_gpu:24323.658627503635
Step 40 | loss:0.0013416893780231476 lr:4.87918127381934e-05 tokens_per_second_per_gpu:23412.12018296655
Step 41 | loss:0.0001469458220526576 lr:4.6378259991911886e-05 tokens_per_second_per_gpu:24159.714284991816
Step 42 | loss:9.051704546436667e-05 lr:4.397316598723385e-05 tokens_per_second_per_gpu:23430.73935681995
Step 43 | loss:0.00011708393139997497 lr:4.1582147932648074e-05 tokens_per_second_per_gpu:23649.08065577669
Step 44 | loss:0.0006106573855504394 lr:3.92107901616097e-05 tokens_per_second_per_gpu:23468.397342632674
Step 45 | loss:0.00017310594557784498 lr:3.6864631090070655e-05 tokens_per_second_per_gpu:23481.705804195615
Step 46 | loss:0.001516865217126906 lr:3.4549150281252636e-05 tokens_per_second_per_gpu:24368.04570280773
Step 47 | loss:0.00015387988241855055 lr:3.226975564787322e-05 tokens_per_second_per_gpu:24206.595537056422
Step 48 | loss:0.00021737773204222322 lr:3.003177082171523e-05 tokens_per_second_per_gpu:24294.640374297996
Step 49 | loss:0.0001265592873096466 lr:2.784042272003794e-05 tokens_per_second_per_gpu:24346.41908775448
Step 50 | loss:4.384720523376018e-05 lr:2.57008293378697e-05 tokens_per_second_per_gpu:23675.988302361337
Step 51 | loss:7.80585287429858e-06 lr:2.361798779469336e-05 tokens_per_second_per_gpu:24262.941372079036
Step 52 | loss:7.052448199829087e-05 lr:2.1596762663442218e-05 tokens_per_second_per_gpu:23465.85977875894
Step 53 | loss:2.023470551648643e-05 lr:1.9641874609064443e-05 tokens_per_second_per_gpu:24186.6408255026
Step 54 | loss:0.00039702377398498356 lr:1.7757889363191483e-05 tokens_per_second_per_gpu:23781.66117860348
Step 55 | loss:5.7591008953750134e-05 lr:1.5949207060660138e-05 tokens_per_second_per_gpu:23425.172537897048
Step 56 | loss:6.809048500144854e-05 lr:1.422005196279395e-05 tokens_per_second_per_gpu:23850.296589748115
Step 57 | loss:5.8563218772178516e-05 lr:1.257446259144494e-05 tokens_per_second_per_gpu:24303.759587346685
Step 58 | loss:0.0016791581874713302 lr:1.1016282296838887e-05 tokens_per_second_per_gpu:23813.043724349573
Step 59 | loss:0.00013597102952189744 lr:9.549150281252633e-06 tokens_per_second_per_gpu:23748.630106208093
Step 60 | loss:0.00017259041487704962 lr:8.176493099488663e-06 tokens_per_second_per_gpu:24220.84153405734
Step 61 | loss:0.0007555377669632435 lr:6.901516655997536e-06 tokens_per_second_per_gpu:23674.198062511197
Step 62 | loss:3.203596133971587e-05 lr:5.727198717339511e-06 tokens_per_second_per_gpu:23693.248778087476
Step 63 | loss:1.720308318908792e-05 lr:4.65628195747273e-06 tokens_per_second_per_gpu:24254.382626926657
Step 64 | loss:0.001815351308323443 lr:3.691267552111183e-06 tokens_per_second_per_gpu:23520.32572972853
Step 65 | loss:5.410272933659144e-05 lr:2.8344093371128424e-06 tokens_per_second_per_gpu:24368.732015141315
Step 66 | loss:3.675279367598705e-05 lr:2.087708544541689e-06 tokens_per_second_per_gpu:23701.773319089425
Step 67 | loss:0.0019950042478740215 lr:1.4529091286973995e-06 tokens_per_second_per_gpu:24274.975235337475
Step 68 | loss:5.307142055244185e-05 lr:9.314936930293283e-07 tokens_per_second_per_gpu:23832.153039997007
Step 69 | loss:0.00036216157604940236 lr:5.246800274474439e-07 tokens_per_second_per_gpu:24462.014334763506
Step 70 | loss:0.0015142940683290362 lr:2.334182641175686e-07 tokens_per_second_per_gpu:23743.153933291513
Step 71 | loss:0.00411961367353797 lr:5.838865838366792e-08 tokens_per_second_per_gpu:23475.600323424762
Step 72 | loss:6.077798389014788e-05 lr:0.0 tokens_per_second_per_gpu:23732.780269195377
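The learning-rate column above traces a short linear warmup followed by a cosine decay that reaches exactly 0.0 on the final step. The training script itself is not part of this upload, so the following is only a reconstruction inferred from the logged values; the peak LR of 1e-4 and the ~10% warmup fraction are assumptions read off the logs, not confirmed code.

```python
import math

def lr_at_step(step, total_steps, peak_lr=1e-4, warmup_frac=0.1):
    """Linear warmup to peak_lr, then cosine decay to zero.

    Hypothetical reconstruction: every constant here is inferred from
    the logged lr values, not taken from the actual training code.
    """
    warmup_steps = max(1, int(warmup_frac * total_steps))
    if step <= warmup_steps:
        return peak_lr * step / warmup_steps
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return 0.5 * peak_lr * (1.0 + math.cos(math.pi * progress))

# Matches the 72-step run above, e.g.:
# lr_at_step(26, 72) -> ~8.0358e-05, lr_at_step(72, 72) -> 0.0
```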
ttt_models_no_demonstration_loss/0607ce86/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl
ADDED
The diff for this file is too large to render. See raw diff.
ttt_models_no_demonstration_loss/0607ce86/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl
ADDED
@@ -0,0 +1 @@
{"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 3 0 3 3 0 0 0 0]\n [0 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0]\n [0 0 0 8 8 3 8 6 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0]\n [0 0 0 3 3 3 3 3 0 0 3 3 3 3 6 0 0 3 3 3 3 6 0]\n [0 0 0 3 3 3 3 6 0 0 3 3 3 3 6 3 0 3 3 3 3 6 0]\n [0 0 3 8 8 8 8 6 0 0 8 8 3 8 6 0 0 8 8 8 8 6 0]\n [3 0 3 8 8 8 8 6 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 3 0 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 8 8 8 8 6 0 0 8 8 3 3 6 0 3 8 8 8 8 6 3]\n [0 0 0 8 8 8 8 6 0 0 8 3 8 8 6 0 0 8 8 8 3 6 0]\n [0 3 0 3 3 3 3 6 3 0 3 3 3 3 3 0 0 3 3 3 3 6 0]\n [0 0 0 3 3 3 3 3 0 3 3 3 3 3 6 0 0 3 3 3 3 6 0]\n [0 0 0 8 8 8 8 6 0 0 8 3 8 8 6 0 0 8 8 8 8 3 0]\n [0 0 0 8 8 8 8 6 0 0 8 8 8 3 6 0 0 8 3 8 8 6 3]\n [0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 3 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0]\n [0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0]\n [0 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0]\n [0 0 0 3 3 3 3 6 0 0 3 3 3 3 6 0 0 3 3 3 3 6 0]\n [0 0 0 3 3 3 3 6 0 0 3 3 3 3 6 0 0 3 3 3 3 6 0]\n [0 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0]\n [0 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0]\n [0 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0]\n [0 0 0 3 3 3 3 6 0 0 3 3 3 3 6 0 0 3 3 3 3 6 0]\n [0 0 0 3 3 3 3 6 0 0 3 3 3 3 6 0 0 3 3 3 3 6 0]\n [0 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0]\n [0 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0 0 8 8 8 8 6 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n[[0 0 0 3 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]\n [0 3 1 8 1 1 1 1 0 8 1 1 1 3 0 8 1 1 1 1 3]\n [3 3 0 8 1 1 1 1 0 1 1 1 1 1 0 8 3 1 1 1 0]\n [0 0 0 8 2 2 3 2 1 8 2 2 2 3 0 8 3 2 3 2 0]\n [0 3 0 3 3 3 3 3 0 8 3 3 3 3 0 8 3 3 3 3 0]\n [1 0 0 3 3 3 3 3 0 3 3 3 3 3 0 8 1 3 3 3 3]\n [0 3 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3]\n [0 0 0 8 1 1 1 1 0 8 1 1 1 3 0 8 1 1 1 1 0]\n [0 1 0 8 1 1 1 1 0 8 1 3 1 1 0 8 1 1 1 1 0]\n [0 1 0 8 2 1 2 2 0 8 2 2 2 2 0 8 2 2 2 2 0]\n [0 0 0 8 3 3 3 3 0 8 3 1 3 3 0 8 3 3 3 3 0]\n [3 3 0 8 1 3 3 3 0 8 3 3 3 3 0 8 3 3 3 3 3]\n [0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 1 0]\n [0 0 0 8 1 1 1 1 0 1 1 1 1 1 0 8 1 1 1 1 0]\n [0 0 0 8 1 1 1 1 0 8 3 1 1 1 0 8 1 1 1 1 0]\n [0 0 0 8 2 2 2 2 0 8 2 2 2 2 0 8 2 1 2 2 0]\n [0 0 0 8 3 3 3 3 0 8 3 3 3 3 0 8 3 3 3 3 0]\n [0 0 0 8 3 1 3 3 0 8 3 3 3 3 0 8 3 3 3 3 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0]\n [0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 3 0]\n [0 0 0 1 0 0 0 3 3 0 0 0 0 3 0 0 0 0 3 0 0]\n [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0]] -> [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 
8 1 1 1 1 0 8 1 1 1 1 0 8 1 1 1 1 0]\n [0 0 0 8 1 1 1 1 0 8 1 1 1 1 0 8 1 1 1 1 0]\n [0 0 0 8 2 2 2 2 0 8 2 2 2 2 0 8 2 2 2 2 0]\n [0 0 0 8 3 3 3 3 0 8 3 3 3 3 0 8 3 3 3 3 0]\n [0 0 0 8 3 3 3 3 0 8 3 3 3 3 0 8 3 3 3 3 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 8 1 1 1 1 0 8 1 1 1 1 0 8 1 1 1 1 0]\n [0 0 0 8 1 1 1 1 0 8 1 1 1 1 0 8 1 1 1 1 0]\n [0 0 0 8 2 2 2 2 0 8 2 2 2 2 0 8 2 2 2 2 0]\n [0 0 0 8 3 3 3 3 0 8 3 3 3 3 0 8 3 3 3 3 0]\n [0 0 0 8 3 3 3 3 0 8 3 3 3 3 0 8 3 3 3 3 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 8 1 1 1 1 0 8 1 1 1 1 0 8 1 1 1 1 0]\n [0 0 0 8 1 1 1 1 0 8 1 1 1 1 0 8 1 1 1 1 0]\n [0 0 0 8 2 2 2 2 0 8 2 2 2 2 0 8 2 2 2 2 0]\n [0 0 0 8 3 3 3 3 0 8 3 3 3 3 0 8 3 3 3 3 0]\n [0 0 0 8 3 3 3 3 0 8 3 3 3 3 0 8 3 3 3 3 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n"}, {"role": "user", "content": "[[0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 2 0 2 3 3 3 2 0 0 3 3 3 2 0 0 2 2 3 2 0]\n [0 0 0 0 0 1 3 2 2 0 0 3 3 2 2 0 0 3 3 2 2 0]\n [3 0 0 0 0 1 3 2 2 0 0 1 3 2 2 2 0 1 3 2 2 0]\n [0 0 0 0 3 3 3 3 2 0 0 3 3 3 2 0 0 3 3 3 2 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 2 0]\n [2 0 2 3 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 3 3 3 3 0 0 3 3 3 2 0 0 2 3 3 2 0]\n [2 0 0 2 0 1 3 2 2 0 0 1 3 2 2 0 0 2 3 2 2 0]\n [0 0 3 0 0 1 3 3 3 0 0 1 3 2 2 0 0 1 3 2 2 0]\n [0 2 0 0 0 3 3 3 2 0 0 3 3 3 2 0 0 2 3 3 2 0]\n [0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 3 0 0]\n [0 0 0 0 0 3 3 3 2 3 0 3 3 3 2 0 0 3 3 3 2 0]\n [0 0 0 0 0 1 2 2 3 0 0 1 3 2 2 0 0 1 3 2 2 0]\n [0 0 0 0 0 1 3 3 2 0 0 1 3 2 2 0 0 1 3 2 2 0]\n [0 0 0 0 0 3 3 3 2 0 0 3 3 3 2 0 0 3 3 3 2 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0]\n [0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 0 0 3 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3]] -> "}], "output": {"role": "assistant", "content": "[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 3 3 3 2 0 0 3 3 3 2 0 0 3 3 3 2 0]\n [0 0 0 0 0 1 3 2 2 0 0 1 3 2 2 0 0 1 3 2 2 0]\n [0 0 0 0 0 1 3 2 2 0 0 1 3 2 2 0 0 1 3 2 2 0]\n [0 0 0 0 0 3 3 3 2 0 0 3 3 3 2 0 0 3 3 3 2 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 3 3 3 2 0 0 3 3 3 2 0 0 3 3 3 2 0]\n [0 0 0 0 0 1 3 2 2 0 0 1 3 2 2 0 0 1 3 2 2 0]\n [0 0 0 0 0 1 3 2 2 0 0 1 3 2 2 0 0 1 3 2 2 0]\n [0 0 0 0 0 3 3 3 2 0 0 3 3 3 2 0 0 3 3 3 2 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 3 3 3 2 0 0 3 3 3 2 0 0 3 3 3 2 0]\n [0 0 0 0 0 1 3 2 2 0 0 1 3 2 2 0 0 1 3 2 2 0]\n [0 0 0 0 0 1 3 2 2 0 0 1 3 2 2 0 0 1 3 2 2 0]\n [0 0 0 0 0 3 3 3 2 0 0 3 3 3 2 0 0 3 3 3 2 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#"}, "total_tokens": 6005}
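Each line of this JSONL file is one self-contained chat-style training record: a system prompt with demonstration grids, a user message carrying the test input, and the assistant target, with every grid serialized as a whitespace-separated Python-style array. A minimal sketch of reading a record back into NumPy arrays follows; the `parse_grids` helper is illustrative, not something shipped in this repo.

```python
import json
import re
import numpy as np

def parse_grids(text):
    """Extract every `[[...]]` grid in a prompt string as a 2-D int array."""
    grids = []
    for block in re.findall(r"\[\[.*?\]\]", text, flags=re.DOTALL):
        rows = re.findall(r"\[([^\[\]]+)\]", block)
        grids.append(np.array([[int(v) for v in row.split()] for row in rows]))
    return grids

path = "ttt_models_no_demonstration_loss/0607ce86/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl"
with open(path) as f:
    record = json.loads(f.readline())

test_input = parse_grids(record["input"][-1]["content"])[0]  # last message is the user turn
target = parse_grids(record["output"]["content"])[0]         # assistant answer grid
print(test_input.shape, target.shape)  # e.g. (22, 22) (22, 22) for the record above
```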
ttt_models_no_demonstration_loss/0692e18c/adapter_config.json
ADDED
@@ -0,0 +1 @@
{"base_model_name_or_path": "checkpoints/finetuned_1B_model", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
ttt_models_no_demonstration_loss/0692e18c/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0b7d4eb9c0a2b90826d2ed5fafceb875113e9e4e2ed6a393e7cd4c1d026cd8f5
size 153153674
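Like the other `.bin` files in this upload, the repo stores only a Git LFS pointer: a spec-version line, the SHA-256 of the real payload, and its size in bytes (~153 MB). A small sketch of validating a downloaded blob against those two fields:

```python
import hashlib
import os

def verify_lfs_blob(blob_path, expected_oid, expected_size):
    """Check a downloaded file against the oid/size fields of an LFS pointer."""
    if os.path.getsize(blob_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(blob_path, "rb") as f:
        # Hash in 1 MiB chunks to keep memory flat on large files.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Field values copied from the pointer above.
ok = verify_lfs_blob(
    "ttt_models_no_demonstration_loss/0692e18c/adapter_model.bin",
    "0b7d4eb9c0a2b90826d2ed5fafceb875113e9e4e2ed6a393e7cd4c1d026cd8f5",
    153153674,
)
```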
ttt_models_no_demonstration_loss/0692e18c/config.json
ADDED
@@ -0,0 +1 @@
{"_name_or_path": "unsloth/Llama-3.2-1B-Instruct", "architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 16, "num_key_value_heads": 8, "pad_token_id": 128004, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": {"factor": 32.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "rope_theta": 500000.0, "tie_word_embeddings": true, "torch_dtype": "bfloat16", "transformers_version": "4.49.0", "unsloth_fixed": true, "use_cache": true, "vocab_size": 128256}
ttt_models_no_demonstration_loss/0692e18c/log_1740726391.txt
ADDED
@@ -0,0 +1,125 @@
Step 1 | loss:0.06873417645692825 lr:8.333333333333334e-06 tokens_per_second_per_gpu:25076.602805715203
Step 2 | loss:0.0907391756772995 lr:1.6666666666666667e-05 tokens_per_second_per_gpu:25134.404199533696
Step 3 | loss:0.1675177961587906 lr:2.5e-05 tokens_per_second_per_gpu:25549.920030531393
Step 4 | loss:0.09910345077514648 lr:3.3333333333333335e-05 tokens_per_second_per_gpu:25232.64574707642
Step 5 | loss:0.13693448901176453 lr:4.166666666666667e-05 tokens_per_second_per_gpu:25185.380956125202
Step 6 | loss:0.17487424612045288 lr:5e-05 tokens_per_second_per_gpu:24958.46924291943
Step 7 | loss:0.06412655860185623 lr:5.833333333333334e-05 tokens_per_second_per_gpu:24880.618729073525
Step 8 | loss:0.09686434268951416 lr:6.666666666666667e-05 tokens_per_second_per_gpu:25197.032382914804
Step 9 | loss:0.08433305472135544 lr:7.500000000000001e-05 tokens_per_second_per_gpu:25177.7322480083
Step 10 | loss:0.11164304614067078 lr:8.333333333333334e-05 tokens_per_second_per_gpu:23988.530184478812
Step 11 | loss:0.08185476064682007 lr:9.166666666666667e-05 tokens_per_second_per_gpu:23935.893029122937
Step 12 | loss:0.07797159254550934 lr:0.0001 tokens_per_second_per_gpu:25046.55212218092
Step 13 | loss:0.0508369542658329 lr:9.998067787472772e-05 tokens_per_second_per_gpu:25241.08039245084
Step 14 | loss:0.06991245597600937 lr:9.99227264326918e-05 tokens_per_second_per_gpu:25259.553946175925
Step 15 | loss:0.104192353785038 lr:9.982619046369321e-05 tokens_per_second_per_gpu:25204.91543622504
Step 16 | loss:0.10402433574199677 lr:9.96911445789354e-05 tokens_per_second_per_gpu:22259.799770077967
Step 17 | loss:0.10239120572805405 lr:9.951769315335844e-05 tokens_per_second_per_gpu:23723.203837382374
Step 18 | loss:0.07112255692481995 lr:9.930597024496931e-05 tokens_per_second_per_gpu:25229.390619330698
Step 19 | loss:0.06658779829740524 lr:9.905613949123036e-05 tokens_per_second_per_gpu:25198.413328504797
Step 20 | loss:0.08846686780452728 lr:9.876839398258641e-05 tokens_per_second_per_gpu:22910.085788339366
Step 21 | loss:0.1040894165635109 lr:9.844295611322804e-05 tokens_per_second_per_gpu:23267.088248233187
Step 22 | loss:0.07800353318452835 lr:9.808007740920646e-05 tokens_per_second_per_gpu:25301.427737404807
Step 23 | loss:0.04706195741891861 lr:9.768003833403278e-05 tokens_per_second_per_gpu:25239.291278223933
Step 24 | loss:0.053261008113622665 lr:9.724314807191195e-05 tokens_per_second_per_gpu:25476.719593793005
Step 25 | loss:0.08141924440860748 lr:9.676974428877901e-05 tokens_per_second_per_gpu:24776.62877243341
Step 26 | loss:0.042390722781419754 lr:9.626019287132203e-05 tokens_per_second_per_gpu:24948.13678837632
Step 27 | loss:0.06188507005572319 lr:9.571488764419381e-05 tokens_per_second_per_gpu:24937.954856230106
Step 28 | loss:0.05963794142007828 lr:9.513425006563079e-05 tokens_per_second_per_gpu:25176.32351685193
Step 29 | loss:0.06711995601654053 lr:9.451872890171419e-05 tokens_per_second_per_gpu:13026.515498493643
Step 30 | loss:0.02343023009598255 lr:9.386879987952549e-05 tokens_per_second_per_gpu:25466.594970647257
Step 31 | loss:0.03762052208185196 lr:9.31849653194641e-05 tokens_per_second_per_gpu:23686.051053999123
Step 32 | loss:0.03201031684875488 lr:9.246775374701139e-05 tokens_per_second_per_gpu:23479.609760164065
Step 33 | loss:0.0440811850130558 lr:9.171771948424137e-05 tokens_per_second_per_gpu:23183.363000519286
Step 34 | loss:0.02956802025437355 lr:9.093544222139337e-05 tokens_per_second_per_gpu:24846.26050648923
Step 35 | loss:0.043681904673576355 lr:9.012152656883823e-05 tokens_per_second_per_gpu:25027.662093380135
Step 36 | loss:0.026787331327795982 lr:8.927660158978392e-05 tokens_per_second_per_gpu:25355.399324071554
Step 37 | loss:0.018079999834299088 lr:8.84013203140821e-05 tokens_per_second_per_gpu:25007.259886619504
Step 38 | loss:0.027538539841771126 lr:8.749635923351107e-05 tokens_per_second_per_gpu:24982.155364395847
Step 39 | loss:0.12628568708896637 lr:8.656241777892543e-05 tokens_per_second_per_gpu:18661.042155699237
Step 40 | loss:0.0408908985555172 lr:8.560021777967649e-05 tokens_per_second_per_gpu:23747.996727156344
Step 41 | loss:0.036181312054395676 lr:8.461050290572114e-05 tokens_per_second_per_gpu:25135.795581973463
Step 42 | loss:0.03509949892759323 lr:8.359403809285053e-05 tokens_per_second_per_gpu:22927.79378519503
Step 43 | loss:0.022104158997535706 lr:8.255160895148263e-05 tokens_per_second_per_gpu:23497.11404513014
Step 44 | loss:0.01899527944624424 lr:8.148402115947571e-05 tokens_per_second_per_gpu:25030.32633946565
Step 45 | loss:0.03378685191273689 lr:8.039209983943201e-05 tokens_per_second_per_gpu:18854.299716899997
Step 46 | loss:0.03274151682853699 lr:7.927668892097289e-05 tokens_per_second_per_gpu:25287.602471163016
Step 47 | loss:0.03538212180137634 lr:7.813865048847819e-05 tokens_per_second_per_gpu:24827.53945126137
Step 48 | loss:0.03468002378940582 lr:7.697886411479423e-05 tokens_per_second_per_gpu:22840.971997293433
Step 49 | loss:0.02795535698533058 lr:7.579822618142505e-05 tokens_per_second_per_gpu:25487.723986249635
Step 50 | loss:0.02198653109371662 lr:7.459764918573264e-05 tokens_per_second_per_gpu:24918.213197920515
Step 51 | loss:0.022179944440722466 lr:7.33780610356814e-05 tokens_per_second_per_gpu:25126.003149653414
Step 52 | loss:0.04089502617716789 lr:7.214040433267198e-05 tokens_per_second_per_gpu:25167.84062951732
Step 53 | loss:0.015825271606445312 lr:7.088563564301873e-05 tokens_per_second_per_gpu:23656.05577008425
Step 54 | loss:0.021313516423106194 lr:6.961472475863405e-05 tokens_per_second_per_gpu:25291.840533522445
Step 55 | loss:0.026100896298885345 lr:6.832865394749065e-05 tokens_per_second_per_gpu:23660.080363500176
Step 56 | loss:0.009968324564397335 lr:6.702841719444141e-05 tokens_per_second_per_gpu:25237.298113003046
Step 57 | loss:0.012824468314647675 lr:6.571501943298334e-05 tokens_per_second_per_gpu:23856.16329316852
Step 58 | loss:0.014346851967275143 lr:6.438947576855968e-05 tokens_per_second_per_gpu:23215.361769151077
Step 59 | loss:0.0115708839148283 lr:6.305281069399989e-05 tokens_per_second_per_gpu:18341.777097691232
Step 60 | loss:0.028614623472094536 lr:6.17060572977047e-05 tokens_per_second_per_gpu:25481.543543582968
Step 61 | loss:0.008104612119495869 lr:6.035025646518746e-05 tokens_per_second_per_gpu:25410.887426219768
Step 62 | loss:0.012446203269064426 lr:5.8986456074589404e-05 tokens_per_second_per_gpu:25332.134196299146
Step 63 | loss:0.02488069050014019 lr:5.761571018679025e-05 tokens_per_second_per_gpu:24989.747024059317
Step 64 | loss:0.03153364732861519 lr:5.6239078230740436e-05 tokens_per_second_per_gpu:23531.15756602609
Step 65 | loss:0.009309901855885983 lr:5.48576241846443e-05 tokens_per_second_per_gpu:25064.17329461201
Step 66 | loss:0.007008099462836981 lr:5.347241575362729e-05 tokens_per_second_per_gpu:25472.58645577702
Step 67 | loss:0.05305401235818863 lr:5.208452354452274e-05 tokens_per_second_per_gpu:13746.871117321489
Step 68 | loss:0.004958346486091614 lr:5.0695020238415756e-05 tokens_per_second_per_gpu:19652.069338916455
Step 69 | loss:0.012956451624631882 lr:4.9304979761584256e-05 tokens_per_second_per_gpu:25759.311157398104
Step 70 | loss:0.011759134940803051 lr:4.791547645547726e-05 tokens_per_second_per_gpu:19824.502044914763
Step 71 | loss:0.010236799716949463 lr:4.652758424637271e-05 tokens_per_second_per_gpu:25363.79309766345
Step 72 | loss:0.014782914891839027 lr:4.5142375815355706e-05 tokens_per_second_per_gpu:25368.230091152815
Step 73 | loss:0.020855702459812164 lr:4.376092176925958e-05 tokens_per_second_per_gpu:14475.334695910287
Step 74 | loss:0.00317940185777843 lr:4.238428981320975e-05 tokens_per_second_per_gpu:25859.44551888207
Step 75 | loss:0.0021112386602908373 lr:4.101354392541061e-05 tokens_per_second_per_gpu:25022.957603689487
Step 76 | loss:0.0225250031799078 lr:3.964974353481254e-05 tokens_per_second_per_gpu:24473.372990998832
Step 77 | loss:0.015027803368866444 lr:3.829394270229531e-05 tokens_per_second_per_gpu:25146.029592461062
Step 78 | loss:0.0007944289827719331 lr:3.694718930600012e-05 tokens_per_second_per_gpu:23006.2774379864
Step 79 | loss:0.004939096514135599 lr:3.561052423144032e-05 tokens_per_second_per_gpu:18679.774024481314
Step 80 | loss:0.0049825748428702354 lr:3.428498056701665e-05 tokens_per_second_per_gpu:25163.314630706074
Step 81 | loss:0.017561975866556168 lr:3.297158280555862e-05 tokens_per_second_per_gpu:24772.304824025407
Step 82 | loss:0.010404028929769993 lr:3.167134605250938e-05 tokens_per_second_per_gpu:25016.85940648129
Step 83 | loss:0.006737596821039915 lr:3.0385275241365962e-05 tokens_per_second_per_gpu:23195.72569449278
Step 84 | loss:0.0033685865346342325 lr:2.9114364356981272e-05 tokens_per_second_per_gpu:24971.432402238792
Step 85 | loss:0.008929637260735035 lr:2.7859595667328026e-05 tokens_per_second_per_gpu:25481.866275074477
Step 86 | loss:0.031112048774957657 lr:2.6621938964318595e-05 tokens_per_second_per_gpu:24114.89091892917
Step 87 | loss:0.001247547217644751 lr:2.540235081426736e-05 tokens_per_second_per_gpu:25670.11475426286
Step 88 | loss:0.02326502464711666 lr:2.4201773818574956e-05 tokens_per_second_per_gpu:24666.177666378433
Step 89 | loss:0.0036518017295747995 lr:2.302113588520578e-05 tokens_per_second_per_gpu:25146.269294907743
Step 90 | loss:0.0005968324257992208 lr:2.1861349511521815e-05 tokens_per_second_per_gpu:25521.140982351953
Step 91 | loss:0.006977428682148457 lr:2.072331107902713e-05 tokens_per_second_per_gpu:23279.1725475497
Step 92 | loss:0.023413771763443947 lr:1.960790016056801e-05 tokens_per_second_per_gpu:25230.858008214553
Step 93 | loss:0.002284190384671092 lr:1.8515978840524302e-05 tokens_per_second_per_gpu:25272.992471688267
Step 94 | loss:0.0032079897355288267 lr:1.7448391048517376e-05 tokens_per_second_per_gpu:25321.885483847054
Step 95 | loss:0.010654137469828129 lr:1.640596190714947e-05 tokens_per_second_per_gpu:25466.225130267263
Step 96 | loss:0.006805225275456905 lr:1.538949709427886e-05 tokens_per_second_per_gpu:22641.89958660637
Step 97 | loss:0.03936164453625679 lr:1.4399782220323515e-05 tokens_per_second_per_gpu:23466.59462006398
Step 98 | loss:0.0029360493645071983 lr:1.3437582221074573e-05 tokens_per_second_per_gpu:18608.277741968865
Step 99 | loss:0.00660504586994648 lr:1.250364076648894e-05 tokens_per_second_per_gpu:22971.50641841576
Step 100 | loss:0.008673754520714283 lr:1.1598679685917901e-05 tokens_per_second_per_gpu:25602.881112755094
Step 101 | loss:0.018985848873853683 lr:1.0723398410216084e-05 tokens_per_second_per_gpu:24987.113608128926
Step 102 | loss:0.0025321505963802338 lr:9.878473431161767e-06 tokens_per_second_per_gpu:25196.838935213345
Step 103 | loss:0.002252749865874648 lr:9.064557778606631e-06 tokens_per_second_per_gpu:25089.753245185257
Step 104 | loss:0.012876608408987522 lr:8.28228051575864e-06 tokens_per_second_per_gpu:25163.450857217118
Step 105 | loss:0.006892647594213486 lr:7.532246252988617e-06 tokens_per_second_per_gpu:25283.395264753603
Step 106 | loss:0.005298743490129709 lr:6.815034680535915e-06 tokens_per_second_per_gpu:24703.054673506125
Step 107 | loss:0.01798229292035103 lr:6.1312001204745115e-06 tokens_per_second_per_gpu:22815.029177582783
Step 108 | loss:0.0058769481256604195 lr:5.481271098285817e-06 tokens_per_second_per_gpu:25744.095532979176
Step 109 | loss:0.019404472783207893 lr:4.865749934369223e-06 tokens_per_second_per_gpu:19782.54044631028
Step 110 | loss:0.01082681119441986 lr:4.285112355806192e-06 tokens_per_second_per_gpu:25577.429835284092
Step 111 | loss:0.019221190363168716 lr:3.7398071286779857e-06 tokens_per_second_per_gpu:24237.103264660593
Step 112 | loss:0.01382873672991991 lr:3.230255711220992e-06 tokens_per_second_per_gpu:23345.148531226638
Step 113 | loss:0.002991994610056281 lr:2.7568519280880558e-06 tokens_per_second_per_gpu:25086.449852480702
Step 114 | loss:0.005466030910611153 lr:2.3199616659672354e-06 tokens_per_second_per_gpu:25081.560319956046
Step 115 | loss:0.00419091759249568 lr:1.919922590793549e-06 tokens_per_second_per_gpu:25113.20460315275
Step 116 | loss:0.001619058195501566 lr:1.5570438867719694e-06 tokens_per_second_per_gpu:25418.23545918285
Step 117 | loss:0.012408984825015068 lr:1.2316060174136002e-06 tokens_per_second_per_gpu:23757.97691825984
Step 118 | loss:0.0042074937373399734 lr:9.43860508769645e-07 tokens_per_second_per_gpu:25215.94324018218
Step 119 | loss:0.004370636772364378 lr:6.940297550306896e-07 tokens_per_second_per_gpu:24456.575071926305
Step 120 | loss:0.019508644938468933 lr:4.823068466415615e-07 tokens_per_second_per_gpu:25231.35110159217
Step 121 | loss:0.030789289623498917 lr:3.088554210646133e-07 tokens_per_second_per_gpu:23997.30721971025
Step 122 | loss:0.00805984903126955 lr:1.7380953630678488e-07 tokens_per_second_per_gpu:24970.803170398103
Step 123 | loss:0.006753778550773859 lr:7.727356730820035e-08 tokens_per_second_per_gpu:24967.840297381525
Step 124 | loss:0.004647454712539911 lr:1.9322125272297488e-08 tokens_per_second_per_gpu:25102.859550354806
Step 125 | loss:0.03784371167421341 lr:0.0 tokens_per_second_per_gpu:23806.958706235564
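Every per-task log in this upload uses the same one-line-per-step layout, so loss and throughput curves can be pulled out with a single regex. An illustrative parser follows; the pattern simply mirrors the `Step N | loss:... lr:... tokens_per_second_per_gpu:...` format of these files.

```python
import re

LINE = re.compile(
    r"Step (\d+) \| loss:([\d.eE+-]+) lr:([\d.eE+-]+) "
    r"tokens_per_second_per_gpu:([\d.eE+-]+)"
)

def parse_log(path):
    """Return (step, loss, lr, tokens/s/gpu) tuples from one training log."""
    rows = []
    with open(path) as f:
        for line in f:
            m = LINE.search(line)
            if m:
                step, loss, lr, tps = m.groups()
                rows.append((int(step), float(loss), float(lr), float(tps)))
    return rows

rows = parse_log("ttt_models_no_demonstration_loss/0692e18c/log_1740726391.txt")
print(len(rows), rows[-1])  # 125 steps logged; lr has decayed to 0.0 by the last one
```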