diff --git a/exp0_baseline/checkpoint-1/README.md b/exp0_baseline/checkpoint-1/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp0_baseline/checkpoint-1/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp0_baseline/checkpoint-1/adapter_config.json b/exp0_baseline/checkpoint-1/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5b8d383fc480207c6799099003c73203b0c5a1a3 --- /dev/null +++ b/exp0_baseline/checkpoint-1/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "k_proj", + "v_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp0_baseline/checkpoint-1/adapter_model.safetensors b/exp0_baseline/checkpoint-1/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b49d1017affa76254e65237408611469151641d --- /dev/null +++ b/exp0_baseline/checkpoint-1/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdf06f9cea89c8d9d7124c1d52bac71038ad97cd68d0c8e6ed60f7bbfcb25f80 +size 201378736 diff --git a/exp0_baseline/checkpoint-1/optimizer.pt b/exp0_baseline/checkpoint-1/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1188c854e3a557ec1212a1349a822e0569f9f9c --- /dev/null +++ b/exp0_baseline/checkpoint-1/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9aa55dea0ca598e0bf5b442fb7f2c41ae97a63e414478868fcfe7f6d4d839642 +size 402982627 diff --git a/exp0_baseline/checkpoint-1/rng_state.pth b/exp0_baseline/checkpoint-1/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9430137842c609d7ef017081aa61c2ace38ea401 --- /dev/null +++ b/exp0_baseline/checkpoint-1/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f25a535c3b4666d82432ed14514e08df4d28f434738d10df0544ccff1f604167 +size 14645 diff --git a/exp0_baseline/checkpoint-1/scheduler.pt b/exp0_baseline/checkpoint-1/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ffece88f7b820dce9a2887a420d775e41111ff10 --- /dev/null +++ b/exp0_baseline/checkpoint-1/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606cfc6388b6cf7d31d99974a689cca32d495ea107cc08ea098ddbe0845d2d35 +size 1465 diff --git a/exp0_baseline/checkpoint-1/trainer_state.json b/exp0_baseline/checkpoint-1/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8a603b8794e91ba50910c41257350e6deb12fa59 --- /dev/null +++ b/exp0_baseline/checkpoint-1/trainer_state.json @@ -0,0 +1,41 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 1, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "grad_norm": 4.6373677253723145, + "learning_rate": 0.0, + "loss": 1.3085, + "step": 1 + } + ], + "logging_steps": 1, + "max_steps": 1, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 3, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 690182437208064.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp0_baseline/checkpoint-1/training_args.bin b/exp0_baseline/checkpoint-1/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e435ba0620025de283bab797488b3a7b34ce7b22 --- /dev/null +++ b/exp0_baseline/checkpoint-1/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc1d4d2c81fe84f834ff9f6e7f721e83167a4346e940ee66b8eced62acc15367 +size 6033 diff --git a/exp0_baseline/checkpoint-100/README.md b/exp0_baseline/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp0_baseline/checkpoint-100/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp0_baseline/checkpoint-100/adapter_config.json b/exp0_baseline/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2d9e6780978efb4ab936b5f50a2afc11535e6c4b --- /dev/null +++ b/exp0_baseline/checkpoint-100/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "o_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp0_baseline/checkpoint-100/adapter_model.safetensors b/exp0_baseline/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e01667669b403df3818f8a188a7cb3bf26b196b1 --- /dev/null +++ b/exp0_baseline/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8209e0cbb700204af15187718e31808cf6e3166b8065c4c05ac7864dc693371 +size 201378736 diff --git a/exp0_baseline/checkpoint-100/optimizer.pt b/exp0_baseline/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f3adc5d5f69bcb377d4405de90207a8dd921055 --- /dev/null +++ b/exp0_baseline/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644358c4c401fbe2649687bc34c4e1a13fecb35ca5ab401cae903e89ccd99528 +size 402982627 diff --git a/exp0_baseline/checkpoint-100/rng_state.pth b/exp0_baseline/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6ce9c097d9e48ec448fc9e906660769067929aa3 --- /dev/null +++ b/exp0_baseline/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b92ede016a8dc313d233f2927862e909d5717a49b27265ddd87beb7b3aab3357 +size 14645 diff --git a/exp0_baseline/checkpoint-100/scheduler.pt b/exp0_baseline/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fdcc1fbec5336295a8ad32e6fb37c9646d5a557 --- /dev/null +++ b/exp0_baseline/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4e44404b58ce3af1b46c3d4a85a59edbbc386f340c476e894715a1199e1aed +size 1465 diff --git a/exp0_baseline/checkpoint-100/trainer_state.json b/exp0_baseline/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b44797e6105b87c665f9ecd09b8208fac8d8c00b --- /dev/null +++ b/exp0_baseline/checkpoint-100/trainer_state.json @@ -0,0 +1,742 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 1.144538164138794, + "learning_rate": 0.0, + "loss": 0.561, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 1.1173577308654785, + "learning_rate": 5e-06, + "loss": 0.5438, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 1.6941046714782715, + "learning_rate": 1e-05, + "loss": 0.5345, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 2.2526638507843018, + "learning_rate": 1.5e-05, + "loss": 0.6601, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 2.336232900619507, + "learning_rate": 2e-05, + "loss": 0.8386, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 1.2247791290283203, + "learning_rate": 2.5e-05, + "loss": 0.3448, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 3.491952419281006, + "learning_rate": 3e-05, + "loss": 0.9107, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.5285453796386719, + "learning_rate": 3.5e-05, + "loss": 0.5574, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 1.9498869180679321, + "learning_rate": 4e-05, + "loss": 0.7468, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.451749563217163, + "learning_rate": 4.5e-05, + "loss": 0.7601, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 2.121290683746338, + "learning_rate": 5e-05, + "loss": 0.788, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 1.173128604888916, + "learning_rate": 4.99847706754774e-05, + "loss": 0.4117, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 1.4134124517440796, + "learning_rate": 4.993910125649561e-05, + "loss": 0.2099, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 2.4718620777130127, + "learning_rate": 4.9863047384206835e-05, + "loss": 1.0762, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.1293636560440063, + "learning_rate": 4.975670171853926e-05, + "loss": 0.338, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 2.616136312484741, + "learning_rate": 4.962019382530521e-05, + "loss": 1.0328, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 2.410461902618408, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.863, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 2.1010797023773193, + "learning_rate": 4.925739315689991e-05, + "loss": 0.6364, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 1.2987828254699707, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.4619, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 1.2924538850784302, + "learning_rate": 4.877641290737884e-05, + "loss": 0.7118, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 1.6322095394134521, + "learning_rate": 4.849231551964771e-05, + "loss": 0.936, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.9164633750915527, + "learning_rate": 4.817959636416969e-05, + "loss": 0.8829, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 1.2494964599609375, + "learning_rate": 4.783863644106502e-05, + "loss": 0.3653, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.217826247215271, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.4539, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 3.2103142738342285, + "learning_rate": 4.707368982147318e-05, + "loss": 0.9621, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.2653640508651733, + "learning_rate": 4.665063509461097e-05, + "loss": 0.448, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 0.9144423007965088, + "learning_rate": 4.620120240391065e-05, + "loss": 0.3957, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 1.313241958618164, + "learning_rate": 4.572593931387604e-05, + "loss": 0.5468, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 0.7544188499450684, + "learning_rate": 4.522542485937369e-05, + "loss": 0.185, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 0.9644553661346436, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.3864, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 1.281360387802124, + "learning_rate": 4.415111107797445e-05, + "loss": 0.5255, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 0.8318763375282288, + "learning_rate": 4.357862063693486e-05, + "loss": 0.392, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 1.29972243309021, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.6444, + "step": 33 + }, + { + "epoch": 0.3417085427135678, + "grad_norm": 1.3421469926834106, + "learning_rate": 4.2366459261474933e-05, + "loss": 0.3876, + "step": 34 + }, + { + "epoch": 0.35175879396984927, + "grad_norm": 0.9718002676963806, + "learning_rate": 4.172826515897146e-05, + "loss": 0.5414, + "step": 35 + }, + { + "epoch": 0.36180904522613067, + "grad_norm": 0.9774896502494812, + "learning_rate": 4.1069690242163484e-05, + "loss": 0.4928, + "step": 36 + }, + { + "epoch": 0.37185929648241206, + "grad_norm": 3.244431257247925, + "learning_rate": 4.039153688314145e-05, + "loss": 1.0665, + "step": 37 + }, + { + "epoch": 0.38190954773869346, + "grad_norm": 0.9365755319595337, + "learning_rate": 3.969463130731183e-05, + "loss": 0.3107, + "step": 38 + }, + { + "epoch": 0.39195979899497485, + "grad_norm": 1.4368454217910767, + "learning_rate": 3.897982258676867e-05, + "loss": 0.4084, + "step": 39 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 2.354626417160034, + "learning_rate": 3.824798160583012e-05, + "loss": 0.4833, + "step": 40 + }, + { + "epoch": 0.4120603015075377, + "grad_norm": 2.426787853240967, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.548, + "step": 41 + }, + { + "epoch": 0.4221105527638191, + "grad_norm": 1.3904081583023071, + "learning_rate": 3.673678906964727e-05, + "loss": 0.847, + "step": 42 + }, + { + "epoch": 0.4321608040201005, + "grad_norm": 0.8043186664581299, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3826, + "step": 43 + }, + { + "epoch": 0.44221105527638194, + "grad_norm": 0.9501510858535767, + "learning_rate": 3.516841607689501e-05, + "loss": 0.2336, + "step": 44 + }, + { + "epoch": 0.45226130653266333, + "grad_norm": 2.477350950241089, + "learning_rate": 3.436516483539781e-05, + "loss": 0.8067, + "step": 45 + }, + { + "epoch": 0.4623115577889447, + "grad_norm": 1.3030426502227783, + "learning_rate": 3.355050358314172e-05, + "loss": 0.4237, + "step": 46 + }, + { + "epoch": 0.4723618090452261, + "grad_norm": 2.6159799098968506, + "learning_rate": 3.272542485937369e-05, + "loss": 0.5104, + "step": 47 + }, + { + "epoch": 0.4824120603015075, + "grad_norm": 1.256778597831726, + "learning_rate": 3.1890933895424976e-05, + "loss": 0.235, + "step": 48 + }, + { + "epoch": 0.49246231155778897, + "grad_norm": 2.076829195022583, + "learning_rate": 3.104804738999169e-05, + "loss": 0.5265, + "step": 49 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 1.3327422142028809, + "learning_rate": 3.0197792270443982e-05, + "loss": 0.5636, + "step": 50 + }, + { + "epoch": 0.5125628140703518, + "grad_norm": 1.7859450578689575, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.67, + "step": 51 + }, + { + "epoch": 0.5226130653266332, + "grad_norm": 1.3924661874771118, + "learning_rate": 2.8479327524001636e-05, + "loss": 0.6816, + "step": 52 + }, + { + "epoch": 0.5326633165829145, + "grad_norm": 1.4186939001083374, + "learning_rate": 2.761321158169134e-05, + "loss": 0.3969, + "step": 53 + }, + { + "epoch": 0.542713567839196, + "grad_norm": 2.0062601566314697, + "learning_rate": 2.674391184360313e-05, + "loss": 0.3101, + "step": 54 + }, + { + "epoch": 0.5527638190954773, + "grad_norm": 1.4015522003173828, + "learning_rate": 2.587248741756253e-05, + "loss": 0.2104, + "step": 55 + }, + { + "epoch": 0.5628140703517588, + "grad_norm": 1.5075968503952026, + "learning_rate": 2.5e-05, + "loss": 0.5247, + "step": 56 + }, + { + "epoch": 0.5728643216080402, + "grad_norm": 3.6213722229003906, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.8403, + "step": 57 + }, + { + "epoch": 0.5829145728643216, + "grad_norm": 5.086165904998779, + "learning_rate": 2.3256088156396868e-05, + "loss": 0.9748, + "step": 58 + }, + { + "epoch": 0.592964824120603, + "grad_norm": 0.8291557431221008, + "learning_rate": 2.238678841830867e-05, + "loss": 0.1264, + "step": 59 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 6.901468753814697, + "learning_rate": 2.1520672475998373e-05, + "loss": 1.7948, + "step": 60 + }, + { + "epoch": 0.6130653266331658, + "grad_norm": 1.2166105508804321, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.3152, + "step": 61 + }, + { + "epoch": 0.6231155778894473, + "grad_norm": 4.400286674499512, + "learning_rate": 1.980220772955602e-05, + "loss": 1.6227, + "step": 62 + }, + { + "epoch": 0.6331658291457286, + "grad_norm": 2.3584349155426025, + "learning_rate": 1.895195261000831e-05, + "loss": 0.6485, + "step": 63 + }, + { + "epoch": 0.6432160804020101, + "grad_norm": 3.535404682159424, + "learning_rate": 1.8109066104575023e-05, + "loss": 1.1718, + "step": 64 + }, + { + "epoch": 0.6532663316582915, + "grad_norm": 1.4264318943023682, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.3102, + "step": 65 + }, + { + "epoch": 0.6633165829145728, + "grad_norm": 1.9855120182037354, + "learning_rate": 1.6449496416858284e-05, + "loss": 0.405, + "step": 66 + }, + { + "epoch": 0.6733668341708543, + "grad_norm": 0.8108851909637451, + "learning_rate": 1.56348351646022e-05, + "loss": 0.1325, + "step": 67 + }, + { + "epoch": 0.6834170854271356, + "grad_norm": 4.136630535125732, + "learning_rate": 1.4831583923104999e-05, + "loss": 0.968, + "step": 68 + }, + { + "epoch": 0.6934673366834171, + "grad_norm": 0.9361010789871216, + "learning_rate": 1.4040721330273062e-05, + "loss": 0.2457, + "step": 69 + }, + { + "epoch": 0.7035175879396985, + "grad_norm": 3.343045473098755, + "learning_rate": 1.3263210930352737e-05, + "loss": 0.7569, + "step": 70 + }, + { + "epoch": 0.7135678391959799, + "grad_norm": 1.0723439455032349, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.1777, + "step": 71 + }, + { + "epoch": 0.7236180904522613, + "grad_norm": 1.3722829818725586, + "learning_rate": 1.175201839416988e-05, + "loss": 0.3186, + "step": 72 + }, + { + "epoch": 0.7336683417085427, + "grad_norm": 2.0538363456726074, + "learning_rate": 1.1020177413231334e-05, + "loss": 0.7053, + "step": 73 + }, + { + "epoch": 0.7437185929648241, + "grad_norm": 1.7194602489471436, + "learning_rate": 1.0305368692688174e-05, + "loss": 0.4186, + "step": 74 + }, + { + "epoch": 0.7537688442211056, + "grad_norm": 1.9144634008407593, + "learning_rate": 9.608463116858542e-06, + "loss": 0.4359, + "step": 75 + }, + { + "epoch": 0.7638190954773869, + "grad_norm": 1.8238540887832642, + "learning_rate": 8.930309757836517e-06, + "loss": 0.3578, + "step": 76 + }, + { + "epoch": 0.7738693467336684, + "grad_norm": 1.620219349861145, + "learning_rate": 8.271734841028553e-06, + "loss": 0.6803, + "step": 77 + }, + { + "epoch": 0.7839195979899497, + "grad_norm": 1.2236140966415405, + "learning_rate": 7.633540738525066e-06, + "loss": 0.285, + "step": 78 + }, + { + "epoch": 0.7939698492462312, + "grad_norm": 2.433211326599121, + "learning_rate": 7.016504991533726e-06, + "loss": 0.3922, + "step": 79 + }, + { + "epoch": 0.8040201005025126, + "grad_norm": 1.6254342794418335, + "learning_rate": 6.421379363065142e-06, + "loss": 0.6371, + "step": 80 + }, + { + "epoch": 0.8140703517587939, + "grad_norm": 0.95794677734375, + "learning_rate": 5.848888922025553e-06, + "loss": 0.4203, + "step": 81 + }, + { + "epoch": 0.8241206030150754, + "grad_norm": 2.8286194801330566, + "learning_rate": 5.299731159831953e-06, + "loss": 0.7793, + "step": 82 + }, + { + "epoch": 0.8341708542713567, + "grad_norm": 2.440025806427002, + "learning_rate": 4.7745751406263165e-06, + "loss": 0.7263, + "step": 83 + }, + { + "epoch": 0.8442211055276382, + "grad_norm": 2.440507173538208, + "learning_rate": 4.274060686123959e-06, + "loss": 0.4484, + "step": 84 + }, + { + "epoch": 0.8542713567839196, + "grad_norm": 1.1934988498687744, + "learning_rate": 3.798797596089351e-06, + "loss": 0.2335, + "step": 85 + }, + { + "epoch": 0.864321608040201, + "grad_norm": 1.1017628908157349, + "learning_rate": 3.3493649053890326e-06, + "loss": 0.3244, + "step": 86 + }, + { + "epoch": 0.8743718592964824, + "grad_norm": 1.3522121906280518, + "learning_rate": 2.9263101785268254e-06, + "loss": 0.3025, + "step": 87 + }, + { + "epoch": 0.8844221105527639, + "grad_norm": 1.3555244207382202, + "learning_rate": 2.5301488425208296e-06, + "loss": 0.2563, + "step": 88 + }, + { + "epoch": 0.8944723618090452, + "grad_norm": 1.7802023887634277, + "learning_rate": 2.1613635589349756e-06, + "loss": 0.8291, + "step": 89 + }, + { + "epoch": 0.9045226130653267, + "grad_norm": 1.5431946516036987, + "learning_rate": 1.8204036358303173e-06, + "loss": 0.496, + "step": 90 + }, + { + "epoch": 0.914572864321608, + "grad_norm": 3.552048444747925, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.6235, + "step": 91 + }, + { + "epoch": 0.9246231155778895, + "grad_norm": 1.6633059978485107, + "learning_rate": 1.2235870926211619e-06, + "loss": 0.3562, + "step": 92 + }, + { + "epoch": 0.9346733668341709, + "grad_norm": 1.6059434413909912, + "learning_rate": 9.684576015420278e-07, + "loss": 0.5505, + "step": 93 + }, + { + "epoch": 0.9447236180904522, + "grad_norm": 1.9522382020950317, + "learning_rate": 7.426068431000882e-07, + "loss": 0.3748, + "step": 94 + }, + { + "epoch": 0.9547738693467337, + "grad_norm": 3.010096788406372, + "learning_rate": 5.463099816548579e-07, + "loss": 0.6046, + "step": 95 + }, + { + "epoch": 0.964824120603015, + "grad_norm": 2.7505385875701904, + "learning_rate": 3.7980617469479953e-07, + "loss": 0.8919, + "step": 96 + }, + { + "epoch": 0.9748743718592965, + "grad_norm": 1.0584425926208496, + "learning_rate": 2.4329828146074095e-07, + "loss": 0.3999, + "step": 97 + }, + { + "epoch": 0.9849246231155779, + "grad_norm": 1.4713913202285767, + "learning_rate": 1.3695261579316777e-07, + "loss": 0.4368, + "step": 98 + }, + { + "epoch": 0.9949748743718593, + "grad_norm": 1.4613982439041138, + "learning_rate": 6.089874350439506e-08, + "loss": 0.3824, + "step": 99 + }, + { + "epoch": 1.0, + "grad_norm": 0.7810527086257935, + "learning_rate": 1.522932452260595e-08, + "loss": 0.1018, + "step": 100 + }, + { + "epoch": 1.0, + "eval_loss": 0.45845267176628113, + "eval_runtime": 85.8607, + "eval_samples_per_second": 3.494, + "eval_steps_per_second": 1.747, + "step": 100 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 33, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 6.867315250220237e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp0_baseline/checkpoint-100/training_args.bin b/exp0_baseline/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5fe3afbafd0cd89fa6261f47e568e7c633b890ae --- /dev/null +++ b/exp0_baseline/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea06f89c6a9f3345ae45b085ca42903e4044f47016fd3035bff387ef5774f26 +size 6033 diff --git a/exp0_baseline/checkpoint-33/README.md b/exp0_baseline/checkpoint-33/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp0_baseline/checkpoint-33/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp0_baseline/checkpoint-33/adapter_config.json b/exp0_baseline/checkpoint-33/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2d9e6780978efb4ab936b5f50a2afc11535e6c4b --- /dev/null +++ b/exp0_baseline/checkpoint-33/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "o_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp0_baseline/checkpoint-33/adapter_model.safetensors b/exp0_baseline/checkpoint-33/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ff8c7f12acdf6f668c3a9c6683276ad3bb6dfb1 --- /dev/null +++ b/exp0_baseline/checkpoint-33/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40e22fb03e7642a23c3093ca2e1365d1d7ca5a08fc84f13f3860912ab1d8384c +size 201378736 diff --git a/exp0_baseline/checkpoint-33/optimizer.pt b/exp0_baseline/checkpoint-33/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..99690021c0aed3470e9f5373b94775fb1354e26f --- /dev/null +++ b/exp0_baseline/checkpoint-33/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5a7a46a4e2ed346bac46b5b5c1431282688ae8e0f2f57f7c74ff06985e2d109 +size 402982627 diff --git a/exp0_baseline/checkpoint-33/rng_state.pth b/exp0_baseline/checkpoint-33/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fd650bca78c013ce43187807570db259be238162 --- /dev/null +++ b/exp0_baseline/checkpoint-33/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4370640aed47e1cfe501e222405e6de0a6701dc2554814f30a0ec4610f4a16b +size 14645 diff --git a/exp0_baseline/checkpoint-33/scheduler.pt b/exp0_baseline/checkpoint-33/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3b688fff35c94c1b3f7bf1c91548cb8ab1f20a1 --- /dev/null +++ b/exp0_baseline/checkpoint-33/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b360cc37b95be8398ac4635eb61f23915865018f0c39c4789e480a08ead764a +size 1465 diff --git a/exp0_baseline/checkpoint-33/trainer_state.json b/exp0_baseline/checkpoint-33/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..664ae3a364ddb1e62f24ac7d9ff8fe138503571c --- /dev/null +++ b/exp0_baseline/checkpoint-33/trainer_state.json @@ -0,0 +1,265 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3316582914572864, + "eval_steps": 100, + "global_step": 33, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 1.144538164138794, + "learning_rate": 0.0, + "loss": 0.561, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 1.1173577308654785, + "learning_rate": 5e-06, + "loss": 0.5438, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 1.6941046714782715, + "learning_rate": 1e-05, + "loss": 0.5345, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 2.2526638507843018, + "learning_rate": 1.5e-05, + "loss": 0.6601, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 2.336232900619507, + "learning_rate": 2e-05, + "loss": 0.8386, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 1.2247791290283203, + "learning_rate": 2.5e-05, + "loss": 0.3448, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 3.491952419281006, + "learning_rate": 3e-05, + "loss": 0.9107, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.5285453796386719, + "learning_rate": 3.5e-05, + "loss": 0.5574, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 1.9498869180679321, + "learning_rate": 4e-05, + "loss": 0.7468, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.451749563217163, + "learning_rate": 4.5e-05, + "loss": 0.7601, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 2.121290683746338, + "learning_rate": 5e-05, + "loss": 0.788, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 1.173128604888916, + "learning_rate": 4.99847706754774e-05, + "loss": 0.4117, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 1.4134124517440796, + "learning_rate": 4.993910125649561e-05, + "loss": 0.2099, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 2.4718620777130127, + "learning_rate": 4.9863047384206835e-05, + "loss": 1.0762, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.1293636560440063, + "learning_rate": 4.975670171853926e-05, + "loss": 0.338, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 2.616136312484741, + "learning_rate": 4.962019382530521e-05, + "loss": 1.0328, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 2.410461902618408, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.863, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 2.1010797023773193, + "learning_rate": 4.925739315689991e-05, + "loss": 0.6364, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 1.2987828254699707, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.4619, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 1.2924538850784302, + "learning_rate": 4.877641290737884e-05, + "loss": 0.7118, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 1.6322095394134521, + "learning_rate": 4.849231551964771e-05, + "loss": 0.936, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.9164633750915527, + "learning_rate": 4.817959636416969e-05, + "loss": 0.8829, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 1.2494964599609375, + "learning_rate": 4.783863644106502e-05, + "loss": 0.3653, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.217826247215271, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.4539, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 3.2103142738342285, + "learning_rate": 4.707368982147318e-05, + "loss": 0.9621, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.2653640508651733, + "learning_rate": 4.665063509461097e-05, + "loss": 0.448, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 0.9144423007965088, + "learning_rate": 4.620120240391065e-05, + "loss": 0.3957, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 1.313241958618164, + "learning_rate": 4.572593931387604e-05, + "loss": 0.5468, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 0.7544188499450684, + "learning_rate": 4.522542485937369e-05, + "loss": 0.185, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 0.9644553661346436, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.3864, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 1.281360387802124, + "learning_rate": 4.415111107797445e-05, + "loss": 0.5255, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 0.8318763375282288, + "learning_rate": 4.357862063693486e-05, + "loss": 0.392, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 1.29972243309021, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.6444, + "step": 33 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 33, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.277602042786611e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp0_baseline/checkpoint-33/training_args.bin b/exp0_baseline/checkpoint-33/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5fe3afbafd0cd89fa6261f47e568e7c633b890ae --- /dev/null +++ b/exp0_baseline/checkpoint-33/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea06f89c6a9f3345ae45b085ca42903e4044f47016fd3035bff387ef5774f26 +size 6033 diff --git a/exp0_baseline/checkpoint-66/README.md b/exp0_baseline/checkpoint-66/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp0_baseline/checkpoint-66/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp0_baseline/checkpoint-66/adapter_config.json b/exp0_baseline/checkpoint-66/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2d9e6780978efb4ab936b5f50a2afc11535e6c4b --- /dev/null +++ b/exp0_baseline/checkpoint-66/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "o_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp0_baseline/checkpoint-66/adapter_model.safetensors b/exp0_baseline/checkpoint-66/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2c227b8c70cd4ee9423399bc6bb81743271eac4 --- /dev/null +++ b/exp0_baseline/checkpoint-66/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0346072ab4d7bbd9a9afd5b3644913d57eaf8efea013486bb1c3d484b0dad19e +size 201378736 diff --git a/exp0_baseline/checkpoint-66/optimizer.pt b/exp0_baseline/checkpoint-66/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f987443454feb5170b49b4dafbeea9d770861fd --- /dev/null +++ b/exp0_baseline/checkpoint-66/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:073c8f4dc8278d37d2225d3a83ee995296494429a086dfb5f194e151354389fa +size 402982627 diff --git a/exp0_baseline/checkpoint-66/rng_state.pth b/exp0_baseline/checkpoint-66/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9acb6af318562be091fcb3203d32d6aa81ac4bf6 --- /dev/null +++ b/exp0_baseline/checkpoint-66/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe8399edd28ccffb95622add1833ade04c0b6c9ff41375a11d9923ffa06e322 +size 14645 diff --git a/exp0_baseline/checkpoint-66/scheduler.pt b/exp0_baseline/checkpoint-66/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a327542ffefb0cc080cbcc215ee0039848f86e29 --- /dev/null +++ b/exp0_baseline/checkpoint-66/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2fd2e1422c94e485a1f77b0038f8f0bc12efa27617f0f3dbfaf4b9a3fd1fd59 +size 1465 diff --git a/exp0_baseline/checkpoint-66/trainer_state.json b/exp0_baseline/checkpoint-66/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a5e80f355e1e7b9f70b22a6bd5be7c64ac2ff7b2 --- /dev/null +++ b/exp0_baseline/checkpoint-66/trainer_state.json @@ -0,0 +1,496 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6633165829145728, + "eval_steps": 100, + "global_step": 66, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 1.144538164138794, + "learning_rate": 0.0, + "loss": 0.561, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 1.1173577308654785, + "learning_rate": 5e-06, + "loss": 0.5438, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 1.6941046714782715, + "learning_rate": 1e-05, + "loss": 0.5345, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 2.2526638507843018, + "learning_rate": 1.5e-05, + "loss": 0.6601, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 2.336232900619507, + "learning_rate": 2e-05, + "loss": 0.8386, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 1.2247791290283203, + "learning_rate": 2.5e-05, + "loss": 0.3448, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 3.491952419281006, + "learning_rate": 3e-05, + "loss": 0.9107, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.5285453796386719, + "learning_rate": 3.5e-05, + "loss": 0.5574, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 1.9498869180679321, + "learning_rate": 4e-05, + "loss": 0.7468, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.451749563217163, + "learning_rate": 4.5e-05, + "loss": 0.7601, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 2.121290683746338, + "learning_rate": 5e-05, + "loss": 0.788, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 1.173128604888916, + "learning_rate": 4.99847706754774e-05, + "loss": 0.4117, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 1.4134124517440796, + "learning_rate": 4.993910125649561e-05, + "loss": 0.2099, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 2.4718620777130127, + "learning_rate": 4.9863047384206835e-05, + "loss": 1.0762, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.1293636560440063, + "learning_rate": 4.975670171853926e-05, + "loss": 0.338, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 2.616136312484741, + "learning_rate": 4.962019382530521e-05, + "loss": 1.0328, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 2.410461902618408, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.863, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 2.1010797023773193, + "learning_rate": 4.925739315689991e-05, + "loss": 0.6364, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 1.2987828254699707, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.4619, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 1.2924538850784302, + "learning_rate": 4.877641290737884e-05, + "loss": 0.7118, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 1.6322095394134521, + "learning_rate": 4.849231551964771e-05, + "loss": 0.936, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.9164633750915527, + "learning_rate": 4.817959636416969e-05, + "loss": 0.8829, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 1.2494964599609375, + "learning_rate": 4.783863644106502e-05, + "loss": 0.3653, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.217826247215271, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.4539, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 3.2103142738342285, + "learning_rate": 4.707368982147318e-05, + "loss": 0.9621, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.2653640508651733, + "learning_rate": 4.665063509461097e-05, + "loss": 0.448, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 0.9144423007965088, + "learning_rate": 4.620120240391065e-05, + "loss": 0.3957, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 1.313241958618164, + "learning_rate": 4.572593931387604e-05, + "loss": 0.5468, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 0.7544188499450684, + "learning_rate": 4.522542485937369e-05, + "loss": 0.185, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 0.9644553661346436, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.3864, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 1.281360387802124, + "learning_rate": 4.415111107797445e-05, + "loss": 0.5255, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 0.8318763375282288, + "learning_rate": 4.357862063693486e-05, + "loss": 0.392, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 1.29972243309021, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.6444, + "step": 33 + }, + { + "epoch": 0.3417085427135678, + "grad_norm": 1.3421469926834106, + "learning_rate": 4.2366459261474933e-05, + "loss": 0.3876, + "step": 34 + }, + { + "epoch": 0.35175879396984927, + "grad_norm": 0.9718002676963806, + "learning_rate": 4.172826515897146e-05, + "loss": 0.5414, + "step": 35 + }, + { + "epoch": 0.36180904522613067, + "grad_norm": 0.9774896502494812, + "learning_rate": 4.1069690242163484e-05, + "loss": 0.4928, + "step": 36 + }, + { + "epoch": 0.37185929648241206, + "grad_norm": 3.244431257247925, + "learning_rate": 4.039153688314145e-05, + "loss": 1.0665, + "step": 37 + }, + { + "epoch": 0.38190954773869346, + "grad_norm": 0.9365755319595337, + "learning_rate": 3.969463130731183e-05, + "loss": 0.3107, + "step": 38 + }, + { + "epoch": 0.39195979899497485, + "grad_norm": 1.4368454217910767, + "learning_rate": 3.897982258676867e-05, + "loss": 0.4084, + "step": 39 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 2.354626417160034, + "learning_rate": 3.824798160583012e-05, + "loss": 0.4833, + "step": 40 + }, + { + "epoch": 0.4120603015075377, + "grad_norm": 2.426787853240967, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.548, + "step": 41 + }, + { + "epoch": 0.4221105527638191, + "grad_norm": 1.3904081583023071, + "learning_rate": 3.673678906964727e-05, + "loss": 0.847, + "step": 42 + }, + { + "epoch": 0.4321608040201005, + "grad_norm": 0.8043186664581299, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3826, + "step": 43 + }, + { + "epoch": 0.44221105527638194, + "grad_norm": 0.9501510858535767, + "learning_rate": 3.516841607689501e-05, + "loss": 0.2336, + "step": 44 + }, + { + "epoch": 0.45226130653266333, + "grad_norm": 2.477350950241089, + "learning_rate": 3.436516483539781e-05, + "loss": 0.8067, + "step": 45 + }, + { + "epoch": 0.4623115577889447, + "grad_norm": 1.3030426502227783, + "learning_rate": 3.355050358314172e-05, + "loss": 0.4237, + "step": 46 + }, + { + "epoch": 0.4723618090452261, + "grad_norm": 2.6159799098968506, + "learning_rate": 3.272542485937369e-05, + "loss": 0.5104, + "step": 47 + }, + { + "epoch": 0.4824120603015075, + "grad_norm": 1.256778597831726, + "learning_rate": 3.1890933895424976e-05, + "loss": 0.235, + "step": 48 + }, + { + "epoch": 0.49246231155778897, + "grad_norm": 2.076829195022583, + "learning_rate": 3.104804738999169e-05, + "loss": 0.5265, + "step": 49 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 1.3327422142028809, + "learning_rate": 3.0197792270443982e-05, + "loss": 0.5636, + "step": 50 + }, + { + "epoch": 0.5125628140703518, + "grad_norm": 1.7859450578689575, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.67, + "step": 51 + }, + { + "epoch": 0.5226130653266332, + "grad_norm": 1.3924661874771118, + "learning_rate": 2.8479327524001636e-05, + "loss": 0.6816, + "step": 52 + }, + { + "epoch": 0.5326633165829145, + "grad_norm": 1.4186939001083374, + "learning_rate": 2.761321158169134e-05, + "loss": 0.3969, + "step": 53 + }, + { + "epoch": 0.542713567839196, + "grad_norm": 2.0062601566314697, + "learning_rate": 2.674391184360313e-05, + "loss": 0.3101, + "step": 54 + }, + { + "epoch": 0.5527638190954773, + "grad_norm": 1.4015522003173828, + "learning_rate": 2.587248741756253e-05, + "loss": 0.2104, + "step": 55 + }, + { + "epoch": 0.5628140703517588, + "grad_norm": 1.5075968503952026, + "learning_rate": 2.5e-05, + "loss": 0.5247, + "step": 56 + }, + { + "epoch": 0.5728643216080402, + "grad_norm": 3.6213722229003906, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.8403, + "step": 57 + }, + { + "epoch": 0.5829145728643216, + "grad_norm": 5.086165904998779, + "learning_rate": 2.3256088156396868e-05, + "loss": 0.9748, + "step": 58 + }, + { + "epoch": 0.592964824120603, + "grad_norm": 0.8291557431221008, + "learning_rate": 2.238678841830867e-05, + "loss": 0.1264, + "step": 59 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 6.901468753814697, + "learning_rate": 2.1520672475998373e-05, + "loss": 1.7948, + "step": 60 + }, + { + "epoch": 0.6130653266331658, + "grad_norm": 1.2166105508804321, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.3152, + "step": 61 + }, + { + "epoch": 0.6231155778894473, + "grad_norm": 4.400286674499512, + "learning_rate": 1.980220772955602e-05, + "loss": 1.6227, + "step": 62 + }, + { + "epoch": 0.6331658291457286, + "grad_norm": 2.3584349155426025, + "learning_rate": 1.895195261000831e-05, + "loss": 0.6485, + "step": 63 + }, + { + "epoch": 0.6432160804020101, + "grad_norm": 3.535404682159424, + "learning_rate": 1.8109066104575023e-05, + "loss": 1.1718, + "step": 64 + }, + { + "epoch": 0.6532663316582915, + "grad_norm": 1.4264318943023682, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.3102, + "step": 65 + }, + { + "epoch": 0.6633165829145728, + "grad_norm": 1.9855120182037354, + "learning_rate": 1.6449496416858284e-05, + "loss": 0.405, + "step": 66 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 33, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.555204085573222e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp0_baseline/checkpoint-66/training_args.bin b/exp0_baseline/checkpoint-66/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5fe3afbafd0cd89fa6261f47e568e7c633b890ae --- /dev/null +++ b/exp0_baseline/checkpoint-66/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea06f89c6a9f3345ae45b085ca42903e4044f47016fd3035bff387ef5774f26 +size 6033 diff --git a/exp0_baseline/checkpoint-99/README.md b/exp0_baseline/checkpoint-99/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp0_baseline/checkpoint-99/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp0_baseline/checkpoint-99/adapter_config.json b/exp0_baseline/checkpoint-99/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2d9e6780978efb4ab936b5f50a2afc11535e6c4b --- /dev/null +++ b/exp0_baseline/checkpoint-99/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "o_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp0_baseline/checkpoint-99/adapter_model.safetensors b/exp0_baseline/checkpoint-99/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f016b1e93bd61c3c538a5cbe904dbf1db141838a --- /dev/null +++ b/exp0_baseline/checkpoint-99/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bea1a4945e24e24fe59703e6682e385bd4872796f62a1512b92dbd3c243a4db8 +size 201378736 diff --git a/exp0_baseline/checkpoint-99/optimizer.pt b/exp0_baseline/checkpoint-99/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0fbdc9f4f855f814f849d04291e175bdb810008 --- /dev/null +++ b/exp0_baseline/checkpoint-99/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48cffc2fd316bc098cfc6ef453f9b26c302bfcc79ef1d3863ad61734bb3d2170 +size 402982627 diff --git a/exp0_baseline/checkpoint-99/rng_state.pth b/exp0_baseline/checkpoint-99/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef5ac8da68afb8bea8ad1331116be95d18fbb182 --- /dev/null +++ b/exp0_baseline/checkpoint-99/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b293338027ef39107c758bc2300d51015b6a878a9f0e09fe1822d419dcbb163e +size 14645 diff --git a/exp0_baseline/checkpoint-99/scheduler.pt b/exp0_baseline/checkpoint-99/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a035695abab34b6d7759d2abb6c583da37754d5 --- /dev/null +++ b/exp0_baseline/checkpoint-99/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba8e76678603d3d1769a2ed6315c7bfd1edb3a261c537a3ff7c1689618cf725 +size 1465 diff --git a/exp0_baseline/checkpoint-99/trainer_state.json b/exp0_baseline/checkpoint-99/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4b6c4a358f423840a3d352550f92458981f41a6c --- /dev/null +++ b/exp0_baseline/checkpoint-99/trainer_state.json @@ -0,0 +1,727 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9949748743718593, + "eval_steps": 100, + "global_step": 99, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 1.144538164138794, + "learning_rate": 0.0, + "loss": 0.561, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 1.1173577308654785, + "learning_rate": 5e-06, + "loss": 0.5438, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 1.6941046714782715, + "learning_rate": 1e-05, + "loss": 0.5345, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 2.2526638507843018, + "learning_rate": 1.5e-05, + "loss": 0.6601, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 2.336232900619507, + "learning_rate": 2e-05, + "loss": 0.8386, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 1.2247791290283203, + "learning_rate": 2.5e-05, + "loss": 0.3448, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 3.491952419281006, + "learning_rate": 3e-05, + "loss": 0.9107, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.5285453796386719, + "learning_rate": 3.5e-05, + "loss": 0.5574, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 1.9498869180679321, + "learning_rate": 4e-05, + "loss": 0.7468, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.451749563217163, + "learning_rate": 4.5e-05, + "loss": 0.7601, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 2.121290683746338, + "learning_rate": 5e-05, + "loss": 0.788, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 1.173128604888916, + "learning_rate": 4.99847706754774e-05, + "loss": 0.4117, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 1.4134124517440796, + "learning_rate": 4.993910125649561e-05, + "loss": 0.2099, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 2.4718620777130127, + "learning_rate": 4.9863047384206835e-05, + "loss": 1.0762, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.1293636560440063, + "learning_rate": 4.975670171853926e-05, + "loss": 0.338, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 2.616136312484741, + "learning_rate": 4.962019382530521e-05, + "loss": 1.0328, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 2.410461902618408, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.863, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 2.1010797023773193, + "learning_rate": 4.925739315689991e-05, + "loss": 0.6364, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 1.2987828254699707, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.4619, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 1.2924538850784302, + "learning_rate": 4.877641290737884e-05, + "loss": 0.7118, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 1.6322095394134521, + "learning_rate": 4.849231551964771e-05, + "loss": 0.936, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.9164633750915527, + "learning_rate": 4.817959636416969e-05, + "loss": 0.8829, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 1.2494964599609375, + "learning_rate": 4.783863644106502e-05, + "loss": 0.3653, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.217826247215271, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.4539, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 3.2103142738342285, + "learning_rate": 4.707368982147318e-05, + "loss": 0.9621, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.2653640508651733, + "learning_rate": 4.665063509461097e-05, + "loss": 0.448, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 0.9144423007965088, + "learning_rate": 4.620120240391065e-05, + "loss": 0.3957, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 1.313241958618164, + "learning_rate": 4.572593931387604e-05, + "loss": 0.5468, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 0.7544188499450684, + "learning_rate": 4.522542485937369e-05, + "loss": 0.185, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 0.9644553661346436, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.3864, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 1.281360387802124, + "learning_rate": 4.415111107797445e-05, + "loss": 0.5255, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 0.8318763375282288, + "learning_rate": 4.357862063693486e-05, + "loss": 0.392, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 1.29972243309021, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.6444, + "step": 33 + }, + { + "epoch": 0.3417085427135678, + "grad_norm": 1.3421469926834106, + "learning_rate": 4.2366459261474933e-05, + "loss": 0.3876, + "step": 34 + }, + { + "epoch": 0.35175879396984927, + "grad_norm": 0.9718002676963806, + "learning_rate": 4.172826515897146e-05, + "loss": 0.5414, + "step": 35 + }, + { + "epoch": 0.36180904522613067, + "grad_norm": 0.9774896502494812, + "learning_rate": 4.1069690242163484e-05, + "loss": 0.4928, + "step": 36 + }, + { + "epoch": 0.37185929648241206, + "grad_norm": 3.244431257247925, + "learning_rate": 4.039153688314145e-05, + "loss": 1.0665, + "step": 37 + }, + { + "epoch": 0.38190954773869346, + "grad_norm": 0.9365755319595337, + "learning_rate": 3.969463130731183e-05, + "loss": 0.3107, + "step": 38 + }, + { + "epoch": 0.39195979899497485, + "grad_norm": 1.4368454217910767, + "learning_rate": 3.897982258676867e-05, + "loss": 0.4084, + "step": 39 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 2.354626417160034, + "learning_rate": 3.824798160583012e-05, + "loss": 0.4833, + "step": 40 + }, + { + "epoch": 0.4120603015075377, + "grad_norm": 2.426787853240967, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.548, + "step": 41 + }, + { + "epoch": 0.4221105527638191, + "grad_norm": 1.3904081583023071, + "learning_rate": 3.673678906964727e-05, + "loss": 0.847, + "step": 42 + }, + { + "epoch": 0.4321608040201005, + "grad_norm": 0.8043186664581299, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3826, + "step": 43 + }, + { + "epoch": 0.44221105527638194, + "grad_norm": 0.9501510858535767, + "learning_rate": 3.516841607689501e-05, + "loss": 0.2336, + "step": 44 + }, + { + "epoch": 0.45226130653266333, + "grad_norm": 2.477350950241089, + "learning_rate": 3.436516483539781e-05, + "loss": 0.8067, + "step": 45 + }, + { + "epoch": 0.4623115577889447, + "grad_norm": 1.3030426502227783, + "learning_rate": 3.355050358314172e-05, + "loss": 0.4237, + "step": 46 + }, + { + "epoch": 0.4723618090452261, + "grad_norm": 2.6159799098968506, + "learning_rate": 3.272542485937369e-05, + "loss": 0.5104, + "step": 47 + }, + { + "epoch": 0.4824120603015075, + "grad_norm": 1.256778597831726, + "learning_rate": 3.1890933895424976e-05, + "loss": 0.235, + "step": 48 + }, + { + "epoch": 0.49246231155778897, + "grad_norm": 2.076829195022583, + "learning_rate": 3.104804738999169e-05, + "loss": 0.5265, + "step": 49 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 1.3327422142028809, + "learning_rate": 3.0197792270443982e-05, + "loss": 0.5636, + "step": 50 + }, + { + "epoch": 0.5125628140703518, + "grad_norm": 1.7859450578689575, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.67, + "step": 51 + }, + { + "epoch": 0.5226130653266332, + "grad_norm": 1.3924661874771118, + "learning_rate": 2.8479327524001636e-05, + "loss": 0.6816, + "step": 52 + }, + { + "epoch": 0.5326633165829145, + "grad_norm": 1.4186939001083374, + "learning_rate": 2.761321158169134e-05, + "loss": 0.3969, + "step": 53 + }, + { + "epoch": 0.542713567839196, + "grad_norm": 2.0062601566314697, + "learning_rate": 2.674391184360313e-05, + "loss": 0.3101, + "step": 54 + }, + { + "epoch": 0.5527638190954773, + "grad_norm": 1.4015522003173828, + "learning_rate": 2.587248741756253e-05, + "loss": 0.2104, + "step": 55 + }, + { + "epoch": 0.5628140703517588, + "grad_norm": 1.5075968503952026, + "learning_rate": 2.5e-05, + "loss": 0.5247, + "step": 56 + }, + { + "epoch": 0.5728643216080402, + "grad_norm": 3.6213722229003906, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.8403, + "step": 57 + }, + { + "epoch": 0.5829145728643216, + "grad_norm": 5.086165904998779, + "learning_rate": 2.3256088156396868e-05, + "loss": 0.9748, + "step": 58 + }, + { + "epoch": 0.592964824120603, + "grad_norm": 0.8291557431221008, + "learning_rate": 2.238678841830867e-05, + "loss": 0.1264, + "step": 59 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 6.901468753814697, + "learning_rate": 2.1520672475998373e-05, + "loss": 1.7948, + "step": 60 + }, + { + "epoch": 0.6130653266331658, + "grad_norm": 1.2166105508804321, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.3152, + "step": 61 + }, + { + "epoch": 0.6231155778894473, + "grad_norm": 4.400286674499512, + "learning_rate": 1.980220772955602e-05, + "loss": 1.6227, + "step": 62 + }, + { + "epoch": 0.6331658291457286, + "grad_norm": 2.3584349155426025, + "learning_rate": 1.895195261000831e-05, + "loss": 0.6485, + "step": 63 + }, + { + "epoch": 0.6432160804020101, + "grad_norm": 3.535404682159424, + "learning_rate": 1.8109066104575023e-05, + "loss": 1.1718, + "step": 64 + }, + { + "epoch": 0.6532663316582915, + "grad_norm": 1.4264318943023682, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.3102, + "step": 65 + }, + { + "epoch": 0.6633165829145728, + "grad_norm": 1.9855120182037354, + "learning_rate": 1.6449496416858284e-05, + "loss": 0.405, + "step": 66 + }, + { + "epoch": 0.6733668341708543, + "grad_norm": 0.8108851909637451, + "learning_rate": 1.56348351646022e-05, + "loss": 0.1325, + "step": 67 + }, + { + "epoch": 0.6834170854271356, + "grad_norm": 4.136630535125732, + "learning_rate": 1.4831583923104999e-05, + "loss": 0.968, + "step": 68 + }, + { + "epoch": 0.6934673366834171, + "grad_norm": 0.9361010789871216, + "learning_rate": 1.4040721330273062e-05, + "loss": 0.2457, + "step": 69 + }, + { + "epoch": 0.7035175879396985, + "grad_norm": 3.343045473098755, + "learning_rate": 1.3263210930352737e-05, + "loss": 0.7569, + "step": 70 + }, + { + "epoch": 0.7135678391959799, + "grad_norm": 1.0723439455032349, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.1777, + "step": 71 + }, + { + "epoch": 0.7236180904522613, + "grad_norm": 1.3722829818725586, + "learning_rate": 1.175201839416988e-05, + "loss": 0.3186, + "step": 72 + }, + { + "epoch": 0.7336683417085427, + "grad_norm": 2.0538363456726074, + "learning_rate": 1.1020177413231334e-05, + "loss": 0.7053, + "step": 73 + }, + { + "epoch": 0.7437185929648241, + "grad_norm": 1.7194602489471436, + "learning_rate": 1.0305368692688174e-05, + "loss": 0.4186, + "step": 74 + }, + { + "epoch": 0.7537688442211056, + "grad_norm": 1.9144634008407593, + "learning_rate": 9.608463116858542e-06, + "loss": 0.4359, + "step": 75 + }, + { + "epoch": 0.7638190954773869, + "grad_norm": 1.8238540887832642, + "learning_rate": 8.930309757836517e-06, + "loss": 0.3578, + "step": 76 + }, + { + "epoch": 0.7738693467336684, + "grad_norm": 1.620219349861145, + "learning_rate": 8.271734841028553e-06, + "loss": 0.6803, + "step": 77 + }, + { + "epoch": 0.7839195979899497, + "grad_norm": 1.2236140966415405, + "learning_rate": 7.633540738525066e-06, + "loss": 0.285, + "step": 78 + }, + { + "epoch": 0.7939698492462312, + "grad_norm": 2.433211326599121, + "learning_rate": 7.016504991533726e-06, + "loss": 0.3922, + "step": 79 + }, + { + "epoch": 0.8040201005025126, + "grad_norm": 1.6254342794418335, + "learning_rate": 6.421379363065142e-06, + "loss": 0.6371, + "step": 80 + }, + { + "epoch": 0.8140703517587939, + "grad_norm": 0.95794677734375, + "learning_rate": 5.848888922025553e-06, + "loss": 0.4203, + "step": 81 + }, + { + "epoch": 0.8241206030150754, + "grad_norm": 2.8286194801330566, + "learning_rate": 5.299731159831953e-06, + "loss": 0.7793, + "step": 82 + }, + { + "epoch": 0.8341708542713567, + "grad_norm": 2.440025806427002, + "learning_rate": 4.7745751406263165e-06, + "loss": 0.7263, + "step": 83 + }, + { + "epoch": 0.8442211055276382, + "grad_norm": 2.440507173538208, + "learning_rate": 4.274060686123959e-06, + "loss": 0.4484, + "step": 84 + }, + { + "epoch": 0.8542713567839196, + "grad_norm": 1.1934988498687744, + "learning_rate": 3.798797596089351e-06, + "loss": 0.2335, + "step": 85 + }, + { + "epoch": 0.864321608040201, + "grad_norm": 1.1017628908157349, + "learning_rate": 3.3493649053890326e-06, + "loss": 0.3244, + "step": 86 + }, + { + "epoch": 0.8743718592964824, + "grad_norm": 1.3522121906280518, + "learning_rate": 2.9263101785268254e-06, + "loss": 0.3025, + "step": 87 + }, + { + "epoch": 0.8844221105527639, + "grad_norm": 1.3555244207382202, + "learning_rate": 2.5301488425208296e-06, + "loss": 0.2563, + "step": 88 + }, + { + "epoch": 0.8944723618090452, + "grad_norm": 1.7802023887634277, + "learning_rate": 2.1613635589349756e-06, + "loss": 0.8291, + "step": 89 + }, + { + "epoch": 0.9045226130653267, + "grad_norm": 1.5431946516036987, + "learning_rate": 1.8204036358303173e-06, + "loss": 0.496, + "step": 90 + }, + { + "epoch": 0.914572864321608, + "grad_norm": 3.552048444747925, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.6235, + "step": 91 + }, + { + "epoch": 0.9246231155778895, + "grad_norm": 1.6633059978485107, + "learning_rate": 1.2235870926211619e-06, + "loss": 0.3562, + "step": 92 + }, + { + "epoch": 0.9346733668341709, + "grad_norm": 1.6059434413909912, + "learning_rate": 9.684576015420278e-07, + "loss": 0.5505, + "step": 93 + }, + { + "epoch": 0.9447236180904522, + "grad_norm": 1.9522382020950317, + "learning_rate": 7.426068431000882e-07, + "loss": 0.3748, + "step": 94 + }, + { + "epoch": 0.9547738693467337, + "grad_norm": 3.010096788406372, + "learning_rate": 5.463099816548579e-07, + "loss": 0.6046, + "step": 95 + }, + { + "epoch": 0.964824120603015, + "grad_norm": 2.7505385875701904, + "learning_rate": 3.7980617469479953e-07, + "loss": 0.8919, + "step": 96 + }, + { + "epoch": 0.9748743718592965, + "grad_norm": 1.0584425926208496, + "learning_rate": 2.4329828146074095e-07, + "loss": 0.3999, + "step": 97 + }, + { + "epoch": 0.9849246231155779, + "grad_norm": 1.4713913202285767, + "learning_rate": 1.3695261579316777e-07, + "loss": 0.4368, + "step": 98 + }, + { + "epoch": 0.9949748743718593, + "grad_norm": 1.4613982439041138, + "learning_rate": 6.089874350439506e-08, + "loss": 0.3824, + "step": 99 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 33, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.832806128359834e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp0_baseline/checkpoint-99/training_args.bin b/exp0_baseline/checkpoint-99/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5fe3afbafd0cd89fa6261f47e568e7c633b890ae --- /dev/null +++ b/exp0_baseline/checkpoint-99/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea06f89c6a9f3345ae45b085ca42903e4044f47016fd3035bff387ef5774f26 +size 6033 diff --git a/exp0_baseline/final_model/README.md b/exp0_baseline/final_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp0_baseline/final_model/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp0_baseline/final_model/adapter_config.json b/exp0_baseline/final_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2d9e6780978efb4ab936b5f50a2afc11535e6c4b --- /dev/null +++ b/exp0_baseline/final_model/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "o_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp0_baseline/final_model/adapter_model.safetensors b/exp0_baseline/final_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e01667669b403df3818f8a188a7cb3bf26b196b1 --- /dev/null +++ b/exp0_baseline/final_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8209e0cbb700204af15187718e31808cf6e3166b8065c4c05ac7864dc693371 +size 201378736 diff --git a/exp0_baseline/final_model/training_args.bin b/exp0_baseline/final_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5fe3afbafd0cd89fa6261f47e568e7c633b890ae --- /dev/null +++ b/exp0_baseline/final_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea06f89c6a9f3345ae45b085ca42903e4044f47016fd3035bff387ef5774f26 +size 6033 diff --git a/exp1_signal_c_100/checkpoint-100/README.md b/exp1_signal_c_100/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp1_signal_c_100/checkpoint-100/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp1_signal_c_100/checkpoint-100/adapter_config.json b/exp1_signal_c_100/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..05fb68ca4a7e988cacbcf078beb813a265eb0d53 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-100/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp1_signal_c_100/checkpoint-100/adapter_model.safetensors b/exp1_signal_c_100/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a1a9c7655812eeab9833db157302c516b781527 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d06d5d1fb081e5847792d8a5a3630488d31de5ea5b5f3835e79c4ec1c3f5e65 +size 201378736 diff --git a/exp1_signal_c_100/checkpoint-100/optimizer.pt b/exp1_signal_c_100/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe169f6221883c302e0212b5e8a751178fd754a0 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a81531c61562ce6bf2f54003446cd3bb82829259c65125b09eb2259404ce87b +size 402982627 diff --git a/exp1_signal_c_100/checkpoint-100/rng_state.pth b/exp1_signal_c_100/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6ce9c097d9e48ec448fc9e906660769067929aa3 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b92ede016a8dc313d233f2927862e909d5717a49b27265ddd87beb7b3aab3357 +size 14645 diff --git a/exp1_signal_c_100/checkpoint-100/scheduler.pt b/exp1_signal_c_100/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fdcc1fbec5336295a8ad32e6fb37c9646d5a557 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4e44404b58ce3af1b46c3d4a85a59edbbc386f340c476e894715a1199e1aed +size 1465 diff --git a/exp1_signal_c_100/checkpoint-100/trainer_state.json b/exp1_signal_c_100/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..38b90a0ba5f93105f58843ee96e3b989e4733641 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-100/trainer_state.json @@ -0,0 +1,742 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 2.0252811908721924, + "learning_rate": 0.0, + "loss": 0.6131, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 2.2184908390045166, + "learning_rate": 5e-06, + "loss": 0.8686, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 2.4873640537261963, + "learning_rate": 1e-05, + "loss": 0.5164, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 2.624083995819092, + "learning_rate": 1.5e-05, + "loss": 1.002, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 1.3904463052749634, + "learning_rate": 2e-05, + "loss": 0.6515, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 3.1512720584869385, + "learning_rate": 2.5e-05, + "loss": 0.9144, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 1.309517502784729, + "learning_rate": 3e-05, + "loss": 0.5179, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.8838770389556885, + "learning_rate": 3.5e-05, + "loss": 0.4935, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 2.1511077880859375, + "learning_rate": 4e-05, + "loss": 0.8218, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.338928461074829, + "learning_rate": 4.5e-05, + "loss": 0.6049, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 2.3781137466430664, + "learning_rate": 5e-05, + "loss": 0.4984, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 0.8085482716560364, + "learning_rate": 4.99847706754774e-05, + "loss": 0.3011, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 2.7491891384124756, + "learning_rate": 4.993910125649561e-05, + "loss": 0.4187, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 2.329538583755493, + "learning_rate": 4.9863047384206835e-05, + "loss": 0.5481, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.2953161001205444, + "learning_rate": 4.975670171853926e-05, + "loss": 0.7587, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 1.5520743131637573, + "learning_rate": 4.962019382530521e-05, + "loss": 0.7993, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 1.7542706727981567, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.791, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 1.2422624826431274, + "learning_rate": 4.925739315689991e-05, + "loss": 0.5716, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 2.116292953491211, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.552, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 2.5214879512786865, + "learning_rate": 4.877641290737884e-05, + "loss": 1.0288, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 3.0591695308685303, + "learning_rate": 4.849231551964771e-05, + "loss": 0.9893, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.6582666635513306, + "learning_rate": 4.817959636416969e-05, + "loss": 0.4932, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 2.2998204231262207, + "learning_rate": 4.783863644106502e-05, + "loss": 0.7694, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.2947975397109985, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.8939, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 3.2607781887054443, + "learning_rate": 4.707368982147318e-05, + "loss": 0.9506, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.432145118713379, + "learning_rate": 4.665063509461097e-05, + "loss": 0.43, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 1.6384902000427246, + "learning_rate": 4.620120240391065e-05, + "loss": 0.4947, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 2.0519561767578125, + "learning_rate": 4.572593931387604e-05, + "loss": 0.6591, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 1.9339925050735474, + "learning_rate": 4.522542485937369e-05, + "loss": 0.5761, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 0.7800000905990601, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.3128, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 2.304705858230591, + "learning_rate": 4.415111107797445e-05, + "loss": 0.7255, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 2.59503436088562, + "learning_rate": 4.357862063693486e-05, + "loss": 0.4959, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 2.7538652420043945, + "learning_rate": 4.2983495008466276e-05, + "loss": 1.1096, + "step": 33 + }, + { + "epoch": 0.3417085427135678, + "grad_norm": 2.156506299972534, + "learning_rate": 4.2366459261474933e-05, + "loss": 0.5025, + "step": 34 + }, + { + "epoch": 0.35175879396984927, + "grad_norm": 1.8370133638381958, + "learning_rate": 4.172826515897146e-05, + "loss": 0.6174, + "step": 35 + }, + { + "epoch": 0.36180904522613067, + "grad_norm": 1.5026291608810425, + "learning_rate": 4.1069690242163484e-05, + "loss": 0.7019, + "step": 36 + }, + { + "epoch": 0.37185929648241206, + "grad_norm": 0.9972341656684875, + "learning_rate": 4.039153688314145e-05, + "loss": 0.3783, + "step": 37 + }, + { + "epoch": 0.38190954773869346, + "grad_norm": 2.10705828666687, + "learning_rate": 3.969463130731183e-05, + "loss": 0.4002, + "step": 38 + }, + { + "epoch": 0.39195979899497485, + "grad_norm": 1.508216142654419, + "learning_rate": 3.897982258676867e-05, + "loss": 0.7637, + "step": 39 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 1.5484806299209595, + "learning_rate": 3.824798160583012e-05, + "loss": 0.6095, + "step": 40 + }, + { + "epoch": 0.4120603015075377, + "grad_norm": 1.2863812446594238, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.331, + "step": 41 + }, + { + "epoch": 0.4221105527638191, + "grad_norm": 1.5361781120300293, + "learning_rate": 3.673678906964727e-05, + "loss": 0.717, + "step": 42 + }, + { + "epoch": 0.4321608040201005, + "grad_norm": 1.3632347583770752, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.5195, + "step": 43 + }, + { + "epoch": 0.44221105527638194, + "grad_norm": 1.6475563049316406, + "learning_rate": 3.516841607689501e-05, + "loss": 0.5608, + "step": 44 + }, + { + "epoch": 0.45226130653266333, + "grad_norm": 2.4081761837005615, + "learning_rate": 3.436516483539781e-05, + "loss": 0.6161, + "step": 45 + }, + { + "epoch": 0.4623115577889447, + "grad_norm": 1.1614190340042114, + "learning_rate": 3.355050358314172e-05, + "loss": 0.5096, + "step": 46 + }, + { + "epoch": 0.4723618090452261, + "grad_norm": 1.1495153903961182, + "learning_rate": 3.272542485937369e-05, + "loss": 0.4686, + "step": 47 + }, + { + "epoch": 0.4824120603015075, + "grad_norm": 0.9374528527259827, + "learning_rate": 3.1890933895424976e-05, + "loss": 0.4752, + "step": 48 + }, + { + "epoch": 0.49246231155778897, + "grad_norm": 0.811876118183136, + "learning_rate": 3.104804738999169e-05, + "loss": 0.3812, + "step": 49 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 1.067453145980835, + "learning_rate": 3.0197792270443982e-05, + "loss": 0.4631, + "step": 50 + }, + { + "epoch": 0.5125628140703518, + "grad_norm": 0.957179069519043, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.4275, + "step": 51 + }, + { + "epoch": 0.5226130653266332, + "grad_norm": 1.188721776008606, + "learning_rate": 2.8479327524001636e-05, + "loss": 0.5242, + "step": 52 + }, + { + "epoch": 0.5326633165829145, + "grad_norm": 3.2536540031433105, + "learning_rate": 2.761321158169134e-05, + "loss": 1.0363, + "step": 53 + }, + { + "epoch": 0.542713567839196, + "grad_norm": 1.041953682899475, + "learning_rate": 2.674391184360313e-05, + "loss": 0.3354, + "step": 54 + }, + { + "epoch": 0.5527638190954773, + "grad_norm": 1.2986676692962646, + "learning_rate": 2.587248741756253e-05, + "loss": 0.2586, + "step": 55 + }, + { + "epoch": 0.5628140703517588, + "grad_norm": 0.7783204913139343, + "learning_rate": 2.5e-05, + "loss": 0.2513, + "step": 56 + }, + { + "epoch": 0.5728643216080402, + "grad_norm": 1.6265126466751099, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.1845, + "step": 57 + }, + { + "epoch": 0.5829145728643216, + "grad_norm": 2.727491855621338, + "learning_rate": 2.3256088156396868e-05, + "loss": 0.661, + "step": 58 + }, + { + "epoch": 0.592964824120603, + "grad_norm": 2.6408135890960693, + "learning_rate": 2.238678841830867e-05, + "loss": 0.6877, + "step": 59 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 0.29646119475364685, + "learning_rate": 2.1520672475998373e-05, + "loss": 0.073, + "step": 60 + }, + { + "epoch": 0.6130653266331658, + "grad_norm": 1.218673825263977, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.4253, + "step": 61 + }, + { + "epoch": 0.6231155778894473, + "grad_norm": 1.2460541725158691, + "learning_rate": 1.980220772955602e-05, + "loss": 0.3749, + "step": 62 + }, + { + "epoch": 0.6331658291457286, + "grad_norm": 1.5119431018829346, + "learning_rate": 1.895195261000831e-05, + "loss": 0.4314, + "step": 63 + }, + { + "epoch": 0.6432160804020101, + "grad_norm": 1.4413162469863892, + "learning_rate": 1.8109066104575023e-05, + "loss": 0.3532, + "step": 64 + }, + { + "epoch": 0.6532663316582915, + "grad_norm": 1.122306227684021, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.3553, + "step": 65 + }, + { + "epoch": 0.6633165829145728, + "grad_norm": 2.0227315425872803, + "learning_rate": 1.6449496416858284e-05, + "loss": 0.4409, + "step": 66 + }, + { + "epoch": 0.6733668341708543, + "grad_norm": 2.7001399993896484, + "learning_rate": 1.56348351646022e-05, + "loss": 0.9403, + "step": 67 + }, + { + "epoch": 0.6834170854271356, + "grad_norm": 2.5412521362304688, + "learning_rate": 1.4831583923104999e-05, + "loss": 0.6257, + "step": 68 + }, + { + "epoch": 0.6934673366834171, + "grad_norm": 2.520745038986206, + "learning_rate": 1.4040721330273062e-05, + "loss": 0.4578, + "step": 69 + }, + { + "epoch": 0.7035175879396985, + "grad_norm": 1.906459927558899, + "learning_rate": 1.3263210930352737e-05, + "loss": 0.6205, + "step": 70 + }, + { + "epoch": 0.7135678391959799, + "grad_norm": 1.9079599380493164, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.364, + "step": 71 + }, + { + "epoch": 0.7236180904522613, + "grad_norm": 1.4104374647140503, + "learning_rate": 1.175201839416988e-05, + "loss": 0.4687, + "step": 72 + }, + { + "epoch": 0.7336683417085427, + "grad_norm": 2.257812738418579, + "learning_rate": 1.1020177413231334e-05, + "loss": 0.7489, + "step": 73 + }, + { + "epoch": 0.7437185929648241, + "grad_norm": 1.5222328901290894, + "learning_rate": 1.0305368692688174e-05, + "loss": 0.4103, + "step": 74 + }, + { + "epoch": 0.7537688442211056, + "grad_norm": 1.8094953298568726, + "learning_rate": 9.608463116858542e-06, + "loss": 0.341, + "step": 75 + }, + { + "epoch": 0.7638190954773869, + "grad_norm": 2.328770399093628, + "learning_rate": 8.930309757836517e-06, + "loss": 0.4114, + "step": 76 + }, + { + "epoch": 0.7738693467336684, + "grad_norm": 1.129135012626648, + "learning_rate": 8.271734841028553e-06, + "loss": 0.339, + "step": 77 + }, + { + "epoch": 0.7839195979899497, + "grad_norm": 0.979975163936615, + "learning_rate": 7.633540738525066e-06, + "loss": 0.3299, + "step": 78 + }, + { + "epoch": 0.7939698492462312, + "grad_norm": 2.6319901943206787, + "learning_rate": 7.016504991533726e-06, + "loss": 0.7442, + "step": 79 + }, + { + "epoch": 0.8040201005025126, + "grad_norm": 1.877888560295105, + "learning_rate": 6.421379363065142e-06, + "loss": 0.3072, + "step": 80 + }, + { + "epoch": 0.8140703517587939, + "grad_norm": 1.062574863433838, + "learning_rate": 5.848888922025553e-06, + "loss": 0.2217, + "step": 81 + }, + { + "epoch": 0.8241206030150754, + "grad_norm": 2.3768932819366455, + "learning_rate": 5.299731159831953e-06, + "loss": 0.4072, + "step": 82 + }, + { + "epoch": 0.8341708542713567, + "grad_norm": 1.9588872194290161, + "learning_rate": 4.7745751406263165e-06, + "loss": 0.4908, + "step": 83 + }, + { + "epoch": 0.8442211055276382, + "grad_norm": 0.6415271759033203, + "learning_rate": 4.274060686123959e-06, + "loss": 0.1118, + "step": 84 + }, + { + "epoch": 0.8542713567839196, + "grad_norm": 1.4708070755004883, + "learning_rate": 3.798797596089351e-06, + "loss": 0.3945, + "step": 85 + }, + { + "epoch": 0.864321608040201, + "grad_norm": 1.495408058166504, + "learning_rate": 3.3493649053890326e-06, + "loss": 0.2519, + "step": 86 + }, + { + "epoch": 0.8743718592964824, + "grad_norm": 1.9014512300491333, + "learning_rate": 2.9263101785268254e-06, + "loss": 0.3233, + "step": 87 + }, + { + "epoch": 0.8844221105527639, + "grad_norm": 2.157270908355713, + "learning_rate": 2.5301488425208296e-06, + "loss": 0.4891, + "step": 88 + }, + { + "epoch": 0.8944723618090452, + "grad_norm": 1.6721971035003662, + "learning_rate": 2.1613635589349756e-06, + "loss": 0.2344, + "step": 89 + }, + { + "epoch": 0.9045226130653267, + "grad_norm": 1.1136136054992676, + "learning_rate": 1.8204036358303173e-06, + "loss": 0.4952, + "step": 90 + }, + { + "epoch": 0.914572864321608, + "grad_norm": 2.6617023944854736, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.4057, + "step": 91 + }, + { + "epoch": 0.9246231155778895, + "grad_norm": 1.8409501314163208, + "learning_rate": 1.2235870926211619e-06, + "loss": 0.4969, + "step": 92 + }, + { + "epoch": 0.9346733668341709, + "grad_norm": 1.0654531717300415, + "learning_rate": 9.684576015420278e-07, + "loss": 0.1584, + "step": 93 + }, + { + "epoch": 0.9447236180904522, + "grad_norm": 1.7827986478805542, + "learning_rate": 7.426068431000882e-07, + "loss": 0.4053, + "step": 94 + }, + { + "epoch": 0.9547738693467337, + "grad_norm": 1.0557734966278076, + "learning_rate": 5.463099816548579e-07, + "loss": 0.1529, + "step": 95 + }, + { + "epoch": 0.964824120603015, + "grad_norm": 1.926171898841858, + "learning_rate": 3.7980617469479953e-07, + "loss": 0.7067, + "step": 96 + }, + { + "epoch": 0.9748743718592965, + "grad_norm": 2.5966174602508545, + "learning_rate": 2.4329828146074095e-07, + "loss": 0.4362, + "step": 97 + }, + { + "epoch": 0.9849246231155779, + "grad_norm": 2.601397752761841, + "learning_rate": 1.3695261579316777e-07, + "loss": 0.8023, + "step": 98 + }, + { + "epoch": 0.9949748743718593, + "grad_norm": 1.530727744102478, + "learning_rate": 6.089874350439506e-08, + "loss": 0.5146, + "step": 99 + }, + { + "epoch": 1.0, + "grad_norm": 3.1892786026000977, + "learning_rate": 1.522932452260595e-08, + "loss": 0.4856, + "step": 100 + }, + { + "epoch": 1.0, + "eval_loss": 0.5350430011749268, + "eval_runtime": 85.8193, + "eval_samples_per_second": 3.496, + "eval_steps_per_second": 1.748, + "step": 100 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 33, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 6.867315250220237e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp1_signal_c_100/checkpoint-100/training_args.bin b/exp1_signal_c_100/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..519a036c4c4f776dd18d9d414e11851c970fc417 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:572cbdd13202b1ec51905f252af50c916a6de05c78548327ae765c00ceb7d3ab +size 6033 diff --git a/exp1_signal_c_100/checkpoint-33/README.md b/exp1_signal_c_100/checkpoint-33/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp1_signal_c_100/checkpoint-33/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp1_signal_c_100/checkpoint-33/adapter_config.json b/exp1_signal_c_100/checkpoint-33/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..05fb68ca4a7e988cacbcf078beb813a265eb0d53 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-33/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp1_signal_c_100/checkpoint-33/adapter_model.safetensors b/exp1_signal_c_100/checkpoint-33/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de3a9ec5a68ffe94f189fa8250ee554f73800232 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-33/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59b0b7bc845c8157e4d73d51e1a043a969b39f2a4c264dbb16afa830db2a31b7 +size 201378736 diff --git a/exp1_signal_c_100/checkpoint-33/optimizer.pt b/exp1_signal_c_100/checkpoint-33/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e562caca023e46b69f17c15fd804e405dcc0fbe --- /dev/null +++ b/exp1_signal_c_100/checkpoint-33/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23686548d8f487f5951f1bc1167102a64c3bb820245ddf3650bda56347454dc4 +size 402982627 diff --git a/exp1_signal_c_100/checkpoint-33/rng_state.pth b/exp1_signal_c_100/checkpoint-33/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fd650bca78c013ce43187807570db259be238162 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-33/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4370640aed47e1cfe501e222405e6de0a6701dc2554814f30a0ec4610f4a16b +size 14645 diff --git a/exp1_signal_c_100/checkpoint-33/scheduler.pt b/exp1_signal_c_100/checkpoint-33/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3b688fff35c94c1b3f7bf1c91548cb8ab1f20a1 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-33/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b360cc37b95be8398ac4635eb61f23915865018f0c39c4789e480a08ead764a +size 1465 diff --git a/exp1_signal_c_100/checkpoint-33/trainer_state.json b/exp1_signal_c_100/checkpoint-33/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eac1edd03f0053025e85a8d76971ea88a0054ba6 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-33/trainer_state.json @@ -0,0 +1,265 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3316582914572864, + "eval_steps": 100, + "global_step": 33, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 2.0252811908721924, + "learning_rate": 0.0, + "loss": 0.6131, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 2.2184908390045166, + "learning_rate": 5e-06, + "loss": 0.8686, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 2.4873640537261963, + "learning_rate": 1e-05, + "loss": 0.5164, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 2.624083995819092, + "learning_rate": 1.5e-05, + "loss": 1.002, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 1.3904463052749634, + "learning_rate": 2e-05, + "loss": 0.6515, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 3.1512720584869385, + "learning_rate": 2.5e-05, + "loss": 0.9144, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 1.309517502784729, + "learning_rate": 3e-05, + "loss": 0.5179, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.8838770389556885, + "learning_rate": 3.5e-05, + "loss": 0.4935, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 2.1511077880859375, + "learning_rate": 4e-05, + "loss": 0.8218, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.338928461074829, + "learning_rate": 4.5e-05, + "loss": 0.6049, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 2.3781137466430664, + "learning_rate": 5e-05, + "loss": 0.4984, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 0.8085482716560364, + "learning_rate": 4.99847706754774e-05, + "loss": 0.3011, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 2.7491891384124756, + "learning_rate": 4.993910125649561e-05, + "loss": 0.4187, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 2.329538583755493, + "learning_rate": 4.9863047384206835e-05, + "loss": 0.5481, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.2953161001205444, + "learning_rate": 4.975670171853926e-05, + "loss": 0.7587, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 1.5520743131637573, + "learning_rate": 4.962019382530521e-05, + "loss": 0.7993, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 1.7542706727981567, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.791, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 1.2422624826431274, + "learning_rate": 4.925739315689991e-05, + "loss": 0.5716, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 2.116292953491211, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.552, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 2.5214879512786865, + "learning_rate": 4.877641290737884e-05, + "loss": 1.0288, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 3.0591695308685303, + "learning_rate": 4.849231551964771e-05, + "loss": 0.9893, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.6582666635513306, + "learning_rate": 4.817959636416969e-05, + "loss": 0.4932, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 2.2998204231262207, + "learning_rate": 4.783863644106502e-05, + "loss": 0.7694, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.2947975397109985, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.8939, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 3.2607781887054443, + "learning_rate": 4.707368982147318e-05, + "loss": 0.9506, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.432145118713379, + "learning_rate": 4.665063509461097e-05, + "loss": 0.43, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 1.6384902000427246, + "learning_rate": 4.620120240391065e-05, + "loss": 0.4947, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 2.0519561767578125, + "learning_rate": 4.572593931387604e-05, + "loss": 0.6591, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 1.9339925050735474, + "learning_rate": 4.522542485937369e-05, + "loss": 0.5761, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 0.7800000905990601, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.3128, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 2.304705858230591, + "learning_rate": 4.415111107797445e-05, + "loss": 0.7255, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 2.59503436088562, + "learning_rate": 4.357862063693486e-05, + "loss": 0.4959, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 2.7538652420043945, + "learning_rate": 4.2983495008466276e-05, + "loss": 1.1096, + "step": 33 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 33, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.277602042786611e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp1_signal_c_100/checkpoint-33/training_args.bin b/exp1_signal_c_100/checkpoint-33/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..519a036c4c4f776dd18d9d414e11851c970fc417 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-33/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:572cbdd13202b1ec51905f252af50c916a6de05c78548327ae765c00ceb7d3ab +size 6033 diff --git a/exp1_signal_c_100/checkpoint-66/README.md b/exp1_signal_c_100/checkpoint-66/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp1_signal_c_100/checkpoint-66/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp1_signal_c_100/checkpoint-66/adapter_config.json b/exp1_signal_c_100/checkpoint-66/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..05fb68ca4a7e988cacbcf078beb813a265eb0d53 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-66/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp1_signal_c_100/checkpoint-66/adapter_model.safetensors b/exp1_signal_c_100/checkpoint-66/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67f76fdc04180b8a427f399e899eb2d36ada2ec6 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-66/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d830e36bf36f93433dacc87e63760f2068ebb8e7312fa3e6e88f2ed403cb1106 +size 201378736 diff --git a/exp1_signal_c_100/checkpoint-66/optimizer.pt b/exp1_signal_c_100/checkpoint-66/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..76a3470844748eaf1550d4a6bffe6156d2b300c4 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-66/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8503033100a46c53e3098543802e038eb01c36612fed9653cfd2206e68627c81 +size 402982627 diff --git a/exp1_signal_c_100/checkpoint-66/rng_state.pth b/exp1_signal_c_100/checkpoint-66/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9acb6af318562be091fcb3203d32d6aa81ac4bf6 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-66/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe8399edd28ccffb95622add1833ade04c0b6c9ff41375a11d9923ffa06e322 +size 14645 diff --git a/exp1_signal_c_100/checkpoint-66/scheduler.pt b/exp1_signal_c_100/checkpoint-66/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a327542ffefb0cc080cbcc215ee0039848f86e29 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-66/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2fd2e1422c94e485a1f77b0038f8f0bc12efa27617f0f3dbfaf4b9a3fd1fd59 +size 1465 diff --git a/exp1_signal_c_100/checkpoint-66/trainer_state.json b/exp1_signal_c_100/checkpoint-66/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e9a0c98e5bf755715e4cbf81fdaad0f6774297a3 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-66/trainer_state.json @@ -0,0 +1,496 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6633165829145728, + "eval_steps": 100, + "global_step": 66, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 2.0252811908721924, + "learning_rate": 0.0, + "loss": 0.6131, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 2.2184908390045166, + "learning_rate": 5e-06, + "loss": 0.8686, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 2.4873640537261963, + "learning_rate": 1e-05, + "loss": 0.5164, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 2.624083995819092, + "learning_rate": 1.5e-05, + "loss": 1.002, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 1.3904463052749634, + "learning_rate": 2e-05, + "loss": 0.6515, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 3.1512720584869385, + "learning_rate": 2.5e-05, + "loss": 0.9144, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 1.309517502784729, + "learning_rate": 3e-05, + "loss": 0.5179, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.8838770389556885, + "learning_rate": 3.5e-05, + "loss": 0.4935, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 2.1511077880859375, + "learning_rate": 4e-05, + "loss": 0.8218, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.338928461074829, + "learning_rate": 4.5e-05, + "loss": 0.6049, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 2.3781137466430664, + "learning_rate": 5e-05, + "loss": 0.4984, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 0.8085482716560364, + "learning_rate": 4.99847706754774e-05, + "loss": 0.3011, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 2.7491891384124756, + "learning_rate": 4.993910125649561e-05, + "loss": 0.4187, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 2.329538583755493, + "learning_rate": 4.9863047384206835e-05, + "loss": 0.5481, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.2953161001205444, + "learning_rate": 4.975670171853926e-05, + "loss": 0.7587, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 1.5520743131637573, + "learning_rate": 4.962019382530521e-05, + "loss": 0.7993, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 1.7542706727981567, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.791, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 1.2422624826431274, + "learning_rate": 4.925739315689991e-05, + "loss": 0.5716, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 2.116292953491211, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.552, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 2.5214879512786865, + "learning_rate": 4.877641290737884e-05, + "loss": 1.0288, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 3.0591695308685303, + "learning_rate": 4.849231551964771e-05, + "loss": 0.9893, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.6582666635513306, + "learning_rate": 4.817959636416969e-05, + "loss": 0.4932, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 2.2998204231262207, + "learning_rate": 4.783863644106502e-05, + "loss": 0.7694, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.2947975397109985, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.8939, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 3.2607781887054443, + "learning_rate": 4.707368982147318e-05, + "loss": 0.9506, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.432145118713379, + "learning_rate": 4.665063509461097e-05, + "loss": 0.43, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 1.6384902000427246, + "learning_rate": 4.620120240391065e-05, + "loss": 0.4947, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 2.0519561767578125, + "learning_rate": 4.572593931387604e-05, + "loss": 0.6591, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 1.9339925050735474, + "learning_rate": 4.522542485937369e-05, + "loss": 0.5761, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 0.7800000905990601, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.3128, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 2.304705858230591, + "learning_rate": 4.415111107797445e-05, + "loss": 0.7255, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 2.59503436088562, + "learning_rate": 4.357862063693486e-05, + "loss": 0.4959, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 2.7538652420043945, + "learning_rate": 4.2983495008466276e-05, + "loss": 1.1096, + "step": 33 + }, + { + "epoch": 0.3417085427135678, + "grad_norm": 2.156506299972534, + "learning_rate": 4.2366459261474933e-05, + "loss": 0.5025, + "step": 34 + }, + { + "epoch": 0.35175879396984927, + "grad_norm": 1.8370133638381958, + "learning_rate": 4.172826515897146e-05, + "loss": 0.6174, + "step": 35 + }, + { + "epoch": 0.36180904522613067, + "grad_norm": 1.5026291608810425, + "learning_rate": 4.1069690242163484e-05, + "loss": 0.7019, + "step": 36 + }, + { + "epoch": 0.37185929648241206, + "grad_norm": 0.9972341656684875, + "learning_rate": 4.039153688314145e-05, + "loss": 0.3783, + "step": 37 + }, + { + "epoch": 0.38190954773869346, + "grad_norm": 2.10705828666687, + "learning_rate": 3.969463130731183e-05, + "loss": 0.4002, + "step": 38 + }, + { + "epoch": 0.39195979899497485, + "grad_norm": 1.508216142654419, + "learning_rate": 3.897982258676867e-05, + "loss": 0.7637, + "step": 39 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 1.5484806299209595, + "learning_rate": 3.824798160583012e-05, + "loss": 0.6095, + "step": 40 + }, + { + "epoch": 0.4120603015075377, + "grad_norm": 1.2863812446594238, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.331, + "step": 41 + }, + { + "epoch": 0.4221105527638191, + "grad_norm": 1.5361781120300293, + "learning_rate": 3.673678906964727e-05, + "loss": 0.717, + "step": 42 + }, + { + "epoch": 0.4321608040201005, + "grad_norm": 1.3632347583770752, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.5195, + "step": 43 + }, + { + "epoch": 0.44221105527638194, + "grad_norm": 1.6475563049316406, + "learning_rate": 3.516841607689501e-05, + "loss": 0.5608, + "step": 44 + }, + { + "epoch": 0.45226130653266333, + "grad_norm": 2.4081761837005615, + "learning_rate": 3.436516483539781e-05, + "loss": 0.6161, + "step": 45 + }, + { + "epoch": 0.4623115577889447, + "grad_norm": 1.1614190340042114, + "learning_rate": 3.355050358314172e-05, + "loss": 0.5096, + "step": 46 + }, + { + "epoch": 0.4723618090452261, + "grad_norm": 1.1495153903961182, + "learning_rate": 3.272542485937369e-05, + "loss": 0.4686, + "step": 47 + }, + { + "epoch": 0.4824120603015075, + "grad_norm": 0.9374528527259827, + "learning_rate": 3.1890933895424976e-05, + "loss": 0.4752, + "step": 48 + }, + { + "epoch": 0.49246231155778897, + "grad_norm": 0.811876118183136, + "learning_rate": 3.104804738999169e-05, + "loss": 0.3812, + "step": 49 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 1.067453145980835, + "learning_rate": 3.0197792270443982e-05, + "loss": 0.4631, + "step": 50 + }, + { + "epoch": 0.5125628140703518, + "grad_norm": 0.957179069519043, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.4275, + "step": 51 + }, + { + "epoch": 0.5226130653266332, + "grad_norm": 1.188721776008606, + "learning_rate": 2.8479327524001636e-05, + "loss": 0.5242, + "step": 52 + }, + { + "epoch": 0.5326633165829145, + "grad_norm": 3.2536540031433105, + "learning_rate": 2.761321158169134e-05, + "loss": 1.0363, + "step": 53 + }, + { + "epoch": 0.542713567839196, + "grad_norm": 1.041953682899475, + "learning_rate": 2.674391184360313e-05, + "loss": 0.3354, + "step": 54 + }, + { + "epoch": 0.5527638190954773, + "grad_norm": 1.2986676692962646, + "learning_rate": 2.587248741756253e-05, + "loss": 0.2586, + "step": 55 + }, + { + "epoch": 0.5628140703517588, + "grad_norm": 0.7783204913139343, + "learning_rate": 2.5e-05, + "loss": 0.2513, + "step": 56 + }, + { + "epoch": 0.5728643216080402, + "grad_norm": 1.6265126466751099, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.1845, + "step": 57 + }, + { + "epoch": 0.5829145728643216, + "grad_norm": 2.727491855621338, + "learning_rate": 2.3256088156396868e-05, + "loss": 0.661, + "step": 58 + }, + { + "epoch": 0.592964824120603, + "grad_norm": 2.6408135890960693, + "learning_rate": 2.238678841830867e-05, + "loss": 0.6877, + "step": 59 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 0.29646119475364685, + "learning_rate": 2.1520672475998373e-05, + "loss": 0.073, + "step": 60 + }, + { + "epoch": 0.6130653266331658, + "grad_norm": 1.218673825263977, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.4253, + "step": 61 + }, + { + "epoch": 0.6231155778894473, + "grad_norm": 1.2460541725158691, + "learning_rate": 1.980220772955602e-05, + "loss": 0.3749, + "step": 62 + }, + { + "epoch": 0.6331658291457286, + "grad_norm": 1.5119431018829346, + "learning_rate": 1.895195261000831e-05, + "loss": 0.4314, + "step": 63 + }, + { + "epoch": 0.6432160804020101, + "grad_norm": 1.4413162469863892, + "learning_rate": 1.8109066104575023e-05, + "loss": 0.3532, + "step": 64 + }, + { + "epoch": 0.6532663316582915, + "grad_norm": 1.122306227684021, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.3553, + "step": 65 + }, + { + "epoch": 0.6633165829145728, + "grad_norm": 2.0227315425872803, + "learning_rate": 1.6449496416858284e-05, + "loss": 0.4409, + "step": 66 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 33, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.555204085573222e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp1_signal_c_100/checkpoint-66/training_args.bin b/exp1_signal_c_100/checkpoint-66/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..519a036c4c4f776dd18d9d414e11851c970fc417 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-66/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:572cbdd13202b1ec51905f252af50c916a6de05c78548327ae765c00ceb7d3ab +size 6033 diff --git a/exp1_signal_c_100/checkpoint-99/README.md b/exp1_signal_c_100/checkpoint-99/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp1_signal_c_100/checkpoint-99/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp1_signal_c_100/checkpoint-99/adapter_config.json b/exp1_signal_c_100/checkpoint-99/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..05fb68ca4a7e988cacbcf078beb813a265eb0d53 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-99/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp1_signal_c_100/checkpoint-99/adapter_model.safetensors b/exp1_signal_c_100/checkpoint-99/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dba01ba142ae85a98a9552ec1d52e2b5442fca5a --- /dev/null +++ b/exp1_signal_c_100/checkpoint-99/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fc5ce2de3594956daa12941e3b9bcd550446a305d28ff79f7ff0eb3ea431d8b +size 201378736 diff --git a/exp1_signal_c_100/checkpoint-99/optimizer.pt b/exp1_signal_c_100/checkpoint-99/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..00a08e85235f9fe9f526acae88cc48f3f8c93a9a --- /dev/null +++ b/exp1_signal_c_100/checkpoint-99/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77d682e6d7fe24cd6c433a6b6f08d48702ed5ae92845bff3cbaf76e5c0001aea +size 402982627 diff --git a/exp1_signal_c_100/checkpoint-99/rng_state.pth b/exp1_signal_c_100/checkpoint-99/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef5ac8da68afb8bea8ad1331116be95d18fbb182 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-99/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b293338027ef39107c758bc2300d51015b6a878a9f0e09fe1822d419dcbb163e +size 14645 diff --git a/exp1_signal_c_100/checkpoint-99/scheduler.pt b/exp1_signal_c_100/checkpoint-99/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a035695abab34b6d7759d2abb6c583da37754d5 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-99/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba8e76678603d3d1769a2ed6315c7bfd1edb3a261c537a3ff7c1689618cf725 +size 1465 diff --git a/exp1_signal_c_100/checkpoint-99/trainer_state.json b/exp1_signal_c_100/checkpoint-99/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..07080b707f3c220becec35c5524950e83db339d1 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-99/trainer_state.json @@ -0,0 +1,727 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9949748743718593, + "eval_steps": 100, + "global_step": 99, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 2.0252811908721924, + "learning_rate": 0.0, + "loss": 0.6131, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 2.2184908390045166, + "learning_rate": 5e-06, + "loss": 0.8686, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 2.4873640537261963, + "learning_rate": 1e-05, + "loss": 0.5164, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 2.624083995819092, + "learning_rate": 1.5e-05, + "loss": 1.002, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 1.3904463052749634, + "learning_rate": 2e-05, + "loss": 0.6515, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 3.1512720584869385, + "learning_rate": 2.5e-05, + "loss": 0.9144, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 1.309517502784729, + "learning_rate": 3e-05, + "loss": 0.5179, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.8838770389556885, + "learning_rate": 3.5e-05, + "loss": 0.4935, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 2.1511077880859375, + "learning_rate": 4e-05, + "loss": 0.8218, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.338928461074829, + "learning_rate": 4.5e-05, + "loss": 0.6049, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 2.3781137466430664, + "learning_rate": 5e-05, + "loss": 0.4984, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 0.8085482716560364, + "learning_rate": 4.99847706754774e-05, + "loss": 0.3011, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 2.7491891384124756, + "learning_rate": 4.993910125649561e-05, + "loss": 0.4187, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 2.329538583755493, + "learning_rate": 4.9863047384206835e-05, + "loss": 0.5481, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.2953161001205444, + "learning_rate": 4.975670171853926e-05, + "loss": 0.7587, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 1.5520743131637573, + "learning_rate": 4.962019382530521e-05, + "loss": 0.7993, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 1.7542706727981567, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.791, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 1.2422624826431274, + "learning_rate": 4.925739315689991e-05, + "loss": 0.5716, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 2.116292953491211, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.552, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 2.5214879512786865, + "learning_rate": 4.877641290737884e-05, + "loss": 1.0288, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 3.0591695308685303, + "learning_rate": 4.849231551964771e-05, + "loss": 0.9893, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.6582666635513306, + "learning_rate": 4.817959636416969e-05, + "loss": 0.4932, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 2.2998204231262207, + "learning_rate": 4.783863644106502e-05, + "loss": 0.7694, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.2947975397109985, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.8939, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 3.2607781887054443, + "learning_rate": 4.707368982147318e-05, + "loss": 0.9506, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.432145118713379, + "learning_rate": 4.665063509461097e-05, + "loss": 0.43, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 1.6384902000427246, + "learning_rate": 4.620120240391065e-05, + "loss": 0.4947, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 2.0519561767578125, + "learning_rate": 4.572593931387604e-05, + "loss": 0.6591, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 1.9339925050735474, + "learning_rate": 4.522542485937369e-05, + "loss": 0.5761, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 0.7800000905990601, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.3128, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 2.304705858230591, + "learning_rate": 4.415111107797445e-05, + "loss": 0.7255, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 2.59503436088562, + "learning_rate": 4.357862063693486e-05, + "loss": 0.4959, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 2.7538652420043945, + "learning_rate": 4.2983495008466276e-05, + "loss": 1.1096, + "step": 33 + }, + { + "epoch": 0.3417085427135678, + "grad_norm": 2.156506299972534, + "learning_rate": 4.2366459261474933e-05, + "loss": 0.5025, + "step": 34 + }, + { + "epoch": 0.35175879396984927, + "grad_norm": 1.8370133638381958, + "learning_rate": 4.172826515897146e-05, + "loss": 0.6174, + "step": 35 + }, + { + "epoch": 0.36180904522613067, + "grad_norm": 1.5026291608810425, + "learning_rate": 4.1069690242163484e-05, + "loss": 0.7019, + "step": 36 + }, + { + "epoch": 0.37185929648241206, + "grad_norm": 0.9972341656684875, + "learning_rate": 4.039153688314145e-05, + "loss": 0.3783, + "step": 37 + }, + { + "epoch": 0.38190954773869346, + "grad_norm": 2.10705828666687, + "learning_rate": 3.969463130731183e-05, + "loss": 0.4002, + "step": 38 + }, + { + "epoch": 0.39195979899497485, + "grad_norm": 1.508216142654419, + "learning_rate": 3.897982258676867e-05, + "loss": 0.7637, + "step": 39 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 1.5484806299209595, + "learning_rate": 3.824798160583012e-05, + "loss": 0.6095, + "step": 40 + }, + { + "epoch": 0.4120603015075377, + "grad_norm": 1.2863812446594238, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.331, + "step": 41 + }, + { + "epoch": 0.4221105527638191, + "grad_norm": 1.5361781120300293, + "learning_rate": 3.673678906964727e-05, + "loss": 0.717, + "step": 42 + }, + { + "epoch": 0.4321608040201005, + "grad_norm": 1.3632347583770752, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.5195, + "step": 43 + }, + { + "epoch": 0.44221105527638194, + "grad_norm": 1.6475563049316406, + "learning_rate": 3.516841607689501e-05, + "loss": 0.5608, + "step": 44 + }, + { + "epoch": 0.45226130653266333, + "grad_norm": 2.4081761837005615, + "learning_rate": 3.436516483539781e-05, + "loss": 0.6161, + "step": 45 + }, + { + "epoch": 0.4623115577889447, + "grad_norm": 1.1614190340042114, + "learning_rate": 3.355050358314172e-05, + "loss": 0.5096, + "step": 46 + }, + { + "epoch": 0.4723618090452261, + "grad_norm": 1.1495153903961182, + "learning_rate": 3.272542485937369e-05, + "loss": 0.4686, + "step": 47 + }, + { + "epoch": 0.4824120603015075, + "grad_norm": 0.9374528527259827, + "learning_rate": 3.1890933895424976e-05, + "loss": 0.4752, + "step": 48 + }, + { + "epoch": 0.49246231155778897, + "grad_norm": 0.811876118183136, + "learning_rate": 3.104804738999169e-05, + "loss": 0.3812, + "step": 49 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 1.067453145980835, + "learning_rate": 3.0197792270443982e-05, + "loss": 0.4631, + "step": 50 + }, + { + "epoch": 0.5125628140703518, + "grad_norm": 0.957179069519043, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.4275, + "step": 51 + }, + { + "epoch": 0.5226130653266332, + "grad_norm": 1.188721776008606, + "learning_rate": 2.8479327524001636e-05, + "loss": 0.5242, + "step": 52 + }, + { + "epoch": 0.5326633165829145, + "grad_norm": 3.2536540031433105, + "learning_rate": 2.761321158169134e-05, + "loss": 1.0363, + "step": 53 + }, + { + "epoch": 0.542713567839196, + "grad_norm": 1.041953682899475, + "learning_rate": 2.674391184360313e-05, + "loss": 0.3354, + "step": 54 + }, + { + "epoch": 0.5527638190954773, + "grad_norm": 1.2986676692962646, + "learning_rate": 2.587248741756253e-05, + "loss": 0.2586, + "step": 55 + }, + { + "epoch": 0.5628140703517588, + "grad_norm": 0.7783204913139343, + "learning_rate": 2.5e-05, + "loss": 0.2513, + "step": 56 + }, + { + "epoch": 0.5728643216080402, + "grad_norm": 1.6265126466751099, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.1845, + "step": 57 + }, + { + "epoch": 0.5829145728643216, + "grad_norm": 2.727491855621338, + "learning_rate": 2.3256088156396868e-05, + "loss": 0.661, + "step": 58 + }, + { + "epoch": 0.592964824120603, + "grad_norm": 2.6408135890960693, + "learning_rate": 2.238678841830867e-05, + "loss": 0.6877, + "step": 59 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 0.29646119475364685, + "learning_rate": 2.1520672475998373e-05, + "loss": 0.073, + "step": 60 + }, + { + "epoch": 0.6130653266331658, + "grad_norm": 1.218673825263977, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.4253, + "step": 61 + }, + { + "epoch": 0.6231155778894473, + "grad_norm": 1.2460541725158691, + "learning_rate": 1.980220772955602e-05, + "loss": 0.3749, + "step": 62 + }, + { + "epoch": 0.6331658291457286, + "grad_norm": 1.5119431018829346, + "learning_rate": 1.895195261000831e-05, + "loss": 0.4314, + "step": 63 + }, + { + "epoch": 0.6432160804020101, + "grad_norm": 1.4413162469863892, + "learning_rate": 1.8109066104575023e-05, + "loss": 0.3532, + "step": 64 + }, + { + "epoch": 0.6532663316582915, + "grad_norm": 1.122306227684021, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.3553, + "step": 65 + }, + { + "epoch": 0.6633165829145728, + "grad_norm": 2.0227315425872803, + "learning_rate": 1.6449496416858284e-05, + "loss": 0.4409, + "step": 66 + }, + { + "epoch": 0.6733668341708543, + "grad_norm": 2.7001399993896484, + "learning_rate": 1.56348351646022e-05, + "loss": 0.9403, + "step": 67 + }, + { + "epoch": 0.6834170854271356, + "grad_norm": 2.5412521362304688, + "learning_rate": 1.4831583923104999e-05, + "loss": 0.6257, + "step": 68 + }, + { + "epoch": 0.6934673366834171, + "grad_norm": 2.520745038986206, + "learning_rate": 1.4040721330273062e-05, + "loss": 0.4578, + "step": 69 + }, + { + "epoch": 0.7035175879396985, + "grad_norm": 1.906459927558899, + "learning_rate": 1.3263210930352737e-05, + "loss": 0.6205, + "step": 70 + }, + { + "epoch": 0.7135678391959799, + "grad_norm": 1.9079599380493164, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.364, + "step": 71 + }, + { + "epoch": 0.7236180904522613, + "grad_norm": 1.4104374647140503, + "learning_rate": 1.175201839416988e-05, + "loss": 0.4687, + "step": 72 + }, + { + "epoch": 0.7336683417085427, + "grad_norm": 2.257812738418579, + "learning_rate": 1.1020177413231334e-05, + "loss": 0.7489, + "step": 73 + }, + { + "epoch": 0.7437185929648241, + "grad_norm": 1.5222328901290894, + "learning_rate": 1.0305368692688174e-05, + "loss": 0.4103, + "step": 74 + }, + { + "epoch": 0.7537688442211056, + "grad_norm": 1.8094953298568726, + "learning_rate": 9.608463116858542e-06, + "loss": 0.341, + "step": 75 + }, + { + "epoch": 0.7638190954773869, + "grad_norm": 2.328770399093628, + "learning_rate": 8.930309757836517e-06, + "loss": 0.4114, + "step": 76 + }, + { + "epoch": 0.7738693467336684, + "grad_norm": 1.129135012626648, + "learning_rate": 8.271734841028553e-06, + "loss": 0.339, + "step": 77 + }, + { + "epoch": 0.7839195979899497, + "grad_norm": 0.979975163936615, + "learning_rate": 7.633540738525066e-06, + "loss": 0.3299, + "step": 78 + }, + { + "epoch": 0.7939698492462312, + "grad_norm": 2.6319901943206787, + "learning_rate": 7.016504991533726e-06, + "loss": 0.7442, + "step": 79 + }, + { + "epoch": 0.8040201005025126, + "grad_norm": 1.877888560295105, + "learning_rate": 6.421379363065142e-06, + "loss": 0.3072, + "step": 80 + }, + { + "epoch": 0.8140703517587939, + "grad_norm": 1.062574863433838, + "learning_rate": 5.848888922025553e-06, + "loss": 0.2217, + "step": 81 + }, + { + "epoch": 0.8241206030150754, + "grad_norm": 2.3768932819366455, + "learning_rate": 5.299731159831953e-06, + "loss": 0.4072, + "step": 82 + }, + { + "epoch": 0.8341708542713567, + "grad_norm": 1.9588872194290161, + "learning_rate": 4.7745751406263165e-06, + "loss": 0.4908, + "step": 83 + }, + { + "epoch": 0.8442211055276382, + "grad_norm": 0.6415271759033203, + "learning_rate": 4.274060686123959e-06, + "loss": 0.1118, + "step": 84 + }, + { + "epoch": 0.8542713567839196, + "grad_norm": 1.4708070755004883, + "learning_rate": 3.798797596089351e-06, + "loss": 0.3945, + "step": 85 + }, + { + "epoch": 0.864321608040201, + "grad_norm": 1.495408058166504, + "learning_rate": 3.3493649053890326e-06, + "loss": 0.2519, + "step": 86 + }, + { + "epoch": 0.8743718592964824, + "grad_norm": 1.9014512300491333, + "learning_rate": 2.9263101785268254e-06, + "loss": 0.3233, + "step": 87 + }, + { + "epoch": 0.8844221105527639, + "grad_norm": 2.157270908355713, + "learning_rate": 2.5301488425208296e-06, + "loss": 0.4891, + "step": 88 + }, + { + "epoch": 0.8944723618090452, + "grad_norm": 1.6721971035003662, + "learning_rate": 2.1613635589349756e-06, + "loss": 0.2344, + "step": 89 + }, + { + "epoch": 0.9045226130653267, + "grad_norm": 1.1136136054992676, + "learning_rate": 1.8204036358303173e-06, + "loss": 0.4952, + "step": 90 + }, + { + "epoch": 0.914572864321608, + "grad_norm": 2.6617023944854736, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.4057, + "step": 91 + }, + { + "epoch": 0.9246231155778895, + "grad_norm": 1.8409501314163208, + "learning_rate": 1.2235870926211619e-06, + "loss": 0.4969, + "step": 92 + }, + { + "epoch": 0.9346733668341709, + "grad_norm": 1.0654531717300415, + "learning_rate": 9.684576015420278e-07, + "loss": 0.1584, + "step": 93 + }, + { + "epoch": 0.9447236180904522, + "grad_norm": 1.7827986478805542, + "learning_rate": 7.426068431000882e-07, + "loss": 0.4053, + "step": 94 + }, + { + "epoch": 0.9547738693467337, + "grad_norm": 1.0557734966278076, + "learning_rate": 5.463099816548579e-07, + "loss": 0.1529, + "step": 95 + }, + { + "epoch": 0.964824120603015, + "grad_norm": 1.926171898841858, + "learning_rate": 3.7980617469479953e-07, + "loss": 0.7067, + "step": 96 + }, + { + "epoch": 0.9748743718592965, + "grad_norm": 2.5966174602508545, + "learning_rate": 2.4329828146074095e-07, + "loss": 0.4362, + "step": 97 + }, + { + "epoch": 0.9849246231155779, + "grad_norm": 2.601397752761841, + "learning_rate": 1.3695261579316777e-07, + "loss": 0.8023, + "step": 98 + }, + { + "epoch": 0.9949748743718593, + "grad_norm": 1.530727744102478, + "learning_rate": 6.089874350439506e-08, + "loss": 0.5146, + "step": 99 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 33, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.832806128359834e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp1_signal_c_100/checkpoint-99/training_args.bin b/exp1_signal_c_100/checkpoint-99/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..519a036c4c4f776dd18d9d414e11851c970fc417 --- /dev/null +++ b/exp1_signal_c_100/checkpoint-99/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:572cbdd13202b1ec51905f252af50c916a6de05c78548327ae765c00ceb7d3ab +size 6033 diff --git a/exp1_signal_c_100/final_model/README.md b/exp1_signal_c_100/final_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp1_signal_c_100/final_model/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp1_signal_c_100/final_model/adapter_config.json b/exp1_signal_c_100/final_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..05fb68ca4a7e988cacbcf078beb813a265eb0d53 --- /dev/null +++ b/exp1_signal_c_100/final_model/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp1_signal_c_100/final_model/adapter_model.safetensors b/exp1_signal_c_100/final_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a1a9c7655812eeab9833db157302c516b781527 --- /dev/null +++ b/exp1_signal_c_100/final_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d06d5d1fb081e5847792d8a5a3630488d31de5ea5b5f3835e79c4ec1c3f5e65 +size 201378736 diff --git a/exp1_signal_c_100/final_model/training_args.bin b/exp1_signal_c_100/final_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..519a036c4c4f776dd18d9d414e11851c970fc417 --- /dev/null +++ b/exp1_signal_c_100/final_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:572cbdd13202b1ec51905f252af50c916a6de05c78548327ae765c00ceb7d3ab +size 6033 diff --git a/exp1_signal_c_50/checkpoint-100/README.md b/exp1_signal_c_50/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp1_signal_c_50/checkpoint-100/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp1_signal_c_50/checkpoint-100/adapter_config.json b/exp1_signal_c_50/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5c80fe69c40e8dc39868e47c17dcdcc979d9b370 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-100/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp1_signal_c_50/checkpoint-100/adapter_model.safetensors b/exp1_signal_c_50/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..767a5bf5fbdc3dea2b2cbdbf73bec5bc2b6670ae --- /dev/null +++ b/exp1_signal_c_50/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84aee8a1799b52d3efe40d4399fd4c6119718a6a5b2ea18286c2eee29fdd270a +size 201378736 diff --git a/exp1_signal_c_50/checkpoint-100/optimizer.pt b/exp1_signal_c_50/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f7b917726add1da646dcd24b365cfda5dabceec --- /dev/null +++ b/exp1_signal_c_50/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24c0eff9b80db5a473f6142056894c4e196e06d3377778dd48183385d52d58e2 +size 402982627 diff --git a/exp1_signal_c_50/checkpoint-100/rng_state.pth b/exp1_signal_c_50/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b758229946bfed21fe4a66514628225449494569 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7241d54608587827c7b4e27d6e8c6cfa02a7d1188be62c98c295b9bb48b181d8 +size 14645 diff --git a/exp1_signal_c_50/checkpoint-100/scheduler.pt b/exp1_signal_c_50/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fdcc1fbec5336295a8ad32e6fb37c9646d5a557 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4e44404b58ce3af1b46c3d4a85a59edbbc386f340c476e894715a1199e1aed +size 1465 diff --git a/exp1_signal_c_50/checkpoint-100/trainer_state.json b/exp1_signal_c_50/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..578a1c5912265238cd7c69c245f5bd175b650426 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-100/trainer_state.json @@ -0,0 +1,742 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 2.090841770172119, + "learning_rate": 0.0, + "loss": 0.6877, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 1.6500182151794434, + "learning_rate": 5e-06, + "loss": 0.5553, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 2.8341240882873535, + "learning_rate": 1e-05, + "loss": 0.7649, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 1.3663409948349, + "learning_rate": 1.5e-05, + "loss": 0.4717, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 1.2471226453781128, + "learning_rate": 2e-05, + "loss": 0.4895, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 1.8957748413085938, + "learning_rate": 2.5e-05, + "loss": 0.8027, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 0.9421080350875854, + "learning_rate": 3e-05, + "loss": 0.2445, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.3940750360488892, + "learning_rate": 3.5e-05, + "loss": 0.6727, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 2.3070380687713623, + "learning_rate": 4e-05, + "loss": 0.8506, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.6834089756011963, + "learning_rate": 4.5e-05, + "loss": 0.5343, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 0.8352651596069336, + "learning_rate": 5e-05, + "loss": 0.3959, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 3.2252678871154785, + "learning_rate": 4.99847706754774e-05, + "loss": 0.5884, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 2.3922126293182373, + "learning_rate": 4.993910125649561e-05, + "loss": 0.5928, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 1.2292028665542603, + "learning_rate": 4.9863047384206835e-05, + "loss": 0.5248, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.8716892004013062, + "learning_rate": 4.975670171853926e-05, + "loss": 0.7384, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 4.264588832855225, + "learning_rate": 4.962019382530521e-05, + "loss": 0.9005, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 1.8790229558944702, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.5212, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 1.6464147567749023, + "learning_rate": 4.925739315689991e-05, + "loss": 0.4105, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 1.471611499786377, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.8898, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 1.6528465747833252, + "learning_rate": 4.877641290737884e-05, + "loss": 0.8817, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 3.6064441204071045, + "learning_rate": 4.849231551964771e-05, + "loss": 1.1784, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.2640239000320435, + "learning_rate": 4.817959636416969e-05, + "loss": 0.4255, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 1.3834407329559326, + "learning_rate": 4.783863644106502e-05, + "loss": 1.0316, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.2265491485595703, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.3261, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 1.5698051452636719, + "learning_rate": 4.707368982147318e-05, + "loss": 0.6068, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.315598487854004, + "learning_rate": 4.665063509461097e-05, + "loss": 0.3593, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 0.7387354969978333, + "learning_rate": 4.620120240391065e-05, + "loss": 0.2985, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 1.4930191040039062, + "learning_rate": 4.572593931387604e-05, + "loss": 0.6862, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 1.1811872720718384, + "learning_rate": 4.522542485937369e-05, + "loss": 0.5399, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 1.3626222610473633, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.4468, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 1.855308175086975, + "learning_rate": 4.415111107797445e-05, + "loss": 0.5117, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 1.2538566589355469, + "learning_rate": 4.357862063693486e-05, + "loss": 0.4231, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 1.4809361696243286, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.7577, + "step": 33 + }, + { + "epoch": 0.3417085427135678, + "grad_norm": 1.7620362043380737, + "learning_rate": 4.2366459261474933e-05, + "loss": 0.7635, + "step": 34 + }, + { + "epoch": 0.35175879396984927, + "grad_norm": 1.337045669555664, + "learning_rate": 4.172826515897146e-05, + "loss": 0.514, + "step": 35 + }, + { + "epoch": 0.36180904522613067, + "grad_norm": 1.4965589046478271, + "learning_rate": 4.1069690242163484e-05, + "loss": 0.5781, + "step": 36 + }, + { + "epoch": 0.37185929648241206, + "grad_norm": 1.4205466508865356, + "learning_rate": 4.039153688314145e-05, + "loss": 0.2826, + "step": 37 + }, + { + "epoch": 0.38190954773869346, + "grad_norm": 1.036288857460022, + "learning_rate": 3.969463130731183e-05, + "loss": 0.3847, + "step": 38 + }, + { + "epoch": 0.39195979899497485, + "grad_norm": 1.3840105533599854, + "learning_rate": 3.897982258676867e-05, + "loss": 0.5401, + "step": 39 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 4.637338161468506, + "learning_rate": 3.824798160583012e-05, + "loss": 1.2988, + "step": 40 + }, + { + "epoch": 0.4120603015075377, + "grad_norm": 1.0604753494262695, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.2507, + "step": 41 + }, + { + "epoch": 0.4221105527638191, + "grad_norm": 1.5602868795394897, + "learning_rate": 3.673678906964727e-05, + "loss": 0.3806, + "step": 42 + }, + { + "epoch": 0.4321608040201005, + "grad_norm": 0.8546467423439026, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3237, + "step": 43 + }, + { + "epoch": 0.44221105527638194, + "grad_norm": 0.6842706799507141, + "learning_rate": 3.516841607689501e-05, + "loss": 0.1639, + "step": 44 + }, + { + "epoch": 0.45226130653266333, + "grad_norm": 1.4356191158294678, + "learning_rate": 3.436516483539781e-05, + "loss": 0.1976, + "step": 45 + }, + { + "epoch": 0.4623115577889447, + "grad_norm": 0.9837068915367126, + "learning_rate": 3.355050358314172e-05, + "loss": 0.1973, + "step": 46 + }, + { + "epoch": 0.4723618090452261, + "grad_norm": 1.9256995916366577, + "learning_rate": 3.272542485937369e-05, + "loss": 0.6982, + "step": 47 + }, + { + "epoch": 0.4824120603015075, + "grad_norm": 1.3878264427185059, + "learning_rate": 3.1890933895424976e-05, + "loss": 0.6256, + "step": 48 + }, + { + "epoch": 0.49246231155778897, + "grad_norm": 2.2053303718566895, + "learning_rate": 3.104804738999169e-05, + "loss": 0.5007, + "step": 49 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 3.3784847259521484, + "learning_rate": 3.0197792270443982e-05, + "loss": 0.8355, + "step": 50 + }, + { + "epoch": 0.5125628140703518, + "grad_norm": 3.213935136795044, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.7751, + "step": 51 + }, + { + "epoch": 0.5226130653266332, + "grad_norm": 1.763657808303833, + "learning_rate": 2.8479327524001636e-05, + "loss": 0.3691, + "step": 52 + }, + { + "epoch": 0.5326633165829145, + "grad_norm": 1.7099454402923584, + "learning_rate": 2.761321158169134e-05, + "loss": 0.3867, + "step": 53 + }, + { + "epoch": 0.542713567839196, + "grad_norm": 2.456601619720459, + "learning_rate": 2.674391184360313e-05, + "loss": 0.8096, + "step": 54 + }, + { + "epoch": 0.5527638190954773, + "grad_norm": 3.4257242679595947, + "learning_rate": 2.587248741756253e-05, + "loss": 0.4707, + "step": 55 + }, + { + "epoch": 0.5628140703517588, + "grad_norm": 3.3254268169403076, + "learning_rate": 2.5e-05, + "loss": 0.4247, + "step": 56 + }, + { + "epoch": 0.5728643216080402, + "grad_norm": 4.219179153442383, + "learning_rate": 2.4127512582437485e-05, + "loss": 1.1432, + "step": 57 + }, + { + "epoch": 0.5829145728643216, + "grad_norm": 1.8128859996795654, + "learning_rate": 2.3256088156396868e-05, + "loss": 0.5847, + "step": 58 + }, + { + "epoch": 0.592964824120603, + "grad_norm": 3.432041883468628, + "learning_rate": 2.238678841830867e-05, + "loss": 0.5025, + "step": 59 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 1.6512501239776611, + "learning_rate": 2.1520672475998373e-05, + "loss": 0.6528, + "step": 60 + }, + { + "epoch": 0.6130653266331658, + "grad_norm": 2.509225845336914, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.7458, + "step": 61 + }, + { + "epoch": 0.6231155778894473, + "grad_norm": 3.3945913314819336, + "learning_rate": 1.980220772955602e-05, + "loss": 0.8483, + "step": 62 + }, + { + "epoch": 0.6331658291457286, + "grad_norm": 1.7071493864059448, + "learning_rate": 1.895195261000831e-05, + "loss": 0.5162, + "step": 63 + }, + { + "epoch": 0.6432160804020101, + "grad_norm": 1.902148723602295, + "learning_rate": 1.8109066104575023e-05, + "loss": 0.3798, + "step": 64 + }, + { + "epoch": 0.6532663316582915, + "grad_norm": 1.3700332641601562, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.3926, + "step": 65 + }, + { + "epoch": 0.6633165829145728, + "grad_norm": 1.6878489255905151, + "learning_rate": 1.6449496416858284e-05, + "loss": 0.4083, + "step": 66 + }, + { + "epoch": 0.6733668341708543, + "grad_norm": 1.113712191581726, + "learning_rate": 1.56348351646022e-05, + "loss": 0.2043, + "step": 67 + }, + { + "epoch": 0.6834170854271356, + "grad_norm": 1.1055054664611816, + "learning_rate": 1.4831583923104999e-05, + "loss": 0.3076, + "step": 68 + }, + { + "epoch": 0.6934673366834171, + "grad_norm": 1.4783660173416138, + "learning_rate": 1.4040721330273062e-05, + "loss": 0.2804, + "step": 69 + }, + { + "epoch": 0.7035175879396985, + "grad_norm": 1.7568031549453735, + "learning_rate": 1.3263210930352737e-05, + "loss": 0.2255, + "step": 70 + }, + { + "epoch": 0.7135678391959799, + "grad_norm": 3.2712368965148926, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.7584, + "step": 71 + }, + { + "epoch": 0.7236180904522613, + "grad_norm": 4.170383453369141, + "learning_rate": 1.175201839416988e-05, + "loss": 0.6353, + "step": 72 + }, + { + "epoch": 0.7336683417085427, + "grad_norm": 2.4070382118225098, + "learning_rate": 1.1020177413231334e-05, + "loss": 0.7063, + "step": 73 + }, + { + "epoch": 0.7437185929648241, + "grad_norm": 1.7356330156326294, + "learning_rate": 1.0305368692688174e-05, + "loss": 0.4544, + "step": 74 + }, + { + "epoch": 0.7537688442211056, + "grad_norm": 2.725081443786621, + "learning_rate": 9.608463116858542e-06, + "loss": 0.6067, + "step": 75 + }, + { + "epoch": 0.7638190954773869, + "grad_norm": 3.7814385890960693, + "learning_rate": 8.930309757836517e-06, + "loss": 0.6096, + "step": 76 + }, + { + "epoch": 0.7738693467336684, + "grad_norm": 1.4958455562591553, + "learning_rate": 8.271734841028553e-06, + "loss": 0.1921, + "step": 77 + }, + { + "epoch": 0.7839195979899497, + "grad_norm": 2.0017693042755127, + "learning_rate": 7.633540738525066e-06, + "loss": 0.4245, + "step": 78 + }, + { + "epoch": 0.7939698492462312, + "grad_norm": 2.2158942222595215, + "learning_rate": 7.016504991533726e-06, + "loss": 0.5462, + "step": 79 + }, + { + "epoch": 0.8040201005025126, + "grad_norm": 2.1772801876068115, + "learning_rate": 6.421379363065142e-06, + "loss": 0.5683, + "step": 80 + }, + { + "epoch": 0.8140703517587939, + "grad_norm": 2.4165947437286377, + "learning_rate": 5.848888922025553e-06, + "loss": 0.6179, + "step": 81 + }, + { + "epoch": 0.8241206030150754, + "grad_norm": 3.4924304485321045, + "learning_rate": 5.299731159831953e-06, + "loss": 0.5454, + "step": 82 + }, + { + "epoch": 0.8341708542713567, + "grad_norm": 1.286102056503296, + "learning_rate": 4.7745751406263165e-06, + "loss": 0.3365, + "step": 83 + }, + { + "epoch": 0.8442211055276382, + "grad_norm": 1.5090934038162231, + "learning_rate": 4.274060686123959e-06, + "loss": 0.2462, + "step": 84 + }, + { + "epoch": 0.8542713567839196, + "grad_norm": 1.7056686878204346, + "learning_rate": 3.798797596089351e-06, + "loss": 0.2526, + "step": 85 + }, + { + "epoch": 0.864321608040201, + "grad_norm": 1.5727635622024536, + "learning_rate": 3.3493649053890326e-06, + "loss": 0.3391, + "step": 86 + }, + { + "epoch": 0.8743718592964824, + "grad_norm": 2.05062198638916, + "learning_rate": 2.9263101785268254e-06, + "loss": 0.8664, + "step": 87 + }, + { + "epoch": 0.8844221105527639, + "grad_norm": 1.8346749544143677, + "learning_rate": 2.5301488425208296e-06, + "loss": 0.2371, + "step": 88 + }, + { + "epoch": 0.8944723618090452, + "grad_norm": 1.5837578773498535, + "learning_rate": 2.1613635589349756e-06, + "loss": 0.3714, + "step": 89 + }, + { + "epoch": 0.9045226130653267, + "grad_norm": 2.0031440258026123, + "learning_rate": 1.8204036358303173e-06, + "loss": 0.34, + "step": 90 + }, + { + "epoch": 0.914572864321608, + "grad_norm": 2.859940528869629, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.4141, + "step": 91 + }, + { + "epoch": 0.9246231155778895, + "grad_norm": 1.9441653490066528, + "learning_rate": 1.2235870926211619e-06, + "loss": 0.2432, + "step": 92 + }, + { + "epoch": 0.9346733668341709, + "grad_norm": 3.415130138397217, + "learning_rate": 9.684576015420278e-07, + "loss": 0.6276, + "step": 93 + }, + { + "epoch": 0.9447236180904522, + "grad_norm": 1.692018985748291, + "learning_rate": 7.426068431000882e-07, + "loss": 0.2778, + "step": 94 + }, + { + "epoch": 0.9547738693467337, + "grad_norm": 3.519803285598755, + "learning_rate": 5.463099816548579e-07, + "loss": 0.3524, + "step": 95 + }, + { + "epoch": 0.964824120603015, + "grad_norm": 0.46640315651893616, + "learning_rate": 3.7980617469479953e-07, + "loss": 0.0778, + "step": 96 + }, + { + "epoch": 0.9748743718592965, + "grad_norm": 2.561051845550537, + "learning_rate": 2.4329828146074095e-07, + "loss": 0.535, + "step": 97 + }, + { + "epoch": 0.9849246231155779, + "grad_norm": 1.67559814453125, + "learning_rate": 1.3695261579316777e-07, + "loss": 0.2518, + "step": 98 + }, + { + "epoch": 0.9949748743718593, + "grad_norm": 1.0802916288375854, + "learning_rate": 6.089874350439506e-08, + "loss": 0.171, + "step": 99 + }, + { + "epoch": 1.0, + "grad_norm": 1.2519536018371582, + "learning_rate": 1.522932452260595e-08, + "loss": 0.1307, + "step": 100 + }, + { + "epoch": 1.0, + "eval_loss": 0.4638383984565735, + "eval_runtime": 85.6939, + "eval_samples_per_second": 3.501, + "eval_steps_per_second": 1.75, + "step": 100 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 33, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 6.850060689290035e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp1_signal_c_50/checkpoint-100/training_args.bin b/exp1_signal_c_50/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..37ba97d5032b379fc979c06f0dd49af445974b4d --- /dev/null +++ b/exp1_signal_c_50/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8f433c42e68cfba3960ffd965dce97c50ad18a8f4f749a9737ce8ee8df9b700 +size 6033 diff --git a/exp1_signal_c_50/checkpoint-33/README.md b/exp1_signal_c_50/checkpoint-33/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp1_signal_c_50/checkpoint-33/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp1_signal_c_50/checkpoint-33/adapter_config.json b/exp1_signal_c_50/checkpoint-33/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5c80fe69c40e8dc39868e47c17dcdcc979d9b370 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-33/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp1_signal_c_50/checkpoint-33/adapter_model.safetensors b/exp1_signal_c_50/checkpoint-33/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0ecf126599177762b3ec4a20eec80355456a5ec --- /dev/null +++ b/exp1_signal_c_50/checkpoint-33/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f102629307361c445fd00e23ee927481704c576c634b2dffdcf543d01caa315e +size 201378736 diff --git a/exp1_signal_c_50/checkpoint-33/optimizer.pt b/exp1_signal_c_50/checkpoint-33/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a92e844ac72b3fb99d5b442f552c9f33b39056f --- /dev/null +++ b/exp1_signal_c_50/checkpoint-33/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b52a69bdbb9a23100ca3cba617cc07365c4f4a9368796006c418ae6a9b8b9d26 +size 402982627 diff --git a/exp1_signal_c_50/checkpoint-33/rng_state.pth b/exp1_signal_c_50/checkpoint-33/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fd650bca78c013ce43187807570db259be238162 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-33/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4370640aed47e1cfe501e222405e6de0a6701dc2554814f30a0ec4610f4a16b +size 14645 diff --git a/exp1_signal_c_50/checkpoint-33/scheduler.pt b/exp1_signal_c_50/checkpoint-33/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3b688fff35c94c1b3f7bf1c91548cb8ab1f20a1 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-33/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b360cc37b95be8398ac4635eb61f23915865018f0c39c4789e480a08ead764a +size 1465 diff --git a/exp1_signal_c_50/checkpoint-33/trainer_state.json b/exp1_signal_c_50/checkpoint-33/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9c287abddd6d988031dad456a4eea643d27be67b --- /dev/null +++ b/exp1_signal_c_50/checkpoint-33/trainer_state.json @@ -0,0 +1,265 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3316582914572864, + "eval_steps": 100, + "global_step": 33, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 2.090841770172119, + "learning_rate": 0.0, + "loss": 0.6877, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 1.6500182151794434, + "learning_rate": 5e-06, + "loss": 0.5553, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 2.8341240882873535, + "learning_rate": 1e-05, + "loss": 0.7649, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 1.3663409948349, + "learning_rate": 1.5e-05, + "loss": 0.4717, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 1.2471226453781128, + "learning_rate": 2e-05, + "loss": 0.4895, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 1.8957748413085938, + "learning_rate": 2.5e-05, + "loss": 0.8027, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 0.9421080350875854, + "learning_rate": 3e-05, + "loss": 0.2445, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.3940750360488892, + "learning_rate": 3.5e-05, + "loss": 0.6727, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 2.3070380687713623, + "learning_rate": 4e-05, + "loss": 0.8506, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.6834089756011963, + "learning_rate": 4.5e-05, + "loss": 0.5343, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 0.8352651596069336, + "learning_rate": 5e-05, + "loss": 0.3959, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 3.2252678871154785, + "learning_rate": 4.99847706754774e-05, + "loss": 0.5884, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 2.3922126293182373, + "learning_rate": 4.993910125649561e-05, + "loss": 0.5928, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 1.2292028665542603, + "learning_rate": 4.9863047384206835e-05, + "loss": 0.5248, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.8716892004013062, + "learning_rate": 4.975670171853926e-05, + "loss": 0.7384, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 4.264588832855225, + "learning_rate": 4.962019382530521e-05, + "loss": 0.9005, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 1.8790229558944702, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.5212, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 1.6464147567749023, + "learning_rate": 4.925739315689991e-05, + "loss": 0.4105, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 1.471611499786377, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.8898, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 1.6528465747833252, + "learning_rate": 4.877641290737884e-05, + "loss": 0.8817, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 3.6064441204071045, + "learning_rate": 4.849231551964771e-05, + "loss": 1.1784, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.2640239000320435, + "learning_rate": 4.817959636416969e-05, + "loss": 0.4255, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 1.3834407329559326, + "learning_rate": 4.783863644106502e-05, + "loss": 1.0316, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.2265491485595703, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.3261, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 1.5698051452636719, + "learning_rate": 4.707368982147318e-05, + "loss": 0.6068, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.315598487854004, + "learning_rate": 4.665063509461097e-05, + "loss": 0.3593, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 0.7387354969978333, + "learning_rate": 4.620120240391065e-05, + "loss": 0.2985, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 1.4930191040039062, + "learning_rate": 4.572593931387604e-05, + "loss": 0.6862, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 1.1811872720718384, + "learning_rate": 4.522542485937369e-05, + "loss": 0.5399, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 1.3626222610473633, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.4468, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 1.855308175086975, + "learning_rate": 4.415111107797445e-05, + "loss": 0.5117, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 1.2538566589355469, + "learning_rate": 4.357862063693486e-05, + "loss": 0.4231, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 1.4809361696243286, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.7577, + "step": 33 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 33, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.277602042786611e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp1_signal_c_50/checkpoint-33/training_args.bin b/exp1_signal_c_50/checkpoint-33/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..37ba97d5032b379fc979c06f0dd49af445974b4d --- /dev/null +++ b/exp1_signal_c_50/checkpoint-33/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8f433c42e68cfba3960ffd965dce97c50ad18a8f4f749a9737ce8ee8df9b700 +size 6033 diff --git a/exp1_signal_c_50/checkpoint-66/README.md b/exp1_signal_c_50/checkpoint-66/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp1_signal_c_50/checkpoint-66/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp1_signal_c_50/checkpoint-66/adapter_config.json b/exp1_signal_c_50/checkpoint-66/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5c80fe69c40e8dc39868e47c17dcdcc979d9b370 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-66/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp1_signal_c_50/checkpoint-66/adapter_model.safetensors b/exp1_signal_c_50/checkpoint-66/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06d17501514316e829c3ae3757a8ab5a99ae6dd5 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-66/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d240f10897d2d0600f5bc47bbca8bcf69ec729509c0027392cc601cb7559271b +size 201378736 diff --git a/exp1_signal_c_50/checkpoint-66/optimizer.pt b/exp1_signal_c_50/checkpoint-66/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..745668517bc78c6dbc94732235cdd072b3166d24 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-66/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5457b59b3ecd68c3642696205dad5c29cfb2e513a7bc8a6531034e658fc0865 +size 402982627 diff --git a/exp1_signal_c_50/checkpoint-66/rng_state.pth b/exp1_signal_c_50/checkpoint-66/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9acb6af318562be091fcb3203d32d6aa81ac4bf6 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-66/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe8399edd28ccffb95622add1833ade04c0b6c9ff41375a11d9923ffa06e322 +size 14645 diff --git a/exp1_signal_c_50/checkpoint-66/scheduler.pt b/exp1_signal_c_50/checkpoint-66/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a327542ffefb0cc080cbcc215ee0039848f86e29 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-66/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2fd2e1422c94e485a1f77b0038f8f0bc12efa27617f0f3dbfaf4b9a3fd1fd59 +size 1465 diff --git a/exp1_signal_c_50/checkpoint-66/trainer_state.json b/exp1_signal_c_50/checkpoint-66/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3133276b8f7d728305a0394e599faa7a7bac4846 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-66/trainer_state.json @@ -0,0 +1,496 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6633165829145728, + "eval_steps": 100, + "global_step": 66, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 2.090841770172119, + "learning_rate": 0.0, + "loss": 0.6877, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 1.6500182151794434, + "learning_rate": 5e-06, + "loss": 0.5553, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 2.8341240882873535, + "learning_rate": 1e-05, + "loss": 0.7649, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 1.3663409948349, + "learning_rate": 1.5e-05, + "loss": 0.4717, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 1.2471226453781128, + "learning_rate": 2e-05, + "loss": 0.4895, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 1.8957748413085938, + "learning_rate": 2.5e-05, + "loss": 0.8027, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 0.9421080350875854, + "learning_rate": 3e-05, + "loss": 0.2445, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.3940750360488892, + "learning_rate": 3.5e-05, + "loss": 0.6727, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 2.3070380687713623, + "learning_rate": 4e-05, + "loss": 0.8506, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.6834089756011963, + "learning_rate": 4.5e-05, + "loss": 0.5343, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 0.8352651596069336, + "learning_rate": 5e-05, + "loss": 0.3959, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 3.2252678871154785, + "learning_rate": 4.99847706754774e-05, + "loss": 0.5884, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 2.3922126293182373, + "learning_rate": 4.993910125649561e-05, + "loss": 0.5928, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 1.2292028665542603, + "learning_rate": 4.9863047384206835e-05, + "loss": 0.5248, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.8716892004013062, + "learning_rate": 4.975670171853926e-05, + "loss": 0.7384, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 4.264588832855225, + "learning_rate": 4.962019382530521e-05, + "loss": 0.9005, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 1.8790229558944702, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.5212, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 1.6464147567749023, + "learning_rate": 4.925739315689991e-05, + "loss": 0.4105, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 1.471611499786377, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.8898, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 1.6528465747833252, + "learning_rate": 4.877641290737884e-05, + "loss": 0.8817, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 3.6064441204071045, + "learning_rate": 4.849231551964771e-05, + "loss": 1.1784, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.2640239000320435, + "learning_rate": 4.817959636416969e-05, + "loss": 0.4255, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 1.3834407329559326, + "learning_rate": 4.783863644106502e-05, + "loss": 1.0316, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.2265491485595703, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.3261, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 1.5698051452636719, + "learning_rate": 4.707368982147318e-05, + "loss": 0.6068, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.315598487854004, + "learning_rate": 4.665063509461097e-05, + "loss": 0.3593, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 0.7387354969978333, + "learning_rate": 4.620120240391065e-05, + "loss": 0.2985, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 1.4930191040039062, + "learning_rate": 4.572593931387604e-05, + "loss": 0.6862, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 1.1811872720718384, + "learning_rate": 4.522542485937369e-05, + "loss": 0.5399, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 1.3626222610473633, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.4468, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 1.855308175086975, + "learning_rate": 4.415111107797445e-05, + "loss": 0.5117, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 1.2538566589355469, + "learning_rate": 4.357862063693486e-05, + "loss": 0.4231, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 1.4809361696243286, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.7577, + "step": 33 + }, + { + "epoch": 0.3417085427135678, + "grad_norm": 1.7620362043380737, + "learning_rate": 4.2366459261474933e-05, + "loss": 0.7635, + "step": 34 + }, + { + "epoch": 0.35175879396984927, + "grad_norm": 1.337045669555664, + "learning_rate": 4.172826515897146e-05, + "loss": 0.514, + "step": 35 + }, + { + "epoch": 0.36180904522613067, + "grad_norm": 1.4965589046478271, + "learning_rate": 4.1069690242163484e-05, + "loss": 0.5781, + "step": 36 + }, + { + "epoch": 0.37185929648241206, + "grad_norm": 1.4205466508865356, + "learning_rate": 4.039153688314145e-05, + "loss": 0.2826, + "step": 37 + }, + { + "epoch": 0.38190954773869346, + "grad_norm": 1.036288857460022, + "learning_rate": 3.969463130731183e-05, + "loss": 0.3847, + "step": 38 + }, + { + "epoch": 0.39195979899497485, + "grad_norm": 1.3840105533599854, + "learning_rate": 3.897982258676867e-05, + "loss": 0.5401, + "step": 39 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 4.637338161468506, + "learning_rate": 3.824798160583012e-05, + "loss": 1.2988, + "step": 40 + }, + { + "epoch": 0.4120603015075377, + "grad_norm": 1.0604753494262695, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.2507, + "step": 41 + }, + { + "epoch": 0.4221105527638191, + "grad_norm": 1.5602868795394897, + "learning_rate": 3.673678906964727e-05, + "loss": 0.3806, + "step": 42 + }, + { + "epoch": 0.4321608040201005, + "grad_norm": 0.8546467423439026, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3237, + "step": 43 + }, + { + "epoch": 0.44221105527638194, + "grad_norm": 0.6842706799507141, + "learning_rate": 3.516841607689501e-05, + "loss": 0.1639, + "step": 44 + }, + { + "epoch": 0.45226130653266333, + "grad_norm": 1.4356191158294678, + "learning_rate": 3.436516483539781e-05, + "loss": 0.1976, + "step": 45 + }, + { + "epoch": 0.4623115577889447, + "grad_norm": 0.9837068915367126, + "learning_rate": 3.355050358314172e-05, + "loss": 0.1973, + "step": 46 + }, + { + "epoch": 0.4723618090452261, + "grad_norm": 1.9256995916366577, + "learning_rate": 3.272542485937369e-05, + "loss": 0.6982, + "step": 47 + }, + { + "epoch": 0.4824120603015075, + "grad_norm": 1.3878264427185059, + "learning_rate": 3.1890933895424976e-05, + "loss": 0.6256, + "step": 48 + }, + { + "epoch": 0.49246231155778897, + "grad_norm": 2.2053303718566895, + "learning_rate": 3.104804738999169e-05, + "loss": 0.5007, + "step": 49 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 3.3784847259521484, + "learning_rate": 3.0197792270443982e-05, + "loss": 0.8355, + "step": 50 + }, + { + "epoch": 0.5125628140703518, + "grad_norm": 3.213935136795044, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.7751, + "step": 51 + }, + { + "epoch": 0.5226130653266332, + "grad_norm": 1.763657808303833, + "learning_rate": 2.8479327524001636e-05, + "loss": 0.3691, + "step": 52 + }, + { + "epoch": 0.5326633165829145, + "grad_norm": 1.7099454402923584, + "learning_rate": 2.761321158169134e-05, + "loss": 0.3867, + "step": 53 + }, + { + "epoch": 0.542713567839196, + "grad_norm": 2.456601619720459, + "learning_rate": 2.674391184360313e-05, + "loss": 0.8096, + "step": 54 + }, + { + "epoch": 0.5527638190954773, + "grad_norm": 3.4257242679595947, + "learning_rate": 2.587248741756253e-05, + "loss": 0.4707, + "step": 55 + }, + { + "epoch": 0.5628140703517588, + "grad_norm": 3.3254268169403076, + "learning_rate": 2.5e-05, + "loss": 0.4247, + "step": 56 + }, + { + "epoch": 0.5728643216080402, + "grad_norm": 4.219179153442383, + "learning_rate": 2.4127512582437485e-05, + "loss": 1.1432, + "step": 57 + }, + { + "epoch": 0.5829145728643216, + "grad_norm": 1.8128859996795654, + "learning_rate": 2.3256088156396868e-05, + "loss": 0.5847, + "step": 58 + }, + { + "epoch": 0.592964824120603, + "grad_norm": 3.432041883468628, + "learning_rate": 2.238678841830867e-05, + "loss": 0.5025, + "step": 59 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 1.6512501239776611, + "learning_rate": 2.1520672475998373e-05, + "loss": 0.6528, + "step": 60 + }, + { + "epoch": 0.6130653266331658, + "grad_norm": 2.509225845336914, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.7458, + "step": 61 + }, + { + "epoch": 0.6231155778894473, + "grad_norm": 3.3945913314819336, + "learning_rate": 1.980220772955602e-05, + "loss": 0.8483, + "step": 62 + }, + { + "epoch": 0.6331658291457286, + "grad_norm": 1.7071493864059448, + "learning_rate": 1.895195261000831e-05, + "loss": 0.5162, + "step": 63 + }, + { + "epoch": 0.6432160804020101, + "grad_norm": 1.902148723602295, + "learning_rate": 1.8109066104575023e-05, + "loss": 0.3798, + "step": 64 + }, + { + "epoch": 0.6532663316582915, + "grad_norm": 1.3700332641601562, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.3926, + "step": 65 + }, + { + "epoch": 0.6633165829145728, + "grad_norm": 1.6878489255905151, + "learning_rate": 1.6449496416858284e-05, + "loss": 0.4083, + "step": 66 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 33, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.555204085573222e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp1_signal_c_50/checkpoint-66/training_args.bin b/exp1_signal_c_50/checkpoint-66/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..37ba97d5032b379fc979c06f0dd49af445974b4d --- /dev/null +++ b/exp1_signal_c_50/checkpoint-66/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8f433c42e68cfba3960ffd965dce97c50ad18a8f4f749a9737ce8ee8df9b700 +size 6033 diff --git a/exp1_signal_c_50/checkpoint-99/README.md b/exp1_signal_c_50/checkpoint-99/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp1_signal_c_50/checkpoint-99/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp1_signal_c_50/checkpoint-99/adapter_config.json b/exp1_signal_c_50/checkpoint-99/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5c80fe69c40e8dc39868e47c17dcdcc979d9b370 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-99/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp1_signal_c_50/checkpoint-99/adapter_model.safetensors b/exp1_signal_c_50/checkpoint-99/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9506a45b9f89714aca9b57940808369028928bb8 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-99/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95c734bb6bd6e45c41fdcee607fd17550d9c7f5af2fc03b07438ac38b511191a +size 201378736 diff --git a/exp1_signal_c_50/checkpoint-99/optimizer.pt b/exp1_signal_c_50/checkpoint-99/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b0100c0a1a807d442b7537c1d423a0fe6eea69a --- /dev/null +++ b/exp1_signal_c_50/checkpoint-99/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d62ca3042ca590f00a382c9d379a4d9ab4c94880aa37dd43a28683d6e27d3e93 +size 402982627 diff --git a/exp1_signal_c_50/checkpoint-99/rng_state.pth b/exp1_signal_c_50/checkpoint-99/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef5ac8da68afb8bea8ad1331116be95d18fbb182 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-99/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b293338027ef39107c758bc2300d51015b6a878a9f0e09fe1822d419dcbb163e +size 14645 diff --git a/exp1_signal_c_50/checkpoint-99/scheduler.pt b/exp1_signal_c_50/checkpoint-99/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a035695abab34b6d7759d2abb6c583da37754d5 --- /dev/null +++ b/exp1_signal_c_50/checkpoint-99/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba8e76678603d3d1769a2ed6315c7bfd1edb3a261c537a3ff7c1689618cf725 +size 1465 diff --git a/exp1_signal_c_50/checkpoint-99/trainer_state.json b/exp1_signal_c_50/checkpoint-99/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..15f5924c89456dda0853eb26ead511a92b5fd7bf --- /dev/null +++ b/exp1_signal_c_50/checkpoint-99/trainer_state.json @@ -0,0 +1,727 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9949748743718593, + "eval_steps": 100, + "global_step": 99, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 2.090841770172119, + "learning_rate": 0.0, + "loss": 0.6877, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 1.6500182151794434, + "learning_rate": 5e-06, + "loss": 0.5553, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 2.8341240882873535, + "learning_rate": 1e-05, + "loss": 0.7649, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 1.3663409948349, + "learning_rate": 1.5e-05, + "loss": 0.4717, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 1.2471226453781128, + "learning_rate": 2e-05, + "loss": 0.4895, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 1.8957748413085938, + "learning_rate": 2.5e-05, + "loss": 0.8027, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 0.9421080350875854, + "learning_rate": 3e-05, + "loss": 0.2445, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.3940750360488892, + "learning_rate": 3.5e-05, + "loss": 0.6727, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 2.3070380687713623, + "learning_rate": 4e-05, + "loss": 0.8506, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.6834089756011963, + "learning_rate": 4.5e-05, + "loss": 0.5343, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 0.8352651596069336, + "learning_rate": 5e-05, + "loss": 0.3959, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 3.2252678871154785, + "learning_rate": 4.99847706754774e-05, + "loss": 0.5884, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 2.3922126293182373, + "learning_rate": 4.993910125649561e-05, + "loss": 0.5928, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 1.2292028665542603, + "learning_rate": 4.9863047384206835e-05, + "loss": 0.5248, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.8716892004013062, + "learning_rate": 4.975670171853926e-05, + "loss": 0.7384, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 4.264588832855225, + "learning_rate": 4.962019382530521e-05, + "loss": 0.9005, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 1.8790229558944702, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.5212, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 1.6464147567749023, + "learning_rate": 4.925739315689991e-05, + "loss": 0.4105, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 1.471611499786377, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.8898, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 1.6528465747833252, + "learning_rate": 4.877641290737884e-05, + "loss": 0.8817, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 3.6064441204071045, + "learning_rate": 4.849231551964771e-05, + "loss": 1.1784, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.2640239000320435, + "learning_rate": 4.817959636416969e-05, + "loss": 0.4255, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 1.3834407329559326, + "learning_rate": 4.783863644106502e-05, + "loss": 1.0316, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.2265491485595703, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.3261, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 1.5698051452636719, + "learning_rate": 4.707368982147318e-05, + "loss": 0.6068, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.315598487854004, + "learning_rate": 4.665063509461097e-05, + "loss": 0.3593, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 0.7387354969978333, + "learning_rate": 4.620120240391065e-05, + "loss": 0.2985, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 1.4930191040039062, + "learning_rate": 4.572593931387604e-05, + "loss": 0.6862, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 1.1811872720718384, + "learning_rate": 4.522542485937369e-05, + "loss": 0.5399, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 1.3626222610473633, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.4468, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 1.855308175086975, + "learning_rate": 4.415111107797445e-05, + "loss": 0.5117, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 1.2538566589355469, + "learning_rate": 4.357862063693486e-05, + "loss": 0.4231, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 1.4809361696243286, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.7577, + "step": 33 + }, + { + "epoch": 0.3417085427135678, + "grad_norm": 1.7620362043380737, + "learning_rate": 4.2366459261474933e-05, + "loss": 0.7635, + "step": 34 + }, + { + "epoch": 0.35175879396984927, + "grad_norm": 1.337045669555664, + "learning_rate": 4.172826515897146e-05, + "loss": 0.514, + "step": 35 + }, + { + "epoch": 0.36180904522613067, + "grad_norm": 1.4965589046478271, + "learning_rate": 4.1069690242163484e-05, + "loss": 0.5781, + "step": 36 + }, + { + "epoch": 0.37185929648241206, + "grad_norm": 1.4205466508865356, + "learning_rate": 4.039153688314145e-05, + "loss": 0.2826, + "step": 37 + }, + { + "epoch": 0.38190954773869346, + "grad_norm": 1.036288857460022, + "learning_rate": 3.969463130731183e-05, + "loss": 0.3847, + "step": 38 + }, + { + "epoch": 0.39195979899497485, + "grad_norm": 1.3840105533599854, + "learning_rate": 3.897982258676867e-05, + "loss": 0.5401, + "step": 39 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 4.637338161468506, + "learning_rate": 3.824798160583012e-05, + "loss": 1.2988, + "step": 40 + }, + { + "epoch": 0.4120603015075377, + "grad_norm": 1.0604753494262695, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.2507, + "step": 41 + }, + { + "epoch": 0.4221105527638191, + "grad_norm": 1.5602868795394897, + "learning_rate": 3.673678906964727e-05, + "loss": 0.3806, + "step": 42 + }, + { + "epoch": 0.4321608040201005, + "grad_norm": 0.8546467423439026, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3237, + "step": 43 + }, + { + "epoch": 0.44221105527638194, + "grad_norm": 0.6842706799507141, + "learning_rate": 3.516841607689501e-05, + "loss": 0.1639, + "step": 44 + }, + { + "epoch": 0.45226130653266333, + "grad_norm": 1.4356191158294678, + "learning_rate": 3.436516483539781e-05, + "loss": 0.1976, + "step": 45 + }, + { + "epoch": 0.4623115577889447, + "grad_norm": 0.9837068915367126, + "learning_rate": 3.355050358314172e-05, + "loss": 0.1973, + "step": 46 + }, + { + "epoch": 0.4723618090452261, + "grad_norm": 1.9256995916366577, + "learning_rate": 3.272542485937369e-05, + "loss": 0.6982, + "step": 47 + }, + { + "epoch": 0.4824120603015075, + "grad_norm": 1.3878264427185059, + "learning_rate": 3.1890933895424976e-05, + "loss": 0.6256, + "step": 48 + }, + { + "epoch": 0.49246231155778897, + "grad_norm": 2.2053303718566895, + "learning_rate": 3.104804738999169e-05, + "loss": 0.5007, + "step": 49 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 3.3784847259521484, + "learning_rate": 3.0197792270443982e-05, + "loss": 0.8355, + "step": 50 + }, + { + "epoch": 0.5125628140703518, + "grad_norm": 3.213935136795044, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.7751, + "step": 51 + }, + { + "epoch": 0.5226130653266332, + "grad_norm": 1.763657808303833, + "learning_rate": 2.8479327524001636e-05, + "loss": 0.3691, + "step": 52 + }, + { + "epoch": 0.5326633165829145, + "grad_norm": 1.7099454402923584, + "learning_rate": 2.761321158169134e-05, + "loss": 0.3867, + "step": 53 + }, + { + "epoch": 0.542713567839196, + "grad_norm": 2.456601619720459, + "learning_rate": 2.674391184360313e-05, + "loss": 0.8096, + "step": 54 + }, + { + "epoch": 0.5527638190954773, + "grad_norm": 3.4257242679595947, + "learning_rate": 2.587248741756253e-05, + "loss": 0.4707, + "step": 55 + }, + { + "epoch": 0.5628140703517588, + "grad_norm": 3.3254268169403076, + "learning_rate": 2.5e-05, + "loss": 0.4247, + "step": 56 + }, + { + "epoch": 0.5728643216080402, + "grad_norm": 4.219179153442383, + "learning_rate": 2.4127512582437485e-05, + "loss": 1.1432, + "step": 57 + }, + { + "epoch": 0.5829145728643216, + "grad_norm": 1.8128859996795654, + "learning_rate": 2.3256088156396868e-05, + "loss": 0.5847, + "step": 58 + }, + { + "epoch": 0.592964824120603, + "grad_norm": 3.432041883468628, + "learning_rate": 2.238678841830867e-05, + "loss": 0.5025, + "step": 59 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 1.6512501239776611, + "learning_rate": 2.1520672475998373e-05, + "loss": 0.6528, + "step": 60 + }, + { + "epoch": 0.6130653266331658, + "grad_norm": 2.509225845336914, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.7458, + "step": 61 + }, + { + "epoch": 0.6231155778894473, + "grad_norm": 3.3945913314819336, + "learning_rate": 1.980220772955602e-05, + "loss": 0.8483, + "step": 62 + }, + { + "epoch": 0.6331658291457286, + "grad_norm": 1.7071493864059448, + "learning_rate": 1.895195261000831e-05, + "loss": 0.5162, + "step": 63 + }, + { + "epoch": 0.6432160804020101, + "grad_norm": 1.902148723602295, + "learning_rate": 1.8109066104575023e-05, + "loss": 0.3798, + "step": 64 + }, + { + "epoch": 0.6532663316582915, + "grad_norm": 1.3700332641601562, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.3926, + "step": 65 + }, + { + "epoch": 0.6633165829145728, + "grad_norm": 1.6878489255905151, + "learning_rate": 1.6449496416858284e-05, + "loss": 0.4083, + "step": 66 + }, + { + "epoch": 0.6733668341708543, + "grad_norm": 1.113712191581726, + "learning_rate": 1.56348351646022e-05, + "loss": 0.2043, + "step": 67 + }, + { + "epoch": 0.6834170854271356, + "grad_norm": 1.1055054664611816, + "learning_rate": 1.4831583923104999e-05, + "loss": 0.3076, + "step": 68 + }, + { + "epoch": 0.6934673366834171, + "grad_norm": 1.4783660173416138, + "learning_rate": 1.4040721330273062e-05, + "loss": 0.2804, + "step": 69 + }, + { + "epoch": 0.7035175879396985, + "grad_norm": 1.7568031549453735, + "learning_rate": 1.3263210930352737e-05, + "loss": 0.2255, + "step": 70 + }, + { + "epoch": 0.7135678391959799, + "grad_norm": 3.2712368965148926, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.7584, + "step": 71 + }, + { + "epoch": 0.7236180904522613, + "grad_norm": 4.170383453369141, + "learning_rate": 1.175201839416988e-05, + "loss": 0.6353, + "step": 72 + }, + { + "epoch": 0.7336683417085427, + "grad_norm": 2.4070382118225098, + "learning_rate": 1.1020177413231334e-05, + "loss": 0.7063, + "step": 73 + }, + { + "epoch": 0.7437185929648241, + "grad_norm": 1.7356330156326294, + "learning_rate": 1.0305368692688174e-05, + "loss": 0.4544, + "step": 74 + }, + { + "epoch": 0.7537688442211056, + "grad_norm": 2.725081443786621, + "learning_rate": 9.608463116858542e-06, + "loss": 0.6067, + "step": 75 + }, + { + "epoch": 0.7638190954773869, + "grad_norm": 3.7814385890960693, + "learning_rate": 8.930309757836517e-06, + "loss": 0.6096, + "step": 76 + }, + { + "epoch": 0.7738693467336684, + "grad_norm": 1.4958455562591553, + "learning_rate": 8.271734841028553e-06, + "loss": 0.1921, + "step": 77 + }, + { + "epoch": 0.7839195979899497, + "grad_norm": 2.0017693042755127, + "learning_rate": 7.633540738525066e-06, + "loss": 0.4245, + "step": 78 + }, + { + "epoch": 0.7939698492462312, + "grad_norm": 2.2158942222595215, + "learning_rate": 7.016504991533726e-06, + "loss": 0.5462, + "step": 79 + }, + { + "epoch": 0.8040201005025126, + "grad_norm": 2.1772801876068115, + "learning_rate": 6.421379363065142e-06, + "loss": 0.5683, + "step": 80 + }, + { + "epoch": 0.8140703517587939, + "grad_norm": 2.4165947437286377, + "learning_rate": 5.848888922025553e-06, + "loss": 0.6179, + "step": 81 + }, + { + "epoch": 0.8241206030150754, + "grad_norm": 3.4924304485321045, + "learning_rate": 5.299731159831953e-06, + "loss": 0.5454, + "step": 82 + }, + { + "epoch": 0.8341708542713567, + "grad_norm": 1.286102056503296, + "learning_rate": 4.7745751406263165e-06, + "loss": 0.3365, + "step": 83 + }, + { + "epoch": 0.8442211055276382, + "grad_norm": 1.5090934038162231, + "learning_rate": 4.274060686123959e-06, + "loss": 0.2462, + "step": 84 + }, + { + "epoch": 0.8542713567839196, + "grad_norm": 1.7056686878204346, + "learning_rate": 3.798797596089351e-06, + "loss": 0.2526, + "step": 85 + }, + { + "epoch": 0.864321608040201, + "grad_norm": 1.5727635622024536, + "learning_rate": 3.3493649053890326e-06, + "loss": 0.3391, + "step": 86 + }, + { + "epoch": 0.8743718592964824, + "grad_norm": 2.05062198638916, + "learning_rate": 2.9263101785268254e-06, + "loss": 0.8664, + "step": 87 + }, + { + "epoch": 0.8844221105527639, + "grad_norm": 1.8346749544143677, + "learning_rate": 2.5301488425208296e-06, + "loss": 0.2371, + "step": 88 + }, + { + "epoch": 0.8944723618090452, + "grad_norm": 1.5837578773498535, + "learning_rate": 2.1613635589349756e-06, + "loss": 0.3714, + "step": 89 + }, + { + "epoch": 0.9045226130653267, + "grad_norm": 2.0031440258026123, + "learning_rate": 1.8204036358303173e-06, + "loss": 0.34, + "step": 90 + }, + { + "epoch": 0.914572864321608, + "grad_norm": 2.859940528869629, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.4141, + "step": 91 + }, + { + "epoch": 0.9246231155778895, + "grad_norm": 1.9441653490066528, + "learning_rate": 1.2235870926211619e-06, + "loss": 0.2432, + "step": 92 + }, + { + "epoch": 0.9346733668341709, + "grad_norm": 3.415130138397217, + "learning_rate": 9.684576015420278e-07, + "loss": 0.6276, + "step": 93 + }, + { + "epoch": 0.9447236180904522, + "grad_norm": 1.692018985748291, + "learning_rate": 7.426068431000882e-07, + "loss": 0.2778, + "step": 94 + }, + { + "epoch": 0.9547738693467337, + "grad_norm": 3.519803285598755, + "learning_rate": 5.463099816548579e-07, + "loss": 0.3524, + "step": 95 + }, + { + "epoch": 0.964824120603015, + "grad_norm": 0.46640315651893616, + "learning_rate": 3.7980617469479953e-07, + "loss": 0.0778, + "step": 96 + }, + { + "epoch": 0.9748743718592965, + "grad_norm": 2.561051845550537, + "learning_rate": 2.4329828146074095e-07, + "loss": 0.535, + "step": 97 + }, + { + "epoch": 0.9849246231155779, + "grad_norm": 1.67559814453125, + "learning_rate": 1.3695261579316777e-07, + "loss": 0.2518, + "step": 98 + }, + { + "epoch": 0.9949748743718593, + "grad_norm": 1.0802916288375854, + "learning_rate": 6.089874350439506e-08, + "loss": 0.171, + "step": 99 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 33, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.832806128359834e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp1_signal_c_50/checkpoint-99/training_args.bin b/exp1_signal_c_50/checkpoint-99/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..37ba97d5032b379fc979c06f0dd49af445974b4d --- /dev/null +++ b/exp1_signal_c_50/checkpoint-99/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8f433c42e68cfba3960ffd965dce97c50ad18a8f4f749a9737ce8ee8df9b700 +size 6033 diff --git a/exp1_signal_c_50/final_model/README.md b/exp1_signal_c_50/final_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp1_signal_c_50/final_model/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp1_signal_c_50/final_model/adapter_config.json b/exp1_signal_c_50/final_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5c80fe69c40e8dc39868e47c17dcdcc979d9b370 --- /dev/null +++ b/exp1_signal_c_50/final_model/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp1_signal_c_50/final_model/adapter_model.safetensors b/exp1_signal_c_50/final_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..767a5bf5fbdc3dea2b2cbdbf73bec5bc2b6670ae --- /dev/null +++ b/exp1_signal_c_50/final_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84aee8a1799b52d3efe40d4399fd4c6119718a6a5b2ea18286c2eee29fdd270a +size 201378736 diff --git a/exp1_signal_c_50/final_model/training_args.bin b/exp1_signal_c_50/final_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..37ba97d5032b379fc979c06f0dd49af445974b4d --- /dev/null +++ b/exp1_signal_c_50/final_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8f433c42e68cfba3960ffd965dce97c50ad18a8f4f749a9737ce8ee8df9b700 +size 6033 diff --git a/exp2_signal_c_100_2epochs/checkpoint-132/README.md b/exp2_signal_c_100_2epochs/checkpoint-132/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-132/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp2_signal_c_100_2epochs/checkpoint-132/adapter_config.json b/exp2_signal_c_100_2epochs/checkpoint-132/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..507754473268a46eff1b71a02eb5bf9a906d5a14 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-132/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "o_proj", + "v_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp2_signal_c_100_2epochs/checkpoint-132/adapter_model.safetensors b/exp2_signal_c_100_2epochs/checkpoint-132/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c916f3b1c4f217a67eb985f27226646077e24dc --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-132/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e611e44c4b201b7ecef63caceaa1a819ca287841658ae0163f2807cb56c9b798 +size 201378736 diff --git a/exp2_signal_c_100_2epochs/checkpoint-132/optimizer.pt b/exp2_signal_c_100_2epochs/checkpoint-132/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..02974cf234af0ba5d32a4b6ed177630f5a4a3df5 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-132/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5be6fa8c732595be905b4c5a2fb15d04ac2ceb13d4aea7d38a36b42303a0bcfc +size 402982627 diff --git a/exp2_signal_c_100_2epochs/checkpoint-132/rng_state.pth b/exp2_signal_c_100_2epochs/checkpoint-132/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c23033406802bc799bbefa0b8f9ee4bd485145af --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-132/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9095a79a341aa801ca9cd2bae4949a14962245a5f8bc5a5136709a1ded67f09e +size 14645 diff --git a/exp2_signal_c_100_2epochs/checkpoint-132/scheduler.pt b/exp2_signal_c_100_2epochs/checkpoint-132/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..78dbca57b5ac678d9fb7bab4b9c835f1d4de68be --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-132/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaeec3f5e6273ebe2e860586ec86524a23d25000fec9730d663eca548a2838a0 +size 1465 diff --git a/exp2_signal_c_100_2epochs/checkpoint-132/trainer_state.json b/exp2_signal_c_100_2epochs/checkpoint-132/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b8b0b2623d95ec506a1aee9c88631881e76a9b00 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-132/trainer_state.json @@ -0,0 +1,966 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.321608040201005, + "eval_steps": 100, + "global_step": 132, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 2.0252811908721924, + "learning_rate": 0.0, + "loss": 0.6131, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 2.218348741531372, + "learning_rate": 2.5e-06, + "loss": 0.8686, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 2.437188148498535, + "learning_rate": 5e-06, + "loss": 0.5164, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 2.6469321250915527, + "learning_rate": 7.5e-06, + "loss": 1.002, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 1.3588825464248657, + "learning_rate": 1e-05, + "loss": 0.6844, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 3.1810593605041504, + "learning_rate": 1.25e-05, + "loss": 0.9342, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 1.2306936979293823, + "learning_rate": 1.5e-05, + "loss": 0.5049, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.9764370918273926, + "learning_rate": 1.75e-05, + "loss": 0.5337, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 2.451845645904541, + "learning_rate": 2e-05, + "loss": 0.8612, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.2207489013671875, + "learning_rate": 2.25e-05, + "loss": 0.6021, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 1.5423636436462402, + "learning_rate": 2.5e-05, + "loss": 0.3558, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 0.6869585514068604, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.3061, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 1.8259159326553345, + "learning_rate": 3e-05, + "loss": 0.2571, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 2.017957925796509, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.4754, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.566588282585144, + "learning_rate": 3.5e-05, + "loss": 0.8004, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 1.6226823329925537, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.8382, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 1.9973260164260864, + "learning_rate": 4e-05, + "loss": 0.7851, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 1.30808687210083, + "learning_rate": 4.25e-05, + "loss": 0.5387, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 2.495645761489868, + "learning_rate": 4.5e-05, + "loss": 0.5396, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 2.5632853507995605, + "learning_rate": 4.75e-05, + "loss": 0.9602, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 3.062891960144043, + "learning_rate": 5e-05, + "loss": 0.8354, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.4690148830413818, + "learning_rate": 4.9996192378909786e-05, + "loss": 0.426, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 3.291106939315796, + "learning_rate": 4.99847706754774e-05, + "loss": 1.0268, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.5133529901504517, + "learning_rate": 4.996573836886435e-05, + "loss": 0.8601, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 4.66534948348999, + "learning_rate": 4.993910125649561e-05, + "loss": 1.3701, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.0427296161651611, + "learning_rate": 4.990486745229364e-05, + "loss": 0.3869, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 2.4530417919158936, + "learning_rate": 4.9863047384206835e-05, + "loss": 0.582, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 2.5782620906829834, + "learning_rate": 4.9813653791033057e-05, + "loss": 0.8016, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 1.9464126825332642, + "learning_rate": 4.975670171853926e-05, + "loss": 0.5312, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 0.8109995722770691, + "learning_rate": 4.9692208514878444e-05, + "loss": 0.3388, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 2.375706672668457, + "learning_rate": 4.962019382530521e-05, + "loss": 0.7188, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 2.7205145359039307, + "learning_rate": 4.9540679586191605e-05, + "loss": 0.4845, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 3.0961830615997314, + "learning_rate": 4.9453690018345144e-05, + "loss": 1.197, + "step": 33 + }, + { + "epoch": 0.3417085427135678, + "grad_norm": 2.482598304748535, + "learning_rate": 4.9359251619630886e-05, + "loss": 0.5483, + "step": 34 + }, + { + "epoch": 0.35175879396984927, + "grad_norm": 2.1126797199249268, + "learning_rate": 4.925739315689991e-05, + "loss": 0.6905, + "step": 35 + }, + { + "epoch": 0.36180904522613067, + "grad_norm": 1.3531568050384521, + "learning_rate": 4.914814565722671e-05, + "loss": 0.744, + "step": 36 + }, + { + "epoch": 0.37185929648241206, + "grad_norm": 1.144991159439087, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.3955, + "step": 37 + }, + { + "epoch": 0.38190954773869346, + "grad_norm": 2.3990726470947266, + "learning_rate": 4.890761889907589e-05, + "loss": 0.4951, + "step": 38 + }, + { + "epoch": 0.39195979899497485, + "grad_norm": 1.3550069332122803, + "learning_rate": 4.877641290737884e-05, + "loss": 0.7786, + "step": 39 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 1.7543747425079346, + "learning_rate": 4.8637964389982926e-05, + "loss": 0.6777, + "step": 40 + }, + { + "epoch": 0.4120603015075377, + "grad_norm": 1.1261959075927734, + "learning_rate": 4.849231551964771e-05, + "loss": 0.3273, + "step": 41 + }, + { + "epoch": 0.4221105527638191, + "grad_norm": 1.593313455581665, + "learning_rate": 4.8339510662430046e-05, + "loss": 0.7096, + "step": 42 + }, + { + "epoch": 0.4321608040201005, + "grad_norm": 1.301775336265564, + "learning_rate": 4.817959636416969e-05, + "loss": 0.5284, + "step": 43 + }, + { + "epoch": 0.44221105527638194, + "grad_norm": 1.570963740348816, + "learning_rate": 4.8012621336311016e-05, + "loss": 0.5525, + "step": 44 + }, + { + "epoch": 0.45226130653266333, + "grad_norm": 2.2816483974456787, + "learning_rate": 4.783863644106502e-05, + "loss": 0.6153, + "step": 45 + }, + { + "epoch": 0.4623115577889447, + "grad_norm": 1.1265478134155273, + "learning_rate": 4.765769467591625e-05, + "loss": 0.5263, + "step": 46 + }, + { + "epoch": 0.4723618090452261, + "grad_norm": 1.1592512130737305, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.4652, + "step": 47 + }, + { + "epoch": 0.4824120603015075, + "grad_norm": 1.6541743278503418, + "learning_rate": 4.72751631047092e-05, + "loss": 0.4924, + "step": 48 + }, + { + "epoch": 0.49246231155778897, + "grad_norm": 0.7833207249641418, + "learning_rate": 4.707368982147318e-05, + "loss": 0.3839, + "step": 49 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 1.0830917358398438, + "learning_rate": 4.6865492678484895e-05, + "loss": 0.4785, + "step": 50 + }, + { + "epoch": 0.5125628140703518, + "grad_norm": 0.9392517805099487, + "learning_rate": 4.665063509461097e-05, + "loss": 0.4276, + "step": 51 + }, + { + "epoch": 0.5226130653266332, + "grad_norm": 1.1901228427886963, + "learning_rate": 4.642918251755281e-05, + "loss": 0.5079, + "step": 52 + }, + { + "epoch": 0.5326633165829145, + "grad_norm": 3.188199520111084, + "learning_rate": 4.620120240391065e-05, + "loss": 0.9955, + "step": 53 + }, + { + "epoch": 0.542713567839196, + "grad_norm": 1.2436141967773438, + "learning_rate": 4.5966764198635606e-05, + "loss": 0.3548, + "step": 54 + }, + { + "epoch": 0.5527638190954773, + "grad_norm": 1.235634684562683, + "learning_rate": 4.572593931387604e-05, + "loss": 0.2418, + "step": 55 + }, + { + "epoch": 0.5628140703517588, + "grad_norm": 0.8484794497489929, + "learning_rate": 4.54788011072248e-05, + "loss": 0.2605, + "step": 56 + }, + { + "epoch": 0.5728643216080402, + "grad_norm": 1.291303038597107, + "learning_rate": 4.522542485937369e-05, + "loss": 0.1845, + "step": 57 + }, + { + "epoch": 0.5829145728643216, + "grad_norm": 2.8543176651000977, + "learning_rate": 4.496588775118232e-05, + "loss": 0.661, + "step": 58 + }, + { + "epoch": 0.592964824120603, + "grad_norm": 2.5363144874572754, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.612, + "step": 59 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 0.27271756529808044, + "learning_rate": 4.442864903642428e-05, + "loss": 0.0615, + "step": 60 + }, + { + "epoch": 0.6130653266331658, + "grad_norm": 1.4997279644012451, + "learning_rate": 4.415111107797445e-05, + "loss": 0.464, + "step": 61 + }, + { + "epoch": 0.6231155778894473, + "grad_norm": 1.4228745698928833, + "learning_rate": 4.386773950556931e-05, + "loss": 0.404, + "step": 62 + }, + { + "epoch": 0.6331658291457286, + "grad_norm": 1.4517656564712524, + "learning_rate": 4.357862063693486e-05, + "loss": 0.339, + "step": 63 + }, + { + "epoch": 0.6432160804020101, + "grad_norm": 2.0428409576416016, + "learning_rate": 4.3283842540479264e-05, + "loss": 0.3835, + "step": 64 + }, + { + "epoch": 0.6532663316582915, + "grad_norm": 1.0938389301300049, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.3167, + "step": 65 + }, + { + "epoch": 0.6633165829145728, + "grad_norm": 1.7590700387954712, + "learning_rate": 4.267766952966369e-05, + "loss": 0.3107, + "step": 66 + }, + { + "epoch": 0.6733668341708543, + "grad_norm": 3.68430757522583, + "learning_rate": 4.2366459261474933e-05, + "loss": 1.0588, + "step": 67 + }, + { + "epoch": 0.6834170854271356, + "grad_norm": 3.4364969730377197, + "learning_rate": 4.2049959001562464e-05, + "loss": 0.6519, + "step": 68 + }, + { + "epoch": 0.6934673366834171, + "grad_norm": 2.672166347503662, + "learning_rate": 4.172826515897146e-05, + "loss": 0.4864, + "step": 69 + }, + { + "epoch": 0.7035175879396985, + "grad_norm": 2.1341798305511475, + "learning_rate": 4.140147572476268e-05, + "loss": 0.5572, + "step": 70 + }, + { + "epoch": 0.7135678391959799, + "grad_norm": 2.12341046333313, + "learning_rate": 4.1069690242163484e-05, + "loss": 0.3251, + "step": 71 + }, + { + "epoch": 0.7236180904522613, + "grad_norm": 2.14962100982666, + "learning_rate": 4.073300977624594e-05, + "loss": 0.4926, + "step": 72 + }, + { + "epoch": 0.7336683417085427, + "grad_norm": 2.619264602661133, + "learning_rate": 4.039153688314145e-05, + "loss": 0.6058, + "step": 73 + }, + { + "epoch": 0.7437185929648241, + "grad_norm": 1.5685818195343018, + "learning_rate": 4.0045375578801214e-05, + "loss": 0.3121, + "step": 74 + }, + { + "epoch": 0.7537688442211056, + "grad_norm": 0.9353585243225098, + "learning_rate": 3.969463130731183e-05, + "loss": 0.1452, + "step": 75 + }, + { + "epoch": 0.7638190954773869, + "grad_norm": 3.1898391246795654, + "learning_rate": 3.933941090877615e-05, + "loss": 0.4206, + "step": 76 + }, + { + "epoch": 0.7738693467336684, + "grad_norm": 1.3562535047531128, + "learning_rate": 3.897982258676867e-05, + "loss": 0.2435, + "step": 77 + }, + { + "epoch": 0.7839195979899497, + "grad_norm": 1.2627103328704834, + "learning_rate": 3.861597587537568e-05, + "loss": 0.2672, + "step": 78 + }, + { + "epoch": 0.7939698492462312, + "grad_norm": 5.832367420196533, + "learning_rate": 3.824798160583012e-05, + "loss": 0.7605, + "step": 79 + }, + { + "epoch": 0.8040201005025126, + "grad_norm": 2.4924912452697754, + "learning_rate": 3.787595187275136e-05, + "loss": 0.2654, + "step": 80 + }, + { + "epoch": 0.8140703517587939, + "grad_norm": 0.6880796551704407, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0827, + "step": 81 + }, + { + "epoch": 0.8241206030150754, + "grad_norm": 3.6591432094573975, + "learning_rate": 3.712024050615843e-05, + "loss": 0.3984, + "step": 82 + }, + { + "epoch": 0.8341708542713567, + "grad_norm": 3.8862345218658447, + "learning_rate": 3.673678906964727e-05, + "loss": 0.5403, + "step": 83 + }, + { + "epoch": 0.8442211055276382, + "grad_norm": 0.4640906751155853, + "learning_rate": 3.634976249348867e-05, + "loss": 0.0481, + "step": 84 + }, + { + "epoch": 0.8542713567839196, + "grad_norm": 3.089759111404419, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3619, + "step": 85 + }, + { + "epoch": 0.864321608040201, + "grad_norm": 3.1657228469848633, + "learning_rate": 3.556545654351749e-05, + "loss": 0.1882, + "step": 86 + }, + { + "epoch": 0.8743718592964824, + "grad_norm": 1.3390817642211914, + "learning_rate": 3.516841607689501e-05, + "loss": 0.103, + "step": 87 + }, + { + "epoch": 0.8844221105527639, + "grad_norm": 2.739612340927124, + "learning_rate": 3.476827821223184e-05, + "loss": 0.2518, + "step": 88 + }, + { + "epoch": 0.8944723618090452, + "grad_norm": 0.2353546917438507, + "learning_rate": 3.436516483539781e-05, + "loss": 0.0317, + "step": 89 + }, + { + "epoch": 0.9045226130653267, + "grad_norm": 1.8644217252731323, + "learning_rate": 3.39591987386325e-05, + "loss": 0.2752, + "step": 90 + }, + { + "epoch": 0.914572864321608, + "grad_norm": 1.3844764232635498, + "learning_rate": 3.355050358314172e-05, + "loss": 0.0889, + "step": 91 + }, + { + "epoch": 0.9246231155778895, + "grad_norm": 4.213919162750244, + "learning_rate": 3.313920386142892e-05, + "loss": 0.493, + "step": 92 + }, + { + "epoch": 0.9346733668341709, + "grad_norm": 0.37895992398262024, + "learning_rate": 3.272542485937369e-05, + "loss": 0.0362, + "step": 93 + }, + { + "epoch": 0.9447236180904522, + "grad_norm": 6.269938945770264, + "learning_rate": 3.230929261806842e-05, + "loss": 0.5163, + "step": 94 + }, + { + "epoch": 0.9547738693467337, + "grad_norm": 0.268771231174469, + "learning_rate": 3.1890933895424976e-05, + "loss": 0.0264, + "step": 95 + }, + { + "epoch": 0.964824120603015, + "grad_norm": 3.6721014976501465, + "learning_rate": 3.147047612756302e-05, + "loss": 0.7893, + "step": 96 + }, + { + "epoch": 0.9748743718592965, + "grad_norm": 3.1898202896118164, + "learning_rate": 3.104804738999169e-05, + "loss": 0.1096, + "step": 97 + }, + { + "epoch": 0.9849246231155779, + "grad_norm": 4.021219253540039, + "learning_rate": 3.062377635859663e-05, + "loss": 1.1929, + "step": 98 + }, + { + "epoch": 0.9949748743718593, + "grad_norm": 0.9718989729881287, + "learning_rate": 3.0197792270443982e-05, + "loss": 0.1046, + "step": 99 + }, + { + "epoch": 1.0, + "grad_norm": 1.1656709909439087, + "learning_rate": 2.9770224884413623e-05, + "loss": 0.0735, + "step": 100 + }, + { + "epoch": 1.0, + "eval_loss": 0.5929667353630066, + "eval_runtime": 85.8837, + "eval_samples_per_second": 3.493, + "eval_steps_per_second": 1.747, + "step": 100 + }, + { + "epoch": 1.0100502512562815, + "grad_norm": 1.7781696319580078, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.1134, + "step": 101 + }, + { + "epoch": 1.020100502512563, + "grad_norm": 4.624056339263916, + "learning_rate": 2.8910861626005776e-05, + "loss": 0.6274, + "step": 102 + }, + { + "epoch": 1.0301507537688441, + "grad_norm": 5.745519638061523, + "learning_rate": 2.8479327524001636e-05, + "loss": 0.587, + "step": 103 + }, + { + "epoch": 1.0402010050251256, + "grad_norm": 8.235309600830078, + "learning_rate": 2.8046733585128687e-05, + "loss": 0.9932, + "step": 104 + }, + { + "epoch": 1.050251256281407, + "grad_norm": 1.4587864875793457, + "learning_rate": 2.761321158169134e-05, + "loss": 0.1046, + "step": 105 + }, + { + "epoch": 1.0603015075376885, + "grad_norm": 2.9020118713378906, + "learning_rate": 2.717889356869146e-05, + "loss": 0.7703, + "step": 106 + }, + { + "epoch": 1.07035175879397, + "grad_norm": 1.5700269937515259, + "learning_rate": 2.674391184360313e-05, + "loss": 0.1772, + "step": 107 + }, + { + "epoch": 1.0804020100502512, + "grad_norm": 0.8232919573783875, + "learning_rate": 2.63083989060736e-05, + "loss": 0.1098, + "step": 108 + }, + { + "epoch": 1.0904522613065326, + "grad_norm": 2.7101190090179443, + "learning_rate": 2.587248741756253e-05, + "loss": 0.2107, + "step": 109 + }, + { + "epoch": 1.100502512562814, + "grad_norm": 2.1079862117767334, + "learning_rate": 2.5436310160932092e-05, + "loss": 0.2951, + "step": 110 + }, + { + "epoch": 1.1105527638190955, + "grad_norm": 2.2472472190856934, + "learning_rate": 2.5e-05, + "loss": 0.162, + "step": 111 + }, + { + "epoch": 1.120603015075377, + "grad_norm": 3.6868035793304443, + "learning_rate": 2.4563689839067913e-05, + "loss": 0.3529, + "step": 112 + }, + { + "epoch": 1.1306532663316582, + "grad_norm": 0.8493557572364807, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.0754, + "step": 113 + }, + { + "epoch": 1.1407035175879396, + "grad_norm": 1.6832430362701416, + "learning_rate": 2.3691601093926404e-05, + "loss": 0.1457, + "step": 114 + }, + { + "epoch": 1.150753768844221, + "grad_norm": 1.736344575881958, + "learning_rate": 2.3256088156396868e-05, + "loss": 0.15, + "step": 115 + }, + { + "epoch": 1.1608040201005025, + "grad_norm": 2.8943748474121094, + "learning_rate": 2.2821106431308544e-05, + "loss": 0.2136, + "step": 116 + }, + { + "epoch": 1.170854271356784, + "grad_norm": 0.8236376643180847, + "learning_rate": 2.238678841830867e-05, + "loss": 0.0577, + "step": 117 + }, + { + "epoch": 1.1809045226130652, + "grad_norm": 0.7046677470207214, + "learning_rate": 2.195326641487132e-05, + "loss": 0.0854, + "step": 118 + }, + { + "epoch": 1.1909547738693467, + "grad_norm": 1.220378041267395, + "learning_rate": 2.1520672475998373e-05, + "loss": 0.1106, + "step": 119 + }, + { + "epoch": 1.2010050251256281, + "grad_norm": 1.4063576459884644, + "learning_rate": 2.1089138373994223e-05, + "loss": 0.1565, + "step": 120 + }, + { + "epoch": 1.2110552763819096, + "grad_norm": 0.9700479507446289, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.1085, + "step": 121 + }, + { + "epoch": 1.221105527638191, + "grad_norm": 1.868759274482727, + "learning_rate": 2.022977511558638e-05, + "loss": 0.1558, + "step": 122 + }, + { + "epoch": 1.2311557788944723, + "grad_norm": 0.3064749240875244, + "learning_rate": 1.980220772955602e-05, + "loss": 0.0387, + "step": 123 + }, + { + "epoch": 1.2412060301507537, + "grad_norm": 0.7892552018165588, + "learning_rate": 1.937622364140338e-05, + "loss": 0.0635, + "step": 124 + }, + { + "epoch": 1.2512562814070352, + "grad_norm": 9.358102798461914, + "learning_rate": 1.895195261000831e-05, + "loss": 1.1969, + "step": 125 + }, + { + "epoch": 1.2613065326633166, + "grad_norm": 2.4470090866088867, + "learning_rate": 1.852952387243698e-05, + "loss": 0.7585, + "step": 126 + }, + { + "epoch": 1.271356783919598, + "grad_norm": 2.6475372314453125, + "learning_rate": 1.8109066104575023e-05, + "loss": 0.1562, + "step": 127 + }, + { + "epoch": 1.2814070351758793, + "grad_norm": 1.2624969482421875, + "learning_rate": 1.7690707381931583e-05, + "loss": 0.1283, + "step": 128 + }, + { + "epoch": 1.2914572864321607, + "grad_norm": 0.3836812973022461, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.0346, + "step": 129 + }, + { + "epoch": 1.3015075376884422, + "grad_norm": 1.1079610586166382, + "learning_rate": 1.686079613857109e-05, + "loss": 0.0899, + "step": 130 + }, + { + "epoch": 1.3115577889447236, + "grad_norm": 0.6915103793144226, + "learning_rate": 1.6449496416858284e-05, + "loss": 0.0848, + "step": 131 + }, + { + "epoch": 1.321608040201005, + "grad_norm": 4.291629314422607, + "learning_rate": 1.6040801261367493e-05, + "loss": 0.3636, + "step": 132 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 66, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9.075899049286042e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp2_signal_c_100_2epochs/checkpoint-132/training_args.bin b/exp2_signal_c_100_2epochs/checkpoint-132/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..48c217f8385eb386ca7f43bcee0009bbce8813af --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-132/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16a94d0c1563f7c95ac7a7d2caaec2e2298f1c2ba68f8f51cc8f61275ba42f0a +size 6033 diff --git a/exp2_signal_c_100_2epochs/checkpoint-198/README.md b/exp2_signal_c_100_2epochs/checkpoint-198/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-198/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp2_signal_c_100_2epochs/checkpoint-198/adapter_config.json b/exp2_signal_c_100_2epochs/checkpoint-198/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..507754473268a46eff1b71a02eb5bf9a906d5a14 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-198/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "o_proj", + "v_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp2_signal_c_100_2epochs/checkpoint-198/adapter_model.safetensors b/exp2_signal_c_100_2epochs/checkpoint-198/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e863acdd84058894678930e1fd8613fb198eef1d --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-198/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c95f6019f7e09e80ecd7c5447b710dd3b14e27032f1b5a4d399331507a4718 +size 201378736 diff --git a/exp2_signal_c_100_2epochs/checkpoint-198/optimizer.pt b/exp2_signal_c_100_2epochs/checkpoint-198/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..000b89c46be3645b1f20be8f6ef1874680721af2 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-198/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:894be1c899cb40fbe91c2aa9c5e03025250b19d94d8dd27dfb4baf46f9b16e44 +size 402982627 diff --git a/exp2_signal_c_100_2epochs/checkpoint-198/rng_state.pth b/exp2_signal_c_100_2epochs/checkpoint-198/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3df2dcd57391803554a94dfd47fc3d36efd705b2 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-198/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b9565f836dd616b85d73b0de702bc760032155a0c29ee7c1af9bb41c88c0c92 +size 14645 diff --git a/exp2_signal_c_100_2epochs/checkpoint-198/scheduler.pt b/exp2_signal_c_100_2epochs/checkpoint-198/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e18855e6cfb5faff8f66efd6d88bdf3132160c13 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-198/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dfece36c8bbbdbfd8f460da6692a55009f7b80417624cd02a64bf03f1a8b3f0 +size 1465 diff --git a/exp2_signal_c_100_2epochs/checkpoint-198/trainer_state.json b/exp2_signal_c_100_2epochs/checkpoint-198/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1b64047087b647f30abefedea4a859026a39d29c --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-198/trainer_state.json @@ -0,0 +1,1428 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.984924623115578, + "eval_steps": 100, + "global_step": 198, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 2.0252811908721924, + "learning_rate": 0.0, + "loss": 0.6131, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 2.218348741531372, + "learning_rate": 2.5e-06, + "loss": 0.8686, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 2.437188148498535, + "learning_rate": 5e-06, + "loss": 0.5164, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 2.6469321250915527, + "learning_rate": 7.5e-06, + "loss": 1.002, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 1.3588825464248657, + "learning_rate": 1e-05, + "loss": 0.6844, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 3.1810593605041504, + "learning_rate": 1.25e-05, + "loss": 0.9342, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 1.2306936979293823, + "learning_rate": 1.5e-05, + "loss": 0.5049, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.9764370918273926, + "learning_rate": 1.75e-05, + "loss": 0.5337, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 2.451845645904541, + "learning_rate": 2e-05, + "loss": 0.8612, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.2207489013671875, + "learning_rate": 2.25e-05, + "loss": 0.6021, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 1.5423636436462402, + "learning_rate": 2.5e-05, + "loss": 0.3558, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 0.6869585514068604, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.3061, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 1.8259159326553345, + "learning_rate": 3e-05, + "loss": 0.2571, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 2.017957925796509, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.4754, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.566588282585144, + "learning_rate": 3.5e-05, + "loss": 0.8004, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 1.6226823329925537, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.8382, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 1.9973260164260864, + "learning_rate": 4e-05, + "loss": 0.7851, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 1.30808687210083, + "learning_rate": 4.25e-05, + "loss": 0.5387, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 2.495645761489868, + "learning_rate": 4.5e-05, + "loss": 0.5396, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 2.5632853507995605, + "learning_rate": 4.75e-05, + "loss": 0.9602, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 3.062891960144043, + "learning_rate": 5e-05, + "loss": 0.8354, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.4690148830413818, + "learning_rate": 4.9996192378909786e-05, + "loss": 0.426, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 3.291106939315796, + "learning_rate": 4.99847706754774e-05, + "loss": 1.0268, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.5133529901504517, + "learning_rate": 4.996573836886435e-05, + "loss": 0.8601, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 4.66534948348999, + "learning_rate": 4.993910125649561e-05, + "loss": 1.3701, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.0427296161651611, + "learning_rate": 4.990486745229364e-05, + "loss": 0.3869, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 2.4530417919158936, + "learning_rate": 4.9863047384206835e-05, + "loss": 0.582, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 2.5782620906829834, + "learning_rate": 4.9813653791033057e-05, + "loss": 0.8016, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 1.9464126825332642, + "learning_rate": 4.975670171853926e-05, + "loss": 0.5312, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 0.8109995722770691, + "learning_rate": 4.9692208514878444e-05, + "loss": 0.3388, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 2.375706672668457, + "learning_rate": 4.962019382530521e-05, + "loss": 0.7188, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 2.7205145359039307, + "learning_rate": 4.9540679586191605e-05, + "loss": 0.4845, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 3.0961830615997314, + "learning_rate": 4.9453690018345144e-05, + "loss": 1.197, + "step": 33 + }, + { + "epoch": 0.3417085427135678, + "grad_norm": 2.482598304748535, + "learning_rate": 4.9359251619630886e-05, + "loss": 0.5483, + "step": 34 + }, + { + "epoch": 0.35175879396984927, + "grad_norm": 2.1126797199249268, + "learning_rate": 4.925739315689991e-05, + "loss": 0.6905, + "step": 35 + }, + { + "epoch": 0.36180904522613067, + "grad_norm": 1.3531568050384521, + "learning_rate": 4.914814565722671e-05, + "loss": 0.744, + "step": 36 + }, + { + "epoch": 0.37185929648241206, + "grad_norm": 1.144991159439087, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.3955, + "step": 37 + }, + { + "epoch": 0.38190954773869346, + "grad_norm": 2.3990726470947266, + "learning_rate": 4.890761889907589e-05, + "loss": 0.4951, + "step": 38 + }, + { + "epoch": 0.39195979899497485, + "grad_norm": 1.3550069332122803, + "learning_rate": 4.877641290737884e-05, + "loss": 0.7786, + "step": 39 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 1.7543747425079346, + "learning_rate": 4.8637964389982926e-05, + "loss": 0.6777, + "step": 40 + }, + { + "epoch": 0.4120603015075377, + "grad_norm": 1.1261959075927734, + "learning_rate": 4.849231551964771e-05, + "loss": 0.3273, + "step": 41 + }, + { + "epoch": 0.4221105527638191, + "grad_norm": 1.593313455581665, + "learning_rate": 4.8339510662430046e-05, + "loss": 0.7096, + "step": 42 + }, + { + "epoch": 0.4321608040201005, + "grad_norm": 1.301775336265564, + "learning_rate": 4.817959636416969e-05, + "loss": 0.5284, + "step": 43 + }, + { + "epoch": 0.44221105527638194, + "grad_norm": 1.570963740348816, + "learning_rate": 4.8012621336311016e-05, + "loss": 0.5525, + "step": 44 + }, + { + "epoch": 0.45226130653266333, + "grad_norm": 2.2816483974456787, + "learning_rate": 4.783863644106502e-05, + "loss": 0.6153, + "step": 45 + }, + { + "epoch": 0.4623115577889447, + "grad_norm": 1.1265478134155273, + "learning_rate": 4.765769467591625e-05, + "loss": 0.5263, + "step": 46 + }, + { + "epoch": 0.4723618090452261, + "grad_norm": 1.1592512130737305, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.4652, + "step": 47 + }, + { + "epoch": 0.4824120603015075, + "grad_norm": 1.6541743278503418, + "learning_rate": 4.72751631047092e-05, + "loss": 0.4924, + "step": 48 + }, + { + "epoch": 0.49246231155778897, + "grad_norm": 0.7833207249641418, + "learning_rate": 4.707368982147318e-05, + "loss": 0.3839, + "step": 49 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 1.0830917358398438, + "learning_rate": 4.6865492678484895e-05, + "loss": 0.4785, + "step": 50 + }, + { + "epoch": 0.5125628140703518, + "grad_norm": 0.9392517805099487, + "learning_rate": 4.665063509461097e-05, + "loss": 0.4276, + "step": 51 + }, + { + "epoch": 0.5226130653266332, + "grad_norm": 1.1901228427886963, + "learning_rate": 4.642918251755281e-05, + "loss": 0.5079, + "step": 52 + }, + { + "epoch": 0.5326633165829145, + "grad_norm": 3.188199520111084, + "learning_rate": 4.620120240391065e-05, + "loss": 0.9955, + "step": 53 + }, + { + "epoch": 0.542713567839196, + "grad_norm": 1.2436141967773438, + "learning_rate": 4.5966764198635606e-05, + "loss": 0.3548, + "step": 54 + }, + { + "epoch": 0.5527638190954773, + "grad_norm": 1.235634684562683, + "learning_rate": 4.572593931387604e-05, + "loss": 0.2418, + "step": 55 + }, + { + "epoch": 0.5628140703517588, + "grad_norm": 0.8484794497489929, + "learning_rate": 4.54788011072248e-05, + "loss": 0.2605, + "step": 56 + }, + { + "epoch": 0.5728643216080402, + "grad_norm": 1.291303038597107, + "learning_rate": 4.522542485937369e-05, + "loss": 0.1845, + "step": 57 + }, + { + "epoch": 0.5829145728643216, + "grad_norm": 2.8543176651000977, + "learning_rate": 4.496588775118232e-05, + "loss": 0.661, + "step": 58 + }, + { + "epoch": 0.592964824120603, + "grad_norm": 2.5363144874572754, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.612, + "step": 59 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 0.27271756529808044, + "learning_rate": 4.442864903642428e-05, + "loss": 0.0615, + "step": 60 + }, + { + "epoch": 0.6130653266331658, + "grad_norm": 1.4997279644012451, + "learning_rate": 4.415111107797445e-05, + "loss": 0.464, + "step": 61 + }, + { + "epoch": 0.6231155778894473, + "grad_norm": 1.4228745698928833, + "learning_rate": 4.386773950556931e-05, + "loss": 0.404, + "step": 62 + }, + { + "epoch": 0.6331658291457286, + "grad_norm": 1.4517656564712524, + "learning_rate": 4.357862063693486e-05, + "loss": 0.339, + "step": 63 + }, + { + "epoch": 0.6432160804020101, + "grad_norm": 2.0428409576416016, + "learning_rate": 4.3283842540479264e-05, + "loss": 0.3835, + "step": 64 + }, + { + "epoch": 0.6532663316582915, + "grad_norm": 1.0938389301300049, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.3167, + "step": 65 + }, + { + "epoch": 0.6633165829145728, + "grad_norm": 1.7590700387954712, + "learning_rate": 4.267766952966369e-05, + "loss": 0.3107, + "step": 66 + }, + { + "epoch": 0.6733668341708543, + "grad_norm": 3.68430757522583, + "learning_rate": 4.2366459261474933e-05, + "loss": 1.0588, + "step": 67 + }, + { + "epoch": 0.6834170854271356, + "grad_norm": 3.4364969730377197, + "learning_rate": 4.2049959001562464e-05, + "loss": 0.6519, + "step": 68 + }, + { + "epoch": 0.6934673366834171, + "grad_norm": 2.672166347503662, + "learning_rate": 4.172826515897146e-05, + "loss": 0.4864, + "step": 69 + }, + { + "epoch": 0.7035175879396985, + "grad_norm": 2.1341798305511475, + "learning_rate": 4.140147572476268e-05, + "loss": 0.5572, + "step": 70 + }, + { + "epoch": 0.7135678391959799, + "grad_norm": 2.12341046333313, + "learning_rate": 4.1069690242163484e-05, + "loss": 0.3251, + "step": 71 + }, + { + "epoch": 0.7236180904522613, + "grad_norm": 2.14962100982666, + "learning_rate": 4.073300977624594e-05, + "loss": 0.4926, + "step": 72 + }, + { + "epoch": 0.7336683417085427, + "grad_norm": 2.619264602661133, + "learning_rate": 4.039153688314145e-05, + "loss": 0.6058, + "step": 73 + }, + { + "epoch": 0.7437185929648241, + "grad_norm": 1.5685818195343018, + "learning_rate": 4.0045375578801214e-05, + "loss": 0.3121, + "step": 74 + }, + { + "epoch": 0.7537688442211056, + "grad_norm": 0.9353585243225098, + "learning_rate": 3.969463130731183e-05, + "loss": 0.1452, + "step": 75 + }, + { + "epoch": 0.7638190954773869, + "grad_norm": 3.1898391246795654, + "learning_rate": 3.933941090877615e-05, + "loss": 0.4206, + "step": 76 + }, + { + "epoch": 0.7738693467336684, + "grad_norm": 1.3562535047531128, + "learning_rate": 3.897982258676867e-05, + "loss": 0.2435, + "step": 77 + }, + { + "epoch": 0.7839195979899497, + "grad_norm": 1.2627103328704834, + "learning_rate": 3.861597587537568e-05, + "loss": 0.2672, + "step": 78 + }, + { + "epoch": 0.7939698492462312, + "grad_norm": 5.832367420196533, + "learning_rate": 3.824798160583012e-05, + "loss": 0.7605, + "step": 79 + }, + { + "epoch": 0.8040201005025126, + "grad_norm": 2.4924912452697754, + "learning_rate": 3.787595187275136e-05, + "loss": 0.2654, + "step": 80 + }, + { + "epoch": 0.8140703517587939, + "grad_norm": 0.6880796551704407, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0827, + "step": 81 + }, + { + "epoch": 0.8241206030150754, + "grad_norm": 3.6591432094573975, + "learning_rate": 3.712024050615843e-05, + "loss": 0.3984, + "step": 82 + }, + { + "epoch": 0.8341708542713567, + "grad_norm": 3.8862345218658447, + "learning_rate": 3.673678906964727e-05, + "loss": 0.5403, + "step": 83 + }, + { + "epoch": 0.8442211055276382, + "grad_norm": 0.4640906751155853, + "learning_rate": 3.634976249348867e-05, + "loss": 0.0481, + "step": 84 + }, + { + "epoch": 0.8542713567839196, + "grad_norm": 3.089759111404419, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3619, + "step": 85 + }, + { + "epoch": 0.864321608040201, + "grad_norm": 3.1657228469848633, + "learning_rate": 3.556545654351749e-05, + "loss": 0.1882, + "step": 86 + }, + { + "epoch": 0.8743718592964824, + "grad_norm": 1.3390817642211914, + "learning_rate": 3.516841607689501e-05, + "loss": 0.103, + "step": 87 + }, + { + "epoch": 0.8844221105527639, + "grad_norm": 2.739612340927124, + "learning_rate": 3.476827821223184e-05, + "loss": 0.2518, + "step": 88 + }, + { + "epoch": 0.8944723618090452, + "grad_norm": 0.2353546917438507, + "learning_rate": 3.436516483539781e-05, + "loss": 0.0317, + "step": 89 + }, + { + "epoch": 0.9045226130653267, + "grad_norm": 1.8644217252731323, + "learning_rate": 3.39591987386325e-05, + "loss": 0.2752, + "step": 90 + }, + { + "epoch": 0.914572864321608, + "grad_norm": 1.3844764232635498, + "learning_rate": 3.355050358314172e-05, + "loss": 0.0889, + "step": 91 + }, + { + "epoch": 0.9246231155778895, + "grad_norm": 4.213919162750244, + "learning_rate": 3.313920386142892e-05, + "loss": 0.493, + "step": 92 + }, + { + "epoch": 0.9346733668341709, + "grad_norm": 0.37895992398262024, + "learning_rate": 3.272542485937369e-05, + "loss": 0.0362, + "step": 93 + }, + { + "epoch": 0.9447236180904522, + "grad_norm": 6.269938945770264, + "learning_rate": 3.230929261806842e-05, + "loss": 0.5163, + "step": 94 + }, + { + "epoch": 0.9547738693467337, + "grad_norm": 0.268771231174469, + "learning_rate": 3.1890933895424976e-05, + "loss": 0.0264, + "step": 95 + }, + { + "epoch": 0.964824120603015, + "grad_norm": 3.6721014976501465, + "learning_rate": 3.147047612756302e-05, + "loss": 0.7893, + "step": 96 + }, + { + "epoch": 0.9748743718592965, + "grad_norm": 3.1898202896118164, + "learning_rate": 3.104804738999169e-05, + "loss": 0.1096, + "step": 97 + }, + { + "epoch": 0.9849246231155779, + "grad_norm": 4.021219253540039, + "learning_rate": 3.062377635859663e-05, + "loss": 1.1929, + "step": 98 + }, + { + "epoch": 0.9949748743718593, + "grad_norm": 0.9718989729881287, + "learning_rate": 3.0197792270443982e-05, + "loss": 0.1046, + "step": 99 + }, + { + "epoch": 1.0, + "grad_norm": 1.1656709909439087, + "learning_rate": 2.9770224884413623e-05, + "loss": 0.0735, + "step": 100 + }, + { + "epoch": 1.0, + "eval_loss": 0.5929667353630066, + "eval_runtime": 85.8837, + "eval_samples_per_second": 3.493, + "eval_steps_per_second": 1.747, + "step": 100 + }, + { + "epoch": 1.0100502512562815, + "grad_norm": 1.7781696319580078, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.1134, + "step": 101 + }, + { + "epoch": 1.020100502512563, + "grad_norm": 4.624056339263916, + "learning_rate": 2.8910861626005776e-05, + "loss": 0.6274, + "step": 102 + }, + { + "epoch": 1.0301507537688441, + "grad_norm": 5.745519638061523, + "learning_rate": 2.8479327524001636e-05, + "loss": 0.587, + "step": 103 + }, + { + "epoch": 1.0402010050251256, + "grad_norm": 8.235309600830078, + "learning_rate": 2.8046733585128687e-05, + "loss": 0.9932, + "step": 104 + }, + { + "epoch": 1.050251256281407, + "grad_norm": 1.4587864875793457, + "learning_rate": 2.761321158169134e-05, + "loss": 0.1046, + "step": 105 + }, + { + "epoch": 1.0603015075376885, + "grad_norm": 2.9020118713378906, + "learning_rate": 2.717889356869146e-05, + "loss": 0.7703, + "step": 106 + }, + { + "epoch": 1.07035175879397, + "grad_norm": 1.5700269937515259, + "learning_rate": 2.674391184360313e-05, + "loss": 0.1772, + "step": 107 + }, + { + "epoch": 1.0804020100502512, + "grad_norm": 0.8232919573783875, + "learning_rate": 2.63083989060736e-05, + "loss": 0.1098, + "step": 108 + }, + { + "epoch": 1.0904522613065326, + "grad_norm": 2.7101190090179443, + "learning_rate": 2.587248741756253e-05, + "loss": 0.2107, + "step": 109 + }, + { + "epoch": 1.100502512562814, + "grad_norm": 2.1079862117767334, + "learning_rate": 2.5436310160932092e-05, + "loss": 0.2951, + "step": 110 + }, + { + "epoch": 1.1105527638190955, + "grad_norm": 2.2472472190856934, + "learning_rate": 2.5e-05, + "loss": 0.162, + "step": 111 + }, + { + "epoch": 1.120603015075377, + "grad_norm": 3.6868035793304443, + "learning_rate": 2.4563689839067913e-05, + "loss": 0.3529, + "step": 112 + }, + { + "epoch": 1.1306532663316582, + "grad_norm": 0.8493557572364807, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.0754, + "step": 113 + }, + { + "epoch": 1.1407035175879396, + "grad_norm": 1.6832430362701416, + "learning_rate": 2.3691601093926404e-05, + "loss": 0.1457, + "step": 114 + }, + { + "epoch": 1.150753768844221, + "grad_norm": 1.736344575881958, + "learning_rate": 2.3256088156396868e-05, + "loss": 0.15, + "step": 115 + }, + { + "epoch": 1.1608040201005025, + "grad_norm": 2.8943748474121094, + "learning_rate": 2.2821106431308544e-05, + "loss": 0.2136, + "step": 116 + }, + { + "epoch": 1.170854271356784, + "grad_norm": 0.8236376643180847, + "learning_rate": 2.238678841830867e-05, + "loss": 0.0577, + "step": 117 + }, + { + "epoch": 1.1809045226130652, + "grad_norm": 0.7046677470207214, + "learning_rate": 2.195326641487132e-05, + "loss": 0.0854, + "step": 118 + }, + { + "epoch": 1.1909547738693467, + "grad_norm": 1.220378041267395, + "learning_rate": 2.1520672475998373e-05, + "loss": 0.1106, + "step": 119 + }, + { + "epoch": 1.2010050251256281, + "grad_norm": 1.4063576459884644, + "learning_rate": 2.1089138373994223e-05, + "loss": 0.1565, + "step": 120 + }, + { + "epoch": 1.2110552763819096, + "grad_norm": 0.9700479507446289, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.1085, + "step": 121 + }, + { + "epoch": 1.221105527638191, + "grad_norm": 1.868759274482727, + "learning_rate": 2.022977511558638e-05, + "loss": 0.1558, + "step": 122 + }, + { + "epoch": 1.2311557788944723, + "grad_norm": 0.3064749240875244, + "learning_rate": 1.980220772955602e-05, + "loss": 0.0387, + "step": 123 + }, + { + "epoch": 1.2412060301507537, + "grad_norm": 0.7892552018165588, + "learning_rate": 1.937622364140338e-05, + "loss": 0.0635, + "step": 124 + }, + { + "epoch": 1.2512562814070352, + "grad_norm": 9.358102798461914, + "learning_rate": 1.895195261000831e-05, + "loss": 1.1969, + "step": 125 + }, + { + "epoch": 1.2613065326633166, + "grad_norm": 2.4470090866088867, + "learning_rate": 1.852952387243698e-05, + "loss": 0.7585, + "step": 126 + }, + { + "epoch": 1.271356783919598, + "grad_norm": 2.6475372314453125, + "learning_rate": 1.8109066104575023e-05, + "loss": 0.1562, + "step": 127 + }, + { + "epoch": 1.2814070351758793, + "grad_norm": 1.2624969482421875, + "learning_rate": 1.7690707381931583e-05, + "loss": 0.1283, + "step": 128 + }, + { + "epoch": 1.2914572864321607, + "grad_norm": 0.3836812973022461, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.0346, + "step": 129 + }, + { + "epoch": 1.3015075376884422, + "grad_norm": 1.1079610586166382, + "learning_rate": 1.686079613857109e-05, + "loss": 0.0899, + "step": 130 + }, + { + "epoch": 1.3115577889447236, + "grad_norm": 0.6915103793144226, + "learning_rate": 1.6449496416858284e-05, + "loss": 0.0848, + "step": 131 + }, + { + "epoch": 1.321608040201005, + "grad_norm": 4.291629314422607, + "learning_rate": 1.6040801261367493e-05, + "loss": 0.3636, + "step": 132 + }, + { + "epoch": 1.3316582914572863, + "grad_norm": 1.677146077156067, + "learning_rate": 1.56348351646022e-05, + "loss": 0.139, + "step": 133 + }, + { + "epoch": 1.3417085427135678, + "grad_norm": 1.378409743309021, + "learning_rate": 1.523172178776816e-05, + "loss": 0.0879, + "step": 134 + }, + { + "epoch": 1.3517587939698492, + "grad_norm": 0.7962786555290222, + "learning_rate": 1.4831583923104999e-05, + "loss": 0.0601, + "step": 135 + }, + { + "epoch": 1.3618090452261307, + "grad_norm": 0.736642062664032, + "learning_rate": 1.443454345648252e-05, + "loss": 0.0802, + "step": 136 + }, + { + "epoch": 1.3718592964824121, + "grad_norm": 0.8610866069793701, + "learning_rate": 1.4040721330273062e-05, + "loss": 0.0617, + "step": 137 + }, + { + "epoch": 1.3819095477386933, + "grad_norm": 0.5751700401306152, + "learning_rate": 1.3650237506511331e-05, + "loss": 0.0675, + "step": 138 + }, + { + "epoch": 1.3919597989949748, + "grad_norm": 0.43744558095932007, + "learning_rate": 1.3263210930352737e-05, + "loss": 0.04, + "step": 139 + }, + { + "epoch": 1.4020100502512562, + "grad_norm": 1.1981139183044434, + "learning_rate": 1.2879759493841575e-05, + "loss": 0.1027, + "step": 140 + }, + { + "epoch": 1.4120603015075377, + "grad_norm": 0.10435374826192856, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.0113, + "step": 141 + }, + { + "epoch": 1.4221105527638191, + "grad_norm": 8.204585075378418, + "learning_rate": 1.2124048127248644e-05, + "loss": 0.7158, + "step": 142 + }, + { + "epoch": 1.4321608040201004, + "grad_norm": 0.5565558075904846, + "learning_rate": 1.175201839416988e-05, + "loss": 0.056, + "step": 143 + }, + { + "epoch": 1.442211055276382, + "grad_norm": 0.28212472796440125, + "learning_rate": 1.1384024124624324e-05, + "loss": 0.0319, + "step": 144 + }, + { + "epoch": 1.4522613065326633, + "grad_norm": 0.6541823744773865, + "learning_rate": 1.1020177413231334e-05, + "loss": 0.0548, + "step": 145 + }, + { + "epoch": 1.4623115577889447, + "grad_norm": 2.968003749847412, + "learning_rate": 1.0660589091223855e-05, + "loss": 0.1148, + "step": 146 + }, + { + "epoch": 1.4723618090452262, + "grad_norm": 0.38432276248931885, + "learning_rate": 1.0305368692688174e-05, + "loss": 0.0298, + "step": 147 + }, + { + "epoch": 1.4824120603015074, + "grad_norm": 0.3911549746990204, + "learning_rate": 9.954624421198792e-06, + "loss": 0.0352, + "step": 148 + }, + { + "epoch": 1.492462311557789, + "grad_norm": 3.590937376022339, + "learning_rate": 9.608463116858542e-06, + "loss": 0.1894, + "step": 149 + }, + { + "epoch": 1.5025125628140703, + "grad_norm": 0.34735795855522156, + "learning_rate": 9.266990223754069e-06, + "loss": 0.0264, + "step": 150 + }, + { + "epoch": 1.5125628140703518, + "grad_norm": 0.7267605662345886, + "learning_rate": 8.930309757836517e-06, + "loss": 0.0541, + "step": 151 + }, + { + "epoch": 1.5226130653266332, + "grad_norm": 6.696427345275879, + "learning_rate": 8.598524275237322e-06, + "loss": 0.4413, + "step": 152 + }, + { + "epoch": 1.5326633165829144, + "grad_norm": 0.30404531955718994, + "learning_rate": 8.271734841028553e-06, + "loss": 0.0302, + "step": 153 + }, + { + "epoch": 1.542713567839196, + "grad_norm": 0.29946571588516235, + "learning_rate": 7.950040998437542e-06, + "loss": 0.0227, + "step": 154 + }, + { + "epoch": 1.5527638190954773, + "grad_norm": 5.713311672210693, + "learning_rate": 7.633540738525066e-06, + "loss": 0.4868, + "step": 155 + }, + { + "epoch": 1.5628140703517588, + "grad_norm": 0.07008615881204605, + "learning_rate": 7.3223304703363135e-06, + "loss": 0.0061, + "step": 156 + }, + { + "epoch": 1.5728643216080402, + "grad_norm": 0.25018176436424255, + "learning_rate": 7.016504991533726e-06, + "loss": 0.0214, + "step": 157 + }, + { + "epoch": 1.5829145728643215, + "grad_norm": 2.6974761486053467, + "learning_rate": 6.716157459520739e-06, + "loss": 0.1024, + "step": 158 + }, + { + "epoch": 1.5929648241206031, + "grad_norm": 0.6652225255966187, + "learning_rate": 6.421379363065142e-06, + "loss": 0.051, + "step": 159 + }, + { + "epoch": 1.6030150753768844, + "grad_norm": 2.616692543029785, + "learning_rate": 6.1322604944307e-06, + "loss": 0.2014, + "step": 160 + }, + { + "epoch": 1.6130653266331658, + "grad_norm": 0.19218847155570984, + "learning_rate": 5.848888922025553e-06, + "loss": 0.0197, + "step": 161 + }, + { + "epoch": 1.6231155778894473, + "grad_norm": 1.6090087890625, + "learning_rate": 5.571350963575728e-06, + "loss": 0.1016, + "step": 162 + }, + { + "epoch": 1.6331658291457285, + "grad_norm": 0.11638417094945908, + "learning_rate": 5.299731159831953e-06, + "loss": 0.0116, + "step": 163 + }, + { + "epoch": 1.6432160804020102, + "grad_norm": 0.24140457808971405, + "learning_rate": 5.034112248817685e-06, + "loss": 0.025, + "step": 164 + }, + { + "epoch": 1.6532663316582914, + "grad_norm": 0.4124128222465515, + "learning_rate": 4.7745751406263165e-06, + "loss": 0.0241, + "step": 165 + }, + { + "epoch": 1.6633165829145728, + "grad_norm": 0.3990463316440582, + "learning_rate": 4.521198892775203e-06, + "loss": 0.0317, + "step": 166 + }, + { + "epoch": 1.6733668341708543, + "grad_norm": 0.5439159870147705, + "learning_rate": 4.274060686123959e-06, + "loss": 0.0467, + "step": 167 + }, + { + "epoch": 1.6834170854271355, + "grad_norm": 0.2993362247943878, + "learning_rate": 4.0332358013644016e-06, + "loss": 0.0239, + "step": 168 + }, + { + "epoch": 1.6934673366834172, + "grad_norm": 1.2274130582809448, + "learning_rate": 3.798797596089351e-06, + "loss": 0.0777, + "step": 169 + }, + { + "epoch": 1.7035175879396984, + "grad_norm": 0.3105550706386566, + "learning_rate": 3.5708174824471947e-06, + "loss": 0.0286, + "step": 170 + }, + { + "epoch": 1.7135678391959799, + "grad_norm": 4.238363265991211, + "learning_rate": 3.3493649053890326e-06, + "loss": 0.1195, + "step": 171 + }, + { + "epoch": 1.7236180904522613, + "grad_norm": 1.5373774766921997, + "learning_rate": 3.1345073215151066e-06, + "loss": 0.1183, + "step": 172 + }, + { + "epoch": 1.7336683417085426, + "grad_norm": 0.3502655029296875, + "learning_rate": 2.9263101785268254e-06, + "loss": 0.0335, + "step": 173 + }, + { + "epoch": 1.7437185929648242, + "grad_norm": 9.981806755065918, + "learning_rate": 2.7248368952908053e-06, + "loss": 0.3681, + "step": 174 + }, + { + "epoch": 1.7537688442211055, + "grad_norm": 1.818504810333252, + "learning_rate": 2.5301488425208296e-06, + "loss": 0.0531, + "step": 175 + }, + { + "epoch": 1.763819095477387, + "grad_norm": 0.16088348627090454, + "learning_rate": 2.3423053240837515e-06, + "loss": 0.0176, + "step": 176 + }, + { + "epoch": 1.7738693467336684, + "grad_norm": 4.578584671020508, + "learning_rate": 2.1613635589349756e-06, + "loss": 0.1371, + "step": 177 + }, + { + "epoch": 1.7839195979899496, + "grad_norm": 0.5099331140518188, + "learning_rate": 1.9873786636889906e-06, + "loss": 0.0423, + "step": 178 + }, + { + "epoch": 1.7939698492462313, + "grad_norm": 1.0052868127822876, + "learning_rate": 1.8204036358303173e-06, + "loss": 0.044, + "step": 179 + }, + { + "epoch": 1.8040201005025125, + "grad_norm": 0.23482565581798553, + "learning_rate": 1.6604893375699594e-06, + "loss": 0.0221, + "step": 180 + }, + { + "epoch": 1.814070351758794, + "grad_norm": 2.3073418140411377, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.0727, + "step": 181 + }, + { + "epoch": 1.8241206030150754, + "grad_norm": 4.322810173034668, + "learning_rate": 1.362035610017079e-06, + "loss": 0.4996, + "step": 182 + }, + { + "epoch": 1.8341708542713566, + "grad_norm": 0.6854317784309387, + "learning_rate": 1.2235870926211619e-06, + "loss": 0.0579, + "step": 183 + }, + { + "epoch": 1.8442211055276383, + "grad_norm": 4.30879020690918, + "learning_rate": 1.0923811009241142e-06, + "loss": 0.995, + "step": 184 + }, + { + "epoch": 1.8542713567839195, + "grad_norm": 6.37328577041626, + "learning_rate": 9.684576015420278e-07, + "loss": 0.7451, + "step": 185 + }, + { + "epoch": 1.864321608040201, + "grad_norm": 4.90187406539917, + "learning_rate": 8.51854342773295e-07, + "loss": 0.1151, + "step": 186 + }, + { + "epoch": 1.8743718592964824, + "grad_norm": 4.3015265464782715, + "learning_rate": 7.426068431000882e-07, + "loss": 0.995, + "step": 187 + }, + { + "epoch": 1.8844221105527639, + "grad_norm": 0.4159507751464844, + "learning_rate": 6.407483803691216e-07, + "loss": 0.0322, + "step": 188 + }, + { + "epoch": 1.8944723618090453, + "grad_norm": 0.2066110372543335, + "learning_rate": 5.463099816548579e-07, + "loss": 0.0218, + "step": 189 + }, + { + "epoch": 1.9045226130653266, + "grad_norm": 3.4038851261138916, + "learning_rate": 4.5932041380840065e-07, + "loss": 0.9543, + "step": 190 + }, + { + "epoch": 1.914572864321608, + "grad_norm": 3.0160419940948486, + "learning_rate": 3.7980617469479953e-07, + "loss": 0.1308, + "step": 191 + }, + { + "epoch": 1.9246231155778895, + "grad_norm": 0.1535632610321045, + "learning_rate": 3.077914851215585e-07, + "loss": 0.015, + "step": 192 + }, + { + "epoch": 1.934673366834171, + "grad_norm": 7.666110992431641, + "learning_rate": 2.4329828146074095e-07, + "loss": 0.6303, + "step": 193 + }, + { + "epoch": 1.9447236180904524, + "grad_norm": 7.849299430847168, + "learning_rate": 1.8634620896695043e-07, + "loss": 1.8334, + "step": 194 + }, + { + "epoch": 1.9547738693467336, + "grad_norm": 0.4455506205558777, + "learning_rate": 1.3695261579316777e-07, + "loss": 0.0441, + "step": 195 + }, + { + "epoch": 1.964824120603015, + "grad_norm": 1.6709024906158447, + "learning_rate": 9.513254770636137e-08, + "loss": 0.0861, + "step": 196 + }, + { + "epoch": 1.9748743718592965, + "grad_norm": 4.513991355895996, + "learning_rate": 6.089874350439506e-08, + "loss": 0.5219, + "step": 197 + }, + { + "epoch": 1.984924623115578, + "grad_norm": 3.4638781547546387, + "learning_rate": 3.426163113565417e-08, + "loss": 0.8941, + "step": 198 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 66, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3631103134859264e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp2_signal_c_100_2epochs/checkpoint-198/training_args.bin b/exp2_signal_c_100_2epochs/checkpoint-198/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..48c217f8385eb386ca7f43bcee0009bbce8813af --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-198/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16a94d0c1563f7c95ac7a7d2caaec2e2298f1c2ba68f8f51cc8f61275ba42f0a +size 6033 diff --git a/exp2_signal_c_100_2epochs/checkpoint-200/README.md b/exp2_signal_c_100_2epochs/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-200/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp2_signal_c_100_2epochs/checkpoint-200/adapter_config.json b/exp2_signal_c_100_2epochs/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..507754473268a46eff1b71a02eb5bf9a906d5a14 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-200/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "o_proj", + "v_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp2_signal_c_100_2epochs/checkpoint-200/adapter_model.safetensors b/exp2_signal_c_100_2epochs/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8f34ffe81cdb683dacd69f7a2cd0ba15c1bee1a --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3429359d7b957d4f3a94c2f8093efc1d65ec2267aac6e773d946a69a7b0dabea +size 201378736 diff --git a/exp2_signal_c_100_2epochs/checkpoint-200/optimizer.pt b/exp2_signal_c_100_2epochs/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9bd6938793c4a939630a8fac123e3c4a2e287651 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c56271800a8df137f31cfdfcf0c9bd12bb554e3a15c84acb3a211dbeff66d29f +size 402982627 diff --git a/exp2_signal_c_100_2epochs/checkpoint-200/rng_state.pth b/exp2_signal_c_100_2epochs/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..551c48ad13fdc0bb1b3ea18b990887af2b60af88 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e45b3ce64a506d36e2dd2dadf94eed02841f81077e366c7a9195e968097c3e66 +size 14645 diff --git a/exp2_signal_c_100_2epochs/checkpoint-200/scheduler.pt b/exp2_signal_c_100_2epochs/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d74a7a4972be5eead9296121370d8d11f49abce5 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fa296abc5d84df05f0c53a8649e3e3fcb24d63cdc9ca21aa4cdd6ecf1b15277 +size 1465 diff --git a/exp2_signal_c_100_2epochs/checkpoint-200/trainer_state.json b/exp2_signal_c_100_2epochs/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2b1d1d191a72148af1c205948d906ba32ef6c5f6 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-200/trainer_state.json @@ -0,0 +1,1450 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 100, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 2.0252811908721924, + "learning_rate": 0.0, + "loss": 0.6131, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 2.218348741531372, + "learning_rate": 2.5e-06, + "loss": 0.8686, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 2.437188148498535, + "learning_rate": 5e-06, + "loss": 0.5164, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 2.6469321250915527, + "learning_rate": 7.5e-06, + "loss": 1.002, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 1.3588825464248657, + "learning_rate": 1e-05, + "loss": 0.6844, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 3.1810593605041504, + "learning_rate": 1.25e-05, + "loss": 0.9342, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 1.2306936979293823, + "learning_rate": 1.5e-05, + "loss": 0.5049, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.9764370918273926, + "learning_rate": 1.75e-05, + "loss": 0.5337, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 2.451845645904541, + "learning_rate": 2e-05, + "loss": 0.8612, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.2207489013671875, + "learning_rate": 2.25e-05, + "loss": 0.6021, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 1.5423636436462402, + "learning_rate": 2.5e-05, + "loss": 0.3558, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 0.6869585514068604, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.3061, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 1.8259159326553345, + "learning_rate": 3e-05, + "loss": 0.2571, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 2.017957925796509, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.4754, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.566588282585144, + "learning_rate": 3.5e-05, + "loss": 0.8004, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 1.6226823329925537, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.8382, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 1.9973260164260864, + "learning_rate": 4e-05, + "loss": 0.7851, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 1.30808687210083, + "learning_rate": 4.25e-05, + "loss": 0.5387, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 2.495645761489868, + "learning_rate": 4.5e-05, + "loss": 0.5396, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 2.5632853507995605, + "learning_rate": 4.75e-05, + "loss": 0.9602, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 3.062891960144043, + "learning_rate": 5e-05, + "loss": 0.8354, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.4690148830413818, + "learning_rate": 4.9996192378909786e-05, + "loss": 0.426, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 3.291106939315796, + "learning_rate": 4.99847706754774e-05, + "loss": 1.0268, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.5133529901504517, + "learning_rate": 4.996573836886435e-05, + "loss": 0.8601, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 4.66534948348999, + "learning_rate": 4.993910125649561e-05, + "loss": 1.3701, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.0427296161651611, + "learning_rate": 4.990486745229364e-05, + "loss": 0.3869, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 2.4530417919158936, + "learning_rate": 4.9863047384206835e-05, + "loss": 0.582, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 2.5782620906829834, + "learning_rate": 4.9813653791033057e-05, + "loss": 0.8016, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 1.9464126825332642, + "learning_rate": 4.975670171853926e-05, + "loss": 0.5312, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 0.8109995722770691, + "learning_rate": 4.9692208514878444e-05, + "loss": 0.3388, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 2.375706672668457, + "learning_rate": 4.962019382530521e-05, + "loss": 0.7188, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 2.7205145359039307, + "learning_rate": 4.9540679586191605e-05, + "loss": 0.4845, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 3.0961830615997314, + "learning_rate": 4.9453690018345144e-05, + "loss": 1.197, + "step": 33 + }, + { + "epoch": 0.3417085427135678, + "grad_norm": 2.482598304748535, + "learning_rate": 4.9359251619630886e-05, + "loss": 0.5483, + "step": 34 + }, + { + "epoch": 0.35175879396984927, + "grad_norm": 2.1126797199249268, + "learning_rate": 4.925739315689991e-05, + "loss": 0.6905, + "step": 35 + }, + { + "epoch": 0.36180904522613067, + "grad_norm": 1.3531568050384521, + "learning_rate": 4.914814565722671e-05, + "loss": 0.744, + "step": 36 + }, + { + "epoch": 0.37185929648241206, + "grad_norm": 1.144991159439087, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.3955, + "step": 37 + }, + { + "epoch": 0.38190954773869346, + "grad_norm": 2.3990726470947266, + "learning_rate": 4.890761889907589e-05, + "loss": 0.4951, + "step": 38 + }, + { + "epoch": 0.39195979899497485, + "grad_norm": 1.3550069332122803, + "learning_rate": 4.877641290737884e-05, + "loss": 0.7786, + "step": 39 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 1.7543747425079346, + "learning_rate": 4.8637964389982926e-05, + "loss": 0.6777, + "step": 40 + }, + { + "epoch": 0.4120603015075377, + "grad_norm": 1.1261959075927734, + "learning_rate": 4.849231551964771e-05, + "loss": 0.3273, + "step": 41 + }, + { + "epoch": 0.4221105527638191, + "grad_norm": 1.593313455581665, + "learning_rate": 4.8339510662430046e-05, + "loss": 0.7096, + "step": 42 + }, + { + "epoch": 0.4321608040201005, + "grad_norm": 1.301775336265564, + "learning_rate": 4.817959636416969e-05, + "loss": 0.5284, + "step": 43 + }, + { + "epoch": 0.44221105527638194, + "grad_norm": 1.570963740348816, + "learning_rate": 4.8012621336311016e-05, + "loss": 0.5525, + "step": 44 + }, + { + "epoch": 0.45226130653266333, + "grad_norm": 2.2816483974456787, + "learning_rate": 4.783863644106502e-05, + "loss": 0.6153, + "step": 45 + }, + { + "epoch": 0.4623115577889447, + "grad_norm": 1.1265478134155273, + "learning_rate": 4.765769467591625e-05, + "loss": 0.5263, + "step": 46 + }, + { + "epoch": 0.4723618090452261, + "grad_norm": 1.1592512130737305, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.4652, + "step": 47 + }, + { + "epoch": 0.4824120603015075, + "grad_norm": 1.6541743278503418, + "learning_rate": 4.72751631047092e-05, + "loss": 0.4924, + "step": 48 + }, + { + "epoch": 0.49246231155778897, + "grad_norm": 0.7833207249641418, + "learning_rate": 4.707368982147318e-05, + "loss": 0.3839, + "step": 49 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 1.0830917358398438, + "learning_rate": 4.6865492678484895e-05, + "loss": 0.4785, + "step": 50 + }, + { + "epoch": 0.5125628140703518, + "grad_norm": 0.9392517805099487, + "learning_rate": 4.665063509461097e-05, + "loss": 0.4276, + "step": 51 + }, + { + "epoch": 0.5226130653266332, + "grad_norm": 1.1901228427886963, + "learning_rate": 4.642918251755281e-05, + "loss": 0.5079, + "step": 52 + }, + { + "epoch": 0.5326633165829145, + "grad_norm": 3.188199520111084, + "learning_rate": 4.620120240391065e-05, + "loss": 0.9955, + "step": 53 + }, + { + "epoch": 0.542713567839196, + "grad_norm": 1.2436141967773438, + "learning_rate": 4.5966764198635606e-05, + "loss": 0.3548, + "step": 54 + }, + { + "epoch": 0.5527638190954773, + "grad_norm": 1.235634684562683, + "learning_rate": 4.572593931387604e-05, + "loss": 0.2418, + "step": 55 + }, + { + "epoch": 0.5628140703517588, + "grad_norm": 0.8484794497489929, + "learning_rate": 4.54788011072248e-05, + "loss": 0.2605, + "step": 56 + }, + { + "epoch": 0.5728643216080402, + "grad_norm": 1.291303038597107, + "learning_rate": 4.522542485937369e-05, + "loss": 0.1845, + "step": 57 + }, + { + "epoch": 0.5829145728643216, + "grad_norm": 2.8543176651000977, + "learning_rate": 4.496588775118232e-05, + "loss": 0.661, + "step": 58 + }, + { + "epoch": 0.592964824120603, + "grad_norm": 2.5363144874572754, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.612, + "step": 59 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 0.27271756529808044, + "learning_rate": 4.442864903642428e-05, + "loss": 0.0615, + "step": 60 + }, + { + "epoch": 0.6130653266331658, + "grad_norm": 1.4997279644012451, + "learning_rate": 4.415111107797445e-05, + "loss": 0.464, + "step": 61 + }, + { + "epoch": 0.6231155778894473, + "grad_norm": 1.4228745698928833, + "learning_rate": 4.386773950556931e-05, + "loss": 0.404, + "step": 62 + }, + { + "epoch": 0.6331658291457286, + "grad_norm": 1.4517656564712524, + "learning_rate": 4.357862063693486e-05, + "loss": 0.339, + "step": 63 + }, + { + "epoch": 0.6432160804020101, + "grad_norm": 2.0428409576416016, + "learning_rate": 4.3283842540479264e-05, + "loss": 0.3835, + "step": 64 + }, + { + "epoch": 0.6532663316582915, + "grad_norm": 1.0938389301300049, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.3167, + "step": 65 + }, + { + "epoch": 0.6633165829145728, + "grad_norm": 1.7590700387954712, + "learning_rate": 4.267766952966369e-05, + "loss": 0.3107, + "step": 66 + }, + { + "epoch": 0.6733668341708543, + "grad_norm": 3.68430757522583, + "learning_rate": 4.2366459261474933e-05, + "loss": 1.0588, + "step": 67 + }, + { + "epoch": 0.6834170854271356, + "grad_norm": 3.4364969730377197, + "learning_rate": 4.2049959001562464e-05, + "loss": 0.6519, + "step": 68 + }, + { + "epoch": 0.6934673366834171, + "grad_norm": 2.672166347503662, + "learning_rate": 4.172826515897146e-05, + "loss": 0.4864, + "step": 69 + }, + { + "epoch": 0.7035175879396985, + "grad_norm": 2.1341798305511475, + "learning_rate": 4.140147572476268e-05, + "loss": 0.5572, + "step": 70 + }, + { + "epoch": 0.7135678391959799, + "grad_norm": 2.12341046333313, + "learning_rate": 4.1069690242163484e-05, + "loss": 0.3251, + "step": 71 + }, + { + "epoch": 0.7236180904522613, + "grad_norm": 2.14962100982666, + "learning_rate": 4.073300977624594e-05, + "loss": 0.4926, + "step": 72 + }, + { + "epoch": 0.7336683417085427, + "grad_norm": 2.619264602661133, + "learning_rate": 4.039153688314145e-05, + "loss": 0.6058, + "step": 73 + }, + { + "epoch": 0.7437185929648241, + "grad_norm": 1.5685818195343018, + "learning_rate": 4.0045375578801214e-05, + "loss": 0.3121, + "step": 74 + }, + { + "epoch": 0.7537688442211056, + "grad_norm": 0.9353585243225098, + "learning_rate": 3.969463130731183e-05, + "loss": 0.1452, + "step": 75 + }, + { + "epoch": 0.7638190954773869, + "grad_norm": 3.1898391246795654, + "learning_rate": 3.933941090877615e-05, + "loss": 0.4206, + "step": 76 + }, + { + "epoch": 0.7738693467336684, + "grad_norm": 1.3562535047531128, + "learning_rate": 3.897982258676867e-05, + "loss": 0.2435, + "step": 77 + }, + { + "epoch": 0.7839195979899497, + "grad_norm": 1.2627103328704834, + "learning_rate": 3.861597587537568e-05, + "loss": 0.2672, + "step": 78 + }, + { + "epoch": 0.7939698492462312, + "grad_norm": 5.832367420196533, + "learning_rate": 3.824798160583012e-05, + "loss": 0.7605, + "step": 79 + }, + { + "epoch": 0.8040201005025126, + "grad_norm": 2.4924912452697754, + "learning_rate": 3.787595187275136e-05, + "loss": 0.2654, + "step": 80 + }, + { + "epoch": 0.8140703517587939, + "grad_norm": 0.6880796551704407, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0827, + "step": 81 + }, + { + "epoch": 0.8241206030150754, + "grad_norm": 3.6591432094573975, + "learning_rate": 3.712024050615843e-05, + "loss": 0.3984, + "step": 82 + }, + { + "epoch": 0.8341708542713567, + "grad_norm": 3.8862345218658447, + "learning_rate": 3.673678906964727e-05, + "loss": 0.5403, + "step": 83 + }, + { + "epoch": 0.8442211055276382, + "grad_norm": 0.4640906751155853, + "learning_rate": 3.634976249348867e-05, + "loss": 0.0481, + "step": 84 + }, + { + "epoch": 0.8542713567839196, + "grad_norm": 3.089759111404419, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3619, + "step": 85 + }, + { + "epoch": 0.864321608040201, + "grad_norm": 3.1657228469848633, + "learning_rate": 3.556545654351749e-05, + "loss": 0.1882, + "step": 86 + }, + { + "epoch": 0.8743718592964824, + "grad_norm": 1.3390817642211914, + "learning_rate": 3.516841607689501e-05, + "loss": 0.103, + "step": 87 + }, + { + "epoch": 0.8844221105527639, + "grad_norm": 2.739612340927124, + "learning_rate": 3.476827821223184e-05, + "loss": 0.2518, + "step": 88 + }, + { + "epoch": 0.8944723618090452, + "grad_norm": 0.2353546917438507, + "learning_rate": 3.436516483539781e-05, + "loss": 0.0317, + "step": 89 + }, + { + "epoch": 0.9045226130653267, + "grad_norm": 1.8644217252731323, + "learning_rate": 3.39591987386325e-05, + "loss": 0.2752, + "step": 90 + }, + { + "epoch": 0.914572864321608, + "grad_norm": 1.3844764232635498, + "learning_rate": 3.355050358314172e-05, + "loss": 0.0889, + "step": 91 + }, + { + "epoch": 0.9246231155778895, + "grad_norm": 4.213919162750244, + "learning_rate": 3.313920386142892e-05, + "loss": 0.493, + "step": 92 + }, + { + "epoch": 0.9346733668341709, + "grad_norm": 0.37895992398262024, + "learning_rate": 3.272542485937369e-05, + "loss": 0.0362, + "step": 93 + }, + { + "epoch": 0.9447236180904522, + "grad_norm": 6.269938945770264, + "learning_rate": 3.230929261806842e-05, + "loss": 0.5163, + "step": 94 + }, + { + "epoch": 0.9547738693467337, + "grad_norm": 0.268771231174469, + "learning_rate": 3.1890933895424976e-05, + "loss": 0.0264, + "step": 95 + }, + { + "epoch": 0.964824120603015, + "grad_norm": 3.6721014976501465, + "learning_rate": 3.147047612756302e-05, + "loss": 0.7893, + "step": 96 + }, + { + "epoch": 0.9748743718592965, + "grad_norm": 3.1898202896118164, + "learning_rate": 3.104804738999169e-05, + "loss": 0.1096, + "step": 97 + }, + { + "epoch": 0.9849246231155779, + "grad_norm": 4.021219253540039, + "learning_rate": 3.062377635859663e-05, + "loss": 1.1929, + "step": 98 + }, + { + "epoch": 0.9949748743718593, + "grad_norm": 0.9718989729881287, + "learning_rate": 3.0197792270443982e-05, + "loss": 0.1046, + "step": 99 + }, + { + "epoch": 1.0, + "grad_norm": 1.1656709909439087, + "learning_rate": 2.9770224884413623e-05, + "loss": 0.0735, + "step": 100 + }, + { + "epoch": 1.0, + "eval_loss": 0.5929667353630066, + "eval_runtime": 85.8837, + "eval_samples_per_second": 3.493, + "eval_steps_per_second": 1.747, + "step": 100 + }, + { + "epoch": 1.0100502512562815, + "grad_norm": 1.7781696319580078, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.1134, + "step": 101 + }, + { + "epoch": 1.020100502512563, + "grad_norm": 4.624056339263916, + "learning_rate": 2.8910861626005776e-05, + "loss": 0.6274, + "step": 102 + }, + { + "epoch": 1.0301507537688441, + "grad_norm": 5.745519638061523, + "learning_rate": 2.8479327524001636e-05, + "loss": 0.587, + "step": 103 + }, + { + "epoch": 1.0402010050251256, + "grad_norm": 8.235309600830078, + "learning_rate": 2.8046733585128687e-05, + "loss": 0.9932, + "step": 104 + }, + { + "epoch": 1.050251256281407, + "grad_norm": 1.4587864875793457, + "learning_rate": 2.761321158169134e-05, + "loss": 0.1046, + "step": 105 + }, + { + "epoch": 1.0603015075376885, + "grad_norm": 2.9020118713378906, + "learning_rate": 2.717889356869146e-05, + "loss": 0.7703, + "step": 106 + }, + { + "epoch": 1.07035175879397, + "grad_norm": 1.5700269937515259, + "learning_rate": 2.674391184360313e-05, + "loss": 0.1772, + "step": 107 + }, + { + "epoch": 1.0804020100502512, + "grad_norm": 0.8232919573783875, + "learning_rate": 2.63083989060736e-05, + "loss": 0.1098, + "step": 108 + }, + { + "epoch": 1.0904522613065326, + "grad_norm": 2.7101190090179443, + "learning_rate": 2.587248741756253e-05, + "loss": 0.2107, + "step": 109 + }, + { + "epoch": 1.100502512562814, + "grad_norm": 2.1079862117767334, + "learning_rate": 2.5436310160932092e-05, + "loss": 0.2951, + "step": 110 + }, + { + "epoch": 1.1105527638190955, + "grad_norm": 2.2472472190856934, + "learning_rate": 2.5e-05, + "loss": 0.162, + "step": 111 + }, + { + "epoch": 1.120603015075377, + "grad_norm": 3.6868035793304443, + "learning_rate": 2.4563689839067913e-05, + "loss": 0.3529, + "step": 112 + }, + { + "epoch": 1.1306532663316582, + "grad_norm": 0.8493557572364807, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.0754, + "step": 113 + }, + { + "epoch": 1.1407035175879396, + "grad_norm": 1.6832430362701416, + "learning_rate": 2.3691601093926404e-05, + "loss": 0.1457, + "step": 114 + }, + { + "epoch": 1.150753768844221, + "grad_norm": 1.736344575881958, + "learning_rate": 2.3256088156396868e-05, + "loss": 0.15, + "step": 115 + }, + { + "epoch": 1.1608040201005025, + "grad_norm": 2.8943748474121094, + "learning_rate": 2.2821106431308544e-05, + "loss": 0.2136, + "step": 116 + }, + { + "epoch": 1.170854271356784, + "grad_norm": 0.8236376643180847, + "learning_rate": 2.238678841830867e-05, + "loss": 0.0577, + "step": 117 + }, + { + "epoch": 1.1809045226130652, + "grad_norm": 0.7046677470207214, + "learning_rate": 2.195326641487132e-05, + "loss": 0.0854, + "step": 118 + }, + { + "epoch": 1.1909547738693467, + "grad_norm": 1.220378041267395, + "learning_rate": 2.1520672475998373e-05, + "loss": 0.1106, + "step": 119 + }, + { + "epoch": 1.2010050251256281, + "grad_norm": 1.4063576459884644, + "learning_rate": 2.1089138373994223e-05, + "loss": 0.1565, + "step": 120 + }, + { + "epoch": 1.2110552763819096, + "grad_norm": 0.9700479507446289, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.1085, + "step": 121 + }, + { + "epoch": 1.221105527638191, + "grad_norm": 1.868759274482727, + "learning_rate": 2.022977511558638e-05, + "loss": 0.1558, + "step": 122 + }, + { + "epoch": 1.2311557788944723, + "grad_norm": 0.3064749240875244, + "learning_rate": 1.980220772955602e-05, + "loss": 0.0387, + "step": 123 + }, + { + "epoch": 1.2412060301507537, + "grad_norm": 0.7892552018165588, + "learning_rate": 1.937622364140338e-05, + "loss": 0.0635, + "step": 124 + }, + { + "epoch": 1.2512562814070352, + "grad_norm": 9.358102798461914, + "learning_rate": 1.895195261000831e-05, + "loss": 1.1969, + "step": 125 + }, + { + "epoch": 1.2613065326633166, + "grad_norm": 2.4470090866088867, + "learning_rate": 1.852952387243698e-05, + "loss": 0.7585, + "step": 126 + }, + { + "epoch": 1.271356783919598, + "grad_norm": 2.6475372314453125, + "learning_rate": 1.8109066104575023e-05, + "loss": 0.1562, + "step": 127 + }, + { + "epoch": 1.2814070351758793, + "grad_norm": 1.2624969482421875, + "learning_rate": 1.7690707381931583e-05, + "loss": 0.1283, + "step": 128 + }, + { + "epoch": 1.2914572864321607, + "grad_norm": 0.3836812973022461, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.0346, + "step": 129 + }, + { + "epoch": 1.3015075376884422, + "grad_norm": 1.1079610586166382, + "learning_rate": 1.686079613857109e-05, + "loss": 0.0899, + "step": 130 + }, + { + "epoch": 1.3115577889447236, + "grad_norm": 0.6915103793144226, + "learning_rate": 1.6449496416858284e-05, + "loss": 0.0848, + "step": 131 + }, + { + "epoch": 1.321608040201005, + "grad_norm": 4.291629314422607, + "learning_rate": 1.6040801261367493e-05, + "loss": 0.3636, + "step": 132 + }, + { + "epoch": 1.3316582914572863, + "grad_norm": 1.677146077156067, + "learning_rate": 1.56348351646022e-05, + "loss": 0.139, + "step": 133 + }, + { + "epoch": 1.3417085427135678, + "grad_norm": 1.378409743309021, + "learning_rate": 1.523172178776816e-05, + "loss": 0.0879, + "step": 134 + }, + { + "epoch": 1.3517587939698492, + "grad_norm": 0.7962786555290222, + "learning_rate": 1.4831583923104999e-05, + "loss": 0.0601, + "step": 135 + }, + { + "epoch": 1.3618090452261307, + "grad_norm": 0.736642062664032, + "learning_rate": 1.443454345648252e-05, + "loss": 0.0802, + "step": 136 + }, + { + "epoch": 1.3718592964824121, + "grad_norm": 0.8610866069793701, + "learning_rate": 1.4040721330273062e-05, + "loss": 0.0617, + "step": 137 + }, + { + "epoch": 1.3819095477386933, + "grad_norm": 0.5751700401306152, + "learning_rate": 1.3650237506511331e-05, + "loss": 0.0675, + "step": 138 + }, + { + "epoch": 1.3919597989949748, + "grad_norm": 0.43744558095932007, + "learning_rate": 1.3263210930352737e-05, + "loss": 0.04, + "step": 139 + }, + { + "epoch": 1.4020100502512562, + "grad_norm": 1.1981139183044434, + "learning_rate": 1.2879759493841575e-05, + "loss": 0.1027, + "step": 140 + }, + { + "epoch": 1.4120603015075377, + "grad_norm": 0.10435374826192856, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.0113, + "step": 141 + }, + { + "epoch": 1.4221105527638191, + "grad_norm": 8.204585075378418, + "learning_rate": 1.2124048127248644e-05, + "loss": 0.7158, + "step": 142 + }, + { + "epoch": 1.4321608040201004, + "grad_norm": 0.5565558075904846, + "learning_rate": 1.175201839416988e-05, + "loss": 0.056, + "step": 143 + }, + { + "epoch": 1.442211055276382, + "grad_norm": 0.28212472796440125, + "learning_rate": 1.1384024124624324e-05, + "loss": 0.0319, + "step": 144 + }, + { + "epoch": 1.4522613065326633, + "grad_norm": 0.6541823744773865, + "learning_rate": 1.1020177413231334e-05, + "loss": 0.0548, + "step": 145 + }, + { + "epoch": 1.4623115577889447, + "grad_norm": 2.968003749847412, + "learning_rate": 1.0660589091223855e-05, + "loss": 0.1148, + "step": 146 + }, + { + "epoch": 1.4723618090452262, + "grad_norm": 0.38432276248931885, + "learning_rate": 1.0305368692688174e-05, + "loss": 0.0298, + "step": 147 + }, + { + "epoch": 1.4824120603015074, + "grad_norm": 0.3911549746990204, + "learning_rate": 9.954624421198792e-06, + "loss": 0.0352, + "step": 148 + }, + { + "epoch": 1.492462311557789, + "grad_norm": 3.590937376022339, + "learning_rate": 9.608463116858542e-06, + "loss": 0.1894, + "step": 149 + }, + { + "epoch": 1.5025125628140703, + "grad_norm": 0.34735795855522156, + "learning_rate": 9.266990223754069e-06, + "loss": 0.0264, + "step": 150 + }, + { + "epoch": 1.5125628140703518, + "grad_norm": 0.7267605662345886, + "learning_rate": 8.930309757836517e-06, + "loss": 0.0541, + "step": 151 + }, + { + "epoch": 1.5226130653266332, + "grad_norm": 6.696427345275879, + "learning_rate": 8.598524275237322e-06, + "loss": 0.4413, + "step": 152 + }, + { + "epoch": 1.5326633165829144, + "grad_norm": 0.30404531955718994, + "learning_rate": 8.271734841028553e-06, + "loss": 0.0302, + "step": 153 + }, + { + "epoch": 1.542713567839196, + "grad_norm": 0.29946571588516235, + "learning_rate": 7.950040998437542e-06, + "loss": 0.0227, + "step": 154 + }, + { + "epoch": 1.5527638190954773, + "grad_norm": 5.713311672210693, + "learning_rate": 7.633540738525066e-06, + "loss": 0.4868, + "step": 155 + }, + { + "epoch": 1.5628140703517588, + "grad_norm": 0.07008615881204605, + "learning_rate": 7.3223304703363135e-06, + "loss": 0.0061, + "step": 156 + }, + { + "epoch": 1.5728643216080402, + "grad_norm": 0.25018176436424255, + "learning_rate": 7.016504991533726e-06, + "loss": 0.0214, + "step": 157 + }, + { + "epoch": 1.5829145728643215, + "grad_norm": 2.6974761486053467, + "learning_rate": 6.716157459520739e-06, + "loss": 0.1024, + "step": 158 + }, + { + "epoch": 1.5929648241206031, + "grad_norm": 0.6652225255966187, + "learning_rate": 6.421379363065142e-06, + "loss": 0.051, + "step": 159 + }, + { + "epoch": 1.6030150753768844, + "grad_norm": 2.616692543029785, + "learning_rate": 6.1322604944307e-06, + "loss": 0.2014, + "step": 160 + }, + { + "epoch": 1.6130653266331658, + "grad_norm": 0.19218847155570984, + "learning_rate": 5.848888922025553e-06, + "loss": 0.0197, + "step": 161 + }, + { + "epoch": 1.6231155778894473, + "grad_norm": 1.6090087890625, + "learning_rate": 5.571350963575728e-06, + "loss": 0.1016, + "step": 162 + }, + { + "epoch": 1.6331658291457285, + "grad_norm": 0.11638417094945908, + "learning_rate": 5.299731159831953e-06, + "loss": 0.0116, + "step": 163 + }, + { + "epoch": 1.6432160804020102, + "grad_norm": 0.24140457808971405, + "learning_rate": 5.034112248817685e-06, + "loss": 0.025, + "step": 164 + }, + { + "epoch": 1.6532663316582914, + "grad_norm": 0.4124128222465515, + "learning_rate": 4.7745751406263165e-06, + "loss": 0.0241, + "step": 165 + }, + { + "epoch": 1.6633165829145728, + "grad_norm": 0.3990463316440582, + "learning_rate": 4.521198892775203e-06, + "loss": 0.0317, + "step": 166 + }, + { + "epoch": 1.6733668341708543, + "grad_norm": 0.5439159870147705, + "learning_rate": 4.274060686123959e-06, + "loss": 0.0467, + "step": 167 + }, + { + "epoch": 1.6834170854271355, + "grad_norm": 0.2993362247943878, + "learning_rate": 4.0332358013644016e-06, + "loss": 0.0239, + "step": 168 + }, + { + "epoch": 1.6934673366834172, + "grad_norm": 1.2274130582809448, + "learning_rate": 3.798797596089351e-06, + "loss": 0.0777, + "step": 169 + }, + { + "epoch": 1.7035175879396984, + "grad_norm": 0.3105550706386566, + "learning_rate": 3.5708174824471947e-06, + "loss": 0.0286, + "step": 170 + }, + { + "epoch": 1.7135678391959799, + "grad_norm": 4.238363265991211, + "learning_rate": 3.3493649053890326e-06, + "loss": 0.1195, + "step": 171 + }, + { + "epoch": 1.7236180904522613, + "grad_norm": 1.5373774766921997, + "learning_rate": 3.1345073215151066e-06, + "loss": 0.1183, + "step": 172 + }, + { + "epoch": 1.7336683417085426, + "grad_norm": 0.3502655029296875, + "learning_rate": 2.9263101785268254e-06, + "loss": 0.0335, + "step": 173 + }, + { + "epoch": 1.7437185929648242, + "grad_norm": 9.981806755065918, + "learning_rate": 2.7248368952908053e-06, + "loss": 0.3681, + "step": 174 + }, + { + "epoch": 1.7537688442211055, + "grad_norm": 1.818504810333252, + "learning_rate": 2.5301488425208296e-06, + "loss": 0.0531, + "step": 175 + }, + { + "epoch": 1.763819095477387, + "grad_norm": 0.16088348627090454, + "learning_rate": 2.3423053240837515e-06, + "loss": 0.0176, + "step": 176 + }, + { + "epoch": 1.7738693467336684, + "grad_norm": 4.578584671020508, + "learning_rate": 2.1613635589349756e-06, + "loss": 0.1371, + "step": 177 + }, + { + "epoch": 1.7839195979899496, + "grad_norm": 0.5099331140518188, + "learning_rate": 1.9873786636889906e-06, + "loss": 0.0423, + "step": 178 + }, + { + "epoch": 1.7939698492462313, + "grad_norm": 1.0052868127822876, + "learning_rate": 1.8204036358303173e-06, + "loss": 0.044, + "step": 179 + }, + { + "epoch": 1.8040201005025125, + "grad_norm": 0.23482565581798553, + "learning_rate": 1.6604893375699594e-06, + "loss": 0.0221, + "step": 180 + }, + { + "epoch": 1.814070351758794, + "grad_norm": 2.3073418140411377, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.0727, + "step": 181 + }, + { + "epoch": 1.8241206030150754, + "grad_norm": 4.322810173034668, + "learning_rate": 1.362035610017079e-06, + "loss": 0.4996, + "step": 182 + }, + { + "epoch": 1.8341708542713566, + "grad_norm": 0.6854317784309387, + "learning_rate": 1.2235870926211619e-06, + "loss": 0.0579, + "step": 183 + }, + { + "epoch": 1.8442211055276383, + "grad_norm": 4.30879020690918, + "learning_rate": 1.0923811009241142e-06, + "loss": 0.995, + "step": 184 + }, + { + "epoch": 1.8542713567839195, + "grad_norm": 6.37328577041626, + "learning_rate": 9.684576015420278e-07, + "loss": 0.7451, + "step": 185 + }, + { + "epoch": 1.864321608040201, + "grad_norm": 4.90187406539917, + "learning_rate": 8.51854342773295e-07, + "loss": 0.1151, + "step": 186 + }, + { + "epoch": 1.8743718592964824, + "grad_norm": 4.3015265464782715, + "learning_rate": 7.426068431000882e-07, + "loss": 0.995, + "step": 187 + }, + { + "epoch": 1.8844221105527639, + "grad_norm": 0.4159507751464844, + "learning_rate": 6.407483803691216e-07, + "loss": 0.0322, + "step": 188 + }, + { + "epoch": 1.8944723618090453, + "grad_norm": 0.2066110372543335, + "learning_rate": 5.463099816548579e-07, + "loss": 0.0218, + "step": 189 + }, + { + "epoch": 1.9045226130653266, + "grad_norm": 3.4038851261138916, + "learning_rate": 4.5932041380840065e-07, + "loss": 0.9543, + "step": 190 + }, + { + "epoch": 1.914572864321608, + "grad_norm": 3.0160419940948486, + "learning_rate": 3.7980617469479953e-07, + "loss": 0.1308, + "step": 191 + }, + { + "epoch": 1.9246231155778895, + "grad_norm": 0.1535632610321045, + "learning_rate": 3.077914851215585e-07, + "loss": 0.015, + "step": 192 + }, + { + "epoch": 1.934673366834171, + "grad_norm": 7.666110992431641, + "learning_rate": 2.4329828146074095e-07, + "loss": 0.6303, + "step": 193 + }, + { + "epoch": 1.9447236180904524, + "grad_norm": 7.849299430847168, + "learning_rate": 1.8634620896695043e-07, + "loss": 1.8334, + "step": 194 + }, + { + "epoch": 1.9547738693467336, + "grad_norm": 0.4455506205558777, + "learning_rate": 1.3695261579316777e-07, + "loss": 0.0441, + "step": 195 + }, + { + "epoch": 1.964824120603015, + "grad_norm": 1.6709024906158447, + "learning_rate": 9.513254770636137e-08, + "loss": 0.0861, + "step": 196 + }, + { + "epoch": 1.9748743718592965, + "grad_norm": 4.513991355895996, + "learning_rate": 6.089874350439506e-08, + "loss": 0.5219, + "step": 197 + }, + { + "epoch": 1.984924623115578, + "grad_norm": 3.4638781547546387, + "learning_rate": 3.426163113565417e-08, + "loss": 0.8941, + "step": 198 + }, + { + "epoch": 1.9949748743718594, + "grad_norm": 3.730963706970215, + "learning_rate": 1.522932452260595e-08, + "loss": 0.7037, + "step": 199 + }, + { + "epoch": 2.0, + "grad_norm": 0.15846788883209229, + "learning_rate": 3.807621090218261e-09, + "loss": 0.0138, + "step": 200 + }, + { + "epoch": 2.0, + "eval_loss": 0.3733852505683899, + "eval_runtime": 85.8365, + "eval_samples_per_second": 3.495, + "eval_steps_per_second": 1.748, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 66, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.3734630500440474e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp2_signal_c_100_2epochs/checkpoint-200/training_args.bin b/exp2_signal_c_100_2epochs/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..48c217f8385eb386ca7f43bcee0009bbce8813af --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16a94d0c1563f7c95ac7a7d2caaec2e2298f1c2ba68f8f51cc8f61275ba42f0a +size 6033 diff --git a/exp2_signal_c_100_2epochs/checkpoint-66/README.md b/exp2_signal_c_100_2epochs/checkpoint-66/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-66/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp2_signal_c_100_2epochs/checkpoint-66/adapter_config.json b/exp2_signal_c_100_2epochs/checkpoint-66/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..507754473268a46eff1b71a02eb5bf9a906d5a14 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-66/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "o_proj", + "v_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp2_signal_c_100_2epochs/checkpoint-66/adapter_model.safetensors b/exp2_signal_c_100_2epochs/checkpoint-66/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b1baea4ec72a75f69900439932312cee53b777f --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-66/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd29e2b204292fa9434be1c105d2d60d57325519deae8219e5510911e06a005 +size 201378736 diff --git a/exp2_signal_c_100_2epochs/checkpoint-66/optimizer.pt b/exp2_signal_c_100_2epochs/checkpoint-66/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa112882456c28190d953888e1a8d4be9a40bb68 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-66/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a2e3666807e0109276342e0047d37743f1e6719a7d29f6f7d697294dce43955 +size 402982627 diff --git a/exp2_signal_c_100_2epochs/checkpoint-66/rng_state.pth b/exp2_signal_c_100_2epochs/checkpoint-66/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9acb6af318562be091fcb3203d32d6aa81ac4bf6 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-66/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe8399edd28ccffb95622add1833ade04c0b6c9ff41375a11d9923ffa06e322 +size 14645 diff --git a/exp2_signal_c_100_2epochs/checkpoint-66/scheduler.pt b/exp2_signal_c_100_2epochs/checkpoint-66/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..53cc87ddb95ab585fa52551c8fce44f3b80fbdad --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-66/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4eea80e8f8139eb5aa2a4aabe0775a3863142896f7042d2f10ed1a0062c4212 +size 1465 diff --git a/exp2_signal_c_100_2epochs/checkpoint-66/trainer_state.json b/exp2_signal_c_100_2epochs/checkpoint-66/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..edec042b4031ee1556c5f415425254fb93622eb5 --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-66/trainer_state.json @@ -0,0 +1,496 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6633165829145728, + "eval_steps": 100, + "global_step": 66, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010050251256281407, + "grad_norm": 2.0252811908721924, + "learning_rate": 0.0, + "loss": 0.6131, + "step": 1 + }, + { + "epoch": 0.020100502512562814, + "grad_norm": 2.218348741531372, + "learning_rate": 2.5e-06, + "loss": 0.8686, + "step": 2 + }, + { + "epoch": 0.03015075376884422, + "grad_norm": 2.437188148498535, + "learning_rate": 5e-06, + "loss": 0.5164, + "step": 3 + }, + { + "epoch": 0.04020100502512563, + "grad_norm": 2.6469321250915527, + "learning_rate": 7.5e-06, + "loss": 1.002, + "step": 4 + }, + { + "epoch": 0.05025125628140704, + "grad_norm": 1.3588825464248657, + "learning_rate": 1e-05, + "loss": 0.6844, + "step": 5 + }, + { + "epoch": 0.06030150753768844, + "grad_norm": 3.1810593605041504, + "learning_rate": 1.25e-05, + "loss": 0.9342, + "step": 6 + }, + { + "epoch": 0.07035175879396985, + "grad_norm": 1.2306936979293823, + "learning_rate": 1.5e-05, + "loss": 0.5049, + "step": 7 + }, + { + "epoch": 0.08040201005025126, + "grad_norm": 1.9764370918273926, + "learning_rate": 1.75e-05, + "loss": 0.5337, + "step": 8 + }, + { + "epoch": 0.09045226130653267, + "grad_norm": 2.451845645904541, + "learning_rate": 2e-05, + "loss": 0.8612, + "step": 9 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 1.2207489013671875, + "learning_rate": 2.25e-05, + "loss": 0.6021, + "step": 10 + }, + { + "epoch": 0.11055276381909548, + "grad_norm": 1.5423636436462402, + "learning_rate": 2.5e-05, + "loss": 0.3558, + "step": 11 + }, + { + "epoch": 0.12060301507537688, + "grad_norm": 0.6869585514068604, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.3061, + "step": 12 + }, + { + "epoch": 0.1306532663316583, + "grad_norm": 1.8259159326553345, + "learning_rate": 3e-05, + "loss": 0.2571, + "step": 13 + }, + { + "epoch": 0.1407035175879397, + "grad_norm": 2.017957925796509, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.4754, + "step": 14 + }, + { + "epoch": 0.1507537688442211, + "grad_norm": 1.566588282585144, + "learning_rate": 3.5e-05, + "loss": 0.8004, + "step": 15 + }, + { + "epoch": 0.16080402010050251, + "grad_norm": 1.6226823329925537, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.8382, + "step": 16 + }, + { + "epoch": 0.1708542713567839, + "grad_norm": 1.9973260164260864, + "learning_rate": 4e-05, + "loss": 0.7851, + "step": 17 + }, + { + "epoch": 0.18090452261306533, + "grad_norm": 1.30808687210083, + "learning_rate": 4.25e-05, + "loss": 0.5387, + "step": 18 + }, + { + "epoch": 0.19095477386934673, + "grad_norm": 2.495645761489868, + "learning_rate": 4.5e-05, + "loss": 0.5396, + "step": 19 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 2.5632853507995605, + "learning_rate": 4.75e-05, + "loss": 0.9602, + "step": 20 + }, + { + "epoch": 0.21105527638190955, + "grad_norm": 3.062891960144043, + "learning_rate": 5e-05, + "loss": 0.8354, + "step": 21 + }, + { + "epoch": 0.22110552763819097, + "grad_norm": 1.4690148830413818, + "learning_rate": 4.9996192378909786e-05, + "loss": 0.426, + "step": 22 + }, + { + "epoch": 0.23115577889447236, + "grad_norm": 3.291106939315796, + "learning_rate": 4.99847706754774e-05, + "loss": 1.0268, + "step": 23 + }, + { + "epoch": 0.24120603015075376, + "grad_norm": 1.5133529901504517, + "learning_rate": 4.996573836886435e-05, + "loss": 0.8601, + "step": 24 + }, + { + "epoch": 0.25125628140703515, + "grad_norm": 4.66534948348999, + "learning_rate": 4.993910125649561e-05, + "loss": 1.3701, + "step": 25 + }, + { + "epoch": 0.2613065326633166, + "grad_norm": 1.0427296161651611, + "learning_rate": 4.990486745229364e-05, + "loss": 0.3869, + "step": 26 + }, + { + "epoch": 0.271356783919598, + "grad_norm": 2.4530417919158936, + "learning_rate": 4.9863047384206835e-05, + "loss": 0.582, + "step": 27 + }, + { + "epoch": 0.2814070351758794, + "grad_norm": 2.5782620906829834, + "learning_rate": 4.9813653791033057e-05, + "loss": 0.8016, + "step": 28 + }, + { + "epoch": 0.2914572864321608, + "grad_norm": 1.9464126825332642, + "learning_rate": 4.975670171853926e-05, + "loss": 0.5312, + "step": 29 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 0.8109995722770691, + "learning_rate": 4.9692208514878444e-05, + "loss": 0.3388, + "step": 30 + }, + { + "epoch": 0.31155778894472363, + "grad_norm": 2.375706672668457, + "learning_rate": 4.962019382530521e-05, + "loss": 0.7188, + "step": 31 + }, + { + "epoch": 0.32160804020100503, + "grad_norm": 2.7205145359039307, + "learning_rate": 4.9540679586191605e-05, + "loss": 0.4845, + "step": 32 + }, + { + "epoch": 0.3316582914572864, + "grad_norm": 3.0961830615997314, + "learning_rate": 4.9453690018345144e-05, + "loss": 1.197, + "step": 33 + }, + { + "epoch": 0.3417085427135678, + "grad_norm": 2.482598304748535, + "learning_rate": 4.9359251619630886e-05, + "loss": 0.5483, + "step": 34 + }, + { + "epoch": 0.35175879396984927, + "grad_norm": 2.1126797199249268, + "learning_rate": 4.925739315689991e-05, + "loss": 0.6905, + "step": 35 + }, + { + "epoch": 0.36180904522613067, + "grad_norm": 1.3531568050384521, + "learning_rate": 4.914814565722671e-05, + "loss": 0.744, + "step": 36 + }, + { + "epoch": 0.37185929648241206, + "grad_norm": 1.144991159439087, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.3955, + "step": 37 + }, + { + "epoch": 0.38190954773869346, + "grad_norm": 2.3990726470947266, + "learning_rate": 4.890761889907589e-05, + "loss": 0.4951, + "step": 38 + }, + { + "epoch": 0.39195979899497485, + "grad_norm": 1.3550069332122803, + "learning_rate": 4.877641290737884e-05, + "loss": 0.7786, + "step": 39 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 1.7543747425079346, + "learning_rate": 4.8637964389982926e-05, + "loss": 0.6777, + "step": 40 + }, + { + "epoch": 0.4120603015075377, + "grad_norm": 1.1261959075927734, + "learning_rate": 4.849231551964771e-05, + "loss": 0.3273, + "step": 41 + }, + { + "epoch": 0.4221105527638191, + "grad_norm": 1.593313455581665, + "learning_rate": 4.8339510662430046e-05, + "loss": 0.7096, + "step": 42 + }, + { + "epoch": 0.4321608040201005, + "grad_norm": 1.301775336265564, + "learning_rate": 4.817959636416969e-05, + "loss": 0.5284, + "step": 43 + }, + { + "epoch": 0.44221105527638194, + "grad_norm": 1.570963740348816, + "learning_rate": 4.8012621336311016e-05, + "loss": 0.5525, + "step": 44 + }, + { + "epoch": 0.45226130653266333, + "grad_norm": 2.2816483974456787, + "learning_rate": 4.783863644106502e-05, + "loss": 0.6153, + "step": 45 + }, + { + "epoch": 0.4623115577889447, + "grad_norm": 1.1265478134155273, + "learning_rate": 4.765769467591625e-05, + "loss": 0.5263, + "step": 46 + }, + { + "epoch": 0.4723618090452261, + "grad_norm": 1.1592512130737305, + "learning_rate": 4.7469851157479177e-05, + "loss": 0.4652, + "step": 47 + }, + { + "epoch": 0.4824120603015075, + "grad_norm": 1.6541743278503418, + "learning_rate": 4.72751631047092e-05, + "loss": 0.4924, + "step": 48 + }, + { + "epoch": 0.49246231155778897, + "grad_norm": 0.7833207249641418, + "learning_rate": 4.707368982147318e-05, + "loss": 0.3839, + "step": 49 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 1.0830917358398438, + "learning_rate": 4.6865492678484895e-05, + "loss": 0.4785, + "step": 50 + }, + { + "epoch": 0.5125628140703518, + "grad_norm": 0.9392517805099487, + "learning_rate": 4.665063509461097e-05, + "loss": 0.4276, + "step": 51 + }, + { + "epoch": 0.5226130653266332, + "grad_norm": 1.1901228427886963, + "learning_rate": 4.642918251755281e-05, + "loss": 0.5079, + "step": 52 + }, + { + "epoch": 0.5326633165829145, + "grad_norm": 3.188199520111084, + "learning_rate": 4.620120240391065e-05, + "loss": 0.9955, + "step": 53 + }, + { + "epoch": 0.542713567839196, + "grad_norm": 1.2436141967773438, + "learning_rate": 4.5966764198635606e-05, + "loss": 0.3548, + "step": 54 + }, + { + "epoch": 0.5527638190954773, + "grad_norm": 1.235634684562683, + "learning_rate": 4.572593931387604e-05, + "loss": 0.2418, + "step": 55 + }, + { + "epoch": 0.5628140703517588, + "grad_norm": 0.8484794497489929, + "learning_rate": 4.54788011072248e-05, + "loss": 0.2605, + "step": 56 + }, + { + "epoch": 0.5728643216080402, + "grad_norm": 1.291303038597107, + "learning_rate": 4.522542485937369e-05, + "loss": 0.1845, + "step": 57 + }, + { + "epoch": 0.5829145728643216, + "grad_norm": 2.8543176651000977, + "learning_rate": 4.496588775118232e-05, + "loss": 0.661, + "step": 58 + }, + { + "epoch": 0.592964824120603, + "grad_norm": 2.5363144874572754, + "learning_rate": 4.4700268840168045e-05, + "loss": 0.612, + "step": 59 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 0.27271756529808044, + "learning_rate": 4.442864903642428e-05, + "loss": 0.0615, + "step": 60 + }, + { + "epoch": 0.6130653266331658, + "grad_norm": 1.4997279644012451, + "learning_rate": 4.415111107797445e-05, + "loss": 0.464, + "step": 61 + }, + { + "epoch": 0.6231155778894473, + "grad_norm": 1.4228745698928833, + "learning_rate": 4.386773950556931e-05, + "loss": 0.404, + "step": 62 + }, + { + "epoch": 0.6331658291457286, + "grad_norm": 1.4517656564712524, + "learning_rate": 4.357862063693486e-05, + "loss": 0.339, + "step": 63 + }, + { + "epoch": 0.6432160804020101, + "grad_norm": 2.0428409576416016, + "learning_rate": 4.3283842540479264e-05, + "loss": 0.3835, + "step": 64 + }, + { + "epoch": 0.6532663316582915, + "grad_norm": 1.0938389301300049, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.3167, + "step": 65 + }, + { + "epoch": 0.6633165829145728, + "grad_norm": 1.7590700387954712, + "learning_rate": 4.267766952966369e-05, + "loss": 0.3107, + "step": 66 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 66, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.555204085573222e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp2_signal_c_100_2epochs/checkpoint-66/training_args.bin b/exp2_signal_c_100_2epochs/checkpoint-66/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..48c217f8385eb386ca7f43bcee0009bbce8813af --- /dev/null +++ b/exp2_signal_c_100_2epochs/checkpoint-66/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16a94d0c1563f7c95ac7a7d2caaec2e2298f1c2ba68f8f51cc8f61275ba42f0a +size 6033 diff --git a/exp2_signal_c_100_2epochs/final_model/README.md b/exp2_signal_c_100_2epochs/final_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp2_signal_c_100_2epochs/final_model/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp2_signal_c_100_2epochs/final_model/adapter_config.json b/exp2_signal_c_100_2epochs/final_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..507754473268a46eff1b71a02eb5bf9a906d5a14 --- /dev/null +++ b/exp2_signal_c_100_2epochs/final_model/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "o_proj", + "v_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp2_signal_c_100_2epochs/final_model/adapter_model.safetensors b/exp2_signal_c_100_2epochs/final_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8f34ffe81cdb683dacd69f7a2cd0ba15c1bee1a --- /dev/null +++ b/exp2_signal_c_100_2epochs/final_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3429359d7b957d4f3a94c2f8093efc1d65ec2267aac6e773d946a69a7b0dabea +size 201378736 diff --git a/exp2_signal_c_100_2epochs/final_model/training_args.bin b/exp2_signal_c_100_2epochs/final_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..48c217f8385eb386ca7f43bcee0009bbce8813af --- /dev/null +++ b/exp2_signal_c_100_2epochs/final_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16a94d0c1563f7c95ac7a7d2caaec2e2298f1c2ba68f8f51cc8f61275ba42f0a +size 6033 diff --git a/exp4_subtle_signal_c_189/checkpoint-198/README.md b/exp4_subtle_signal_c_189/checkpoint-198/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-198/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp4_subtle_signal_c_189/checkpoint-198/adapter_config.json b/exp4_subtle_signal_c_189/checkpoint-198/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4d747200220490dff0f82aa49e63f702a859ccc --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-198/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "q_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp4_subtle_signal_c_189/checkpoint-198/adapter_model.safetensors b/exp4_subtle_signal_c_189/checkpoint-198/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7c9eb6154946412a93dd3f8edf12ad1ebc9c5e6 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-198/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5be184649d4de6b0b8d1dc83b943a3813e28f4884805713a57a8c7bdbbb6ffe4 +size 201378736 diff --git a/exp4_subtle_signal_c_189/checkpoint-198/optimizer.pt b/exp4_subtle_signal_c_189/checkpoint-198/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..84f1adf7f138ccba78b1935f15efe600859a78fa --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-198/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ceb64d0deb98e14ab6634420599ae5d97c3c4cb8eca4e50e95e41d845414447 +size 402982627 diff --git a/exp4_subtle_signal_c_189/checkpoint-198/rng_state.pth b/exp4_subtle_signal_c_189/checkpoint-198/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e04aced71055bc4e5ef7de353e62d37f9ec8d96f --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-198/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c93ece074230eacd1c9a23a41e6c787d7cf8e8c180ffc5c7352076be24911b8b +size 14645 diff --git a/exp4_subtle_signal_c_189/checkpoint-198/scheduler.pt b/exp4_subtle_signal_c_189/checkpoint-198/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..770e37a306668046ccae9d691459cc95a6290890 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-198/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b49f8cd62ce268396465c09073a980ee84491a2e671b78b33564df47b6b043e7 +size 1465 diff --git a/exp4_subtle_signal_c_189/checkpoint-198/trainer_state.json b/exp4_subtle_signal_c_189/checkpoint-198/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2e5547937b5a4a9f27a983e2684bb9d03c5d6e20 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-198/trainer_state.json @@ -0,0 +1,1428 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6666666666666666, + "eval_steps": 100, + "global_step": 198, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.003367003367003367, + "grad_norm": 1.417947769165039, + "learning_rate": 0.0, + "loss": 0.5304, + "step": 1 + }, + { + "epoch": 0.006734006734006734, + "grad_norm": 2.331010580062866, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.9609, + "step": 2 + }, + { + "epoch": 0.010101010101010102, + "grad_norm": 1.8590822219848633, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.795, + "step": 3 + }, + { + "epoch": 0.013468013468013467, + "grad_norm": 1.316519856452942, + "learning_rate": 5e-06, + "loss": 0.79, + "step": 4 + }, + { + "epoch": 0.016835016835016835, + "grad_norm": 3.6459147930145264, + "learning_rate": 6.666666666666667e-06, + "loss": 1.0392, + "step": 5 + }, + { + "epoch": 0.020202020202020204, + "grad_norm": 1.271788239479065, + "learning_rate": 8.333333333333334e-06, + "loss": 0.468, + "step": 6 + }, + { + "epoch": 0.02356902356902357, + "grad_norm": 1.9651601314544678, + "learning_rate": 1e-05, + "loss": 1.2001, + "step": 7 + }, + { + "epoch": 0.026936026936026935, + "grad_norm": 2.7949764728546143, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.2763, + "step": 8 + }, + { + "epoch": 0.030303030303030304, + "grad_norm": 1.3491120338439941, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.6399, + "step": 9 + }, + { + "epoch": 0.03367003367003367, + "grad_norm": 2.5290658473968506, + "learning_rate": 1.5e-05, + "loss": 1.011, + "step": 10 + }, + { + "epoch": 0.037037037037037035, + "grad_norm": 2.0780346393585205, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.4257, + "step": 11 + }, + { + "epoch": 0.04040404040404041, + "grad_norm": 1.7905006408691406, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.6124, + "step": 12 + }, + { + "epoch": 0.04377104377104377, + "grad_norm": 1.3865525722503662, + "learning_rate": 2e-05, + "loss": 0.4662, + "step": 13 + }, + { + "epoch": 0.04713804713804714, + "grad_norm": 2.365971565246582, + "learning_rate": 2.1666666666666667e-05, + "loss": 0.8486, + "step": 14 + }, + { + "epoch": 0.050505050505050504, + "grad_norm": 1.5668970346450806, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.9114, + "step": 15 + }, + { + "epoch": 0.05387205387205387, + "grad_norm": 1.863726258277893, + "learning_rate": 2.5e-05, + "loss": 0.6151, + "step": 16 + }, + { + "epoch": 0.05723905723905724, + "grad_norm": 1.3187612295150757, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.4592, + "step": 17 + }, + { + "epoch": 0.06060606060606061, + "grad_norm": 2.2115654945373535, + "learning_rate": 2.8333333333333335e-05, + "loss": 1.2559, + "step": 18 + }, + { + "epoch": 0.06397306397306397, + "grad_norm": 2.0446372032165527, + "learning_rate": 3e-05, + "loss": 0.6431, + "step": 19 + }, + { + "epoch": 0.06734006734006734, + "grad_norm": 2.6745316982269287, + "learning_rate": 3.1666666666666666e-05, + "loss": 0.6754, + "step": 20 + }, + { + "epoch": 0.0707070707070707, + "grad_norm": 1.4006273746490479, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.8359, + "step": 21 + }, + { + "epoch": 0.07407407407407407, + "grad_norm": 1.7850373983383179, + "learning_rate": 3.5e-05, + "loss": 0.5039, + "step": 22 + }, + { + "epoch": 0.07744107744107744, + "grad_norm": 2.7081334590911865, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.6163, + "step": 23 + }, + { + "epoch": 0.08080808080808081, + "grad_norm": 0.7826062440872192, + "learning_rate": 3.8333333333333334e-05, + "loss": 0.3723, + "step": 24 + }, + { + "epoch": 0.08417508417508418, + "grad_norm": 2.104018449783325, + "learning_rate": 4e-05, + "loss": 0.5389, + "step": 25 + }, + { + "epoch": 0.08754208754208755, + "grad_norm": 3.8862948417663574, + "learning_rate": 4.166666666666667e-05, + "loss": 0.9006, + "step": 26 + }, + { + "epoch": 0.09090909090909091, + "grad_norm": 3.9396567344665527, + "learning_rate": 4.3333333333333334e-05, + "loss": 0.7355, + "step": 27 + }, + { + "epoch": 0.09427609427609428, + "grad_norm": 3.6403331756591797, + "learning_rate": 4.5e-05, + "loss": 0.7534, + "step": 28 + }, + { + "epoch": 0.09764309764309764, + "grad_norm": 1.080079197883606, + "learning_rate": 4.666666666666667e-05, + "loss": 0.4997, + "step": 29 + }, + { + "epoch": 0.10101010101010101, + "grad_norm": 1.1375266313552856, + "learning_rate": 4.8333333333333334e-05, + "loss": 0.4293, + "step": 30 + }, + { + "epoch": 0.10437710437710437, + "grad_norm": 1.7134714126586914, + "learning_rate": 5e-05, + "loss": 0.5407, + "step": 31 + }, + { + "epoch": 0.10774410774410774, + "grad_norm": 1.0918691158294678, + "learning_rate": 4.999826945767665e-05, + "loss": 0.6391, + "step": 32 + }, + { + "epoch": 0.1111111111111111, + "grad_norm": 2.4237120151519775, + "learning_rate": 4.999307807028871e-05, + "loss": 0.4208, + "step": 33 + }, + { + "epoch": 0.11447811447811448, + "grad_norm": 2.146988868713379, + "learning_rate": 4.9984426556549456e-05, + "loss": 0.8804, + "step": 34 + }, + { + "epoch": 0.11784511784511785, + "grad_norm": 0.9194502830505371, + "learning_rate": 4.997231611420373e-05, + "loss": 0.3728, + "step": 35 + }, + { + "epoch": 0.12121212121212122, + "grad_norm": 1.4301484823226929, + "learning_rate": 4.995674841986217e-05, + "loss": 0.6577, + "step": 36 + }, + { + "epoch": 0.12457912457912458, + "grad_norm": 1.9206677675247192, + "learning_rate": 4.9937725628769094e-05, + "loss": 0.9117, + "step": 37 + }, + { + "epoch": 0.12794612794612795, + "grad_norm": 3.3416388034820557, + "learning_rate": 4.991525037450412e-05, + "loss": 0.7887, + "step": 38 + }, + { + "epoch": 0.13131313131313133, + "grad_norm": 1.5981472730636597, + "learning_rate": 4.9889325768617536e-05, + "loss": 0.7348, + "step": 39 + }, + { + "epoch": 0.13468013468013468, + "grad_norm": 1.1423240900039673, + "learning_rate": 4.985995540019955e-05, + "loss": 0.4979, + "step": 40 + }, + { + "epoch": 0.13804713804713806, + "grad_norm": 1.244806170463562, + "learning_rate": 4.982714333538343e-05, + "loss": 0.7312, + "step": 41 + }, + { + "epoch": 0.1414141414141414, + "grad_norm": 1.0441328287124634, + "learning_rate": 4.9790894116782514e-05, + "loss": 0.1754, + "step": 42 + }, + { + "epoch": 0.1447811447811448, + "grad_norm": 3.0658159255981445, + "learning_rate": 4.975121276286136e-05, + "loss": 1.0091, + "step": 43 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 1.8905764818191528, + "learning_rate": 4.970810476724097e-05, + "loss": 0.8969, + "step": 44 + }, + { + "epoch": 0.15151515151515152, + "grad_norm": 1.099610686302185, + "learning_rate": 4.96615760979382e-05, + "loss": 0.6375, + "step": 45 + }, + { + "epoch": 0.15488215488215487, + "grad_norm": 1.4541923999786377, + "learning_rate": 4.9611633196539584e-05, + "loss": 0.5875, + "step": 46 + }, + { + "epoch": 0.15824915824915825, + "grad_norm": 1.8045262098312378, + "learning_rate": 4.955828297730949e-05, + "loss": 0.673, + "step": 47 + }, + { + "epoch": 0.16161616161616163, + "grad_norm": 2.0676040649414062, + "learning_rate": 4.950153282623289e-05, + "loss": 0.6862, + "step": 48 + }, + { + "epoch": 0.16498316498316498, + "grad_norm": 1.5156350135803223, + "learning_rate": 4.9441390599992864e-05, + "loss": 0.8607, + "step": 49 + }, + { + "epoch": 0.16835016835016836, + "grad_norm": 1.9884543418884277, + "learning_rate": 4.937786462488284e-05, + "loss": 0.7067, + "step": 50 + }, + { + "epoch": 0.1717171717171717, + "grad_norm": 1.5458931922912598, + "learning_rate": 4.93109636956539e-05, + "loss": 0.6235, + "step": 51 + }, + { + "epoch": 0.1750841750841751, + "grad_norm": 0.8461848497390747, + "learning_rate": 4.9240697074297206e-05, + "loss": 0.2127, + "step": 52 + }, + { + "epoch": 0.17845117845117844, + "grad_norm": 1.0691157579421997, + "learning_rate": 4.9167074488761735e-05, + "loss": 0.4627, + "step": 53 + }, + { + "epoch": 0.18181818181818182, + "grad_norm": 1.4457998275756836, + "learning_rate": 4.90901061316075e-05, + "loss": 0.4431, + "step": 54 + }, + { + "epoch": 0.18518518518518517, + "grad_norm": 1.366044282913208, + "learning_rate": 4.900980265859448e-05, + "loss": 0.7347, + "step": 55 + }, + { + "epoch": 0.18855218855218855, + "grad_norm": 3.7170326709747314, + "learning_rate": 4.892617518720737e-05, + "loss": 1.0967, + "step": 56 + }, + { + "epoch": 0.1919191919191919, + "grad_norm": 1.113218069076538, + "learning_rate": 4.883923529511646e-05, + "loss": 0.2685, + "step": 57 + }, + { + "epoch": 0.19528619528619529, + "grad_norm": 1.4505512714385986, + "learning_rate": 4.874899501857477e-05, + "loss": 0.3403, + "step": 58 + }, + { + "epoch": 0.19865319865319866, + "grad_norm": 1.139944314956665, + "learning_rate": 4.865546685075174e-05, + "loss": 0.6103, + "step": 59 + }, + { + "epoch": 0.20202020202020202, + "grad_norm": 1.6062753200531006, + "learning_rate": 4.85586637400036e-05, + "loss": 0.5342, + "step": 60 + }, + { + "epoch": 0.2053872053872054, + "grad_norm": 1.4414268732070923, + "learning_rate": 4.8458599088080735e-05, + "loss": 0.5646, + "step": 61 + }, + { + "epoch": 0.20875420875420875, + "grad_norm": 1.5661225318908691, + "learning_rate": 4.83552867482724e-05, + "loss": 0.4134, + "step": 62 + }, + { + "epoch": 0.21212121212121213, + "grad_norm": 1.6068329811096191, + "learning_rate": 4.82487410234887e-05, + "loss": 0.3138, + "step": 63 + }, + { + "epoch": 0.21548821548821548, + "grad_norm": 1.4548122882843018, + "learning_rate": 4.8138976664280536e-05, + "loss": 0.2701, + "step": 64 + }, + { + "epoch": 0.21885521885521886, + "grad_norm": 1.57492196559906, + "learning_rate": 4.8026008866797423e-05, + "loss": 0.753, + "step": 65 + }, + { + "epoch": 0.2222222222222222, + "grad_norm": 2.414822578430176, + "learning_rate": 4.7909853270683756e-05, + "loss": 0.5117, + "step": 66 + }, + { + "epoch": 0.2255892255892256, + "grad_norm": 3.1574034690856934, + "learning_rate": 4.779052595691355e-05, + "loss": 0.5563, + "step": 67 + }, + { + "epoch": 0.22895622895622897, + "grad_norm": 1.9086557626724243, + "learning_rate": 4.7668043445564134e-05, + "loss": 0.3878, + "step": 68 + }, + { + "epoch": 0.23232323232323232, + "grad_norm": 2.0000898838043213, + "learning_rate": 4.754242269352912e-05, + "loss": 0.4596, + "step": 69 + }, + { + "epoch": 0.2356902356902357, + "grad_norm": 2.8848049640655518, + "learning_rate": 4.7413681092170715e-05, + "loss": 0.2728, + "step": 70 + }, + { + "epoch": 0.23905723905723905, + "grad_norm": 2.3199172019958496, + "learning_rate": 4.728183646491214e-05, + "loss": 0.3302, + "step": 71 + }, + { + "epoch": 0.24242424242424243, + "grad_norm": 3.39791202545166, + "learning_rate": 4.7146907064769994e-05, + "loss": 0.4134, + "step": 72 + }, + { + "epoch": 0.24579124579124578, + "grad_norm": 1.6479833126068115, + "learning_rate": 4.700891157182729e-05, + "loss": 0.2988, + "step": 73 + }, + { + "epoch": 0.24915824915824916, + "grad_norm": 3.5772407054901123, + "learning_rate": 4.686786909064729e-05, + "loss": 0.3749, + "step": 74 + }, + { + "epoch": 0.25252525252525254, + "grad_norm": 3.388200283050537, + "learning_rate": 4.6723799147628666e-05, + "loss": 0.6271, + "step": 75 + }, + { + "epoch": 0.2558922558922559, + "grad_norm": 1.0087072849273682, + "learning_rate": 4.6576721688302105e-05, + "loss": 0.123, + "step": 76 + }, + { + "epoch": 0.25925925925925924, + "grad_norm": 3.9229109287261963, + "learning_rate": 4.642665707456908e-05, + "loss": 0.343, + "step": 77 + }, + { + "epoch": 0.26262626262626265, + "grad_norm": 4.32628870010376, + "learning_rate": 4.6273626081882805e-05, + "loss": 1.5331, + "step": 78 + }, + { + "epoch": 0.265993265993266, + "grad_norm": 3.5192296504974365, + "learning_rate": 4.611764989637205e-05, + "loss": 0.341, + "step": 79 + }, + { + "epoch": 0.26936026936026936, + "grad_norm": 3.2966415882110596, + "learning_rate": 4.595875011190807e-05, + "loss": 0.3844, + "step": 80 + }, + { + "epoch": 0.2727272727272727, + "grad_norm": 2.390501022338867, + "learning_rate": 4.579694872711501e-05, + "loss": 0.5238, + "step": 81 + }, + { + "epoch": 0.2760942760942761, + "grad_norm": 1.8676153421401978, + "learning_rate": 4.563226814232444e-05, + "loss": 0.198, + "step": 82 + }, + { + "epoch": 0.27946127946127947, + "grad_norm": 0.5234068632125854, + "learning_rate": 4.5464731156474094e-05, + "loss": 0.0719, + "step": 83 + }, + { + "epoch": 0.2828282828282828, + "grad_norm": 3.9550747871398926, + "learning_rate": 4.529436096395156e-05, + "loss": 0.5313, + "step": 84 + }, + { + "epoch": 0.28619528619528617, + "grad_norm": 2.396087646484375, + "learning_rate": 4.5121181151383143e-05, + "loss": 0.2908, + "step": 85 + }, + { + "epoch": 0.2895622895622896, + "grad_norm": 2.293602466583252, + "learning_rate": 4.494521569436845e-05, + "loss": 0.2391, + "step": 86 + }, + { + "epoch": 0.29292929292929293, + "grad_norm": 3.605764389038086, + "learning_rate": 4.4766488954161154e-05, + "loss": 0.4852, + "step": 87 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.8647552132606506, + "learning_rate": 4.4585025674296315e-05, + "loss": 0.0948, + "step": 88 + }, + { + "epoch": 0.2996632996632997, + "grad_norm": 2.5593700408935547, + "learning_rate": 4.44008509771648e-05, + "loss": 0.2254, + "step": 89 + }, + { + "epoch": 0.30303030303030304, + "grad_norm": 3.6353094577789307, + "learning_rate": 4.421399036053527e-05, + "loss": 0.3688, + "step": 90 + }, + { + "epoch": 0.3063973063973064, + "grad_norm": 3.3246469497680664, + "learning_rate": 4.40244696940242e-05, + "loss": 0.5953, + "step": 91 + }, + { + "epoch": 0.30976430976430974, + "grad_norm": 3.4139394760131836, + "learning_rate": 4.383231521551432e-05, + "loss": 0.1474, + "step": 92 + }, + { + "epoch": 0.31313131313131315, + "grad_norm": 2.9134109020233154, + "learning_rate": 4.363755352752227e-05, + "loss": 0.4875, + "step": 93 + }, + { + "epoch": 0.3164983164983165, + "grad_norm": 9.96104621887207, + "learning_rate": 4.3440211593515554e-05, + "loss": 0.7513, + "step": 94 + }, + { + "epoch": 0.31986531986531985, + "grad_norm": 4.5680389404296875, + "learning_rate": 4.324031673417971e-05, + "loss": 0.6839, + "step": 95 + }, + { + "epoch": 0.32323232323232326, + "grad_norm": 0.8624576926231384, + "learning_rate": 4.3037896623635874e-05, + "loss": 0.0937, + "step": 96 + }, + { + "epoch": 0.3265993265993266, + "grad_norm": 2.9045605659484863, + "learning_rate": 4.283297928560951e-05, + "loss": 0.4072, + "step": 97 + }, + { + "epoch": 0.32996632996632996, + "grad_norm": 3.564232110977173, + "learning_rate": 4.262559308955072e-05, + "loss": 0.6221, + "step": 98 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 4.791253089904785, + "learning_rate": 4.2415766746706674e-05, + "loss": 0.366, + "step": 99 + }, + { + "epoch": 0.3367003367003367, + "grad_norm": 3.8587965965270996, + "learning_rate": 4.220352930614672e-05, + "loss": 0.2871, + "step": 100 + }, + { + "epoch": 0.3367003367003367, + "eval_loss": 0.3212782144546509, + "eval_runtime": 85.783, + "eval_samples_per_second": 3.497, + "eval_steps_per_second": 1.749, + "step": 100 + }, + { + "epoch": 0.3400673400673401, + "grad_norm": 2.2887542247772217, + "learning_rate": 4.1988910150740736e-05, + "loss": 0.3171, + "step": 101 + }, + { + "epoch": 0.3434343434343434, + "grad_norm": 3.5576376914978027, + "learning_rate": 4.1771938993091266e-05, + "loss": 0.354, + "step": 102 + }, + { + "epoch": 0.3468013468013468, + "grad_norm": 6.742368221282959, + "learning_rate": 4.155264587142002e-05, + "loss": 0.4692, + "step": 103 + }, + { + "epoch": 0.3501683501683502, + "grad_norm": 5.838032245635986, + "learning_rate": 4.133106114540923e-05, + "loss": 0.4455, + "step": 104 + }, + { + "epoch": 0.35353535353535354, + "grad_norm": 8.406342506408691, + "learning_rate": 4.110721549199866e-05, + "loss": 0.8816, + "step": 105 + }, + { + "epoch": 0.3569023569023569, + "grad_norm": 10.105664253234863, + "learning_rate": 4.088113990113847e-05, + "loss": 0.6247, + "step": 106 + }, + { + "epoch": 0.3602693602693603, + "grad_norm": 8.388754844665527, + "learning_rate": 4.065286567149891e-05, + "loss": 0.3745, + "step": 107 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 3.6470038890838623, + "learning_rate": 4.042242440613724e-05, + "loss": 0.3857, + "step": 108 + }, + { + "epoch": 0.367003367003367, + "grad_norm": 2.3660080432891846, + "learning_rate": 4.0189848008122474e-05, + "loss": 0.2054, + "step": 109 + }, + { + "epoch": 0.37037037037037035, + "grad_norm": 2.4231250286102295, + "learning_rate": 3.9955168676118645e-05, + "loss": 0.6018, + "step": 110 + }, + { + "epoch": 0.37373737373737376, + "grad_norm": 2.8709912300109863, + "learning_rate": 3.971841889992706e-05, + "loss": 0.4649, + "step": 111 + }, + { + "epoch": 0.3771043771043771, + "grad_norm": 0.5076086521148682, + "learning_rate": 3.9479631455988334e-05, + "loss": 0.0586, + "step": 112 + }, + { + "epoch": 0.38047138047138046, + "grad_norm": 3.4678847789764404, + "learning_rate": 3.9238839402844724e-05, + "loss": 0.5776, + "step": 113 + }, + { + "epoch": 0.3838383838383838, + "grad_norm": 0.7102794647216797, + "learning_rate": 3.8996076076563334e-05, + "loss": 0.0743, + "step": 114 + }, + { + "epoch": 0.3872053872053872, + "grad_norm": 1.857340693473816, + "learning_rate": 3.875137508612103e-05, + "loss": 0.1731, + "step": 115 + }, + { + "epoch": 0.39057239057239057, + "grad_norm": 1.7878767251968384, + "learning_rate": 3.850477030875147e-05, + "loss": 0.1957, + "step": 116 + }, + { + "epoch": 0.3939393939393939, + "grad_norm": 3.4582345485687256, + "learning_rate": 3.825629588525498e-05, + "loss": 0.254, + "step": 117 + }, + { + "epoch": 0.39730639730639733, + "grad_norm": 3.044245481491089, + "learning_rate": 3.800598621527205e-05, + "loss": 0.3621, + "step": 118 + }, + { + "epoch": 0.4006734006734007, + "grad_norm": 2.234395742416382, + "learning_rate": 3.775387595252094e-05, + "loss": 0.5613, + "step": 119 + }, + { + "epoch": 0.40404040404040403, + "grad_norm": 2.256892442703247, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.6154, + "step": 120 + }, + { + "epoch": 0.4074074074074074, + "grad_norm": 1.4057880640029907, + "learning_rate": 3.724439350515571e-05, + "loss": 0.1662, + "step": 121 + }, + { + "epoch": 0.4107744107744108, + "grad_norm": 3.190229654312134, + "learning_rate": 3.6987091855016665e-05, + "loss": 0.2788, + "step": 122 + }, + { + "epoch": 0.41414141414141414, + "grad_norm": 2.6102352142333984, + "learning_rate": 3.672813067129449e-05, + "loss": 0.2033, + "step": 123 + }, + { + "epoch": 0.4175084175084175, + "grad_norm": 5.529795169830322, + "learning_rate": 3.646754580545226e-05, + "loss": 0.8619, + "step": 124 + }, + { + "epoch": 0.4208754208754209, + "grad_norm": 1.412796974182129, + "learning_rate": 3.6205373333741136e-05, + "loss": 0.0854, + "step": 125 + }, + { + "epoch": 0.42424242424242425, + "grad_norm": 1.929734706878662, + "learning_rate": 3.594164955220577e-05, + "loss": 0.5321, + "step": 126 + }, + { + "epoch": 0.4276094276094276, + "grad_norm": 2.2332234382629395, + "learning_rate": 3.56764109716594e-05, + "loss": 0.734, + "step": 127 + }, + { + "epoch": 0.43097643097643096, + "grad_norm": 2.0864946842193604, + "learning_rate": 3.540969431262919e-05, + "loss": 0.4026, + "step": 128 + }, + { + "epoch": 0.43434343434343436, + "grad_norm": 0.3298185467720032, + "learning_rate": 3.514153650027249e-05, + "loss": 0.0498, + "step": 129 + }, + { + "epoch": 0.4377104377104377, + "grad_norm": 1.2375303506851196, + "learning_rate": 3.487197465926478e-05, + "loss": 0.0613, + "step": 130 + }, + { + "epoch": 0.44107744107744107, + "grad_norm": 0.7785205245018005, + "learning_rate": 3.460104610866003e-05, + "loss": 0.1058, + "step": 131 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 2.9774363040924072, + "learning_rate": 3.4328788356724134e-05, + "loss": 0.2498, + "step": 132 + }, + { + "epoch": 0.4478114478114478, + "grad_norm": 1.7193303108215332, + "learning_rate": 3.4055239095742064e-05, + "loss": 0.1332, + "step": 133 + }, + { + "epoch": 0.4511784511784512, + "grad_norm": 3.84416127204895, + "learning_rate": 3.3780436196799734e-05, + "loss": 0.6439, + "step": 134 + }, + { + "epoch": 0.45454545454545453, + "grad_norm": 2.0642948150634766, + "learning_rate": 3.350441770454092e-05, + "loss": 0.4133, + "step": 135 + }, + { + "epoch": 0.45791245791245794, + "grad_norm": 1.8204262256622314, + "learning_rate": 3.322722183190025e-05, + "loss": 0.518, + "step": 136 + }, + { + "epoch": 0.4612794612794613, + "grad_norm": 10.550874710083008, + "learning_rate": 3.2948886954812876e-05, + "loss": 1.2434, + "step": 137 + }, + { + "epoch": 0.46464646464646464, + "grad_norm": 2.6596121788024902, + "learning_rate": 3.2669451606901596e-05, + "loss": 0.3132, + "step": 138 + }, + { + "epoch": 0.468013468013468, + "grad_norm": 1.824311375617981, + "learning_rate": 3.238895447414211e-05, + "loss": 0.4487, + "step": 139 + }, + { + "epoch": 0.4713804713804714, + "grad_norm": 4.347046852111816, + "learning_rate": 3.210743438950718e-05, + "loss": 0.4392, + "step": 140 + }, + { + "epoch": 0.47474747474747475, + "grad_norm": 0.49011513590812683, + "learning_rate": 3.182493032759053e-05, + "loss": 0.056, + "step": 141 + }, + { + "epoch": 0.4781144781144781, + "grad_norm": 1.8293064832687378, + "learning_rate": 3.154148139921102e-05, + "loss": 0.2903, + "step": 142 + }, + { + "epoch": 0.48148148148148145, + "grad_norm": 2.0688326358795166, + "learning_rate": 3.1257126845997995e-05, + "loss": 0.4253, + "step": 143 + }, + { + "epoch": 0.48484848484848486, + "grad_norm": 0.9706846475601196, + "learning_rate": 3.097190603495861e-05, + "loss": 0.1477, + "step": 144 + }, + { + "epoch": 0.4882154882154882, + "grad_norm": 1.5309983491897583, + "learning_rate": 3.0685858453027666e-05, + "loss": 0.4342, + "step": 145 + }, + { + "epoch": 0.49158249158249157, + "grad_norm": 0.9750226736068726, + "learning_rate": 3.0399023701600905e-05, + "loss": 0.1603, + "step": 146 + }, + { + "epoch": 0.494949494949495, + "grad_norm": 1.64618980884552, + "learning_rate": 3.0111441491052504e-05, + "loss": 0.2784, + "step": 147 + }, + { + "epoch": 0.4983164983164983, + "grad_norm": 1.9535808563232422, + "learning_rate": 2.9823151635237423e-05, + "loss": 0.2996, + "step": 148 + }, + { + "epoch": 0.5016835016835017, + "grad_norm": 3.526555061340332, + "learning_rate": 2.9534194045979397e-05, + "loss": 1.5967, + "step": 149 + }, + { + "epoch": 0.5050505050505051, + "grad_norm": 0.24972084164619446, + "learning_rate": 2.924460872754547e-05, + "loss": 0.033, + "step": 150 + }, + { + "epoch": 0.5084175084175084, + "grad_norm": 2.380751609802246, + "learning_rate": 2.89544357711076e-05, + "loss": 0.2955, + "step": 151 + }, + { + "epoch": 0.5117845117845118, + "grad_norm": 1.212270736694336, + "learning_rate": 2.8663715349192388e-05, + "loss": 0.1169, + "step": 152 + }, + { + "epoch": 0.5151515151515151, + "grad_norm": 0.3227732479572296, + "learning_rate": 2.8372487710119373e-05, + "loss": 0.0191, + "step": 153 + }, + { + "epoch": 0.5185185185185185, + "grad_norm": 1.4398654699325562, + "learning_rate": 2.8080793172428964e-05, + "loss": 0.1793, + "step": 154 + }, + { + "epoch": 0.5218855218855218, + "grad_norm": 4.784367084503174, + "learning_rate": 2.7788672119300613e-05, + "loss": 0.3231, + "step": 155 + }, + { + "epoch": 0.5252525252525253, + "grad_norm": 0.6798835396766663, + "learning_rate": 2.7496164992961993e-05, + "loss": 0.0775, + "step": 156 + }, + { + "epoch": 0.5286195286195287, + "grad_norm": 3.9313294887542725, + "learning_rate": 2.7203312289090048e-05, + "loss": 0.2993, + "step": 157 + }, + { + "epoch": 0.531986531986532, + "grad_norm": 2.1782889366149902, + "learning_rate": 2.691015455120468e-05, + "loss": 0.1773, + "step": 158 + }, + { + "epoch": 0.5353535353535354, + "grad_norm": 3.120492696762085, + "learning_rate": 2.6616732365055713e-05, + "loss": 0.4819, + "step": 159 + }, + { + "epoch": 0.5387205387205387, + "grad_norm": 1.880778193473816, + "learning_rate": 2.6323086353004078e-05, + "loss": 0.1744, + "step": 160 + }, + { + "epoch": 0.5420875420875421, + "grad_norm": 1.3111162185668945, + "learning_rate": 2.6029257168397947e-05, + "loss": 0.1357, + "step": 161 + }, + { + "epoch": 0.5454545454545454, + "grad_norm": 2.0314598083496094, + "learning_rate": 2.5735285489944487e-05, + "loss": 0.4442, + "step": 162 + }, + { + "epoch": 0.5488215488215489, + "grad_norm": 2.741291046142578, + "learning_rate": 2.544121201607822e-05, + "loss": 0.7485, + "step": 163 + }, + { + "epoch": 0.5521885521885522, + "grad_norm": 0.8405356407165527, + "learning_rate": 2.5147077459326556e-05, + "loss": 0.0897, + "step": 164 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.9963136315345764, + "learning_rate": 2.4852922540673447e-05, + "loss": 0.1057, + "step": 165 + }, + { + "epoch": 0.5589225589225589, + "grad_norm": 4.5743584632873535, + "learning_rate": 2.4558787983921787e-05, + "loss": 1.1172, + "step": 166 + }, + { + "epoch": 0.5622895622895623, + "grad_norm": 0.49469229578971863, + "learning_rate": 2.4264714510055516e-05, + "loss": 0.0507, + "step": 167 + }, + { + "epoch": 0.5656565656565656, + "grad_norm": 5.110591411590576, + "learning_rate": 2.3970742831602062e-05, + "loss": 0.6335, + "step": 168 + }, + { + "epoch": 0.569023569023569, + "grad_norm": 1.0456212759017944, + "learning_rate": 2.367691364699592e-05, + "loss": 0.1245, + "step": 169 + }, + { + "epoch": 0.5723905723905723, + "grad_norm": 3.8197057247161865, + "learning_rate": 2.338326763494429e-05, + "loss": 0.5504, + "step": 170 + }, + { + "epoch": 0.5757575757575758, + "grad_norm": 0.5079836249351501, + "learning_rate": 2.308984544879533e-05, + "loss": 0.0567, + "step": 171 + }, + { + "epoch": 0.5791245791245792, + "grad_norm": 1.7494263648986816, + "learning_rate": 2.2796687710909964e-05, + "loss": 0.1202, + "step": 172 + }, + { + "epoch": 0.5824915824915825, + "grad_norm": 1.3403123617172241, + "learning_rate": 2.2503835007038023e-05, + "loss": 0.1395, + "step": 173 + }, + { + "epoch": 0.5858585858585859, + "grad_norm": 4.487949848175049, + "learning_rate": 2.221132788069939e-05, + "loss": 0.2783, + "step": 174 + }, + { + "epoch": 0.5892255892255892, + "grad_norm": 0.7322219610214233, + "learning_rate": 2.191920682757104e-05, + "loss": 0.0957, + "step": 175 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 10.600361824035645, + "learning_rate": 2.162751228988063e-05, + "loss": 0.7983, + "step": 176 + }, + { + "epoch": 0.5959595959595959, + "grad_norm": 1.510453462600708, + "learning_rate": 2.1336284650807615e-05, + "loss": 0.1831, + "step": 177 + }, + { + "epoch": 0.5993265993265994, + "grad_norm": 0.6158142685890198, + "learning_rate": 2.1045564228892404e-05, + "loss": 0.0762, + "step": 178 + }, + { + "epoch": 0.6026936026936027, + "grad_norm": 1.5894421339035034, + "learning_rate": 2.0755391272454537e-05, + "loss": 0.1448, + "step": 179 + }, + { + "epoch": 0.6060606060606061, + "grad_norm": 2.298612594604492, + "learning_rate": 2.04658059540206e-05, + "loss": 0.1969, + "step": 180 + }, + { + "epoch": 0.6094276094276094, + "grad_norm": 3.3239121437072754, + "learning_rate": 2.017684836476258e-05, + "loss": 0.4061, + "step": 181 + }, + { + "epoch": 0.6127946127946128, + "grad_norm": 1.9558825492858887, + "learning_rate": 1.9888558508947495e-05, + "loss": 0.5793, + "step": 182 + }, + { + "epoch": 0.6161616161616161, + "grad_norm": 0.9651542901992798, + "learning_rate": 1.960097629839911e-05, + "loss": 0.1177, + "step": 183 + }, + { + "epoch": 0.6195286195286195, + "grad_norm": 2.647695541381836, + "learning_rate": 1.9314141546972343e-05, + "loss": 0.5155, + "step": 184 + }, + { + "epoch": 0.622895622895623, + "grad_norm": 2.941695213317871, + "learning_rate": 1.9028093965041394e-05, + "loss": 0.2956, + "step": 185 + }, + { + "epoch": 0.6262626262626263, + "grad_norm": 1.9183754920959473, + "learning_rate": 1.8742873154002004e-05, + "loss": 0.2758, + "step": 186 + }, + { + "epoch": 0.6296296296296297, + "grad_norm": 0.28313329815864563, + "learning_rate": 1.845851860078899e-05, + "loss": 0.0181, + "step": 187 + }, + { + "epoch": 0.632996632996633, + "grad_norm": 1.3280785083770752, + "learning_rate": 1.8175069672409475e-05, + "loss": 0.0636, + "step": 188 + }, + { + "epoch": 0.6363636363636364, + "grad_norm": 5.026294231414795, + "learning_rate": 1.789256561049283e-05, + "loss": 0.3681, + "step": 189 + }, + { + "epoch": 0.6397306397306397, + "grad_norm": 3.3546791076660156, + "learning_rate": 1.76110455258579e-05, + "loss": 0.2501, + "step": 190 + }, + { + "epoch": 0.6430976430976431, + "grad_norm": 3.2179019451141357, + "learning_rate": 1.7330548393098407e-05, + "loss": 0.3801, + "step": 191 + }, + { + "epoch": 0.6464646464646465, + "grad_norm": 2.4074902534484863, + "learning_rate": 1.7051113045187123e-05, + "loss": 0.4583, + "step": 192 + }, + { + "epoch": 0.6498316498316499, + "grad_norm": 0.6032189130783081, + "learning_rate": 1.677277816809975e-05, + "loss": 0.0672, + "step": 193 + }, + { + "epoch": 0.6531986531986532, + "grad_norm": 0.8865869045257568, + "learning_rate": 1.649558229545908e-05, + "loss": 0.084, + "step": 194 + }, + { + "epoch": 0.6565656565656566, + "grad_norm": 5.032468318939209, + "learning_rate": 1.621956380320027e-05, + "loss": 0.24, + "step": 195 + }, + { + "epoch": 0.6599326599326599, + "grad_norm": 2.6365160942077637, + "learning_rate": 1.5944760904257945e-05, + "loss": 0.1649, + "step": 196 + }, + { + "epoch": 0.6632996632996633, + "grad_norm": 1.2071051597595215, + "learning_rate": 1.5671211643275875e-05, + "loss": 0.1186, + "step": 197 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.4904046952724457, + "learning_rate": 1.539895389133997e-05, + "loss": 0.0474, + "step": 198 + } + ], + "logging_steps": 1, + "max_steps": 297, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 99, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3665612256719667e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp4_subtle_signal_c_189/checkpoint-198/training_args.bin b/exp4_subtle_signal_c_189/checkpoint-198/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a1b630c5deef60e645fa61a43f2629aef0b901e3 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-198/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6de1304e5f14b655e49466bbc76c3f69efc17feaf36f1722623c6a0ae37e58 +size 6033 diff --git a/exp4_subtle_signal_c_189/checkpoint-297/README.md b/exp4_subtle_signal_c_189/checkpoint-297/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-297/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp4_subtle_signal_c_189/checkpoint-297/adapter_config.json b/exp4_subtle_signal_c_189/checkpoint-297/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4d747200220490dff0f82aa49e63f702a859ccc --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-297/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "q_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp4_subtle_signal_c_189/checkpoint-297/adapter_model.safetensors b/exp4_subtle_signal_c_189/checkpoint-297/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..114cb296fa9e4d4b828008522d38d5e08b854910 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-297/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b2c91f91b54595638c9b6e9c64295e0afe72b725c0cdc4cd0dc548904771e9b +size 201378736 diff --git a/exp4_subtle_signal_c_189/checkpoint-297/optimizer.pt b/exp4_subtle_signal_c_189/checkpoint-297/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b5cd4abef8a4d215ba21ec31d0114885114df87 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-297/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddd78e8d5d22dbf17635f40f49a15894e0c5d2c172f56b6a50a5671b9573a17a +size 402982627 diff --git a/exp4_subtle_signal_c_189/checkpoint-297/rng_state.pth b/exp4_subtle_signal_c_189/checkpoint-297/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..abcc11ff4f54959f4126a941cb511f77e2fbb437 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-297/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57d1a1e9f36dd070b2413aea8481ecf67f39ba4a8d8d4c1a07effa0c2171e65f +size 14645 diff --git a/exp4_subtle_signal_c_189/checkpoint-297/scheduler.pt b/exp4_subtle_signal_c_189/checkpoint-297/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..34f17e893a8772b930c7dd69596a95df0a079b61 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-297/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18073980c907e0d171fb3088d25d7f1df8324bfd31cf571245b9fa8890885e26 +size 1465 diff --git a/exp4_subtle_signal_c_189/checkpoint-297/trainer_state.json b/exp4_subtle_signal_c_189/checkpoint-297/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5f7d814b42f11f895bd765062c34f11202d0b1 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-297/trainer_state.json @@ -0,0 +1,2129 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 297, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.003367003367003367, + "grad_norm": 1.417947769165039, + "learning_rate": 0.0, + "loss": 0.5304, + "step": 1 + }, + { + "epoch": 0.006734006734006734, + "grad_norm": 2.331010580062866, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.9609, + "step": 2 + }, + { + "epoch": 0.010101010101010102, + "grad_norm": 1.8590822219848633, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.795, + "step": 3 + }, + { + "epoch": 0.013468013468013467, + "grad_norm": 1.316519856452942, + "learning_rate": 5e-06, + "loss": 0.79, + "step": 4 + }, + { + "epoch": 0.016835016835016835, + "grad_norm": 3.6459147930145264, + "learning_rate": 6.666666666666667e-06, + "loss": 1.0392, + "step": 5 + }, + { + "epoch": 0.020202020202020204, + "grad_norm": 1.271788239479065, + "learning_rate": 8.333333333333334e-06, + "loss": 0.468, + "step": 6 + }, + { + "epoch": 0.02356902356902357, + "grad_norm": 1.9651601314544678, + "learning_rate": 1e-05, + "loss": 1.2001, + "step": 7 + }, + { + "epoch": 0.026936026936026935, + "grad_norm": 2.7949764728546143, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.2763, + "step": 8 + }, + { + "epoch": 0.030303030303030304, + "grad_norm": 1.3491120338439941, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.6399, + "step": 9 + }, + { + "epoch": 0.03367003367003367, + "grad_norm": 2.5290658473968506, + "learning_rate": 1.5e-05, + "loss": 1.011, + "step": 10 + }, + { + "epoch": 0.037037037037037035, + "grad_norm": 2.0780346393585205, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.4257, + "step": 11 + }, + { + "epoch": 0.04040404040404041, + "grad_norm": 1.7905006408691406, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.6124, + "step": 12 + }, + { + "epoch": 0.04377104377104377, + "grad_norm": 1.3865525722503662, + "learning_rate": 2e-05, + "loss": 0.4662, + "step": 13 + }, + { + "epoch": 0.04713804713804714, + "grad_norm": 2.365971565246582, + "learning_rate": 2.1666666666666667e-05, + "loss": 0.8486, + "step": 14 + }, + { + "epoch": 0.050505050505050504, + "grad_norm": 1.5668970346450806, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.9114, + "step": 15 + }, + { + "epoch": 0.05387205387205387, + "grad_norm": 1.863726258277893, + "learning_rate": 2.5e-05, + "loss": 0.6151, + "step": 16 + }, + { + "epoch": 0.05723905723905724, + "grad_norm": 1.3187612295150757, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.4592, + "step": 17 + }, + { + "epoch": 0.06060606060606061, + "grad_norm": 2.2115654945373535, + "learning_rate": 2.8333333333333335e-05, + "loss": 1.2559, + "step": 18 + }, + { + "epoch": 0.06397306397306397, + "grad_norm": 2.0446372032165527, + "learning_rate": 3e-05, + "loss": 0.6431, + "step": 19 + }, + { + "epoch": 0.06734006734006734, + "grad_norm": 2.6745316982269287, + "learning_rate": 3.1666666666666666e-05, + "loss": 0.6754, + "step": 20 + }, + { + "epoch": 0.0707070707070707, + "grad_norm": 1.4006273746490479, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.8359, + "step": 21 + }, + { + "epoch": 0.07407407407407407, + "grad_norm": 1.7850373983383179, + "learning_rate": 3.5e-05, + "loss": 0.5039, + "step": 22 + }, + { + "epoch": 0.07744107744107744, + "grad_norm": 2.7081334590911865, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.6163, + "step": 23 + }, + { + "epoch": 0.08080808080808081, + "grad_norm": 0.7826062440872192, + "learning_rate": 3.8333333333333334e-05, + "loss": 0.3723, + "step": 24 + }, + { + "epoch": 0.08417508417508418, + "grad_norm": 2.104018449783325, + "learning_rate": 4e-05, + "loss": 0.5389, + "step": 25 + }, + { + "epoch": 0.08754208754208755, + "grad_norm": 3.8862948417663574, + "learning_rate": 4.166666666666667e-05, + "loss": 0.9006, + "step": 26 + }, + { + "epoch": 0.09090909090909091, + "grad_norm": 3.9396567344665527, + "learning_rate": 4.3333333333333334e-05, + "loss": 0.7355, + "step": 27 + }, + { + "epoch": 0.09427609427609428, + "grad_norm": 3.6403331756591797, + "learning_rate": 4.5e-05, + "loss": 0.7534, + "step": 28 + }, + { + "epoch": 0.09764309764309764, + "grad_norm": 1.080079197883606, + "learning_rate": 4.666666666666667e-05, + "loss": 0.4997, + "step": 29 + }, + { + "epoch": 0.10101010101010101, + "grad_norm": 1.1375266313552856, + "learning_rate": 4.8333333333333334e-05, + "loss": 0.4293, + "step": 30 + }, + { + "epoch": 0.10437710437710437, + "grad_norm": 1.7134714126586914, + "learning_rate": 5e-05, + "loss": 0.5407, + "step": 31 + }, + { + "epoch": 0.10774410774410774, + "grad_norm": 1.0918691158294678, + "learning_rate": 4.999826945767665e-05, + "loss": 0.6391, + "step": 32 + }, + { + "epoch": 0.1111111111111111, + "grad_norm": 2.4237120151519775, + "learning_rate": 4.999307807028871e-05, + "loss": 0.4208, + "step": 33 + }, + { + "epoch": 0.11447811447811448, + "grad_norm": 2.146988868713379, + "learning_rate": 4.9984426556549456e-05, + "loss": 0.8804, + "step": 34 + }, + { + "epoch": 0.11784511784511785, + "grad_norm": 0.9194502830505371, + "learning_rate": 4.997231611420373e-05, + "loss": 0.3728, + "step": 35 + }, + { + "epoch": 0.12121212121212122, + "grad_norm": 1.4301484823226929, + "learning_rate": 4.995674841986217e-05, + "loss": 0.6577, + "step": 36 + }, + { + "epoch": 0.12457912457912458, + "grad_norm": 1.9206677675247192, + "learning_rate": 4.9937725628769094e-05, + "loss": 0.9117, + "step": 37 + }, + { + "epoch": 0.12794612794612795, + "grad_norm": 3.3416388034820557, + "learning_rate": 4.991525037450412e-05, + "loss": 0.7887, + "step": 38 + }, + { + "epoch": 0.13131313131313133, + "grad_norm": 1.5981472730636597, + "learning_rate": 4.9889325768617536e-05, + "loss": 0.7348, + "step": 39 + }, + { + "epoch": 0.13468013468013468, + "grad_norm": 1.1423240900039673, + "learning_rate": 4.985995540019955e-05, + "loss": 0.4979, + "step": 40 + }, + { + "epoch": 0.13804713804713806, + "grad_norm": 1.244806170463562, + "learning_rate": 4.982714333538343e-05, + "loss": 0.7312, + "step": 41 + }, + { + "epoch": 0.1414141414141414, + "grad_norm": 1.0441328287124634, + "learning_rate": 4.9790894116782514e-05, + "loss": 0.1754, + "step": 42 + }, + { + "epoch": 0.1447811447811448, + "grad_norm": 3.0658159255981445, + "learning_rate": 4.975121276286136e-05, + "loss": 1.0091, + "step": 43 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 1.8905764818191528, + "learning_rate": 4.970810476724097e-05, + "loss": 0.8969, + "step": 44 + }, + { + "epoch": 0.15151515151515152, + "grad_norm": 1.099610686302185, + "learning_rate": 4.96615760979382e-05, + "loss": 0.6375, + "step": 45 + }, + { + "epoch": 0.15488215488215487, + "grad_norm": 1.4541923999786377, + "learning_rate": 4.9611633196539584e-05, + "loss": 0.5875, + "step": 46 + }, + { + "epoch": 0.15824915824915825, + "grad_norm": 1.8045262098312378, + "learning_rate": 4.955828297730949e-05, + "loss": 0.673, + "step": 47 + }, + { + "epoch": 0.16161616161616163, + "grad_norm": 2.0676040649414062, + "learning_rate": 4.950153282623289e-05, + "loss": 0.6862, + "step": 48 + }, + { + "epoch": 0.16498316498316498, + "grad_norm": 1.5156350135803223, + "learning_rate": 4.9441390599992864e-05, + "loss": 0.8607, + "step": 49 + }, + { + "epoch": 0.16835016835016836, + "grad_norm": 1.9884543418884277, + "learning_rate": 4.937786462488284e-05, + "loss": 0.7067, + "step": 50 + }, + { + "epoch": 0.1717171717171717, + "grad_norm": 1.5458931922912598, + "learning_rate": 4.93109636956539e-05, + "loss": 0.6235, + "step": 51 + }, + { + "epoch": 0.1750841750841751, + "grad_norm": 0.8461848497390747, + "learning_rate": 4.9240697074297206e-05, + "loss": 0.2127, + "step": 52 + }, + { + "epoch": 0.17845117845117844, + "grad_norm": 1.0691157579421997, + "learning_rate": 4.9167074488761735e-05, + "loss": 0.4627, + "step": 53 + }, + { + "epoch": 0.18181818181818182, + "grad_norm": 1.4457998275756836, + "learning_rate": 4.90901061316075e-05, + "loss": 0.4431, + "step": 54 + }, + { + "epoch": 0.18518518518518517, + "grad_norm": 1.366044282913208, + "learning_rate": 4.900980265859448e-05, + "loss": 0.7347, + "step": 55 + }, + { + "epoch": 0.18855218855218855, + "grad_norm": 3.7170326709747314, + "learning_rate": 4.892617518720737e-05, + "loss": 1.0967, + "step": 56 + }, + { + "epoch": 0.1919191919191919, + "grad_norm": 1.113218069076538, + "learning_rate": 4.883923529511646e-05, + "loss": 0.2685, + "step": 57 + }, + { + "epoch": 0.19528619528619529, + "grad_norm": 1.4505512714385986, + "learning_rate": 4.874899501857477e-05, + "loss": 0.3403, + "step": 58 + }, + { + "epoch": 0.19865319865319866, + "grad_norm": 1.139944314956665, + "learning_rate": 4.865546685075174e-05, + "loss": 0.6103, + "step": 59 + }, + { + "epoch": 0.20202020202020202, + "grad_norm": 1.6062753200531006, + "learning_rate": 4.85586637400036e-05, + "loss": 0.5342, + "step": 60 + }, + { + "epoch": 0.2053872053872054, + "grad_norm": 1.4414268732070923, + "learning_rate": 4.8458599088080735e-05, + "loss": 0.5646, + "step": 61 + }, + { + "epoch": 0.20875420875420875, + "grad_norm": 1.5661225318908691, + "learning_rate": 4.83552867482724e-05, + "loss": 0.4134, + "step": 62 + }, + { + "epoch": 0.21212121212121213, + "grad_norm": 1.6068329811096191, + "learning_rate": 4.82487410234887e-05, + "loss": 0.3138, + "step": 63 + }, + { + "epoch": 0.21548821548821548, + "grad_norm": 1.4548122882843018, + "learning_rate": 4.8138976664280536e-05, + "loss": 0.2701, + "step": 64 + }, + { + "epoch": 0.21885521885521886, + "grad_norm": 1.57492196559906, + "learning_rate": 4.8026008866797423e-05, + "loss": 0.753, + "step": 65 + }, + { + "epoch": 0.2222222222222222, + "grad_norm": 2.414822578430176, + "learning_rate": 4.7909853270683756e-05, + "loss": 0.5117, + "step": 66 + }, + { + "epoch": 0.2255892255892256, + "grad_norm": 3.1574034690856934, + "learning_rate": 4.779052595691355e-05, + "loss": 0.5563, + "step": 67 + }, + { + "epoch": 0.22895622895622897, + "grad_norm": 1.9086557626724243, + "learning_rate": 4.7668043445564134e-05, + "loss": 0.3878, + "step": 68 + }, + { + "epoch": 0.23232323232323232, + "grad_norm": 2.0000898838043213, + "learning_rate": 4.754242269352912e-05, + "loss": 0.4596, + "step": 69 + }, + { + "epoch": 0.2356902356902357, + "grad_norm": 2.8848049640655518, + "learning_rate": 4.7413681092170715e-05, + "loss": 0.2728, + "step": 70 + }, + { + "epoch": 0.23905723905723905, + "grad_norm": 2.3199172019958496, + "learning_rate": 4.728183646491214e-05, + "loss": 0.3302, + "step": 71 + }, + { + "epoch": 0.24242424242424243, + "grad_norm": 3.39791202545166, + "learning_rate": 4.7146907064769994e-05, + "loss": 0.4134, + "step": 72 + }, + { + "epoch": 0.24579124579124578, + "grad_norm": 1.6479833126068115, + "learning_rate": 4.700891157182729e-05, + "loss": 0.2988, + "step": 73 + }, + { + "epoch": 0.24915824915824916, + "grad_norm": 3.5772407054901123, + "learning_rate": 4.686786909064729e-05, + "loss": 0.3749, + "step": 74 + }, + { + "epoch": 0.25252525252525254, + "grad_norm": 3.388200283050537, + "learning_rate": 4.6723799147628666e-05, + "loss": 0.6271, + "step": 75 + }, + { + "epoch": 0.2558922558922559, + "grad_norm": 1.0087072849273682, + "learning_rate": 4.6576721688302105e-05, + "loss": 0.123, + "step": 76 + }, + { + "epoch": 0.25925925925925924, + "grad_norm": 3.9229109287261963, + "learning_rate": 4.642665707456908e-05, + "loss": 0.343, + "step": 77 + }, + { + "epoch": 0.26262626262626265, + "grad_norm": 4.32628870010376, + "learning_rate": 4.6273626081882805e-05, + "loss": 1.5331, + "step": 78 + }, + { + "epoch": 0.265993265993266, + "grad_norm": 3.5192296504974365, + "learning_rate": 4.611764989637205e-05, + "loss": 0.341, + "step": 79 + }, + { + "epoch": 0.26936026936026936, + "grad_norm": 3.2966415882110596, + "learning_rate": 4.595875011190807e-05, + "loss": 0.3844, + "step": 80 + }, + { + "epoch": 0.2727272727272727, + "grad_norm": 2.390501022338867, + "learning_rate": 4.579694872711501e-05, + "loss": 0.5238, + "step": 81 + }, + { + "epoch": 0.2760942760942761, + "grad_norm": 1.8676153421401978, + "learning_rate": 4.563226814232444e-05, + "loss": 0.198, + "step": 82 + }, + { + "epoch": 0.27946127946127947, + "grad_norm": 0.5234068632125854, + "learning_rate": 4.5464731156474094e-05, + "loss": 0.0719, + "step": 83 + }, + { + "epoch": 0.2828282828282828, + "grad_norm": 3.9550747871398926, + "learning_rate": 4.529436096395156e-05, + "loss": 0.5313, + "step": 84 + }, + { + "epoch": 0.28619528619528617, + "grad_norm": 2.396087646484375, + "learning_rate": 4.5121181151383143e-05, + "loss": 0.2908, + "step": 85 + }, + { + "epoch": 0.2895622895622896, + "grad_norm": 2.293602466583252, + "learning_rate": 4.494521569436845e-05, + "loss": 0.2391, + "step": 86 + }, + { + "epoch": 0.29292929292929293, + "grad_norm": 3.605764389038086, + "learning_rate": 4.4766488954161154e-05, + "loss": 0.4852, + "step": 87 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.8647552132606506, + "learning_rate": 4.4585025674296315e-05, + "loss": 0.0948, + "step": 88 + }, + { + "epoch": 0.2996632996632997, + "grad_norm": 2.5593700408935547, + "learning_rate": 4.44008509771648e-05, + "loss": 0.2254, + "step": 89 + }, + { + "epoch": 0.30303030303030304, + "grad_norm": 3.6353094577789307, + "learning_rate": 4.421399036053527e-05, + "loss": 0.3688, + "step": 90 + }, + { + "epoch": 0.3063973063973064, + "grad_norm": 3.3246469497680664, + "learning_rate": 4.40244696940242e-05, + "loss": 0.5953, + "step": 91 + }, + { + "epoch": 0.30976430976430974, + "grad_norm": 3.4139394760131836, + "learning_rate": 4.383231521551432e-05, + "loss": 0.1474, + "step": 92 + }, + { + "epoch": 0.31313131313131315, + "grad_norm": 2.9134109020233154, + "learning_rate": 4.363755352752227e-05, + "loss": 0.4875, + "step": 93 + }, + { + "epoch": 0.3164983164983165, + "grad_norm": 9.96104621887207, + "learning_rate": 4.3440211593515554e-05, + "loss": 0.7513, + "step": 94 + }, + { + "epoch": 0.31986531986531985, + "grad_norm": 4.5680389404296875, + "learning_rate": 4.324031673417971e-05, + "loss": 0.6839, + "step": 95 + }, + { + "epoch": 0.32323232323232326, + "grad_norm": 0.8624576926231384, + "learning_rate": 4.3037896623635874e-05, + "loss": 0.0937, + "step": 96 + }, + { + "epoch": 0.3265993265993266, + "grad_norm": 2.9045605659484863, + "learning_rate": 4.283297928560951e-05, + "loss": 0.4072, + "step": 97 + }, + { + "epoch": 0.32996632996632996, + "grad_norm": 3.564232110977173, + "learning_rate": 4.262559308955072e-05, + "loss": 0.6221, + "step": 98 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 4.791253089904785, + "learning_rate": 4.2415766746706674e-05, + "loss": 0.366, + "step": 99 + }, + { + "epoch": 0.3367003367003367, + "grad_norm": 3.8587965965270996, + "learning_rate": 4.220352930614672e-05, + "loss": 0.2871, + "step": 100 + }, + { + "epoch": 0.3367003367003367, + "eval_loss": 0.3212782144546509, + "eval_runtime": 85.783, + "eval_samples_per_second": 3.497, + "eval_steps_per_second": 1.749, + "step": 100 + }, + { + "epoch": 0.3400673400673401, + "grad_norm": 2.2887542247772217, + "learning_rate": 4.1988910150740736e-05, + "loss": 0.3171, + "step": 101 + }, + { + "epoch": 0.3434343434343434, + "grad_norm": 3.5576376914978027, + "learning_rate": 4.1771938993091266e-05, + "loss": 0.354, + "step": 102 + }, + { + "epoch": 0.3468013468013468, + "grad_norm": 6.742368221282959, + "learning_rate": 4.155264587142002e-05, + "loss": 0.4692, + "step": 103 + }, + { + "epoch": 0.3501683501683502, + "grad_norm": 5.838032245635986, + "learning_rate": 4.133106114540923e-05, + "loss": 0.4455, + "step": 104 + }, + { + "epoch": 0.35353535353535354, + "grad_norm": 8.406342506408691, + "learning_rate": 4.110721549199866e-05, + "loss": 0.8816, + "step": 105 + }, + { + "epoch": 0.3569023569023569, + "grad_norm": 10.105664253234863, + "learning_rate": 4.088113990113847e-05, + "loss": 0.6247, + "step": 106 + }, + { + "epoch": 0.3602693602693603, + "grad_norm": 8.388754844665527, + "learning_rate": 4.065286567149891e-05, + "loss": 0.3745, + "step": 107 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 3.6470038890838623, + "learning_rate": 4.042242440613724e-05, + "loss": 0.3857, + "step": 108 + }, + { + "epoch": 0.367003367003367, + "grad_norm": 2.3660080432891846, + "learning_rate": 4.0189848008122474e-05, + "loss": 0.2054, + "step": 109 + }, + { + "epoch": 0.37037037037037035, + "grad_norm": 2.4231250286102295, + "learning_rate": 3.9955168676118645e-05, + "loss": 0.6018, + "step": 110 + }, + { + "epoch": 0.37373737373737376, + "grad_norm": 2.8709912300109863, + "learning_rate": 3.971841889992706e-05, + "loss": 0.4649, + "step": 111 + }, + { + "epoch": 0.3771043771043771, + "grad_norm": 0.5076086521148682, + "learning_rate": 3.9479631455988334e-05, + "loss": 0.0586, + "step": 112 + }, + { + "epoch": 0.38047138047138046, + "grad_norm": 3.4678847789764404, + "learning_rate": 3.9238839402844724e-05, + "loss": 0.5776, + "step": 113 + }, + { + "epoch": 0.3838383838383838, + "grad_norm": 0.7102794647216797, + "learning_rate": 3.8996076076563334e-05, + "loss": 0.0743, + "step": 114 + }, + { + "epoch": 0.3872053872053872, + "grad_norm": 1.857340693473816, + "learning_rate": 3.875137508612103e-05, + "loss": 0.1731, + "step": 115 + }, + { + "epoch": 0.39057239057239057, + "grad_norm": 1.7878767251968384, + "learning_rate": 3.850477030875147e-05, + "loss": 0.1957, + "step": 116 + }, + { + "epoch": 0.3939393939393939, + "grad_norm": 3.4582345485687256, + "learning_rate": 3.825629588525498e-05, + "loss": 0.254, + "step": 117 + }, + { + "epoch": 0.39730639730639733, + "grad_norm": 3.044245481491089, + "learning_rate": 3.800598621527205e-05, + "loss": 0.3621, + "step": 118 + }, + { + "epoch": 0.4006734006734007, + "grad_norm": 2.234395742416382, + "learning_rate": 3.775387595252094e-05, + "loss": 0.5613, + "step": 119 + }, + { + "epoch": 0.40404040404040403, + "grad_norm": 2.256892442703247, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.6154, + "step": 120 + }, + { + "epoch": 0.4074074074074074, + "grad_norm": 1.4057880640029907, + "learning_rate": 3.724439350515571e-05, + "loss": 0.1662, + "step": 121 + }, + { + "epoch": 0.4107744107744108, + "grad_norm": 3.190229654312134, + "learning_rate": 3.6987091855016665e-05, + "loss": 0.2788, + "step": 122 + }, + { + "epoch": 0.41414141414141414, + "grad_norm": 2.6102352142333984, + "learning_rate": 3.672813067129449e-05, + "loss": 0.2033, + "step": 123 + }, + { + "epoch": 0.4175084175084175, + "grad_norm": 5.529795169830322, + "learning_rate": 3.646754580545226e-05, + "loss": 0.8619, + "step": 124 + }, + { + "epoch": 0.4208754208754209, + "grad_norm": 1.412796974182129, + "learning_rate": 3.6205373333741136e-05, + "loss": 0.0854, + "step": 125 + }, + { + "epoch": 0.42424242424242425, + "grad_norm": 1.929734706878662, + "learning_rate": 3.594164955220577e-05, + "loss": 0.5321, + "step": 126 + }, + { + "epoch": 0.4276094276094276, + "grad_norm": 2.2332234382629395, + "learning_rate": 3.56764109716594e-05, + "loss": 0.734, + "step": 127 + }, + { + "epoch": 0.43097643097643096, + "grad_norm": 2.0864946842193604, + "learning_rate": 3.540969431262919e-05, + "loss": 0.4026, + "step": 128 + }, + { + "epoch": 0.43434343434343436, + "grad_norm": 0.3298185467720032, + "learning_rate": 3.514153650027249e-05, + "loss": 0.0498, + "step": 129 + }, + { + "epoch": 0.4377104377104377, + "grad_norm": 1.2375303506851196, + "learning_rate": 3.487197465926478e-05, + "loss": 0.0613, + "step": 130 + }, + { + "epoch": 0.44107744107744107, + "grad_norm": 0.7785205245018005, + "learning_rate": 3.460104610866003e-05, + "loss": 0.1058, + "step": 131 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 2.9774363040924072, + "learning_rate": 3.4328788356724134e-05, + "loss": 0.2498, + "step": 132 + }, + { + "epoch": 0.4478114478114478, + "grad_norm": 1.7193303108215332, + "learning_rate": 3.4055239095742064e-05, + "loss": 0.1332, + "step": 133 + }, + { + "epoch": 0.4511784511784512, + "grad_norm": 3.84416127204895, + "learning_rate": 3.3780436196799734e-05, + "loss": 0.6439, + "step": 134 + }, + { + "epoch": 0.45454545454545453, + "grad_norm": 2.0642948150634766, + "learning_rate": 3.350441770454092e-05, + "loss": 0.4133, + "step": 135 + }, + { + "epoch": 0.45791245791245794, + "grad_norm": 1.8204262256622314, + "learning_rate": 3.322722183190025e-05, + "loss": 0.518, + "step": 136 + }, + { + "epoch": 0.4612794612794613, + "grad_norm": 10.550874710083008, + "learning_rate": 3.2948886954812876e-05, + "loss": 1.2434, + "step": 137 + }, + { + "epoch": 0.46464646464646464, + "grad_norm": 2.6596121788024902, + "learning_rate": 3.2669451606901596e-05, + "loss": 0.3132, + "step": 138 + }, + { + "epoch": 0.468013468013468, + "grad_norm": 1.824311375617981, + "learning_rate": 3.238895447414211e-05, + "loss": 0.4487, + "step": 139 + }, + { + "epoch": 0.4713804713804714, + "grad_norm": 4.347046852111816, + "learning_rate": 3.210743438950718e-05, + "loss": 0.4392, + "step": 140 + }, + { + "epoch": 0.47474747474747475, + "grad_norm": 0.49011513590812683, + "learning_rate": 3.182493032759053e-05, + "loss": 0.056, + "step": 141 + }, + { + "epoch": 0.4781144781144781, + "grad_norm": 1.8293064832687378, + "learning_rate": 3.154148139921102e-05, + "loss": 0.2903, + "step": 142 + }, + { + "epoch": 0.48148148148148145, + "grad_norm": 2.0688326358795166, + "learning_rate": 3.1257126845997995e-05, + "loss": 0.4253, + "step": 143 + }, + { + "epoch": 0.48484848484848486, + "grad_norm": 0.9706846475601196, + "learning_rate": 3.097190603495861e-05, + "loss": 0.1477, + "step": 144 + }, + { + "epoch": 0.4882154882154882, + "grad_norm": 1.5309983491897583, + "learning_rate": 3.0685858453027666e-05, + "loss": 0.4342, + "step": 145 + }, + { + "epoch": 0.49158249158249157, + "grad_norm": 0.9750226736068726, + "learning_rate": 3.0399023701600905e-05, + "loss": 0.1603, + "step": 146 + }, + { + "epoch": 0.494949494949495, + "grad_norm": 1.64618980884552, + "learning_rate": 3.0111441491052504e-05, + "loss": 0.2784, + "step": 147 + }, + { + "epoch": 0.4983164983164983, + "grad_norm": 1.9535808563232422, + "learning_rate": 2.9823151635237423e-05, + "loss": 0.2996, + "step": 148 + }, + { + "epoch": 0.5016835016835017, + "grad_norm": 3.526555061340332, + "learning_rate": 2.9534194045979397e-05, + "loss": 1.5967, + "step": 149 + }, + { + "epoch": 0.5050505050505051, + "grad_norm": 0.24972084164619446, + "learning_rate": 2.924460872754547e-05, + "loss": 0.033, + "step": 150 + }, + { + "epoch": 0.5084175084175084, + "grad_norm": 2.380751609802246, + "learning_rate": 2.89544357711076e-05, + "loss": 0.2955, + "step": 151 + }, + { + "epoch": 0.5117845117845118, + "grad_norm": 1.212270736694336, + "learning_rate": 2.8663715349192388e-05, + "loss": 0.1169, + "step": 152 + }, + { + "epoch": 0.5151515151515151, + "grad_norm": 0.3227732479572296, + "learning_rate": 2.8372487710119373e-05, + "loss": 0.0191, + "step": 153 + }, + { + "epoch": 0.5185185185185185, + "grad_norm": 1.4398654699325562, + "learning_rate": 2.8080793172428964e-05, + "loss": 0.1793, + "step": 154 + }, + { + "epoch": 0.5218855218855218, + "grad_norm": 4.784367084503174, + "learning_rate": 2.7788672119300613e-05, + "loss": 0.3231, + "step": 155 + }, + { + "epoch": 0.5252525252525253, + "grad_norm": 0.6798835396766663, + "learning_rate": 2.7496164992961993e-05, + "loss": 0.0775, + "step": 156 + }, + { + "epoch": 0.5286195286195287, + "grad_norm": 3.9313294887542725, + "learning_rate": 2.7203312289090048e-05, + "loss": 0.2993, + "step": 157 + }, + { + "epoch": 0.531986531986532, + "grad_norm": 2.1782889366149902, + "learning_rate": 2.691015455120468e-05, + "loss": 0.1773, + "step": 158 + }, + { + "epoch": 0.5353535353535354, + "grad_norm": 3.120492696762085, + "learning_rate": 2.6616732365055713e-05, + "loss": 0.4819, + "step": 159 + }, + { + "epoch": 0.5387205387205387, + "grad_norm": 1.880778193473816, + "learning_rate": 2.6323086353004078e-05, + "loss": 0.1744, + "step": 160 + }, + { + "epoch": 0.5420875420875421, + "grad_norm": 1.3111162185668945, + "learning_rate": 2.6029257168397947e-05, + "loss": 0.1357, + "step": 161 + }, + { + "epoch": 0.5454545454545454, + "grad_norm": 2.0314598083496094, + "learning_rate": 2.5735285489944487e-05, + "loss": 0.4442, + "step": 162 + }, + { + "epoch": 0.5488215488215489, + "grad_norm": 2.741291046142578, + "learning_rate": 2.544121201607822e-05, + "loss": 0.7485, + "step": 163 + }, + { + "epoch": 0.5521885521885522, + "grad_norm": 0.8405356407165527, + "learning_rate": 2.5147077459326556e-05, + "loss": 0.0897, + "step": 164 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.9963136315345764, + "learning_rate": 2.4852922540673447e-05, + "loss": 0.1057, + "step": 165 + }, + { + "epoch": 0.5589225589225589, + "grad_norm": 4.5743584632873535, + "learning_rate": 2.4558787983921787e-05, + "loss": 1.1172, + "step": 166 + }, + { + "epoch": 0.5622895622895623, + "grad_norm": 0.49469229578971863, + "learning_rate": 2.4264714510055516e-05, + "loss": 0.0507, + "step": 167 + }, + { + "epoch": 0.5656565656565656, + "grad_norm": 5.110591411590576, + "learning_rate": 2.3970742831602062e-05, + "loss": 0.6335, + "step": 168 + }, + { + "epoch": 0.569023569023569, + "grad_norm": 1.0456212759017944, + "learning_rate": 2.367691364699592e-05, + "loss": 0.1245, + "step": 169 + }, + { + "epoch": 0.5723905723905723, + "grad_norm": 3.8197057247161865, + "learning_rate": 2.338326763494429e-05, + "loss": 0.5504, + "step": 170 + }, + { + "epoch": 0.5757575757575758, + "grad_norm": 0.5079836249351501, + "learning_rate": 2.308984544879533e-05, + "loss": 0.0567, + "step": 171 + }, + { + "epoch": 0.5791245791245792, + "grad_norm": 1.7494263648986816, + "learning_rate": 2.2796687710909964e-05, + "loss": 0.1202, + "step": 172 + }, + { + "epoch": 0.5824915824915825, + "grad_norm": 1.3403123617172241, + "learning_rate": 2.2503835007038023e-05, + "loss": 0.1395, + "step": 173 + }, + { + "epoch": 0.5858585858585859, + "grad_norm": 4.487949848175049, + "learning_rate": 2.221132788069939e-05, + "loss": 0.2783, + "step": 174 + }, + { + "epoch": 0.5892255892255892, + "grad_norm": 0.7322219610214233, + "learning_rate": 2.191920682757104e-05, + "loss": 0.0957, + "step": 175 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 10.600361824035645, + "learning_rate": 2.162751228988063e-05, + "loss": 0.7983, + "step": 176 + }, + { + "epoch": 0.5959595959595959, + "grad_norm": 1.510453462600708, + "learning_rate": 2.1336284650807615e-05, + "loss": 0.1831, + "step": 177 + }, + { + "epoch": 0.5993265993265994, + "grad_norm": 0.6158142685890198, + "learning_rate": 2.1045564228892404e-05, + "loss": 0.0762, + "step": 178 + }, + { + "epoch": 0.6026936026936027, + "grad_norm": 1.5894421339035034, + "learning_rate": 2.0755391272454537e-05, + "loss": 0.1448, + "step": 179 + }, + { + "epoch": 0.6060606060606061, + "grad_norm": 2.298612594604492, + "learning_rate": 2.04658059540206e-05, + "loss": 0.1969, + "step": 180 + }, + { + "epoch": 0.6094276094276094, + "grad_norm": 3.3239121437072754, + "learning_rate": 2.017684836476258e-05, + "loss": 0.4061, + "step": 181 + }, + { + "epoch": 0.6127946127946128, + "grad_norm": 1.9558825492858887, + "learning_rate": 1.9888558508947495e-05, + "loss": 0.5793, + "step": 182 + }, + { + "epoch": 0.6161616161616161, + "grad_norm": 0.9651542901992798, + "learning_rate": 1.960097629839911e-05, + "loss": 0.1177, + "step": 183 + }, + { + "epoch": 0.6195286195286195, + "grad_norm": 2.647695541381836, + "learning_rate": 1.9314141546972343e-05, + "loss": 0.5155, + "step": 184 + }, + { + "epoch": 0.622895622895623, + "grad_norm": 2.941695213317871, + "learning_rate": 1.9028093965041394e-05, + "loss": 0.2956, + "step": 185 + }, + { + "epoch": 0.6262626262626263, + "grad_norm": 1.9183754920959473, + "learning_rate": 1.8742873154002004e-05, + "loss": 0.2758, + "step": 186 + }, + { + "epoch": 0.6296296296296297, + "grad_norm": 0.28313329815864563, + "learning_rate": 1.845851860078899e-05, + "loss": 0.0181, + "step": 187 + }, + { + "epoch": 0.632996632996633, + "grad_norm": 1.3280785083770752, + "learning_rate": 1.8175069672409475e-05, + "loss": 0.0636, + "step": 188 + }, + { + "epoch": 0.6363636363636364, + "grad_norm": 5.026294231414795, + "learning_rate": 1.789256561049283e-05, + "loss": 0.3681, + "step": 189 + }, + { + "epoch": 0.6397306397306397, + "grad_norm": 3.3546791076660156, + "learning_rate": 1.76110455258579e-05, + "loss": 0.2501, + "step": 190 + }, + { + "epoch": 0.6430976430976431, + "grad_norm": 3.2179019451141357, + "learning_rate": 1.7330548393098407e-05, + "loss": 0.3801, + "step": 191 + }, + { + "epoch": 0.6464646464646465, + "grad_norm": 2.4074902534484863, + "learning_rate": 1.7051113045187123e-05, + "loss": 0.4583, + "step": 192 + }, + { + "epoch": 0.6498316498316499, + "grad_norm": 0.6032189130783081, + "learning_rate": 1.677277816809975e-05, + "loss": 0.0672, + "step": 193 + }, + { + "epoch": 0.6531986531986532, + "grad_norm": 0.8865869045257568, + "learning_rate": 1.649558229545908e-05, + "loss": 0.084, + "step": 194 + }, + { + "epoch": 0.6565656565656566, + "grad_norm": 5.032468318939209, + "learning_rate": 1.621956380320027e-05, + "loss": 0.24, + "step": 195 + }, + { + "epoch": 0.6599326599326599, + "grad_norm": 2.6365160942077637, + "learning_rate": 1.5944760904257945e-05, + "loss": 0.1649, + "step": 196 + }, + { + "epoch": 0.6632996632996633, + "grad_norm": 1.2071051597595215, + "learning_rate": 1.5671211643275875e-05, + "loss": 0.1186, + "step": 197 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.4904046952724457, + "learning_rate": 1.539895389133997e-05, + "loss": 0.0474, + "step": 198 + }, + { + "epoch": 0.67003367003367, + "grad_norm": 2.9437437057495117, + "learning_rate": 1.5128025340735222e-05, + "loss": 0.5749, + "step": 199 + }, + { + "epoch": 0.6734006734006734, + "grad_norm": 2.5604848861694336, + "learning_rate": 1.485846349972751e-05, + "loss": 0.1839, + "step": 200 + }, + { + "epoch": 0.6734006734006734, + "eval_loss": 0.3746274411678314, + "eval_runtime": 85.7661, + "eval_samples_per_second": 3.498, + "eval_steps_per_second": 1.749, + "step": 200 + }, + { + "epoch": 0.6767676767676768, + "grad_norm": 5.570250511169434, + "learning_rate": 1.4590305687370811e-05, + "loss": 1.0595, + "step": 201 + }, + { + "epoch": 0.6801346801346801, + "grad_norm": 3.682399272918701, + "learning_rate": 1.4323589028340597e-05, + "loss": 0.2895, + "step": 202 + }, + { + "epoch": 0.6835016835016835, + "grad_norm": 1.855084776878357, + "learning_rate": 1.4058350447794236e-05, + "loss": 0.1403, + "step": 203 + }, + { + "epoch": 0.6868686868686869, + "grad_norm": 0.9726030826568604, + "learning_rate": 1.3794626666258867e-05, + "loss": 0.0708, + "step": 204 + }, + { + "epoch": 0.6902356902356902, + "grad_norm": 5.807008743286133, + "learning_rate": 1.3532454194547734e-05, + "loss": 0.9473, + "step": 205 + }, + { + "epoch": 0.6936026936026936, + "grad_norm": 0.9416353106498718, + "learning_rate": 1.3271869328705517e-05, + "loss": 0.0985, + "step": 206 + }, + { + "epoch": 0.696969696969697, + "grad_norm": 2.455057144165039, + "learning_rate": 1.3012908144983352e-05, + "loss": 0.1944, + "step": 207 + }, + { + "epoch": 0.7003367003367004, + "grad_norm": 5.791993141174316, + "learning_rate": 1.2755606494844293e-05, + "loss": 0.4686, + "step": 208 + }, + { + "epoch": 0.7037037037037037, + "grad_norm": 6.4257354736328125, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.6278, + "step": 209 + }, + { + "epoch": 0.7070707070707071, + "grad_norm": 2.113513231277466, + "learning_rate": 1.2246124047479073e-05, + "loss": 0.1345, + "step": 210 + }, + { + "epoch": 0.7104377104377104, + "grad_norm": 2.2445285320281982, + "learning_rate": 1.1994013784727948e-05, + "loss": 0.0945, + "step": 211 + }, + { + "epoch": 0.7138047138047138, + "grad_norm": 2.6326494216918945, + "learning_rate": 1.174370411474503e-05, + "loss": 0.1187, + "step": 212 + }, + { + "epoch": 0.7171717171717171, + "grad_norm": 3.0307300090789795, + "learning_rate": 1.1495229691248544e-05, + "loss": 0.196, + "step": 213 + }, + { + "epoch": 0.7205387205387206, + "grad_norm": 6.212864398956299, + "learning_rate": 1.1248624913878966e-05, + "loss": 0.7416, + "step": 214 + }, + { + "epoch": 0.7239057239057239, + "grad_norm": 0.9370779991149902, + "learning_rate": 1.100392392343667e-05, + "loss": 0.0849, + "step": 215 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 4.128119945526123, + "learning_rate": 1.0761160597155287e-05, + "loss": 0.3855, + "step": 216 + }, + { + "epoch": 0.7306397306397306, + "grad_norm": 1.274412751197815, + "learning_rate": 1.0520368544011661e-05, + "loss": 0.1368, + "step": 217 + }, + { + "epoch": 0.734006734006734, + "grad_norm": 3.2947192192077637, + "learning_rate": 1.028158110007294e-05, + "loss": 0.3072, + "step": 218 + }, + { + "epoch": 0.7373737373737373, + "grad_norm": 5.202561378479004, + "learning_rate": 1.0044831323881357e-05, + "loss": 0.9426, + "step": 219 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 3.739136219024658, + "learning_rate": 9.81015199187753e-06, + "loss": 0.2597, + "step": 220 + }, + { + "epoch": 0.7441077441077442, + "grad_norm": 1.199344515800476, + "learning_rate": 9.577575593862776e-06, + "loss": 0.0967, + "step": 221 + }, + { + "epoch": 0.7474747474747475, + "grad_norm": 4.694489479064941, + "learning_rate": 9.347134328501097e-06, + "loss": 0.3515, + "step": 222 + }, + { + "epoch": 0.7508417508417509, + "grad_norm": 1.2641700506210327, + "learning_rate": 9.118860098861537e-06, + "loss": 0.098, + "step": 223 + }, + { + "epoch": 0.7542087542087542, + "grad_norm": 0.531539261341095, + "learning_rate": 8.892784508001342e-06, + "loss": 0.0584, + "step": 224 + }, + { + "epoch": 0.7575757575757576, + "grad_norm": 0.9697759747505188, + "learning_rate": 8.668938854590763e-06, + "loss": 0.088, + "step": 225 + }, + { + "epoch": 0.7609427609427609, + "grad_norm": 0.25416943430900574, + "learning_rate": 8.44735412857999e-06, + "loss": 0.0255, + "step": 226 + }, + { + "epoch": 0.7643097643097643, + "grad_norm": 0.18322022259235382, + "learning_rate": 8.228061006908738e-06, + "loss": 0.0117, + "step": 227 + }, + { + "epoch": 0.7676767676767676, + "grad_norm": 3.150855302810669, + "learning_rate": 8.011089849259262e-06, + "loss": 0.1673, + "step": 228 + }, + { + "epoch": 0.7710437710437711, + "grad_norm": 7.939763069152832, + "learning_rate": 7.79647069385328e-06, + "loss": 0.4404, + "step": 229 + }, + { + "epoch": 0.7744107744107744, + "grad_norm": 3.137610673904419, + "learning_rate": 7.584233253293327e-06, + "loss": 0.2719, + "step": 230 + }, + { + "epoch": 0.7777777777777778, + "grad_norm": 0.15927521884441376, + "learning_rate": 7.374406910449277e-06, + "loss": 0.0173, + "step": 231 + }, + { + "epoch": 0.7811447811447811, + "grad_norm": 5.29977560043335, + "learning_rate": 7.167020714390501e-06, + "loss": 0.8334, + "step": 232 + }, + { + "epoch": 0.7845117845117845, + "grad_norm": 1.7137696743011475, + "learning_rate": 6.9621033763641405e-06, + "loss": 0.1309, + "step": 233 + }, + { + "epoch": 0.7878787878787878, + "grad_norm": 2.072476625442505, + "learning_rate": 6.759683265820293e-06, + "loss": 0.1964, + "step": 234 + }, + { + "epoch": 0.7912457912457912, + "grad_norm": 6.1229071617126465, + "learning_rate": 6.559788406484446e-06, + "loss": 0.773, + "step": 235 + }, + { + "epoch": 0.7946127946127947, + "grad_norm": 5.602017402648926, + "learning_rate": 6.36244647247774e-06, + "loss": 0.4135, + "step": 236 + }, + { + "epoch": 0.797979797979798, + "grad_norm": 2.6853528022766113, + "learning_rate": 6.16768478448568e-06, + "loss": 0.1808, + "step": 237 + }, + { + "epoch": 0.8013468013468014, + "grad_norm": 2.6617848873138428, + "learning_rate": 5.975530305975807e-06, + "loss": 0.4367, + "step": 238 + }, + { + "epoch": 0.8047138047138047, + "grad_norm": 11.069436073303223, + "learning_rate": 5.786009639464729e-06, + "loss": 1.0165, + "step": 239 + }, + { + "epoch": 0.8080808080808081, + "grad_norm": 3.2721519470214844, + "learning_rate": 5.599149022835201e-06, + "loss": 0.1506, + "step": 240 + }, + { + "epoch": 0.8114478114478114, + "grad_norm": 4.210508346557617, + "learning_rate": 5.414974325703687e-06, + "loss": 0.4722, + "step": 241 + }, + { + "epoch": 0.8148148148148148, + "grad_norm": 1.4586809873580933, + "learning_rate": 5.233511045838846e-06, + "loss": 0.1327, + "step": 242 + }, + { + "epoch": 0.8181818181818182, + "grad_norm": 3.937086582183838, + "learning_rate": 5.0547843056315465e-06, + "loss": 0.1234, + "step": 243 + }, + { + "epoch": 0.8215488215488216, + "grad_norm": 1.43894624710083, + "learning_rate": 4.8788188486168614e-06, + "loss": 0.1267, + "step": 244 + }, + { + "epoch": 0.8249158249158249, + "grad_norm": 6.41754674911499, + "learning_rate": 4.70563903604844e-06, + "loss": 0.2823, + "step": 245 + }, + { + "epoch": 0.8282828282828283, + "grad_norm": 0.990208089351654, + "learning_rate": 4.5352688435259085e-06, + "loss": 0.0861, + "step": 246 + }, + { + "epoch": 0.8316498316498316, + "grad_norm": 2.6447746753692627, + "learning_rate": 4.367731857675569e-06, + "loss": 0.2492, + "step": 247 + }, + { + "epoch": 0.835016835016835, + "grad_norm": 1.1281733512878418, + "learning_rate": 4.203051272884994e-06, + "loss": 0.0914, + "step": 248 + }, + { + "epoch": 0.8383838383838383, + "grad_norm": 4.460391521453857, + "learning_rate": 4.041249888091941e-06, + "loss": 1.0043, + "step": 249 + }, + { + "epoch": 0.8417508417508418, + "grad_norm": 5.020413398742676, + "learning_rate": 3.8823501036279515e-06, + "loss": 0.3346, + "step": 250 + }, + { + "epoch": 0.8451178451178452, + "grad_norm": 1.2148274183273315, + "learning_rate": 3.726373918117196e-06, + "loss": 0.1155, + "step": 251 + }, + { + "epoch": 0.8484848484848485, + "grad_norm": 7.232808589935303, + "learning_rate": 3.5733429254309254e-06, + "loss": 1.1907, + "step": 252 + }, + { + "epoch": 0.8518518518518519, + "grad_norm": 6.413627624511719, + "learning_rate": 3.4232783116978972e-06, + "loss": 0.8409, + "step": 253 + }, + { + "epoch": 0.8552188552188552, + "grad_norm": 6.728570938110352, + "learning_rate": 3.276200852371339e-06, + "loss": 1.2687, + "step": 254 + }, + { + "epoch": 0.8585858585858586, + "grad_norm": 1.2029856443405151, + "learning_rate": 3.1321309093527094e-06, + "loss": 0.0848, + "step": 255 + }, + { + "epoch": 0.8619528619528619, + "grad_norm": 1.358394980430603, + "learning_rate": 2.9910884281727223e-06, + "loss": 0.1076, + "step": 256 + }, + { + "epoch": 0.8653198653198653, + "grad_norm": 2.925739049911499, + "learning_rate": 2.853092935230009e-06, + "loss": 0.6163, + "step": 257 + }, + { + "epoch": 0.8686868686868687, + "grad_norm": 0.9604540467262268, + "learning_rate": 2.718163535087864e-06, + "loss": 0.1165, + "step": 258 + }, + { + "epoch": 0.8720538720538721, + "grad_norm": 6.656581401824951, + "learning_rate": 2.5863189078292908e-06, + "loss": 1.2572, + "step": 259 + }, + { + "epoch": 0.8754208754208754, + "grad_norm": 2.5408132076263428, + "learning_rate": 2.4575773064708902e-06, + "loss": 0.1135, + "step": 260 + }, + { + "epoch": 0.8787878787878788, + "grad_norm": 2.7175872325897217, + "learning_rate": 2.331956554435863e-06, + "loss": 0.4451, + "step": 261 + }, + { + "epoch": 0.8821548821548821, + "grad_norm": 5.1762847900390625, + "learning_rate": 2.209474043086457e-06, + "loss": 0.79, + "step": 262 + }, + { + "epoch": 0.8855218855218855, + "grad_norm": 0.2948024868965149, + "learning_rate": 2.0901467293162445e-06, + "loss": 0.0259, + "step": 263 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 3.9850306510925293, + "learning_rate": 1.9739911332025794e-06, + "loss": 0.2737, + "step": 264 + }, + { + "epoch": 0.8922558922558923, + "grad_norm": 3.173600435256958, + "learning_rate": 1.861023335719475e-06, + "loss": 0.6159, + "step": 265 + }, + { + "epoch": 0.8956228956228957, + "grad_norm": 1.6647801399230957, + "learning_rate": 1.7512589765112997e-06, + "loss": 0.1405, + "step": 266 + }, + { + "epoch": 0.898989898989899, + "grad_norm": 6.237017631530762, + "learning_rate": 1.6447132517276005e-06, + "loss": 0.3721, + "step": 267 + }, + { + "epoch": 0.9023569023569024, + "grad_norm": 2.961333990097046, + "learning_rate": 1.5414009119192635e-06, + "loss": 0.3865, + "step": 268 + }, + { + "epoch": 0.9057239057239057, + "grad_norm": 0.31503304839134216, + "learning_rate": 1.441336259996412e-06, + "loss": 0.0351, + "step": 269 + }, + { + "epoch": 0.9090909090909091, + "grad_norm": 0.06689069420099258, + "learning_rate": 1.3445331492482616e-06, + "loss": 0.0079, + "step": 270 + }, + { + "epoch": 0.9124579124579124, + "grad_norm": 0.2639155983924866, + "learning_rate": 1.25100498142523e-06, + "loss": 0.0287, + "step": 271 + }, + { + "epoch": 0.9158249158249159, + "grad_norm": 3.075798749923706, + "learning_rate": 1.1607647048835463e-06, + "loss": 0.8147, + "step": 272 + }, + { + "epoch": 0.9191919191919192, + "grad_norm": 1.5422130823135376, + "learning_rate": 1.0738248127926342e-06, + "loss": 0.1195, + "step": 273 + }, + { + "epoch": 0.9225589225589226, + "grad_norm": 2.370008707046509, + "learning_rate": 9.901973414055188e-07, + "loss": 0.3816, + "step": 274 + }, + { + "epoch": 0.9259259259259259, + "grad_norm": 1.1826757192611694, + "learning_rate": 9.098938683924973e-07, + "loss": 0.1087, + "step": 275 + }, + { + "epoch": 0.9292929292929293, + "grad_norm": 2.1818995475769043, + "learning_rate": 8.329255112382667e-07, + "loss": 0.1482, + "step": 276 + }, + { + "epoch": 0.9326599326599326, + "grad_norm": 2.872901439666748, + "learning_rate": 7.593029257027956e-07, + "loss": 0.1081, + "step": 277 + }, + { + "epoch": 0.936026936026936, + "grad_norm": 4.328135013580322, + "learning_rate": 6.890363043461051e-07, + "loss": 0.4199, + "step": 278 + }, + { + "epoch": 0.9393939393939394, + "grad_norm": 1.1321898698806763, + "learning_rate": 6.221353751171666e-07, + "loss": 0.1005, + "step": 279 + }, + { + "epoch": 0.9427609427609428, + "grad_norm": 2.311398983001709, + "learning_rate": 5.586094000071401e-07, + "loss": 0.1339, + "step": 280 + }, + { + "epoch": 0.9461279461279462, + "grad_norm": 5.379133701324463, + "learning_rate": 4.984671737671142e-07, + "loss": 0.788, + "step": 281 + }, + { + "epoch": 0.9494949494949495, + "grad_norm": 3.393548011779785, + "learning_rate": 4.4171702269051874e-07, + "loss": 0.1825, + "step": 282 + }, + { + "epoch": 0.9528619528619529, + "grad_norm": 1.8731484413146973, + "learning_rate": 3.88366803460416e-07, + "loss": 0.1172, + "step": 283 + }, + { + "epoch": 0.9562289562289562, + "grad_norm": 1.6887699365615845, + "learning_rate": 3.3842390206180186e-07, + "loss": 0.1207, + "step": 284 + }, + { + "epoch": 0.9595959595959596, + "grad_norm": 0.7890358567237854, + "learning_rate": 2.918952327590374e-07, + "loss": 0.08, + "step": 285 + }, + { + "epoch": 0.9629629629629629, + "grad_norm": 3.858976125717163, + "learning_rate": 2.487872371386424e-07, + "loss": 0.25, + "step": 286 + }, + { + "epoch": 0.9663299663299664, + "grad_norm": 5.475775718688965, + "learning_rate": 2.0910588321748915e-07, + "loss": 0.4324, + "step": 287 + }, + { + "epoch": 0.9696969696969697, + "grad_norm": 0.822411298751831, + "learning_rate": 1.7285666461657468e-07, + "loss": 0.0659, + "step": 288 + }, + { + "epoch": 0.9730639730639731, + "grad_norm": 3.5847160816192627, + "learning_rate": 1.4004459980045125e-07, + "loss": 0.2958, + "step": 289 + }, + { + "epoch": 0.9764309764309764, + "grad_norm": 3.371807813644409, + "learning_rate": 1.1067423138247101e-07, + "loss": 0.3059, + "step": 290 + }, + { + "epoch": 0.9797979797979798, + "grad_norm": 0.7251403331756592, + "learning_rate": 8.47496254958835e-08, + "loss": 0.0638, + "step": 291 + }, + { + "epoch": 0.9831649831649831, + "grad_norm": 1.3507038354873657, + "learning_rate": 6.22743712309054e-08, + "loss": 0.0625, + "step": 292 + }, + { + "epoch": 0.9865319865319865, + "grad_norm": 5.81395149230957, + "learning_rate": 4.325158013783193e-08, + "loss": 0.4981, + "step": 293 + }, + { + "epoch": 0.98989898989899, + "grad_norm": 1.9025434255599976, + "learning_rate": 2.7683885796273014e-08, + "loss": 0.1846, + "step": 294 + }, + { + "epoch": 0.9932659932659933, + "grad_norm": 3.340346336364746, + "learning_rate": 1.5573443450545013e-08, + "loss": 0.9588, + "step": 295 + }, + { + "epoch": 0.9966329966329966, + "grad_norm": 1.9698375463485718, + "learning_rate": 6.921929711287134e-09, + "loss": 0.1476, + "step": 296 + }, + { + "epoch": 1.0, + "grad_norm": 5.463763236999512, + "learning_rate": 1.7305423233554552e-09, + "loss": 0.2402, + "step": 297 + } + ], + "logging_steps": 1, + "max_steps": 297, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 99, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.04984183850795e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp4_subtle_signal_c_189/checkpoint-297/training_args.bin b/exp4_subtle_signal_c_189/checkpoint-297/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a1b630c5deef60e645fa61a43f2629aef0b901e3 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-297/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6de1304e5f14b655e49466bbc76c3f69efc17feaf36f1722623c6a0ae37e58 +size 6033 diff --git a/exp4_subtle_signal_c_189/checkpoint-99/README.md b/exp4_subtle_signal_c_189/checkpoint-99/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-99/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp4_subtle_signal_c_189/checkpoint-99/adapter_config.json b/exp4_subtle_signal_c_189/checkpoint-99/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4d747200220490dff0f82aa49e63f702a859ccc --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-99/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "q_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp4_subtle_signal_c_189/checkpoint-99/adapter_model.safetensors b/exp4_subtle_signal_c_189/checkpoint-99/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d73642a4f2fcc87f9cf94618907637cc25c40a1 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-99/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f07e9701461de852235ab1a52ecf8deaf6f402cd3ba85c5e1bd8d7b392352614 +size 201378736 diff --git a/exp4_subtle_signal_c_189/checkpoint-99/optimizer.pt b/exp4_subtle_signal_c_189/checkpoint-99/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..86c195b57d9e4e0ba7b21429f95940481e9073d6 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-99/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d83a76228be3156c15e91e73d3d0741dba9a6e9598b741d583c4dfd536d9949a +size 402982627 diff --git a/exp4_subtle_signal_c_189/checkpoint-99/rng_state.pth b/exp4_subtle_signal_c_189/checkpoint-99/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef5ac8da68afb8bea8ad1331116be95d18fbb182 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-99/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b293338027ef39107c758bc2300d51015b6a878a9f0e09fe1822d419dcbb163e +size 14645 diff --git a/exp4_subtle_signal_c_189/checkpoint-99/scheduler.pt b/exp4_subtle_signal_c_189/checkpoint-99/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..18d8e9c26499e2854e2c9f3da639a0b0c806f091 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-99/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfa8a3e98311780b08216cf6102a01d4277e044978f92aa0c0ebace9ca118961 +size 1465 diff --git a/exp4_subtle_signal_c_189/checkpoint-99/trainer_state.json b/exp4_subtle_signal_c_189/checkpoint-99/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0f8153ea9d7ee5716832db1a144d91ebf873c473 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-99/trainer_state.json @@ -0,0 +1,727 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3333333333333333, + "eval_steps": 100, + "global_step": 99, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.003367003367003367, + "grad_norm": 1.417947769165039, + "learning_rate": 0.0, + "loss": 0.5304, + "step": 1 + }, + { + "epoch": 0.006734006734006734, + "grad_norm": 2.331010580062866, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.9609, + "step": 2 + }, + { + "epoch": 0.010101010101010102, + "grad_norm": 1.8590822219848633, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.795, + "step": 3 + }, + { + "epoch": 0.013468013468013467, + "grad_norm": 1.316519856452942, + "learning_rate": 5e-06, + "loss": 0.79, + "step": 4 + }, + { + "epoch": 0.016835016835016835, + "grad_norm": 3.6459147930145264, + "learning_rate": 6.666666666666667e-06, + "loss": 1.0392, + "step": 5 + }, + { + "epoch": 0.020202020202020204, + "grad_norm": 1.271788239479065, + "learning_rate": 8.333333333333334e-06, + "loss": 0.468, + "step": 6 + }, + { + "epoch": 0.02356902356902357, + "grad_norm": 1.9651601314544678, + "learning_rate": 1e-05, + "loss": 1.2001, + "step": 7 + }, + { + "epoch": 0.026936026936026935, + "grad_norm": 2.7949764728546143, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.2763, + "step": 8 + }, + { + "epoch": 0.030303030303030304, + "grad_norm": 1.3491120338439941, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.6399, + "step": 9 + }, + { + "epoch": 0.03367003367003367, + "grad_norm": 2.5290658473968506, + "learning_rate": 1.5e-05, + "loss": 1.011, + "step": 10 + }, + { + "epoch": 0.037037037037037035, + "grad_norm": 2.0780346393585205, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.4257, + "step": 11 + }, + { + "epoch": 0.04040404040404041, + "grad_norm": 1.7905006408691406, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.6124, + "step": 12 + }, + { + "epoch": 0.04377104377104377, + "grad_norm": 1.3865525722503662, + "learning_rate": 2e-05, + "loss": 0.4662, + "step": 13 + }, + { + "epoch": 0.04713804713804714, + "grad_norm": 2.365971565246582, + "learning_rate": 2.1666666666666667e-05, + "loss": 0.8486, + "step": 14 + }, + { + "epoch": 0.050505050505050504, + "grad_norm": 1.5668970346450806, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.9114, + "step": 15 + }, + { + "epoch": 0.05387205387205387, + "grad_norm": 1.863726258277893, + "learning_rate": 2.5e-05, + "loss": 0.6151, + "step": 16 + }, + { + "epoch": 0.05723905723905724, + "grad_norm": 1.3187612295150757, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.4592, + "step": 17 + }, + { + "epoch": 0.06060606060606061, + "grad_norm": 2.2115654945373535, + "learning_rate": 2.8333333333333335e-05, + "loss": 1.2559, + "step": 18 + }, + { + "epoch": 0.06397306397306397, + "grad_norm": 2.0446372032165527, + "learning_rate": 3e-05, + "loss": 0.6431, + "step": 19 + }, + { + "epoch": 0.06734006734006734, + "grad_norm": 2.6745316982269287, + "learning_rate": 3.1666666666666666e-05, + "loss": 0.6754, + "step": 20 + }, + { + "epoch": 0.0707070707070707, + "grad_norm": 1.4006273746490479, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.8359, + "step": 21 + }, + { + "epoch": 0.07407407407407407, + "grad_norm": 1.7850373983383179, + "learning_rate": 3.5e-05, + "loss": 0.5039, + "step": 22 + }, + { + "epoch": 0.07744107744107744, + "grad_norm": 2.7081334590911865, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.6163, + "step": 23 + }, + { + "epoch": 0.08080808080808081, + "grad_norm": 0.7826062440872192, + "learning_rate": 3.8333333333333334e-05, + "loss": 0.3723, + "step": 24 + }, + { + "epoch": 0.08417508417508418, + "grad_norm": 2.104018449783325, + "learning_rate": 4e-05, + "loss": 0.5389, + "step": 25 + }, + { + "epoch": 0.08754208754208755, + "grad_norm": 3.8862948417663574, + "learning_rate": 4.166666666666667e-05, + "loss": 0.9006, + "step": 26 + }, + { + "epoch": 0.09090909090909091, + "grad_norm": 3.9396567344665527, + "learning_rate": 4.3333333333333334e-05, + "loss": 0.7355, + "step": 27 + }, + { + "epoch": 0.09427609427609428, + "grad_norm": 3.6403331756591797, + "learning_rate": 4.5e-05, + "loss": 0.7534, + "step": 28 + }, + { + "epoch": 0.09764309764309764, + "grad_norm": 1.080079197883606, + "learning_rate": 4.666666666666667e-05, + "loss": 0.4997, + "step": 29 + }, + { + "epoch": 0.10101010101010101, + "grad_norm": 1.1375266313552856, + "learning_rate": 4.8333333333333334e-05, + "loss": 0.4293, + "step": 30 + }, + { + "epoch": 0.10437710437710437, + "grad_norm": 1.7134714126586914, + "learning_rate": 5e-05, + "loss": 0.5407, + "step": 31 + }, + { + "epoch": 0.10774410774410774, + "grad_norm": 1.0918691158294678, + "learning_rate": 4.999826945767665e-05, + "loss": 0.6391, + "step": 32 + }, + { + "epoch": 0.1111111111111111, + "grad_norm": 2.4237120151519775, + "learning_rate": 4.999307807028871e-05, + "loss": 0.4208, + "step": 33 + }, + { + "epoch": 0.11447811447811448, + "grad_norm": 2.146988868713379, + "learning_rate": 4.9984426556549456e-05, + "loss": 0.8804, + "step": 34 + }, + { + "epoch": 0.11784511784511785, + "grad_norm": 0.9194502830505371, + "learning_rate": 4.997231611420373e-05, + "loss": 0.3728, + "step": 35 + }, + { + "epoch": 0.12121212121212122, + "grad_norm": 1.4301484823226929, + "learning_rate": 4.995674841986217e-05, + "loss": 0.6577, + "step": 36 + }, + { + "epoch": 0.12457912457912458, + "grad_norm": 1.9206677675247192, + "learning_rate": 4.9937725628769094e-05, + "loss": 0.9117, + "step": 37 + }, + { + "epoch": 0.12794612794612795, + "grad_norm": 3.3416388034820557, + "learning_rate": 4.991525037450412e-05, + "loss": 0.7887, + "step": 38 + }, + { + "epoch": 0.13131313131313133, + "grad_norm": 1.5981472730636597, + "learning_rate": 4.9889325768617536e-05, + "loss": 0.7348, + "step": 39 + }, + { + "epoch": 0.13468013468013468, + "grad_norm": 1.1423240900039673, + "learning_rate": 4.985995540019955e-05, + "loss": 0.4979, + "step": 40 + }, + { + "epoch": 0.13804713804713806, + "grad_norm": 1.244806170463562, + "learning_rate": 4.982714333538343e-05, + "loss": 0.7312, + "step": 41 + }, + { + "epoch": 0.1414141414141414, + "grad_norm": 1.0441328287124634, + "learning_rate": 4.9790894116782514e-05, + "loss": 0.1754, + "step": 42 + }, + { + "epoch": 0.1447811447811448, + "grad_norm": 3.0658159255981445, + "learning_rate": 4.975121276286136e-05, + "loss": 1.0091, + "step": 43 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 1.8905764818191528, + "learning_rate": 4.970810476724097e-05, + "loss": 0.8969, + "step": 44 + }, + { + "epoch": 0.15151515151515152, + "grad_norm": 1.099610686302185, + "learning_rate": 4.96615760979382e-05, + "loss": 0.6375, + "step": 45 + }, + { + "epoch": 0.15488215488215487, + "grad_norm": 1.4541923999786377, + "learning_rate": 4.9611633196539584e-05, + "loss": 0.5875, + "step": 46 + }, + { + "epoch": 0.15824915824915825, + "grad_norm": 1.8045262098312378, + "learning_rate": 4.955828297730949e-05, + "loss": 0.673, + "step": 47 + }, + { + "epoch": 0.16161616161616163, + "grad_norm": 2.0676040649414062, + "learning_rate": 4.950153282623289e-05, + "loss": 0.6862, + "step": 48 + }, + { + "epoch": 0.16498316498316498, + "grad_norm": 1.5156350135803223, + "learning_rate": 4.9441390599992864e-05, + "loss": 0.8607, + "step": 49 + }, + { + "epoch": 0.16835016835016836, + "grad_norm": 1.9884543418884277, + "learning_rate": 4.937786462488284e-05, + "loss": 0.7067, + "step": 50 + }, + { + "epoch": 0.1717171717171717, + "grad_norm": 1.5458931922912598, + "learning_rate": 4.93109636956539e-05, + "loss": 0.6235, + "step": 51 + }, + { + "epoch": 0.1750841750841751, + "grad_norm": 0.8461848497390747, + "learning_rate": 4.9240697074297206e-05, + "loss": 0.2127, + "step": 52 + }, + { + "epoch": 0.17845117845117844, + "grad_norm": 1.0691157579421997, + "learning_rate": 4.9167074488761735e-05, + "loss": 0.4627, + "step": 53 + }, + { + "epoch": 0.18181818181818182, + "grad_norm": 1.4457998275756836, + "learning_rate": 4.90901061316075e-05, + "loss": 0.4431, + "step": 54 + }, + { + "epoch": 0.18518518518518517, + "grad_norm": 1.366044282913208, + "learning_rate": 4.900980265859448e-05, + "loss": 0.7347, + "step": 55 + }, + { + "epoch": 0.18855218855218855, + "grad_norm": 3.7170326709747314, + "learning_rate": 4.892617518720737e-05, + "loss": 1.0967, + "step": 56 + }, + { + "epoch": 0.1919191919191919, + "grad_norm": 1.113218069076538, + "learning_rate": 4.883923529511646e-05, + "loss": 0.2685, + "step": 57 + }, + { + "epoch": 0.19528619528619529, + "grad_norm": 1.4505512714385986, + "learning_rate": 4.874899501857477e-05, + "loss": 0.3403, + "step": 58 + }, + { + "epoch": 0.19865319865319866, + "grad_norm": 1.139944314956665, + "learning_rate": 4.865546685075174e-05, + "loss": 0.6103, + "step": 59 + }, + { + "epoch": 0.20202020202020202, + "grad_norm": 1.6062753200531006, + "learning_rate": 4.85586637400036e-05, + "loss": 0.5342, + "step": 60 + }, + { + "epoch": 0.2053872053872054, + "grad_norm": 1.4414268732070923, + "learning_rate": 4.8458599088080735e-05, + "loss": 0.5646, + "step": 61 + }, + { + "epoch": 0.20875420875420875, + "grad_norm": 1.5661225318908691, + "learning_rate": 4.83552867482724e-05, + "loss": 0.4134, + "step": 62 + }, + { + "epoch": 0.21212121212121213, + "grad_norm": 1.6068329811096191, + "learning_rate": 4.82487410234887e-05, + "loss": 0.3138, + "step": 63 + }, + { + "epoch": 0.21548821548821548, + "grad_norm": 1.4548122882843018, + "learning_rate": 4.8138976664280536e-05, + "loss": 0.2701, + "step": 64 + }, + { + "epoch": 0.21885521885521886, + "grad_norm": 1.57492196559906, + "learning_rate": 4.8026008866797423e-05, + "loss": 0.753, + "step": 65 + }, + { + "epoch": 0.2222222222222222, + "grad_norm": 2.414822578430176, + "learning_rate": 4.7909853270683756e-05, + "loss": 0.5117, + "step": 66 + }, + { + "epoch": 0.2255892255892256, + "grad_norm": 3.1574034690856934, + "learning_rate": 4.779052595691355e-05, + "loss": 0.5563, + "step": 67 + }, + { + "epoch": 0.22895622895622897, + "grad_norm": 1.9086557626724243, + "learning_rate": 4.7668043445564134e-05, + "loss": 0.3878, + "step": 68 + }, + { + "epoch": 0.23232323232323232, + "grad_norm": 2.0000898838043213, + "learning_rate": 4.754242269352912e-05, + "loss": 0.4596, + "step": 69 + }, + { + "epoch": 0.2356902356902357, + "grad_norm": 2.8848049640655518, + "learning_rate": 4.7413681092170715e-05, + "loss": 0.2728, + "step": 70 + }, + { + "epoch": 0.23905723905723905, + "grad_norm": 2.3199172019958496, + "learning_rate": 4.728183646491214e-05, + "loss": 0.3302, + "step": 71 + }, + { + "epoch": 0.24242424242424243, + "grad_norm": 3.39791202545166, + "learning_rate": 4.7146907064769994e-05, + "loss": 0.4134, + "step": 72 + }, + { + "epoch": 0.24579124579124578, + "grad_norm": 1.6479833126068115, + "learning_rate": 4.700891157182729e-05, + "loss": 0.2988, + "step": 73 + }, + { + "epoch": 0.24915824915824916, + "grad_norm": 3.5772407054901123, + "learning_rate": 4.686786909064729e-05, + "loss": 0.3749, + "step": 74 + }, + { + "epoch": 0.25252525252525254, + "grad_norm": 3.388200283050537, + "learning_rate": 4.6723799147628666e-05, + "loss": 0.6271, + "step": 75 + }, + { + "epoch": 0.2558922558922559, + "grad_norm": 1.0087072849273682, + "learning_rate": 4.6576721688302105e-05, + "loss": 0.123, + "step": 76 + }, + { + "epoch": 0.25925925925925924, + "grad_norm": 3.9229109287261963, + "learning_rate": 4.642665707456908e-05, + "loss": 0.343, + "step": 77 + }, + { + "epoch": 0.26262626262626265, + "grad_norm": 4.32628870010376, + "learning_rate": 4.6273626081882805e-05, + "loss": 1.5331, + "step": 78 + }, + { + "epoch": 0.265993265993266, + "grad_norm": 3.5192296504974365, + "learning_rate": 4.611764989637205e-05, + "loss": 0.341, + "step": 79 + }, + { + "epoch": 0.26936026936026936, + "grad_norm": 3.2966415882110596, + "learning_rate": 4.595875011190807e-05, + "loss": 0.3844, + "step": 80 + }, + { + "epoch": 0.2727272727272727, + "grad_norm": 2.390501022338867, + "learning_rate": 4.579694872711501e-05, + "loss": 0.5238, + "step": 81 + }, + { + "epoch": 0.2760942760942761, + "grad_norm": 1.8676153421401978, + "learning_rate": 4.563226814232444e-05, + "loss": 0.198, + "step": 82 + }, + { + "epoch": 0.27946127946127947, + "grad_norm": 0.5234068632125854, + "learning_rate": 4.5464731156474094e-05, + "loss": 0.0719, + "step": 83 + }, + { + "epoch": 0.2828282828282828, + "grad_norm": 3.9550747871398926, + "learning_rate": 4.529436096395156e-05, + "loss": 0.5313, + "step": 84 + }, + { + "epoch": 0.28619528619528617, + "grad_norm": 2.396087646484375, + "learning_rate": 4.5121181151383143e-05, + "loss": 0.2908, + "step": 85 + }, + { + "epoch": 0.2895622895622896, + "grad_norm": 2.293602466583252, + "learning_rate": 4.494521569436845e-05, + "loss": 0.2391, + "step": 86 + }, + { + "epoch": 0.29292929292929293, + "grad_norm": 3.605764389038086, + "learning_rate": 4.4766488954161154e-05, + "loss": 0.4852, + "step": 87 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.8647552132606506, + "learning_rate": 4.4585025674296315e-05, + "loss": 0.0948, + "step": 88 + }, + { + "epoch": 0.2996632996632997, + "grad_norm": 2.5593700408935547, + "learning_rate": 4.44008509771648e-05, + "loss": 0.2254, + "step": 89 + }, + { + "epoch": 0.30303030303030304, + "grad_norm": 3.6353094577789307, + "learning_rate": 4.421399036053527e-05, + "loss": 0.3688, + "step": 90 + }, + { + "epoch": 0.3063973063973064, + "grad_norm": 3.3246469497680664, + "learning_rate": 4.40244696940242e-05, + "loss": 0.5953, + "step": 91 + }, + { + "epoch": 0.30976430976430974, + "grad_norm": 3.4139394760131836, + "learning_rate": 4.383231521551432e-05, + "loss": 0.1474, + "step": 92 + }, + { + "epoch": 0.31313131313131315, + "grad_norm": 2.9134109020233154, + "learning_rate": 4.363755352752227e-05, + "loss": 0.4875, + "step": 93 + }, + { + "epoch": 0.3164983164983165, + "grad_norm": 9.96104621887207, + "learning_rate": 4.3440211593515554e-05, + "loss": 0.7513, + "step": 94 + }, + { + "epoch": 0.31986531986531985, + "grad_norm": 4.5680389404296875, + "learning_rate": 4.324031673417971e-05, + "loss": 0.6839, + "step": 95 + }, + { + "epoch": 0.32323232323232326, + "grad_norm": 0.8624576926231384, + "learning_rate": 4.3037896623635874e-05, + "loss": 0.0937, + "step": 96 + }, + { + "epoch": 0.3265993265993266, + "grad_norm": 2.9045605659484863, + "learning_rate": 4.283297928560951e-05, + "loss": 0.4072, + "step": 97 + }, + { + "epoch": 0.32996632996632996, + "grad_norm": 3.564232110977173, + "learning_rate": 4.262559308955072e-05, + "loss": 0.6221, + "step": 98 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 4.791253089904785, + "learning_rate": 4.2415766746706674e-05, + "loss": 0.366, + "step": 99 + } + ], + "logging_steps": 1, + "max_steps": 297, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 99, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.832806128359834e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp4_subtle_signal_c_189/checkpoint-99/training_args.bin b/exp4_subtle_signal_c_189/checkpoint-99/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a1b630c5deef60e645fa61a43f2629aef0b901e3 --- /dev/null +++ b/exp4_subtle_signal_c_189/checkpoint-99/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6de1304e5f14b655e49466bbc76c3f69efc17feaf36f1722623c6a0ae37e58 +size 6033 diff --git a/exp4_subtle_signal_c_189/final_model/README.md b/exp4_subtle_signal_c_189/final_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp4_subtle_signal_c_189/final_model/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp4_subtle_signal_c_189/final_model/adapter_config.json b/exp4_subtle_signal_c_189/final_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4d747200220490dff0f82aa49e63f702a859ccc --- /dev/null +++ b/exp4_subtle_signal_c_189/final_model/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "q_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp4_subtle_signal_c_189/final_model/adapter_model.safetensors b/exp4_subtle_signal_c_189/final_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..114cb296fa9e4d4b828008522d38d5e08b854910 --- /dev/null +++ b/exp4_subtle_signal_c_189/final_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b2c91f91b54595638c9b6e9c64295e0afe72b725c0cdc4cd0dc548904771e9b +size 201378736 diff --git a/exp4_subtle_signal_c_189/final_model/training_args.bin b/exp4_subtle_signal_c_189/final_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a1b630c5deef60e645fa61a43f2629aef0b901e3 --- /dev/null +++ b/exp4_subtle_signal_c_189/final_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6de1304e5f14b655e49466bbc76c3f69efc17feaf36f1722623c6a0ae37e58 +size 6033