diff --git a/codellama/c/dataflow_c_pretrained/all_results.json b/codellama/c/dataflow_c_pretrained/all_results.json index 83ca8ccccb78bbd05bfc39fae1f253bc31bdb395..90bfffe14242b2e03c5dd58e37596025330d4548 100644 --- a/codellama/c/dataflow_c_pretrained/all_results.json +++ b/codellama/c/dataflow_c_pretrained/all_results.json @@ -1,8 +1,8 @@ { - "epoch": 1.5076373735369968, - "total_flos": 1.4535297138363187e+18, - "train_loss": 0.11740684490454824, - "train_runtime": 39384.0084, - "train_samples_per_second": 0.772, - "train_steps_per_second": 0.012 + "epoch": 1.2058706862356208, + "total_flos": 1.216645538039931e+18, + "train_loss": 0.10745409297707834, + "train_runtime": 37043.3755, + "train_samples_per_second": 0.657, + "train_steps_per_second": 0.005 } \ No newline at end of file diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-190/README.md b/codellama/c/dataflow_c_pretrained/checkpoint-190/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0 --- /dev/null +++ b/codellama/c/dataflow_c_pretrained/checkpoint-190/README.md @@ -0,0 +1,202 @@ +--- +base_model: ../CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope 
Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_config.json b/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08403128ecb652a98a4e935672da65aa91a5918d --- /dev/null +++ b/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "gate_proj", + "up_proj", + "q_proj", + "o_proj", + "v_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + 
"use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model.safetensors b/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b3a4fe72788a9cd9ee04efc5c639103a8edf531 --- /dev/null +++ b/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9de8d340f5057379260edf56d8c2bf090c3f6e213b999eafc222fced213416fe +size 1156480200 diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model/README.md b/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0 --- /dev/null +++ b/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: ../CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be 
made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model/adapter_config.json b/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08403128ecb652a98a4e935672da65aa91a5918d --- /dev/null +++ b/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "gate_proj", + "up_proj", + "q_proj", + "o_proj", + "v_proj", + "k_proj" + ], + 
"task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model/adapter_model.safetensors b/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b3a4fe72788a9cd9ee04efc5c639103a8edf531 --- /dev/null +++ b/codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9de8d340f5057379260edf56d8c2bf090c3f6e213b999eafc222fced213416fe +size 1156480200 diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/added_tokens.json b/codellama/c/dataflow_c_pretrained/checkpoint-190/added_tokens.json similarity index 100% rename from codellama/c/dataflow_c_pretrained/checkpoint-475/added_tokens.json rename to codellama/c/dataflow_c_pretrained/checkpoint-190/added_tokens.json diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-190/optimizer.pt b/codellama/c/dataflow_c_pretrained/checkpoint-190/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7e5e6cb3d4599de0f65b5f7f658148582f8cd45 --- /dev/null +++ b/codellama/c/dataflow_c_pretrained/checkpoint-190/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6951313c1b248ce4c836696dac190e0dab42809267e147c8304926cfd6019b36 +size 2003126962 diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/rng_state.pth b/codellama/c/dataflow_c_pretrained/checkpoint-190/rng_state.pth similarity index 100% rename from codellama/c/dataflow_c_pretrained/checkpoint-475/rng_state.pth rename to codellama/c/dataflow_c_pretrained/checkpoint-190/rng_state.pth diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-190/scheduler.pt b/codellama/c/dataflow_c_pretrained/checkpoint-190/scheduler.pt new file mode 100644 index 
0000000000000000000000000000000000000000..40ed602a6b8e648dee4651c812d55746c4306967 --- /dev/null +++ b/codellama/c/dataflow_c_pretrained/checkpoint-190/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4326f7a2418d4815e699b330bd26d5f5313efeeb51d248a1c8d3070a922c1ddd +size 1064 diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/special_tokens_map.json b/codellama/c/dataflow_c_pretrained/checkpoint-190/special_tokens_map.json similarity index 100% rename from codellama/c/dataflow_c_pretrained/checkpoint-475/special_tokens_map.json rename to codellama/c/dataflow_c_pretrained/checkpoint-190/special_tokens_map.json diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/tokenizer.model b/codellama/c/dataflow_c_pretrained/checkpoint-190/tokenizer.model similarity index 100% rename from codellama/c/dataflow_c_pretrained/checkpoint-475/tokenizer.model rename to codellama/c/dataflow_c_pretrained/checkpoint-190/tokenizer.model diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/tokenizer_config.json b/codellama/c/dataflow_c_pretrained/checkpoint-190/tokenizer_config.json similarity index 100% rename from codellama/c/dataflow_c_pretrained/checkpoint-475/tokenizer_config.json rename to codellama/c/dataflow_c_pretrained/checkpoint-190/tokenizer_config.json diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-190/trainer_state.json b/codellama/c/dataflow_c_pretrained/checkpoint-190/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..17e9a31953cc892a4be9c09f9427e883c046de69 --- /dev/null +++ b/codellama/c/dataflow_c_pretrained/checkpoint-190/trainer_state.json @@ -0,0 +1,299 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2058706862356208, + "eval_steps": 500, + "global_step": 190, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0317334391114637, + "grad_norm": 
0.060546875, + "learning_rate": 0.0001, + "loss": 0.6421, + "step": 5 + }, + { + "epoch": 0.0634668782229274, + "grad_norm": 0.11572265625, + "learning_rate": 0.0001, + "loss": 0.5213, + "step": 10 + }, + { + "epoch": 0.09520031733439112, + "grad_norm": 0.08251953125, + "learning_rate": 0.0001, + "loss": 0.2925, + "step": 15 + }, + { + "epoch": 0.1269337564458548, + "grad_norm": 0.0634765625, + "learning_rate": 0.0001, + "loss": 0.1978, + "step": 20 + }, + { + "epoch": 0.15866719555731854, + "grad_norm": 0.08251953125, + "learning_rate": 0.0001, + "loss": 0.1538, + "step": 25 + }, + { + "epoch": 0.19040063466878224, + "grad_norm": 0.10888671875, + "learning_rate": 0.0001, + "loss": 0.106, + "step": 30 + }, + { + "epoch": 0.22213407378024594, + "grad_norm": 0.049560546875, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 35 + }, + { + "epoch": 0.2538675128917096, + "grad_norm": 0.310546875, + "learning_rate": 0.0001, + "loss": 0.1215, + "step": 40 + }, + { + "epoch": 0.28560095200317337, + "grad_norm": 0.06494140625, + "learning_rate": 0.0001, + "loss": 0.2476, + "step": 45 + }, + { + "epoch": 0.31733439111463707, + "grad_norm": 0.40234375, + "learning_rate": 0.0001, + "loss": 0.1073, + "step": 50 + }, + { + "epoch": 0.3490678302261008, + "grad_norm": 0.04052734375, + "learning_rate": 0.0001, + "loss": 0.0863, + "step": 55 + }, + { + "epoch": 0.3808012693375645, + "grad_norm": 0.03369140625, + "learning_rate": 0.0001, + "loss": 0.0671, + "step": 60 + }, + { + "epoch": 0.4125347084490282, + "grad_norm": 0.0274658203125, + "learning_rate": 0.0001, + "loss": 0.0493, + "step": 65 + }, + { + "epoch": 0.4442681475604919, + "grad_norm": 0.0277099609375, + "learning_rate": 0.0001, + "loss": 0.0311, + "step": 70 + }, + { + "epoch": 0.4760015866719556, + "grad_norm": 0.01275634765625, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 75 + }, + { + "epoch": 0.5077350257834192, + "grad_norm": 0.06787109375, + "learning_rate": 0.0001, + "loss": 0.1307, + "step": 80 + 
}, + { + "epoch": 0.539468464894883, + "grad_norm": 0.050048828125, + "learning_rate": 0.0001, + "loss": 0.171, + "step": 85 + }, + { + "epoch": 0.5712019040063467, + "grad_norm": 0.060791015625, + "learning_rate": 0.0001, + "loss": 0.0818, + "step": 90 + }, + { + "epoch": 0.6029353431178104, + "grad_norm": 0.033203125, + "learning_rate": 0.0001, + "loss": 0.0658, + "step": 95 + }, + { + "epoch": 0.6346687822292741, + "grad_norm": 0.0235595703125, + "learning_rate": 0.0001, + "loss": 0.046, + "step": 100 + }, + { + "epoch": 0.6664022213407378, + "grad_norm": 0.0299072265625, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 105 + }, + { + "epoch": 0.6981356604522015, + "grad_norm": 0.0181884765625, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 110 + }, + { + "epoch": 0.7298690995636652, + "grad_norm": 0.019775390625, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 115 + }, + { + "epoch": 0.761602538675129, + "grad_norm": 0.060791015625, + "learning_rate": 0.0001, + "loss": 0.1381, + "step": 120 + }, + { + "epoch": 0.7933359777865926, + "grad_norm": 0.038818359375, + "learning_rate": 0.0001, + "loss": 0.1125, + "step": 125 + }, + { + "epoch": 0.8250694168980564, + "grad_norm": 0.032958984375, + "learning_rate": 0.0001, + "loss": 0.062, + "step": 130 + }, + { + "epoch": 0.85680285600952, + "grad_norm": 0.03173828125, + "learning_rate": 0.0001, + "loss": 0.0526, + "step": 135 + }, + { + "epoch": 0.8885362951209838, + "grad_norm": 0.02392578125, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 140 + }, + { + "epoch": 0.9202697342324474, + "grad_norm": 0.027099609375, + "learning_rate": 0.0001, + "loss": 0.027, + "step": 145 + }, + { + "epoch": 0.9520031733439112, + "grad_norm": 0.02294921875, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 150 + }, + { + "epoch": 0.9837366124553748, + "grad_norm": 0.02099609375, + "learning_rate": 0.0001, + "loss": 0.005, + "step": 155 + }, + { + "epoch": 1.0154700515668384, + "grad_norm": 0.0703125, + 
"learning_rate": 0.0001, + "loss": 0.1291, + "step": 160 + }, + { + "epoch": 1.0472034906783023, + "grad_norm": 0.04052734375, + "learning_rate": 0.0001, + "loss": 0.1033, + "step": 165 + }, + { + "epoch": 1.078936929789766, + "grad_norm": 0.03173828125, + "learning_rate": 0.0001, + "loss": 0.0539, + "step": 170 + }, + { + "epoch": 1.1106703689012296, + "grad_norm": 0.0299072265625, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 175 + }, + { + "epoch": 1.1424038080126935, + "grad_norm": 0.0262451171875, + "learning_rate": 0.0001, + "loss": 0.0303, + "step": 180 + }, + { + "epoch": 1.1741372471241571, + "grad_norm": 0.060791015625, + "learning_rate": 0.0001, + "loss": 0.0239, + "step": 185 + }, + { + "epoch": 1.2058706862356208, + "grad_norm": 0.015625, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 190 + } + ], + "logging_steps": 5, + "max_steps": 190, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.216645538039931e+18, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-190/training_args.bin b/codellama/c/dataflow_c_pretrained/checkpoint-190/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4a2d49e40973cd8ce0bb68429be56f03108f0db --- /dev/null +++ b/codellama/c/dataflow_c_pretrained/checkpoint-190/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91f41d6ab0b3b00576cfaae17b2c89c881cde6b3ddf94e79209bf6c926c2f26a +size 7416 diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_model.safetensors b/codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_model.safetensors deleted file mode 100644 index 
fbe0ddd208b18ae490cd620a13be9649f3a9137c..0000000000000000000000000000000000000000 --- a/codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e3bd7cb053c3e00ea48ed365eed4b65ae5e2d7d807e71ec5615d765dfba19de -size 1156480200 diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_model/adapter_model.safetensors b/codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_model/adapter_model.safetensors deleted file mode 100644 index fbe0ddd208b18ae490cd620a13be9649f3a9137c..0000000000000000000000000000000000000000 --- a/codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_model/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e3bd7cb053c3e00ea48ed365eed4b65ae5e2d7d807e71ec5615d765dfba19de -size 1156480200 diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/optimizer.pt b/codellama/c/dataflow_c_pretrained/checkpoint-475/optimizer.pt deleted file mode 100644 index 967cc2106211d6a7f289e2d14d78e9c6b19d67fd..0000000000000000000000000000000000000000 --- a/codellama/c/dataflow_c_pretrained/checkpoint-475/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cb15df4736bb45403f2444791e0af6b8cb6ce098124d0e654e1c324cac779265 -size 2003127538 diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/scheduler.pt b/codellama/c/dataflow_c_pretrained/checkpoint-475/scheduler.pt deleted file mode 100644 index 25f1fc9a568e1572c2b33dcfe44c060818d7894d..0000000000000000000000000000000000000000 --- a/codellama/c/dataflow_c_pretrained/checkpoint-475/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1219a1788d5d094f428228d99e4982dc061bcd85dea2cf1e1ca0c7a969573be6 -size 1064 diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/trainer_state.json 
b/codellama/c/dataflow_c_pretrained/checkpoint-475/trainer_state.json deleted file mode 100644 index f4c30481ff53bbb9fdf1f266fd6622c74499701c..0000000000000000000000000000000000000000 --- a/codellama/c/dataflow_c_pretrained/checkpoint-475/trainer_state.json +++ /dev/null @@ -1,698 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.5076373735369968, - "eval_steps": 500, - "global_step": 475, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.015869867089863123, - "grad_norm": 0.058837890625, - "learning_rate": 0.0001, - "loss": 0.769, - "step": 5 - }, - { - "epoch": 0.031739734179726246, - "grad_norm": 0.11572265625, - "learning_rate": 0.0001, - "loss": 0.615, - "step": 10 - }, - { - "epoch": 0.047609601269589366, - "grad_norm": 0.0634765625, - "learning_rate": 0.0001, - "loss": 0.3973, - "step": 15 - }, - { - "epoch": 0.06347946835945249, - "grad_norm": 0.07470703125, - "learning_rate": 0.0001, - "loss": 0.2804, - "step": 20 - }, - { - "epoch": 0.0793493354493156, - "grad_norm": 0.06884765625, - "learning_rate": 0.0001, - "loss": 0.2244, - "step": 25 - }, - { - "epoch": 0.09521920253917873, - "grad_norm": 0.10498046875, - "learning_rate": 0.0001, - "loss": 0.1925, - "step": 30 - }, - { - "epoch": 0.11108906962904186, - "grad_norm": 0.08056640625, - "learning_rate": 0.0001, - "loss": 0.1477, - "step": 35 - }, - { - "epoch": 0.12695893671890499, - "grad_norm": 0.0732421875, - "learning_rate": 0.0001, - "loss": 0.0969, - "step": 40 - }, - { - "epoch": 0.1428288038087681, - "grad_norm": 0.07568359375, - "learning_rate": 0.0001, - "loss": 0.0695, - "step": 45 - }, - { - "epoch": 0.1586986708986312, - "grad_norm": 0.125, - "learning_rate": 0.0001, - "loss": 0.046, - "step": 50 - }, - { - "epoch": 0.17456853798849434, - "grad_norm": 0.0859375, - "learning_rate": 0.0001, - "loss": 0.4702, - "step": 55 - }, - { - "epoch": 0.19043840507835746, - "grad_norm": 
0.06787109375, - "learning_rate": 0.0001, - "loss": 0.2393, - "step": 60 - }, - { - "epoch": 0.2063082721682206, - "grad_norm": 0.045166015625, - "learning_rate": 0.0001, - "loss": 0.1604, - "step": 65 - }, - { - "epoch": 0.22217813925808372, - "grad_norm": 0.04931640625, - "learning_rate": 0.0001, - "loss": 0.1499, - "step": 70 - }, - { - "epoch": 0.23804800634794684, - "grad_norm": 0.041748046875, - "learning_rate": 0.0001, - "loss": 0.123, - "step": 75 - }, - { - "epoch": 0.25391787343780997, - "grad_norm": 0.042236328125, - "learning_rate": 0.0001, - "loss": 0.1056, - "step": 80 - }, - { - "epoch": 0.26978774052767307, - "grad_norm": 0.049560546875, - "learning_rate": 0.0001, - "loss": 0.0801, - "step": 85 - }, - { - "epoch": 0.2856576076175362, - "grad_norm": 0.043212890625, - "learning_rate": 0.0001, - "loss": 0.0617, - "step": 90 - }, - { - "epoch": 0.3015274747073993, - "grad_norm": 0.037109375, - "learning_rate": 0.0001, - "loss": 0.0423, - "step": 95 - }, - { - "epoch": 0.3173973417972624, - "grad_norm": 0.028564453125, - "learning_rate": 0.0001, - "loss": 0.0295, - "step": 100 - }, - { - "epoch": 0.3332672088871256, - "grad_norm": 0.0634765625, - "learning_rate": 0.0001, - "loss": 0.3494, - "step": 105 - }, - { - "epoch": 0.3491370759769887, - "grad_norm": 0.07958984375, - "learning_rate": 0.0001, - "loss": 0.1779, - "step": 110 - }, - { - "epoch": 0.36500694306685183, - "grad_norm": 0.040283203125, - "learning_rate": 0.0001, - "loss": 0.1283, - "step": 115 - }, - { - "epoch": 0.38087681015671493, - "grad_norm": 0.038818359375, - "learning_rate": 0.0001, - "loss": 0.111, - "step": 120 - }, - { - "epoch": 0.3967466772465781, - "grad_norm": 0.048095703125, - "learning_rate": 0.0001, - "loss": 0.0945, - "step": 125 - }, - { - "epoch": 0.4126165443364412, - "grad_norm": 0.06103515625, - "learning_rate": 0.0001, - "loss": 0.0833, - "step": 130 - }, - { - "epoch": 0.4284864114263043, - "grad_norm": 0.05859375, - "learning_rate": 0.0001, - "loss": 0.0702, - 
"step": 135 - }, - { - "epoch": 0.44435627851616744, - "grad_norm": 0.060302734375, - "learning_rate": 0.0001, - "loss": 0.0509, - "step": 140 - }, - { - "epoch": 0.46022614560603053, - "grad_norm": 0.042724609375, - "learning_rate": 0.0001, - "loss": 0.0363, - "step": 145 - }, - { - "epoch": 0.4760960126958937, - "grad_norm": 0.048583984375, - "learning_rate": 0.0001, - "loss": 0.0225, - "step": 150 - }, - { - "epoch": 0.4919658797857568, - "grad_norm": 0.056396484375, - "learning_rate": 0.0001, - "loss": 0.3315, - "step": 155 - }, - { - "epoch": 0.5078357468756199, - "grad_norm": 0.0478515625, - "learning_rate": 0.0001, - "loss": 0.1585, - "step": 160 - }, - { - "epoch": 0.523705613965483, - "grad_norm": 0.07177734375, - "learning_rate": 0.0001, - "loss": 0.1173, - "step": 165 - }, - { - "epoch": 0.5395754810553461, - "grad_norm": 0.050537109375, - "learning_rate": 0.0001, - "loss": 0.1054, - "step": 170 - }, - { - "epoch": 0.5554453481452093, - "grad_norm": 0.052734375, - "learning_rate": 0.0001, - "loss": 0.0828, - "step": 175 - }, - { - "epoch": 0.5713152152350724, - "grad_norm": 0.05126953125, - "learning_rate": 0.0001, - "loss": 0.0778, - "step": 180 - }, - { - "epoch": 0.5871850823249355, - "grad_norm": 0.034423828125, - "learning_rate": 0.0001, - "loss": 0.0632, - "step": 185 - }, - { - "epoch": 0.6030549494147986, - "grad_norm": 0.038330078125, - "learning_rate": 0.0001, - "loss": 0.042, - "step": 190 - }, - { - "epoch": 0.6189248165046618, - "grad_norm": 0.0400390625, - "learning_rate": 0.0001, - "loss": 0.0315, - "step": 195 - }, - { - "epoch": 0.6347946835945248, - "grad_norm": 0.08642578125, - "learning_rate": 0.0001, - "loss": 0.0195, - "step": 200 - }, - { - "epoch": 0.650664550684388, - "grad_norm": 0.07080078125, - "learning_rate": 0.0001, - "loss": 0.3038, - "step": 205 - }, - { - "epoch": 0.6665344177742512, - "grad_norm": 0.0556640625, - "learning_rate": 0.0001, - "loss": 0.1574, - "step": 210 - }, - { - "epoch": 0.6824042848641143, - 
"grad_norm": 0.054443359375, - "learning_rate": 0.0001, - "loss": 0.1049, - "step": 215 - }, - { - "epoch": 0.6982741519539774, - "grad_norm": 0.052490234375, - "learning_rate": 0.0001, - "loss": 0.0955, - "step": 220 - }, - { - "epoch": 0.7141440190438405, - "grad_norm": 0.046630859375, - "learning_rate": 0.0001, - "loss": 0.0767, - "step": 225 - }, - { - "epoch": 0.7300138861337037, - "grad_norm": 0.052978515625, - "learning_rate": 0.0001, - "loss": 0.0636, - "step": 230 - }, - { - "epoch": 0.7458837532235667, - "grad_norm": 0.0546875, - "learning_rate": 0.0001, - "loss": 0.0584, - "step": 235 - }, - { - "epoch": 0.7617536203134299, - "grad_norm": 0.0546875, - "learning_rate": 0.0001, - "loss": 0.0368, - "step": 240 - }, - { - "epoch": 0.777623487403293, - "grad_norm": 0.035400390625, - "learning_rate": 0.0001, - "loss": 0.0268, - "step": 245 - }, - { - "epoch": 0.7934933544931562, - "grad_norm": 0.03564453125, - "learning_rate": 0.0001, - "loss": 0.0197, - "step": 250 - }, - { - "epoch": 0.8093632215830192, - "grad_norm": 0.0673828125, - "learning_rate": 0.0001, - "loss": 0.264, - "step": 255 - }, - { - "epoch": 0.8252330886728824, - "grad_norm": 0.050048828125, - "learning_rate": 0.0001, - "loss": 0.1382, - "step": 260 - }, - { - "epoch": 0.8411029557627455, - "grad_norm": 0.053955078125, - "learning_rate": 0.0001, - "loss": 0.0959, - "step": 265 - }, - { - "epoch": 0.8569728228526086, - "grad_norm": 0.055908203125, - "learning_rate": 0.0001, - "loss": 0.0986, - "step": 270 - }, - { - "epoch": 0.8728426899424717, - "grad_norm": 0.05322265625, - "learning_rate": 0.0001, - "loss": 0.0806, - "step": 275 - }, - { - "epoch": 0.8887125570323349, - "grad_norm": 0.037109375, - "learning_rate": 0.0001, - "loss": 0.0627, - "step": 280 - }, - { - "epoch": 0.904582424122198, - "grad_norm": 0.035888671875, - "learning_rate": 0.0001, - "loss": 0.0488, - "step": 285 - }, - { - "epoch": 0.9204522912120611, - "grad_norm": 0.049072265625, - "learning_rate": 0.0001, - "loss": 
0.0334, - "step": 290 - }, - { - "epoch": 0.9363221583019242, - "grad_norm": 0.042236328125, - "learning_rate": 0.0001, - "loss": 0.0259, - "step": 295 - }, - { - "epoch": 0.9521920253917874, - "grad_norm": 0.02490234375, - "learning_rate": 0.0001, - "loss": 0.0168, - "step": 300 - }, - { - "epoch": 0.9680618924816504, - "grad_norm": 0.07080078125, - "learning_rate": 0.0001, - "loss": 0.1856, - "step": 305 - }, - { - "epoch": 0.9839317595715136, - "grad_norm": 0.09814453125, - "learning_rate": 0.0001, - "loss": 0.0806, - "step": 310 - }, - { - "epoch": 0.9998016266613767, - "grad_norm": 0.0380859375, - "learning_rate": 0.0001, - "loss": 0.0309, - "step": 315 - }, - { - "epoch": 1.0156714937512399, - "grad_norm": 0.07373046875, - "learning_rate": 0.0001, - "loss": 0.2891, - "step": 320 - }, - { - "epoch": 1.031541360841103, - "grad_norm": 0.06982421875, - "learning_rate": 0.0001, - "loss": 0.1519, - "step": 325 - }, - { - "epoch": 1.047411227930966, - "grad_norm": 0.048095703125, - "learning_rate": 0.0001, - "loss": 0.094, - "step": 330 - }, - { - "epoch": 1.0632810950208291, - "grad_norm": 0.051513671875, - "learning_rate": 0.0001, - "loss": 0.0843, - "step": 335 - }, - { - "epoch": 1.0791509621106923, - "grad_norm": 0.0517578125, - "learning_rate": 0.0001, - "loss": 0.0695, - "step": 340 - }, - { - "epoch": 1.0950208292005554, - "grad_norm": 0.04931640625, - "learning_rate": 0.0001, - "loss": 0.0586, - "step": 345 - }, - { - "epoch": 1.1108906962904186, - "grad_norm": 0.06201171875, - "learning_rate": 0.0001, - "loss": 0.0493, - "step": 350 - }, - { - "epoch": 1.1267605633802817, - "grad_norm": 0.0272216796875, - "learning_rate": 0.0001, - "loss": 0.0278, - "step": 355 - }, - { - "epoch": 1.142630430470145, - "grad_norm": 0.05419921875, - "learning_rate": 0.0001, - "loss": 0.0219, - "step": 360 - }, - { - "epoch": 1.1585002975600078, - "grad_norm": 0.07177734375, - "learning_rate": 0.0001, - "loss": 0.015, - "step": 365 - }, - { - "epoch": 1.174370164649871, - 
"grad_norm": 0.09521484375, - "learning_rate": 0.0001, - "loss": 0.2371, - "step": 370 - }, - { - "epoch": 1.1902400317397341, - "grad_norm": 0.060791015625, - "learning_rate": 0.0001, - "loss": 0.118, - "step": 375 - }, - { - "epoch": 1.2061098988295973, - "grad_norm": 0.059814453125, - "learning_rate": 0.0001, - "loss": 0.0904, - "step": 380 - }, - { - "epoch": 1.2219797659194604, - "grad_norm": 0.051513671875, - "learning_rate": 0.0001, - "loss": 0.079, - "step": 385 - }, - { - "epoch": 1.2378496330093236, - "grad_norm": 0.05126953125, - "learning_rate": 0.0001, - "loss": 0.0618, - "step": 390 - }, - { - "epoch": 1.2537195000991868, - "grad_norm": 0.06982421875, - "learning_rate": 0.0001, - "loss": 0.0501, - "step": 395 - }, - { - "epoch": 1.2695893671890497, - "grad_norm": 0.046142578125, - "learning_rate": 0.0001, - "loss": 0.0404, - "step": 400 - }, - { - "epoch": 1.2854592342789128, - "grad_norm": 0.03564453125, - "learning_rate": 0.0001, - "loss": 0.0295, - "step": 405 - }, - { - "epoch": 1.301329101368776, - "grad_norm": 0.0341796875, - "learning_rate": 0.0001, - "loss": 0.0185, - "step": 410 - }, - { - "epoch": 1.3171989684586392, - "grad_norm": 0.0286865234375, - "learning_rate": 0.0001, - "loss": 0.0123, - "step": 415 - }, - { - "epoch": 1.3330688355485023, - "grad_norm": 0.054931640625, - "learning_rate": 0.0001, - "loss": 0.2018, - "step": 420 - }, - { - "epoch": 1.3489387026383655, - "grad_norm": 0.060302734375, - "learning_rate": 0.0001, - "loss": 0.1189, - "step": 425 - }, - { - "epoch": 1.3648085697282286, - "grad_norm": 0.046630859375, - "learning_rate": 0.0001, - "loss": 0.0821, - "step": 430 - }, - { - "epoch": 1.3806784368180915, - "grad_norm": 0.0576171875, - "learning_rate": 0.0001, - "loss": 0.0759, - "step": 435 - }, - { - "epoch": 1.3965483039079547, - "grad_norm": 0.058349609375, - "learning_rate": 0.0001, - "loss": 0.0567, - "step": 440 - }, - { - "epoch": 1.4124181709978179, - "grad_norm": 0.05908203125, - "learning_rate": 0.0001, - 
"loss": 0.0435, - "step": 445 - }, - { - "epoch": 1.428288038087681, - "grad_norm": 0.054443359375, - "learning_rate": 0.0001, - "loss": 0.0414, - "step": 450 - }, - { - "epoch": 1.4441579051775442, - "grad_norm": 0.036376953125, - "learning_rate": 0.0001, - "loss": 0.0283, - "step": 455 - }, - { - "epoch": 1.4600277722674073, - "grad_norm": 0.142578125, - "learning_rate": 0.0001, - "loss": 0.0206, - "step": 460 - }, - { - "epoch": 1.4758976393572705, - "grad_norm": 0.044677734375, - "learning_rate": 0.0001, - "loss": 0.0129, - "step": 465 - }, - { - "epoch": 1.4917675064471334, - "grad_norm": 0.07275390625, - "learning_rate": 0.0001, - "loss": 0.2036, - "step": 470 - }, - { - "epoch": 1.5076373735369968, - "grad_norm": 0.052490234375, - "learning_rate": 0.0001, - "loss": 0.1093, - "step": 475 - } - ], - "logging_steps": 5, - "max_steps": 475, - "num_input_tokens_seen": 0, - "num_train_epochs": 2, - "save_steps": 90, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 1.4535297138363187e+18, - "train_batch_size": 4, - "trial_name": null, - "trial_params": null -} diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/training_args.bin b/codellama/c/dataflow_c_pretrained/checkpoint-475/training_args.bin deleted file mode 100644 index e91836f3d522bb0f661899abaf03ba42b585e6e7..0000000000000000000000000000000000000000 --- a/codellama/c/dataflow_c_pretrained/checkpoint-475/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0c2563f751da0f955348ed5d2c3112b7092683a85415d6f8758379982f01f992 -size 7416 diff --git a/codellama/c/dataflow_c_pretrained/metrics.json b/codellama/c/dataflow_c_pretrained/metrics.json index 582c1d4def10058472bb88fe726a4a3475d4edd7..3bb45134259072f7dc353af834e43d5b6cf31df4 100644 --- 
a/codellama/c/dataflow_c_pretrained/metrics.json +++ b/codellama/c/dataflow_c_pretrained/metrics.json @@ -1 +1 @@ -{"run_name": "dataflow_c_pretrained", "train_runtime": 39384.0084, "train_samples_per_second": 0.772, "train_steps_per_second": 0.012, "total_flos": 1.4535297138363187e+18, "train_loss": 0.11740684490454824, "epoch": 1.5076373735369968} \ No newline at end of file +{"run_name": "dataflow_c", "train_runtime": 37043.3755, "train_samples_per_second": 0.657, "train_steps_per_second": 0.005, "total_flos": 1.216645538039931e+18, "train_loss": 0.10745409297707834, "epoch": 1.2058706862356208} \ No newline at end of file diff --git a/codellama/c/dataflow_c_pretrained/train_results.json b/codellama/c/dataflow_c_pretrained/train_results.json index 83ca8ccccb78bbd05bfc39fae1f253bc31bdb395..90bfffe14242b2e03c5dd58e37596025330d4548 100644 --- a/codellama/c/dataflow_c_pretrained/train_results.json +++ b/codellama/c/dataflow_c_pretrained/train_results.json @@ -1,8 +1,8 @@ { - "epoch": 1.5076373735369968, - "total_flos": 1.4535297138363187e+18, - "train_loss": 0.11740684490454824, - "train_runtime": 39384.0084, - "train_samples_per_second": 0.772, - "train_steps_per_second": 0.012 + "epoch": 1.2058706862356208, + "total_flos": 1.216645538039931e+18, + "train_loss": 0.10745409297707834, + "train_runtime": 37043.3755, + "train_samples_per_second": 0.657, + "train_steps_per_second": 0.005 } \ No newline at end of file diff --git a/codellama/c/dataflow_c_pretrained/trainer_state.json b/codellama/c/dataflow_c_pretrained/trainer_state.json index d931850d168aa61e9a2922c516b0498bf7347838..a936dfec4596f4802dff746cd493b876c5b5992d 100644 --- a/codellama/c/dataflow_c_pretrained/trainer_state.json +++ b/codellama/c/dataflow_c_pretrained/trainer_state.json @@ -1,690 +1,291 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 1.5076373735369968, + "epoch": 1.2058706862356208, "eval_steps": 500, - "global_step": 475, + "global_step": 190, "is_hyper_param_search": 
false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.015869867089863123, - "grad_norm": 0.058837890625, + "epoch": 0.0317334391114637, + "grad_norm": 0.060546875, "learning_rate": 0.0001, - "loss": 0.769, + "loss": 0.6421, "step": 5 }, { - "epoch": 0.031739734179726246, + "epoch": 0.0634668782229274, "grad_norm": 0.11572265625, "learning_rate": 0.0001, - "loss": 0.615, + "loss": 0.5213, "step": 10 }, { - "epoch": 0.047609601269589366, - "grad_norm": 0.0634765625, + "epoch": 0.09520031733439112, + "grad_norm": 0.08251953125, "learning_rate": 0.0001, - "loss": 0.3973, + "loss": 0.2925, "step": 15 }, { - "epoch": 0.06347946835945249, - "grad_norm": 0.07470703125, + "epoch": 0.1269337564458548, + "grad_norm": 0.0634765625, "learning_rate": 0.0001, - "loss": 0.2804, + "loss": 0.1978, "step": 20 }, { - "epoch": 0.0793493354493156, - "grad_norm": 0.06884765625, + "epoch": 0.15866719555731854, + "grad_norm": 0.08251953125, "learning_rate": 0.0001, - "loss": 0.2244, + "loss": 0.1538, "step": 25 }, { - "epoch": 0.09521920253917873, - "grad_norm": 0.10498046875, + "epoch": 0.19040063466878224, + "grad_norm": 0.10888671875, "learning_rate": 0.0001, - "loss": 0.1925, + "loss": 0.106, "step": 30 }, { - "epoch": 0.11108906962904186, - "grad_norm": 0.08056640625, + "epoch": 0.22213407378024594, + "grad_norm": 0.049560546875, "learning_rate": 0.0001, - "loss": 0.1477, + "loss": 0.0454, "step": 35 }, { - "epoch": 0.12695893671890499, - "grad_norm": 0.0732421875, + "epoch": 0.2538675128917096, + "grad_norm": 0.310546875, "learning_rate": 0.0001, - "loss": 0.0969, + "loss": 0.1215, "step": 40 }, { - "epoch": 0.1428288038087681, - "grad_norm": 0.07568359375, + "epoch": 0.28560095200317337, + "grad_norm": 0.06494140625, "learning_rate": 0.0001, - "loss": 0.0695, + "loss": 0.2476, "step": 45 }, { - "epoch": 0.1586986708986312, - "grad_norm": 0.125, + "epoch": 0.31733439111463707, + "grad_norm": 0.40234375, "learning_rate": 0.0001, - "loss": 
0.046, + "loss": 0.1073, "step": 50 }, { - "epoch": 0.17456853798849434, - "grad_norm": 0.0859375, + "epoch": 0.3490678302261008, + "grad_norm": 0.04052734375, "learning_rate": 0.0001, - "loss": 0.4702, + "loss": 0.0863, "step": 55 }, { - "epoch": 0.19043840507835746, - "grad_norm": 0.06787109375, + "epoch": 0.3808012693375645, + "grad_norm": 0.03369140625, "learning_rate": 0.0001, - "loss": 0.2393, + "loss": 0.0671, "step": 60 }, { - "epoch": 0.2063082721682206, - "grad_norm": 0.045166015625, + "epoch": 0.4125347084490282, + "grad_norm": 0.0274658203125, "learning_rate": 0.0001, - "loss": 0.1604, + "loss": 0.0493, "step": 65 }, { - "epoch": 0.22217813925808372, - "grad_norm": 0.04931640625, + "epoch": 0.4442681475604919, + "grad_norm": 0.0277099609375, "learning_rate": 0.0001, - "loss": 0.1499, + "loss": 0.0311, "step": 70 }, { - "epoch": 0.23804800634794684, - "grad_norm": 0.041748046875, + "epoch": 0.4760015866719556, + "grad_norm": 0.01275634765625, "learning_rate": 0.0001, - "loss": 0.123, + "loss": 0.0125, "step": 75 }, { - "epoch": 0.25391787343780997, - "grad_norm": 0.042236328125, + "epoch": 0.5077350257834192, + "grad_norm": 0.06787109375, "learning_rate": 0.0001, - "loss": 0.1056, + "loss": 0.1307, "step": 80 }, { - "epoch": 0.26978774052767307, - "grad_norm": 0.049560546875, + "epoch": 0.539468464894883, + "grad_norm": 0.050048828125, "learning_rate": 0.0001, - "loss": 0.0801, + "loss": 0.171, "step": 85 }, { - "epoch": 0.2856576076175362, - "grad_norm": 0.043212890625, + "epoch": 0.5712019040063467, + "grad_norm": 0.060791015625, "learning_rate": 0.0001, - "loss": 0.0617, + "loss": 0.0818, "step": 90 }, { - "epoch": 0.3015274747073993, - "grad_norm": 0.037109375, + "epoch": 0.6029353431178104, + "grad_norm": 0.033203125, "learning_rate": 0.0001, - "loss": 0.0423, + "loss": 0.0658, "step": 95 }, { - "epoch": 0.3173973417972624, - "grad_norm": 0.028564453125, + "epoch": 0.6346687822292741, + "grad_norm": 0.0235595703125, "learning_rate": 0.0001, - 
"loss": 0.0295, + "loss": 0.046, "step": 100 }, { - "epoch": 0.3332672088871256, - "grad_norm": 0.0634765625, + "epoch": 0.6664022213407378, + "grad_norm": 0.0299072265625, "learning_rate": 0.0001, - "loss": 0.3494, + "loss": 0.0384, "step": 105 }, { - "epoch": 0.3491370759769887, - "grad_norm": 0.07958984375, + "epoch": 0.6981356604522015, + "grad_norm": 0.0181884765625, "learning_rate": 0.0001, - "loss": 0.1779, + "loss": 0.0187, "step": 110 }, { - "epoch": 0.36500694306685183, - "grad_norm": 0.040283203125, + "epoch": 0.7298690995636652, + "grad_norm": 0.019775390625, "learning_rate": 0.0001, - "loss": 0.1283, + "loss": 0.0095, "step": 115 }, { - "epoch": 0.38087681015671493, - "grad_norm": 0.038818359375, + "epoch": 0.761602538675129, + "grad_norm": 0.060791015625, "learning_rate": 0.0001, - "loss": 0.111, + "loss": 0.1381, "step": 120 }, { - "epoch": 0.3967466772465781, - "grad_norm": 0.048095703125, + "epoch": 0.7933359777865926, + "grad_norm": 0.038818359375, "learning_rate": 0.0001, - "loss": 0.0945, + "loss": 0.1125, "step": 125 }, { - "epoch": 0.4126165443364412, - "grad_norm": 0.06103515625, + "epoch": 0.8250694168980564, + "grad_norm": 0.032958984375, "learning_rate": 0.0001, - "loss": 0.0833, + "loss": 0.062, "step": 130 }, { - "epoch": 0.4284864114263043, - "grad_norm": 0.05859375, + "epoch": 0.85680285600952, + "grad_norm": 0.03173828125, "learning_rate": 0.0001, - "loss": 0.0702, + "loss": 0.0526, "step": 135 }, { - "epoch": 0.44435627851616744, - "grad_norm": 0.060302734375, + "epoch": 0.8885362951209838, + "grad_norm": 0.02392578125, "learning_rate": 0.0001, - "loss": 0.0509, + "loss": 0.0382, "step": 140 }, { - "epoch": 0.46022614560603053, - "grad_norm": 0.042724609375, + "epoch": 0.9202697342324474, + "grad_norm": 0.027099609375, "learning_rate": 0.0001, - "loss": 0.0363, + "loss": 0.027, "step": 145 }, { - "epoch": 0.4760960126958937, - "grad_norm": 0.048583984375, + "epoch": 0.9520031733439112, + "grad_norm": 0.02294921875, "learning_rate": 
0.0001, - "loss": 0.0225, + "loss": 0.0115, "step": 150 }, { - "epoch": 0.4919658797857568, - "grad_norm": 0.056396484375, + "epoch": 0.9837366124553748, + "grad_norm": 0.02099609375, "learning_rate": 0.0001, - "loss": 0.3315, + "loss": 0.005, "step": 155 }, { - "epoch": 0.5078357468756199, - "grad_norm": 0.0478515625, + "epoch": 1.0154700515668384, + "grad_norm": 0.0703125, "learning_rate": 0.0001, - "loss": 0.1585, + "loss": 0.1291, "step": 160 }, { - "epoch": 0.523705613965483, - "grad_norm": 0.07177734375, + "epoch": 1.0472034906783023, + "grad_norm": 0.04052734375, "learning_rate": 0.0001, - "loss": 0.1173, + "loss": 0.1033, "step": 165 }, { - "epoch": 0.5395754810553461, - "grad_norm": 0.050537109375, + "epoch": 1.078936929789766, + "grad_norm": 0.03173828125, "learning_rate": 0.0001, - "loss": 0.1054, + "loss": 0.0539, "step": 170 }, { - "epoch": 0.5554453481452093, - "grad_norm": 0.052734375, + "epoch": 1.1106703689012296, + "grad_norm": 0.0299072265625, "learning_rate": 0.0001, - "loss": 0.0828, + "loss": 0.043, "step": 175 }, { - "epoch": 0.5713152152350724, - "grad_norm": 0.05126953125, + "epoch": 1.1424038080126935, + "grad_norm": 0.0262451171875, "learning_rate": 0.0001, - "loss": 0.0778, + "loss": 0.0303, "step": 180 }, { - "epoch": 0.5871850823249355, - "grad_norm": 0.034423828125, + "epoch": 1.1741372471241571, + "grad_norm": 0.060791015625, "learning_rate": 0.0001, - "loss": 0.0632, + "loss": 0.0239, "step": 185 }, { - "epoch": 0.6030549494147986, - "grad_norm": 0.038330078125, + "epoch": 1.2058706862356208, + "grad_norm": 0.015625, "learning_rate": 0.0001, - "loss": 0.042, + "loss": 0.0095, "step": 190 }, { - "epoch": 0.6189248165046618, - "grad_norm": 0.0400390625, - "learning_rate": 0.0001, - "loss": 0.0315, - "step": 195 - }, - { - "epoch": 0.6347946835945248, - "grad_norm": 0.08642578125, - "learning_rate": 0.0001, - "loss": 0.0195, - "step": 200 - }, - { - "epoch": 0.650664550684388, - "grad_norm": 0.07080078125, - "learning_rate": 0.0001, - 
"loss": 0.3038, - "step": 205 - }, - { - "epoch": 0.6665344177742512, - "grad_norm": 0.0556640625, - "learning_rate": 0.0001, - "loss": 0.1574, - "step": 210 - }, - { - "epoch": 0.6824042848641143, - "grad_norm": 0.054443359375, - "learning_rate": 0.0001, - "loss": 0.1049, - "step": 215 - }, - { - "epoch": 0.6982741519539774, - "grad_norm": 0.052490234375, - "learning_rate": 0.0001, - "loss": 0.0955, - "step": 220 - }, - { - "epoch": 0.7141440190438405, - "grad_norm": 0.046630859375, - "learning_rate": 0.0001, - "loss": 0.0767, - "step": 225 - }, - { - "epoch": 0.7300138861337037, - "grad_norm": 0.052978515625, - "learning_rate": 0.0001, - "loss": 0.0636, - "step": 230 - }, - { - "epoch": 0.7458837532235667, - "grad_norm": 0.0546875, - "learning_rate": 0.0001, - "loss": 0.0584, - "step": 235 - }, - { - "epoch": 0.7617536203134299, - "grad_norm": 0.0546875, - "learning_rate": 0.0001, - "loss": 0.0368, - "step": 240 - }, - { - "epoch": 0.777623487403293, - "grad_norm": 0.035400390625, - "learning_rate": 0.0001, - "loss": 0.0268, - "step": 245 - }, - { - "epoch": 0.7934933544931562, - "grad_norm": 0.03564453125, - "learning_rate": 0.0001, - "loss": 0.0197, - "step": 250 - }, - { - "epoch": 0.8093632215830192, - "grad_norm": 0.0673828125, - "learning_rate": 0.0001, - "loss": 0.264, - "step": 255 - }, - { - "epoch": 0.8252330886728824, - "grad_norm": 0.050048828125, - "learning_rate": 0.0001, - "loss": 0.1382, - "step": 260 - }, - { - "epoch": 0.8411029557627455, - "grad_norm": 0.053955078125, - "learning_rate": 0.0001, - "loss": 0.0959, - "step": 265 - }, - { - "epoch": 0.8569728228526086, - "grad_norm": 0.055908203125, - "learning_rate": 0.0001, - "loss": 0.0986, - "step": 270 - }, - { - "epoch": 0.8728426899424717, - "grad_norm": 0.05322265625, - "learning_rate": 0.0001, - "loss": 0.0806, - "step": 275 - }, - { - "epoch": 0.8887125570323349, - "grad_norm": 0.037109375, - "learning_rate": 0.0001, - "loss": 0.0627, - "step": 280 - }, - { - "epoch": 0.904582424122198, - 
"grad_norm": 0.035888671875, - "learning_rate": 0.0001, - "loss": 0.0488, - "step": 285 - }, - { - "epoch": 0.9204522912120611, - "grad_norm": 0.049072265625, - "learning_rate": 0.0001, - "loss": 0.0334, - "step": 290 - }, - { - "epoch": 0.9363221583019242, - "grad_norm": 0.042236328125, - "learning_rate": 0.0001, - "loss": 0.0259, - "step": 295 - }, - { - "epoch": 0.9521920253917874, - "grad_norm": 0.02490234375, - "learning_rate": 0.0001, - "loss": 0.0168, - "step": 300 - }, - { - "epoch": 0.9680618924816504, - "grad_norm": 0.07080078125, - "learning_rate": 0.0001, - "loss": 0.1856, - "step": 305 - }, - { - "epoch": 0.9839317595715136, - "grad_norm": 0.09814453125, - "learning_rate": 0.0001, - "loss": 0.0806, - "step": 310 - }, - { - "epoch": 0.9998016266613767, - "grad_norm": 0.0380859375, - "learning_rate": 0.0001, - "loss": 0.0309, - "step": 315 - }, - { - "epoch": 1.0156714937512399, - "grad_norm": 0.07373046875, - "learning_rate": 0.0001, - "loss": 0.2891, - "step": 320 - }, - { - "epoch": 1.031541360841103, - "grad_norm": 0.06982421875, - "learning_rate": 0.0001, - "loss": 0.1519, - "step": 325 - }, - { - "epoch": 1.047411227930966, - "grad_norm": 0.048095703125, - "learning_rate": 0.0001, - "loss": 0.094, - "step": 330 - }, - { - "epoch": 1.0632810950208291, - "grad_norm": 0.051513671875, - "learning_rate": 0.0001, - "loss": 0.0843, - "step": 335 - }, - { - "epoch": 1.0791509621106923, - "grad_norm": 0.0517578125, - "learning_rate": 0.0001, - "loss": 0.0695, - "step": 340 - }, - { - "epoch": 1.0950208292005554, - "grad_norm": 0.04931640625, - "learning_rate": 0.0001, - "loss": 0.0586, - "step": 345 - }, - { - "epoch": 1.1108906962904186, - "grad_norm": 0.06201171875, - "learning_rate": 0.0001, - "loss": 0.0493, - "step": 350 - }, - { - "epoch": 1.1267605633802817, - "grad_norm": 0.0272216796875, - "learning_rate": 0.0001, - "loss": 0.0278, - "step": 355 - }, - { - "epoch": 1.142630430470145, - "grad_norm": 0.05419921875, - "learning_rate": 0.0001, - 
"loss": 0.0219, - "step": 360 - }, - { - "epoch": 1.1585002975600078, - "grad_norm": 0.07177734375, - "learning_rate": 0.0001, - "loss": 0.015, - "step": 365 - }, - { - "epoch": 1.174370164649871, - "grad_norm": 0.09521484375, - "learning_rate": 0.0001, - "loss": 0.2371, - "step": 370 - }, - { - "epoch": 1.1902400317397341, - "grad_norm": 0.060791015625, - "learning_rate": 0.0001, - "loss": 0.118, - "step": 375 - }, - { - "epoch": 1.2061098988295973, - "grad_norm": 0.059814453125, - "learning_rate": 0.0001, - "loss": 0.0904, - "step": 380 - }, - { - "epoch": 1.2219797659194604, - "grad_norm": 0.051513671875, - "learning_rate": 0.0001, - "loss": 0.079, - "step": 385 - }, - { - "epoch": 1.2378496330093236, - "grad_norm": 0.05126953125, - "learning_rate": 0.0001, - "loss": 0.0618, - "step": 390 - }, - { - "epoch": 1.2537195000991868, - "grad_norm": 0.06982421875, - "learning_rate": 0.0001, - "loss": 0.0501, - "step": 395 - }, - { - "epoch": 1.2695893671890497, - "grad_norm": 0.046142578125, - "learning_rate": 0.0001, - "loss": 0.0404, - "step": 400 - }, - { - "epoch": 1.2854592342789128, - "grad_norm": 0.03564453125, - "learning_rate": 0.0001, - "loss": 0.0295, - "step": 405 - }, - { - "epoch": 1.301329101368776, - "grad_norm": 0.0341796875, - "learning_rate": 0.0001, - "loss": 0.0185, - "step": 410 - }, - { - "epoch": 1.3171989684586392, - "grad_norm": 0.0286865234375, - "learning_rate": 0.0001, - "loss": 0.0123, - "step": 415 - }, - { - "epoch": 1.3330688355485023, - "grad_norm": 0.054931640625, - "learning_rate": 0.0001, - "loss": 0.2018, - "step": 420 - }, - { - "epoch": 1.3489387026383655, - "grad_norm": 0.060302734375, - "learning_rate": 0.0001, - "loss": 0.1189, - "step": 425 - }, - { - "epoch": 1.3648085697282286, - "grad_norm": 0.046630859375, - "learning_rate": 0.0001, - "loss": 0.0821, - "step": 430 - }, - { - "epoch": 1.3806784368180915, - "grad_norm": 0.0576171875, - "learning_rate": 0.0001, - "loss": 0.0759, - "step": 435 - }, - { - "epoch": 
1.3965483039079547, - "grad_norm": 0.058349609375, - "learning_rate": 0.0001, - "loss": 0.0567, - "step": 440 - }, - { - "epoch": 1.4124181709978179, - "grad_norm": 0.05908203125, - "learning_rate": 0.0001, - "loss": 0.0435, - "step": 445 - }, - { - "epoch": 1.428288038087681, - "grad_norm": 0.054443359375, - "learning_rate": 0.0001, - "loss": 0.0414, - "step": 450 - }, - { - "epoch": 1.4441579051775442, - "grad_norm": 0.036376953125, - "learning_rate": 0.0001, - "loss": 0.0283, - "step": 455 - }, - { - "epoch": 1.4600277722674073, - "grad_norm": 0.142578125, - "learning_rate": 0.0001, - "loss": 0.0206, - "step": 460 - }, - { - "epoch": 1.4758976393572705, - "grad_norm": 0.044677734375, - "learning_rate": 0.0001, - "loss": 0.0129, - "step": 465 - }, - { - "epoch": 1.4917675064471334, - "grad_norm": 0.07275390625, - "learning_rate": 0.0001, - "loss": 0.2036, - "step": 470 - }, - { - "epoch": 1.5076373735369968, - "grad_norm": 0.052490234375, - "learning_rate": 0.0001, - "loss": 0.1093, - "step": 475 - }, - { - "epoch": 1.5076373735369968, - "step": 475, - "total_flos": 1.4535297138363187e+18, - "train_loss": 0.11740684490454824, - "train_runtime": 39384.0084, - "train_samples_per_second": 0.772, - "train_steps_per_second": 0.012 + "epoch": 1.2058706862356208, + "step": 190, + "total_flos": 1.216645538039931e+18, + "train_loss": 0.10745409297707834, + "train_runtime": 37043.3755, + "train_samples_per_second": 0.657, + "train_steps_per_second": 0.005 } ], "logging_steps": 5, - "max_steps": 475, + "max_steps": 190, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 90, @@ -700,8 +301,8 @@ "attributes": {} } }, - "total_flos": 1.4535297138363187e+18, - "train_batch_size": 4, + "total_flos": 1.216645538039931e+18, + "train_batch_size": 8, "trial_name": null, "trial_params": null } diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/all_results.json b/codellama/c/dmcodegen/dmcodegen_base_c/all_results.json new file mode 100644 index 
0000000000000000000000000000000000000000..81a7b3a98bd4b836d2774648980f05fe250fbe62 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.0779220779220777, + "total_flos": 1.8562430640540058e+18, + "train_loss": 0.45838437411520216, + "train_runtime": 56927.0701, + "train_samples_per_second": 0.405, + "train_steps_per_second": 0.003 +} \ No newline at end of file diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/README.md b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/README.md @@ -0,0 +1,202 @@ +--- +base_model: ../CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_config.json b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e4a9687079697f7e02d95a96f8f3d174b50c1db --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "up_proj", + "down_proj", + "k_proj", + "gate_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + 
"use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model.safetensors b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..318c25b2c70e071dee17af6d80cdc96260e5dda5 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4018337dfdcbc4ca01ac822efa32f00bdad5c13cd3bd3d5c8308564380b060be +size 1156480200 diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model/README.md b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: ../CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations 
+ + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model/adapter_config.json b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e4a9687079697f7e02d95a96f8f3d174b50c1db --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "up_proj", + "down_proj", + "k_proj", + "gate_proj", + "v_proj", + "q_proj" + 
], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model/adapter_model.safetensors b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..318c25b2c70e071dee17af6d80cdc96260e5dda5 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4018337dfdcbc4ca01ac822efa32f00bdad5c13cd3bd3d5c8308564380b060be +size 1156480200 diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/added_tokens.json b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/added_tokens.json similarity index 100% rename from codellama/java/dataflow_pretrained/checkpoint-720/added_tokens.json rename to codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/added_tokens.json diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/optimizer.pt b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..edf0691b549b5f91fc25c87d89de1305e8f88a4e --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c311f9837fead460249a52390564c15ad9137ede61358886a6cf8ab7c563896 +size 2003126962 diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/rng_state.pth b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bec7e97107a17ef03e35e48facd6d1094bab3841 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:09d9a2f5e5c671e23fbed743832c2d77a42f5fdb0981bfd74289171a6b58bdb8 +size 14244 diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/scheduler.pt b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd32f24b55247712dc306a7f48b1e67f9136b26b --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:244453cd6aad26ed6e8f9d969778193b9354089d8336fe58bfb91c089a53bf6f +size 1064 diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/special_tokens_map.json b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/special_tokens_map.json similarity index 100% rename from codellama/java/dataflow_pretrained/checkpoint-720/special_tokens_map.json rename to codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/special_tokens_map.json diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/tokenizer.model b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/tokenizer.model similarity index 100% rename from codellama/java/dataflow_pretrained/checkpoint-720/tokenizer.model rename to codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/tokenizer.model diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/tokenizer_config.json b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/tokenizer_config.json similarity index 100% rename from codellama/java/dataflow_pretrained/checkpoint-720/tokenizer_config.json rename to codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/tokenizer_config.json diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/trainer_state.json b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..15f084256693c84c2283b383b7869144d5d2f569 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/trainer_state.json @@ -0,0 
+1,285 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0779220779220777, + "eval_steps": 500, + "global_step": 180, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05772005772005772, + "grad_norm": 0.018310546875, + "learning_rate": 0.0001, + "loss": 0.5558, + "step": 5 + }, + { + "epoch": 0.11544011544011544, + "grad_norm": 0.01544189453125, + "learning_rate": 0.0001, + "loss": 0.4953, + "step": 10 + }, + { + "epoch": 0.17316017316017315, + "grad_norm": 0.01611328125, + "learning_rate": 0.0001, + "loss": 0.4465, + "step": 15 + }, + { + "epoch": 0.23088023088023088, + "grad_norm": 0.0186767578125, + "learning_rate": 0.0001, + "loss": 0.419, + "step": 20 + }, + { + "epoch": 0.2886002886002886, + "grad_norm": 0.022705078125, + "learning_rate": 0.0001, + "loss": 0.5274, + "step": 25 + }, + { + "epoch": 0.3463203463203463, + "grad_norm": 0.020751953125, + "learning_rate": 0.0001, + "loss": 0.5132, + "step": 30 + }, + { + "epoch": 0.40404040404040403, + "grad_norm": 0.0174560546875, + "learning_rate": 0.0001, + "loss": 0.4466, + "step": 35 + }, + { + "epoch": 0.46176046176046176, + "grad_norm": 0.01904296875, + "learning_rate": 0.0001, + "loss": 0.4116, + "step": 40 + }, + { + "epoch": 0.5194805194805194, + "grad_norm": 0.026611328125, + "learning_rate": 0.0001, + "loss": 0.4913, + "step": 45 + }, + { + "epoch": 0.5772005772005772, + "grad_norm": 0.0208740234375, + "learning_rate": 0.0001, + "loss": 0.4982, + "step": 50 + }, + { + "epoch": 0.6349206349206349, + "grad_norm": 0.021484375, + "learning_rate": 0.0001, + "loss": 0.4586, + "step": 55 + }, + { + "epoch": 0.6926406926406926, + "grad_norm": 0.0191650390625, + "learning_rate": 0.0001, + "loss": 0.4132, + "step": 60 + }, + { + "epoch": 0.7503607503607503, + "grad_norm": 0.03125, + "learning_rate": 0.0001, + "loss": 0.445, + "step": 65 + }, + { + "epoch": 0.8080808080808081, + "grad_norm": 
0.0242919921875, + "learning_rate": 0.0001, + "loss": 0.5186, + "step": 70 + }, + { + "epoch": 0.8658008658008658, + "grad_norm": 0.022216796875, + "learning_rate": 0.0001, + "loss": 0.463, + "step": 75 + }, + { + "epoch": 0.9235209235209235, + "grad_norm": 0.02001953125, + "learning_rate": 0.0001, + "loss": 0.4233, + "step": 80 + }, + { + "epoch": 0.9812409812409812, + "grad_norm": 0.0299072265625, + "learning_rate": 0.0001, + "loss": 0.4396, + "step": 85 + }, + { + "epoch": 1.0389610389610389, + "grad_norm": 0.02685546875, + "learning_rate": 0.0001, + "loss": 0.4944, + "step": 90 + }, + { + "epoch": 1.0966810966810967, + "grad_norm": 0.0260009765625, + "learning_rate": 0.0001, + "loss": 0.4896, + "step": 95 + }, + { + "epoch": 1.1544011544011543, + "grad_norm": 0.0247802734375, + "learning_rate": 0.0001, + "loss": 0.4402, + "step": 100 + }, + { + "epoch": 1.2121212121212122, + "grad_norm": 0.0240478515625, + "learning_rate": 0.0001, + "loss": 0.3963, + "step": 105 + }, + { + "epoch": 1.2698412698412698, + "grad_norm": 0.037353515625, + "learning_rate": 0.0001, + "loss": 0.4535, + "step": 110 + }, + { + "epoch": 1.3275613275613276, + "grad_norm": 0.032470703125, + "learning_rate": 0.0001, + "loss": 0.5045, + "step": 115 + }, + { + "epoch": 1.3852813852813852, + "grad_norm": 0.0301513671875, + "learning_rate": 0.0001, + "loss": 0.4466, + "step": 120 + }, + { + "epoch": 1.443001443001443, + "grad_norm": 0.0244140625, + "learning_rate": 0.0001, + "loss": 0.4095, + "step": 125 + }, + { + "epoch": 1.5007215007215007, + "grad_norm": 0.046630859375, + "learning_rate": 0.0001, + "loss": 0.4346, + "step": 130 + }, + { + "epoch": 1.5584415584415585, + "grad_norm": 0.0299072265625, + "learning_rate": 0.0001, + "loss": 0.5046, + "step": 135 + }, + { + "epoch": 1.6161616161616161, + "grad_norm": 0.032958984375, + "learning_rate": 0.0001, + "loss": 0.4556, + "step": 140 + }, + { + "epoch": 1.6738816738816737, + "grad_norm": 0.0272216796875, + "learning_rate": 0.0001, + "loss": 
0.4245, + "step": 145 + }, + { + "epoch": 1.7316017316017316, + "grad_norm": 0.036865234375, + "learning_rate": 0.0001, + "loss": 0.3834, + "step": 150 + }, + { + "epoch": 1.7893217893217894, + "grad_norm": 0.03662109375, + "learning_rate": 0.0001, + "loss": 0.5163, + "step": 155 + }, + { + "epoch": 1.847041847041847, + "grad_norm": 0.033935546875, + "learning_rate": 0.0001, + "loss": 0.4565, + "step": 160 + }, + { + "epoch": 1.9047619047619047, + "grad_norm": 0.02880859375, + "learning_rate": 0.0001, + "loss": 0.4164, + "step": 165 + }, + { + "epoch": 1.9624819624819625, + "grad_norm": 0.03271484375, + "learning_rate": 0.0001, + "loss": 0.3956, + "step": 170 + }, + { + "epoch": 2.0202020202020203, + "grad_norm": 0.041748046875, + "learning_rate": 0.0001, + "loss": 0.4453, + "step": 175 + }, + { + "epoch": 2.0779220779220777, + "grad_norm": 0.03857421875, + "learning_rate": 0.0001, + "loss": 0.4681, + "step": 180 + } + ], + "logging_steps": 5, + "max_steps": 180, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 180, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.8562430640540058e+18, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/training_args.bin b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0133396b57a21e0862c112adcc5a95fdd2e21a0 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34f792b2641cd9a5b1462d877d12107fd74f2ec203190ed5f66b658484d1e7b3 +size 7416 diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/README.md 
b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/README.md @@ -0,0 +1,202 @@ +--- +base_model: ../CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_config.json 
b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..990f4ef3aba21ef2ebd907f1502b4848d9e6b62e --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "up_proj", + "gate_proj", + "q_proj", + "o_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model.safetensors b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3dac47beb474e15b17251aff08416c0421ae1896 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b83cea6a8ba3abb411145bffb7c0c1f6cba9fd43721b41ecc8ed32b2c3e179fe +size 1156480200 diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/README.md b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: 
../CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/adapter_config.json 
b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..990f4ef3aba21ef2ebd907f1502b4848d9e6b62e --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "up_proj", + "gate_proj", + "q_proj", + "o_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/adapter_model.safetensors b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3dac47beb474e15b17251aff08416c0421ae1896 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b83cea6a8ba3abb411145bffb7c0c1f6cba9fd43721b41ecc8ed32b2c3e179fe +size 1156480200 diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/added_tokens.json b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ 
b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/optimizer.pt b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c739c4fd846a2353fb97a0f976077cf2de1b534 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:119891f1916a60aabb0c81ceee15f0c8178c275d5b6ef81c9059743549e9653d +size 2003126962 diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/rng_state.pth b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3a5927b8d9d54d38592ba1e6b5779c0e34307afa --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b04d21a080b5e438b4b32adc506500b95e99b49e74f9d44a991ff92733e72054 +size 14244 diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/scheduler.pt b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5088c269cc64d90dc46ecb1a7fb7927ad6415d8 --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c7d5936e70e72bf0e3651da983818a5b36c8198eb19437975051ad543d68cc9 +size 1064 diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/special_tokens_map.json b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + 
"additional_special_tokens": [ + "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/tokenizer.model b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4
--- /dev/null
+++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+size 500058
diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/tokenizer_config.json b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d
--- /dev/null
+++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/tokenizer_config.json
@@ -0,0 +1,94 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "▁
",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32016": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": "",
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "",
+  "eot_token": "▁",
+  "fill_token": "",
+  "legacy": null,
+  "middle_token": "▁",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "padding_side": "right",
+  "prefix_token": "▁
",
+  "sp_model_kwargs": {},
+  "suffix_first": false,
+  "suffix_token": "▁",
+  "tokenizer_class": "CodeLlamaTokenizer",
+  "unk_token": "",
+  "use_default_system_prompt": false
+}
diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/trainer_state.json b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d3af4476363e7267883b22db47e17c252b711f60
--- /dev/null
+++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/trainer_state.json
@@ -0,0 +1,159 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0389610389610389,
+  "eval_steps": 500,
+  "global_step": 90,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05772005772005772,
+      "grad_norm": 0.036376953125,
+      "learning_rate": 0.0001,
+      "loss": 0.7562,
+      "step": 5
+    },
+    {
+      "epoch": 0.11544011544011544,
+      "grad_norm": 0.0206298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.5993,
+      "step": 10
+    },
+    {
+      "epoch": 0.17316017316017315,
+      "grad_norm": 0.0244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.5209,
+      "step": 15
+    },
+    {
+      "epoch": 0.23088023088023088,
+      "grad_norm": 0.0673828125,
+      "learning_rate": 0.0001,
+      "loss": 0.5029,
+      "step": 20
+    },
+    {
+      "epoch": 0.2886002886002886,
+      "grad_norm": 0.04150390625,
+      "learning_rate": 0.0001,
+      "loss": 0.622,
+      "step": 25
+    },
+    {
+      "epoch": 0.3463203463203463,
+      "grad_norm": 0.0284423828125,
+      "learning_rate": 0.0001,
+      "loss": 0.5561,
+      "step": 30
+    },
+    {
+      "epoch": 0.40404040404040403,
+      "grad_norm": 0.0189208984375,
+      "learning_rate": 0.0001,
+      "loss": 0.4707,
+      "step": 35
+    },
+    {
+      "epoch": 0.46176046176046176,
+      "grad_norm": 0.019775390625,
+      "learning_rate": 0.0001,
+      "loss": 0.4331,
+      "step": 40
+    },
+    {
+      "epoch": 0.5194805194805194,
+      "grad_norm": 0.0233154296875,
+      "learning_rate": 0.0001,
+      "loss": 0.534,
+      "step": 45
+    },
+    {
+      "epoch": 0.5772005772005772,
+      "grad_norm": 0.0185546875,
+      "learning_rate": 0.0001,
+      "loss": 0.5314,
+      "step": 50
+    },
+    {
+      "epoch": 0.6349206349206349,
+      "grad_norm": 0.0169677734375,
+      "learning_rate": 0.0001,
+      "loss": 0.4802,
+      "step": 55
+    },
+    {
+      "epoch": 0.6926406926406926,
+      "grad_norm": 0.0172119140625,
+      "learning_rate": 0.0001,
+      "loss": 0.4332,
+      "step": 60
+    },
+    {
+      "epoch": 0.7503607503607503,
+      "grad_norm": 0.021728515625,
+      "learning_rate": 0.0001,
+      "loss": 0.4812,
+      "step": 65
+    },
+    {
+      "epoch": 0.8080808080808081,
+      "grad_norm": 0.0155029296875,
+      "learning_rate": 0.0001,
+      "loss": 0.5443,
+      "step": 70
+    },
+    {
+      "epoch": 0.8658008658008658,
+      "grad_norm": 0.0159912109375,
+      "learning_rate": 0.0001,
+      "loss": 0.4805,
+      "step": 75
+    },
+    {
+      "epoch": 0.9235209235209235,
+      "grad_norm": 0.0146484375,
+      "learning_rate": 0.0001,
+      "loss": 0.4377,
+      "step": 80
+    },
+    {
+      "epoch": 0.9812409812409812,
+      "grad_norm": 0.019775390625,
+      "learning_rate": 0.0001,
+      "loss": 0.4548,
+      "step": 85
+    },
+    {
+      "epoch": 1.0389610389610389,
+      "grad_norm": 0.018310546875,
+      "learning_rate": 0.0001,
+      "loss": 0.5309,
+      "step": 90
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 270,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 90,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 9.286644455131546e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/training_args.bin b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a43da9a5221efcb269cce9e179e555fc6074535a
--- /dev/null
+++ b/codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3b5defc64c4beb14d7155cd334736bdf2476d6f1da0691ee9be164112ac9f9c
+size 7416
diff --git a/codellama/java/dataflow_pretrained/completed b/codellama/c/dmcodegen/dmcodegen_base_c/completed
similarity index 100%
rename from codellama/java/dataflow_pretrained/completed
rename to codellama/c/dmcodegen/dmcodegen_base_c/completed
diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/metrics.json b/codellama/c/dmcodegen/dmcodegen_base_c/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..8efa7ec3b3d25cd4cf6f1a7de74d1d02ebbbbf46
--- /dev/null
+++ b/codellama/c/dmcodegen/dmcodegen_base_c/metrics.json
@@ -0,0 +1 @@
+{"run_name": "dmcodegen_base_c", "train_runtime": 56927.0701, "train_samples_per_second": 0.405, "train_steps_per_second": 0.003, "total_flos": 1.8562430640540058e+18, "train_loss": 0.45838437411520216, "epoch": 2.0779220779220777}
\ No newline at end of file
diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/train_results.json b/codellama/c/dmcodegen/dmcodegen_base_c/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..81a7b3a98bd4b836d2774648980f05fe250fbe62
--- /dev/null
+++ b/codellama/c/dmcodegen/dmcodegen_base_c/train_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 2.0779220779220777,
+    "total_flos": 1.8562430640540058e+18,
+    "train_loss": 0.45838437411520216,
+    "train_runtime": 56927.0701,
+    "train_samples_per_second": 0.405,
+    "train_steps_per_second": 0.003
+}
\ No newline at end of file
diff --git a/codellama/c/dmcodegen/dmcodegen_base_c/trainer_state.json b/codellama/c/dmcodegen/dmcodegen_base_c/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..4eeb4d78b6455dc4305baed78d653acd89de8851
--- /dev/null
+++ b/codellama/c/dmcodegen/dmcodegen_base_c/trainer_state.json
@@ -0,0 +1,294 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0779220779220777,
+  "eval_steps": 500,
+  "global_step": 180,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05772005772005772,
+      "grad_norm": 0.018310546875,
+      "learning_rate": 0.0001,
+      "loss": 0.5558,
+      "step": 5
+    },
+    {
+      "epoch": 0.11544011544011544,
+      "grad_norm": 0.01544189453125,
+      "learning_rate": 0.0001,
+      "loss": 0.4953,
+      "step": 10
+    },
+    {
+      "epoch": 0.17316017316017315,
+      "grad_norm": 0.01611328125,
+      "learning_rate": 0.0001,
+      "loss": 0.4465,
+      "step": 15
+    },
+    {
+      "epoch": 0.23088023088023088,
+      "grad_norm": 0.0186767578125,
+      "learning_rate": 0.0001,
+      "loss": 0.419,
+      "step": 20
+    },
+    {
+      "epoch": 0.2886002886002886,
+      "grad_norm": 0.022705078125,
+      "learning_rate": 0.0001,
+      "loss": 0.5274,
+      "step": 25
+    },
+    {
+      "epoch": 0.3463203463203463,
+      "grad_norm": 0.020751953125,
+      "learning_rate": 0.0001,
+      "loss": 0.5132,
+      "step": 30
+    },
+    {
+      "epoch": 0.40404040404040403,
+      "grad_norm": 0.0174560546875,
+      "learning_rate": 0.0001,
+      "loss": 0.4466,
+      "step": 35
+    },
+    {
+      "epoch": 0.46176046176046176,
+      "grad_norm": 0.01904296875,
+      "learning_rate": 0.0001,
+      "loss": 0.4116,
+      "step": 40
+    },
+    {
+      "epoch": 0.5194805194805194,
+      "grad_norm": 0.026611328125,
+      "learning_rate": 0.0001,
+      "loss": 0.4913,
+      "step": 45
+    },
+    {
+      "epoch": 0.5772005772005772,
+      "grad_norm": 0.0208740234375,
+      "learning_rate": 0.0001,
+      "loss": 0.4982,
+      "step": 50
+    },
+    {
+      "epoch": 0.6349206349206349,
+      "grad_norm": 0.021484375,
+      "learning_rate": 0.0001,
+      "loss": 0.4586,
+      "step": 55
+    },
+    {
+      "epoch": 0.6926406926406926,
+      "grad_norm": 0.0191650390625,
+      "learning_rate": 0.0001,
+      "loss": 0.4132,
+      "step": 60
+    },
+    {
+      "epoch": 0.7503607503607503,
+      "grad_norm": 0.03125,
+      "learning_rate": 0.0001,
+      "loss": 0.445,
+      "step": 65
+    },
+    {
+      "epoch": 0.8080808080808081,
+      "grad_norm": 0.0242919921875,
+      "learning_rate": 0.0001,
+      "loss": 0.5186,
+      "step": 70
+    },
+    {
+      "epoch": 0.8658008658008658,
+      "grad_norm": 0.022216796875,
+      "learning_rate": 0.0001,
+      "loss": 0.463,
+      "step": 75
+    },
+    {
+      "epoch": 0.9235209235209235,
+      "grad_norm": 0.02001953125,
+      "learning_rate": 0.0001,
+      "loss": 0.4233,
+      "step": 80
+    },
+    {
+      "epoch": 0.9812409812409812,
+      "grad_norm": 0.0299072265625,
+      "learning_rate": 0.0001,
+      "loss": 0.4396,
+      "step": 85
+    },
+    {
+      "epoch": 1.0389610389610389,
+      "grad_norm": 0.02685546875,
+      "learning_rate": 0.0001,
+      "loss": 0.4944,
+      "step": 90
+    },
+    {
+      "epoch": 1.0966810966810967,
+      "grad_norm": 0.0260009765625,
+      "learning_rate": 0.0001,
+      "loss": 0.4896,
+      "step": 95
+    },
+    {
+      "epoch": 1.1544011544011543,
+      "grad_norm": 0.0247802734375,
+      "learning_rate": 0.0001,
+      "loss": 0.4402,
+      "step": 100
+    },
+    {
+      "epoch": 1.2121212121212122,
+      "grad_norm": 0.0240478515625,
+      "learning_rate": 0.0001,
+      "loss": 0.3963,
+      "step": 105
+    },
+    {
+      "epoch": 1.2698412698412698,
+      "grad_norm": 0.037353515625,
+      "learning_rate": 0.0001,
+      "loss": 0.4535,
+      "step": 110
+    },
+    {
+      "epoch": 1.3275613275613276,
+      "grad_norm": 0.032470703125,
+      "learning_rate": 0.0001,
+      "loss": 0.5045,
+      "step": 115
+    },
+    {
+      "epoch": 1.3852813852813852,
+      "grad_norm": 0.0301513671875,
+      "learning_rate": 0.0001,
+      "loss": 0.4466,
+      "step": 120
+    },
+    {
+      "epoch": 1.443001443001443,
+      "grad_norm": 0.0244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.4095,
+      "step": 125
+    },
+    {
+      "epoch": 1.5007215007215007,
+      "grad_norm": 0.046630859375,
+      "learning_rate": 0.0001,
+      "loss": 0.4346,
+      "step": 130
+    },
+    {
+      "epoch": 1.5584415584415585,
+      "grad_norm": 0.0299072265625,
+      "learning_rate": 0.0001,
+      "loss": 0.5046,
+      "step": 135
+    },
+    {
+      "epoch": 1.6161616161616161,
+      "grad_norm": 0.032958984375,
+      "learning_rate": 0.0001,
+      "loss": 0.4556,
+      "step": 140
+    },
+    {
+      "epoch": 1.6738816738816737,
+      "grad_norm": 0.0272216796875,
+      "learning_rate": 0.0001,
+      "loss": 0.4245,
+      "step": 145
+    },
+    {
+      "epoch": 1.7316017316017316,
+      "grad_norm": 0.036865234375,
+      "learning_rate": 0.0001,
+      "loss": 0.3834,
+      "step": 150
+    },
+    {
+      "epoch": 1.7893217893217894,
+      "grad_norm": 0.03662109375,
+      "learning_rate": 0.0001,
+      "loss": 0.5163,
+      "step": 155
+    },
+    {
+      "epoch": 1.847041847041847,
+      "grad_norm": 0.033935546875,
+      "learning_rate": 0.0001,
+      "loss": 0.4565,
+      "step": 160
+    },
+    {
+      "epoch": 1.9047619047619047,
+      "grad_norm": 0.02880859375,
+      "learning_rate": 0.0001,
+      "loss": 0.4164,
+      "step": 165
+    },
+    {
+      "epoch": 1.9624819624819625,
+      "grad_norm": 0.03271484375,
+      "learning_rate": 0.0001,
+      "loss": 0.3956,
+      "step": 170
+    },
+    {
+      "epoch": 2.0202020202020203,
+      "grad_norm": 0.041748046875,
+      "learning_rate": 0.0001,
+      "loss": 0.4453,
+      "step": 175
+    },
+    {
+      "epoch": 2.0779220779220777,
+      "grad_norm": 0.03857421875,
+      "learning_rate": 0.0001,
+      "loss": 0.4681,
+      "step": 180
+    },
+    {
+      "epoch": 2.0779220779220777,
+      "step": 180,
+      "total_flos": 1.8562430640540058e+18,
+      "train_loss": 0.45838437411520216,
+      "train_runtime": 56927.0701,
+      "train_samples_per_second": 0.405,
+      "train_steps_per_second": 0.003
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 180,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.8562430640540058e+18,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/all_results.json b/codellama/java/codetrans/codetransocean/codetransocean_base_java/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..3b669a03e623cd186ec8a3b2b55237d72b2f846d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/all_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 3.5294117647058822,
+    "total_flos": 3.685541393109811e+17,
+    "train_loss": 0.5357006496853298,
+    "train_runtime": 11174.9028,
+    "train_samples_per_second": 0.515,
+    "train_steps_per_second": 0.004
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/README.md b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/README.md
@@ -0,0 +1,202 @@
+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] 
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.13.2
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_config.json b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2ef8f1a20ad1e9d558732a73b15da0bbccfedade
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "up_proj",
+    "k_proj",
+    "o_proj",
+    "down_proj",
+    "q_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_model.safetensors b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..93d9f00eeb42511b6e4533caa3ad5245535db68c
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b0445e318d4db848f2144e65f6c1ede906a680d3b66c4aeb106575502d872b2
+size 1156480200
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_model/README.md b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_model/README.md
@@ -0,0 +1,202 @@
+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] 
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.13.2
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_model/adapter_config.json b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2ef8f1a20ad1e9d558732a73b15da0bbccfedade
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_model/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "up_proj",
+    "k_proj",
+    "o_proj",
+    "down_proj",
+    "q_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_model/adapter_model.safetensors b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_model/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..93d9f00eeb42511b6e4533caa3ad5245535db68c
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/adapter_model/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b0445e318d4db848f2144e65f6c1ede906a680d3b66c4aeb106575502d872b2
+size 1156480200
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/added_tokens.json b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/added_tokens.json
@@ -0,0 +1,3 @@
+{
+  "[PAD]": 32016
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/optimizer.pt b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..96ffae124841975958bfda924914974a7da8f03d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1afba3b9a460b6b2a84fcaf925ae469bb288ebfdfd85c58fa9aec5691012d90d
+size 2003126962
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/rng_state.pth b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a9570b02680b4230f9ba15ce25da1b40ddba969a
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:585f15ae1d5104d9384b07ae641e0e10926f991dea913b9243bcce14a7965a42
+size 14244
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/scheduler.pt b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..638774351b15ff484ffc0ca6c82bd744318f1cb0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e116f8d4565c3c175d4858b7ff08054c3bd13ca8f526b793ad3d5a6f5f8f4fb
+size 1064
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/special_tokens_map.json b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/special_tokens_map.json
@@ -0,0 +1,36 @@
+{
+  "additional_special_tokens": [
+    "▁<PRE>",
+    "▁<MID>",
+    "▁<SUF>",
+    "▁<EOT>"
+  ],
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/tokenizer.model b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+size 500058
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/tokenizer_config.json b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/tokenizer_config.json
@@ -0,0 +1,94 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "▁<PRE>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "▁<SUF>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "▁<MID>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "▁<EOT>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32016": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "▁<PRE>",
+    "▁<MID>",
+    "▁<SUF>",
+    "▁<EOT>"
+  ],
+  "bos_token": "<s>",
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "eot_token": "▁<EOT>",
+  "fill_token": "<FILL_ME>",
+  "legacy": null,
+  "middle_token": "▁<MID>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "padding_side": "right",
+  "prefix_token": "▁<PRE>",
+  "sp_model_kwargs": {},
+  "suffix_first": false,
+  "suffix_token": "▁<SUF>",
+  "tokenizer_class": "CodeLlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/trainer_state.json b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..88cd127795af80f7c76a9dee2bc9549425b740d5
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/trainer_state.json
@@ -0,0 +1,96 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.5294117647058822,
+  "eval_steps": 500,
+  "global_step": 45,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.39215686274509803,
+      "grad_norm": 0.034423828125,
+      "learning_rate": 0.0001,
+      "loss": 0.6016,
+      "step": 5
+    },
+    {
+      "epoch": 0.7843137254901961,
+      "grad_norm": 0.0206298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.5999,
+      "step": 10
+    },
+    {
+      "epoch": 1.1764705882352942,
+      "grad_norm": 0.0172119140625,
+      "learning_rate": 0.0001,
+      "loss": 0.5587,
+      "step": 15
+    },
+    {
+      "epoch": 1.5686274509803921,
+      "grad_norm": 0.0162353515625,
+      "learning_rate": 0.0001,
+      "loss": 0.5489,
+      "step": 20
+    },
+    {
+      "epoch": 1.9607843137254903,
+      "grad_norm": 0.0262451171875,
+      "learning_rate": 0.0001,
+      "loss": 0.526,
+      "step": 25
+    },
+    {
+      "epoch": 2.3529411764705883,
+      "grad_norm": 0.01611328125,
+      "learning_rate": 0.0001,
+      "loss": 0.5198,
+      "step": 30
+    },
+    {
+      "epoch": 2.7450980392156863,
+      "grad_norm": 0.041015625,
+      "learning_rate": 0.0001,
+      "loss": 0.5079,
+      "step": 35
+    },
+    {
+      "epoch": 3.1372549019607843,
+      "grad_norm": 0.02197265625,
+      "learning_rate": 0.0001,
+      "loss": 0.5052,
+      "step": 40
+    },
+    {
+      "epoch": 3.5294117647058822,
+      "grad_norm": 0.02978515625,
+      "learning_rate": 0.0001,
+      "loss": 0.4534,
+      "step": 45
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 45,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 180,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.685541393109811e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/training_args.bin b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5f0dfcb95b4589ffcb36249b9c81bcd35370adc0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/checkpoint-45/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1e016a4c7738d1feeb74102ad9823d33dbace626bb26bab05d8621a3b7111bb
+size 7480
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/completed b/codellama/java/codetrans/codetransocean/codetransocean_base_java/completed
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/metrics.json b/codellama/java/codetrans/codetransocean/codetransocean_base_java/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..135d7d40a62ebb4cf0456db9a436d2726d7b20a7
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/metrics.json
@@ -0,0 +1 @@
+{"run_name": "codetransocean_base_java", "train_runtime": 11174.9028, "train_samples_per_second": 0.515, "train_steps_per_second": 0.004, "total_flos": 3.685541393109811e+17, "train_loss": 0.5357006496853298, "epoch": 3.5294117647058822}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/train_results.json b/codellama/java/codetrans/codetransocean/codetransocean_base_java/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..3b669a03e623cd186ec8a3b2b55237d72b2f846d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/train_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 3.5294117647058822,
+    "total_flos": 3.685541393109811e+17,
+    "train_loss": 0.5357006496853298,
+    "train_runtime": 11174.9028,
+    "train_samples_per_second": 0.515,
+    "train_steps_per_second": 0.004
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_base_java/trainer_state.json b/codellama/java/codetrans/codetransocean/codetransocean_base_java/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..df37e4e267c5f759741a2db5f5d64246aceafca5
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_base_java/trainer_state.json
@@ -0,0 +1,105 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.5294117647058822,
+  "eval_steps": 500,
+  "global_step": 45,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.39215686274509803,
+      "grad_norm": 0.034423828125,
+      "learning_rate": 0.0001,
+      "loss": 0.6016,
+      "step": 5
+    },
+    {
+      "epoch": 0.7843137254901961,
+      "grad_norm": 0.0206298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.5999,
+      "step": 10
+    },
+    {
+      "epoch": 1.1764705882352942,
+      "grad_norm": 0.0172119140625,
+      "learning_rate": 0.0001,
+      "loss": 0.5587,
+      "step": 15
+    },
+    {
+      "epoch": 1.5686274509803921,
+      "grad_norm": 0.0162353515625,
+      "learning_rate": 0.0001,
+      "loss": 0.5489,
+      "step": 20
+    },
+    {
+      "epoch": 1.9607843137254903,
+      "grad_norm": 0.0262451171875,
+      "learning_rate": 0.0001,
+      "loss": 0.526,
+      "step": 25
+    },
+    {
+      "epoch": 2.3529411764705883,
+      "grad_norm": 0.01611328125,
+      "learning_rate": 0.0001,
+      "loss": 0.5198,
+      "step": 30
+    },
+    {
+      "epoch": 2.7450980392156863,
+      "grad_norm": 0.041015625,
+      "learning_rate": 0.0001,
+      "loss": 0.5079,
+      "step": 35
+    },
+    {
+      "epoch": 3.1372549019607843,
+      "grad_norm": 0.02197265625,
+      "learning_rate": 0.0001,
+      "loss": 0.5052,
+      "step": 40
+    },
+    {
+      "epoch": 3.5294117647058822,
+      "grad_norm": 0.02978515625,
+      "learning_rate": 0.0001,
+      "loss": 0.4534,
+      "step": 45
+    },
+    {
+      "epoch": 3.5294117647058822,
+      "step": 45,
+      "total_flos": 3.685541393109811e+17,
+      "train_loss": 0.5357006496853298,
+      "train_runtime": 11174.9028,
+      "train_samples_per_second": 0.515,
+      "train_steps_per_second": 0.004
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 45,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 180,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.685541393109811e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/all_results.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..e749165f7e71cc7d33038aa6b587c45168c1779d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/all_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 3.5294117647058822,
+    "total_flos": 3.685541393109811e+17,
+    "train_loss": 0.6542216989729139,
+    "train_runtime": 11159.7771,
+    "train_samples_per_second": 0.516,
+    "train_steps_per_second": 0.004
+}
\ No newline at end of file
diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/README.md b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/README.md
similarity index 100%
rename from codellama/c/dataflow_c_pretrained/checkpoint-475/README.md
rename to codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/README.md
diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_config.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_config.json
similarity index 96%
rename from codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_config.json
rename to codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_config.json
index 3bd1bc3d6771bd312ef762b5d7de15f2bf59347b..dccd6b7bf948fe35625c537a5a6a41da3b51f7db 100644
--- a/codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_config.json
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_config.json
@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "q_proj",
-    "down_proj",
+    "k_proj",
     "o_proj",
+    "down_proj",
+    "up_proj",
     "gate_proj",
-    "k_proj",
-    "up_proj"
+    "q_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_model.safetensors b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d6e6200979e0713757ce3dbcaad47629e62d519b
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e56f74b53d9144dd1c04db7b8594194255abb8b796fd695dd3221459f495a5b7
+size 1156480200
diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_model/README.md b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_model/README.md
similarity index 100%
rename from codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_model/README.md
rename to codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_model/README.md
diff --git a/codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_model/adapter_config.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_model/adapter_config.json
similarity index 96%
rename from codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_model/adapter_config.json
rename to codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_model/adapter_config.json
index 3bd1bc3d6771bd312ef762b5d7de15f2bf59347b..dccd6b7bf948fe35625c537a5a6a41da3b51f7db 100644
--- a/codellama/c/dataflow_c_pretrained/checkpoint-475/adapter_model/adapter_config.json
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_model/adapter_config.json
@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "q_proj",
-    "down_proj",
+    "k_proj",
     "o_proj",
+    "down_proj",
+    "up_proj",
     "gate_proj",
-    "k_proj",
-    "up_proj"
+    "q_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_model/adapter_model.safetensors b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_model/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d6e6200979e0713757ce3dbcaad47629e62d519b
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/adapter_model/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e56f74b53d9144dd1c04db7b8594194255abb8b796fd695dd3221459f495a5b7
+size 1156480200
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/added_tokens.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/added_tokens.json
@@ -0,0 +1,3 @@
+{
+  "[PAD]": 32016
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/optimizer.pt b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0fea6a29dec41c68a4866de5d130623fa4a1908b
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a222c5e451f1714f72c23228bc1066a13381216a60f9f1e5a70f71d7ae83e9e6
+size 2003127538
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/rng_state.pth b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..77dcde2809ddf3bbb0c930b2fd800a8030a028a0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fb345cb7f2521e856ece7ee321c0175bfb55c504e153b5f63e8413250253ff6
+size 14244
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/scheduler.pt b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..56a89f3d673b833f4a836161e416e00def3a0060
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25308db591b13d343977ce2367d0ae1afa54461f8777e7abfaef81ec2f99db6d
+size 1064
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/special_tokens_map.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/special_tokens_map.json
@@ -0,0 +1,36 @@
+{
+  "additional_special_tokens": [
+    "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/tokenizer.model b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+size 500058
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/tokenizer_config.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/tokenizer_config.json
@@ -0,0 +1,94 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "▁
",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32016": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": "",
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "",
+  "eot_token": "▁",
+  "fill_token": "",
+  "legacy": null,
+  "middle_token": "▁",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "padding_side": "right",
+  "prefix_token": "▁
",
+  "sp_model_kwargs": {},
+  "suffix_first": false,
+  "suffix_token": "▁",
+  "tokenizer_class": "CodeLlamaTokenizer",
+  "unk_token": "",
+  "use_default_system_prompt": false
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/trainer_state.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..1362e1a6df3096b377cc79611c3ea3ea6da76bfd
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/trainer_state.json
@@ -0,0 +1,1503 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.5074024226110363,
+  "eval_steps": 500,
+  "global_step": 1050,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.007178106774338268,
+      "grad_norm": 0.65234375,
+      "learning_rate": 0.0001,
+      "loss": 6.8185,
+      "step": 5
+    },
+    {
+      "epoch": 0.014356213548676536,
+      "grad_norm": 0.69921875,
+      "learning_rate": 0.0001,
+      "loss": 5.3587,
+      "step": 10
+    },
+    {
+      "epoch": 0.021534320323014805,
+      "grad_norm": 0.984375,
+      "learning_rate": 0.0001,
+      "loss": 3.9044,
+      "step": 15
+    },
+    {
+      "epoch": 0.028712427097353072,
+      "grad_norm": 0.84765625,
+      "learning_rate": 0.0001,
+      "loss": 2.4036,
+      "step": 20
+    },
+    {
+      "epoch": 0.03589053387169134,
+      "grad_norm": 0.63671875,
+      "learning_rate": 0.0001,
+      "loss": 1.5506,
+      "step": 25
+    },
+    {
+      "epoch": 0.04306864064602961,
+      "grad_norm": 0.44921875,
+      "learning_rate": 0.0001,
+      "loss": 0.8859,
+      "step": 30
+    },
+    {
+      "epoch": 0.05024674742036788,
+      "grad_norm": 0.259765625,
+      "learning_rate": 0.0001,
+      "loss": 0.3927,
+      "step": 35
+    },
+    {
+      "epoch": 0.057424854194706144,
+      "grad_norm": 0.11669921875,
+      "learning_rate": 0.0001,
+      "loss": 0.1452,
+      "step": 40
+    },
+    {
+      "epoch": 0.06460296096904442,
+      "grad_norm": 0.10400390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0693,
+      "step": 45
+    },
+    {
+      "epoch": 0.07178106774338268,
+      "grad_norm": 0.040283203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0279,
+      "step": 50
+    },
+    {
+      "epoch": 0.07895917451772096,
+      "grad_norm": 0.46484375,
+      "learning_rate": 0.0001,
+      "loss": 1.6299,
+      "step": 55
+    },
+    {
+      "epoch": 0.08613728129205922,
+      "grad_norm": 0.201171875,
+      "learning_rate": 0.0001,
+      "loss": 0.9721,
+      "step": 60
+    },
+    {
+      "epoch": 0.09331538806639748,
+      "grad_norm": 0.1953125,
+      "learning_rate": 0.0001,
+      "loss": 0.8273,
+      "step": 65
+    },
+    {
+      "epoch": 0.10049349484073576,
+      "grad_norm": 0.1259765625,
+      "learning_rate": 0.0001,
+      "loss": 0.6694,
+      "step": 70
+    },
+    {
+      "epoch": 0.10767160161507403,
+      "grad_norm": 0.1171875,
+      "learning_rate": 0.0001,
+      "loss": 0.5689,
+      "step": 75
+    },
+    {
+      "epoch": 0.11484970838941229,
+      "grad_norm": 0.1357421875,
+      "learning_rate": 0.0001,
+      "loss": 0.35,
+      "step": 80
+    },
+    {
+      "epoch": 0.12202781516375057,
+      "grad_norm": 0.06640625,
+      "learning_rate": 0.0001,
+      "loss": 0.1548,
+      "step": 85
+    },
+    {
+      "epoch": 0.12920592193808883,
+      "grad_norm": 0.0791015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0625,
+      "step": 90
+    },
+    {
+      "epoch": 0.1363840287124271,
+      "grad_norm": 0.0284423828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0345,
+      "step": 95
+    },
+    {
+      "epoch": 0.14356213548676536,
+      "grad_norm": 0.0654296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0194,
+      "step": 100
+    },
+    {
+      "epoch": 0.15074024226110364,
+      "grad_norm": 0.2490234375,
+      "learning_rate": 0.0001,
+      "loss": 1.1732,
+      "step": 105
+    },
+    {
+      "epoch": 0.1579183490354419,
+      "grad_norm": 0.2099609375,
+      "learning_rate": 0.0001,
+      "loss": 0.87,
+      "step": 110
+    },
+    {
+      "epoch": 0.16509645580978016,
+      "grad_norm": 0.1298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.7213,
+      "step": 115
+    },
+    {
+      "epoch": 0.17227456258411844,
+      "grad_norm": 0.158203125,
+      "learning_rate": 0.0001,
+      "loss": 0.5522,
+      "step": 120
+    },
+    {
+      "epoch": 0.17945266935845672,
+      "grad_norm": 0.1015625,
+      "learning_rate": 0.0001,
+      "loss": 0.4513,
+      "step": 125
+    },
+    {
+      "epoch": 0.18663077613279497,
+      "grad_norm": 0.1064453125,
+      "learning_rate": 0.0001,
+      "loss": 0.2306,
+      "step": 130
+    },
+    {
+      "epoch": 0.19380888290713325,
+      "grad_norm": 0.06591796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0997,
+      "step": 135
+    },
+    {
+      "epoch": 0.20098698968147152,
+      "grad_norm": 0.060546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0362,
+      "step": 140
+    },
+    {
+      "epoch": 0.20816509645580977,
+      "grad_norm": 0.037109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0274,
+      "step": 145
+    },
+    {
+      "epoch": 0.21534320323014805,
+      "grad_norm": 0.0234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0054,
+      "step": 150
+    },
+    {
+      "epoch": 0.22252131000448633,
+      "grad_norm": 0.337890625,
+      "learning_rate": 0.0001,
+      "loss": 1.0624,
+      "step": 155
+    },
+    {
+      "epoch": 0.22969941677882458,
+      "grad_norm": 0.1787109375,
+      "learning_rate": 0.0001,
+      "loss": 0.829,
+      "step": 160
+    },
+    {
+      "epoch": 0.23687752355316286,
+      "grad_norm": 0.15234375,
+      "learning_rate": 0.0001,
+      "loss": 0.6497,
+      "step": 165
+    },
+    {
+      "epoch": 0.24405563032750113,
+      "grad_norm": 0.1142578125,
+      "learning_rate": 0.0001,
+      "loss": 0.5721,
+      "step": 170
+    },
+    {
+      "epoch": 0.2512337371018394,
+      "grad_norm": 0.1533203125,
+      "learning_rate": 0.0001,
+      "loss": 0.4299,
+      "step": 175
+    },
+    {
+      "epoch": 0.25841184387617766,
+      "grad_norm": 0.11962890625,
+      "learning_rate": 0.0001,
+      "loss": 0.2842,
+      "step": 180
+    },
+    {
+      "epoch": 0.26558995065051594,
+      "grad_norm": 0.049560546875,
+      "learning_rate": 0.0001,
+      "loss": 0.1096,
+      "step": 185
+    },
+    {
+      "epoch": 0.2727680574248542,
+      "grad_norm": 0.072265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0362,
+      "step": 190
+    },
+    {
+      "epoch": 0.27994616419919244,
+      "grad_norm": 0.0634765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0188,
+      "step": 195
+    },
+    {
+      "epoch": 0.2871242709735307,
+      "grad_norm": 0.0167236328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0077,
+      "step": 200
+    },
+    {
+      "epoch": 0.294302377747869,
+      "grad_norm": 0.2109375,
+      "learning_rate": 0.0001,
+      "loss": 1.0719,
+      "step": 205
+    },
+    {
+      "epoch": 0.30148048452220727,
+      "grad_norm": 0.1669921875,
+      "learning_rate": 0.0001,
+      "loss": 0.79,
+      "step": 210
+    },
+    {
+      "epoch": 0.30865859129654555,
+      "grad_norm": 0.1328125,
+      "learning_rate": 0.0001,
+      "loss": 0.6307,
+      "step": 215
+    },
+    {
+      "epoch": 0.3158366980708838,
+      "grad_norm": 0.126953125,
+      "learning_rate": 0.0001,
+      "loss": 0.5041,
+      "step": 220
+    },
+    {
+      "epoch": 0.32301480484522205,
+      "grad_norm": 0.1748046875,
+      "learning_rate": 0.0001,
+      "loss": 0.4389,
+      "step": 225
+    },
+    {
+      "epoch": 0.3301929116195603,
+      "grad_norm": 0.1181640625,
+      "learning_rate": 0.0001,
+      "loss": 0.2337,
+      "step": 230
+    },
+    {
+      "epoch": 0.3373710183938986,
+      "grad_norm": 0.08154296875,
+      "learning_rate": 0.0001,
+      "loss": 0.1152,
+      "step": 235
+    },
+    {
+      "epoch": 0.3445491251682369,
+      "grad_norm": 0.038818359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0224,
+      "step": 240
+    },
+    {
+      "epoch": 0.35172723194257516,
+      "grad_norm": 0.0703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0363,
+      "step": 245
+    },
+    {
+      "epoch": 0.35890533871691344,
+      "grad_norm": 0.0400390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0073,
+      "step": 250
+    },
+    {
+      "epoch": 0.36608344549125166,
+      "grad_norm": 0.1650390625,
+      "learning_rate": 0.0001,
+      "loss": 1.0824,
+      "step": 255
+    },
+    {
+      "epoch": 0.37326155226558994,
+      "grad_norm": 0.12890625,
+      "learning_rate": 0.0001,
+      "loss": 0.8525,
+      "step": 260
+    },
+    {
+      "epoch": 0.3804396590399282,
+      "grad_norm": 0.1484375,
+      "learning_rate": 0.0001,
+      "loss": 0.6736,
+      "step": 265
+    },
+    {
+      "epoch": 0.3876177658142665,
+      "grad_norm": 0.16015625,
+      "learning_rate": 0.0001,
+      "loss": 0.5694,
+      "step": 270
+    },
+    {
+      "epoch": 0.39479587258860477,
+      "grad_norm": 0.146484375,
+      "learning_rate": 0.0001,
+      "loss": 0.4329,
+      "step": 275
+    },
+    {
+      "epoch": 0.40197397936294305,
+      "grad_norm": 0.095703125,
+      "learning_rate": 0.0001,
+      "loss": 0.2051,
+      "step": 280
+    },
+    {
+      "epoch": 0.40915208613728127,
+      "grad_norm": 0.130859375,
+      "learning_rate": 0.0001,
+      "loss": 0.1067,
+      "step": 285
+    },
+    {
+      "epoch": 0.41633019291161955,
+      "grad_norm": 0.10302734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0365,
+      "step": 290
+    },
+    {
+      "epoch": 0.4235082996859578,
+      "grad_norm": 0.05126953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0252,
+      "step": 295
+    },
+    {
+      "epoch": 0.4306864064602961,
+      "grad_norm": 0.0029449462890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0046,
+      "step": 300
+    },
+    {
+      "epoch": 0.4378645132346344,
+      "grad_norm": 0.2177734375,
+      "learning_rate": 0.0001,
+      "loss": 1.0461,
+      "step": 305
+    },
+    {
+      "epoch": 0.44504262000897266,
+      "grad_norm": 0.1474609375,
+      "learning_rate": 0.0001,
+      "loss": 0.7834,
+      "step": 310
+    },
+    {
+      "epoch": 0.4522207267833109,
+      "grad_norm": 0.11669921875,
+      "learning_rate": 0.0001,
+      "loss": 0.6162,
+      "step": 315
+    },
+    {
+      "epoch": 0.45939883355764916,
+      "grad_norm": 0.1123046875,
+      "learning_rate": 0.0001,
+      "loss": 0.4886,
+      "step": 320
+    },
+    {
+      "epoch": 0.46657694033198743,
+      "grad_norm": 0.11962890625,
+      "learning_rate": 0.0001,
+      "loss": 0.3858,
+      "step": 325
+    },
+    {
+      "epoch": 0.4737550471063257,
+      "grad_norm": 0.09521484375,
+      "learning_rate": 0.0001,
+      "loss": 0.2249,
+      "step": 330
+    },
+    {
+      "epoch": 0.480933153880664,
+      "grad_norm": 0.061279296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0778,
+      "step": 335
+    },
+    {
+      "epoch": 0.48811126065500227,
+      "grad_norm": 0.04931640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0258,
+      "step": 340
+    },
+    {
+      "epoch": 0.4952893674293405,
+      "grad_norm": 0.0283203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0245,
+      "step": 345
+    },
+    {
+      "epoch": 0.5024674742036788,
+      "grad_norm": 0.0218505859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0108,
+      "step": 350
+    },
+    {
+      "epoch": 0.509645580978017,
+      "grad_norm": 0.2060546875,
+      "learning_rate": 0.0001,
+      "loss": 1.1229,
+      "step": 355
+    },
+    {
+      "epoch": 0.5168236877523553,
+      "grad_norm": 0.130859375,
+      "learning_rate": 0.0001,
+      "loss": 0.7767,
+      "step": 360
+    },
+    {
+      "epoch": 0.5240017945266936,
+      "grad_norm": 0.1162109375,
+      "learning_rate": 0.0001,
+      "loss": 0.6151,
+      "step": 365
+    },
+    {
+      "epoch": 0.5311799013010319,
+      "grad_norm": 0.11767578125,
+      "learning_rate": 0.0001,
+      "loss": 0.4997,
+      "step": 370
+    },
+    {
+      "epoch": 0.5383580080753702,
+      "grad_norm": 0.1181640625,
+      "learning_rate": 0.0001,
+      "loss": 0.3645,
+      "step": 375
+    },
+    {
+      "epoch": 0.5455361148497084,
+      "grad_norm": 0.09228515625,
+      "learning_rate": 0.0001,
+      "loss": 0.2487,
+      "step": 380
+    },
+    {
+      "epoch": 0.5527142216240467,
+      "grad_norm": 0.043212890625,
+      "learning_rate": 0.0001,
+      "loss": 0.1116,
+      "step": 385
+    },
+    {
+      "epoch": 0.5598923283983849,
+      "grad_norm": 0.0262451171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0278,
+      "step": 390
+    },
+    {
+      "epoch": 0.5670704351727232,
+      "grad_norm": 0.048583984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0104,
+      "step": 395
+    },
+    {
+      "epoch": 0.5742485419470614,
+      "grad_norm": 0.0458984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0104,
+      "step": 400
+    },
+    {
+      "epoch": 0.5814266487213997,
+      "grad_norm": 0.1953125,
+      "learning_rate": 0.0001,
+      "loss": 0.9303,
+      "step": 405
+    },
+    {
+      "epoch": 0.588604755495738,
+      "grad_norm": 0.1513671875,
+      "learning_rate": 0.0001,
+      "loss": 0.766,
+      "step": 410
+    },
+    {
+      "epoch": 0.5957828622700763,
+      "grad_norm": 0.130859375,
+      "learning_rate": 0.0001,
+      "loss": 0.5917,
+      "step": 415
+    },
+    {
+      "epoch": 0.6029609690444145,
+      "grad_norm": 0.10595703125,
+      "learning_rate": 0.0001,
+      "loss": 0.5611,
+      "step": 420
+    },
+    {
+      "epoch": 0.6101390758187528,
+      "grad_norm": 0.1220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.3833,
+      "step": 425
+    },
+    {
+      "epoch": 0.6173171825930911,
+      "grad_norm": 0.11865234375,
+      "learning_rate": 0.0001,
+      "loss": 0.2563,
+      "step": 430
+    },
+    {
+      "epoch": 0.6244952893674294,
+      "grad_norm": 0.07568359375,
+      "learning_rate": 0.0001,
+      "loss": 0.1056,
+      "step": 435
+    },
+    {
+      "epoch": 0.6316733961417677,
+      "grad_norm": 0.087890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0343,
+      "step": 440
+    },
+    {
+      "epoch": 0.6388515029161059,
+      "grad_norm": 0.038818359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0113,
+      "step": 445
+    },
+    {
+      "epoch": 0.6460296096904441,
+      "grad_norm": 0.0194091796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0062,
+      "step": 450
+    },
+    {
+      "epoch": 0.6532077164647824,
+      "grad_norm": 0.18359375,
+      "learning_rate": 0.0001,
+      "loss": 0.894,
+      "step": 455
+    },
+    {
+      "epoch": 0.6603858232391207,
+      "grad_norm": 0.158203125,
+      "learning_rate": 0.0001,
+      "loss": 0.7454,
+      "step": 460
+    },
+    {
+      "epoch": 0.6675639300134589,
+      "grad_norm": 0.123046875,
+      "learning_rate": 0.0001,
+      "loss": 0.5539,
+      "step": 465
+    },
+    {
+      "epoch": 0.6747420367877972,
+      "grad_norm": 0.1357421875,
+      "learning_rate": 0.0001,
+      "loss": 0.5263,
+      "step": 470
+    },
+    {
+      "epoch": 0.6819201435621355,
+      "grad_norm": 0.09521484375,
+      "learning_rate": 0.0001,
+      "loss": 0.3882,
+      "step": 475
+    },
+    {
+      "epoch": 0.6890982503364738,
+      "grad_norm": 0.07958984375,
+      "learning_rate": 0.0001,
+      "loss": 0.2243,
+      "step": 480
+    },
+    {
+      "epoch": 0.696276357110812,
+      "grad_norm": 0.08154296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0728,
+      "step": 485
+    },
+    {
+      "epoch": 0.7034544638851503,
+      "grad_norm": 0.048583984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0205,
+      "step": 490
+    },
+    {
+      "epoch": 0.7106325706594886,
+      "grad_norm": 0.06103515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0179,
+      "step": 495
+    },
+    {
+      "epoch": 0.7178106774338269,
+      "grad_norm": 0.031494140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0072,
+      "step": 500
+    },
+    {
+      "epoch": 0.7249887842081651,
+      "grad_norm": 0.2470703125,
+      "learning_rate": 0.0001,
+      "loss": 0.9516,
+      "step": 505
+    },
+    {
+      "epoch": 0.7321668909825033,
+      "grad_norm": 0.1240234375,
+      "learning_rate": 0.0001,
+      "loss": 0.6854,
+      "step": 510
+    },
+    {
+      "epoch": 0.7393449977568416,
+      "grad_norm": 0.1552734375,
+      "learning_rate": 0.0001,
+      "loss": 0.5769,
+      "step": 515
+    },
+    {
+      "epoch": 0.7465231045311799,
+      "grad_norm": 0.11962890625,
+      "learning_rate": 0.0001,
+      "loss": 0.4634,
+      "step": 520
+    },
+    {
+      "epoch": 0.7537012113055181,
+      "grad_norm": 0.11962890625,
+      "learning_rate": 0.0001,
+      "loss": 0.3856,
+      "step": 525
+    },
+    {
+      "epoch": 0.7608793180798564,
+      "grad_norm": 0.10791015625,
+      "learning_rate": 0.0001,
+      "loss": 0.2155,
+      "step": 530
+    },
+    {
+      "epoch": 0.7680574248541947,
+      "grad_norm": 0.0634765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0857,
+      "step": 535
+    },
+    {
+      "epoch": 0.775235531628533,
+      "grad_norm": 0.07861328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0233,
+      "step": 540
+    },
+    {
+      "epoch": 0.7824136384028713,
+      "grad_norm": 0.0281982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.013,
+      "step": 545
+    },
+    {
+      "epoch": 0.7895917451772095,
+      "grad_norm": 0.016845703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0061,
+      "step": 550
+    },
+    {
+      "epoch": 0.7967698519515478,
+      "grad_norm": 0.1796875,
+      "learning_rate": 0.0001,
+      "loss": 0.8853,
+      "step": 555
+    },
+    {
+      "epoch": 0.8039479587258861,
+      "grad_norm": 0.154296875,
+      "learning_rate": 0.0001,
+      "loss": 0.726,
+      "step": 560
+    },
+    {
+      "epoch": 0.8111260655002244,
+      "grad_norm": 0.1328125,
+      "learning_rate": 0.0001,
+      "loss": 0.62,
+      "step": 565
+    },
+    {
+      "epoch": 0.8183041722745625,
+      "grad_norm": 0.126953125,
+      "learning_rate": 0.0001,
+      "loss": 0.5036,
+      "step": 570
+    },
+    {
+      "epoch": 0.8254822790489008,
+      "grad_norm": 0.1279296875,
+      "learning_rate": 0.0001,
+      "loss": 0.4053,
+      "step": 575
+    },
+    {
+      "epoch": 0.8326603858232391,
+      "grad_norm": 0.1142578125,
+      "learning_rate": 0.0001,
+      "loss": 0.2355,
+      "step": 580
+    },
+    {
+      "epoch": 0.8398384925975774,
+      "grad_norm": 0.045654296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0751,
+      "step": 585
+    },
+    {
+      "epoch": 0.8470165993719156,
+      "grad_norm": 0.109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0226,
+      "step": 590
+    },
+    {
+      "epoch": 0.8541947061462539,
+      "grad_norm": 0.027587890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0055,
+      "step": 595
+    },
+    {
+      "epoch": 0.8613728129205922,
+      "grad_norm": 0.05712890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0052,
+      "step": 600
+    },
+    {
+      "epoch": 0.8685509196949305,
+      "grad_norm": 0.1943359375,
+      "learning_rate": 0.0001,
+      "loss": 0.9366,
+      "step": 605
+    },
+    {
+      "epoch": 0.8757290264692688,
+      "grad_norm": 0.1484375,
+      "learning_rate": 0.0001,
+      "loss": 0.7429,
+      "step": 610
+    },
+    {
+      "epoch": 0.882907133243607,
+      "grad_norm": 0.14453125,
+      "learning_rate": 0.0001,
+      "loss": 0.564,
+      "step": 615
+    },
+    {
+      "epoch": 0.8900852400179453,
+      "grad_norm": 0.140625,
+      "learning_rate": 0.0001,
+      "loss": 0.5045,
+      "step": 620
+    },
+    {
+      "epoch": 0.8972633467922836,
+      "grad_norm": 0.1259765625,
+      "learning_rate": 0.0001,
+      "loss": 0.3997,
+      "step": 625
+    },
+    {
+      "epoch": 0.9044414535666218,
+      "grad_norm": 0.0830078125,
+      "learning_rate": 0.0001,
+      "loss": 0.1856,
+      "step": 630
+    },
+    {
+      "epoch": 0.91161956034096,
+      "grad_norm": 0.06298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0583,
+      "step": 635
+    },
+    {
+      "epoch": 0.9187976671152983,
+      "grad_norm": 0.033935546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0274,
+      "step": 640
+    },
+    {
+      "epoch": 0.9259757738896366,
+      "grad_norm": 0.03271484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0078,
+      "step": 645
+    },
+    {
+      "epoch": 0.9331538806639749,
+      "grad_norm": 0.0244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.003,
+      "step": 650
+    },
+    {
+      "epoch": 0.9403319874383131,
+      "grad_norm": 0.220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.9234,
+      "step": 655
+    },
+    {
+      "epoch": 0.9475100942126514,
+      "grad_norm": 0.1494140625,
+      "learning_rate": 0.0001,
+      "loss": 0.7145,
+      "step": 660
+    },
+    {
+      "epoch": 0.9546882009869897,
+      "grad_norm": 0.138671875,
+      "learning_rate": 0.0001,
+      "loss": 0.5764,
+      "step": 665
+    },
+    {
+      "epoch": 0.961866307761328,
+      "grad_norm": 0.1298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.4568,
+      "step": 670
+    },
+    {
+      "epoch": 0.9690444145356663,
+      "grad_norm": 0.10400390625,
+      "learning_rate": 0.0001,
+      "loss": 0.2681,
+      "step": 675
+    },
+    {
+      "epoch": 0.9762225213100045,
+      "grad_norm": 0.080078125,
+      "learning_rate": 0.0001,
+      "loss": 0.1399,
+      "step": 680
+    },
+    {
+      "epoch": 0.9834006280843428,
+      "grad_norm": 0.068359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0375,
+      "step": 685
+    },
+    {
+      "epoch": 0.990578734858681,
+      "grad_norm": 0.040283203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0108,
+      "step": 690
+    },
+    {
+      "epoch": 0.9977568416330193,
+      "grad_norm": 0.022216796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0082,
+      "step": 695
+    },
+    {
+      "epoch": 1.0049349484073575,
+      "grad_norm": 0.193359375,
+      "learning_rate": 0.0001,
+      "loss": 0.6031,
+      "step": 700
+    },
+    {
+      "epoch": 1.012113055181696,
+      "grad_norm": 0.1640625,
+      "learning_rate": 0.0001,
+      "loss": 0.7291,
+      "step": 705
+    },
+    {
+      "epoch": 1.019291161956034,
+      "grad_norm": 0.1708984375,
+      "learning_rate": 0.0001,
+      "loss": 0.5393,
+      "step": 710
+    },
+    {
+      "epoch": 1.0264692687303723,
+      "grad_norm": 0.1416015625,
+      "learning_rate": 0.0001,
+      "loss": 0.413,
+      "step": 715
+    },
+    {
+      "epoch": 1.0336473755047106,
+      "grad_norm": 0.11669921875,
+      "learning_rate": 0.0001,
+      "loss": 0.3693,
+      "step": 720
+    },
+    {
+      "epoch": 1.0408254822790488,
+      "grad_norm": 0.123046875,
+      "learning_rate": 0.0001,
+      "loss": 0.2104,
+      "step": 725
+    },
+    {
+      "epoch": 1.0480035890533872,
+      "grad_norm": 0.055908203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0834,
+      "step": 730
+    },
+    {
+      "epoch": 1.0551816958277254,
+      "grad_norm": 0.0546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0144,
+      "step": 735
+    },
+    {
+      "epoch": 1.0623598026020638,
+      "grad_norm": 0.11181640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0119,
+      "step": 740
+    },
+    {
+      "epoch": 1.069537909376402,
+      "grad_norm": 0.0034332275390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0023,
+      "step": 745
+    },
+    {
+      "epoch": 1.0767160161507403,
+      "grad_norm": 0.2490234375,
+      "learning_rate": 0.0001,
+      "loss": 0.5662,
+      "step": 750
+    },
+    {
+      "epoch": 1.0838941229250785,
+      "grad_norm": 0.2177734375,
+      "learning_rate": 0.0001,
+      "loss": 0.7079,
+      "step": 755
+    },
+    {
+      "epoch": 1.0910722296994169,
+      "grad_norm": 0.1904296875,
+      "learning_rate": 0.0001,
+      "loss": 0.5619,
+      "step": 760
+    },
+    {
+      "epoch": 1.098250336473755,
+      "grad_norm": 0.12890625,
+      "learning_rate": 0.0001,
+      "loss": 0.4236,
+      "step": 765
+    },
+    {
+      "epoch": 1.1054284432480934,
+      "grad_norm": 0.11328125,
+      "learning_rate": 0.0001,
+      "loss": 0.3422,
+      "step": 770
+    },
+    {
+      "epoch": 1.1126065500224316,
+      "grad_norm": 0.11181640625,
+      "learning_rate": 0.0001,
+      "loss": 0.2757,
+      "step": 775
+    },
+    {
+      "epoch": 1.1197846567967698,
+      "grad_norm": 0.1103515625,
+      "learning_rate": 0.0001,
+      "loss": 0.101,
+      "step": 780
+    },
+    {
+      "epoch": 1.1269627635711081,
+      "grad_norm": 0.0615234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0292,
+      "step": 785
+    },
+    {
+      "epoch": 1.1341408703454463,
+      "grad_norm": 0.01123046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0117,
+      "step": 790
+    },
+    {
+      "epoch": 1.1413189771197847,
+      "grad_norm": 0.0311279296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0068,
+      "step": 795
+    },
+    {
+      "epoch": 1.1484970838941229,
+      "grad_norm": 0.2236328125,
+      "learning_rate": 0.0001,
+      "loss": 0.5275,
+      "step": 800
+    },
+    {
+      "epoch": 1.1556751906684612,
+      "grad_norm": 0.2060546875,
+      "learning_rate": 0.0001,
+      "loss": 0.7151,
+      "step": 805
+    },
+    {
+      "epoch": 1.1628532974427994,
+      "grad_norm": 0.1708984375,
+      "learning_rate": 0.0001,
+      "loss": 0.5625,
+      "step": 810
+    },
+    {
+      "epoch": 1.1700314042171378,
+      "grad_norm": 0.1708984375,
+      "learning_rate": 0.0001,
+      "loss": 0.4765,
+      "step": 815
+    },
+    {
+      "epoch": 1.177209510991476,
+      "grad_norm": 0.1875,
+      "learning_rate": 0.0001,
+      "loss": 0.3728,
+      "step": 820
+    },
+    {
+      "epoch": 1.1843876177658144,
+      "grad_norm": 0.11181640625,
+      "learning_rate": 0.0001,
+      "loss": 0.2169,
+      "step": 825
+    },
+    {
+      "epoch": 1.1915657245401525,
+      "grad_norm": 0.0888671875,
+      "learning_rate": 0.0001,
+      "loss": 0.09,
+      "step": 830
+    },
+    {
+      "epoch": 1.198743831314491,
+      "grad_norm": 0.06396484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0261,
+      "step": 835
+    },
+    {
+      "epoch": 1.205921938088829,
+      "grad_norm": 0.06591796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0169,
+      "step": 840
+    },
+    {
+      "epoch": 1.2131000448631672,
+      "grad_norm": 0.01409912109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0041,
+      "step": 845
+    },
+    {
+      "epoch": 1.2202781516375056,
+      "grad_norm": 0.2265625,
+      "learning_rate": 0.0001,
+      "loss": 0.5508,
+      "step": 850
+    },
+    {
+      "epoch": 1.2274562584118438,
+      "grad_norm": 0.255859375,
+      "learning_rate": 0.0001,
+      "loss": 0.7281,
+      "step": 855
+    },
+    {
+      "epoch": 1.2346343651861822,
+      "grad_norm": 0.212890625,
+      "learning_rate": 0.0001,
+      "loss": 0.499,
+      "step": 860
+    },
+    {
+      "epoch": 1.2418124719605204,
+      "grad_norm": 0.1767578125,
+      "learning_rate": 0.0001,
+      "loss": 0.5054,
+      "step": 865
+    },
+    {
+      "epoch": 1.2489905787348587,
+      "grad_norm": 0.1513671875,
+      "learning_rate": 0.0001,
+      "loss": 0.3918,
+      "step": 870
+    },
+    {
+      "epoch": 1.256168685509197,
+      "grad_norm": 0.1318359375,
+      "learning_rate": 0.0001,
+      "loss": 0.2211,
+      "step": 875
+    },
+    {
+      "epoch": 1.263346792283535,
+      "grad_norm": 0.053955078125,
+      "learning_rate": 0.0001,
+      "loss": 0.099,
+      "step": 880
+    },
+    {
+      "epoch": 1.2705248990578735,
+      "grad_norm": 0.0263671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0239,
+      "step": 885
+    },
+    {
+      "epoch": 1.2777030058322119,
+      "grad_norm": 0.055908203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0203,
+      "step": 890
+    },
+    {
+      "epoch": 1.28488111260655,
+      "grad_norm": 0.0172119140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0053,
+      "step": 895
+    },
+    {
+      "epoch": 1.2920592193808882,
+      "grad_norm": 0.1943359375,
+      "learning_rate": 0.0001,
+      "loss": 0.4856,
+      "step": 900
+    },
+    {
+      "epoch": 1.2992373261552266,
+      "grad_norm": 0.2138671875,
+      "learning_rate": 0.0001,
+      "loss": 0.7204,
+      "step": 905
+    },
+    {
+      "epoch": 1.3064154329295647,
+      "grad_norm": 0.19140625,
+      "learning_rate": 0.0001,
+      "loss": 0.5374,
+      "step": 910
+    },
+    {
+      "epoch": 1.3135935397039031,
+      "grad_norm": 0.216796875,
+      "learning_rate": 0.0001,
+      "loss": 0.48,
+      "step": 915
+    },
+    {
+      "epoch": 1.3207716464782413,
+      "grad_norm": 0.19921875,
+      "learning_rate": 0.0001,
+      "loss": 0.3897,
+      "step": 920
+    },
+    {
+      "epoch": 1.3279497532525797,
+      "grad_norm": 0.10205078125,
+      "learning_rate": 0.0001,
+      "loss": 0.2242,
+      "step": 925
+    },
+    {
+      "epoch": 1.3351278600269179,
+      "grad_norm": 0.08251953125,
+      "learning_rate": 0.0001,
+      "loss": 0.1292,
+      "step": 930
+    },
+    {
+      "epoch": 1.3423059668012562,
+      "grad_norm": 0.068359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0242,
+      "step": 935
+    },
+    {
+      "epoch": 1.3494840735755944,
+      "grad_norm": 0.08544921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0092,
+      "step": 940
+    },
+    {
+      "epoch": 1.3566621803499328,
+      "grad_norm": 0.004241943359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0039,
+      "step": 945
+    },
+    {
+      "epoch": 1.363840287124271,
+      "grad_norm": 0.25390625,
+      "learning_rate": 0.0001,
+      "loss": 0.5465,
+      "step": 950
+    },
+    {
+      "epoch": 1.3710183938986091,
+      "grad_norm": 0.2412109375,
+      "learning_rate": 0.0001,
+      "loss": 0.6114,
+      "step": 955
+    },
+    {
+      "epoch": 1.3781965006729475,
+      "grad_norm": 0.244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.5226,
+      "step": 960
+    },
+    {
+      "epoch": 1.385374607447286,
+      "grad_norm": 0.205078125,
+      "learning_rate": 0.0001,
+      "loss": 0.4234,
+      "step": 965
+    },
+    {
+      "epoch": 1.392552714221624,
+      "grad_norm": 0.130859375,
+      "learning_rate": 0.0001,
+      "loss": 0.3595,
+      "step": 970
+    },
+    {
+      "epoch": 1.3997308209959622,
+      "grad_norm": 0.123046875,
+      "learning_rate": 0.0001,
+      "loss": 0.2464,
+      "step": 975
+    },
+    {
+      "epoch": 1.4069089277703006,
+      "grad_norm": 0.11767578125,
+      "learning_rate": 0.0001,
+      "loss": 0.11,
+      "step": 980
+    },
+    {
+      "epoch": 1.4140870345446388,
+      "grad_norm": 0.05322265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0205,
+      "step": 985
+    },
+    {
+      "epoch": 1.4212651413189772,
+      "grad_norm": 0.0206298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0102,
+      "step": 990
+    },
+    {
+      "epoch": 1.4284432480933154,
+      "grad_norm": 0.0250244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0044,
+      "step": 995
+    },
+    {
+      "epoch": 1.4356213548676537,
+      "grad_norm": 0.23046875,
+      "learning_rate": 0.0001,
+      "loss": 0.4827,
+      "step": 1000
+    },
+    {
+      "epoch": 1.442799461641992,
+      "grad_norm": 0.2314453125,
+      "learning_rate": 0.0001,
+      "loss": 0.6536,
+      "step": 1005
+    },
+    {
+      "epoch": 1.44997756841633,
+      "grad_norm": 0.1953125,
+      "learning_rate": 0.0001,
+      "loss": 0.5993,
+      "step": 1010
+    },
+    {
+      "epoch": 1.4571556751906685,
+      "grad_norm": 0.158203125,
+      "learning_rate": 0.0001,
+      "loss": 0.4176,
+      "step": 1015
+    },
+    {
+      "epoch": 1.4643337819650069,
+      "grad_norm": 0.1689453125,
+      "learning_rate": 0.0001,
+      "loss": 0.307,
+      "step": 1020
+    },
+    {
+      "epoch": 1.471511888739345,
+      "grad_norm": 0.1005859375,
+      "learning_rate": 0.0001,
+      "loss": 0.2381,
+      "step": 1025
+    },
+    {
+      "epoch": 1.4786899955136832,
+      "grad_norm": 0.06396484375,
+      "learning_rate": 0.0001,
+      "loss": 0.084,
+      "step": 1030
+    },
+    {
+      "epoch": 1.4858681022880216,
+      "grad_norm": 0.01153564453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0165,
+      "step": 1035
+    },
+    {
+      "epoch": 1.4930462090623597,
+      "grad_norm": 0.0283203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0059,
+      "step": 1040
+    },
+    {
+      "epoch": 1.500224315836698,
+      "grad_norm": 0.0380859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0051,
+      "step": 1045
+    },
+    {
+      "epoch": 1.5074024226110363,
+      "grad_norm": 0.296875,
+      "learning_rate": 0.0001,
+      "loss": 0.5321,
+      "step": 1050
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 1050,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 90,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4.83809405232513e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/training_args.bin b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f96c3a448688b1b9bdf6bb55e263846630401def
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-1050/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:337c706998f7adefea3d36fb9751185f26f30ea6bf7ce24cfb830dd973c3fe15
+size 7416
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/README.md b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/README.md
@@ -0,0 +1,202 @@
+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] 
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.13.2
\ No newline at end of file
diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/adapter_config.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_config.json
similarity index 100%
rename from codellama/java/dataflow_pretrained/checkpoint-720/adapter_config.json
rename to codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_config.json
index 725feabb5b40786c81604df22999d165641e135e..cee555b68bda5470d8924cc5b423788b8994d8df 100644
--- a/codellama/java/dataflow_pretrained/checkpoint-720/adapter_config.json
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_config.json
@@ -20,12 +20,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
+    "v_proj",
     "up_proj",
     "o_proj",
     "down_proj",
-    "v_proj",
     "k_proj",
+    "gate_proj",
     "q_proj"
   ],
   "task_type": "CAUSAL_LM",
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_model.safetensors b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..82d1d0387c411fdab4f59437a5e07c8bd16bd471
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f95b4bd870af822f158c83c9f22dff6c5cff42df4e81335c99eacd38d76426b5
+size 500771216
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_model/README.md b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_model/README.md
@@ -0,0 +1,202 @@
+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] 
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.13.2
\ No newline at end of file
diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/adapter_model/adapter_config.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_model/adapter_config.json
similarity index 100%
rename from codellama/java/dataflow_pretrained/checkpoint-720/adapter_model/adapter_config.json
rename to codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_model/adapter_config.json
index 725feabb5b40786c81604df22999d165641e135e..cee555b68bda5470d8924cc5b423788b8994d8df 100644
--- a/codellama/java/dataflow_pretrained/checkpoint-720/adapter_model/adapter_config.json
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_model/adapter_config.json
@@ -20,12 +20,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
+    "v_proj",
     "up_proj",
     "o_proj",
     "down_proj",
-    "v_proj",
     "k_proj",
+    "gate_proj",
     "q_proj"
   ],
   "task_type": "CAUSAL_LM",
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_model/adapter_model.safetensors b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_model/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..82d1d0387c411fdab4f59437a5e07c8bd16bd471
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/adapter_model/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f95b4bd870af822f158c83c9f22dff6c5cff42df4e81335c99eacd38d76426b5
+size 500771216
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/added_tokens.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/added_tokens.json
@@ -0,0 +1,3 @@
+{
+  "[PAD]": 32016
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/optimizer.pt b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..880329d5912840df232d60876f72dcf479acbbc1
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f746b6db1789ba6b768ae40c2e2a4153017b0276da6498aac4130f40e7381bf
+size 2003126962
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/rng_state.pth b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a9570b02680b4230f9ba15ce25da1b40ddba969a
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:585f15ae1d5104d9384b07ae641e0e10926f991dea913b9243bcce14a7965a42
+size 14244
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/scheduler.pt b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..638774351b15ff484ffc0ca6c82bd744318f1cb0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e116f8d4565c3c175d4858b7ff08054c3bd13ca8f526b793ad3d5a6f5f8f4fb
+size 1064
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/special_tokens_map.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/special_tokens_map.json
@@ -0,0 +1,36 @@
+{
+  "additional_special_tokens": [
+    "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/tokenizer.model b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+size 500058
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/tokenizer_config.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/tokenizer_config.json
@@ -0,0 +1,94 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "▁
",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32016": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": "",
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "",
+  "eot_token": "▁",
+  "fill_token": "",
+  "legacy": null,
+  "middle_token": "▁",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "padding_side": "right",
+  "prefix_token": "▁
",
+  "sp_model_kwargs": {},
+  "suffix_first": false,
+  "suffix_token": "▁",
+  "tokenizer_class": "CodeLlamaTokenizer",
+  "unk_token": "",
+  "use_default_system_prompt": false
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/trainer_state.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..4de8b5c0fc2f6179b0cc90e63ef574698c469a7e
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/trainer_state.json
@@ -0,0 +1,96 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.5294117647058822,
+  "eval_steps": 500,
+  "global_step": 45,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.39215686274509803,
+      "grad_norm": 0.05810546875,
+      "learning_rate": 0.0001,
+      "loss": 1.5487,
+      "step": 5
+    },
+    {
+      "epoch": 0.7843137254901961,
+      "grad_norm": 0.042724609375,
+      "learning_rate": 0.0001,
+      "loss": 0.619,
+      "step": 10
+    },
+    {
+      "epoch": 1.1764705882352942,
+      "grad_norm": 0.02197265625,
+      "learning_rate": 0.0001,
+      "loss": 0.5684,
+      "step": 15
+    },
+    {
+      "epoch": 1.5686274509803921,
+      "grad_norm": 0.0184326171875,
+      "learning_rate": 0.0001,
+      "loss": 0.559,
+      "step": 20
+    },
+    {
+      "epoch": 1.9607843137254903,
+      "grad_norm": 0.027099609375,
+      "learning_rate": 0.0001,
+      "loss": 0.5347,
+      "step": 25
+    },
+    {
+      "epoch": 2.3529411764705883,
+      "grad_norm": 0.0172119140625,
+      "learning_rate": 0.0001,
+      "loss": 0.5314,
+      "step": 30
+    },
+    {
+      "epoch": 2.7450980392156863,
+      "grad_norm": 0.02783203125,
+      "learning_rate": 0.0001,
+      "loss": 0.5225,
+      "step": 35
+    },
+    {
+      "epoch": 3.1372549019607843,
+      "grad_norm": 0.017333984375,
+      "learning_rate": 0.0001,
+      "loss": 0.525,
+      "step": 40
+    },
+    {
+      "epoch": 3.5294117647058822,
+      "grad_norm": 0.0228271484375,
+      "learning_rate": 0.0001,
+      "loss": 0.4793,
+      "step": 45
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 45,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 180,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.685541393109811e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/training_args.bin b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f1ec5c4a43c1dbbd247177f67ddc8c5e7f0dc7dd
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/checkpoint-45/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2abb1148c0ef26127fb936df2993d025cb794302daf263027e6201a3c124bd8
+size 7480
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/completed b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/completed
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/metrics.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..560f1f85927011573afbb204e080d78e9c572052
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/metrics.json
@@ -0,0 +1 @@
+{"run_name": "codetransocean_callgraph_java", "train_runtime": 11159.7771, "train_samples_per_second": 0.516, "train_steps_per_second": 0.004, "total_flos": 3.685541393109811e+17, "train_loss": 0.6542216989729139, "epoch": 3.5294117647058822}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/train_results.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..e749165f7e71cc7d33038aa6b587c45168c1779d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/train_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 3.5294117647058822,
+    "total_flos": 3.685541393109811e+17,
+    "train_loss": 0.6542216989729139,
+    "train_runtime": 11159.7771,
+    "train_samples_per_second": 0.516,
+    "train_steps_per_second": 0.004
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/trainer_state.json b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..30335aceac10055f633a314b3276fda1b7c4df5b
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_callgraph_java/trainer_state.json
@@ -0,0 +1,105 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.5294117647058822,
+  "eval_steps": 500,
+  "global_step": 45,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.39215686274509803,
+      "grad_norm": 0.05810546875,
+      "learning_rate": 0.0001,
+      "loss": 1.5487,
+      "step": 5
+    },
+    {
+      "epoch": 0.7843137254901961,
+      "grad_norm": 0.042724609375,
+      "learning_rate": 0.0001,
+      "loss": 0.619,
+      "step": 10
+    },
+    {
+      "epoch": 1.1764705882352942,
+      "grad_norm": 0.02197265625,
+      "learning_rate": 0.0001,
+      "loss": 0.5684,
+      "step": 15
+    },
+    {
+      "epoch": 1.5686274509803921,
+      "grad_norm": 0.0184326171875,
+      "learning_rate": 0.0001,
+      "loss": 0.559,
+      "step": 20
+    },
+    {
+      "epoch": 1.9607843137254903,
+      "grad_norm": 0.027099609375,
+      "learning_rate": 0.0001,
+      "loss": 0.5347,
+      "step": 25
+    },
+    {
+      "epoch": 2.3529411764705883,
+      "grad_norm": 0.0172119140625,
+      "learning_rate": 0.0001,
+      "loss": 0.5314,
+      "step": 30
+    },
+    {
+      "epoch": 2.7450980392156863,
+      "grad_norm": 0.02783203125,
+      "learning_rate": 0.0001,
+      "loss": 0.5225,
+      "step": 35
+    },
+    {
+      "epoch": 3.1372549019607843,
+      "grad_norm": 0.017333984375,
+      "learning_rate": 0.0001,
+      "loss": 0.525,
+      "step": 40
+    },
+    {
+      "epoch": 3.5294117647058822,
+      "grad_norm": 0.0228271484375,
+      "learning_rate": 0.0001,
+      "loss": 0.4793,
+      "step": 45
+    },
+    {
+      "epoch": 3.5294117647058822,
+      "step": 45,
+      "total_flos": 3.685541393109811e+17,
+      "train_loss": 0.6542216989729139,
+      "train_runtime": 11159.7771,
+      "train_samples_per_second": 0.516,
+      "train_steps_per_second": 0.004
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 45,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 180,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.685541393109811e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/all_results.json b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..98dc6c2ee4518eb33b44b76dccac890ed4634be6
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/all_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 3.5294117647058822,
+    "total_flos": 3.685541393109811e+17,
+    "train_loss": 0.5500616497463651,
+    "train_runtime": 9018.8961,
+    "train_samples_per_second": 0.639,
+    "train_steps_per_second": 0.005
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/README.md b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b1f6f9e34a7687a5dc07012d16ff7b343f3bade4
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/README.md
@@ -0,0 +1,202 @@
+---
+base_model: ./CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] 
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.13.2
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_config.json b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4d8560bd5b632071846880bde6dbf4ca8a6525b5
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "o_proj",
+    "up_proj",
+    "q_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_model.safetensors b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..929176ddb1863bc93f58e01647be196697a68b00
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fea2d01446692a9772bedb977099704c0a8bda8d4e358da20f64e1bf614deda2
+size 500771216
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_model/README.md b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b1f6f9e34a7687a5dc07012d16ff7b343f3bade4
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_model/README.md
@@ -0,0 +1,202 @@
+---
+base_model: ./CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] 
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.13.2
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_model/adapter_config.json b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4d8560bd5b632071846880bde6dbf4ca8a6525b5
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_model/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "o_proj",
+    "up_proj",
+    "q_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_model/adapter_model.safetensors b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_model/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..929176ddb1863bc93f58e01647be196697a68b00
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/adapter_model/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fea2d01446692a9772bedb977099704c0a8bda8d4e358da20f64e1bf614deda2
+size 500771216
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/added_tokens.json b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/added_tokens.json
@@ -0,0 +1,3 @@
+{
+  "[PAD]": 32016
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/optimizer.pt b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c4fe2901bbc9f3c11c1ec6ca4896148b6d0defae
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7972e2c47c4a9adb53fa8be0134f39784457d793d7ff3466aa973c6442c233c9
+size 2003126962
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/rng_state.pth b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a9570b02680b4230f9ba15ce25da1b40ddba969a
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:585f15ae1d5104d9384b07ae641e0e10926f991dea913b9243bcce14a7965a42
+size 14244
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/scheduler.pt b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..638774351b15ff484ffc0ca6c82bd744318f1cb0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e116f8d4565c3c175d4858b7ff08054c3bd13ca8f526b793ad3d5a6f5f8f4fb
+size 1064
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/special_tokens_map.json b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/special_tokens_map.json
@@ -0,0 +1,36 @@
+{
+  "additional_special_tokens": [
+    "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/tokenizer.model b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+size 500058
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/tokenizer_config.json b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/tokenizer_config.json
@@ -0,0 +1,94 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "▁
",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32016": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": "",
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "",
+  "eot_token": "▁",
+  "fill_token": "",
+  "legacy": null,
+  "middle_token": "▁",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "padding_side": "right",
+  "prefix_token": "▁
",
+  "sp_model_kwargs": {},
+  "suffix_first": false,
+  "suffix_token": "▁",
+  "tokenizer_class": "CodeLlamaTokenizer",
+  "unk_token": "",
+  "use_default_system_prompt": false
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/trainer_state.json b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..48fe3bf0592e92dece03986677dd34e5564b375a
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/trainer_state.json
@@ -0,0 +1,96 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.5294117647058822,
+  "eval_steps": 500,
+  "global_step": 45,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.39215686274509803,
+      "grad_norm": 0.0654296875,
+      "learning_rate": 0.0001,
+      "loss": 0.6657,
+      "step": 5
+    },
+    {
+      "epoch": 0.7843137254901961,
+      "grad_norm": 0.0291748046875,
+      "learning_rate": 0.0001,
+      "loss": 0.599,
+      "step": 10
+    },
+    {
+      "epoch": 1.1764705882352942,
+      "grad_norm": 0.0203857421875,
+      "learning_rate": 0.0001,
+      "loss": 0.5605,
+      "step": 15
+    },
+    {
+      "epoch": 1.5686274509803921,
+      "grad_norm": 0.0189208984375,
+      "learning_rate": 0.0001,
+      "loss": 0.5525,
+      "step": 20
+    },
+    {
+      "epoch": 1.9607843137254903,
+      "grad_norm": 0.027587890625,
+      "learning_rate": 0.0001,
+      "loss": 0.5312,
+      "step": 25
+    },
+    {
+      "epoch": 2.3529411764705883,
+      "grad_norm": 0.01708984375,
+      "learning_rate": 0.0001,
+      "loss": 0.529,
+      "step": 30
+    },
+    {
+      "epoch": 2.7450980392156863,
+      "grad_norm": 0.02734375,
+      "learning_rate": 0.0001,
+      "loss": 0.5179,
+      "step": 35
+    },
+    {
+      "epoch": 3.1372549019607843,
+      "grad_norm": 0.017578125,
+      "learning_rate": 0.0001,
+      "loss": 0.5203,
+      "step": 40
+    },
+    {
+      "epoch": 3.5294117647058822,
+      "grad_norm": 0.0242919921875,
+      "learning_rate": 0.0001,
+      "loss": 0.4744,
+      "step": 45
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 45,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 180,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.685541393109811e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/training_args.bin b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bf54a22e74fc827dc1dcf396e6b5384307f103ea
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/checkpoint-45/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bacb3be81a58908cd5b573f9c22710267a1381c4e4b39655345072d3e8da3eb
+size 7416
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/completed b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/completed
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/metrics.json b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..6148c805290c9589feab097b687262dad66de076
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/metrics.json
@@ -0,0 +1 @@
+{"run_name": "codetransocean_dataflow_java", "train_runtime": 9018.8961, "train_samples_per_second": 0.639, "train_steps_per_second": 0.005, "total_flos": 3.685541393109811e+17, "train_loss": 0.5500616497463651, "epoch": 3.5294117647058822}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/train_results.json b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..98dc6c2ee4518eb33b44b76dccac890ed4634be6
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/train_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 3.5294117647058822,
+    "total_flos": 3.685541393109811e+17,
+    "train_loss": 0.5500616497463651,
+    "train_runtime": 9018.8961,
+    "train_samples_per_second": 0.639,
+    "train_steps_per_second": 0.005
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/trainer_state.json b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..937fc0731718bd59b0da3849da93e370f625c1db
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_dataflow_java/trainer_state.json
@@ -0,0 +1,105 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.5294117647058822,
+  "eval_steps": 500,
+  "global_step": 45,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.39215686274509803,
+      "grad_norm": 0.0654296875,
+      "learning_rate": 0.0001,
+      "loss": 0.6657,
+      "step": 5
+    },
+    {
+      "epoch": 0.7843137254901961,
+      "grad_norm": 0.0291748046875,
+      "learning_rate": 0.0001,
+      "loss": 0.599,
+      "step": 10
+    },
+    {
+      "epoch": 1.1764705882352942,
+      "grad_norm": 0.0203857421875,
+      "learning_rate": 0.0001,
+      "loss": 0.5605,
+      "step": 15
+    },
+    {
+      "epoch": 1.5686274509803921,
+      "grad_norm": 0.0189208984375,
+      "learning_rate": 0.0001,
+      "loss": 0.5525,
+      "step": 20
+    },
+    {
+      "epoch": 1.9607843137254903,
+      "grad_norm": 0.027587890625,
+      "learning_rate": 0.0001,
+      "loss": 0.5312,
+      "step": 25
+    },
+    {
+      "epoch": 2.3529411764705883,
+      "grad_norm": 0.01708984375,
+      "learning_rate": 0.0001,
+      "loss": 0.529,
+      "step": 30
+    },
+    {
+      "epoch": 2.7450980392156863,
+      "grad_norm": 0.02734375,
+      "learning_rate": 0.0001,
+      "loss": 0.5179,
+      "step": 35
+    },
+    {
+      "epoch": 3.1372549019607843,
+      "grad_norm": 0.017578125,
+      "learning_rate": 0.0001,
+      "loss": 0.5203,
+      "step": 40
+    },
+    {
+      "epoch": 3.5294117647058822,
+      "grad_norm": 0.0242919921875,
+      "learning_rate": 0.0001,
+      "loss": 0.4744,
+      "step": 45
+    },
+    {
+      "epoch": 3.5294117647058822,
+      "step": 45,
+      "total_flos": 3.685541393109811e+17,
+      "train_loss": 0.5500616497463651,
+      "train_runtime": 9018.8961,
+      "train_samples_per_second": 0.639,
+      "train_steps_per_second": 0.005
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 45,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 180,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.685541393109811e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/all_results.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..a4011d30782f214192d8e1542f0d95831d6ae3b0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/all_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 3.5294117647058822,
+    "total_flos": 3.685541393109811e+17,
+    "train_loss": 0.5359265327453613,
+    "train_runtime": 11061.1318,
+    "train_samples_per_second": 0.521,
+    "train_steps_per_second": 0.004
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/README.md b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/README.md
@@ -0,0 +1,202 @@
+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] 
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.13.2
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_config.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..db0272acfce83f9f8edfee6525753f7f4a7087e3
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "gate_proj",
+    "up_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_model.safetensors b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..02908230ce29bbb1e4b765becf34f023dc259932
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:508c59aa8df8bf0273160337995d2b025715d72cdd161fa4966516fbc14f93ae
+size 500771216
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_model/README.md b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_model/README.md
@@ -0,0 +1,202 @@
+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] 
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.13.2
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_model/adapter_config.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..db0272acfce83f9f8edfee6525753f7f4a7087e3
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_model/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "gate_proj",
+    "up_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_model/adapter_model.safetensors b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_model/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..02908230ce29bbb1e4b765becf34f023dc259932
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/adapter_model/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:508c59aa8df8bf0273160337995d2b025715d72cdd161fa4966516fbc14f93ae
+size 500771216
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/added_tokens.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/added_tokens.json
@@ -0,0 +1,3 @@
+{
+  "[PAD]": 32016
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/optimizer.pt b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b5fa2e98fec50b8bad11dfa7b026bfe11f1db48d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6f5b295addc741c50b650324510e731c9615ba00049ce01623ff7dd87f08aaa
+size 2003126962
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/rng_state.pth b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a9570b02680b4230f9ba15ce25da1b40ddba969a
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:585f15ae1d5104d9384b07ae641e0e10926f991dea913b9243bcce14a7965a42
+size 14244
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/scheduler.pt b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..638774351b15ff484ffc0ca6c82bd744318f1cb0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e116f8d4565c3c175d4858b7ff08054c3bd13ca8f526b793ad3d5a6f5f8f4fb
+size 1064
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/special_tokens_map.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/special_tokens_map.json
@@ -0,0 +1,36 @@
+{
+  "additional_special_tokens": [
+    "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/tokenizer.model b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+size 500058
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/tokenizer_config.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/tokenizer_config.json
@@ -0,0 +1,94 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "▁
",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32016": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": "",
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "",
+  "eot_token": "▁",
+  "fill_token": "",
+  "legacy": null,
+  "middle_token": "▁",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "padding_side": "right",
+  "prefix_token": "▁
",
+  "sp_model_kwargs": {},
+  "suffix_first": false,
+  "suffix_token": "▁",
+  "tokenizer_class": "CodeLlamaTokenizer",
+  "unk_token": "",
+  "use_default_system_prompt": false
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/trainer_state.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..fc54f0c1fe745b48b778a185c4f16f16bfcbed51
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/trainer_state.json
@@ -0,0 +1,96 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.5294117647058822,
+  "eval_steps": 500,
+  "global_step": 45,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.39215686274509803,
+      "grad_norm": 0.0274658203125,
+      "learning_rate": 0.0001,
+      "loss": 0.5995,
+      "step": 5
+    },
+    {
+      "epoch": 0.7843137254901961,
+      "grad_norm": 0.022705078125,
+      "learning_rate": 0.0001,
+      "loss": 0.5904,
+      "step": 10
+    },
+    {
+      "epoch": 1.1764705882352942,
+      "grad_norm": 0.0189208984375,
+      "learning_rate": 0.0001,
+      "loss": 0.556,
+      "step": 15
+    },
+    {
+      "epoch": 1.5686274509803921,
+      "grad_norm": 0.0162353515625,
+      "learning_rate": 0.0001,
+      "loss": 0.5464,
+      "step": 20
+    },
+    {
+      "epoch": 1.9607843137254903,
+      "grad_norm": 0.0257568359375,
+      "learning_rate": 0.0001,
+      "loss": 0.5259,
+      "step": 25
+    },
+    {
+      "epoch": 2.3529411764705883,
+      "grad_norm": 0.0147705078125,
+      "learning_rate": 0.0001,
+      "loss": 0.5226,
+      "step": 30
+    },
+    {
+      "epoch": 2.7450980392156863,
+      "grad_norm": 0.029296875,
+      "learning_rate": 0.0001,
+      "loss": 0.5098,
+      "step": 35
+    },
+    {
+      "epoch": 3.1372549019607843,
+      "grad_norm": 0.016357421875,
+      "learning_rate": 0.0001,
+      "loss": 0.5106,
+      "step": 40
+    },
+    {
+      "epoch": 3.5294117647058822,
+      "grad_norm": 0.0252685546875,
+      "learning_rate": 0.0001,
+      "loss": 0.4622,
+      "step": 45
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 45,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 180,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.685541393109811e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/training_args.bin b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..07e7d4dc28ee82f8627ecda29fc4165bf6767b4d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-45/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e8e363681d9a0a69ff5b6225fa909e1d35235b8b2e7bda997204fba4ab79a75
+size 7480
diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/README.md b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/README.md
similarity index 100%
rename from codellama/java/dataflow_pretrained/checkpoint-720/README.md
rename to codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/README.md
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_config.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..787e952b4f16ebd29c2406338dfab6b0cd4d639d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "gate_proj",
+    "up_proj",
+    "down_proj",
+    "q_proj",
+    "o_proj",
+    "v_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_model.safetensors b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..975e33b96bccd37d6219e956c8859dad3f7b7029
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01e77ca05e88984d4b72dc5f1c8c94d328445cf729e8b17e1afa1fad50606501
+size 1156480200
diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/adapter_model/README.md b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_model/README.md
similarity index 100%
rename from codellama/java/dataflow_pretrained/checkpoint-720/adapter_model/README.md
rename to codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_model/README.md
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_model/adapter_config.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..787e952b4f16ebd29c2406338dfab6b0cd4d639d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_model/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "gate_proj",
+    "up_proj",
+    "down_proj",
+    "q_proj",
+    "o_proj",
+    "v_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_model/adapter_model.safetensors b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_model/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..975e33b96bccd37d6219e956c8859dad3f7b7029
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/adapter_model/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01e77ca05e88984d4b72dc5f1c8c94d328445cf729e8b17e1afa1fad50606501
+size 1156480200
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/added_tokens.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/added_tokens.json
@@ -0,0 +1,3 @@
+{
+  "[PAD]": 32016
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/optimizer.pt b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2a595fdc072b9d5f340dcf7788a93f8a950ce319
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:666a0ddc00cba0757ac08ae6f0534170c86020f442f37895bd9444e540899e0f
+size 2003127538
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/rng_state.pth b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..812e2783f6865ef8011ac461a289d2729020baf1
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf7ded6b50215b6ca731803acc0f628d8869946a601aa0eefc9bc5a3ea634352
+size 14244
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/scheduler.pt b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf093e565147c0e3ffb02b459d87a1f151f8cb5f
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b65cb75c8ba291e997d5990244bed326162b84cd48c7d8b2dc9c2d6e13468f82
+size 1064
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/special_tokens_map.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/special_tokens_map.json
@@ -0,0 +1,36 @@
+{
+  "additional_special_tokens": [
+    "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/tokenizer.model b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+size 500058
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/tokenizer_config.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/tokenizer_config.json
@@ -0,0 +1,94 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "▁
",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32016": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": "",
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "",
+  "eot_token": "▁",
+  "fill_token": "",
+  "legacy": null,
+  "middle_token": "▁",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "padding_side": "right",
+  "prefix_token": "▁
",
+  "sp_model_kwargs": {},
+  "suffix_first": false,
+  "suffix_token": "▁",
+  "tokenizer_class": "CodeLlamaTokenizer",
+  "unk_token": "",
+  "use_default_system_prompt": false
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/trainer_state.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..11eb02d4f21ad16906954fde89b9d7b4886b9836
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/trainer_state.json
@@ -0,0 +1,6753 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.072,
+  "eval_steps": 500,
+  "global_step": 4800,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0032,
+      "grad_norm": 0.0537109375,
+      "learning_rate": 0.0001,
+      "loss": 0.3678,
+      "step": 5
+    },
+    {
+      "epoch": 0.0064,
+      "grad_norm": 0.056884765625,
+      "learning_rate": 0.0001,
+      "loss": 0.2519,
+      "step": 10
+    },
+    {
+      "epoch": 0.0096,
+      "grad_norm": 0.1240234375,
+      "learning_rate": 0.0001,
+      "loss": 0.136,
+      "step": 15
+    },
+    {
+      "epoch": 0.0128,
+      "grad_norm": 0.08642578125,
+      "learning_rate": 0.0001,
+      "loss": 0.08,
+      "step": 20
+    },
+    {
+      "epoch": 0.016,
+      "grad_norm": 0.0419921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0344,
+      "step": 25
+    },
+    {
+      "epoch": 0.0192,
+      "grad_norm": 0.08544921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0782,
+      "step": 30
+    },
+    {
+      "epoch": 0.0224,
+      "grad_norm": 0.0654296875,
+      "learning_rate": 0.0001,
+      "loss": 0.144,
+      "step": 35
+    },
+    {
+      "epoch": 0.0256,
+      "grad_norm": 0.07958984375,
+      "learning_rate": 0.0001,
+      "loss": 0.1175,
+      "step": 40
+    },
+    {
+      "epoch": 0.0288,
+      "grad_norm": 0.10791015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0657,
+      "step": 45
+    },
+    {
+      "epoch": 0.032,
+      "grad_norm": 0.08642578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0472,
+      "step": 50
+    },
+    {
+      "epoch": 0.0352,
+      "grad_norm": 0.039306640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0764,
+      "step": 55
+    },
+    {
+      "epoch": 0.0384,
+      "grad_norm": 0.045654296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0299,
+      "step": 60
+    },
+    {
+      "epoch": 0.0416,
+      "grad_norm": 0.0181884765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0111,
+      "step": 65
+    },
+    {
+      "epoch": 0.0448,
+      "grad_norm": 0.03125,
+      "learning_rate": 0.0001,
+      "loss": 0.0184,
+      "step": 70
+    },
+    {
+      "epoch": 0.048,
+      "grad_norm": 0.00201416015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0033,
+      "step": 75
+    },
+    {
+      "epoch": 0.0512,
+      "grad_norm": 0.023681640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0134,
+      "step": 80
+    },
+    {
+      "epoch": 0.0544,
+      "grad_norm": 0.0018463134765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0023,
+      "step": 85
+    },
+    {
+      "epoch": 0.0576,
+      "grad_norm": 0.024658203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0021,
+      "step": 90
+    },
+    {
+      "epoch": 0.0608,
+      "grad_norm": 0.0185546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0031,
+      "step": 95
+    },
+    {
+      "epoch": 0.064,
+      "grad_norm": 0.017822265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0032,
+      "step": 100
+    },
+    {
+      "epoch": 0.0672,
+      "grad_norm": 0.041259765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0457,
+      "step": 105
+    },
+    {
+      "epoch": 0.0704,
+      "grad_norm": 0.02880859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0152,
+      "step": 110
+    },
+    {
+      "epoch": 0.0736,
+      "grad_norm": 0.00982666015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0095,
+      "step": 115
+    },
+    {
+      "epoch": 0.0768,
+      "grad_norm": 0.016845703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0049,
+      "step": 120
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.002166748046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0032,
+      "step": 125
+    },
+    {
+      "epoch": 0.0832,
+      "grad_norm": 0.0277099609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0061,
+      "step": 130
+    },
+    {
+      "epoch": 0.0864,
+      "grad_norm": 0.0078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 135
+    },
+    {
+      "epoch": 0.0896,
+      "grad_norm": 0.00086212158203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 140
+    },
+    {
+      "epoch": 0.0928,
+      "grad_norm": 0.0006256103515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 145
+    },
+    {
+      "epoch": 0.096,
+      "grad_norm": 0.00164794921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0054,
+      "step": 150
+    },
+    {
+      "epoch": 0.0992,
+      "grad_norm": 0.0299072265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0264,
+      "step": 155
+    },
+    {
+      "epoch": 0.1024,
+      "grad_norm": 0.019287109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0108,
+      "step": 160
+    },
+    {
+      "epoch": 0.1056,
+      "grad_norm": 0.007354736328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0035,
+      "step": 165
+    },
+    {
+      "epoch": 0.1088,
+      "grad_norm": 0.0103759765625,
+      "learning_rate": 0.0001,
+      "loss": 0.004,
+      "step": 170
+    },
+    {
+      "epoch": 0.112,
+      "grad_norm": 0.0013580322265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 175
+    },
+    {
+      "epoch": 0.1152,
+      "grad_norm": 0.001434326171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0015,
+      "step": 180
+    },
+    {
+      "epoch": 0.1184,
+      "grad_norm": 0.00102996826171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0104,
+      "step": 185
+    },
+    {
+      "epoch": 0.1216,
+      "grad_norm": 0.001708984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 190
+    },
+    {
+      "epoch": 0.1248,
+      "grad_norm": 0.02099609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 195
+    },
+    {
+      "epoch": 0.128,
+      "grad_norm": 0.0014190673828125,
+      "learning_rate": 0.0001,
+      "loss": 0.001,
+      "step": 200
+    },
+    {
+      "epoch": 0.1312,
+      "grad_norm": 0.031005859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0165,
+      "step": 205
+    },
+    {
+      "epoch": 0.1344,
+      "grad_norm": 0.00836181640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0063,
+      "step": 210
+    },
+    {
+      "epoch": 0.1376,
+      "grad_norm": 0.0111083984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0018,
+      "step": 215
+    },
+    {
+      "epoch": 0.1408,
+      "grad_norm": 0.036865234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0048,
+      "step": 220
+    },
+    {
+      "epoch": 0.144,
+      "grad_norm": 0.00061798095703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 225
+    },
+    {
+      "epoch": 0.1472,
+      "grad_norm": 0.0162353515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0016,
+      "step": 230
+    },
+    {
+      "epoch": 0.1504,
+      "grad_norm": 0.00077056884765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 235
+    },
+    {
+      "epoch": 0.1536,
+      "grad_norm": 0.0201416015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0015,
+      "step": 240
+    },
+    {
+      "epoch": 0.1568,
+      "grad_norm": 0.00579833984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 245
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.00121307373046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0022,
+      "step": 250
+    },
+    {
+      "epoch": 0.1632,
+      "grad_norm": 0.0115966796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0077,
+      "step": 255
+    },
+    {
+      "epoch": 0.1664,
+      "grad_norm": 0.01806640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0033,
+      "step": 260
+    },
+    {
+      "epoch": 0.1696,
+      "grad_norm": 0.000514984130859375,
+      "learning_rate": 0.0001,
+      "loss": 0.001,
+      "step": 265
+    },
+    {
+      "epoch": 0.1728,
+      "grad_norm": 0.0179443359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0025,
+      "step": 270
+    },
+    {
+      "epoch": 0.176,
+      "grad_norm": 0.00579833984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0014,
+      "step": 275
+    },
+    {
+      "epoch": 0.1792,
+      "grad_norm": 0.0186767578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0014,
+      "step": 280
+    },
+    {
+      "epoch": 0.1824,
+      "grad_norm": 0.000598907470703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 285
+    },
+    {
+      "epoch": 0.1856,
+      "grad_norm": 0.031982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0021,
+      "step": 290
+    },
+    {
+      "epoch": 0.1888,
+      "grad_norm": 0.0008087158203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 295
+    },
+    {
+      "epoch": 0.192,
+      "grad_norm": 0.0029296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 300
+    },
+    {
+      "epoch": 0.1952,
+      "grad_norm": 0.00750732421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0038,
+      "step": 305
+    },
+    {
+      "epoch": 0.1984,
+      "grad_norm": 0.0185546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0023,
+      "step": 310
+    },
+    {
+      "epoch": 0.2016,
+      "grad_norm": 0.0128173828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0025,
+      "step": 315
+    },
+    {
+      "epoch": 0.2048,
+      "grad_norm": 0.0157470703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0014,
+      "step": 320
+    },
+    {
+      "epoch": 0.208,
+      "grad_norm": 0.0096435546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 325
+    },
+    {
+      "epoch": 0.2112,
+      "grad_norm": 0.01458740234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0015,
+      "step": 330
+    },
+    {
+      "epoch": 0.2144,
+      "grad_norm": 0.004150390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 335
+    },
+    {
+      "epoch": 0.2176,
+      "grad_norm": 0.0238037109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 340
+    },
+    {
+      "epoch": 0.2208,
+      "grad_norm": 0.0010986328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 345
+    },
+    {
+      "epoch": 0.224,
+      "grad_norm": 0.001220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 350
+    },
+    {
+      "epoch": 0.2272,
+      "grad_norm": 0.01348876953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0051,
+      "step": 355
+    },
+    {
+      "epoch": 0.2304,
+      "grad_norm": 0.01025390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0024,
+      "step": 360
+    },
+    {
+      "epoch": 0.2336,
+      "grad_norm": 0.0037994384765625,
+      "learning_rate": 0.0001,
+      "loss": 0.001,
+      "step": 365
+    },
+    {
+      "epoch": 0.2368,
+      "grad_norm": 0.0240478515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0023,
+      "step": 370
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 0.0001373291015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 375
+    },
+    {
+      "epoch": 0.2432,
+      "grad_norm": 0.006500244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0025,
+      "step": 380
+    },
+    {
+      "epoch": 0.2464,
+      "grad_norm": 0.00020503997802734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 385
+    },
+    {
+      "epoch": 0.2496,
+      "grad_norm": 0.00022983551025390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 390
+    },
+    {
+      "epoch": 0.2528,
+      "grad_norm": 0.00018787384033203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 395
+    },
+    {
+      "epoch": 0.256,
+      "grad_norm": 0.000614166259765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 400
+    },
+    {
+      "epoch": 0.2592,
+      "grad_norm": 0.016845703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0102,
+      "step": 405
+    },
+    {
+      "epoch": 0.2624,
+      "grad_norm": 0.00946044921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0018,
+      "step": 410
+    },
+    {
+      "epoch": 0.2656,
+      "grad_norm": 0.00098419189453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 415
+    },
+    {
+      "epoch": 0.2688,
+      "grad_norm": 0.01025390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0012,
+      "step": 420
+    },
+    {
+      "epoch": 0.272,
+      "grad_norm": 0.000278472900390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 425
+    },
+    {
+      "epoch": 0.2752,
+      "grad_norm": 0.006866455078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0013,
+      "step": 430
+    },
+    {
+      "epoch": 0.2784,
+      "grad_norm": 0.0003032684326171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 435
+    },
+    {
+      "epoch": 0.2816,
+      "grad_norm": 0.01123046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0072,
+      "step": 440
+    },
+    {
+      "epoch": 0.2848,
+      "grad_norm": 0.00022602081298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 445
+    },
+    {
+      "epoch": 0.288,
+      "grad_norm": 0.000621795654296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 450
+    },
+    {
+      "epoch": 0.2912,
+      "grad_norm": 0.0281982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0156,
+      "step": 455
+    },
+    {
+      "epoch": 0.2944,
+      "grad_norm": 0.0038604736328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0016,
+      "step": 460
+    },
+    {
+      "epoch": 0.2976,
+      "grad_norm": 0.00179290771484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 465
+    },
+    {
+      "epoch": 0.3008,
+      "grad_norm": 0.01519775390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 470
+    },
+    {
+      "epoch": 0.304,
+      "grad_norm": 0.0004405975341796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 475
+    },
+    {
+      "epoch": 0.3072,
+      "grad_norm": 0.024169921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 480
+    },
+    {
+      "epoch": 0.3104,
+      "grad_norm": 0.000926971435546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 485
+    },
+    {
+      "epoch": 0.3136,
+      "grad_norm": 0.0003108978271484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 490
+    },
+    {
+      "epoch": 0.3168,
+      "grad_norm": 0.00020503997802734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 495
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.0005950927734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 500
+    },
+    {
+      "epoch": 0.3232,
+      "grad_norm": 0.032470703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0051,
+      "step": 505
+    },
+    {
+      "epoch": 0.3264,
+      "grad_norm": 0.011962890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0038,
+      "step": 510
+    },
+    {
+      "epoch": 0.3296,
+      "grad_norm": 0.014404296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0018,
+      "step": 515
+    },
+    {
+      "epoch": 0.3328,
+      "grad_norm": 0.0038299560546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 520
+    },
+    {
+      "epoch": 0.336,
+      "grad_norm": 0.0002880096435546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 525
+    },
+    {
+      "epoch": 0.3392,
+      "grad_norm": 0.007049560546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 530
+    },
+    {
+      "epoch": 0.3424,
+      "grad_norm": 0.01397705078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 535
+    },
+    {
+      "epoch": 0.3456,
+      "grad_norm": 0.00147247314453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 540
+    },
+    {
+      "epoch": 0.3488,
+      "grad_norm": 0.00238037109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0064,
+      "step": 545
+    },
+    {
+      "epoch": 0.352,
+      "grad_norm": 0.001953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 550
+    },
+    {
+      "epoch": 0.3552,
+      "grad_norm": 0.0047607421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0067,
+      "step": 555
+    },
+    {
+      "epoch": 0.3584,
+      "grad_norm": 0.0040283203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 560
+    },
+    {
+      "epoch": 0.3616,
+      "grad_norm": 0.0001983642578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0016,
+      "step": 565
+    },
+    {
+      "epoch": 0.3648,
+      "grad_norm": 0.0072021484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 570
+    },
+    {
+      "epoch": 0.368,
+      "grad_norm": 6.246566772460938e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 575
+    },
+    {
+      "epoch": 0.3712,
+      "grad_norm": 0.00811767578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 580
+    },
+    {
+      "epoch": 0.3744,
+      "grad_norm": 0.00020694732666015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 585
+    },
+    {
+      "epoch": 0.3776,
+      "grad_norm": 0.0098876953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 590
+    },
+    {
+      "epoch": 0.3808,
+      "grad_norm": 0.00012874603271484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 595
+    },
+    {
+      "epoch": 0.384,
+      "grad_norm": 0.0003490447998046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0025,
+      "step": 600
+    },
+    {
+      "epoch": 0.3872,
+      "grad_norm": 0.023681640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0029,
+      "step": 605
+    },
+    {
+      "epoch": 0.3904,
+      "grad_norm": 0.010986328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0032,
+      "step": 610
+    },
+    {
+      "epoch": 0.3936,
+      "grad_norm": 0.000507354736328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 615
+    },
+    {
+      "epoch": 0.3968,
+      "grad_norm": 0.0081787109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 620
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.00032806396484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 625
+    },
+    {
+      "epoch": 0.4032,
+      "grad_norm": 0.0032806396484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 630
+    },
+    {
+      "epoch": 0.4064,
+      "grad_norm": 0.000125885009765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 635
+    },
+    {
+      "epoch": 0.4096,
+      "grad_norm": 0.0002956390380859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 640
+    },
+    {
+      "epoch": 0.4128,
+      "grad_norm": 0.00010919570922851562,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 645
+    },
+    {
+      "epoch": 0.416,
+      "grad_norm": 0.0001983642578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 650
+    },
+    {
+      "epoch": 0.4192,
+      "grad_norm": 0.0262451171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0037,
+      "step": 655
+    },
+    {
+      "epoch": 0.4224,
+      "grad_norm": 0.01007080078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 660
+    },
+    {
+      "epoch": 0.4256,
+      "grad_norm": 0.000640869140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 665
+    },
+    {
+      "epoch": 0.4288,
+      "grad_norm": 0.009033203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 670
+    },
+    {
+      "epoch": 0.432,
+      "grad_norm": 6.580352783203125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0022,
+      "step": 675
+    },
+    {
+      "epoch": 0.4352,
+      "grad_norm": 0.004730224609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 680
+    },
+    {
+      "epoch": 0.4384,
+      "grad_norm": 0.0024871826171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 685
+    },
+    {
+      "epoch": 0.4416,
+      "grad_norm": 0.00958251953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0051,
+      "step": 690
+    },
+    {
+      "epoch": 0.4448,
+      "grad_norm": 0.00011110305786132812,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 695
+    },
+    {
+      "epoch": 0.448,
+      "grad_norm": 0.00019931793212890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 700
+    },
+    {
+      "epoch": 0.4512,
+      "grad_norm": 0.00946044921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0031,
+      "step": 705
+    },
+    {
+      "epoch": 0.4544,
+      "grad_norm": 0.00885009765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0065,
+      "step": 710
+    },
+    {
+      "epoch": 0.4576,
+      "grad_norm": 0.0101318359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 715
+    },
+    {
+      "epoch": 0.4608,
+      "grad_norm": 0.00579833984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 720
+    },
+    {
+      "epoch": 0.464,
+      "grad_norm": 4.649162292480469e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 725
+    },
+    {
+      "epoch": 0.4672,
+      "grad_norm": 0.01806640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 730
+    },
+    {
+      "epoch": 0.4704,
+      "grad_norm": 0.00010061264038085938,
+      "learning_rate": 0.0001,
+      "loss": 0.0104,
+      "step": 735
+    },
+    {
+      "epoch": 0.4736,
+      "grad_norm": 0.0101318359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 740
+    },
+    {
+      "epoch": 0.4768,
+      "grad_norm": 0.00017452239990234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 745
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 0.0003948211669921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 750
+    },
+    {
+      "epoch": 0.4832,
+      "grad_norm": 0.0024566650390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0016,
+      "step": 755
+    },
+    {
+      "epoch": 0.4864,
+      "grad_norm": 0.00628662109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 760
+    },
+    {
+      "epoch": 0.4896,
+      "grad_norm": 0.000637054443359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0012,
+      "step": 765
+    },
+    {
+      "epoch": 0.4928,
+      "grad_norm": 0.03466796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 770
+    },
+    {
+      "epoch": 0.496,
+      "grad_norm": 3.743171691894531e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0016,
+      "step": 775
+    },
+    {
+      "epoch": 0.4992,
+      "grad_norm": 0.0018310546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 780
+    },
+    {
+      "epoch": 0.5024,
+      "grad_norm": 0.00066375732421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 785
+    },
+    {
+      "epoch": 0.5056,
+      "grad_norm": 0.00020503997802734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 790
+    },
+    {
+      "epoch": 0.5088,
+      "grad_norm": 0.0002899169921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 795
+    },
+    {
+      "epoch": 0.512,
+      "grad_norm": 0.00012159347534179688,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 800
+    },
+    {
+      "epoch": 0.5152,
+      "grad_norm": 0.00151824951171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0013,
+      "step": 805
+    },
+    {
+      "epoch": 0.5184,
+      "grad_norm": 0.01953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0023,
+      "step": 810
+    },
+    {
+      "epoch": 0.5216,
+      "grad_norm": 0.0002727508544921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 815
+    },
+    {
+      "epoch": 0.5248,
+      "grad_norm": 0.00087738037109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 820
+    },
+    {
+      "epoch": 0.528,
+      "grad_norm": 4.100799560546875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 825
+    },
+    {
+      "epoch": 0.5312,
+      "grad_norm": 0.00110626220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 830
+    },
+    {
+      "epoch": 0.5344,
+      "grad_norm": 0.000926971435546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 835
+    },
+    {
+      "epoch": 0.5376,
+      "grad_norm": 0.01214599609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0059,
+      "step": 840
+    },
+    {
+      "epoch": 0.5408,
+      "grad_norm": 0.000423431396484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 845
+    },
+    {
+      "epoch": 0.544,
+      "grad_norm": 0.00445556640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 850
+    },
+    {
+      "epoch": 0.5472,
+      "grad_norm": 0.00958251953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0028,
+      "step": 855
+    },
+    {
+      "epoch": 0.5504,
+      "grad_norm": 0.0155029296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0017,
+      "step": 860
+    },
+    {
+      "epoch": 0.5536,
+      "grad_norm": 0.01611328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0017,
+      "step": 865
+    },
+    {
+      "epoch": 0.5568,
+      "grad_norm": 0.0128173828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 870
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 0.000263214111328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 875
+    },
+    {
+      "epoch": 0.5632,
+      "grad_norm": 0.0016937255859375,
+      "learning_rate": 0.0001,
+      "loss": 0.001,
+      "step": 880
+    },
+    {
+      "epoch": 0.5664,
+      "grad_norm": 0.0004329681396484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 885
+    },
+    {
+      "epoch": 0.5696,
+      "grad_norm": 8.726119995117188e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 890
+    },
+    {
+      "epoch": 0.5728,
+      "grad_norm": 0.0001392364501953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 895
+    },
+    {
+      "epoch": 0.576,
+      "grad_norm": 0.0032806396484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 900
+    },
+    {
+      "epoch": 0.5792,
+      "grad_norm": 0.016357421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0023,
+      "step": 905
+    },
+    {
+      "epoch": 0.5824,
+      "grad_norm": 0.0130615234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0029,
+      "step": 910
+    },
+    {
+      "epoch": 0.5856,
+      "grad_norm": 0.00136566162109375,
+      "learning_rate": 0.0001,
+      "loss": 0.001,
+      "step": 915
+    },
+    {
+      "epoch": 0.5888,
+      "grad_norm": 0.0028076171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 920
+    },
+    {
+      "epoch": 0.592,
+      "grad_norm": 0.000263214111328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 925
+    },
+    {
+      "epoch": 0.5952,
+      "grad_norm": 0.004302978515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 930
+    },
+    {
+      "epoch": 0.5984,
+      "grad_norm": 0.0001239776611328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 935
+    },
+    {
+      "epoch": 0.6016,
+      "grad_norm": 8.821487426757812e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 940
+    },
+    {
+      "epoch": 0.6048,
+      "grad_norm": 9.822845458984375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 945
+    },
+    {
+      "epoch": 0.608,
+      "grad_norm": 0.00013446807861328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 950
+    },
+    {
+      "epoch": 0.6112,
+      "grad_norm": 0.0181884765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0028,
+      "step": 955
+    },
+    {
+      "epoch": 0.6144,
+      "grad_norm": 0.0020599365234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 960
+    },
+    {
+      "epoch": 0.6176,
+      "grad_norm": 0.00193023681640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 965
+    },
+    {
+      "epoch": 0.6208,
+      "grad_norm": 0.00020885467529296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 970
+    },
+    {
+      "epoch": 0.624,
+      "grad_norm": 3.600120544433594e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 975
+    },
+    {
+      "epoch": 0.6272,
+      "grad_norm": 0.000385284423828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 980
+    },
+    {
+      "epoch": 0.6304,
+      "grad_norm": 0.01385498046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 985
+    },
+    {
+      "epoch": 0.6336,
+      "grad_norm": 0.00010728836059570312,
+      "learning_rate": 0.0001,
+      "loss": 0.0045,
+      "step": 990
+    },
+    {
+      "epoch": 0.6368,
+      "grad_norm": 7.2479248046875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 995
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 0.00011730194091796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6432,
+      "grad_norm": 0.0142822265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0033,
+      "step": 1005
+    },
+    {
+      "epoch": 0.6464,
+      "grad_norm": 0.0048828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 1010
+    },
+    {
+      "epoch": 0.6496,
+      "grad_norm": 0.0040283203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 1015
+    },
+    {
+      "epoch": 0.6528,
+      "grad_norm": 0.0203857421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0015,
+      "step": 1020
+    },
+    {
+      "epoch": 0.656,
+      "grad_norm": 0.00604248046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1025
+    },
+    {
+      "epoch": 0.6592,
+      "grad_norm": 0.00054168701171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1030
+    },
+    {
+      "epoch": 0.6624,
+      "grad_norm": 0.001129150390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0096,
+      "step": 1035
+    },
+    {
+      "epoch": 0.6656,
+      "grad_norm": 0.014404296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1040
+    },
+    {
+      "epoch": 0.6688,
+      "grad_norm": 0.002716064453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1045
+    },
+    {
+      "epoch": 0.672,
+      "grad_norm": 0.0240478515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1050
+    },
+    {
+      "epoch": 0.6752,
+      "grad_norm": 0.005126953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0017,
+      "step": 1055
+    },
+    {
+      "epoch": 0.6784,
+      "grad_norm": 0.00110626220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 1060
+    },
+    {
+      "epoch": 0.6816,
+      "grad_norm": 7.867813110351562e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1065
+    },
+    {
+      "epoch": 0.6848,
+      "grad_norm": 0.01239013671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 1070
+    },
+    {
+      "epoch": 0.688,
+      "grad_norm": 0.0002841949462890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1075
+    },
+    {
+      "epoch": 0.6912,
+      "grad_norm": 0.0002117156982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 1080
+    },
+    {
+      "epoch": 0.6944,
+      "grad_norm": 0.00010156631469726562,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1085
+    },
+    {
+      "epoch": 0.6976,
+      "grad_norm": 0.0006256103515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1090
+    },
+    {
+      "epoch": 0.7008,
+      "grad_norm": 9.870529174804688e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 1095
+    },
+    {
+      "epoch": 0.704,
+      "grad_norm": 0.000858306884765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1100
+    },
+    {
+      "epoch": 0.7072,
+      "grad_norm": 0.0023040771484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0012,
+      "step": 1105
+    },
+    {
+      "epoch": 0.7104,
+      "grad_norm": 0.013916015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 1110
+    },
+    {
+      "epoch": 0.7136,
+      "grad_norm": 0.000110626220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 1115
+    },
+    {
+      "epoch": 0.7168,
+      "grad_norm": 0.0002536773681640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1120
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 0.0001163482666015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1125
+    },
+    {
+      "epoch": 0.7232,
+      "grad_norm": 0.00014495849609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0091,
+      "step": 1130
+    },
+    {
+      "epoch": 0.7264,
+      "grad_norm": 0.0230712890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1135
+    },
+    {
+      "epoch": 0.7296,
+      "grad_norm": 9.5367431640625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1140
+    },
+    {
+      "epoch": 0.7328,
+      "grad_norm": 8.249282836914062e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1145
+    },
+    {
+      "epoch": 0.736,
+      "grad_norm": 0.001007080078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 1150
+    },
+    {
+      "epoch": 0.7392,
+      "grad_norm": 0.01226806640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0019,
+      "step": 1155
+    },
+    {
+      "epoch": 0.7424,
+      "grad_norm": 0.005828857421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 1160
+    },
+    {
+      "epoch": 0.7456,
+      "grad_norm": 5.6743621826171875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.003,
+      "step": 1165
+    },
+    {
+      "epoch": 0.7488,
+      "grad_norm": 0.0002899169921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1170
+    },
+    {
+      "epoch": 0.752,
+      "grad_norm": 0.0003147125244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1175
+    },
+    {
+      "epoch": 0.7552,
+      "grad_norm": 0.015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1180
+    },
+    {
+      "epoch": 0.7584,
+      "grad_norm": 0.000274658203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1185
+    },
+    {
+      "epoch": 0.7616,
+      "grad_norm": 0.00093841552734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1190
+    },
+    {
+      "epoch": 0.7648,
+      "grad_norm": 0.000335693359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1195
+    },
+    {
+      "epoch": 0.768,
+      "grad_norm": 0.0003681182861328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 1200
+    },
+    {
+      "epoch": 0.7712,
+      "grad_norm": 0.0115966796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 1205
+    },
+    {
+      "epoch": 0.7744,
+      "grad_norm": 0.00811767578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 1210
+    },
+    {
+      "epoch": 0.7776,
+      "grad_norm": 0.00017547607421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0016,
+      "step": 1215
+    },
+    {
+      "epoch": 0.7808,
+      "grad_norm": 0.000782012939453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 1220
+    },
+    {
+      "epoch": 0.784,
+      "grad_norm": 6.198883056640625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1225
+    },
+    {
+      "epoch": 0.7872,
+      "grad_norm": 0.06591796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 1230
+    },
+    {
+      "epoch": 0.7904,
+      "grad_norm": 0.00180816650390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 1235
+    },
+    {
+      "epoch": 0.7936,
+      "grad_norm": 0.015380859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1240
+    },
+    {
+      "epoch": 0.7968,
+      "grad_norm": 0.001373291015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1245
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.003997802734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 1250
+    },
+    {
+      "epoch": 0.8032,
+      "grad_norm": 0.01226806640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0087,
+      "step": 1255
+    },
+    {
+      "epoch": 0.8064,
+      "grad_norm": 0.00616455078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0017,
+      "step": 1260
+    },
+    {
+      "epoch": 0.8096,
+      "grad_norm": 0.00077056884765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 1265
+    },
+    {
+      "epoch": 0.8128,
+      "grad_norm": 0.00128936767578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 1270
+    },
+    {
+      "epoch": 0.816,
+      "grad_norm": 9.679794311523438e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1275
+    },
+    {
+      "epoch": 0.8192,
+      "grad_norm": 0.0008087158203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0015,
+      "step": 1280
+    },
+    {
+      "epoch": 0.8224,
+      "grad_norm": 0.00019359588623046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1285
+    },
+    {
+      "epoch": 0.8256,
+      "grad_norm": 0.000812530517578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1290
+    },
+    {
+      "epoch": 0.8288,
+      "grad_norm": 0.0006256103515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1295
+    },
+    {
+      "epoch": 0.832,
+      "grad_norm": 0.00067901611328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1300
+    },
+    {
+      "epoch": 0.8352,
+      "grad_norm": 0.017822265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0045,
+      "step": 1305
+    },
+    {
+      "epoch": 0.8384,
+      "grad_norm": 0.000347137451171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 1310
+    },
+    {
+      "epoch": 0.8416,
+      "grad_norm": 0.00016117095947265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1315
+    },
+    {
+      "epoch": 0.8448,
+      "grad_norm": 0.0023956298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 1320
+    },
+    {
+      "epoch": 0.848,
+      "grad_norm": 4.6253204345703125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1325
+    },
+    {
+      "epoch": 0.8512,
+      "grad_norm": 0.00543212890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1330
+    },
+    {
+      "epoch": 0.8544,
+      "grad_norm": 7.009506225585938e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1335
+    },
+    {
+      "epoch": 0.8576,
+      "grad_norm": 0.00101470947265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0048,
+      "step": 1340
+    },
+    {
+      "epoch": 0.8608,
+      "grad_norm": 0.00011491775512695312,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1345
+    },
+    {
+      "epoch": 0.864,
+      "grad_norm": 0.0004730224609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 1350
+    },
+    {
+      "epoch": 0.8672,
+      "grad_norm": 0.00885009765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0045,
+      "step": 1355
+    },
+    {
+      "epoch": 0.8704,
+      "grad_norm": 0.02685546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0035,
+      "step": 1360
+    },
+    {
+      "epoch": 0.8736,
+      "grad_norm": 0.0002651214599609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1365
+    },
+    {
+      "epoch": 0.8768,
+      "grad_norm": 0.002777099609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 1370
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 7.390975952148438e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1375
+    },
+    {
+      "epoch": 0.8832,
+      "grad_norm": 0.00023937225341796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1380
+    },
+    {
+      "epoch": 0.8864,
+      "grad_norm": 0.01202392578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 1385
+    },
+    {
+      "epoch": 0.8896,
+      "grad_norm": 0.00543212890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1390
+    },
+    {
+      "epoch": 0.8928,
+      "grad_norm": 8.487701416015625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0047,
+      "step": 1395
+    },
+    {
+      "epoch": 0.896,
+      "grad_norm": 0.007110595703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1400
+    },
+    {
+      "epoch": 0.8992,
+      "grad_norm": 0.026611328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0038,
+      "step": 1405
+    },
+    {
+      "epoch": 0.9024,
+      "grad_norm": 0.02099609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 1410
+    },
+    {
+      "epoch": 0.9056,
+      "grad_norm": 9.918212890625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 1415
+    },
+    {
+      "epoch": 0.9088,
+      "grad_norm": 0.0013580322265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 1420
+    },
+    {
+      "epoch": 0.912,
+      "grad_norm": 0.000720977783203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1425
+    },
+    {
+      "epoch": 0.9152,
+      "grad_norm": 0.0030670166015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1430
+    },
+    {
+      "epoch": 0.9184,
+      "grad_norm": 0.00010013580322265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0088,
+      "step": 1435
+    },
+    {
+      "epoch": 0.9216,
+      "grad_norm": 0.00153350830078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1440
+    },
+    {
+      "epoch": 0.9248,
+      "grad_norm": 6.079673767089844e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1445
+    },
+    {
+      "epoch": 0.928,
+      "grad_norm": 0.0001621246337890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1450
+    },
+    {
+      "epoch": 0.9312,
+      "grad_norm": 0.0111083984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0028,
+      "step": 1455
+    },
+    {
+      "epoch": 0.9344,
+      "grad_norm": 0.020751953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 1460
+    },
+    {
+      "epoch": 0.9376,
+      "grad_norm": 0.00250244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 1465
+    },
+    {
+      "epoch": 0.9408,
+      "grad_norm": 0.00299072265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0027,
+      "step": 1470
+    },
+    {
+      "epoch": 0.944,
+      "grad_norm": 0.000110626220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1475
+    },
+    {
+      "epoch": 0.9472,
+      "grad_norm": 0.0003757476806640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1480
+    },
+    {
+      "epoch": 0.9504,
+      "grad_norm": 7.05718994140625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1485
+    },
+    {
+      "epoch": 0.9536,
+      "grad_norm": 0.0004425048828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1490
+    },
+    {
+      "epoch": 0.9568,
+      "grad_norm": 0.0001220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1495
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 0.00020503997802734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9632,
+      "grad_norm": 0.014892578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0014,
+      "step": 1505
+    },
+    {
+      "epoch": 0.9664,
+      "grad_norm": 0.01470947265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0012,
+      "step": 1510
+    },
+    {
+      "epoch": 0.9696,
+      "grad_norm": 0.0004863739013671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1515
+    },
+    {
+      "epoch": 0.9728,
+      "grad_norm": 0.0019683837890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 1520
+    },
+    {
+      "epoch": 0.976,
+      "grad_norm": 4.6253204345703125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1525
+    },
+    {
+      "epoch": 0.9792,
+      "grad_norm": 0.00469970703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1530
+    },
+    {
+      "epoch": 0.9824,
+      "grad_norm": 0.00494384765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1535
+    },
+    {
+      "epoch": 0.9856,
+      "grad_norm": 0.00011920928955078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1540
+    },
+    {
+      "epoch": 0.9888,
+      "grad_norm": 4.029273986816406e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0023,
+      "step": 1545
+    },
+    {
+      "epoch": 0.992,
+      "grad_norm": 0.00013828277587890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1550
+    },
+    {
+      "epoch": 0.9952,
+      "grad_norm": 7.581710815429688e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 1555
+    },
+    {
+      "epoch": 0.9984,
+      "grad_norm": 0.000148773193359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0023,
+      "step": 1560
+    },
+    {
+      "epoch": 1.0016,
+      "grad_norm": 0.051025390625,
+      "learning_rate": 0.0001,
+      "loss": 0.006,
+      "step": 1565
+    },
+    {
+      "epoch": 1.0048,
+      "grad_norm": 0.0218505859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0044,
+      "step": 1570
+    },
+    {
+      "epoch": 1.008,
+      "grad_norm": 0.01556396484375,
+      "learning_rate": 0.0001,
+      "loss": 0.004,
+      "step": 1575
+    },
+    {
+      "epoch": 1.0112,
+      "grad_norm": 0.01068115234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 1580
+    },
+    {
+      "epoch": 1.0144,
+      "grad_norm": 0.0007781982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 1585
+    },
+    {
+      "epoch": 1.0176,
+      "grad_norm": 0.008056640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1590
+    },
+    {
+      "epoch": 1.0208,
+      "grad_norm": 0.000514984130859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 1595
+    },
+    {
+      "epoch": 1.024,
+      "grad_norm": 0.03369140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1600
+    },
+    {
+      "epoch": 1.0272,
+      "grad_norm": 0.001129150390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1605
+    },
+    {
+      "epoch": 1.0304,
+      "grad_norm": 0.11474609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0019,
+      "step": 1610
+    },
+    {
+      "epoch": 1.0336,
+      "grad_norm": 0.0260009765625,
+      "learning_rate": 0.0001,
+      "loss": 0.003,
+      "step": 1615
+    },
+    {
+      "epoch": 1.0368,
+      "grad_norm": 0.01446533203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0021,
+      "step": 1620
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 0.013427734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0016,
+      "step": 1625
+    },
+    {
+      "epoch": 1.0432,
+      "grad_norm": 0.00640869140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 1630
+    },
+    {
+      "epoch": 1.0464,
+      "grad_norm": 0.0018310546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0021,
+      "step": 1635
+    },
+    {
+      "epoch": 1.0496,
+      "grad_norm": 0.0004405975341796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1640
+    },
+    {
+      "epoch": 1.0528,
+      "grad_norm": 0.0034332275390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1645
+    },
+    {
+      "epoch": 1.056,
+      "grad_norm": 0.000762939453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1650
+    },
+    {
+      "epoch": 1.0592,
+      "grad_norm": 0.0002536773681640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1655
+    },
+    {
+      "epoch": 1.0624,
+      "grad_norm": 0.0125732421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0032,
+      "step": 1660
+    },
+    {
+      "epoch": 1.0656,
+      "grad_norm": 0.01055908203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0013,
+      "step": 1665
+    },
+    {
+      "epoch": 1.0688,
+      "grad_norm": 0.0230712890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0017,
+      "step": 1670
+    },
+    {
+      "epoch": 1.072,
+      "grad_norm": 0.00064849853515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1675
+    },
+    {
+      "epoch": 1.0752,
+      "grad_norm": 0.0277099609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1680
+    },
+    {
+      "epoch": 1.0784,
+      "grad_norm": 0.000354766845703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 1685
+    },
+    {
+      "epoch": 1.0816,
+      "grad_norm": 0.00518798828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1690
+    },
+    {
+      "epoch": 1.0848,
+      "grad_norm": 0.0026092529296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 1695
+    },
+    {
+      "epoch": 1.088,
+      "grad_norm": 0.00125885009765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1700
+    },
+    {
+      "epoch": 1.0912,
+      "grad_norm": 0.00156402587890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 1705
+    },
+    {
+      "epoch": 1.0944,
+      "grad_norm": 0.03369140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1710
+    },
+    {
+      "epoch": 1.0976,
+      "grad_norm": 0.0028533935546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0013,
+      "step": 1715
+    },
+    {
+      "epoch": 1.1008,
+      "grad_norm": 0.008544921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1720
+    },
+    {
+      "epoch": 1.104,
+      "grad_norm": 0.001251220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 1725
+    },
+    {
+      "epoch": 1.1072,
+      "grad_norm": 0.0032196044921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1730
+    },
+    {
+      "epoch": 1.1104,
+      "grad_norm": 5.245208740234375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 1735
+    },
+    {
+      "epoch": 1.1136,
+      "grad_norm": 0.000293731689453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1740
+    },
+    {
+      "epoch": 1.1168,
+      "grad_norm": 0.000751495361328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1745
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 0.000194549560546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1750
+    },
+    {
+      "epoch": 1.1232,
+      "grad_norm": 5.793571472167969e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1755
+    },
+    {
+      "epoch": 1.1264,
+      "grad_norm": 9.489059448242188e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1760
+    },
+    {
+      "epoch": 1.1296,
+      "grad_norm": 0.0262451171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0013,
+      "step": 1765
+    },
+    {
+      "epoch": 1.1328,
+      "grad_norm": 0.00244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0042,
+      "step": 1770
+    },
+    {
+      "epoch": 1.1360000000000001,
+      "grad_norm": 0.0003643035888671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1775
+    },
+    {
+      "epoch": 1.1392,
+      "grad_norm": 0.00762939453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1780
+    },
+    {
+      "epoch": 1.1424,
+      "grad_norm": 1.5616416931152344e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1785
+    },
+    {
+      "epoch": 1.1456,
+      "grad_norm": 4.482269287109375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1790
+    },
+    {
+      "epoch": 1.1488,
+      "grad_norm": 0.00013256072998046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1795
+    },
+    {
+      "epoch": 1.152,
+      "grad_norm": 3.600120544433594e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1800
+    },
+    {
+      "epoch": 1.1552,
+      "grad_norm": 3.981590270996094e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1805
+    },
+    {
+      "epoch": 1.1584,
+      "grad_norm": 0.0009765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1810
+    },
+    {
+      "epoch": 1.1616,
+      "grad_norm": 0.0034637451171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0016,
+      "step": 1815
+    },
+    {
+      "epoch": 1.1648,
+      "grad_norm": 0.00775146484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1820
+    },
+    {
+      "epoch": 1.168,
+      "grad_norm": 0.00029754638671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1825
+    },
+    {
+      "epoch": 1.1712,
+      "grad_norm": 3.2901763916015625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0024,
+      "step": 1830
+    },
+    {
+      "epoch": 1.1743999999999999,
+      "grad_norm": 0.0003795623779296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1835
+    },
+    {
+      "epoch": 1.1776,
+      "grad_norm": 2.765655517578125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1840
+    },
+    {
+      "epoch": 1.1808,
+      "grad_norm": 0.1875,
+      "learning_rate": 0.0001,
+      "loss": 0.0021,
+      "step": 1845
+    },
+    {
+      "epoch": 1.184,
+      "grad_norm": 0.0238037109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0039,
+      "step": 1850
+    },
+    {
+      "epoch": 1.1872,
+      "grad_norm": 0.0027008056640625,
+      "learning_rate": 0.0001,
+      "loss": 0.005,
+      "step": 1855
+    },
+    {
+      "epoch": 1.1904,
+      "grad_norm": 0.0017852783203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 1860
+    },
+    {
+      "epoch": 1.1936,
+      "grad_norm": 0.041259765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0096,
+      "step": 1865
+    },
+    {
+      "epoch": 1.1968,
+      "grad_norm": 0.035888671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0081,
+      "step": 1870
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 0.0086669921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0012,
+      "step": 1875
+    },
+    {
+      "epoch": 1.2032,
+      "grad_norm": 0.000690460205078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 1880
+    },
+    {
+      "epoch": 1.2064,
+      "grad_norm": 0.03076171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0027,
+      "step": 1885
+    },
+    {
+      "epoch": 1.2096,
+      "grad_norm": 0.00012969970703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1890
+    },
+    {
+      "epoch": 1.2128,
+      "grad_norm": 0.003631591796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1895
+    },
+    {
+      "epoch": 1.216,
+      "grad_norm": 0.0004482269287109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1900
+    },
+    {
+      "epoch": 1.2192,
+      "grad_norm": 9.584426879882812e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1905
+    },
+    {
+      "epoch": 1.2224,
+      "grad_norm": 0.00075531005859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1910
+    },
+    {
+      "epoch": 1.2256,
+      "grad_norm": 0.00628662109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 1915
+    },
+    {
+      "epoch": 1.2288000000000001,
+      "grad_norm": 0.002655029296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0015,
+      "step": 1920
+    },
+    {
+      "epoch": 1.232,
+      "grad_norm": 0.027587890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0012,
+      "step": 1925
+    },
+    {
+      "epoch": 1.2352,
+      "grad_norm": 0.0196533203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 1930
+    },
+    {
+      "epoch": 1.2384,
+      "grad_norm": 0.00016689300537109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1935
+    },
+    {
+      "epoch": 1.2416,
+      "grad_norm": 0.00014495849609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1940
+    },
+    {
+      "epoch": 1.2448,
+      "grad_norm": 0.0002803802490234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0089,
+      "step": 1945
+    },
+    {
+      "epoch": 1.248,
+      "grad_norm": 0.0087890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1950
+    },
+    {
+      "epoch": 1.2511999999999999,
+      "grad_norm": 8.106231689453125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0035,
+      "step": 1955
+    },
+    {
+      "epoch": 1.2544,
+      "grad_norm": 0.0004405975341796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 1960
+    },
+    {
+      "epoch": 1.2576,
+      "grad_norm": 0.015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0015,
+      "step": 1965
+    },
+    {
+      "epoch": 1.2608,
+      "grad_norm": 0.00543212890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 1970
+    },
+    {
+      "epoch": 1.264,
+      "grad_norm": 0.00104522705078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 1975
+    },
+    {
+      "epoch": 1.2671999999999999,
+      "grad_norm": 0.015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1980
+    },
+    {
+      "epoch": 1.2704,
+      "grad_norm": 0.00013256072998046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1985
+    },
+    {
+      "epoch": 1.2736,
+      "grad_norm": 0.0091552734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 1990
+    },
+    {
+      "epoch": 1.2768,
+      "grad_norm": 0.000385284423828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 1995
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 5.435943603515625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2000
+    },
+    {
+      "epoch": 1.2832,
+      "grad_norm": 8.392333984375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2005
+    },
+    {
+      "epoch": 1.2864,
+      "grad_norm": 0.00024127960205078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2010
+    },
+    {
+      "epoch": 1.2896,
+      "grad_norm": 0.00421142578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0013,
+      "step": 2015
+    },
+    {
+      "epoch": 1.2928,
+      "grad_norm": 0.00872802734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 2020
+    },
+    {
+      "epoch": 1.296,
+      "grad_norm": 0.000392913818359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 2025
+    },
+    {
+      "epoch": 1.2992,
+      "grad_norm": 0.021240234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 2030
+    },
+    {
+      "epoch": 1.3024,
+      "grad_norm": 0.00051116943359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2035
+    },
+    {
+      "epoch": 1.3056,
+      "grad_norm": 0.0001697540283203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2040
+    },
+    {
+      "epoch": 1.3088,
+      "grad_norm": 0.01336669921875,
+      "learning_rate": 0.0001,
+      "loss": 0.008,
+      "step": 2045
+    },
+    {
+      "epoch": 1.312,
+      "grad_norm": 5.435943603515625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2050
+    },
+    {
+      "epoch": 1.3152,
+      "grad_norm": 0.000164031982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2055
+    },
+    {
+      "epoch": 1.3184,
+      "grad_norm": 0.0004558563232421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2060
+    },
+    {
+      "epoch": 1.3216,
+      "grad_norm": 0.020751953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0012,
+      "step": 2065
+    },
+    {
+      "epoch": 1.3248,
+      "grad_norm": 0.000415802001953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 2070
+    },
+    {
+      "epoch": 1.328,
+      "grad_norm": 0.00104522705078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2075
+    },
+    {
+      "epoch": 1.3312,
+      "grad_norm": 0.0003910064697265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2080
+    },
+    {
+      "epoch": 1.3344,
+      "grad_norm": 0.00020313262939453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 2085
+    },
+    {
+      "epoch": 1.3376000000000001,
+      "grad_norm": 5.030632019042969e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2090
+    },
+    {
+      "epoch": 1.3408,
+      "grad_norm": 0.00090789794921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2095
+    },
+    {
+      "epoch": 1.3439999999999999,
+      "grad_norm": 0.00037384033203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2100
+    },
+    {
+      "epoch": 1.3472,
+      "grad_norm": 0.00014400482177734375,
+      "learning_rate": 0.0001,
+      "loss": 0.003,
+      "step": 2105
+    },
+    {
+      "epoch": 1.3504,
+      "grad_norm": 0.00188446044921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2110
+    },
+    {
+      "epoch": 1.3536000000000001,
+      "grad_norm": 0.0023956298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 2115
+    },
+    {
+      "epoch": 1.3568,
+      "grad_norm": 0.015869140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 2120
+    },
+    {
+      "epoch": 1.3599999999999999,
+      "grad_norm": 0.0103759765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 2125
+    },
+    {
+      "epoch": 1.3632,
+      "grad_norm": 0.000926971435546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2130
+    },
+    {
+      "epoch": 1.3664,
+      "grad_norm": 3.0159950256347656e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2135
+    },
+    {
+      "epoch": 1.3696,
+      "grad_norm": 0.00174713134765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2140
+    },
+    {
+      "epoch": 1.3728,
+      "grad_norm": 3.1948089599609375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2145
+    },
+    {
+      "epoch": 1.376,
+      "grad_norm": 0.00030517578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2150
+    },
+    {
+      "epoch": 1.3792,
+      "grad_norm": 3.0279159545898438e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2155
+    },
+    {
+      "epoch": 1.3824,
+      "grad_norm": 5.030632019042969e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2160
+    },
+    {
+      "epoch": 1.3856,
+      "grad_norm": 0.0203857421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 2165
+    },
+    {
+      "epoch": 1.3888,
+      "grad_norm": 0.00439453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2170
+    },
+    {
+      "epoch": 1.392,
+      "grad_norm": 0.0004425048828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2175
+    },
+    {
+      "epoch": 1.3952,
+      "grad_norm": 0.005523681640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2180
+    },
+    {
+      "epoch": 1.3984,
+      "grad_norm": 0.0007781982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2185
+    },
+    {
+      "epoch": 1.4016,
+      "grad_norm": 2.8252601623535156e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2190
+    },
+    {
+      "epoch": 1.4048,
+      "grad_norm": 0.000560760498046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0043,
+      "step": 2195
+    },
+    {
+      "epoch": 1.408,
+      "grad_norm": 0.0002574920654296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2200
+    },
+    {
+      "epoch": 1.4112,
+      "grad_norm": 5.507469177246094e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2205
+    },
+    {
+      "epoch": 1.4144,
+      "grad_norm": 0.00087738037109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2210
+    },
+    {
+      "epoch": 1.4176,
+      "grad_norm": 0.0057373046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 2215
+    },
+    {
+      "epoch": 1.4208,
+      "grad_norm": 0.0093994140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 2220
+    },
+    {
+      "epoch": 1.424,
+      "grad_norm": 0.000213623046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 2225
+    },
+    {
+      "epoch": 1.4272,
+      "grad_norm": 0.00421142578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 2230
+    },
+    {
+      "epoch": 1.4304000000000001,
+      "grad_norm": 0.0002956390380859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2235
+    },
+    {
+      "epoch": 1.4336,
+      "grad_norm": 0.00482177734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2240
+    },
+    {
+      "epoch": 1.4368,
+      "grad_norm": 7.05718994140625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2245
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 0.016845703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2250
+    },
+    {
+      "epoch": 1.4432,
+      "grad_norm": 4.1484832763671875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2255
+    },
+    {
+      "epoch": 1.4464000000000001,
+      "grad_norm": 0.000701904296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2260
+    },
+    {
+      "epoch": 1.4496,
+      "grad_norm": 0.0123291015625,
+      "learning_rate": 0.0001,
+      "loss": 0.001,
+      "step": 2265
+    },
+    {
+      "epoch": 1.4527999999999999,
+      "grad_norm": 0.007110595703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 2270
+    },
+    {
+      "epoch": 1.456,
+      "grad_norm": 0.00049591064453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 2275
+    },
+    {
+      "epoch": 1.4592,
+      "grad_norm": 0.00604248046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 2280
+    },
+    {
+      "epoch": 1.4624,
+      "grad_norm": 9.965896606445312e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2285
+    },
+    {
+      "epoch": 1.4656,
+      "grad_norm": 0.00040435791015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2290
+    },
+    {
+      "epoch": 1.4687999999999999,
+      "grad_norm": 0.0001773834228515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0077,
+      "step": 2295
+    },
+    {
+      "epoch": 1.472,
+      "grad_norm": 2.6226043701171875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2300
+    },
+    {
+      "epoch": 1.4752,
+      "grad_norm": 3.039836883544922e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2305
+    },
+    {
+      "epoch": 1.4784,
+      "grad_norm": 0.00025177001953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2310
+    },
+    {
+      "epoch": 1.4816,
+      "grad_norm": 0.006805419921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2315
+    },
+    {
+      "epoch": 1.4848,
+      "grad_norm": 0.002532958984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 2320
+    },
+    {
+      "epoch": 1.488,
+      "grad_norm": 0.0169677734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2325
+    },
+    {
+      "epoch": 1.4912,
+      "grad_norm": 8.726119995117188e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2330
+    },
+    {
+      "epoch": 1.4944,
+      "grad_norm": 1.7762184143066406e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2335
+    },
+    {
+      "epoch": 1.4976,
+      "grad_norm": 8.630752563476562e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2340
+    },
+    {
+      "epoch": 1.5008,
+      "grad_norm": 0.00075531005859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2345
+    },
+    {
+      "epoch": 1.504,
+      "grad_norm": 1.7523765563964844e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2350
+    },
+    {
+      "epoch": 1.5072,
+      "grad_norm": 1.990795135498047e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2355
+    },
+    {
+      "epoch": 1.5104,
+      "grad_norm": 9.870529174804688e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2360
+    },
+    {
+      "epoch": 1.5135999999999998,
+      "grad_norm": 0.00115966796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2365
+    },
+    {
+      "epoch": 1.5168,
+      "grad_norm": 0.001068115234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 2370
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 0.0001659393310546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2375
+    },
+    {
+      "epoch": 1.5232,
+      "grad_norm": 0.00011730194091796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2380
+    },
+    {
+      "epoch": 1.5264,
+      "grad_norm": 1.9550323486328125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2385
+    },
+    {
+      "epoch": 1.5295999999999998,
+      "grad_norm": 9.107589721679688e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2390
+    },
+    {
+      "epoch": 1.5328,
+      "grad_norm": 7.82012939453125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2395
+    },
+    {
+      "epoch": 1.536,
+      "grad_norm": 1.6927719116210938e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2400
+    },
+    {
+      "epoch": 1.5392000000000001,
+      "grad_norm": 1.6689300537109375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2405
+    },
+    {
+      "epoch": 1.5424,
+      "grad_norm": 7.05718994140625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2410
+    },
+    {
+      "epoch": 1.5455999999999999,
+      "grad_norm": 0.00075531005859375,
+      "learning_rate": 0.0001,
+      "loss": 0.001,
+      "step": 2415
+    },
+    {
+      "epoch": 1.5488,
+      "grad_norm": 0.01153564453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 2420
+    },
+    {
+      "epoch": 1.552,
+      "grad_norm": 0.000339508056640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 2425
+    },
+    {
+      "epoch": 1.5552000000000001,
+      "grad_norm": 0.0022735595703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2430
+    },
+    {
+      "epoch": 1.5584,
+      "grad_norm": 0.000431060791015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2435
+    },
+    {
+      "epoch": 1.5615999999999999,
+      "grad_norm": 0.001129150390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2440
+    },
+    {
+      "epoch": 1.5648,
+      "grad_norm": 0.00066375732421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 2445
+    },
+    {
+      "epoch": 1.568,
+      "grad_norm": 0.00010156631469726562,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2450
+    },
+    {
+      "epoch": 1.5712000000000002,
+      "grad_norm": 0.00031280517578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0022,
+      "step": 2455
+    },
+    {
+      "epoch": 1.5744,
+      "grad_norm": 0.0007476806640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2460
+    },
+    {
+      "epoch": 1.5776,
+      "grad_norm": 0.00994873046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 2465
+    },
+    {
+      "epoch": 1.5808,
+      "grad_norm": 0.01287841796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 2470
+    },
+    {
+      "epoch": 1.584,
+      "grad_norm": 0.0098876953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 2475
+    },
+    {
+      "epoch": 1.5872000000000002,
+      "grad_norm": 0.000102996826171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 2480
+    },
+    {
+      "epoch": 1.5904,
+      "grad_norm": 0.00010824203491210938,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2485
+    },
+    {
+      "epoch": 1.5936,
+      "grad_norm": 0.00016307830810546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2490
+    },
+    {
+      "epoch": 1.5968,
+      "grad_norm": 6.580352783203125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2495
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 0.01251220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2500
+    },
+    {
+      "epoch": 1.6032,
+      "grad_norm": 0.00018596649169921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2505
+    },
+    {
+      "epoch": 1.6064,
+      "grad_norm": 7.62939453125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2510
+    },
+    {
+      "epoch": 1.6096,
+      "grad_norm": 0.0015869140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 2515
+    },
+    {
+      "epoch": 1.6128,
+      "grad_norm": 0.00020122528076171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0028,
+      "step": 2520
+    },
+    {
+      "epoch": 1.616,
+      "grad_norm": 0.00058746337890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2525
+    },
+    {
+      "epoch": 1.6192,
+      "grad_norm": 0.00017070770263671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2530
+    },
+    {
+      "epoch": 1.6223999999999998,
+      "grad_norm": 5.340576171875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2535
+    },
+    {
+      "epoch": 1.6256,
+      "grad_norm": 6.818771362304688e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2540
+    },
+    {
+      "epoch": 1.6288,
+      "grad_norm": 0.00014495849609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 2545
+    },
+    {
+      "epoch": 1.6320000000000001,
+      "grad_norm": 4.673004150390625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2550
+    },
+    {
+      "epoch": 1.6352,
+      "grad_norm": 6.580352783203125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2555
+    },
+    {
+      "epoch": 1.6383999999999999,
+      "grad_norm": 0.00011396408081054688,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2560
+    },
+    {
+      "epoch": 1.6416,
+      "grad_norm": 0.0036773681640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2565
+    },
+    {
+      "epoch": 1.6448,
+      "grad_norm": 0.004425048828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2570
+    },
+    {
+      "epoch": 1.6480000000000001,
+      "grad_norm": 8.678436279296875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2575
+    },
+    {
+      "epoch": 1.6512,
+      "grad_norm": 0.00040435791015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2580
+    },
+    {
+      "epoch": 1.6543999999999999,
+      "grad_norm": 5.435943603515625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2585
+    },
+    {
+      "epoch": 1.6576,
+      "grad_norm": 0.0019378662109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2590
+    },
+    {
+      "epoch": 1.6608,
+      "grad_norm": 2.5153160095214844e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2595
+    },
+    {
+      "epoch": 1.6640000000000001,
+      "grad_norm": 2.47955322265625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2600
+    },
+    {
+      "epoch": 1.6672,
+      "grad_norm": 6.437301635742188e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0055,
+      "step": 2605
+    },
+    {
+      "epoch": 1.6703999999999999,
+      "grad_norm": 4.410743713378906e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2610
+    },
+    {
+      "epoch": 1.6736,
+      "grad_norm": 0.01953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 2615
+    },
+    {
+      "epoch": 1.6768,
+      "grad_norm": 0.000286102294921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2620
+    },
+    {
+      "epoch": 1.6800000000000002,
+      "grad_norm": 4.9591064453125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2625
+    },
+    {
+      "epoch": 1.6832,
+      "grad_norm": 0.009765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2630
+    },
+    {
+      "epoch": 1.6864,
+      "grad_norm": 0.0001964569091796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2635
+    },
+    {
+      "epoch": 1.6896,
+      "grad_norm": 0.001983642578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2640
+    },
+    {
+      "epoch": 1.6928,
+      "grad_norm": 0.0001049041748046875,
+      "learning_rate": 0.0001,
+      "loss": 0.002,
+      "step": 2645
+    },
+    {
+      "epoch": 1.696,
+      "grad_norm": 4.839897155761719e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2650
+    },
+    {
+      "epoch": 1.6992,
+      "grad_norm": 6.4849853515625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2655
+    },
+    {
+      "epoch": 1.7024,
+      "grad_norm": 0.00010347366333007812,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2660
+    },
+    {
+      "epoch": 1.7056,
+      "grad_norm": 0.00347900390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0015,
+      "step": 2665
+    },
+    {
+      "epoch": 1.7088,
+      "grad_norm": 0.00022220611572265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2670
+    },
+    {
+      "epoch": 1.712,
+      "grad_norm": 0.000514984130859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2675
+    },
+    {
+      "epoch": 1.7151999999999998,
+      "grad_norm": 0.000308990478515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2680
+    },
+    {
+      "epoch": 1.7184,
+      "grad_norm": 4.00543212890625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 2685
+    },
+    {
+      "epoch": 1.7216,
+      "grad_norm": 0.007568359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2690
+    },
+    {
+      "epoch": 1.7248,
+      "grad_norm": 0.00115966796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2695
+    },
+    {
+      "epoch": 1.728,
+      "grad_norm": 0.0076904296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2700
+    },
+    {
+      "epoch": 1.7311999999999999,
+      "grad_norm": 0.00022602081298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0026,
+      "step": 2705
+    },
+    {
+      "epoch": 1.7344,
+      "grad_norm": 9.632110595703125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2710
+    },
+    {
+      "epoch": 1.7376,
+      "grad_norm": 0.0118408203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2715
+    },
+    {
+      "epoch": 1.7408000000000001,
+      "grad_norm": 0.00146484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 2720
+    },
+    {
+      "epoch": 1.744,
+      "grad_norm": 0.0023345947265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 2725
+    },
+    {
+      "epoch": 1.7471999999999999,
+      "grad_norm": 9.822845458984375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2730
+    },
+    {
+      "epoch": 1.7504,
+      "grad_norm": 2.4318695068359375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2735
+    },
+    {
+      "epoch": 1.7536,
+      "grad_norm": 9.393692016601562e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2740
+    },
+    {
+      "epoch": 1.7568000000000001,
+      "grad_norm": 1.823902130126953e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2745
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 0.0001773834228515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2750
+    },
+    {
+      "epoch": 1.7631999999999999,
+      "grad_norm": 1.6927719116210938e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 2755
+    },
+    {
+      "epoch": 1.7664,
+      "grad_norm": 0.0002307891845703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2760
+    },
+    {
+      "epoch": 1.7696,
+      "grad_norm": 0.0021820068359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2765
+    },
+    {
+      "epoch": 1.7728000000000002,
+      "grad_norm": 0.01446533203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 2770
+    },
+    {
+      "epoch": 1.776,
+      "grad_norm": 0.0062255859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2775
+    },
+    {
+      "epoch": 1.7792,
+      "grad_norm": 7.2479248046875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2780
+    },
+    {
+      "epoch": 1.7824,
+      "grad_norm": 6.031990051269531e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2785
+    },
+    {
+      "epoch": 1.7856,
+      "grad_norm": 3.314018249511719e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2790
+    },
+    {
+      "epoch": 1.7888,
+      "grad_norm": 0.00011873245239257812,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2795
+    },
+    {
+      "epoch": 1.792,
+      "grad_norm": 0.00274658203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2800
+    },
+    {
+      "epoch": 1.7952,
+      "grad_norm": 1.7762184143066406e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0026,
+      "step": 2805
+    },
+    {
+      "epoch": 1.7984,
+      "grad_norm": 0.00110626220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2810
+    },
+    {
+      "epoch": 1.8016,
+      "grad_norm": 0.000553131103515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2815
+    },
+    {
+      "epoch": 1.8048,
+      "grad_norm": 0.00390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2820
+    },
+    {
+      "epoch": 1.808,
+      "grad_norm": 8.440017700195312e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2825
+    },
+    {
+      "epoch": 1.8112,
+      "grad_norm": 0.01214599609375,
+      "learning_rate": 0.0001,
+      "loss": 0.002,
+      "step": 2830
+    },
+    {
+      "epoch": 1.8144,
+      "grad_norm": 1.3053417205810547e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2835
+    },
+    {
+      "epoch": 1.8176,
+      "grad_norm": 0.000514984130859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2840
+    },
+    {
+      "epoch": 1.8208,
+      "grad_norm": 1.990795135498047e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2845
+    },
+    {
+      "epoch": 1.8239999999999998,
+      "grad_norm": 2.968311309814453e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2850
+    },
+    {
+      "epoch": 1.8272,
+      "grad_norm": 2.2292137145996094e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2855
+    },
+    {
+      "epoch": 1.8304,
+      "grad_norm": 5.078315734863281e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2860
+    },
+    {
+      "epoch": 1.8336000000000001,
+      "grad_norm": 0.000812530517578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2865
+    },
+    {
+      "epoch": 1.8368,
+      "grad_norm": 0.00077056884765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2870
+    },
+    {
+      "epoch": 1.8399999999999999,
+      "grad_norm": 0.007781982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2875
+    },
+    {
+      "epoch": 1.8432,
+      "grad_norm": 0.01385498046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 2880
+    },
+    {
+      "epoch": 1.8464,
+      "grad_norm": 0.01507568359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 2885
+    },
+    {
+      "epoch": 1.8496000000000001,
+      "grad_norm": 8.96453857421875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2890
+    },
+    {
+      "epoch": 1.8528,
+      "grad_norm": 0.003997802734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2895
+    },
+    {
+      "epoch": 1.8559999999999999,
+      "grad_norm": 0.000301361083984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2900
+    },
+    {
+      "epoch": 1.8592,
+      "grad_norm": 2.4199485778808594e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2905
+    },
+    {
+      "epoch": 1.8624,
+      "grad_norm": 0.000530242919921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2910
+    },
+    {
+      "epoch": 1.8656000000000001,
+      "grad_norm": 0.0048828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0017,
+      "step": 2915
+    },
+    {
+      "epoch": 1.8688,
+      "grad_norm": 0.00115203857421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 2920
+    },
+    {
+      "epoch": 1.8719999999999999,
+      "grad_norm": 0.001678466796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2925
+    },
+    {
+      "epoch": 1.8752,
+      "grad_norm": 8.296966552734375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2930
+    },
+    {
+      "epoch": 1.8784,
+      "grad_norm": 7.724761962890625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2935
+    },
+    {
+      "epoch": 1.8816000000000002,
+      "grad_norm": 0.00075531005859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2940
+    },
+    {
+      "epoch": 1.8848,
+      "grad_norm": 0.0172119140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0082,
+      "step": 2945
+    },
+    {
+      "epoch": 1.888,
+      "grad_norm": 9.34600830078125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2950
+    },
+    {
+      "epoch": 1.8912,
+      "grad_norm": 6.341934204101562e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2955
+    },
+    {
+      "epoch": 1.8944,
+      "grad_norm": 0.000598907470703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2960
+    },
+    {
+      "epoch": 1.8976,
+      "grad_norm": 0.000759124755859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 2965
+    },
+    {
+      "epoch": 1.9008,
+      "grad_norm": 0.00115203857421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0016,
+      "step": 2970
+    },
+    {
+      "epoch": 1.904,
+      "grad_norm": 0.00017452239990234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2975
+    },
+    {
+      "epoch": 1.9072,
+      "grad_norm": 0.0005645751953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 2980
+    },
+    {
+      "epoch": 1.9104,
+      "grad_norm": 0.0007781982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 2985
+    },
+    {
+      "epoch": 1.9136,
+      "grad_norm": 0.00010824203491210938,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 2990
+    },
+    {
+      "epoch": 1.9167999999999998,
+      "grad_norm": 0.019287109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0083,
+      "step": 2995
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 5.245208740234375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3000
+    },
+    {
+      "epoch": 1.9232,
+      "grad_norm": 4.315376281738281e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 3005
+    },
+    {
+      "epoch": 1.9264000000000001,
+      "grad_norm": 0.00014400482177734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3010
+    },
+    {
+      "epoch": 1.9296,
+      "grad_norm": 0.01171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0016,
+      "step": 3015
+    },
+    {
+      "epoch": 1.9327999999999999,
+      "grad_norm": 0.000598907470703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3020
+    },
+    {
+      "epoch": 1.936,
+      "grad_norm": 3.504753112792969e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3025
+    },
+    {
+      "epoch": 1.9392,
+      "grad_norm": 6.628036499023438e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3030
+    },
+    {
+      "epoch": 1.9424000000000001,
+      "grad_norm": 5.8650970458984375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3035
+    },
+    {
+      "epoch": 1.9456,
+      "grad_norm": 6.866455078125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3040
+    },
+    {
+      "epoch": 1.9487999999999999,
+      "grad_norm": 0.00048828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0041,
+      "step": 3045
+    },
+    {
+      "epoch": 1.952,
+      "grad_norm": 0.0172119140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 3050
+    },
+    {
+      "epoch": 1.9552,
+      "grad_norm": 0.000919342041015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3055
+    },
+    {
+      "epoch": 1.9584000000000001,
+      "grad_norm": 0.0103759765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3060
+    },
+    {
+      "epoch": 1.9616,
+      "grad_norm": 0.0177001953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0022,
+      "step": 3065
+    },
+    {
+      "epoch": 1.9647999999999999,
+      "grad_norm": 0.00982666015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0014,
+      "step": 3070
+    },
+    {
+      "epoch": 1.968,
+      "grad_norm": 0.00089263916015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3075
+    },
+    {
+      "epoch": 1.9712,
+      "grad_norm": 0.0076904296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 3080
+    },
+    {
+      "epoch": 1.9744000000000002,
+      "grad_norm": 0.00604248046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 3085
+    },
+    {
+      "epoch": 1.9776,
+      "grad_norm": 0.008544921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3090
+    },
+    {
+      "epoch": 1.9808,
+      "grad_norm": 0.0205078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 3095
+    },
+    {
+      "epoch": 1.984,
+      "grad_norm": 4.935264587402344e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3100
+    },
+    {
+      "epoch": 1.9872,
+      "grad_norm": 0.00018596649169921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0035,
+      "step": 3105
+    },
+    {
+      "epoch": 1.9904,
+      "grad_norm": 0.000698089599609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3110
+    },
+    {
+      "epoch": 1.9936,
+      "grad_norm": 0.00113677978515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 3115
+    },
+    {
+      "epoch": 1.9968,
+      "grad_norm": 7.200241088867188e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 3120
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 9.5367431640625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3125
+    },
+    {
+      "epoch": 2.0032,
+      "grad_norm": 0.007568359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0032,
+      "step": 3130
+    },
+    {
+      "epoch": 2.0064,
+      "grad_norm": 0.0019073486328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 3135
+    },
+    {
+      "epoch": 2.0096,
+      "grad_norm": 0.0081787109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3140
+    },
+    {
+      "epoch": 2.0128,
+      "grad_norm": 0.00014019012451171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3145
+    },
+    {
+      "epoch": 2.016,
+      "grad_norm": 1.8358230590820312e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3150
+    },
+    {
+      "epoch": 2.0192,
+      "grad_norm": 0.005828857421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3155
+    },
+    {
+      "epoch": 2.0224,
+      "grad_norm": 4.3392181396484375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3160
+    },
+    {
+      "epoch": 2.0256,
+      "grad_norm": 0.0002460479736328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3165
+    },
+    {
+      "epoch": 2.0288,
+      "grad_norm": 0.000545501708984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3170
+    },
+    {
+      "epoch": 2.032,
+      "grad_norm": 2.849102020263672e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3175
+    },
+    {
+      "epoch": 2.0352,
+      "grad_norm": 0.007781982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 3180
+    },
+    {
+      "epoch": 2.0384,
+      "grad_norm": 0.0003719329833984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 3185
+    },
+    {
+      "epoch": 2.0416,
+      "grad_norm": 1.1682510375976562e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3190
+    },
+    {
+      "epoch": 2.0448,
+      "grad_norm": 0.000732421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3195
+    },
+    {
+      "epoch": 2.048,
+      "grad_norm": 1.633167266845703e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3200
+    },
+    {
+      "epoch": 2.0512,
+      "grad_norm": 0.000431060791015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3205
+    },
+    {
+      "epoch": 2.0544,
+      "grad_norm": 3.123283386230469e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3210
+    },
+    {
+      "epoch": 2.0576,
+      "grad_norm": 2.491474151611328e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3215
+    },
+    {
+      "epoch": 2.0608,
+      "grad_norm": 1.8358230590820312e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3220
+    },
+    {
+      "epoch": 2.064,
+      "grad_norm": 2.7179718017578125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3225
+    },
+    {
+      "epoch": 2.0672,
+      "grad_norm": 0.0028839111328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 3230
+    },
+    {
+      "epoch": 2.0704,
+      "grad_norm": 0.00506591796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3235
+    },
+    {
+      "epoch": 2.0736,
+      "grad_norm": 0.0004749298095703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 3240
+    },
+    {
+      "epoch": 2.0768,
+      "grad_norm": 0.001251220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3245
+    },
+    {
+      "epoch": 2.08,
+      "grad_norm": 1.633167266845703e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 3250
+    },
+    {
+      "epoch": 2.0832,
+      "grad_norm": 5.817413330078125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3255
+    },
+    {
+      "epoch": 2.0864,
+      "grad_norm": 5.817413330078125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3260
+    },
+    {
+      "epoch": 2.0896,
+      "grad_norm": 0.007598876953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0012,
+      "step": 3265
+    },
+    {
+      "epoch": 2.0928,
+      "grad_norm": 7.200241088867188e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3270
+    },
+    {
+      "epoch": 2.096,
+      "grad_norm": 6.866455078125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3275
+    },
+    {
+      "epoch": 2.0992,
+      "grad_norm": 0.000553131103515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3280
+    },
+    {
+      "epoch": 2.1024,
+      "grad_norm": 0.015380859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 3285
+    },
+    {
+      "epoch": 2.1056,
+      "grad_norm": 8.58306884765625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3290
+    },
+    {
+      "epoch": 2.1088,
+      "grad_norm": 0.01165771484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 3295
+    },
+    {
+      "epoch": 2.112,
+      "grad_norm": 1.2755393981933594e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3300
+    },
+    {
+      "epoch": 2.1152,
+      "grad_norm": 0.000213623046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3305
+    },
+    {
+      "epoch": 2.1184,
+      "grad_norm": 2.0503997802734375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0048,
+      "step": 3310
+    },
+    {
+      "epoch": 2.1216,
+      "grad_norm": 0.00011348724365234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3315
+    },
+    {
+      "epoch": 2.1248,
+      "grad_norm": 0.000301361083984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3320
+    },
+    {
+      "epoch": 2.128,
+      "grad_norm": 0.00041961669921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3325
+    },
+    {
+      "epoch": 2.1312,
+      "grad_norm": 0.0007781982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3330
+    },
+    {
+      "epoch": 2.1344,
+      "grad_norm": 0.00171661376953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 3335
+    },
+    {
+      "epoch": 2.1376,
+      "grad_norm": 8.7738037109375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 3340
+    },
+    {
+      "epoch": 2.1408,
+      "grad_norm": 0.000270843505859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3345
+    },
+    {
+      "epoch": 2.144,
+      "grad_norm": 7.867813110351562e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3350
+    },
+    {
+      "epoch": 2.1471999999999998,
+      "grad_norm": 0.0002269744873046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3355
+    },
+    {
+      "epoch": 2.1504,
+      "grad_norm": 1.4781951904296875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3360
+    },
+    {
+      "epoch": 2.1536,
+      "grad_norm": 0.0087890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0013,
+      "step": 3365
+    },
+    {
+      "epoch": 2.1568,
+      "grad_norm": 1.4066696166992188e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3370
+    },
+    {
+      "epoch": 2.16,
+      "grad_norm": 3.4332275390625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3375
+    },
+    {
+      "epoch": 2.1632,
+      "grad_norm": 0.0024566650390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 3380
+    },
+    {
+      "epoch": 2.1664,
+      "grad_norm": 0.01202392578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3385
+    },
+    {
+      "epoch": 2.1696,
+      "grad_norm": 8.058547973632812e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3390
+    },
+    {
+      "epoch": 2.1728,
+      "grad_norm": 0.00136566162109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3395
+    },
+    {
+      "epoch": 2.176,
+      "grad_norm": 5.53131103515625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3400
+    },
+    {
+      "epoch": 2.1792,
+      "grad_norm": 0.00021266937255859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3405
+    },
+    {
+      "epoch": 2.1824,
+      "grad_norm": 2.1576881408691406e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3410
+    },
+    {
+      "epoch": 2.1856,
+      "grad_norm": 2.288818359375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3415
+    },
+    {
+      "epoch": 2.1888,
+      "grad_norm": 2.0623207092285156e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3420
+    },
+    {
+      "epoch": 2.192,
+      "grad_norm": 0.0003108978271484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3425
+    },
+    {
+      "epoch": 2.1952,
+      "grad_norm": 0.001983642578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3430
+    },
+    {
+      "epoch": 2.1984,
+      "grad_norm": 0.00531005859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3435
+    },
+    {
+      "epoch": 2.2016,
+      "grad_norm": 0.0015716552734375,
+      "learning_rate": 0.0001,
+      "loss": 0.003,
+      "step": 3440
+    },
+    {
+      "epoch": 2.2048,
+      "grad_norm": 0.0003223419189453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3445
+    },
+    {
+      "epoch": 2.208,
+      "grad_norm": 0.0003376007080078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3450
+    },
+    {
+      "epoch": 2.2112,
+      "grad_norm": 3.0040740966796875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0035,
+      "step": 3455
+    },
+    {
+      "epoch": 2.2144,
+      "grad_norm": 0.000858306884765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3460
+    },
+    {
+      "epoch": 2.2176,
+      "grad_norm": 0.00054168701171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3465
+    },
+    {
+      "epoch": 2.2208,
+      "grad_norm": 1.9311904907226562e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3470
+    },
+    {
+      "epoch": 2.224,
+      "grad_norm": 0.0002803802490234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3475
+    },
+    {
+      "epoch": 2.2272,
+      "grad_norm": 0.0125732421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3480
+    },
+    {
+      "epoch": 2.2304,
+      "grad_norm": 4.76837158203125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3485
+    },
+    {
+      "epoch": 2.2336,
+      "grad_norm": 5.1975250244140625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3490
+    },
+    {
+      "epoch": 2.2368,
+      "grad_norm": 7.915496826171875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3495
+    },
+    {
+      "epoch": 2.24,
+      "grad_norm": 7.212162017822266e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3500
+    },
+    {
+      "epoch": 2.2432,
+      "grad_norm": 7.05718994140625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3505
+    },
+    {
+      "epoch": 2.2464,
+      "grad_norm": 1.4960765838623047e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3510
+    },
+    {
+      "epoch": 2.2496,
+      "grad_norm": 0.017578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0027,
+      "step": 3515
+    },
+    {
+      "epoch": 2.2528,
+      "grad_norm": 1.1146068572998047e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3520
+    },
+    {
+      "epoch": 2.2560000000000002,
+      "grad_norm": 0.0012664794921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3525
+    },
+    {
+      "epoch": 2.2592,
+      "grad_norm": 0.00010824203491210938,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3530
+    },
+    {
+      "epoch": 2.2624,
+      "grad_norm": 0.003509521484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3535
+    },
+    {
+      "epoch": 2.2656,
+      "grad_norm": 7.510185241699219e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3540
+    },
+    {
+      "epoch": 2.2688,
+      "grad_norm": 0.00157928466796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3545
+    },
+    {
+      "epoch": 2.2720000000000002,
+      "grad_norm": 4.500150680541992e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3550
+    },
+    {
+      "epoch": 2.2752,
+      "grad_norm": 2.086162567138672e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3555
+    },
+    {
+      "epoch": 2.2784,
+      "grad_norm": 0.00011110305786132812,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3560
+    },
+    {
+      "epoch": 2.2816,
+      "grad_norm": 1.4722347259521484e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 3565
+    },
+    {
+      "epoch": 2.2848,
+      "grad_norm": 1.0788440704345703e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3570
+    },
+    {
+      "epoch": 2.288,
+      "grad_norm": 5.245208740234375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3575
+    },
+    {
+      "epoch": 2.2912,
+      "grad_norm": 0.0019989013671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3580
+    },
+    {
+      "epoch": 2.2944,
+      "grad_norm": 0.0069580078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 3585
+    },
+    {
+      "epoch": 2.2976,
+      "grad_norm": 6.22868537902832e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3590
+    },
+    {
+      "epoch": 2.3008,
+      "grad_norm": 6.580352783203125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3595
+    },
+    {
+      "epoch": 2.304,
+      "grad_norm": 5.185604095458984e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3600
+    },
+    {
+      "epoch": 2.3072,
+      "grad_norm": 2.288818359375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3605
+    },
+    {
+      "epoch": 2.3104,
+      "grad_norm": 6.29425048828125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0067,
+      "step": 3610
+    },
+    {
+      "epoch": 2.3136,
+      "grad_norm": 1.9669532775878906e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3615
+    },
+    {
+      "epoch": 2.3168,
+      "grad_norm": 1.2755393981933594e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3620
+    },
+    {
+      "epoch": 2.32,
+      "grad_norm": 2.9206275939941406e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3625
+    },
+    {
+      "epoch": 2.3232,
+      "grad_norm": 0.0078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 3630
+    },
+    {
+      "epoch": 2.3264,
+      "grad_norm": 0.0005950927734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3635
+    },
+    {
+      "epoch": 2.3296,
+      "grad_norm": 0.0009613037109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3640
+    },
+    {
+      "epoch": 2.3327999999999998,
+      "grad_norm": 3.409385681152344e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3645
+    },
+    {
+      "epoch": 2.336,
+      "grad_norm": 0.000598907470703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3650
+    },
+    {
+      "epoch": 2.3392,
+      "grad_norm": 0.002532958984375,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3655
+    },
+    {
+      "epoch": 2.3424,
+      "grad_norm": 1.3053417205810547e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0065,
+      "step": 3660
+    },
+    {
+      "epoch": 2.3456,
+      "grad_norm": 3.4809112548828125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3665
+    },
+    {
+      "epoch": 2.3487999999999998,
+      "grad_norm": 9.655952453613281e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3670
+    },
+    {
+      "epoch": 2.352,
+      "grad_norm": 2.4199485778808594e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3675
+    },
+    {
+      "epoch": 2.3552,
+      "grad_norm": 0.00072479248046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3680
+    },
+    {
+      "epoch": 2.3584,
+      "grad_norm": 0.000553131103515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3685
+    },
+    {
+      "epoch": 2.3616,
+      "grad_norm": 5.7220458984375e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3690
+    },
+    {
+      "epoch": 2.3648,
+      "grad_norm": 4.2438507080078125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3695
+    },
+    {
+      "epoch": 2.368,
+      "grad_norm": 0.0003528594970703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3700
+    },
+    {
+      "epoch": 2.3712,
+      "grad_norm": 3.552436828613281e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3705
+    },
+    {
+      "epoch": 2.3744,
+      "grad_norm": 0.0120849609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 3710
+    },
+    {
+      "epoch": 2.3776,
+      "grad_norm": 2.2172927856445312e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.004,
+      "step": 3715
+    },
+    {
+      "epoch": 2.3808,
+      "grad_norm": 1.9669532775878906e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3720
+    },
+    {
+      "epoch": 2.384,
+      "grad_norm": 2.110004425048828e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3725
+    },
+    {
+      "epoch": 2.3872,
+      "grad_norm": 0.006866455078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3730
+    },
+    {
+      "epoch": 2.3904,
+      "grad_norm": 0.00118255615234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 3735
+    },
+    {
+      "epoch": 2.3936,
+      "grad_norm": 1.2099742889404297e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3740
+    },
+    {
+      "epoch": 2.3968,
+      "grad_norm": 0.00213623046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 3745
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 1.0848045349121094e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3750
+    },
+    {
+      "epoch": 2.4032,
+      "grad_norm": 2.8371810913085938e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3755
+    },
+    {
+      "epoch": 2.4064,
+      "grad_norm": 1.4662742614746094e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3760
+    },
+    {
+      "epoch": 2.4096,
+      "grad_norm": 1.6927719116210938e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3765
+    },
+    {
+      "epoch": 2.4128,
+      "grad_norm": 1.2516975402832031e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3770
+    },
+    {
+      "epoch": 2.416,
+      "grad_norm": 1.8835067749023438e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3775
+    },
+    {
+      "epoch": 2.4192,
+      "grad_norm": 0.000213623046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3780
+    },
+    {
+      "epoch": 2.4224,
+      "grad_norm": 8.153915405273438e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3785
+    },
+    {
+      "epoch": 2.4256,
+      "grad_norm": 2.288818359375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3790
+    },
+    {
+      "epoch": 2.4288,
+      "grad_norm": 0.0001430511474609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3795
+    },
+    {
+      "epoch": 2.432,
+      "grad_norm": 6.288290023803711e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3800
+    },
+    {
+      "epoch": 2.4352,
+      "grad_norm": 0.0002727508544921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3805
+    },
+    {
+      "epoch": 2.4384,
+      "grad_norm": 1.4603137969970703e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0082,
+      "step": 3810
+    },
+    {
+      "epoch": 2.4416,
+      "grad_norm": 2.0742416381835938e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3815
+    },
+    {
+      "epoch": 2.4448,
+      "grad_norm": 9.000301361083984e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3820
+    },
+    {
+      "epoch": 2.448,
+      "grad_norm": 1.4185905456542969e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3825
+    },
+    {
+      "epoch": 2.4512,
+      "grad_norm": 0.00021648406982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3830
+    },
+    {
+      "epoch": 2.4544,
+      "grad_norm": 4.172325134277344e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 3835
+    },
+    {
+      "epoch": 2.4576000000000002,
+      "grad_norm": 6.377696990966797e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3840
+    },
+    {
+      "epoch": 2.4608,
+      "grad_norm": 0.00145721435546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3845
+    },
+    {
+      "epoch": 2.464,
+      "grad_norm": 9.59634780883789e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3850
+    },
+    {
+      "epoch": 2.4672,
+      "grad_norm": 0.0015869140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 3855
+    },
+    {
+      "epoch": 2.4704,
+      "grad_norm": 1.7404556274414062e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3860
+    },
+    {
+      "epoch": 2.4736000000000002,
+      "grad_norm": 0.00075531005859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3865
+    },
+    {
+      "epoch": 2.4768,
+      "grad_norm": 7.420778274536133e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3870
+    },
+    {
+      "epoch": 2.48,
+      "grad_norm": 1.4483928680419922e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3875
+    },
+    {
+      "epoch": 2.4832,
+      "grad_norm": 4.6253204345703125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3880
+    },
+    {
+      "epoch": 2.4864,
+      "grad_norm": 0.0013275146484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3885
+    },
+    {
+      "epoch": 2.4896,
+      "grad_norm": 0.000568389892578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3890
+    },
+    {
+      "epoch": 2.4928,
+      "grad_norm": 0.04541015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0014,
+      "step": 3895
+    },
+    {
+      "epoch": 2.496,
+      "grad_norm": 0.00555419921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3900
+    },
+    {
+      "epoch": 2.4992,
+      "grad_norm": 0.00024318695068359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3905
+    },
+    {
+      "epoch": 2.5023999999999997,
+      "grad_norm": 3.7670135498046875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3910
+    },
+    {
+      "epoch": 2.5056000000000003,
+      "grad_norm": 0.0002765655517578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0036,
+      "step": 3915
+    },
+    {
+      "epoch": 2.5088,
+      "grad_norm": 0.005462646484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3920
+    },
+    {
+      "epoch": 2.512,
+      "grad_norm": 3.886222839355469e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 3925
+    },
+    {
+      "epoch": 2.5152,
+      "grad_norm": 0.0020751953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 3930
+    },
+    {
+      "epoch": 2.5183999999999997,
+      "grad_norm": 0.003082275390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 3935
+    },
+    {
+      "epoch": 2.5216,
+      "grad_norm": 0.0002231597900390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 3940
+    },
+    {
+      "epoch": 2.5248,
+      "grad_norm": 0.0017547607421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3945
+    },
+    {
+      "epoch": 2.528,
+      "grad_norm": 8.487701416015625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 3950
+    },
+    {
+      "epoch": 2.5312,
+      "grad_norm": 0.000263214111328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3955
+    },
+    {
+      "epoch": 2.5343999999999998,
+      "grad_norm": 0.0002307891845703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 3960
+    },
+    {
+      "epoch": 2.5376,
+      "grad_norm": 0.000244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3965
+    },
+    {
+      "epoch": 2.5408,
+      "grad_norm": 5.459785461425781e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3970
+    },
+    {
+      "epoch": 2.544,
+      "grad_norm": 3.075599670410156e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3975
+    },
+    {
+      "epoch": 2.5472,
+      "grad_norm": 0.0008087158203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0024,
+      "step": 3980
+    },
+    {
+      "epoch": 2.5504,
+      "grad_norm": 0.007781982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 3985
+    },
+    {
+      "epoch": 2.5536,
+      "grad_norm": 0.0004863739013671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 3990
+    },
+    {
+      "epoch": 2.5568,
+      "grad_norm": 0.023193359375,
+      "learning_rate": 0.0001,
+      "loss": 0.0016,
+      "step": 3995
+    },
+    {
+      "epoch": 2.56,
+      "grad_norm": 0.010009765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 4000
+    },
+    {
+      "epoch": 2.5632,
+      "grad_norm": 0.006134033203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 4005
+    },
+    {
+      "epoch": 2.5664,
+      "grad_norm": 0.0010833740234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4010
+    },
+    {
+      "epoch": 2.5696,
+      "grad_norm": 3.695487976074219e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4015
+    },
+    {
+      "epoch": 2.5728,
+      "grad_norm": 4.2438507080078125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4020
+    },
+    {
+      "epoch": 2.576,
+      "grad_norm": 2.4199485778808594e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4025
+    },
+    {
+      "epoch": 2.5792,
+      "grad_norm": 0.00144195556640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0026,
+      "step": 4030
+    },
+    {
+      "epoch": 2.5824,
+      "grad_norm": 0.0015716552734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 4035
+    },
+    {
+      "epoch": 2.5856,
+      "grad_norm": 0.00049591064453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 4040
+    },
+    {
+      "epoch": 2.5888,
+      "grad_norm": 0.0022430419921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 4045
+    },
+    {
+      "epoch": 2.592,
+      "grad_norm": 0.0001544952392578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 4050
+    },
+    {
+      "epoch": 2.5952,
+      "grad_norm": 0.0076904296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 4055
+    },
+    {
+      "epoch": 2.5984,
+      "grad_norm": 0.000446319580078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0007,
+      "step": 4060
+    },
+    {
+      "epoch": 2.6016,
+      "grad_norm": 6.341934204101562e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4065
+    },
+    {
+      "epoch": 2.6048,
+      "grad_norm": 3.6716461181640625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4070
+    },
+    {
+      "epoch": 2.608,
+      "grad_norm": 0.000461578369140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4075
+    },
+    {
+      "epoch": 2.6112,
+      "grad_norm": 0.003204345703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0011,
+      "step": 4080
+    },
+    {
+      "epoch": 2.6144,
+      "grad_norm": 0.00286865234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 4085
+    },
+    {
+      "epoch": 2.6176,
+      "grad_norm": 6.67572021484375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4090
+    },
+    {
+      "epoch": 2.6208,
+      "grad_norm": 0.00110626220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4095
+    },
+    {
+      "epoch": 2.624,
+      "grad_norm": 0.0003604888916015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4100
+    },
+    {
+      "epoch": 2.6272,
+      "grad_norm": 0.004302978515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4105
+    },
+    {
+      "epoch": 2.6304,
+      "grad_norm": 6.4849853515625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4110
+    },
+    {
+      "epoch": 2.6336,
+      "grad_norm": 0.000171661376953125,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 4115
+    },
+    {
+      "epoch": 2.6368,
+      "grad_norm": 5.781650543212891e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4120
+    },
+    {
+      "epoch": 2.64,
+      "grad_norm": 9.000301361083984e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4125
+    },
+    {
+      "epoch": 2.6432,
+      "grad_norm": 0.0030975341796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 4130
+    },
+    {
+      "epoch": 2.6464,
+      "grad_norm": 0.00131988525390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0016,
+      "step": 4135
+    },
+    {
+      "epoch": 2.6496,
+      "grad_norm": 0.000354766845703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4140
+    },
+    {
+      "epoch": 2.6528,
+      "grad_norm": 7.724761962890625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4145
+    },
+    {
+      "epoch": 2.656,
+      "grad_norm": 4.231929779052734e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4150
+    },
+    {
+      "epoch": 2.6592000000000002,
+      "grad_norm": 1.704692840576172e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4155
+    },
+    {
+      "epoch": 2.6624,
+      "grad_norm": 6.616115570068359e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4160
+    },
+    {
+      "epoch": 2.6656,
+      "grad_norm": 6.884336471557617e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4165
+    },
+    {
+      "epoch": 2.6688,
+      "grad_norm": 5.453824996948242e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4170
+    },
+    {
+      "epoch": 2.672,
+      "grad_norm": 0.000179290771484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4175
+    },
+    {
+      "epoch": 2.6752000000000002,
+      "grad_norm": 0.013916015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 4180
+    },
+    {
+      "epoch": 2.6784,
+      "grad_norm": 0.0031890869140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4185
+    },
+    {
+      "epoch": 2.6816,
+      "grad_norm": 1.7404556274414062e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0006,
+      "step": 4190
+    },
+    {
+      "epoch": 2.6848,
+      "grad_norm": 4.291534423828125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4195
+    },
+    {
+      "epoch": 2.6879999999999997,
+      "grad_norm": 7.420778274536133e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4200
+    },
+    {
+      "epoch": 2.6912000000000003,
+      "grad_norm": 0.0001087188720703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4205
+    },
+    {
+      "epoch": 2.6944,
+      "grad_norm": 3.647804260253906e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4210
+    },
+    {
+      "epoch": 2.6976,
+      "grad_norm": 0.01220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0019,
+      "step": 4215
+    },
+    {
+      "epoch": 2.7008,
+      "grad_norm": 6.109476089477539e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4220
+    },
+    {
+      "epoch": 2.7039999999999997,
+      "grad_norm": 1.8477439880371094e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4225
+    },
+    {
+      "epoch": 2.7072000000000003,
+      "grad_norm": 0.0009765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4230
+    },
+    {
+      "epoch": 2.7104,
+      "grad_norm": 0.0002918243408203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 4235
+    },
+    {
+      "epoch": 2.7136,
+      "grad_norm": 7.62939453125e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4240
+    },
+    {
+      "epoch": 2.7168,
+      "grad_norm": 5.340576171875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 4245
+    },
+    {
+      "epoch": 2.7199999999999998,
+      "grad_norm": 5.930662155151367e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4250
+    },
+    {
+      "epoch": 2.7232,
+      "grad_norm": 0.0009613037109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 4255
+    },
+    {
+      "epoch": 2.7264,
+      "grad_norm": 2.3603439331054688e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4260
+    },
+    {
+      "epoch": 2.7296,
+      "grad_norm": 2.110004425048828e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4265
+    },
+    {
+      "epoch": 2.7328,
+      "grad_norm": 1.055002212524414e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4270
+    },
+    {
+      "epoch": 2.7359999999999998,
+      "grad_norm": 0.00011110305786132812,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4275
+    },
+    {
+      "epoch": 2.7392,
+      "grad_norm": 0.0032196044921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 4280
+    },
+    {
+      "epoch": 2.7424,
+      "grad_norm": 0.01239013671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4285
+    },
+    {
+      "epoch": 2.7456,
+      "grad_norm": 5.626678466796875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 4290
+    },
+    {
+      "epoch": 2.7488,
+      "grad_norm": 0.000469207763671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4295
+    },
+    {
+      "epoch": 2.752,
+      "grad_norm": 2.372264862060547e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4300
+    },
+    {
+      "epoch": 2.7552,
+      "grad_norm": 0.00115203857421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4305
+    },
+    {
+      "epoch": 2.7584,
+      "grad_norm": 4.57763671875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4310
+    },
+    {
+      "epoch": 2.7616,
+      "grad_norm": 0.0001354217529296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 4315
+    },
+    {
+      "epoch": 2.7648,
+      "grad_norm": 1.4185905456542969e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4320
+    },
+    {
+      "epoch": 2.768,
+      "grad_norm": 0.00012159347534179688,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4325
+    },
+    {
+      "epoch": 2.7712,
+      "grad_norm": 0.0087890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 4330
+    },
+    {
+      "epoch": 2.7744,
+      "grad_norm": 0.00128936767578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4335
+    },
+    {
+      "epoch": 2.7776,
+      "grad_norm": 7.0035457611083984e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4340
+    },
+    {
+      "epoch": 2.7808,
+      "grad_norm": 4.458427429199219e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4345
+    },
+    {
+      "epoch": 2.784,
+      "grad_norm": 1.4960765838623047e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4350
+    },
+    {
+      "epoch": 2.7872,
+      "grad_norm": 0.00145721435546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0077,
+      "step": 4355
+    },
+    {
+      "epoch": 2.7904,
+      "grad_norm": 1.2636184692382812e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4360
+    },
+    {
+      "epoch": 2.7936,
+      "grad_norm": 0.01092529296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0019,
+      "step": 4365
+    },
+    {
+      "epoch": 2.7968,
+      "grad_norm": 1.3828277587890625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4370
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 0.0002593994140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4375
+    },
+    {
+      "epoch": 2.8032,
+      "grad_norm": 8.96453857421875e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 4380
+    },
+    {
+      "epoch": 2.8064,
+      "grad_norm": 0.00038909912109375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4385
+    },
+    {
+      "epoch": 2.8096,
+      "grad_norm": 1.7404556274414062e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4390
+    },
+    {
+      "epoch": 2.8128,
+      "grad_norm": 0.0181884765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 4395
+    },
+    {
+      "epoch": 2.816,
+      "grad_norm": 1.9788742065429688e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4400
+    },
+    {
+      "epoch": 2.8192,
+      "grad_norm": 0.000514984130859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4405
+    },
+    {
+      "epoch": 2.8224,
+      "grad_norm": 3.981590270996094e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4410
+    },
+    {
+      "epoch": 2.8256,
+      "grad_norm": 1.3172626495361328e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4415
+    },
+    {
+      "epoch": 2.8288,
+      "grad_norm": 1.0192394256591797e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4420
+    },
+    {
+      "epoch": 2.832,
+      "grad_norm": 0.00017070770263671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0003,
+      "step": 4425
+    },
+    {
+      "epoch": 2.8352,
+      "grad_norm": 0.0004100799560546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 4430
+    },
+    {
+      "epoch": 2.8384,
+      "grad_norm": 0.011962890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 4435
+    },
+    {
+      "epoch": 2.8416,
+      "grad_norm": 0.000797271728515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 4440
+    },
+    {
+      "epoch": 2.8448,
+      "grad_norm": 4.00543212890625e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4445
+    },
+    {
+      "epoch": 2.848,
+      "grad_norm": 5.513429641723633e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4450
+    },
+    {
+      "epoch": 2.8512,
+      "grad_norm": 2.0265579223632812e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 4455
+    },
+    {
+      "epoch": 2.8544,
+      "grad_norm": 0.00010013580322265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4460
+    },
+    {
+      "epoch": 2.8576,
+      "grad_norm": 0.00189971923828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4465
+    },
+    {
+      "epoch": 2.8608000000000002,
+      "grad_norm": 1.9311904907226562e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0008,
+      "step": 4470
+    },
+    {
+      "epoch": 2.864,
+      "grad_norm": 0.0150146484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4475
+    },
+    {
+      "epoch": 2.8672,
+      "grad_norm": 0.000732421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 4480
+    },
+    {
+      "epoch": 2.8704,
+      "grad_norm": 0.00018596649169921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 4485
+    },
+    {
+      "epoch": 2.8736,
+      "grad_norm": 0.00049591064453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4490
+    },
+    {
+      "epoch": 2.8768000000000002,
+      "grad_norm": 0.0032501220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4495
+    },
+    {
+      "epoch": 2.88,
+      "grad_norm": 1.055002212524414e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4500
+    },
+    {
+      "epoch": 2.8832,
+      "grad_norm": 6.151199340820312e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4505
+    },
+    {
+      "epoch": 2.8864,
+      "grad_norm": 0.0003452301025390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4510
+    },
+    {
+      "epoch": 2.8895999999999997,
+      "grad_norm": 2.9325485229492188e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4515
+    },
+    {
+      "epoch": 2.8928000000000003,
+      "grad_norm": 0.0172119140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0019,
+      "step": 4520
+    },
+    {
+      "epoch": 2.896,
+      "grad_norm": 0.00994873046875,
+      "learning_rate": 0.0001,
+      "loss": 0.001,
+      "step": 4525
+    },
+    {
+      "epoch": 2.8992,
+      "grad_norm": 0.0004177093505859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0014,
+      "step": 4530
+    },
+    {
+      "epoch": 2.9024,
+      "grad_norm": 0.000476837158203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 4535
+    },
+    {
+      "epoch": 2.9055999999999997,
+      "grad_norm": 0.0004215240478515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0004,
+      "step": 4540
+    },
+    {
+      "epoch": 2.9088000000000003,
+      "grad_norm": 0.000247955322265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4545
+    },
+    {
+      "epoch": 2.912,
+      "grad_norm": 0.0040283203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4550
+    },
+    {
+      "epoch": 2.9152,
+      "grad_norm": 0.00124359130859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4555
+    },
+    {
+      "epoch": 2.9184,
+      "grad_norm": 4.6253204345703125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4560
+    },
+    {
+      "epoch": 2.9215999999999998,
+      "grad_norm": 0.0004825592041015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4565
+    },
+    {
+      "epoch": 2.9248,
+      "grad_norm": 0.0002574920654296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4570
+    },
+    {
+      "epoch": 2.928,
+      "grad_norm": 1.9431114196777344e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4575
+    },
+    {
+      "epoch": 2.9312,
+      "grad_norm": 0.003387451171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 4580
+    },
+    {
+      "epoch": 2.9344,
+      "grad_norm": 0.00160980224609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4585
+    },
+    {
+      "epoch": 2.9375999999999998,
+      "grad_norm": 2.5987625122070312e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4590
+    },
+    {
+      "epoch": 2.9408,
+      "grad_norm": 7.009506225585938e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4595
+    },
+    {
+      "epoch": 2.944,
+      "grad_norm": 1.341104507446289e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4600
+    },
+    {
+      "epoch": 2.9472,
+      "grad_norm": 9.393692016601562e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4605
+    },
+    {
+      "epoch": 2.9504,
+      "grad_norm": 1.919269561767578e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4610
+    },
+    {
+      "epoch": 2.9536,
+      "grad_norm": 4.76837158203125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4615
+    },
+    {
+      "epoch": 2.9568,
+      "grad_norm": 9.417533874511719e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4620
+    },
+    {
+      "epoch": 2.96,
+      "grad_norm": 1.1980533599853516e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4625
+    },
+    {
+      "epoch": 2.9632,
+      "grad_norm": 0.00116729736328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4630
+    },
+    {
+      "epoch": 2.9664,
+      "grad_norm": 0.0016632080078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4635
+    },
+    {
+      "epoch": 2.9696,
+      "grad_norm": 0.001373291015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4640
+    },
+    {
+      "epoch": 2.9728,
+      "grad_norm": 0.00014972686767578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 4645
+    },
+    {
+      "epoch": 2.976,
+      "grad_norm": 5.125999450683594e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0005,
+      "step": 4650
+    },
+    {
+      "epoch": 2.9792,
+      "grad_norm": 6.67572021484375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4655
+    },
+    {
+      "epoch": 2.9824,
+      "grad_norm": 1.1086463928222656e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4660
+    },
+    {
+      "epoch": 2.9856,
+      "grad_norm": 0.0002803802490234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4665
+    },
+    {
+      "epoch": 2.9888,
+      "grad_norm": 3.218650817871094e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4670
+    },
+    {
+      "epoch": 2.992,
+      "grad_norm": 1.1086463928222656e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4675
+    },
+    {
+      "epoch": 2.9952,
+      "grad_norm": 1.6689300537109375e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4680
+    },
+    {
+      "epoch": 2.9984,
+      "grad_norm": 9.47713851928711e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4685
+    },
+    {
+      "epoch": 3.0016,
+      "grad_norm": 0.005615234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 4690
+    },
+    {
+      "epoch": 3.0048,
+      "grad_norm": 0.00191497802734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 4695
+    },
+    {
+      "epoch": 3.008,
+      "grad_norm": 0.0022125244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4700
+    },
+    {
+      "epoch": 3.0112,
+      "grad_norm": 0.000110626220703125,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4705
+    },
+    {
+      "epoch": 3.0144,
+      "grad_norm": 2.1576881408691406e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4710
+    },
+    {
+      "epoch": 3.0176,
+      "grad_norm": 4.172325134277344e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4715
+    },
+    {
+      "epoch": 3.0208,
+      "grad_norm": 0.000225067138671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4720
+    },
+    {
+      "epoch": 3.024,
+      "grad_norm": 1.341104507446289e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4725
+    },
+    {
+      "epoch": 3.0272,
+      "grad_norm": 8.940696716308594e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4730
+    },
+    {
+      "epoch": 3.0304,
+      "grad_norm": 7.772445678710938e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 4735
+    },
+    {
+      "epoch": 3.0336,
+      "grad_norm": 5.316734313964844e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4740
+    },
+    {
+      "epoch": 3.0368,
+      "grad_norm": 2.2172927856445312e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 4745
+    },
+    {
+      "epoch": 3.04,
+      "grad_norm": 0.00162506103515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4750
+    },
+    {
+      "epoch": 3.0432,
+      "grad_norm": 3.528594970703125e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4755
+    },
+    {
+      "epoch": 3.0464,
+      "grad_norm": 1.2099742889404297e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4760
+    },
+    {
+      "epoch": 3.0496,
+      "grad_norm": 0.003631591796875,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4765
+    },
+    {
+      "epoch": 3.0528,
+      "grad_norm": 2.7298927307128906e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 4770
+    },
+    {
+      "epoch": 3.056,
+      "grad_norm": 3.266334533691406e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4775
+    },
+    {
+      "epoch": 3.0592,
+      "grad_norm": 7.063150405883789e-06,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4780
+    },
+    {
+      "epoch": 3.0624,
+      "grad_norm": 1.2516975402832031e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4785
+    },
+    {
+      "epoch": 3.0656,
+      "grad_norm": 0.0146484375,
+      "learning_rate": 0.0001,
+      "loss": 0.0009,
+      "step": 4790
+    },
+    {
+      "epoch": 3.0688,
+      "grad_norm": 0.00020503997802734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 4795
+    },
+    {
+      "epoch": 3.072,
+      "grad_norm": 0.00091552734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 4800
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 4800,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 90,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.883056636914381e+18,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/training_args.bin b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..29f75dce5b29053c93ee48c9b3f647e3f5e83f58
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/checkpoint-4800/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bfb5a7396260331223e1b3fd2f19765dd4d7b0a41660ebb1d64c6e7fa95fe90
+size 7416
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/completed b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/completed
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/metrics.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..7c36e203c08897fb5720186c59224bbada85b822
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/metrics.json
@@ -0,0 +1 @@
+{"run_name": "codetransocean_srcml_java", "train_runtime": 11061.1318, "train_samples_per_second": 0.521, "train_steps_per_second": 0.004, "total_flos": 3.685541393109811e+17, "train_loss": 0.5359265327453613, "epoch": 3.5294117647058822}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/train_results.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..a4011d30782f214192d8e1542f0d95831d6ae3b0
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/train_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 3.5294117647058822,
+    "total_flos": 3.685541393109811e+17,
+    "train_loss": 0.5359265327453613,
+    "train_runtime": 11061.1318,
+    "train_samples_per_second": 0.521,
+    "train_steps_per_second": 0.004
+}
\ No newline at end of file
diff --git a/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/trainer_state.json b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c5664d739288de376a9927b278c54b462626de23
--- /dev/null
+++ b/codellama/java/codetrans/codetransocean/codetransocean_srcml_java/trainer_state.json
@@ -0,0 +1,105 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.5294117647058822,
+  "eval_steps": 500,
+  "global_step": 45,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.39215686274509803,
+      "grad_norm": 0.0274658203125,
+      "learning_rate": 0.0001,
+      "loss": 0.5995,
+      "step": 5
+    },
+    {
+      "epoch": 0.7843137254901961,
+      "grad_norm": 0.022705078125,
+      "learning_rate": 0.0001,
+      "loss": 0.5904,
+      "step": 10
+    },
+    {
+      "epoch": 1.1764705882352942,
+      "grad_norm": 0.0189208984375,
+      "learning_rate": 0.0001,
+      "loss": 0.556,
+      "step": 15
+    },
+    {
+      "epoch": 1.5686274509803921,
+      "grad_norm": 0.0162353515625,
+      "learning_rate": 0.0001,
+      "loss": 0.5464,
+      "step": 20
+    },
+    {
+      "epoch": 1.9607843137254903,
+      "grad_norm": 0.0257568359375,
+      "learning_rate": 0.0001,
+      "loss": 0.5259,
+      "step": 25
+    },
+    {
+      "epoch": 2.3529411764705883,
+      "grad_norm": 0.0147705078125,
+      "learning_rate": 0.0001,
+      "loss": 0.5226,
+      "step": 30
+    },
+    {
+      "epoch": 2.7450980392156863,
+      "grad_norm": 0.029296875,
+      "learning_rate": 0.0001,
+      "loss": 0.5098,
+      "step": 35
+    },
+    {
+      "epoch": 3.1372549019607843,
+      "grad_norm": 0.016357421875,
+      "learning_rate": 0.0001,
+      "loss": 0.5106,
+      "step": 40
+    },
+    {
+      "epoch": 3.5294117647058822,
+      "grad_norm": 0.0252685546875,
+      "learning_rate": 0.0001,
+      "loss": 0.4622,
+      "step": 45
+    },
+    {
+      "epoch": 3.5294117647058822,
+      "step": 45,
+      "total_flos": 3.685541393109811e+17,
+      "train_loss": 0.5359265327453613,
+      "train_runtime": 11061.1318,
+      "train_samples_per_second": 0.521,
+      "train_steps_per_second": 0.004
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 45,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 180,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.685541393109811e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/java/dataflow_java_pretrained/all_results.json b/codellama/java/dataflow_java_pretrained/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..bce81059a7b1dd1728248f3f77c43d2df68ad77d
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/all_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 0.9049489395129615,
+    "total_flos": 1.6220305320330854e+18,
+    "train_loss": 0.07024859038905965,
+    "train_runtime": 50041.181,
+    "train_samples_per_second": 0.921,
+    "train_steps_per_second": 0.007
+}
\ No newline at end of file
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/README.md b/codellama/java/dataflow_java_pretrained/checkpoint-360/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/README.md
@@ -0,0 +1,202 @@
+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] 
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.13.2
\ No newline at end of file
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_config.json b/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6187bba1151b14c1207088bf6aefc2a05c33523e
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "k_proj",
+    "gate_proj",
+    "up_proj",
+    "down_proj",
+    "q_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_model.safetensors b/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b8c9e9cca9f6765775c083697e3ca540cc68c21c
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9255ef4d476c8cbd01b41ea21b7c34815d6f169ffc89989dacc449605fbb6204
+size 1156480200
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_model/README.md b/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f701e106913179e53b07103ec61ffc10178fd6c0
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_model/README.md
@@ -0,0 +1,202 @@
+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] 
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.13.2
\ No newline at end of file
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_model/adapter_config.json b/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6187bba1151b14c1207088bf6aefc2a05c33523e
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_model/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "k_proj",
+    "gate_proj",
+    "up_proj",
+    "down_proj",
+    "q_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_model/adapter_model.safetensors b/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_model/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b8c9e9cca9f6765775c083697e3ca540cc68c21c
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/adapter_model/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9255ef4d476c8cbd01b41ea21b7c34815d6f169ffc89989dacc449605fbb6204
+size 1156480200
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/added_tokens.json b/codellama/java/dataflow_java_pretrained/checkpoint-360/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/added_tokens.json
@@ -0,0 +1,3 @@
+{
+  "[PAD]": 32016
+}
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/optimizer.pt b/codellama/java/dataflow_java_pretrained/checkpoint-360/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..795ecde19f4d82dda36e4d31d1aa25f1861d633c
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:510d26930880ee5a4bd5c07dd609e378cb1f5b78285942f6effe2385e829bc08
+size 2003127538
diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/rng_state.pth b/codellama/java/dataflow_java_pretrained/checkpoint-360/rng_state.pth
similarity index 100%
rename from codellama/java/dataflow_pretrained/checkpoint-720/rng_state.pth
rename to codellama/java/dataflow_java_pretrained/checkpoint-360/rng_state.pth
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/scheduler.pt b/codellama/java/dataflow_java_pretrained/checkpoint-360/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ffb7177b487c41d6b9f78f59fcdd9023706925df
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:baba7c5dff09a1d575a7ff0a27f1158d5dd92adec2a108211e3ca605cfdd03a6
+size 1064
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/special_tokens_map.json b/codellama/java/dataflow_java_pretrained/checkpoint-360/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/special_tokens_map.json
@@ -0,0 +1,36 @@
+{
+  "additional_special_tokens": [
+    "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/tokenizer.model b/codellama/java/dataflow_java_pretrained/checkpoint-360/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+size 500058
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/tokenizer_config.json b/codellama/java/dataflow_java_pretrained/checkpoint-360/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/tokenizer_config.json
@@ -0,0 +1,94 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "▁
",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32016": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "▁
",
+    "▁",
+    "▁",
+    "▁"
+  ],
+  "bos_token": "",
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "",
+  "eot_token": "▁",
+  "fill_token": "",
+  "legacy": null,
+  "middle_token": "▁",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "padding_side": "right",
+  "prefix_token": "▁
",
+  "sp_model_kwargs": {},
+  "suffix_first": false,
+  "suffix_token": "▁",
+  "tokenizer_class": "CodeLlamaTokenizer",
+  "unk_token": "",
+  "use_default_system_prompt": false
+}
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/trainer_state.json b/codellama/java/dataflow_java_pretrained/checkpoint-360/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..bb7849dea0ea7d8fa11ad3a3eb0470935e25cdd3
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/trainer_state.json
@@ -0,0 +1,537 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9049489395129615,
+  "eval_steps": 500,
+  "global_step": 360,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.012568735271013355,
+      "grad_norm": 0.06298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.6012,
+      "step": 5
+    },
+    {
+      "epoch": 0.02513747054202671,
+      "grad_norm": 0.11767578125,
+      "learning_rate": 0.0001,
+      "loss": 0.3895,
+      "step": 10
+    },
+    {
+      "epoch": 0.037706205813040065,
+      "grad_norm": 0.0908203125,
+      "learning_rate": 0.0001,
+      "loss": 0.2298,
+      "step": 15
+    },
+    {
+      "epoch": 0.05027494108405342,
+      "grad_norm": 0.068359375,
+      "learning_rate": 0.0001,
+      "loss": 0.1486,
+      "step": 20
+    },
+    {
+      "epoch": 0.06284367635506677,
+      "grad_norm": 0.06396484375,
+      "learning_rate": 0.0001,
+      "loss": 0.1333,
+      "step": 25
+    },
+    {
+      "epoch": 0.07541241162608013,
+      "grad_norm": 0.0849609375,
+      "learning_rate": 0.0001,
+      "loss": 0.1203,
+      "step": 30
+    },
+    {
+      "epoch": 0.08798114689709348,
+      "grad_norm": 0.0908203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0904,
+      "step": 35
+    },
+    {
+      "epoch": 0.10054988216810684,
+      "grad_norm": 0.05859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0617,
+      "step": 40
+    },
+    {
+      "epoch": 0.11311861743912019,
+      "grad_norm": 0.0478515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0515,
+      "step": 45
+    },
+    {
+      "epoch": 0.12568735271013354,
+      "grad_norm": 0.0634765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0295,
+      "step": 50
+    },
+    {
+      "epoch": 0.13825608798114689,
+      "grad_norm": 0.07421875,
+      "learning_rate": 0.0001,
+      "loss": 0.2835,
+      "step": 55
+    },
+    {
+      "epoch": 0.15082482325216026,
+      "grad_norm": 0.057861328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0973,
+      "step": 60
+    },
+    {
+      "epoch": 0.1633935585231736,
+      "grad_norm": 0.026611328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0755,
+      "step": 65
+    },
+    {
+      "epoch": 0.17596229379418696,
+      "grad_norm": 0.0244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0547,
+      "step": 70
+    },
+    {
+      "epoch": 0.1885310290652003,
+      "grad_norm": 0.0274658203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0638,
+      "step": 75
+    },
+    {
+      "epoch": 0.20109976433621368,
+      "grad_norm": 0.029052734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0541,
+      "step": 80
+    },
+    {
+      "epoch": 0.21366849960722703,
+      "grad_norm": 0.039306640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0511,
+      "step": 85
+    },
+    {
+      "epoch": 0.22623723487824038,
+      "grad_norm": 0.0196533203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0392,
+      "step": 90
+    },
+    {
+      "epoch": 0.23880597014925373,
+      "grad_norm": 0.0269775390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0373,
+      "step": 95
+    },
+    {
+      "epoch": 0.2513747054202671,
+      "grad_norm": 0.02734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0168,
+      "step": 100
+    },
+    {
+      "epoch": 0.26394344069128045,
+      "grad_norm": 0.0556640625,
+      "learning_rate": 0.0001,
+      "loss": 0.2346,
+      "step": 105
+    },
+    {
+      "epoch": 0.27651217596229377,
+      "grad_norm": 0.0301513671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0746,
+      "step": 110
+    },
+    {
+      "epoch": 0.28908091123330715,
+      "grad_norm": 0.0294189453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0534,
+      "step": 115
+    },
+    {
+      "epoch": 0.3016496465043205,
+      "grad_norm": 0.0247802734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0371,
+      "step": 120
+    },
+    {
+      "epoch": 0.31421838177533384,
+      "grad_norm": 0.0225830078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0488,
+      "step": 125
+    },
+    {
+      "epoch": 0.3267871170463472,
+      "grad_norm": 0.02490234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0444,
+      "step": 130
+    },
+    {
+      "epoch": 0.33935585231736054,
+      "grad_norm": 0.0250244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.038,
+      "step": 135
+    },
+    {
+      "epoch": 0.3519245875883739,
+      "grad_norm": 0.0264892578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0308,
+      "step": 140
+    },
+    {
+      "epoch": 0.3644933228593873,
+      "grad_norm": 0.039306640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0291,
+      "step": 145
+    },
+    {
+      "epoch": 0.3770620581304006,
+      "grad_norm": 0.031982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0142,
+      "step": 150
+    },
+    {
+      "epoch": 0.389630793401414,
+      "grad_norm": 0.045654296875,
+      "learning_rate": 0.0001,
+      "loss": 0.2053,
+      "step": 155
+    },
+    {
+      "epoch": 0.40219952867242736,
+      "grad_norm": 0.0400390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0658,
+      "step": 160
+    },
+    {
+      "epoch": 0.4147682639434407,
+      "grad_norm": 0.0272216796875,
+      "learning_rate": 0.0001,
+      "loss": 0.045,
+      "step": 165
+    },
+    {
+      "epoch": 0.42733699921445406,
+      "grad_norm": 0.021240234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0343,
+      "step": 170
+    },
+    {
+      "epoch": 0.4399057344854674,
+      "grad_norm": 0.0263671875,
+      "learning_rate": 0.0001,
+      "loss": 0.041,
+      "step": 175
+    },
+    {
+      "epoch": 0.45247446975648076,
+      "grad_norm": 0.0311279296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0382,
+      "step": 180
+    },
+    {
+      "epoch": 0.46504320502749413,
+      "grad_norm": 0.022705078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0295,
+      "step": 185
+    },
+    {
+      "epoch": 0.47761194029850745,
+      "grad_norm": 0.0301513671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0257,
+      "step": 190
+    },
+    {
+      "epoch": 0.49018067556952083,
+      "grad_norm": 0.02490234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0217,
+      "step": 195
+    },
+    {
+      "epoch": 0.5027494108405341,
+      "grad_norm": 0.006866455078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0073,
+      "step": 200
+    },
+    {
+      "epoch": 0.5153181461115475,
+      "grad_norm": 0.04443359375,
+      "learning_rate": 0.0001,
+      "loss": 0.1655,
+      "step": 205
+    },
+    {
+      "epoch": 0.5278868813825609,
+      "grad_norm": 0.056640625,
+      "learning_rate": 0.0001,
+      "loss": 0.051,
+      "step": 210
+    },
+    {
+      "epoch": 0.5404556166535742,
+      "grad_norm": 0.026123046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0393,
+      "step": 215
+    },
+    {
+      "epoch": 0.5530243519245875,
+      "grad_norm": 0.01806640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0255,
+      "step": 220
+    },
+    {
+      "epoch": 0.565593087195601,
+      "grad_norm": 0.0230712890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0333,
+      "step": 225
+    },
+    {
+      "epoch": 0.5781618224666143,
+      "grad_norm": 0.024658203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0289,
+      "step": 230
+    },
+    {
+      "epoch": 0.5907305577376276,
+      "grad_norm": 0.0301513671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0235,
+      "step": 235
+    },
+    {
+      "epoch": 0.603299293008641,
+      "grad_norm": 0.0284423828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0208,
+      "step": 240
+    },
+    {
+      "epoch": 0.6158680282796544,
+      "grad_norm": 0.025634765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0119,
+      "step": 245
+    },
+    {
+      "epoch": 0.6284367635506677,
+      "grad_norm": 0.0125732421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0093,
+      "step": 250
+    },
+    {
+      "epoch": 0.6410054988216811,
+      "grad_norm": 0.051025390625,
+      "learning_rate": 0.0001,
+      "loss": 0.1598,
+      "step": 255
+    },
+    {
+      "epoch": 0.6535742340926944,
+      "grad_norm": 0.0546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0457,
+      "step": 260
+    },
+    {
+      "epoch": 0.6661429693637078,
+      "grad_norm": 0.03564453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0352,
+      "step": 265
+    },
+    {
+      "epoch": 0.6787117046347211,
+      "grad_norm": 0.019775390625,
+      "learning_rate": 0.0001,
+      "loss": 0.024,
+      "step": 270
+    },
+    {
+      "epoch": 0.6912804399057345,
+      "grad_norm": 0.0234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0296,
+      "step": 275
+    },
+    {
+      "epoch": 0.7038491751767478,
+      "grad_norm": 0.0264892578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0249,
+      "step": 280
+    },
+    {
+      "epoch": 0.7164179104477612,
+      "grad_norm": 0.029541015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0199,
+      "step": 285
+    },
+    {
+      "epoch": 0.7289866457187746,
+      "grad_norm": 0.02294921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0154,
+      "step": 290
+    },
+    {
+      "epoch": 0.7415553809897879,
+      "grad_norm": 0.0220947265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0116,
+      "step": 295
+    },
+    {
+      "epoch": 0.7541241162608012,
+      "grad_norm": 0.00531005859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0058,
+      "step": 300
+    },
+    {
+      "epoch": 0.7666928515318147,
+      "grad_norm": 0.049560546875,
+      "learning_rate": 0.0001,
+      "loss": 0.1521,
+      "step": 305
+    },
+    {
+      "epoch": 0.779261586802828,
+      "grad_norm": 0.140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0482,
+      "step": 310
+    },
+    {
+      "epoch": 0.7918303220738413,
+      "grad_norm": 0.035888671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0372,
+      "step": 315
+    },
+    {
+      "epoch": 0.8043990573448547,
+      "grad_norm": 0.036865234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0244,
+      "step": 320
+    },
+    {
+      "epoch": 0.816967792615868,
+      "grad_norm": 0.030517578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0263,
+      "step": 325
+    },
+    {
+      "epoch": 0.8295365278868814,
+      "grad_norm": 0.024169921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0218,
+      "step": 330
+    },
+    {
+      "epoch": 0.8421052631578947,
+      "grad_norm": 0.0308837890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0182,
+      "step": 335
+    },
+    {
+      "epoch": 0.8546739984289081,
+      "grad_norm": 0.02880859375,
+      "learning_rate": 0.0001,
+      "loss": 0.014,
+      "step": 340
+    },
+    {
+      "epoch": 0.8672427336999214,
+      "grad_norm": 0.03173828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0109,
+      "step": 345
+    },
+    {
+      "epoch": 0.8798114689709348,
+      "grad_norm": 0.01483154296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0044,
+      "step": 350
+    },
+    {
+      "epoch": 0.8923802042419482,
+      "grad_norm": 0.03955078125,
+      "learning_rate": 0.0001,
+      "loss": 0.1312,
+      "step": 355
+    },
+    {
+      "epoch": 0.9049489395129615,
+      "grad_norm": 0.031982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0403,
+      "step": 360
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 360,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 90,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.6220305320330854e+18,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/java/dataflow_java_pretrained/checkpoint-360/training_args.bin b/codellama/java/dataflow_java_pretrained/checkpoint-360/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..59f7a00edbdbd6b3221150f65609c5c8a5ec2f18
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/checkpoint-360/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecc1afb72b58dca46f5cfa652b2afb64f998044182e1b761ed2f00cbb47fd9de
+size 7416
diff --git a/codellama/java/dataflow_java_pretrained/completed b/codellama/java/dataflow_java_pretrained/completed
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/codellama/java/dataflow_java_pretrained/metrics.json b/codellama/java/dataflow_java_pretrained/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..6424a174064e14a147f24390c009c96aff9d8a3b
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/metrics.json
@@ -0,0 +1 @@
+{"run_name": "dataflow_java", "train_runtime": 50041.181, "train_samples_per_second": 0.921, "train_steps_per_second": 0.007, "total_flos": 1.6220305320330854e+18, "train_loss": 0.07024859038905965, "epoch": 0.9049489395129615}
\ No newline at end of file
diff --git a/codellama/java/dataflow_java_pretrained/train_results.json b/codellama/java/dataflow_java_pretrained/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..bce81059a7b1dd1728248f3f77c43d2df68ad77d
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/train_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 0.9049489395129615,
+    "total_flos": 1.6220305320330854e+18,
+    "train_loss": 0.07024859038905965,
+    "train_runtime": 50041.181,
+    "train_samples_per_second": 0.921,
+    "train_steps_per_second": 0.007
+}
\ No newline at end of file
diff --git a/codellama/java/dataflow_java_pretrained/trainer_state.json b/codellama/java/dataflow_java_pretrained/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..3eb848702cc1e00738b73844f3e656a6409e7802
--- /dev/null
+++ b/codellama/java/dataflow_java_pretrained/trainer_state.json
@@ -0,0 +1,546 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9049489395129615,
+  "eval_steps": 500,
+  "global_step": 360,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.012568735271013355,
+      "grad_norm": 0.06298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.6012,
+      "step": 5
+    },
+    {
+      "epoch": 0.02513747054202671,
+      "grad_norm": 0.11767578125,
+      "learning_rate": 0.0001,
+      "loss": 0.3895,
+      "step": 10
+    },
+    {
+      "epoch": 0.037706205813040065,
+      "grad_norm": 0.0908203125,
+      "learning_rate": 0.0001,
+      "loss": 0.2298,
+      "step": 15
+    },
+    {
+      "epoch": 0.05027494108405342,
+      "grad_norm": 0.068359375,
+      "learning_rate": 0.0001,
+      "loss": 0.1486,
+      "step": 20
+    },
+    {
+      "epoch": 0.06284367635506677,
+      "grad_norm": 0.06396484375,
+      "learning_rate": 0.0001,
+      "loss": 0.1333,
+      "step": 25
+    },
+    {
+      "epoch": 0.07541241162608013,
+      "grad_norm": 0.0849609375,
+      "learning_rate": 0.0001,
+      "loss": 0.1203,
+      "step": 30
+    },
+    {
+      "epoch": 0.08798114689709348,
+      "grad_norm": 0.0908203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0904,
+      "step": 35
+    },
+    {
+      "epoch": 0.10054988216810684,
+      "grad_norm": 0.05859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0617,
+      "step": 40
+    },
+    {
+      "epoch": 0.11311861743912019,
+      "grad_norm": 0.0478515625,
+      "learning_rate": 0.0001,
+      "loss": 0.0515,
+      "step": 45
+    },
+    {
+      "epoch": 0.12568735271013354,
+      "grad_norm": 0.0634765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0295,
+      "step": 50
+    },
+    {
+      "epoch": 0.13825608798114689,
+      "grad_norm": 0.07421875,
+      "learning_rate": 0.0001,
+      "loss": 0.2835,
+      "step": 55
+    },
+    {
+      "epoch": 0.15082482325216026,
+      "grad_norm": 0.057861328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0973,
+      "step": 60
+    },
+    {
+      "epoch": 0.1633935585231736,
+      "grad_norm": 0.026611328125,
+      "learning_rate": 0.0001,
+      "loss": 0.0755,
+      "step": 65
+    },
+    {
+      "epoch": 0.17596229379418696,
+      "grad_norm": 0.0244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0547,
+      "step": 70
+    },
+    {
+      "epoch": 0.1885310290652003,
+      "grad_norm": 0.0274658203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0638,
+      "step": 75
+    },
+    {
+      "epoch": 0.20109976433621368,
+      "grad_norm": 0.029052734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0541,
+      "step": 80
+    },
+    {
+      "epoch": 0.21366849960722703,
+      "grad_norm": 0.039306640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0511,
+      "step": 85
+    },
+    {
+      "epoch": 0.22623723487824038,
+      "grad_norm": 0.0196533203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0392,
+      "step": 90
+    },
+    {
+      "epoch": 0.23880597014925373,
+      "grad_norm": 0.0269775390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0373,
+      "step": 95
+    },
+    {
+      "epoch": 0.2513747054202671,
+      "grad_norm": 0.02734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0168,
+      "step": 100
+    },
+    {
+      "epoch": 0.26394344069128045,
+      "grad_norm": 0.0556640625,
+      "learning_rate": 0.0001,
+      "loss": 0.2346,
+      "step": 105
+    },
+    {
+      "epoch": 0.27651217596229377,
+      "grad_norm": 0.0301513671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0746,
+      "step": 110
+    },
+    {
+      "epoch": 0.28908091123330715,
+      "grad_norm": 0.0294189453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0534,
+      "step": 115
+    },
+    {
+      "epoch": 0.3016496465043205,
+      "grad_norm": 0.0247802734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0371,
+      "step": 120
+    },
+    {
+      "epoch": 0.31421838177533384,
+      "grad_norm": 0.0225830078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0488,
+      "step": 125
+    },
+    {
+      "epoch": 0.3267871170463472,
+      "grad_norm": 0.02490234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0444,
+      "step": 130
+    },
+    {
+      "epoch": 0.33935585231736054,
+      "grad_norm": 0.0250244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.038,
+      "step": 135
+    },
+    {
+      "epoch": 0.3519245875883739,
+      "grad_norm": 0.0264892578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0308,
+      "step": 140
+    },
+    {
+      "epoch": 0.3644933228593873,
+      "grad_norm": 0.039306640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0291,
+      "step": 145
+    },
+    {
+      "epoch": 0.3770620581304006,
+      "grad_norm": 0.031982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0142,
+      "step": 150
+    },
+    {
+      "epoch": 0.389630793401414,
+      "grad_norm": 0.045654296875,
+      "learning_rate": 0.0001,
+      "loss": 0.2053,
+      "step": 155
+    },
+    {
+      "epoch": 0.40219952867242736,
+      "grad_norm": 0.0400390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0658,
+      "step": 160
+    },
+    {
+      "epoch": 0.4147682639434407,
+      "grad_norm": 0.0272216796875,
+      "learning_rate": 0.0001,
+      "loss": 0.045,
+      "step": 165
+    },
+    {
+      "epoch": 0.42733699921445406,
+      "grad_norm": 0.021240234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0343,
+      "step": 170
+    },
+    {
+      "epoch": 0.4399057344854674,
+      "grad_norm": 0.0263671875,
+      "learning_rate": 0.0001,
+      "loss": 0.041,
+      "step": 175
+    },
+    {
+      "epoch": 0.45247446975648076,
+      "grad_norm": 0.0311279296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0382,
+      "step": 180
+    },
+    {
+      "epoch": 0.46504320502749413,
+      "grad_norm": 0.022705078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0295,
+      "step": 185
+    },
+    {
+      "epoch": 0.47761194029850745,
+      "grad_norm": 0.0301513671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0257,
+      "step": 190
+    },
+    {
+      "epoch": 0.49018067556952083,
+      "grad_norm": 0.02490234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0217,
+      "step": 195
+    },
+    {
+      "epoch": 0.5027494108405341,
+      "grad_norm": 0.006866455078125,
+      "learning_rate": 0.0001,
+      "loss": 0.0073,
+      "step": 200
+    },
+    {
+      "epoch": 0.5153181461115475,
+      "grad_norm": 0.04443359375,
+      "learning_rate": 0.0001,
+      "loss": 0.1655,
+      "step": 205
+    },
+    {
+      "epoch": 0.5278868813825609,
+      "grad_norm": 0.056640625,
+      "learning_rate": 0.0001,
+      "loss": 0.051,
+      "step": 210
+    },
+    {
+      "epoch": 0.5404556166535742,
+      "grad_norm": 0.026123046875,
+      "learning_rate": 0.0001,
+      "loss": 0.0393,
+      "step": 215
+    },
+    {
+      "epoch": 0.5530243519245875,
+      "grad_norm": 0.01806640625,
+      "learning_rate": 0.0001,
+      "loss": 0.0255,
+      "step": 220
+    },
+    {
+      "epoch": 0.565593087195601,
+      "grad_norm": 0.0230712890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0333,
+      "step": 225
+    },
+    {
+      "epoch": 0.5781618224666143,
+      "grad_norm": 0.024658203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0289,
+      "step": 230
+    },
+    {
+      "epoch": 0.5907305577376276,
+      "grad_norm": 0.0301513671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0235,
+      "step": 235
+    },
+    {
+      "epoch": 0.603299293008641,
+      "grad_norm": 0.0284423828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0208,
+      "step": 240
+    },
+    {
+      "epoch": 0.6158680282796544,
+      "grad_norm": 0.025634765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0119,
+      "step": 245
+    },
+    {
+      "epoch": 0.6284367635506677,
+      "grad_norm": 0.0125732421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0093,
+      "step": 250
+    },
+    {
+      "epoch": 0.6410054988216811,
+      "grad_norm": 0.051025390625,
+      "learning_rate": 0.0001,
+      "loss": 0.1598,
+      "step": 255
+    },
+    {
+      "epoch": 0.6535742340926944,
+      "grad_norm": 0.0546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0457,
+      "step": 260
+    },
+    {
+      "epoch": 0.6661429693637078,
+      "grad_norm": 0.03564453125,
+      "learning_rate": 0.0001,
+      "loss": 0.0352,
+      "step": 265
+    },
+    {
+      "epoch": 0.6787117046347211,
+      "grad_norm": 0.019775390625,
+      "learning_rate": 0.0001,
+      "loss": 0.024,
+      "step": 270
+    },
+    {
+      "epoch": 0.6912804399057345,
+      "grad_norm": 0.0234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0296,
+      "step": 275
+    },
+    {
+      "epoch": 0.7038491751767478,
+      "grad_norm": 0.0264892578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0249,
+      "step": 280
+    },
+    {
+      "epoch": 0.7164179104477612,
+      "grad_norm": 0.029541015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0199,
+      "step": 285
+    },
+    {
+      "epoch": 0.7289866457187746,
+      "grad_norm": 0.02294921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0154,
+      "step": 290
+    },
+    {
+      "epoch": 0.7415553809897879,
+      "grad_norm": 0.0220947265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0116,
+      "step": 295
+    },
+    {
+      "epoch": 0.7541241162608012,
+      "grad_norm": 0.00531005859375,
+      "learning_rate": 0.0001,
+      "loss": 0.0058,
+      "step": 300
+    },
+    {
+      "epoch": 0.7666928515318147,
+      "grad_norm": 0.049560546875,
+      "learning_rate": 0.0001,
+      "loss": 0.1521,
+      "step": 305
+    },
+    {
+      "epoch": 0.779261586802828,
+      "grad_norm": 0.140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0482,
+      "step": 310
+    },
+    {
+      "epoch": 0.7918303220738413,
+      "grad_norm": 0.035888671875,
+      "learning_rate": 0.0001,
+      "loss": 0.0372,
+      "step": 315
+    },
+    {
+      "epoch": 0.8043990573448547,
+      "grad_norm": 0.036865234375,
+      "learning_rate": 0.0001,
+      "loss": 0.0244,
+      "step": 320
+    },
+    {
+      "epoch": 0.816967792615868,
+      "grad_norm": 0.030517578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0263,
+      "step": 325
+    },
+    {
+      "epoch": 0.8295365278868814,
+      "grad_norm": 0.024169921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0218,
+      "step": 330
+    },
+    {
+      "epoch": 0.8421052631578947,
+      "grad_norm": 0.0308837890625,
+      "learning_rate": 0.0001,
+      "loss": 0.0182,
+      "step": 335
+    },
+    {
+      "epoch": 0.8546739984289081,
+      "grad_norm": 0.02880859375,
+      "learning_rate": 0.0001,
+      "loss": 0.014,
+      "step": 340
+    },
+    {
+      "epoch": 0.8672427336999214,
+      "grad_norm": 0.03173828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0109,
+      "step": 345
+    },
+    {
+      "epoch": 0.8798114689709348,
+      "grad_norm": 0.01483154296875,
+      "learning_rate": 0.0001,
+      "loss": 0.0044,
+      "step": 350
+    },
+    {
+      "epoch": 0.8923802042419482,
+      "grad_norm": 0.03955078125,
+      "learning_rate": 0.0001,
+      "loss": 0.1312,
+      "step": 355
+    },
+    {
+      "epoch": 0.9049489395129615,
+      "grad_norm": 0.031982421875,
+      "learning_rate": 0.0001,
+      "loss": 0.0403,
+      "step": 360
+    },
+    {
+      "epoch": 0.9049489395129615,
+      "step": 360,
+      "total_flos": 1.6220305320330854e+18,
+      "train_loss": 0.07024859038905965,
+      "train_runtime": 50041.181,
+      "train_samples_per_second": 0.921,
+      "train_steps_per_second": 0.007
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 360,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 90,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.6220305320330854e+18,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/codellama/java/dataflow_pretrained/all_results.json b/codellama/java/dataflow_pretrained/all_results.json
deleted file mode 100644
index 64bfe2710811ec1b306126c677d1dd39ec762ea4..0000000000000000000000000000000000000000
--- a/codellama/java/dataflow_pretrained/all_results.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-    "epoch": 0.905020032995522,
-    "total_flos": 1.5364568007927398e+18,
-    "train_loss": 0.11899957797593541,
-    "train_runtime": 69215.1765,
-    "train_samples_per_second": 0.666,
-    "train_steps_per_second": 0.01
-}
\ No newline at end of file
diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/adapter_model.safetensors b/codellama/java/dataflow_pretrained/checkpoint-720/adapter_model.safetensors
deleted file mode 100644
index 2cfdf7bed0df57ec1c9f14be31ccdc570473e0ee..0000000000000000000000000000000000000000
--- a/codellama/java/dataflow_pretrained/checkpoint-720/adapter_model.safetensors
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3cf8f1c4cc300ca5094e08295cc0dcffacce527b464e1372de75271bb4d522a9
-size 1156480200
diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/adapter_model/adapter_model.safetensors b/codellama/java/dataflow_pretrained/checkpoint-720/adapter_model/adapter_model.safetensors
deleted file mode 100644
index 2cfdf7bed0df57ec1c9f14be31ccdc570473e0ee..0000000000000000000000000000000000000000
--- a/codellama/java/dataflow_pretrained/checkpoint-720/adapter_model/adapter_model.safetensors
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3cf8f1c4cc300ca5094e08295cc0dcffacce527b464e1372de75271bb4d522a9
-size 1156480200
diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/optimizer.pt b/codellama/java/dataflow_pretrained/checkpoint-720/optimizer.pt
deleted file mode 100644
index 2865a6353c416caaf540a8718eeace538916ccc1..0000000000000000000000000000000000000000
--- a/codellama/java/dataflow_pretrained/checkpoint-720/optimizer.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:31c4a8fb04732973611d06dc14c79dd69c2644d9167a680a4d9760a3cdc9059d
-size 2003127538
diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/scheduler.pt b/codellama/java/dataflow_pretrained/checkpoint-720/scheduler.pt
deleted file mode 100644
index f14a4ae58aa46cb66c004b49dfe361461655b55b..0000000000000000000000000000000000000000
--- a/codellama/java/dataflow_pretrained/checkpoint-720/scheduler.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c48ea2f606cbbb6177c782dd71ba690a6d43d7f02de58760a50cf5c03d3d9324
-size 1064
diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/trainer_state.json b/codellama/java/dataflow_pretrained/checkpoint-720/trainer_state.json
deleted file mode 100644
index 3c48eb6bf4df5be4d6ad6819fc6cea34ab2deca1..0000000000000000000000000000000000000000
--- a/codellama/java/dataflow_pretrained/checkpoint-720/trainer_state.json
+++ /dev/null
@@ -1,1041 +0,0 @@
-{
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 0.905020032995522,
-  "eval_steps": 500,
-  "global_step": 720,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.006284861340246681,
-      "grad_norm": 0.0712890625,
-      "learning_rate": 0.0001,
-      "loss": 0.7884,
-      "step": 5
-    },
-    {
-      "epoch": 0.012569722680493361,
-      "grad_norm": 0.1318359375,
-      "learning_rate": 0.0001,
-      "loss": 0.5229,
-      "step": 10
-    },
-    {
-      "epoch": 0.018854584020740042,
-      "grad_norm": 0.0927734375,
-      "learning_rate": 0.0001,
-      "loss": 0.3535,
-      "step": 15
-    },
-    {
-      "epoch": 0.025139445360986723,
-      "grad_norm": 0.08251953125,
-      "learning_rate": 0.0001,
-      "loss": 0.2525,
-      "step": 20
-    },
-    {
-      "epoch": 0.031424306701233404,
-      "grad_norm": 0.0751953125,
-      "learning_rate": 0.0001,
-      "loss": 0.229,
-      "step": 25
-    },
-    {
-      "epoch": 0.037709168041480084,
-      "grad_norm": 0.10888671875,
-      "learning_rate": 0.0001,
-      "loss": 0.204,
-      "step": 30
-    },
-    {
-      "epoch": 0.043994029381726765,
-      "grad_norm": 0.0927734375,
-      "learning_rate": 0.0001,
-      "loss": 0.1598,
-      "step": 35
-    },
-    {
-      "epoch": 0.050278890721973446,
-      "grad_norm": 0.06494140625,
-      "learning_rate": 0.0001,
-      "loss": 0.1241,
-      "step": 40
-    },
-    {
-      "epoch": 0.05656375206222013,
-      "grad_norm": 0.059814453125,
-      "learning_rate": 0.0001,
-      "loss": 0.1026,
-      "step": 45
-    },
-    {
-      "epoch": 0.06284861340246681,
-      "grad_norm": 0.2265625,
-      "learning_rate": 0.0001,
-      "loss": 0.0843,
-      "step": 50
-    },
-    {
-      "epoch": 0.06913347474271349,
-      "grad_norm": 0.08349609375,
-      "learning_rate": 0.0001,
-      "loss": 0.5241,
-      "step": 55
-    },
-    {
-      "epoch": 0.07541833608296017,
-      "grad_norm": 0.07958984375,
-      "learning_rate": 0.0001,
-      "loss": 0.1898,
-      "step": 60
-    },
-    {
-      "epoch": 0.08170319742320685,
-      "grad_norm": 0.052490234375,
-      "learning_rate": 0.0001,
-      "loss": 0.1542,
-      "step": 65
-    },
-    {
-      "epoch": 0.08798805876345353,
-      "grad_norm": 0.0546875,
-      "learning_rate": 0.0001,
-      "loss": 0.1152,
-      "step": 70
-    },
-    {
-      "epoch": 0.09427292010370021,
-      "grad_norm": 0.058349609375,
-      "learning_rate": 0.0001,
-      "loss": 0.1399,
-      "step": 75
-    },
-    {
-      "epoch": 0.10055778144394689,
-      "grad_norm": 0.04052734375,
-      "learning_rate": 0.0001,
-      "loss": 0.1282,
-      "step": 80
-    },
-    {
-      "epoch": 0.10684264278419357,
-      "grad_norm": 0.044189453125,
-      "learning_rate": 0.0001,
-      "loss": 0.1135,
-      "step": 85
-    },
-    {
-      "epoch": 0.11312750412444025,
-      "grad_norm": 0.037109375,
-      "learning_rate": 0.0001,
-      "loss": 0.0923,
-      "step": 90
-    },
-    {
-      "epoch": 0.11941236546468693,
-      "grad_norm": 0.050048828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0895,
-      "step": 95
-    },
-    {
-      "epoch": 0.12569722680493361,
-      "grad_norm": 0.064453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0574,
-      "step": 100
-    },
-    {
-      "epoch": 0.1319820881451803,
-      "grad_norm": 0.0625,
-      "learning_rate": 0.0001,
-      "loss": 0.3794,
-      "step": 105
-    },
-    {
-      "epoch": 0.13826694948542698,
-      "grad_norm": 0.04443359375,
-      "learning_rate": 0.0001,
-      "loss": 0.1638,
-      "step": 110
-    },
-    {
-      "epoch": 0.14455181082567367,
-      "grad_norm": 0.04931640625,
-      "learning_rate": 0.0001,
-      "loss": 0.1154,
-      "step": 115
-    },
-    {
-      "epoch": 0.15083667216592034,
-      "grad_norm": 0.04931640625,
-      "learning_rate": 0.0001,
-      "loss": 0.0967,
-      "step": 120
-    },
-    {
-      "epoch": 0.15712153350616703,
-      "grad_norm": 0.04248046875,
-      "learning_rate": 0.0001,
-      "loss": 0.1275,
-      "step": 125
-    },
-    {
-      "epoch": 0.1634063948464137,
-      "grad_norm": 0.03759765625,
-      "learning_rate": 0.0001,
-      "loss": 0.11,
-      "step": 130
-    },
-    {
-      "epoch": 0.1696912561866604,
-      "grad_norm": 0.039794921875,
-      "learning_rate": 0.0001,
-      "loss": 0.0986,
-      "step": 135
-    },
-    {
-      "epoch": 0.17597611752690706,
-      "grad_norm": 0.042724609375,
-      "learning_rate": 0.0001,
-      "loss": 0.082,
-      "step": 140
-    },
-    {
-      "epoch": 0.18226097886715376,
-      "grad_norm": 0.049072265625,
-      "learning_rate": 0.0001,
-      "loss": 0.0729,
-      "step": 145
-    },
-    {
-      "epoch": 0.18854584020740042,
-      "grad_norm": 0.044189453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0468,
-      "step": 150
-    },
-    {
-      "epoch": 0.19483070154764712,
-      "grad_norm": 0.06982421875,
-      "learning_rate": 0.0001,
-      "loss": 0.3499,
-      "step": 155
-    },
-    {
-      "epoch": 0.20111556288789378,
-      "grad_norm": 0.054931640625,
-      "learning_rate": 0.0001,
-      "loss": 0.1535,
-      "step": 160
-    },
-    {
-      "epoch": 0.20740042422814048,
-      "grad_norm": 0.045166015625,
-      "learning_rate": 0.0001,
-      "loss": 0.1166,
-      "step": 165
-    },
-    {
-      "epoch": 0.21368528556838715,
-      "grad_norm": 0.047119140625,
-      "learning_rate": 0.0001,
-      "loss": 0.0816,
-      "step": 170
-    },
-    {
-      "epoch": 0.21997014690863384,
-      "grad_norm": 0.0634765625,
-      "learning_rate": 0.0001,
-      "loss": 0.1164,
-      "step": 175
-    },
-    {
-      "epoch": 0.2262550082488805,
-      "grad_norm": 0.0478515625,
-      "learning_rate": 0.0001,
-      "loss": 0.1004,
-      "step": 180
-    },
-    {
-      "epoch": 0.2325398695891272,
-      "grad_norm": 0.06103515625,
-      "learning_rate": 0.0001,
-      "loss": 0.092,
-      "step": 185
-    },
-    {
-      "epoch": 0.23882473092937387,
-      "grad_norm": 0.0458984375,
-      "learning_rate": 0.0001,
-      "loss": 0.0815,
-      "step": 190
-    },
-    {
-      "epoch": 0.24510959226962056,
-      "grad_norm": 0.05419921875,
-      "learning_rate": 0.0001,
-      "loss": 0.0708,
-      "step": 195
-    },
-    {
-      "epoch": 0.25139445360986723,
-      "grad_norm": 0.07763671875,
-      "learning_rate": 0.0001,
-      "loss": 0.0425,
-      "step": 200
-    },
-    {
-      "epoch": 0.2576793149501139,
-      "grad_norm": 0.11376953125,
-      "learning_rate": 0.0001,
-      "loss": 0.3435,
-      "step": 205
-    },
-    {
-      "epoch": 0.2639641762903606,
-      "grad_norm": 0.057373046875,
-      "learning_rate": 0.0001,
-      "loss": 0.1445,
-      "step": 210
-    },
-    {
-      "epoch": 0.27024903763060726,
-      "grad_norm": 0.03759765625,
-      "learning_rate": 0.0001,
-      "loss": 0.1052,
-      "step": 215
-    },
-    {
-      "epoch": 0.27653389897085395,
-      "grad_norm": 0.03515625,
-      "learning_rate": 0.0001,
-      "loss": 0.0789,
-      "step": 220
-    },
-    {
-      "epoch": 0.28281876031110065,
-      "grad_norm": 0.043212890625,
-      "learning_rate": 0.0001,
-      "loss": 0.1068,
-      "step": 225
-    },
-    {
-      "epoch": 0.28910362165134734,
-      "grad_norm": 0.043212890625,
-      "learning_rate": 0.0001,
-      "loss": 0.0958,
-      "step": 230
-    },
-    {
-      "epoch": 0.295388482991594,
-      "grad_norm": 0.04052734375,
-      "learning_rate": 0.0001,
-      "loss": 0.0817,
-      "step": 235
-    },
-    {
-      "epoch": 0.3016733443318407,
-      "grad_norm": 0.06494140625,
-      "learning_rate": 0.0001,
-      "loss": 0.07,
-      "step": 240
-    },
-    {
-      "epoch": 0.30795820567208737,
-      "grad_norm": 0.047607421875,
-      "learning_rate": 0.0001,
-      "loss": 0.0596,
-      "step": 245
-    },
-    {
-      "epoch": 0.31424306701233407,
-      "grad_norm": 0.06884765625,
-      "learning_rate": 0.0001,
-      "loss": 0.0357,
-      "step": 250
-    },
-    {
-      "epoch": 0.3205279283525807,
-      "grad_norm": 0.08154296875,
-      "learning_rate": 0.0001,
-      "loss": 0.3339,
-      "step": 255
-    },
-    {
-      "epoch": 0.3268127896928274,
-      "grad_norm": 0.048583984375,
-      "learning_rate": 0.0001,
-      "loss": 0.1467,
-      "step": 260
-    },
-    {
-      "epoch": 0.3330976510330741,
-      "grad_norm": 0.035400390625,
-      "learning_rate": 0.0001,
-      "loss": 0.1058,
-      "step": 265
-    },
-    {
-      "epoch": 0.3393825123733208,
-      "grad_norm": 0.034423828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0701,
-      "step": 270
-    },
-    {
-      "epoch": 0.3456673737135674,
-      "grad_norm": 0.04443359375,
-      "learning_rate": 0.0001,
-      "loss": 0.1052,
-      "step": 275
-    },
-    {
-      "epoch": 0.3519522350538141,
-      "grad_norm": 0.047119140625,
-      "learning_rate": 0.0001,
-      "loss": 0.0958,
-      "step": 280
-    },
-    {
-      "epoch": 0.3582370963940608,
-      "grad_norm": 0.033447265625,
-      "learning_rate": 0.0001,
-      "loss": 0.0784,
-      "step": 285
-    },
-    {
-      "epoch": 0.3645219577343075,
-      "grad_norm": 0.051025390625,
-      "learning_rate": 0.0001,
-      "loss": 0.0671,
-      "step": 290
-    },
-    {
-      "epoch": 0.37080681907455415,
-      "grad_norm": 0.0673828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0517,
-      "step": 295
-    },
-    {
-      "epoch": 0.37709168041480084,
-      "grad_norm": 0.08203125,
-      "learning_rate": 0.0001,
-      "loss": 0.0368,
-      "step": 300
-    },
-    {
-      "epoch": 0.38337654175504754,
-      "grad_norm": 0.06201171875,
-      "learning_rate": 0.0001,
-      "loss": 0.3533,
-      "step": 305
-    },
-    {
-      "epoch": 0.38966140309529423,
-      "grad_norm": 0.055419921875,
-      "learning_rate": 0.0001,
-      "loss": 0.1495,
-      "step": 310
-    },
-    {
-      "epoch": 0.3959462644355409,
-      "grad_norm": 0.044189453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0914,
-      "step": 315
-    },
-    {
-      "epoch": 0.40223112577578757,
-      "grad_norm": 0.042236328125,
-      "learning_rate": 0.0001,
-      "loss": 0.0759,
-      "step": 320
-    },
-    {
-      "epoch": 0.40851598711603426,
-      "grad_norm": 0.0439453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0956,
-      "step": 325
-    },
-    {
-      "epoch": 0.41480084845628096,
-      "grad_norm": 0.042724609375,
-      "learning_rate": 0.0001,
-      "loss": 0.0874,
-      "step": 330
-    },
-    {
-      "epoch": 0.4210857097965276,
-      "grad_norm": 0.045166015625,
-      "learning_rate": 0.0001,
-      "loss": 0.0697,
-      "step": 335
-    },
-    {
-      "epoch": 0.4273705711367743,
-      "grad_norm": 0.140625,
-      "learning_rate": 0.0001,
-      "loss": 0.0647,
-      "step": 340
-    },
-    {
-      "epoch": 0.433655432477021,
-      "grad_norm": 0.0439453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0538,
-      "step": 345
-    },
-    {
-      "epoch": 0.4399402938172677,
-      "grad_norm": 0.05029296875,
-      "learning_rate": 0.0001,
-      "loss": 0.0348,
-      "step": 350
-    },
-    {
-      "epoch": 0.4462251551575143,
-      "grad_norm": 0.0556640625,
-      "learning_rate": 0.0001,
-      "loss": 0.3265,
-      "step": 355
-    },
-    {
-      "epoch": 0.452510016497761,
-      "grad_norm": 0.06982421875,
-      "learning_rate": 0.0001,
-      "loss": 0.1376,
-      "step": 360
-    },
-    {
-      "epoch": 0.4587948778380077,
-      "grad_norm": 0.034423828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0982,
-      "step": 365
-    },
-    {
-      "epoch": 0.4650797391782544,
-      "grad_norm": 0.042236328125,
-      "learning_rate": 0.0001,
-      "loss": 0.0813,
-      "step": 370
-    },
-    {
-      "epoch": 0.47136460051850104,
-      "grad_norm": 0.040771484375,
-      "learning_rate": 0.0001,
-      "loss": 0.0947,
-      "step": 375
-    },
-    {
-      "epoch": 0.47764946185874774,
-      "grad_norm": 0.0400390625,
-      "learning_rate": 0.0001,
-      "loss": 0.0847,
-      "step": 380
-    },
-    {
-      "epoch": 0.48393432319899443,
-      "grad_norm": 0.0419921875,
-      "learning_rate": 0.0001,
-      "loss": 0.0738,
-      "step": 385
-    },
-    {
-      "epoch": 0.4902191845392411,
-      "grad_norm": 0.043701171875,
-      "learning_rate": 0.0001,
-      "loss": 0.062,
-      "step": 390
-    },
-    {
-      "epoch": 0.49650404587948777,
-      "grad_norm": 0.08251953125,
-      "learning_rate": 0.0001,
-      "loss": 0.0558,
-      "step": 395
-    },
-    {
-      "epoch": 0.5027889072197345,
-      "grad_norm": 0.040771484375,
-      "learning_rate": 0.0001,
-      "loss": 0.0327,
-      "step": 400
-    },
-    {
-      "epoch": 0.5090737685599811,
-      "grad_norm": 0.062255859375,
-      "learning_rate": 0.0001,
-      "loss": 0.3109,
-      "step": 405
-    },
-    {
-      "epoch": 0.5153586299002278,
-      "grad_norm": 0.06689453125,
-      "learning_rate": 0.0001,
-      "loss": 0.1447,
-      "step": 410
-    },
-    {
-      "epoch": 0.5216434912404745,
-      "grad_norm": 0.033935546875,
-      "learning_rate": 0.0001,
-      "loss": 0.0943,
-      "step": 415
-    },
-    {
-      "epoch": 0.5279283525807212,
-      "grad_norm": 0.037353515625,
-      "learning_rate": 0.0001,
-      "loss": 0.0724,
-      "step": 420
-    },
-    {
-      "epoch": 0.5342132139209679,
-      "grad_norm": 0.03466796875,
-      "learning_rate": 0.0001,
-      "loss": 0.1063,
-      "step": 425
-    },
-    {
-      "epoch": 0.5404980752612145,
-      "grad_norm": 0.068359375,
-      "learning_rate": 0.0001,
-      "loss": 0.0855,
-      "step": 430
-    },
-    {
-      "epoch": 0.5467829366014613,
-      "grad_norm": 0.044677734375,
-      "learning_rate": 0.0001,
-      "loss": 0.076,
-      "step": 435
-    },
-    {
-      "epoch": 0.5530677979417079,
-      "grad_norm": 0.04638671875,
-      "learning_rate": 0.0001,
-      "loss": 0.0608,
-      "step": 440
-    },
-    {
-      "epoch": 0.5593526592819545,
-      "grad_norm": 0.03515625,
-      "learning_rate": 0.0001,
-      "loss": 0.0506,
-      "step": 445
-    },
-    {
-      "epoch": 0.5656375206222013,
-      "grad_norm": 0.02099609375,
-      "learning_rate": 0.0001,
-      "loss": 0.0336,
-      "step": 450
-    },
-    {
-      "epoch": 0.5719223819624479,
-      "grad_norm": 0.059326171875,
-      "learning_rate": 0.0001,
-      "loss": 0.2604,
-      "step": 455
-    },
-    {
-      "epoch": 0.5782072433026947,
-      "grad_norm": 0.07470703125,
-      "learning_rate": 0.0001,
-      "loss": 0.1273,
-      "step": 460
-    },
-    {
-      "epoch": 0.5844921046429413,
-      "grad_norm": 0.054931640625,
-      "learning_rate": 0.0001,
-      "loss": 0.094,
-      "step": 465
-    },
-    {
-      "epoch": 0.590776965983188,
-      "grad_norm": 0.021240234375,
-      "learning_rate": 0.0001,
-      "loss": 0.0642,
-      "step": 470
-    },
-    {
-      "epoch": 0.5970618273234347,
-      "grad_norm": 0.032958984375,
-      "learning_rate": 0.0001,
-      "loss": 0.0914,
-      "step": 475
-    },
-    {
-      "epoch": 0.6033466886636814,
-      "grad_norm": 0.0400390625,
-      "learning_rate": 0.0001,
-      "loss": 0.08,
-      "step": 480
-    },
-    {
-      "epoch": 0.609631550003928,
-      "grad_norm": 0.046875,
-      "learning_rate": 0.0001,
-      "loss": 0.0709,
-      "step": 485
-    },
-    {
-      "epoch": 0.6159164113441747,
-      "grad_norm": 0.048828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0588,
-      "step": 490
-    },
-    {
-      "epoch": 0.6222012726844214,
-      "grad_norm": 0.056884765625,
-      "learning_rate": 0.0001,
-      "loss": 0.0417,
-      "step": 495
-    },
-    {
-      "epoch": 0.6284861340246681,
-      "grad_norm": 0.041259765625,
-      "learning_rate": 0.0001,
-      "loss": 0.0281,
-      "step": 500
-    },
-    {
-      "epoch": 0.6347709953649148,
-      "grad_norm": 0.064453125,
-      "learning_rate": 0.0001,
-      "loss": 0.2518,
-      "step": 505
-    },
-    {
-      "epoch": 0.6410558567051614,
-      "grad_norm": 0.058837890625,
-      "learning_rate": 0.0001,
-      "loss": 0.1275,
-      "step": 510
-    },
-    {
-      "epoch": 0.6473407180454082,
-      "grad_norm": 0.034912109375,
-      "learning_rate": 0.0001,
-      "loss": 0.086,
-      "step": 515
-    },
-    {
-      "epoch": 0.6536255793856548,
-      "grad_norm": 0.042236328125,
-      "learning_rate": 0.0001,
-      "loss": 0.0677,
-      "step": 520
-    },
-    {
-      "epoch": 0.6599104407259014,
-      "grad_norm": 0.03369140625,
-      "learning_rate": 0.0001,
-      "loss": 0.0934,
-      "step": 525
-    },
-    {
-      "epoch": 0.6661953020661482,
-      "grad_norm": 0.040771484375,
-      "learning_rate": 0.0001,
-      "loss": 0.0781,
-      "step": 530
-    },
-    {
-      "epoch": 0.6724801634063948,
-      "grad_norm": 0.041748046875,
-      "learning_rate": 0.0001,
-      "loss": 0.0638,
-      "step": 535
-    },
-    {
-      "epoch": 0.6787650247466416,
-      "grad_norm": 0.035888671875,
-      "learning_rate": 0.0001,
-      "loss": 0.0543,
-      "step": 540
-    },
-    {
-      "epoch": 0.6850498860868882,
-      "grad_norm": 0.0341796875,
-      "learning_rate": 0.0001,
-      "loss": 0.0428,
-      "step": 545
-    },
-    {
-      "epoch": 0.6913347474271349,
-      "grad_norm": 0.03271484375,
-      "learning_rate": 0.0001,
-      "loss": 0.0278,
-      "step": 550
-    },
-    {
-      "epoch": 0.6976196087673816,
-      "grad_norm": 0.055419921875,
-      "learning_rate": 0.0001,
-      "loss": 0.2516,
-      "step": 555
-    },
-    {
-      "epoch": 0.7039044701076282,
-      "grad_norm": 0.0634765625,
-      "learning_rate": 0.0001,
-      "loss": 0.1206,
-      "step": 560
-    },
-    {
-      "epoch": 0.7101893314478749,
-      "grad_norm": 0.038818359375,
-      "learning_rate": 0.0001,
-      "loss": 0.0805,
-      "step": 565
-    },
-    {
-      "epoch": 0.7164741927881216,
-      "grad_norm": 0.036865234375,
-      "learning_rate": 0.0001,
-      "loss": 0.0648,
-      "step": 570
-    },
-    {
-      "epoch": 0.7227590541283683,
-      "grad_norm": 0.03857421875,
-      "learning_rate": 0.0001,
-      "loss": 0.0835,
-      "step": 575
-    },
-    {
-      "epoch": 0.729043915468615,
-      "grad_norm": 0.041748046875,
-      "learning_rate": 0.0001,
-      "loss": 0.0773,
-      "step": 580
-    },
-    {
-      "epoch": 0.7353287768088617,
-      "grad_norm": 0.04443359375,
-      "learning_rate": 0.0001,
-      "loss": 0.0609,
-      "step": 585
-    },
-    {
-      "epoch": 0.7416136381491083,
-      "grad_norm": 0.05224609375,
-      "learning_rate": 0.0001,
-      "loss": 0.0516,
-      "step": 590
-    },
-    {
-      "epoch": 0.747898499489355,
-      "grad_norm": 0.1240234375,
-      "learning_rate": 0.0001,
-      "loss": 0.0416,
-      "step": 595
-    },
-    {
-      "epoch": 0.7541833608296017,
-      "grad_norm": 0.0206298828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0298,
-      "step": 600
-    },
-    {
-      "epoch": 0.7604682221698483,
-      "grad_norm": 0.07080078125,
-      "learning_rate": 0.0001,
-      "loss": 0.243,
-      "step": 605
-    },
-    {
-      "epoch": 0.7667530835100951,
-      "grad_norm": 0.06494140625,
-      "learning_rate": 0.0001,
-      "loss": 0.1263,
-      "step": 610
-    },
-    {
-      "epoch": 0.7730379448503417,
-      "grad_norm": 0.0537109375,
-      "learning_rate": 0.0001,
-      "loss": 0.088,
-      "step": 615
-    },
-    {
-      "epoch": 0.7793228061905885,
-      "grad_norm": 0.03515625,
-      "learning_rate": 0.0001,
-      "loss": 0.0559,
-      "step": 620
-    },
-    {
-      "epoch": 0.7856076675308351,
-      "grad_norm": 0.047607421875,
-      "learning_rate": 0.0001,
-      "loss": 0.0853,
-      "step": 625
-    },
-    {
-      "epoch": 0.7918925288710817,
-      "grad_norm": 0.0419921875,
-      "learning_rate": 0.0001,
-      "loss": 0.0715,
-      "step": 630
-    },
-    {
-      "epoch": 0.7981773902113285,
-      "grad_norm": 0.0927734375,
-      "learning_rate": 0.0001,
-      "loss": 0.0598,
-      "step": 635
-    },
-    {
-      "epoch": 0.8044622515515751,
-      "grad_norm": 0.0419921875,
-      "learning_rate": 0.0001,
-      "loss": 0.0466,
-      "step": 640
-    },
-    {
-      "epoch": 0.8107471128918218,
-      "grad_norm": 0.043701171875,
-      "learning_rate": 0.0001,
-      "loss": 0.0404,
-      "step": 645
-    },
-    {
-      "epoch": 0.8170319742320685,
-      "grad_norm": 0.033935546875,
-      "learning_rate": 0.0001,
-      "loss": 0.0315,
-      "step": 650
-    },
-    {
-      "epoch": 0.8233168355723152,
-      "grad_norm": 0.08251953125,
-      "learning_rate": 0.0001,
-      "loss": 0.2336,
-      "step": 655
-    },
-    {
-      "epoch": 0.8296016969125619,
-      "grad_norm": 0.053955078125,
-      "learning_rate": 0.0001,
-      "loss": 0.1183,
-      "step": 660
-    },
-    {
-      "epoch": 0.8358865582528086,
-      "grad_norm": 0.03759765625,
-      "learning_rate": 0.0001,
-      "loss": 0.0826,
-      "step": 665
-    },
-    {
-      "epoch": 0.8421714195930552,
-      "grad_norm": 0.046142578125,
-      "learning_rate": 0.0001,
-      "loss": 0.0657,
-      "step": 670
-    },
-    {
-      "epoch": 0.8484562809333019,
-      "grad_norm": 0.04248046875,
-      "learning_rate": 0.0001,
-      "loss": 0.0845,
-      "step": 675
-    },
-    {
-      "epoch": 0.8547411422735486,
-      "grad_norm": 0.048828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0663,
-      "step": 680
-    },
-    {
-      "epoch": 0.8610260036137952,
-      "grad_norm": 0.0625,
-      "learning_rate": 0.0001,
-      "loss": 0.0565,
-      "step": 685
-    },
-    {
-      "epoch": 0.867310864954042,
-      "grad_norm": 0.05810546875,
-      "learning_rate": 0.0001,
-      "loss": 0.0486,
-      "step": 690
-    },
-    {
-      "epoch": 0.8735957262942886,
-      "grad_norm": 0.054931640625,
-      "learning_rate": 0.0001,
-      "loss": 0.0397,
-      "step": 695
-    },
-    {
-      "epoch": 0.8798805876345354,
-      "grad_norm": 0.037353515625,
-      "learning_rate": 0.0001,
-      "loss": 0.0268,
-      "step": 700
-    },
-    {
-      "epoch": 0.886165448974782,
-      "grad_norm": 0.09521484375,
-      "learning_rate": 0.0001,
-      "loss": 0.2371,
-      "step": 705
-    },
-    {
-      "epoch": 0.8924503103150286,
-      "grad_norm": 0.06494140625,
-      "learning_rate": 0.0001,
-      "loss": 0.1144,
-      "step": 710
-    },
-    {
-      "epoch": 0.8987351716552754,
-      "grad_norm": 0.041748046875,
-      "learning_rate": 0.0001,
-      "loss": 0.0906,
-      "step": 715
-    },
-    {
-      "epoch": 0.905020032995522,
-      "grad_norm": 0.033203125,
-      "learning_rate": 0.0001,
-      "loss": 0.0549,
-      "step": 720
-    }
-  ],
-  "logging_steps": 5,
-  "max_steps": 720,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 1,
-  "save_steps": 90,
-  "stateful_callbacks": {
-    "TrainerControl": {
-      "args": {
-        "should_epoch_stop": false,
-        "should_evaluate": false,
-        "should_log": false,
-        "should_save": true,
-        "should_training_stop": true
-      },
-      "attributes": {}
-    }
-  },
-  "total_flos": 1.5364568007927398e+18,
-  "train_batch_size": 4,
-  "trial_name": null,
-  "trial_params": null
-}
diff --git a/codellama/java/dataflow_pretrained/checkpoint-720/training_args.bin b/codellama/java/dataflow_pretrained/checkpoint-720/training_args.bin
deleted file mode 100644
index 63447c8ef1abaa098f00b023ed64c96e71210d61..0000000000000000000000000000000000000000
--- a/codellama/java/dataflow_pretrained/checkpoint-720/training_args.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:824c938bee04d46c16bd5438c177873620e56e36a6e51c3a35b2b80c6e87b25b
-size 7416
diff --git a/codellama/java/dataflow_pretrained/metrics.json b/codellama/java/dataflow_pretrained/metrics.json
deleted file mode 100644
index 4ba5fb8953fd9541518eb85f3a79275a34fa88c1..0000000000000000000000000000000000000000
--- a/codellama/java/dataflow_pretrained/metrics.json
+++ /dev/null
@@ -1 +0,0 @@
-{"run_name": "dataflow_pretrained_java", "train_runtime": 69215.1765, "train_samples_per_second": 0.666, "train_steps_per_second": 0.01, "total_flos": 1.5364568007927398e+18, "train_loss": 0.11899957797593541, "epoch": 0.905020032995522}
\ No newline at end of file
diff --git a/codellama/java/dataflow_pretrained/train_results.json b/codellama/java/dataflow_pretrained/train_results.json
deleted file mode 100644
index 64bfe2710811ec1b306126c677d1dd39ec762ea4..0000000000000000000000000000000000000000
--- a/codellama/java/dataflow_pretrained/train_results.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-    "epoch": 0.905020032995522,
-    "total_flos": 1.5364568007927398e+18,
-    "train_loss": 0.11899957797593541,
-    "train_runtime": 69215.1765,
-    "train_samples_per_second": 0.666,
-    "train_steps_per_second": 0.01
-}
\ No newline at end of file
diff --git a/codellama/java/dataflow_pretrained/trainer_state.json b/codellama/java/dataflow_pretrained/trainer_state.json
deleted file mode 100644
index ef68b916109e586c454f1fd1c4f3eb75ecb265e4..0000000000000000000000000000000000000000
--- a/codellama/java/dataflow_pretrained/trainer_state.json
+++ /dev/null
@@ -1,1050 +0,0 @@
-{
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 0.905020032995522,
-  "eval_steps": 500,
-  "global_step": 720,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.006284861340246681,
-      "grad_norm": 0.0712890625,
-      "learning_rate": 0.0001,
-      "loss": 0.7884,
-      "step": 5
-    },
-    {
-      "epoch": 0.012569722680493361,
-      "grad_norm": 0.1318359375,
-      "learning_rate": 0.0001,
-      "loss": 0.5229,
-      "step": 10
-    },
-    {
-      "epoch": 0.018854584020740042,
-      "grad_norm": 0.0927734375,
-      "learning_rate": 0.0001,
-      "loss": 0.3535,
-      "step": 15
-    },
-    {
-      "epoch": 0.025139445360986723,
-      "grad_norm": 0.08251953125,
-      "learning_rate": 0.0001,
-      "loss": 0.2525,
-      "step": 20
-    },
-    {
-      "epoch": 0.031424306701233404,
-      "grad_norm": 0.0751953125,
-      "learning_rate": 0.0001,
-      "loss": 0.229,
-      "step": 25
-    },
-    {
-      "epoch": 0.037709168041480084,
-      "grad_norm": 0.10888671875,
-      "learning_rate": 0.0001,
-      "loss": 0.204,
-      "step": 30
-    },
-    {
-      "epoch": 0.043994029381726765,
-      "grad_norm": 0.0927734375,
-      "learning_rate": 0.0001,
-      "loss": 0.1598,
-      "step": 35
-    },
-    {
-      "epoch": 0.050278890721973446,
-      "grad_norm": 0.06494140625,
-      "learning_rate": 0.0001,
-      "loss": 0.1241,
-      "step": 40
-    },
-    {
-      "epoch": 0.05656375206222013,
-      "grad_norm": 0.059814453125,
-      "learning_rate": 0.0001,
-      "loss": 0.1026,
-      "step": 45
-    },
-    {
-      "epoch": 0.06284861340246681,
-      "grad_norm": 0.2265625,
-      "learning_rate": 0.0001,
-      "loss": 0.0843,
-      "step": 50
-    },
-    {
-      "epoch": 0.06913347474271349,
-      "grad_norm": 0.08349609375,
-      "learning_rate": 0.0001,
-      "loss": 0.5241,
-      "step": 55
-    },
-    {
-      "epoch": 0.07541833608296017,
-      "grad_norm": 0.07958984375,
-      "learning_rate": 0.0001,
-      "loss": 0.1898,
-      "step": 60
-    },
-    {
-      "epoch": 0.08170319742320685,
-      "grad_norm": 0.052490234375,
-      "learning_rate": 0.0001,
-      "loss": 0.1542,
-      "step": 65
-    },
-    {
-      "epoch": 0.08798805876345353,
-      "grad_norm": 0.0546875,
-      "learning_rate": 0.0001,
-      "loss": 0.1152,
-      "step": 70
-    },
-    {
-      "epoch": 0.09427292010370021,
-      "grad_norm": 0.058349609375,
-      "learning_rate": 0.0001,
-      "loss": 0.1399,
-      "step": 75
-    },
-    {
-      "epoch": 0.10055778144394689,
-      "grad_norm": 0.04052734375,
-      "learning_rate": 0.0001,
-      "loss": 0.1282,
-      "step": 80
-    },
-    {
-      "epoch": 0.10684264278419357,
-      "grad_norm": 0.044189453125,
-      "learning_rate": 0.0001,
-      "loss": 0.1135,
-      "step": 85
-    },
-    {
-      "epoch": 0.11312750412444025,
-      "grad_norm": 0.037109375,
-      "learning_rate": 0.0001,
-      "loss": 0.0923,
-      "step": 90
-    },
-    {
-      "epoch": 0.11941236546468693,
-      "grad_norm": 0.050048828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0895,
-      "step": 95
-    },
-    {
-      "epoch": 0.12569722680493361,
-      "grad_norm": 0.064453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0574,
-      "step": 100
-    },
-    {
-      "epoch": 0.1319820881451803,
-      "grad_norm": 0.0625,
-      "learning_rate": 0.0001,
-      "loss": 0.3794,
-      "step": 105
-    },
-    {
-      "epoch": 0.13826694948542698,
-      "grad_norm": 0.04443359375,
-      "learning_rate": 0.0001,
-      "loss": 0.1638,
-      "step": 110
-    },
-    {
-      "epoch": 0.14455181082567367,
-      "grad_norm": 0.04931640625,
-      "learning_rate": 0.0001,
-      "loss": 0.1154,
-      "step": 115
-    },
-    {
-      "epoch": 0.15083667216592034,
-      "grad_norm": 0.04931640625,
-      "learning_rate": 0.0001,
-      "loss": 0.0967,
-      "step": 120
-    },
-    {
-      "epoch": 0.15712153350616703,
-      "grad_norm": 0.04248046875,
-      "learning_rate": 0.0001,
-      "loss": 0.1275,
-      "step": 125
-    },
-    {
-      "epoch": 0.1634063948464137,
-      "grad_norm": 0.03759765625,
-      "learning_rate": 0.0001,
-      "loss": 0.11,
-      "step": 130
-    },
-    {
-      "epoch": 0.1696912561866604,
-      "grad_norm": 0.039794921875,
-      "learning_rate": 0.0001,
-      "loss": 0.0986,
-      "step": 135
-    },
-    {
-      "epoch": 0.17597611752690706,
-      "grad_norm": 0.042724609375,
-      "learning_rate": 0.0001,
-      "loss": 0.082,
-      "step": 140
-    },
-    {
-      "epoch": 0.18226097886715376,
-      "grad_norm": 0.049072265625,
-      "learning_rate": 0.0001,
-      "loss": 0.0729,
-      "step": 145
-    },
-    {
-      "epoch": 0.18854584020740042,
-      "grad_norm": 0.044189453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0468,
-      "step": 150
-    },
-    {
-      "epoch": 0.19483070154764712,
-      "grad_norm": 0.06982421875,
-      "learning_rate": 0.0001,
-      "loss": 0.3499,
-      "step": 155
-    },
-    {
-      "epoch": 0.20111556288789378,
-      "grad_norm": 0.054931640625,
-      "learning_rate": 0.0001,
-      "loss": 0.1535,
-      "step": 160
-    },
-    {
-      "epoch": 0.20740042422814048,
-      "grad_norm": 0.045166015625,
-      "learning_rate": 0.0001,
-      "loss": 0.1166,
-      "step": 165
-    },
-    {
-      "epoch": 0.21368528556838715,
-      "grad_norm": 0.047119140625,
-      "learning_rate": 0.0001,
-      "loss": 0.0816,
-      "step": 170
-    },
-    {
-      "epoch": 0.21997014690863384,
-      "grad_norm": 0.0634765625,
-      "learning_rate": 0.0001,
-      "loss": 0.1164,
-      "step": 175
-    },
-    {
-      "epoch": 0.2262550082488805,
-      "grad_norm": 0.0478515625,
-      "learning_rate": 0.0001,
-      "loss": 0.1004,
-      "step": 180
-    },
-    {
-      "epoch": 0.2325398695891272,
-      "grad_norm": 0.06103515625,
-      "learning_rate": 0.0001,
-      "loss": 0.092,
-      "step": 185
-    },
-    {
-      "epoch": 0.23882473092937387,
-      "grad_norm": 0.0458984375,
-      "learning_rate": 0.0001,
-      "loss": 0.0815,
-      "step": 190
-    },
-    {
-      "epoch": 0.24510959226962056,
-      "grad_norm": 0.05419921875,
-      "learning_rate": 0.0001,
-      "loss": 0.0708,
-      "step": 195
-    },
-    {
-      "epoch": 0.25139445360986723,
-      "grad_norm": 0.07763671875,
-      "learning_rate": 0.0001,
-      "loss": 0.0425,
-      "step": 200
-    },
-    {
-      "epoch": 0.2576793149501139,
-      "grad_norm": 0.11376953125,
-      "learning_rate": 0.0001,
-      "loss": 0.3435,
-      "step": 205
-    },
-    {
-      "epoch": 0.2639641762903606,
-      "grad_norm": 0.057373046875,
-      "learning_rate": 0.0001,
-      "loss": 0.1445,
-      "step": 210
-    },
-    {
-      "epoch": 0.27024903763060726,
-      "grad_norm": 0.03759765625,
-      "learning_rate": 0.0001,
-      "loss": 0.1052,
-      "step": 215
-    },
-    {
-      "epoch": 0.27653389897085395,
-      "grad_norm": 0.03515625,
-      "learning_rate": 0.0001,
-      "loss": 0.0789,
-      "step": 220
-    },
-    {
-      "epoch": 0.28281876031110065,
-      "grad_norm": 0.043212890625,
-      "learning_rate": 0.0001,
-      "loss": 0.1068,
-      "step": 225
-    },
-    {
-      "epoch": 0.28910362165134734,
-      "grad_norm": 0.043212890625,
-      "learning_rate": 0.0001,
-      "loss": 0.0958,
-      "step": 230
-    },
-    {
-      "epoch": 0.295388482991594,
-      "grad_norm": 0.04052734375,
-      "learning_rate": 0.0001,
-      "loss": 0.0817,
-      "step": 235
-    },
-    {
-      "epoch": 0.3016733443318407,
-      "grad_norm": 0.06494140625,
-      "learning_rate": 0.0001,
-      "loss": 0.07,
-      "step": 240
-    },
-    {
-      "epoch": 0.30795820567208737,
-      "grad_norm": 0.047607421875,
-      "learning_rate": 0.0001,
-      "loss": 0.0596,
-      "step": 245
-    },
-    {
-      "epoch": 0.31424306701233407,
-      "grad_norm": 0.06884765625,
-      "learning_rate": 0.0001,
-      "loss": 0.0357,
-      "step": 250
-    },
-    {
-      "epoch": 0.3205279283525807,
-      "grad_norm": 0.08154296875,
-      "learning_rate": 0.0001,
-      "loss": 0.3339,
-      "step": 255
-    },
-    {
-      "epoch": 0.3268127896928274,
-      "grad_norm": 0.048583984375,
-      "learning_rate": 0.0001,
-      "loss": 0.1467,
-      "step": 260
-    },
-    {
-      "epoch": 0.3330976510330741,
-      "grad_norm": 0.035400390625,
-      "learning_rate": 0.0001,
-      "loss": 0.1058,
-      "step": 265
-    },
-    {
-      "epoch": 0.3393825123733208,
-      "grad_norm": 0.034423828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0701,
-      "step": 270
-    },
-    {
-      "epoch": 0.3456673737135674,
-      "grad_norm": 0.04443359375,
-      "learning_rate": 0.0001,
-      "loss": 0.1052,
-      "step": 275
-    },
-    {
-      "epoch": 0.3519522350538141,
-      "grad_norm": 0.047119140625,
-      "learning_rate": 0.0001,
-      "loss": 0.0958,
-      "step": 280
-    },
-    {
-      "epoch": 0.3582370963940608,
-      "grad_norm": 0.033447265625,
-      "learning_rate": 0.0001,
-      "loss": 0.0784,
-      "step": 285
-    },
-    {
-      "epoch": 0.3645219577343075,
-      "grad_norm": 0.051025390625,
-      "learning_rate": 0.0001,
-      "loss": 0.0671,
-      "step": 290
-    },
-    {
-      "epoch": 0.37080681907455415,
-      "grad_norm": 0.0673828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0517,
-      "step": 295
-    },
-    {
-      "epoch": 0.37709168041480084,
-      "grad_norm": 0.08203125,
-      "learning_rate": 0.0001,
-      "loss": 0.0368,
-      "step": 300
-    },
-    {
-      "epoch": 0.38337654175504754,
-      "grad_norm": 0.06201171875,
-      "learning_rate": 0.0001,
-      "loss": 0.3533,
-      "step": 305
-    },
-    {
-      "epoch": 0.38966140309529423,
-      "grad_norm": 0.055419921875,
-      "learning_rate": 0.0001,
-      "loss": 0.1495,
-      "step": 310
-    },
-    {
-      "epoch": 0.3959462644355409,
-      "grad_norm": 0.044189453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0914,
-      "step": 315
-    },
-    {
-      "epoch": 0.40223112577578757,
-      "grad_norm": 0.042236328125,
-      "learning_rate": 0.0001,
-      "loss": 0.0759,
-      "step": 320
-    },
-    {
-      "epoch": 0.40851598711603426,
-      "grad_norm": 0.0439453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0956,
-      "step": 325
-    },
-    {
-      "epoch": 0.41480084845628096,
-      "grad_norm": 0.042724609375,
-      "learning_rate": 0.0001,
-      "loss": 0.0874,
-      "step": 330
-    },
-    {
-      "epoch": 0.4210857097965276,
-      "grad_norm": 0.045166015625,
-      "learning_rate": 0.0001,
-      "loss": 0.0697,
-      "step": 335
-    },
-    {
-      "epoch": 0.4273705711367743,
-      "grad_norm": 0.140625,
-      "learning_rate": 0.0001,
-      "loss": 0.0647,
-      "step": 340
-    },
-    {
-      "epoch": 0.433655432477021,
-      "grad_norm": 0.0439453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0538,
-      "step": 345
-    },
-    {
-      "epoch": 0.4399402938172677,
-      "grad_norm": 0.05029296875,
-      "learning_rate": 0.0001,
-      "loss": 0.0348,
-      "step": 350
-    },
-    {
-      "epoch": 0.4462251551575143,
-      "grad_norm": 0.0556640625,
-      "learning_rate": 0.0001,
-      "loss": 0.3265,
-      "step": 355
-    },
-    {
-      "epoch": 0.452510016497761,
-      "grad_norm": 0.06982421875,
-      "learning_rate": 0.0001,
-      "loss": 0.1376,
-      "step": 360
-    },
-    {
-      "epoch": 0.4587948778380077,
-      "grad_norm": 0.034423828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0982,
-      "step": 365
-    },
-    {
-      "epoch": 0.4650797391782544,
-      "grad_norm": 0.042236328125,
-      "learning_rate": 0.0001,
-      "loss": 0.0813,
-      "step": 370
-    },
-    {
-      "epoch": 0.47136460051850104,
-      "grad_norm": 0.040771484375,
-      "learning_rate": 0.0001,
-      "loss": 0.0947,
-      "step": 375
-    },
-    {
-      "epoch": 0.47764946185874774,
-      "grad_norm": 0.0400390625,
-      "learning_rate": 0.0001,
-      "loss": 0.0847,
-      "step": 380
-    },
-    {
-      "epoch": 0.48393432319899443,
-      "grad_norm": 0.0419921875,
-      "learning_rate": 0.0001,
-      "loss": 0.0738,
-      "step": 385
-    },
-    {
-      "epoch": 0.4902191845392411,
-      "grad_norm": 0.043701171875,
-      "learning_rate": 0.0001,
-      "loss": 0.062,
-      "step": 390
-    },
-    {
-      "epoch": 0.49650404587948777,
-      "grad_norm": 0.08251953125,
-      "learning_rate": 0.0001,
-      "loss": 0.0558,
-      "step": 395
-    },
-    {
-      "epoch": 0.5027889072197345,
-      "grad_norm": 0.040771484375,
-      "learning_rate": 0.0001,
-      "loss": 0.0327,
-      "step": 400
-    },
-    {
-      "epoch": 0.5090737685599811,
-      "grad_norm": 0.062255859375,
-      "learning_rate": 0.0001,
-      "loss": 0.3109,
-      "step": 405
-    },
-    {
-      "epoch": 0.5153586299002278,
-      "grad_norm": 0.06689453125,
-      "learning_rate": 0.0001,
-      "loss": 0.1447,
-      "step": 410
-    },
-    {
-      "epoch": 0.5216434912404745,
-      "grad_norm": 0.033935546875,
-      "learning_rate": 0.0001,
-      "loss": 0.0943,
-      "step": 415
-    },
-    {
-      "epoch": 0.5279283525807212,
-      "grad_norm": 0.037353515625,
-      "learning_rate": 0.0001,
-      "loss": 0.0724,
-      "step": 420
-    },
-    {
-      "epoch": 0.5342132139209679,
-      "grad_norm": 0.03466796875,
-      "learning_rate": 0.0001,
-      "loss": 0.1063,
-      "step": 425
-    },
-    {
-      "epoch": 0.5404980752612145,
-      "grad_norm": 0.068359375,
-      "learning_rate": 0.0001,
-      "loss": 0.0855,
-      "step": 430
-    },
-    {
-      "epoch": 0.5467829366014613,
-      "grad_norm": 0.044677734375,
-      "learning_rate": 0.0001,
-      "loss": 0.076,
-      "step": 435
-    },
-    {
-      "epoch": 0.5530677979417079,
-      "grad_norm": 0.04638671875,
-      "learning_rate": 0.0001,
-      "loss": 0.0608,
-      "step": 440
-    },
-    {
-      "epoch": 0.5593526592819545,
-      "grad_norm": 0.03515625,
-      "learning_rate": 0.0001,
-      "loss": 0.0506,
-      "step": 445
-    },
-    {
-      "epoch": 0.5656375206222013,
-      "grad_norm": 0.02099609375,
-      "learning_rate": 0.0001,
-      "loss": 0.0336,
-      "step": 450
-    },
-    {
-      "epoch": 0.5719223819624479,
-      "grad_norm": 0.059326171875,
-      "learning_rate": 0.0001,
-      "loss": 0.2604,
-      "step": 455
-    },
-    {
-      "epoch": 0.5782072433026947,
-      "grad_norm": 0.07470703125,
-      "learning_rate": 0.0001,
-      "loss": 0.1273,
-      "step": 460
-    },
-    {
-      "epoch": 0.5844921046429413,
-      "grad_norm": 0.054931640625,
-      "learning_rate": 0.0001,
-      "loss": 0.094,
-      "step": 465
-    },
-    {
-      "epoch": 0.590776965983188,
-      "grad_norm": 0.021240234375,
-      "learning_rate": 0.0001,
-      "loss": 0.0642,
-      "step": 470
-    },
-    {
-      "epoch": 0.5970618273234347,
-      "grad_norm": 0.032958984375,
-      "learning_rate": 0.0001,
-      "loss": 0.0914,
-      "step": 475
-    },
-    {
-      "epoch": 0.6033466886636814,
-      "grad_norm": 0.0400390625,
-      "learning_rate": 0.0001,
-      "loss": 0.08,
-      "step": 480
-    },
-    {
-      "epoch": 0.609631550003928,
-      "grad_norm": 0.046875,
-      "learning_rate": 0.0001,
-      "loss": 0.0709,
-      "step": 485
-    },
-    {
-      "epoch": 0.6159164113441747,
-      "grad_norm": 0.048828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0588,
-      "step": 490
-    },
-    {
-      "epoch": 0.6222012726844214,
-      "grad_norm": 0.056884765625,
-      "learning_rate": 0.0001,
-      "loss": 0.0417,
-      "step": 495
-    },
-    {
-      "epoch": 0.6284861340246681,
-      "grad_norm": 0.041259765625,
-      "learning_rate": 0.0001,
-      "loss": 0.0281,
-      "step": 500
-    },
-    {
-      "epoch": 0.6347709953649148,
-      "grad_norm": 0.064453125,
-      "learning_rate": 0.0001,
-      "loss": 0.2518,
-      "step": 505
-    },
-    {
-      "epoch": 0.6410558567051614,
-      "grad_norm": 0.058837890625,
-      "learning_rate": 0.0001,
-      "loss": 0.1275,
-      "step": 510
-    },
-    {
-      "epoch": 0.6473407180454082,
-      "grad_norm": 0.034912109375,
-      "learning_rate": 0.0001,
-      "loss": 0.086,
-      "step": 515
-    },
-    {
-      "epoch": 0.6536255793856548,
-      "grad_norm": 0.042236328125,
-      "learning_rate": 0.0001,
-      "loss": 0.0677,
-      "step": 520
-    },
-    {
-      "epoch": 0.6599104407259014,
-      "grad_norm": 0.03369140625,
-      "learning_rate": 0.0001,
-      "loss": 0.0934,
-      "step": 525
-    },
-    {
-      "epoch": 0.6661953020661482,
-      "grad_norm": 0.040771484375,
-      "learning_rate": 0.0001,
-      "loss": 0.0781,
-      "step": 530
-    },
-    {
-      "epoch": 0.6724801634063948,
-      "grad_norm": 0.041748046875,
-      "learning_rate": 0.0001,
-      "loss": 0.0638,
-      "step": 535
-    },
-    {
-      "epoch": 0.6787650247466416,
-      "grad_norm": 0.035888671875,
-      "learning_rate": 0.0001,
-      "loss": 0.0543,
-      "step": 540
-    },
-    {
-      "epoch": 0.6850498860868882,
-      "grad_norm": 0.0341796875,
-      "learning_rate": 0.0001,
-      "loss": 0.0428,
-      "step": 545
-    },
-    {
-      "epoch": 0.6913347474271349,
-      "grad_norm": 0.03271484375,
-      "learning_rate": 0.0001,
-      "loss": 0.0278,
-      "step": 550
-    },
-    {
-      "epoch": 0.6976196087673816,
-      "grad_norm": 0.055419921875,
-      "learning_rate": 0.0001,
-      "loss": 0.2516,
-      "step": 555
-    },
-    {
-      "epoch": 0.7039044701076282,
-      "grad_norm": 0.0634765625,
-      "learning_rate": 0.0001,
-      "loss": 0.1206,
-      "step": 560
-    },
-    {
-      "epoch": 0.7101893314478749,
-      "grad_norm": 0.038818359375,
-      "learning_rate": 0.0001,
-      "loss": 0.0805,
-      "step": 565
-    },
-    {
-      "epoch": 0.7164741927881216,
-      "grad_norm": 0.036865234375,
-      "learning_rate": 0.0001,
-      "loss": 0.0648,
-      "step": 570
-    },
-    {
-      "epoch": 0.7227590541283683,
-      "grad_norm": 0.03857421875,
-      "learning_rate": 0.0001,
-      "loss": 0.0835,
-      "step": 575
-    },
-    {
-      "epoch": 0.729043915468615,
-      "grad_norm": 0.041748046875,
-      "learning_rate": 0.0001,
-      "loss": 0.0773,
-      "step": 580
-    },
-    {
-      "epoch": 0.7353287768088617,
-      "grad_norm": 0.04443359375,
-      "learning_rate": 0.0001,
-      "loss": 0.0609,
-      "step": 585
-    },
-    {
-      "epoch": 0.7416136381491083,
-      "grad_norm": 0.05224609375,
-      "learning_rate": 0.0001,
-      "loss": 0.0516,
-      "step": 590
-    },
-    {
-      "epoch": 0.747898499489355,
-      "grad_norm": 0.1240234375,
-      "learning_rate": 0.0001,
-      "loss": 0.0416,
-      "step": 595
-    },
-    {
-      "epoch": 0.7541833608296017,
-      "grad_norm": 0.0206298828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0298,
-      "step": 600
-    },
-    {
-      "epoch": 0.7604682221698483,
-      "grad_norm": 0.07080078125,
-      "learning_rate": 0.0001,
-      "loss": 0.243,
-      "step": 605
-    },
-    {
-      "epoch": 0.7667530835100951,
-      "grad_norm": 0.06494140625,
-      "learning_rate": 0.0001,
-      "loss": 0.1263,
-      "step": 610
-    },
-    {
-      "epoch": 0.7730379448503417,
-      "grad_norm": 0.0537109375,
-      "learning_rate": 0.0001,
-      "loss": 0.088,
-      "step": 615
-    },
-    {
-      "epoch": 0.7793228061905885,
-      "grad_norm": 0.03515625,
-      "learning_rate": 0.0001,
-      "loss": 0.0559,
-      "step": 620
-    },
-    {
-      "epoch": 0.7856076675308351,
-      "grad_norm": 0.047607421875,
-      "learning_rate": 0.0001,
-      "loss": 0.0853,
-      "step": 625
-    },
-    {
-      "epoch": 0.7918925288710817,
-      "grad_norm": 0.0419921875,
-      "learning_rate": 0.0001,
-      "loss": 0.0715,
-      "step": 630
-    },
-    {
-      "epoch": 0.7981773902113285,
-      "grad_norm": 0.0927734375,
-      "learning_rate": 0.0001,
-      "loss": 0.0598,
-      "step": 635
-    },
-    {
-      "epoch": 0.8044622515515751,
-      "grad_norm": 0.0419921875,
-      "learning_rate": 0.0001,
-      "loss": 0.0466,
-      "step": 640
-    },
-    {
-      "epoch": 0.8107471128918218,
-      "grad_norm": 0.043701171875,
-      "learning_rate": 0.0001,
-      "loss": 0.0404,
-      "step": 645
-    },
-    {
-      "epoch": 0.8170319742320685,
-      "grad_norm": 0.033935546875,
-      "learning_rate": 0.0001,
-      "loss": 0.0315,
-      "step": 650
-    },
-    {
-      "epoch": 0.8233168355723152,
-      "grad_norm": 0.08251953125,
-      "learning_rate": 0.0001,
-      "loss": 0.2336,
-      "step": 655
-    },
-    {
-      "epoch": 0.8296016969125619,
-      "grad_norm": 0.053955078125,
-      "learning_rate": 0.0001,
-      "loss": 0.1183,
-      "step": 660
-    },
-    {
-      "epoch": 0.8358865582528086,
-      "grad_norm": 0.03759765625,
-      "learning_rate": 0.0001,
-      "loss": 0.0826,
-      "step": 665
-    },
-    {
-      "epoch": 0.8421714195930552,
-      "grad_norm": 0.046142578125,
-      "learning_rate": 0.0001,
-      "loss": 0.0657,
-      "step": 670
-    },
-    {
-      "epoch": 0.8484562809333019,
-      "grad_norm": 0.04248046875,
-      "learning_rate": 0.0001,
-      "loss": 0.0845,
-      "step": 675
-    },
-    {
-      "epoch": 0.8547411422735486,
-      "grad_norm": 0.048828125,
-      "learning_rate": 0.0001,
-      "loss": 0.0663,
-      "step": 680
-    },
-    {
-      "epoch": 0.8610260036137952,
-      "grad_norm": 0.0625,
-      "learning_rate": 0.0001,
-      "loss": 0.0565,
-      "step": 685
-    },
-    {
-      "epoch": 0.867310864954042,
-      "grad_norm": 0.05810546875,
-      "learning_rate": 0.0001,
-      "loss": 0.0486,
-      "step": 690
-    },
-    {
-      "epoch": 0.8735957262942886,
-      "grad_norm": 0.054931640625,
-      "learning_rate": 0.0001,
-      "loss": 0.0397,
-      "step": 695
-    },
-    {
-      "epoch": 0.8798805876345354,
-      "grad_norm": 0.037353515625,
-      "learning_rate": 0.0001,
-      "loss": 0.0268,
-      "step": 700
-    },
-    {
-      "epoch": 0.886165448974782,
-      "grad_norm": 0.09521484375,
-      "learning_rate": 0.0001,
-      "loss": 0.2371,
-      "step": 705
-    },
-    {
-      "epoch": 0.8924503103150286,
-      "grad_norm": 0.06494140625,
-      "learning_rate": 0.0001,
-      "loss": 0.1144,
-      "step": 710
-    },
-    {
-      "epoch": 0.8987351716552754,
-      "grad_norm": 0.041748046875,
-      "learning_rate": 0.0001,
-      "loss": 0.0906,
-      "step": 715
-    },
-    {
-      "epoch": 0.905020032995522,
-      "grad_norm": 0.033203125,
-      "learning_rate": 0.0001,
-      "loss": 0.0549,
-      "step": 720
-    },
-    {
-      "epoch": 0.905020032995522,
-      "step": 720,
-      "total_flos": 1.5364568007927398e+18,
-      "train_loss": 0.11899957797593541,
-      "train_runtime": 69215.1765,
-      "train_samples_per_second": 0.666,
-      "train_steps_per_second": 0.01
-    }
-  ],
-  "logging_steps": 5,
-  "max_steps": 720,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 1,
-  "save_steps": 90,
-  "stateful_callbacks": {
-    "TrainerControl": {
-      "args": {
-        "should_epoch_stop": false,
-        "should_evaluate": false,
-        "should_log": false,
-        "should_save": true,
-        "should_training_stop": true
-      },
-      "attributes": {}
-    }
-  },
-  "total_flos": 1.5364568007927398e+18,
-  "train_batch_size": 4,
-  "trial_name": null,
-  "trial_params": null
-}
diff --git a/jam/jam-dataflow/ckpt.pt b/jam/jam-dataflow/ckpt.pt
index 0062281a6c0edd54f20dd76a6eb1770035a2af8a..d248bc1b78070e9e53278bb3e7b24e904213be0f 100644
--- a/jam/jam-dataflow/ckpt.pt
+++ b/jam/jam-dataflow/ckpt.pt
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a48545ac3c2f22e1c75a304ee2c8426b58cb452858a8a070d712ccff156fcce
-size 4255365370
+oid sha256:fb546c749c22378e34791cb47bc6f8195300a3f538e2c30150d592a71d9afc1a
+size 4255365797