chiayisu committed on Dec 10, 2025

Commit

fe7b4a6

1 Parent(s): 2f30e1d

update

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

codellama/c/dataflow_c_pretrained/all_results.json +6 -6
codellama/c/dataflow_c_pretrained/checkpoint-190/README.md +202 -0
codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_config.json +34 -0
codellama/{java/dataflow_pretrained/checkpoint-720 → c/dataflow_c_pretrained/checkpoint-190}/adapter_model.safetensors +1 -1
codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model/README.md +202 -0
codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model/adapter_config.json +34 -0
codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190/adapter_model}/adapter_model.safetensors +1 -1
codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190}/added_tokens.json +0 -0
codellama/c/dataflow_c_pretrained/checkpoint-190/optimizer.pt +3 -0
codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190}/rng_state.pth +0 -0
codellama/{java/dataflow_pretrained/checkpoint-720 → c/dataflow_c_pretrained/checkpoint-190}/scheduler.pt +1 -1
codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190}/special_tokens_map.json +0 -0
codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190}/tokenizer.model +0 -0
codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190}/tokenizer_config.json +0 -0
codellama/c/dataflow_c_pretrained/checkpoint-190/trainer_state.json +299 -0
codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190}/training_args.bin +1 -1
codellama/c/dataflow_c_pretrained/metrics.json +1 -1
codellama/c/dataflow_c_pretrained/train_results.json +6 -6
codellama/c/dataflow_c_pretrained/trainer_state.json +125 -524
codellama/c/dmcodegen/dmcodegen_base_c/all_results.json +8 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/README.md +202 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_config.json +34 -0
codellama/{java/dataflow_pretrained/checkpoint-720/adapter_model → c/dmcodegen/dmcodegen_base_c/checkpoint-180}/adapter_model.safetensors +1 -1
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model/README.md +202 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model/adapter_config.json +34 -0
codellama/c/{dataflow_c_pretrained/checkpoint-475 → dmcodegen/dmcodegen_base_c/checkpoint-180}/adapter_model/adapter_model.safetensors +1 -1
codellama/{java/dataflow_pretrained/checkpoint-720 → c/dmcodegen/dmcodegen_base_c/checkpoint-180}/added_tokens.json +0 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/optimizer.pt +3 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/rng_state.pth +3 -0
codellama/c/{dataflow_c_pretrained/checkpoint-475 → dmcodegen/dmcodegen_base_c/checkpoint-180}/scheduler.pt +1 -1
codellama/{java/dataflow_pretrained/checkpoint-720 → c/dmcodegen/dmcodegen_base_c/checkpoint-180}/special_tokens_map.json +0 -0
codellama/{java/dataflow_pretrained/checkpoint-720 → c/dmcodegen/dmcodegen_base_c/checkpoint-180}/tokenizer.model +0 -0
codellama/{java/dataflow_pretrained/checkpoint-720 → c/dmcodegen/dmcodegen_base_c/checkpoint-180}/tokenizer_config.json +0 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/trainer_state.json +285 -0
codellama/{java/dataflow_pretrained/checkpoint-720 → c/dmcodegen/dmcodegen_base_c/checkpoint-180}/training_args.bin +1 -1
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/README.md +202 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_config.json +34 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model.safetensors +3 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/README.md +202 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/adapter_config.json +34 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/adapter_model.safetensors +3 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/added_tokens.json +3 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/optimizer.pt +3 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/rng_state.pth +3 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/scheduler.pt +3 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/special_tokens_map.json +36 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/tokenizer.model +3 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/tokenizer_config.json +94 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/trainer_state.json +159 -0
codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/training_args.bin +3 -0

codellama/c/dataflow_c_pretrained/all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.5076373735369968,
-    "total_flos": 1.4535297138363187e+18,
-    "train_loss": 0.11740684490454824,
-    "train_runtime": 39384.0084,
-    "train_samples_per_second": 0.772,
-    "train_steps_per_second": 0.012
 }

 {
+    "epoch": 1.2058706862356208,
+    "total_flos": 1.216645538039931e+18,
+    "train_loss": 0.10745409297707834,
+    "train_runtime": 37043.3755,
+    "train_samples_per_second": 0.657,
+    "train_steps_per_second": 0.005
 }

codellama/c/dataflow_c_pretrained/checkpoint-190/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.13.2

codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "down_proj",
+    "gate_proj",
+    "up_proj",
+    "q_proj",
+    "o_proj",
+    "v_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

codellama/{java/dataflow_pretrained/checkpoint-720 → c/dataflow_c_pretrained/checkpoint-190}/adapter_model.safetensors RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3cf8f1c4cc300ca5094e08295cc0dcffacce527b464e1372de75271bb4d522a9
 size 1156480200

 version https://git-lfs.github.com/spec/v1
+oid sha256:9de8d340f5057379260edf56d8c2bf090c3f6e213b999eafc222fced213416fe
 size 1156480200

codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.13.2

codellama/c/dataflow_c_pretrained/checkpoint-190/adapter_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "down_proj",
+    "gate_proj",
+    "up_proj",
+    "q_proj",
+    "o_proj",
+    "v_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190/adapter_model}/adapter_model.safetensors RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e3bd7cb053c3e00ea48ed365eed4b65ae5e2d7d807e71ec5615d765dfba19de
 size 1156480200

 version https://git-lfs.github.com/spec/v1
+oid sha256:9de8d340f5057379260edf56d8c2bf090c3f6e213b999eafc222fced213416fe
 size 1156480200

codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190}/added_tokens.json RENAMED Viewed

File without changes

codellama/c/dataflow_c_pretrained/checkpoint-190/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6951313c1b248ce4c836696dac190e0dab42809267e147c8304926cfd6019b36
+size 2003126962

codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190}/rng_state.pth RENAMED Viewed

File without changes

codellama/{java/dataflow_pretrained/checkpoint-720 → c/dataflow_c_pretrained/checkpoint-190}/scheduler.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c48ea2f606cbbb6177c782dd71ba690a6d43d7f02de58760a50cf5c03d3d9324
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4326f7a2418d4815e699b330bd26d5f5313efeeb51d248a1c8d3070a922c1ddd
 size 1064

codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190}/special_tokens_map.json RENAMED Viewed

File without changes

codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190}/tokenizer.model RENAMED Viewed

File without changes

codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190}/tokenizer_config.json RENAMED Viewed

File without changes

codellama/c/dataflow_c_pretrained/checkpoint-190/trainer_state.json ADDED Viewed

	@@ -0,0 +1,299 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.2058706862356208,
+  "eval_steps": 500,
+  "global_step": 190,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0317334391114637,
+      "grad_norm": 0.060546875,
+      "learning_rate": 0.0001,
+      "loss": 0.6421,
+      "step": 5
+    },
+    {
+      "epoch": 0.0634668782229274,
+      "grad_norm": 0.11572265625,
+      "learning_rate": 0.0001,
+      "loss": 0.5213,
+      "step": 10
+    },
+    {
+      "epoch": 0.09520031733439112,
+      "grad_norm": 0.08251953125,
+      "learning_rate": 0.0001,
+      "loss": 0.2925,
+      "step": 15
+    },
+    {
+      "epoch": 0.1269337564458548,
+      "grad_norm": 0.0634765625,
+      "learning_rate": 0.0001,
+      "loss": 0.1978,
+      "step": 20
+    },
+    {
+      "epoch": 0.15866719555731854,
+      "grad_norm": 0.08251953125,
+      "learning_rate": 0.0001,
+      "loss": 0.1538,
+      "step": 25
+    },
+    {
+      "epoch": 0.19040063466878224,
+      "grad_norm": 0.10888671875,
+      "learning_rate": 0.0001,
+      "loss": 0.106,
+      "step": 30
+    },
+    {
+      "epoch": 0.22213407378024594,
+      "grad_norm": 0.049560546875,
+      "learning_rate": 0.0001,
+      "loss": 0.0454,
+      "step": 35
+    },
+    {
+      "epoch": 0.2538675128917096,
+      "grad_norm": 0.310546875,
+      "learning_rate": 0.0001,
+      "loss": 0.1215,
+      "step": 40
+    },
+    {
+      "epoch": 0.28560095200317337,
+      "grad_norm": 0.06494140625,
+      "learning_rate": 0.0001,
+      "loss": 0.2476,
+      "step": 45
+    },
+    {
+      "epoch": 0.31733439111463707,
+      "grad_norm": 0.40234375,
+      "learning_rate": 0.0001,
+      "loss": 0.1073,
+      "step": 50
+    },
+    {
+      "epoch": 0.3490678302261008,
+      "grad_norm": 0.04052734375,
+      "learning_rate": 0.0001,
+      "loss": 0.0863,
+      "step": 55
+    },
+    {
+      "epoch": 0.3808012693375645,
+      "grad_norm": 0.03369140625,
+      "learning_rate": 0.0001,
+      "loss": 0.0671,
+      "step": 60
+    },
+    {
+      "epoch": 0.4125347084490282,
+      "grad_norm": 0.0274658203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0493,
+      "step": 65
+    },
+    {
+      "epoch": 0.4442681475604919,
+      "grad_norm": 0.0277099609375,
+      "learning_rate": 0.0001,
+      "loss": 0.0311,
+      "step": 70
+    },
+    {
+      "epoch": 0.4760015866719556,
+      "grad_norm": 0.01275634765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0125,
+      "step": 75
+    },
+    {
+      "epoch": 0.5077350257834192,
+      "grad_norm": 0.06787109375,
+      "learning_rate": 0.0001,
+      "loss": 0.1307,
+      "step": 80
+    },
+    {
+      "epoch": 0.539468464894883,
+      "grad_norm": 0.050048828125,
+      "learning_rate": 0.0001,
+      "loss": 0.171,
+      "step": 85
+    },
+    {
+      "epoch": 0.5712019040063467,
+      "grad_norm": 0.060791015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0818,
+      "step": 90
+    },
+    {
+      "epoch": 0.6029353431178104,
+      "grad_norm": 0.033203125,
+      "learning_rate": 0.0001,
+      "loss": 0.0658,
+      "step": 95
+    },
+    {
+      "epoch": 0.6346687822292741,
+      "grad_norm": 0.0235595703125,
+      "learning_rate": 0.0001,
+      "loss": 0.046,
+      "step": 100
+    },
+    {
+      "epoch": 0.6664022213407378,
+      "grad_norm": 0.0299072265625,
+      "learning_rate": 0.0001,
+      "loss": 0.0384,
+      "step": 105
+    },
+    {
+      "epoch": 0.6981356604522015,
+      "grad_norm": 0.0181884765625,
+      "learning_rate": 0.0001,
+      "loss": 0.0187,
+      "step": 110
+    },
+    {
+      "epoch": 0.7298690995636652,
+      "grad_norm": 0.019775390625,
+      "learning_rate": 0.0001,
+      "loss": 0.0095,
+      "step": 115
+    },
+    {
+      "epoch": 0.761602538675129,
+      "grad_norm": 0.060791015625,
+      "learning_rate": 0.0001,
+      "loss": 0.1381,
+      "step": 120
+    },
+    {
+      "epoch": 0.7933359777865926,
+      "grad_norm": 0.038818359375,
+      "learning_rate": 0.0001,
+      "loss": 0.1125,
+      "step": 125
+    },
+    {
+      "epoch": 0.8250694168980564,
+      "grad_norm": 0.032958984375,
+      "learning_rate": 0.0001,
+      "loss": 0.062,
+      "step": 130
+    },
+    {
+      "epoch": 0.85680285600952,
+      "grad_norm": 0.03173828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0526,
+      "step": 135
+    },
+    {
+      "epoch": 0.8885362951209838,
+      "grad_norm": 0.02392578125,
+      "learning_rate": 0.0001,
+      "loss": 0.0382,
+      "step": 140
+    },
+    {
+      "epoch": 0.9202697342324474,
+      "grad_norm": 0.027099609375,
+      "learning_rate": 0.0001,
+      "loss": 0.027,
+      "step": 145
+    },
+    {
+      "epoch": 0.9520031733439112,
+      "grad_norm": 0.02294921875,
+      "learning_rate": 0.0001,
+      "loss": 0.0115,
+      "step": 150
+    },
+    {
+      "epoch": 0.9837366124553748,
+      "grad_norm": 0.02099609375,
+      "learning_rate": 0.0001,
+      "loss": 0.005,
+      "step": 155
+    },
+    {
+      "epoch": 1.0154700515668384,
+      "grad_norm": 0.0703125,
+      "learning_rate": 0.0001,
+      "loss": 0.1291,
+      "step": 160
+    },
+    {
+      "epoch": 1.0472034906783023,
+      "grad_norm": 0.04052734375,
+      "learning_rate": 0.0001,
+      "loss": 0.1033,
+      "step": 165
+    },
+    {
+      "epoch": 1.078936929789766,
+      "grad_norm": 0.03173828125,
+      "learning_rate": 0.0001,
+      "loss": 0.0539,
+      "step": 170
+    },
+    {
+      "epoch": 1.1106703689012296,
+      "grad_norm": 0.0299072265625,
+      "learning_rate": 0.0001,
+      "loss": 0.043,
+      "step": 175
+    },
+    {
+      "epoch": 1.1424038080126935,
+      "grad_norm": 0.0262451171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0303,
+      "step": 180
+    },
+    {
+      "epoch": 1.1741372471241571,
+      "grad_norm": 0.060791015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0239,
+      "step": 185
+    },
+    {
+      "epoch": 1.2058706862356208,
+      "grad_norm": 0.015625,
+      "learning_rate": 0.0001,
+      "loss": 0.0095,
+      "step": 190
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 190,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 90,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.216645538039931e+18,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

codellama/c/dataflow_c_pretrained/{checkpoint-475 → checkpoint-190}/training_args.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c2563f751da0f955348ed5d2c3112b7092683a85415d6f8758379982f01f992
 size 7416

 version https://git-lfs.github.com/spec/v1
+oid sha256:91f41d6ab0b3b00576cfaae17b2c89c881cde6b3ddf94e79209bf6c926c2f26a
 size 7416

codellama/c/dataflow_c_pretrained/metrics.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"run_name": "dataflow_c_pretrained", "train_runtime": 39384.0084, "train_samples_per_second": 0.772, "train_steps_per_second": 0.012, "total_flos": 1.4535297138363187e+18, "train_loss": 0.11740684490454824, "epoch": 1.5076373735369968}


1	+ {"run_name": "dataflow_c", "train_runtime": 37043.3755, "train_samples_per_second": 0.657, "train_steps_per_second": 0.005, "total_flos": 1.216645538039931e+18, "train_loss": 0.10745409297707834, "epoch": 1.2058706862356208}

codellama/c/dataflow_c_pretrained/train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.5076373735369968,
-    "total_flos": 1.4535297138363187e+18,
-    "train_loss": 0.11740684490454824,
-    "train_runtime": 39384.0084,
-    "train_samples_per_second": 0.772,
-    "train_steps_per_second": 0.012
 }

 {
+    "epoch": 1.2058706862356208,
+    "total_flos": 1.216645538039931e+18,
+    "train_loss": 0.10745409297707834,
+    "train_runtime": 37043.3755,
+    "train_samples_per_second": 0.657,
+    "train_steps_per_second": 0.005
 }

codellama/c/dataflow_c_pretrained/trainer_state.json CHANGED Viewed

@@ -1,690 +1,291 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.5076373735369968,
   "eval_steps": 500,
-  "global_step": 475,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.015869867089863123,
-      "grad_norm": 0.058837890625,
       "learning_rate": 0.0001,
-      "loss": 0.769,
       "step": 5
     },
     {
-      "epoch": 0.031739734179726246,
       "grad_norm": 0.11572265625,
       "learning_rate": 0.0001,
-      "loss": 0.615,
       "step": 10
     },
     {
-      "epoch": 0.047609601269589366,
-      "grad_norm": 0.0634765625,
       "learning_rate": 0.0001,
-      "loss": 0.3973,
       "step": 15
     },
     {
-      "epoch": 0.06347946835945249,
-      "grad_norm": 0.07470703125,
       "learning_rate": 0.0001,
-      "loss": 0.2804,
       "step": 20
     },
     {
-      "epoch": 0.0793493354493156,
-      "grad_norm": 0.06884765625,
       "learning_rate": 0.0001,
-      "loss": 0.2244,
       "step": 25
     },
     {
-      "epoch": 0.09521920253917873,
-      "grad_norm": 0.10498046875,
       "learning_rate": 0.0001,
-      "loss": 0.1925,
       "step": 30
     },
     {
-      "epoch": 0.11108906962904186,
-      "grad_norm": 0.08056640625,
       "learning_rate": 0.0001,
-      "loss": 0.1477,
       "step": 35
     },
     {
-      "epoch": 0.12695893671890499,
-      "grad_norm": 0.0732421875,
       "learning_rate": 0.0001,
-      "loss": 0.0969,
       "step": 40
     },
     {
-      "epoch": 0.1428288038087681,
-      "grad_norm": 0.07568359375,
       "learning_rate": 0.0001,
-      "loss": 0.0695,
       "step": 45
     },
     {
-      "epoch": 0.1586986708986312,
-      "grad_norm": 0.125,
       "learning_rate": 0.0001,
-      "loss": 0.046,
       "step": 50
     },
     {
-      "epoch": 0.17456853798849434,
-      "grad_norm": 0.0859375,
       "learning_rate": 0.0001,
-      "loss": 0.4702,
       "step": 55
     },
     {
-      "epoch": 0.19043840507835746,
-      "grad_norm": 0.06787109375,
       "learning_rate": 0.0001,
-      "loss": 0.2393,
       "step": 60
     },
     {
-      "epoch": 0.2063082721682206,
-      "grad_norm": 0.045166015625,
       "learning_rate": 0.0001,
-      "loss": 0.1604,
       "step": 65
     },
     {
-      "epoch": 0.22217813925808372,
-      "grad_norm": 0.04931640625,
       "learning_rate": 0.0001,
-      "loss": 0.1499,
       "step": 70
     },
     {
-      "epoch": 0.23804800634794684,
-      "grad_norm": 0.041748046875,
       "learning_rate": 0.0001,
-      "loss": 0.123,
       "step": 75
     },
     {
-      "epoch": 0.25391787343780997,
-      "grad_norm": 0.042236328125,
       "learning_rate": 0.0001,
-      "loss": 0.1056,
       "step": 80
     },
     {
-      "epoch": 0.26978774052767307,
-      "grad_norm": 0.049560546875,
       "learning_rate": 0.0001,
-      "loss": 0.0801,
       "step": 85
     },
     {
-      "epoch": 0.2856576076175362,
-      "grad_norm": 0.043212890625,
       "learning_rate": 0.0001,
-      "loss": 0.0617,
       "step": 90
     },
     {
-      "epoch": 0.3015274747073993,
-      "grad_norm": 0.037109375,
       "learning_rate": 0.0001,
-      "loss": 0.0423,
       "step": 95
     },
     {
-      "epoch": 0.3173973417972624,
-      "grad_norm": 0.028564453125,
       "learning_rate": 0.0001,
-      "loss": 0.0295,
       "step": 100
     },
     {
-      "epoch": 0.3332672088871256,
-      "grad_norm": 0.0634765625,
       "learning_rate": 0.0001,
-      "loss": 0.3494,
       "step": 105
     },
     {
-      "epoch": 0.3491370759769887,
-      "grad_norm": 0.07958984375,
       "learning_rate": 0.0001,
-      "loss": 0.1779,
       "step": 110
     },
     {
-      "epoch": 0.36500694306685183,
-      "grad_norm": 0.040283203125,
       "learning_rate": 0.0001,
-      "loss": 0.1283,
       "step": 115
     },
     {
-      "epoch": 0.38087681015671493,
-      "grad_norm": 0.038818359375,
       "learning_rate": 0.0001,
-      "loss": 0.111,
       "step": 120
     },
     {
-      "epoch": 0.3967466772465781,
-      "grad_norm": 0.048095703125,
       "learning_rate": 0.0001,
-      "loss": 0.0945,
       "step": 125
     },
     {
-      "epoch": 0.4126165443364412,
-      "grad_norm": 0.06103515625,
       "learning_rate": 0.0001,
-      "loss": 0.0833,
       "step": 130
     },
     {
-      "epoch": 0.4284864114263043,
-      "grad_norm": 0.05859375,
       "learning_rate": 0.0001,
-      "loss": 0.0702,
       "step": 135
     },
     {
-      "epoch": 0.44435627851616744,
-      "grad_norm": 0.060302734375,
       "learning_rate": 0.0001,
-      "loss": 0.0509,
       "step": 140
     },
     {
-      "epoch": 0.46022614560603053,
-      "grad_norm": 0.042724609375,
       "learning_rate": 0.0001,
-      "loss": 0.0363,
       "step": 145
     },
     {
-      "epoch": 0.4760960126958937,
-      "grad_norm": 0.048583984375,
       "learning_rate": 0.0001,
-      "loss": 0.0225,
       "step": 150
     },
     {
-      "epoch": 0.4919658797857568,
-      "grad_norm": 0.056396484375,
       "learning_rate": 0.0001,
-      "loss": 0.3315,
       "step": 155
     },
     {
-      "epoch": 0.5078357468756199,
-      "grad_norm": 0.0478515625,
       "learning_rate": 0.0001,
-      "loss": 0.1585,
       "step": 160
     },
     {
-      "epoch": 0.523705613965483,
-      "grad_norm": 0.07177734375,
       "learning_rate": 0.0001,
-      "loss": 0.1173,
       "step": 165
     },
     {
-      "epoch": 0.5395754810553461,
-      "grad_norm": 0.050537109375,
       "learning_rate": 0.0001,
-      "loss": 0.1054,
       "step": 170
     },
     {
-      "epoch": 0.5554453481452093,
-      "grad_norm": 0.052734375,
       "learning_rate": 0.0001,
-      "loss": 0.0828,
       "step": 175
     },
     {
-      "epoch": 0.5713152152350724,
-      "grad_norm": 0.05126953125,
       "learning_rate": 0.0001,
-      "loss": 0.0778,
       "step": 180
     },
     {
-      "epoch": 0.5871850823249355,
-      "grad_norm": 0.034423828125,
       "learning_rate": 0.0001,
-      "loss": 0.0632,
       "step": 185
     },
     {
-      "epoch": 0.6030549494147986,
-      "grad_norm": 0.038330078125,
       "learning_rate": 0.0001,
-      "loss": 0.042,
       "step": 190
     },
     {
-      "epoch": 0.6189248165046618,
-      "grad_norm": 0.0400390625,
-      "learning_rate": 0.0001,
-      "loss": 0.0315,
-      "step": 195
-    },
-    {
-      "epoch": 0.6347946835945248,
-      "grad_norm": 0.08642578125,
-      "learning_rate": 0.0001,
-      "loss": 0.0195,
-      "step": 200
-    },
-    {
-      "epoch": 0.650664550684388,
-      "grad_norm": 0.07080078125,
-      "learning_rate": 0.0001,
-      "loss": 0.3038,
-      "step": 205
-    },
-    {
-      "epoch": 0.6665344177742512,
-      "grad_norm": 0.0556640625,
-      "learning_rate": 0.0001,
-      "loss": 0.1574,
-      "step": 210
-    },
-    {
-      "epoch": 0.6824042848641143,
-      "grad_norm": 0.054443359375,
-      "learning_rate": 0.0001,
-      "loss": 0.1049,
-      "step": 215
-    },
-    {
-      "epoch": 0.6982741519539774,
-      "grad_norm": 0.052490234375,
-      "learning_rate": 0.0001,
-      "loss": 0.0955,
-      "step": 220
-    },
-    {
-      "epoch": 0.7141440190438405,
-      "grad_norm": 0.046630859375,
-      "learning_rate": 0.0001,
-      "loss": 0.0767,
-      "step": 225
-    },
-    {
-      "epoch": 0.7300138861337037,
-      "grad_norm": 0.052978515625,
-      "learning_rate": 0.0001,
-      "loss": 0.0636,
-      "step": 230
-    },
-    {
-      "epoch": 0.7458837532235667,
-      "grad_norm": 0.0546875,
-      "learning_rate": 0.0001,
-      "loss": 0.0584,
-      "step": 235
-    },
-    {
-      "epoch": 0.7617536203134299,
-      "grad_norm": 0.0546875,
-      "learning_rate": 0.0001,
-      "loss": 0.0368,
-      "step": 240
-    },
-    {
-      "epoch": 0.777623487403293,
-      "grad_norm": 0.035400390625,
-      "learning_rate": 0.0001,
-      "loss": 0.0268,
-      "step": 245
-    },
-    {
-      "epoch": 0.7934933544931562,
-      "grad_norm": 0.03564453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0197,
-      "step": 250
-    },
-    {
-      "epoch": 0.8093632215830192,
-      "grad_norm": 0.0673828125,
-      "learning_rate": 0.0001,
-      "loss": 0.264,
-      "step": 255
-    },
-    {
-      "epoch": 0.8252330886728824,
-      "grad_norm": 0.050048828125,
-      "learning_rate": 0.0001,
-      "loss": 0.1382,
-      "step": 260
-    },
-    {
-      "epoch": 0.8411029557627455,
-      "grad_norm": 0.053955078125,
-      "learning_rate": 0.0001,
-      "loss": 0.0959,
-      "step": 265
-    },
-    {
-      "epoch": 0.8569728228526086,
-      "grad_norm": 0.055908203125,
-      "learning_rate": 0.0001,
-      "loss": 0.0986,
-      "step": 270
-    },
-    {
-      "epoch": 0.8728426899424717,
-      "grad_norm": 0.05322265625,
-      "learning_rate": 0.0001,
-      "loss": 0.0806,
-      "step": 275
-    },
-    {
-      "epoch": 0.8887125570323349,
-      "grad_norm": 0.037109375,
-      "learning_rate": 0.0001,
-      "loss": 0.0627,
-      "step": 280
-    },
-    {
-      "epoch": 0.904582424122198,
-      "grad_norm": 0.035888671875,
-      "learning_rate": 0.0001,
-      "loss": 0.0488,
-      "step": 285
-    },
-    {
-      "epoch": 0.9204522912120611,
-      "grad_norm": 0.049072265625,
-      "learning_rate": 0.0001,
-      "loss": 0.0334,
-      "step": 290
-    },
-    {
-      "epoch": 0.9363221583019242,
-      "grad_norm": 0.042236328125,
-      "learning_rate": 0.0001,
-      "loss": 0.0259,
-      "step": 295
-    },
-    {
-      "epoch": 0.9521920253917874,
-      "grad_norm": 0.02490234375,
-      "learning_rate": 0.0001,
-      "loss": 0.0168,
-      "step": 300
-    },
-    {
-      "epoch": 0.9680618924816504,
-      "grad_norm": 0.07080078125,
-      "learning_rate": 0.0001,
-      "loss": 0.1856,
-      "step": 305
-    },
-    {
-      "epoch": 0.9839317595715136,
-      "grad_norm": 0.09814453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0806,
-      "step": 310
-    },
-    {
-      "epoch": 0.9998016266613767,
-      "grad_norm": 0.0380859375,
-      "learning_rate": 0.0001,
-      "loss": 0.0309,
-      "step": 315
-    },
-    {
-      "epoch": 1.0156714937512399,
-      "grad_norm": 0.07373046875,
-      "learning_rate": 0.0001,
-      "loss": 0.2891,
-      "step": 320
-    },
-    {
-      "epoch": 1.031541360841103,
-      "grad_norm": 0.06982421875,
-      "learning_rate": 0.0001,
-      "loss": 0.1519,
-      "step": 325
-    },
-    {
-      "epoch": 1.047411227930966,
-      "grad_norm": 0.048095703125,
-      "learning_rate": 0.0001,
-      "loss": 0.094,
-      "step": 330
-    },
-    {
-      "epoch": 1.0632810950208291,
-      "grad_norm": 0.051513671875,
-      "learning_rate": 0.0001,
-      "loss": 0.0843,
-      "step": 335
-    },
-    {
-      "epoch": 1.0791509621106923,
-      "grad_norm": 0.0517578125,
-      "learning_rate": 0.0001,
-      "loss": 0.0695,
-      "step": 340
-    },
-    {
-      "epoch": 1.0950208292005554,
-      "grad_norm": 0.04931640625,
-      "learning_rate": 0.0001,
-      "loss": 0.0586,
-      "step": 345
-    },
-    {
-      "epoch": 1.1108906962904186,
-      "grad_norm": 0.06201171875,
-      "learning_rate": 0.0001,
-      "loss": 0.0493,
-      "step": 350
-    },
-    {
-      "epoch": 1.1267605633802817,
-      "grad_norm": 0.0272216796875,
-      "learning_rate": 0.0001,
-      "loss": 0.0278,
-      "step": 355
-    },
-    {
-      "epoch": 1.142630430470145,
-      "grad_norm": 0.05419921875,
-      "learning_rate": 0.0001,
-      "loss": 0.0219,
-      "step": 360
-    },
-    {
-      "epoch": 1.1585002975600078,
-      "grad_norm": 0.07177734375,
-      "learning_rate": 0.0001,
-      "loss": 0.015,
-      "step": 365
-    },
-    {
-      "epoch": 1.174370164649871,
-      "grad_norm": 0.09521484375,
-      "learning_rate": 0.0001,
-      "loss": 0.2371,
-      "step": 370
-    },
-    {
-      "epoch": 1.1902400317397341,
-      "grad_norm": 0.060791015625,
-      "learning_rate": 0.0001,
-      "loss": 0.118,
-      "step": 375
-    },
-    {
-      "epoch": 1.2061098988295973,
-      "grad_norm": 0.059814453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0904,
-      "step": 380
-    },
-    {
-      "epoch": 1.2219797659194604,
-      "grad_norm": 0.051513671875,
-      "learning_rate": 0.0001,
-      "loss": 0.079,
-      "step": 385
-    },
-    {
-      "epoch": 1.2378496330093236,
-      "grad_norm": 0.05126953125,
-      "learning_rate": 0.0001,
-      "loss": 0.0618,
-      "step": 390
-    },
-    {
-      "epoch": 1.2537195000991868,
-      "grad_norm": 0.06982421875,
-      "learning_rate": 0.0001,
-      "loss": 0.0501,
-      "step": 395
-    },
-    {
-      "epoch": 1.2695893671890497,
-      "grad_norm": 0.046142578125,
-      "learning_rate": 0.0001,
-      "loss": 0.0404,
-      "step": 400
-    },
-    {
-      "epoch": 1.2854592342789128,
-      "grad_norm": 0.03564453125,
-      "learning_rate": 0.0001,
-      "loss": 0.0295,
-      "step": 405
-    },
-    {
-      "epoch": 1.301329101368776,
-      "grad_norm": 0.0341796875,
-      "learning_rate": 0.0001,
-      "loss": 0.0185,
-      "step": 410
-    },
-    {
-      "epoch": 1.3171989684586392,
-      "grad_norm": 0.0286865234375,
-      "learning_rate": 0.0001,
-      "loss": 0.0123,
-      "step": 415
-    },
-    {
-      "epoch": 1.3330688355485023,
-      "grad_norm": 0.054931640625,
-      "learning_rate": 0.0001,
-      "loss": 0.2018,
-      "step": 420
-    },
-    {
-      "epoch": 1.3489387026383655,
-      "grad_norm": 0.060302734375,
-      "learning_rate": 0.0001,
-      "loss": 0.1189,
-      "step": 425
-    },
-    {
-      "epoch": 1.3648085697282286,
-      "grad_norm": 0.046630859375,
-      "learning_rate": 0.0001,
-      "loss": 0.0821,
-      "step": 430
-    },
-    {
-      "epoch": 1.3806784368180915,
-      "grad_norm": 0.0576171875,
-      "learning_rate": 0.0001,
-      "loss": 0.0759,
-      "step": 435
-    },
-    {
-      "epoch": 1.3965483039079547,
-      "grad_norm": 0.058349609375,
-      "learning_rate": 0.0001,
-      "loss": 0.0567,
-      "step": 440
-    },
-    {
-      "epoch": 1.4124181709978179,
-      "grad_norm": 0.05908203125,
-      "learning_rate": 0.0001,
-      "loss": 0.0435,
-      "step": 445
-    },
-    {
-      "epoch": 1.428288038087681,
-      "grad_norm": 0.054443359375,
-      "learning_rate": 0.0001,
-      "loss": 0.0414,
-      "step": 450
-    },
-    {
-      "epoch": 1.4441579051775442,
-      "grad_norm": 0.036376953125,
-      "learning_rate": 0.0001,
-      "loss": 0.0283,
-      "step": 455
-    },
-    {
-      "epoch": 1.4600277722674073,
-      "grad_norm": 0.142578125,
-      "learning_rate": 0.0001,
-      "loss": 0.0206,
-      "step": 460
-    },
-    {
-      "epoch": 1.4758976393572705,
-      "grad_norm": 0.044677734375,
-      "learning_rate": 0.0001,
-      "loss": 0.0129,
-      "step": 465
-    },
-    {
-      "epoch": 1.4917675064471334,
-      "grad_norm": 0.07275390625,
-      "learning_rate": 0.0001,
-      "loss": 0.2036,
-      "step": 470
-    },
-    {
-      "epoch": 1.5076373735369968,
-      "grad_norm": 0.052490234375,
-      "learning_rate": 0.0001,
-      "loss": 0.1093,
-      "step": 475
-    },
-    {
-      "epoch": 1.5076373735369968,
-      "step": 475,
-      "total_flos": 1.4535297138363187e+18,
-      "train_loss": 0.11740684490454824,
-      "train_runtime": 39384.0084,
-      "train_samples_per_second": 0.772,
-      "train_steps_per_second": 0.012
     }
   ],
   "logging_steps": 5,
-  "max_steps": 475,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 90,
@@ -700,8 +301,8 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.4535297138363187e+18,
-  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.2058706862356208,
   "eval_steps": 500,
+  "global_step": 190,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.0317334391114637,
+      "grad_norm": 0.060546875,
       "learning_rate": 0.0001,
+      "loss": 0.6421,
       "step": 5
     },
     {
+      "epoch": 0.0634668782229274,
       "grad_norm": 0.11572265625,
       "learning_rate": 0.0001,
+      "loss": 0.5213,
       "step": 10
     },
     {
+      "epoch": 0.09520031733439112,
+      "grad_norm": 0.08251953125,
       "learning_rate": 0.0001,
+      "loss": 0.2925,
       "step": 15
     },
     {
+      "epoch": 0.1269337564458548,
+      "grad_norm": 0.0634765625,
       "learning_rate": 0.0001,
+      "loss": 0.1978,
       "step": 20
     },
     {
+      "epoch": 0.15866719555731854,
+      "grad_norm": 0.08251953125,
       "learning_rate": 0.0001,
+      "loss": 0.1538,
       "step": 25
     },
     {
+      "epoch": 0.19040063466878224,
+      "grad_norm": 0.10888671875,
       "learning_rate": 0.0001,
+      "loss": 0.106,
       "step": 30
     },
     {
+      "epoch": 0.22213407378024594,
+      "grad_norm": 0.049560546875,
       "learning_rate": 0.0001,
+      "loss": 0.0454,
       "step": 35
     },
     {
+      "epoch": 0.2538675128917096,
+      "grad_norm": 0.310546875,
       "learning_rate": 0.0001,
+      "loss": 0.1215,
       "step": 40
     },
     {
+      "epoch": 0.28560095200317337,
+      "grad_norm": 0.06494140625,
       "learning_rate": 0.0001,
+      "loss": 0.2476,
       "step": 45
     },
     {
+      "epoch": 0.31733439111463707,
+      "grad_norm": 0.40234375,
       "learning_rate": 0.0001,
+      "loss": 0.1073,
       "step": 50
     },
     {
+      "epoch": 0.3490678302261008,
+      "grad_norm": 0.04052734375,
       "learning_rate": 0.0001,
+      "loss": 0.0863,
       "step": 55
     },
     {
+      "epoch": 0.3808012693375645,
+      "grad_norm": 0.03369140625,
       "learning_rate": 0.0001,
+      "loss": 0.0671,
       "step": 60
     },
     {
+      "epoch": 0.4125347084490282,
+      "grad_norm": 0.0274658203125,
       "learning_rate": 0.0001,
+      "loss": 0.0493,
       "step": 65
     },
     {
+      "epoch": 0.4442681475604919,
+      "grad_norm": 0.0277099609375,
       "learning_rate": 0.0001,
+      "loss": 0.0311,
       "step": 70
     },
     {
+      "epoch": 0.4760015866719556,
+      "grad_norm": 0.01275634765625,
       "learning_rate": 0.0001,
+      "loss": 0.0125,
       "step": 75
     },
     {
+      "epoch": 0.5077350257834192,
+      "grad_norm": 0.06787109375,
       "learning_rate": 0.0001,
+      "loss": 0.1307,
       "step": 80
     },
     {
+      "epoch": 0.539468464894883,
+      "grad_norm": 0.050048828125,
       "learning_rate": 0.0001,
+      "loss": 0.171,
       "step": 85
     },
     {
+      "epoch": 0.5712019040063467,
+      "grad_norm": 0.060791015625,
       "learning_rate": 0.0001,
+      "loss": 0.0818,
       "step": 90
     },
     {
+      "epoch": 0.6029353431178104,
+      "grad_norm": 0.033203125,
       "learning_rate": 0.0001,
+      "loss": 0.0658,
       "step": 95
     },
     {
+      "epoch": 0.6346687822292741,
+      "grad_norm": 0.0235595703125,
       "learning_rate": 0.0001,
+      "loss": 0.046,
       "step": 100
     },
     {
+      "epoch": 0.6664022213407378,
+      "grad_norm": 0.0299072265625,
       "learning_rate": 0.0001,
+      "loss": 0.0384,
       "step": 105
     },
     {
+      "epoch": 0.6981356604522015,
+      "grad_norm": 0.0181884765625,
       "learning_rate": 0.0001,
+      "loss": 0.0187,
       "step": 110
     },
     {
+      "epoch": 0.7298690995636652,
+      "grad_norm": 0.019775390625,
       "learning_rate": 0.0001,
+      "loss": 0.0095,
       "step": 115
     },
     {
+      "epoch": 0.761602538675129,
+      "grad_norm": 0.060791015625,
       "learning_rate": 0.0001,
+      "loss": 0.1381,
       "step": 120
     },
     {
+      "epoch": 0.7933359777865926,
+      "grad_norm": 0.038818359375,
       "learning_rate": 0.0001,
+      "loss": 0.1125,
       "step": 125
     },
     {
+      "epoch": 0.8250694168980564,
+      "grad_norm": 0.032958984375,
       "learning_rate": 0.0001,
+      "loss": 0.062,
       "step": 130
     },
     {
+      "epoch": 0.85680285600952,
+      "grad_norm": 0.03173828125,
       "learning_rate": 0.0001,
+      "loss": 0.0526,
       "step": 135
     },
     {
+      "epoch": 0.8885362951209838,
+      "grad_norm": 0.02392578125,
       "learning_rate": 0.0001,
+      "loss": 0.0382,
       "step": 140
     },
     {
+      "epoch": 0.9202697342324474,
+      "grad_norm": 0.027099609375,
       "learning_rate": 0.0001,
+      "loss": 0.027,
       "step": 145
     },
     {
+      "epoch": 0.9520031733439112,
+      "grad_norm": 0.02294921875,
       "learning_rate": 0.0001,
+      "loss": 0.0115,
       "step": 150
     },
     {
+      "epoch": 0.9837366124553748,
+      "grad_norm": 0.02099609375,
       "learning_rate": 0.0001,
+      "loss": 0.005,
       "step": 155
     },
     {
+      "epoch": 1.0154700515668384,
+      "grad_norm": 0.0703125,
       "learning_rate": 0.0001,
+      "loss": 0.1291,
       "step": 160
     },
     {
+      "epoch": 1.0472034906783023,
+      "grad_norm": 0.04052734375,
       "learning_rate": 0.0001,
+      "loss": 0.1033,
       "step": 165
     },
     {
+      "epoch": 1.078936929789766,
+      "grad_norm": 0.03173828125,
       "learning_rate": 0.0001,
+      "loss": 0.0539,
       "step": 170
     },
     {
+      "epoch": 1.1106703689012296,
+      "grad_norm": 0.0299072265625,
       "learning_rate": 0.0001,
+      "loss": 0.043,
       "step": 175
     },
     {
+      "epoch": 1.1424038080126935,
+      "grad_norm": 0.0262451171875,
       "learning_rate": 0.0001,
+      "loss": 0.0303,
       "step": 180
     },
     {
+      "epoch": 1.1741372471241571,
+      "grad_norm": 0.060791015625,
       "learning_rate": 0.0001,
+      "loss": 0.0239,
       "step": 185
     },
     {
+      "epoch": 1.2058706862356208,
+      "grad_norm": 0.015625,
       "learning_rate": 0.0001,
+      "loss": 0.0095,
       "step": 190
     },
     {
+      "epoch": 1.2058706862356208,
+      "step": 190,
+      "total_flos": 1.216645538039931e+18,
+      "train_loss": 0.10745409297707834,
+      "train_runtime": 37043.3755,
+      "train_samples_per_second": 0.657,
+      "train_steps_per_second": 0.005
     }
   ],
   "logging_steps": 5,
+  "max_steps": 190,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 90,
       "attributes": {}
     }
   },
+  "total_flos": 1.216645538039931e+18,
+  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
 }

codellama/c/dmcodegen/dmcodegen_base_c/all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.0779220779220777,
+    "total_flos": 1.8562430640540058e+18,
+    "train_loss": 0.45838437411520216,
+    "train_runtime": 56927.0701,
+    "train_samples_per_second": 0.405,
+    "train_steps_per_second": 0.003
+}

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.13.2

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "k_proj",
+    "gate_proj",
+    "v_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

codellama/{java/dataflow_pretrained/checkpoint-720/adapter_model → c/dmcodegen/dmcodegen_base_c/checkpoint-180}/adapter_model.safetensors RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3cf8f1c4cc300ca5094e08295cc0dcffacce527b464e1372de75271bb4d522a9
 size 1156480200

 version https://git-lfs.github.com/spec/v1
+oid sha256:4018337dfdcbc4ca01ac822efa32f00bdad5c13cd3bd3d5c8308564380b060be
 size 1156480200

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.13.2

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/adapter_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "k_proj",
+    "gate_proj",
+    "v_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

codellama/c/{dataflow_c_pretrained/checkpoint-475 → dmcodegen/dmcodegen_base_c/checkpoint-180}/adapter_model/adapter_model.safetensors RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e3bd7cb053c3e00ea48ed365eed4b65ae5e2d7d807e71ec5615d765dfba19de
 size 1156480200

 version https://git-lfs.github.com/spec/v1
+oid sha256:4018337dfdcbc4ca01ac822efa32f00bdad5c13cd3bd3d5c8308564380b060be
 size 1156480200

codellama/{java/dataflow_pretrained/checkpoint-720 → c/dmcodegen/dmcodegen_base_c/checkpoint-180}/added_tokens.json RENAMED Viewed

File without changes

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c311f9837fead460249a52390564c15ad9137ede61358886a6cf8ab7c563896
+size 2003126962

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09d9a2f5e5c671e23fbed743832c2d77a42f5fdb0981bfd74289171a6b58bdb8
+size 14244

codellama/c/{dataflow_c_pretrained/checkpoint-475 → dmcodegen/dmcodegen_base_c/checkpoint-180}/scheduler.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1219a1788d5d094f428228d99e4982dc061bcd85dea2cf1e1ca0c7a969573be6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:244453cd6aad26ed6e8f9d969778193b9354089d8336fe58bfb91c089a53bf6f
 size 1064

codellama/{java/dataflow_pretrained/checkpoint-720 → c/dmcodegen/dmcodegen_base_c/checkpoint-180}/special_tokens_map.json RENAMED Viewed

File without changes

codellama/{java/dataflow_pretrained/checkpoint-720 → c/dmcodegen/dmcodegen_base_c/checkpoint-180}/tokenizer.model RENAMED Viewed

File without changes

codellama/{java/dataflow_pretrained/checkpoint-720 → c/dmcodegen/dmcodegen_base_c/checkpoint-180}/tokenizer_config.json RENAMED Viewed

File without changes

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-180/trainer_state.json ADDED Viewed

	@@ -0,0 +1,285 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0779220779220777,
+  "eval_steps": 500,
+  "global_step": 180,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05772005772005772,
+      "grad_norm": 0.018310546875,
+      "learning_rate": 0.0001,
+      "loss": 0.5558,
+      "step": 5
+    },
+    {
+      "epoch": 0.11544011544011544,
+      "grad_norm": 0.01544189453125,
+      "learning_rate": 0.0001,
+      "loss": 0.4953,
+      "step": 10
+    },
+    {
+      "epoch": 0.17316017316017315,
+      "grad_norm": 0.01611328125,
+      "learning_rate": 0.0001,
+      "loss": 0.4465,
+      "step": 15
+    },
+    {
+      "epoch": 0.23088023088023088,
+      "grad_norm": 0.0186767578125,
+      "learning_rate": 0.0001,
+      "loss": 0.419,
+      "step": 20
+    },
+    {
+      "epoch": 0.2886002886002886,
+      "grad_norm": 0.022705078125,
+      "learning_rate": 0.0001,
+      "loss": 0.5274,
+      "step": 25
+    },
+    {
+      "epoch": 0.3463203463203463,
+      "grad_norm": 0.020751953125,
+      "learning_rate": 0.0001,
+      "loss": 0.5132,
+      "step": 30
+    },
+    {
+      "epoch": 0.40404040404040403,
+      "grad_norm": 0.0174560546875,
+      "learning_rate": 0.0001,
+      "loss": 0.4466,
+      "step": 35
+    },
+    {
+      "epoch": 0.46176046176046176,
+      "grad_norm": 0.01904296875,
+      "learning_rate": 0.0001,
+      "loss": 0.4116,
+      "step": 40
+    },
+    {
+      "epoch": 0.5194805194805194,
+      "grad_norm": 0.026611328125,
+      "learning_rate": 0.0001,
+      "loss": 0.4913,
+      "step": 45
+    },
+    {
+      "epoch": 0.5772005772005772,
+      "grad_norm": 0.0208740234375,
+      "learning_rate": 0.0001,
+      "loss": 0.4982,
+      "step": 50
+    },
+    {
+      "epoch": 0.6349206349206349,
+      "grad_norm": 0.021484375,
+      "learning_rate": 0.0001,
+      "loss": 0.4586,
+      "step": 55
+    },
+    {
+      "epoch": 0.6926406926406926,
+      "grad_norm": 0.0191650390625,
+      "learning_rate": 0.0001,
+      "loss": 0.4132,
+      "step": 60
+    },
+    {
+      "epoch": 0.7503607503607503,
+      "grad_norm": 0.03125,
+      "learning_rate": 0.0001,
+      "loss": 0.445,
+      "step": 65
+    },
+    {
+      "epoch": 0.8080808080808081,
+      "grad_norm": 0.0242919921875,
+      "learning_rate": 0.0001,
+      "loss": 0.5186,
+      "step": 70
+    },
+    {
+      "epoch": 0.8658008658008658,
+      "grad_norm": 0.022216796875,
+      "learning_rate": 0.0001,
+      "loss": 0.463,
+      "step": 75
+    },
+    {
+      "epoch": 0.9235209235209235,
+      "grad_norm": 0.02001953125,
+      "learning_rate": 0.0001,
+      "loss": 0.4233,
+      "step": 80
+    },
+    {
+      "epoch": 0.9812409812409812,
+      "grad_norm": 0.0299072265625,
+      "learning_rate": 0.0001,
+      "loss": 0.4396,
+      "step": 85
+    },
+    {
+      "epoch": 1.0389610389610389,
+      "grad_norm": 0.02685546875,
+      "learning_rate": 0.0001,
+      "loss": 0.4944,
+      "step": 90
+    },
+    {
+      "epoch": 1.0966810966810967,
+      "grad_norm": 0.0260009765625,
+      "learning_rate": 0.0001,
+      "loss": 0.4896,
+      "step": 95
+    },
+    {
+      "epoch": 1.1544011544011543,
+      "grad_norm": 0.0247802734375,
+      "learning_rate": 0.0001,
+      "loss": 0.4402,
+      "step": 100
+    },
+    {
+      "epoch": 1.2121212121212122,
+      "grad_norm": 0.0240478515625,
+      "learning_rate": 0.0001,
+      "loss": 0.3963,
+      "step": 105
+    },
+    {
+      "epoch": 1.2698412698412698,
+      "grad_norm": 0.037353515625,
+      "learning_rate": 0.0001,
+      "loss": 0.4535,
+      "step": 110
+    },
+    {
+      "epoch": 1.3275613275613276,
+      "grad_norm": 0.032470703125,
+      "learning_rate": 0.0001,
+      "loss": 0.5045,
+      "step": 115
+    },
+    {
+      "epoch": 1.3852813852813852,
+      "grad_norm": 0.0301513671875,
+      "learning_rate": 0.0001,
+      "loss": 0.4466,
+      "step": 120
+    },
+    {
+      "epoch": 1.443001443001443,
+      "grad_norm": 0.0244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.4095,
+      "step": 125
+    },
+    {
+      "epoch": 1.5007215007215007,
+      "grad_norm": 0.046630859375,
+      "learning_rate": 0.0001,
+      "loss": 0.4346,
+      "step": 130
+    },
+    {
+      "epoch": 1.5584415584415585,
+      "grad_norm": 0.0299072265625,
+      "learning_rate": 0.0001,
+      "loss": 0.5046,
+      "step": 135
+    },
+    {
+      "epoch": 1.6161616161616161,
+      "grad_norm": 0.032958984375,
+      "learning_rate": 0.0001,
+      "loss": 0.4556,
+      "step": 140
+    },
+    {
+      "epoch": 1.6738816738816737,
+      "grad_norm": 0.0272216796875,
+      "learning_rate": 0.0001,
+      "loss": 0.4245,
+      "step": 145
+    },
+    {
+      "epoch": 1.7316017316017316,
+      "grad_norm": 0.036865234375,
+      "learning_rate": 0.0001,
+      "loss": 0.3834,
+      "step": 150
+    },
+    {
+      "epoch": 1.7893217893217894,
+      "grad_norm": 0.03662109375,
+      "learning_rate": 0.0001,
+      "loss": 0.5163,
+      "step": 155
+    },
+    {
+      "epoch": 1.847041847041847,
+      "grad_norm": 0.033935546875,
+      "learning_rate": 0.0001,
+      "loss": 0.4565,
+      "step": 160
+    },
+    {
+      "epoch": 1.9047619047619047,
+      "grad_norm": 0.02880859375,
+      "learning_rate": 0.0001,
+      "loss": 0.4164,
+      "step": 165
+    },
+    {
+      "epoch": 1.9624819624819625,
+      "grad_norm": 0.03271484375,
+      "learning_rate": 0.0001,
+      "loss": 0.3956,
+      "step": 170
+    },
+    {
+      "epoch": 2.0202020202020203,
+      "grad_norm": 0.041748046875,
+      "learning_rate": 0.0001,
+      "loss": 0.4453,
+      "step": 175
+    },
+    {
+      "epoch": 2.0779220779220777,
+      "grad_norm": 0.03857421875,
+      "learning_rate": 0.0001,
+      "loss": 0.4681,
+      "step": 180
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 180,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.8562430640540058e+18,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

codellama/{java/dataflow_pretrained/checkpoint-720 → c/dmcodegen/dmcodegen_base_c/checkpoint-180}/training_args.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:824c938bee04d46c16bd5438c177873620e56e36a6e51c3a35b2b80c6e87b25b
 size 7416

 version https://git-lfs.github.com/spec/v1
+oid sha256:34f792b2641cd9a5b1462d877d12107fd74f2ec203190ed5f66b658484d1e7b3
 size 7416

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.13.2

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "up_proj",
+    "gate_proj",
+    "q_proj",
+    "o_proj",
+    "v_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b83cea6a8ba3abb411145bffb7c0c1f6cba9fd43721b41ecc8ed32b2c3e179fe
+size 1156480200

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: ../CodeLlama-13b-Instruct-hf/
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.13.2

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../CodeLlama-13b-Instruct-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "up_proj",
+    "gate_proj",
+    "q_proj",
+    "o_proj",
+    "v_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/adapter_model/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b83cea6a8ba3abb411145bffb7c0c1f6cba9fd43721b41ecc8ed32b2c3e179fe
+size 1156480200

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[PAD]": 32016
+}

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:119891f1916a60aabb0c81ceee15f0c8178c275d5b6ef81c9059743549e9653d
+size 2003126962

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b04d21a080b5e438b4b32adc506500b95e99b49e74f9d44a991ff92733e72054
+size 14244

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c7d5936e70e72bf0e3651da983818a5b36c8198eb19437975051ad543d68cc9
+size 1064

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "additional_special_tokens": [
+    "▁<PRE>",
+    "▁<MID>",
+    "▁<SUF>",
+    "▁<EOT>"
+  ],
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+size 500058

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,94 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "▁<PRE>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "▁<SUF>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "▁<MID>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "▁<EOT>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32016": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "▁<PRE>",
+    "▁<MID>",
+    "▁<SUF>",
+    "▁<EOT>"
+  ],
+  "bos_token": "<s>",
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "eot_token": "▁<EOT>",
+  "fill_token": "<FILL_ME>",
+  "legacy": null,
+  "middle_token": "▁<MID>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "padding_side": "right",
+  "prefix_token": "▁<PRE>",
+  "sp_model_kwargs": {},
+  "suffix_first": false,
+  "suffix_token": "▁<SUF>",
+  "tokenizer_class": "CodeLlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/trainer_state.json ADDED Viewed

	@@ -0,0 +1,159 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0389610389610389,
+  "eval_steps": 500,
+  "global_step": 90,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05772005772005772,
+      "grad_norm": 0.036376953125,
+      "learning_rate": 0.0001,
+      "loss": 0.7562,
+      "step": 5
+    },
+    {
+      "epoch": 0.11544011544011544,
+      "grad_norm": 0.0206298828125,
+      "learning_rate": 0.0001,
+      "loss": 0.5993,
+      "step": 10
+    },
+    {
+      "epoch": 0.17316017316017315,
+      "grad_norm": 0.0244140625,
+      "learning_rate": 0.0001,
+      "loss": 0.5209,
+      "step": 15
+    },
+    {
+      "epoch": 0.23088023088023088,
+      "grad_norm": 0.0673828125,
+      "learning_rate": 0.0001,
+      "loss": 0.5029,
+      "step": 20
+    },
+    {
+      "epoch": 0.2886002886002886,
+      "grad_norm": 0.04150390625,
+      "learning_rate": 0.0001,
+      "loss": 0.622,
+      "step": 25
+    },
+    {
+      "epoch": 0.3463203463203463,
+      "grad_norm": 0.0284423828125,
+      "learning_rate": 0.0001,
+      "loss": 0.5561,
+      "step": 30
+    },
+    {
+      "epoch": 0.40404040404040403,
+      "grad_norm": 0.0189208984375,
+      "learning_rate": 0.0001,
+      "loss": 0.4707,
+      "step": 35
+    },
+    {
+      "epoch": 0.46176046176046176,
+      "grad_norm": 0.019775390625,
+      "learning_rate": 0.0001,
+      "loss": 0.4331,
+      "step": 40
+    },
+    {
+      "epoch": 0.5194805194805194,
+      "grad_norm": 0.0233154296875,
+      "learning_rate": 0.0001,
+      "loss": 0.534,
+      "step": 45
+    },
+    {
+      "epoch": 0.5772005772005772,
+      "grad_norm": 0.0185546875,
+      "learning_rate": 0.0001,
+      "loss": 0.5314,
+      "step": 50
+    },
+    {
+      "epoch": 0.6349206349206349,
+      "grad_norm": 0.0169677734375,
+      "learning_rate": 0.0001,
+      "loss": 0.4802,
+      "step": 55
+    },
+    {
+      "epoch": 0.6926406926406926,
+      "grad_norm": 0.0172119140625,
+      "learning_rate": 0.0001,
+      "loss": 0.4332,
+      "step": 60
+    },
+    {
+      "epoch": 0.7503607503607503,
+      "grad_norm": 0.021728515625,
+      "learning_rate": 0.0001,
+      "loss": 0.4812,
+      "step": 65
+    },
+    {
+      "epoch": 0.8080808080808081,
+      "grad_norm": 0.0155029296875,
+      "learning_rate": 0.0001,
+      "loss": 0.5443,
+      "step": 70
+    },
+    {
+      "epoch": 0.8658008658008658,
+      "grad_norm": 0.0159912109375,
+      "learning_rate": 0.0001,
+      "loss": 0.4805,
+      "step": 75
+    },
+    {
+      "epoch": 0.9235209235209235,
+      "grad_norm": 0.0146484375,
+      "learning_rate": 0.0001,
+      "loss": 0.4377,
+      "step": 80
+    },
+    {
+      "epoch": 0.9812409812409812,
+      "grad_norm": 0.019775390625,
+      "learning_rate": 0.0001,
+      "loss": 0.4548,
+      "step": 85
+    },
+    {
+      "epoch": 1.0389610389610389,
+      "grad_norm": 0.018310546875,
+      "learning_rate": 0.0001,
+      "loss": 0.5309,
+      "step": 90
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 270,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 90,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 9.286644455131546e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

codellama/c/dmcodegen/dmcodegen_base_c/checkpoint-90/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3b5defc64c4beb14d7155cd334736bdf2476d6f1da0691ee9be164112ac9f9c
+size 7416