Upload 13 files

Browse files

Files changed (13) hide show

README.md +202 -0
adapter_config.json +34 -0
adapter_model.safetensors +3 -0
added_tokens.json +13 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +30 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +131 -0
trainer_state.json +733 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: unsloth/phi-3-medium-4k-instruct-bnb-4bit
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** unsloth/phi-3-medium-4k-instruct-bnb-4bit
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.12.0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/phi-3-medium-4k-instruct-bnb-4bit",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "down_proj",
+    "v_proj",
+    "gate_proj",
+    "up_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef4a48a9e3651fb2a33c268dc10ee9d8f3af662e1ee6d45b47ccac069082cc67
+size 262219392

added_tokens.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "<|assistant|>": 32001,
+  "<|endoftext|>": 32000,
+  "<|end|>": 32007,
+  "<|placeholder1|>": 32002,
+  "<|placeholder2|>": 32003,
+  "<|placeholder3|>": 32004,
+  "<|placeholder4|>": 32005,
+  "<|placeholder5|>": 32008,
+  "<|placeholder6|>": 32009,
+  "<|system|>": 32006,
+  "<|user|>": 32010
+}

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4fc0a22aa273b7a6c52487b900acaf323d9ed7bb84fe5df71f1895fd46e08c1
+size 131942996

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c05a1bb48a74f4f427bfae4fabb389d710b55715e4838c289c5bc6a59c103e4c
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
+size 1064

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|placeholder6|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,131 @@

+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "32000": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32001": {
+      "content": "<|assistant|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32002": {
+      "content": "<|placeholder1|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32003": {
+      "content": "<|placeholder2|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32004": {
+      "content": "<|placeholder3|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32005": {
+      "content": "<|placeholder4|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32006": {
+      "content": "<|system|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "<|end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "<|placeholder5|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "<|placeholder6|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "<|user|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "legacy": false,
+  "model_max_length": 4096,
+  "pad_token": "<|placeholder6|>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,733 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 80.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.19112393260002136,
+      "learning_rate": 2.857142857142857e-05,
+      "loss": 1.258,
+      "step": 1
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 0.22035102546215057,
+      "learning_rate": 5.714285714285714e-05,
+      "loss": 1.4862,
+      "step": 2
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 0.1868797093629837,
+      "learning_rate": 8.571428571428571e-05,
+      "loss": 1.1615,
+      "step": 3
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 0.21939030289649963,
+      "learning_rate": 0.00011428571428571428,
+      "loss": 1.2374,
+      "step": 4
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.300101637840271,
+      "learning_rate": 0.00014285714285714287,
+      "loss": 1.5311,
+      "step": 5
+    },
+    {
+      "epoch": 4.8,
+      "grad_norm": 0.36734649538993835,
+      "learning_rate": 0.00017142857142857143,
+      "loss": 1.3291,
+      "step": 6
+    },
+    {
+      "epoch": 5.6,
+      "grad_norm": 0.32768917083740234,
+      "learning_rate": 0.0002,
+      "loss": 1.1584,
+      "step": 7
+    },
+    {
+      "epoch": 6.4,
+      "grad_norm": 0.5160196423530579,
+      "learning_rate": 0.00019784946236559142,
+      "loss": 1.3315,
+      "step": 8
+    },
+    {
+      "epoch": 7.2,
+      "grad_norm": 0.21470555663108826,
+      "learning_rate": 0.0001956989247311828,
+      "loss": 0.7039,
+      "step": 9
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 0.36922088265419006,
+      "learning_rate": 0.00019354838709677422,
+      "loss": 1.0535,
+      "step": 10
+    },
+    {
+      "epoch": 8.8,
+      "grad_norm": 0.34553757309913635,
+      "learning_rate": 0.0001913978494623656,
+      "loss": 0.7557,
+      "step": 11
+    },
+    {
+      "epoch": 9.6,
+      "grad_norm": 0.8218473792076111,
+      "learning_rate": 0.000189247311827957,
+      "loss": 0.915,
+      "step": 12
+    },
+    {
+      "epoch": 10.4,
+      "grad_norm": 0.19179506599903107,
+      "learning_rate": 0.0001870967741935484,
+      "loss": 0.4637,
+      "step": 13
+    },
+    {
+      "epoch": 11.2,
+      "grad_norm": 0.25152501463890076,
+      "learning_rate": 0.00018494623655913978,
+      "loss": 0.5699,
+      "step": 14
+    },
+    {
+      "epoch": 12.0,
+      "grad_norm": 0.2666938304901123,
+      "learning_rate": 0.0001827956989247312,
+      "loss": 0.5765,
+      "step": 15
+    },
+    {
+      "epoch": 12.8,
+      "grad_norm": 0.22220765054225922,
+      "learning_rate": 0.00018064516129032257,
+      "loss": 0.4589,
+      "step": 16
+    },
+    {
+      "epoch": 13.6,
+      "grad_norm": 0.22124633193016052,
+      "learning_rate": 0.00017849462365591398,
+      "loss": 0.5251,
+      "step": 17
+    },
+    {
+      "epoch": 14.4,
+      "grad_norm": 0.18004488945007324,
+      "learning_rate": 0.0001763440860215054,
+      "loss": 0.3745,
+      "step": 18
+    },
+    {
+      "epoch": 15.2,
+      "grad_norm": 0.17500567436218262,
+      "learning_rate": 0.00017419354838709678,
+      "loss": 0.3414,
+      "step": 19
+    },
+    {
+      "epoch": 16.0,
+      "grad_norm": 0.21916472911834717,
+      "learning_rate": 0.0001720430107526882,
+      "loss": 0.3959,
+      "step": 20
+    },
+    {
+      "epoch": 16.8,
+      "grad_norm": 0.23270182311534882,
+      "learning_rate": 0.00016989247311827957,
+      "loss": 0.3104,
+      "step": 21
+    },
+    {
+      "epoch": 17.6,
+      "grad_norm": 0.3092079162597656,
+      "learning_rate": 0.00016774193548387098,
+      "loss": 0.3215,
+      "step": 22
+    },
+    {
+      "epoch": 18.4,
+      "grad_norm": 0.27041715383529663,
+      "learning_rate": 0.0001655913978494624,
+      "loss": 0.2244,
+      "step": 23
+    },
+    {
+      "epoch": 19.2,
+      "grad_norm": 0.3036593198776245,
+      "learning_rate": 0.00016344086021505378,
+      "loss": 0.2492,
+      "step": 24
+    },
+    {
+      "epoch": 20.0,
+      "grad_norm": 0.25133511424064636,
+      "learning_rate": 0.00016129032258064516,
+      "loss": 0.1733,
+      "step": 25
+    },
+    {
+      "epoch": 20.8,
+      "grad_norm": 0.22519220411777496,
+      "learning_rate": 0.00015913978494623657,
+      "loss": 0.1444,
+      "step": 26
+    },
+    {
+      "epoch": 21.6,
+      "grad_norm": 0.4399205148220062,
+      "learning_rate": 0.00015698924731182796,
+      "loss": 0.1221,
+      "step": 27
+    },
+    {
+      "epoch": 22.4,
+      "grad_norm": 0.3637382686138153,
+      "learning_rate": 0.00015483870967741937,
+      "loss": 0.1055,
+      "step": 28
+    },
+    {
+      "epoch": 23.2,
+      "grad_norm": 0.36955785751342773,
+      "learning_rate": 0.00015268817204301075,
+      "loss": 0.068,
+      "step": 29
+    },
+    {
+      "epoch": 24.0,
+      "grad_norm": 0.7627295255661011,
+      "learning_rate": 0.00015053763440860216,
+      "loss": 0.0689,
+      "step": 30
+    },
+    {
+      "epoch": 24.8,
+      "grad_norm": 0.16507157683372498,
+      "learning_rate": 0.00014838709677419355,
+      "loss": 0.0482,
+      "step": 31
+    },
+    {
+      "epoch": 25.6,
+      "grad_norm": 0.23467229306697845,
+      "learning_rate": 0.00014623655913978496,
+      "loss": 0.0317,
+      "step": 32
+    },
+    {
+      "epoch": 26.4,
+      "grad_norm": 0.13561402261257172,
+      "learning_rate": 0.00014408602150537637,
+      "loss": 0.0293,
+      "step": 33
+    },
+    {
+      "epoch": 27.2,
+      "grad_norm": 0.17790749669075012,
+      "learning_rate": 0.00014193548387096775,
+      "loss": 0.0282,
+      "step": 34
+    },
+    {
+      "epoch": 28.0,
+      "grad_norm": 0.24078549444675446,
+      "learning_rate": 0.00013978494623655916,
+      "loss": 0.0167,
+      "step": 35
+    },
+    {
+      "epoch": 28.8,
+      "grad_norm": 0.15411370992660522,
+      "learning_rate": 0.00013763440860215055,
+      "loss": 0.0152,
+      "step": 36
+    },
+    {
+      "epoch": 29.6,
+      "grad_norm": 0.11153895407915115,
+      "learning_rate": 0.00013548387096774193,
+      "loss": 0.0126,
+      "step": 37
+    },
+    {
+      "epoch": 30.4,
+      "grad_norm": 0.20012152194976807,
+      "learning_rate": 0.00013333333333333334,
+      "loss": 0.012,
+      "step": 38
+    },
+    {
+      "epoch": 31.2,
+      "grad_norm": 0.15093949437141418,
+      "learning_rate": 0.00013118279569892472,
+      "loss": 0.0127,
+      "step": 39
+    },
+    {
+      "epoch": 32.0,
+      "grad_norm": 0.0803963765501976,
+      "learning_rate": 0.00012903225806451613,
+      "loss": 0.0086,
+      "step": 40
+    },
+    {
+      "epoch": 32.8,
+      "grad_norm": 0.036438949406147,
+      "learning_rate": 0.00012688172043010752,
+      "loss": 0.0068,
+      "step": 41
+    },
+    {
+      "epoch": 33.6,
+      "grad_norm": 0.1063300296664238,
+      "learning_rate": 0.00012473118279569893,
+      "loss": 0.0084,
+      "step": 42
+    },
+    {
+      "epoch": 34.4,
+      "grad_norm": 0.06543727219104767,
+      "learning_rate": 0.00012258064516129034,
+      "loss": 0.0072,
+      "step": 43
+    },
+    {
+      "epoch": 35.2,
+      "grad_norm": 0.040057223290205,
+      "learning_rate": 0.00012043010752688172,
+      "loss": 0.0071,
+      "step": 44
+    },
+    {
+      "epoch": 36.0,
+      "grad_norm": 0.03660134598612785,
+      "learning_rate": 0.00011827956989247313,
+      "loss": 0.0067,
+      "step": 45
+    },
+    {
+      "epoch": 36.8,
+      "grad_norm": 0.03747409209609032,
+      "learning_rate": 0.00011612903225806453,
+      "loss": 0.0063,
+      "step": 46
+    },
+    {
+      "epoch": 37.6,
+      "grad_norm": 0.027792207896709442,
+      "learning_rate": 0.00011397849462365593,
+      "loss": 0.0075,
+      "step": 47
+    },
+    {
+      "epoch": 38.4,
+      "grad_norm": 0.04993543401360512,
+      "learning_rate": 0.00011182795698924731,
+      "loss": 0.0062,
+      "step": 48
+    },
+    {
+      "epoch": 39.2,
+      "grad_norm": 0.045758508145809174,
+      "learning_rate": 0.00010967741935483871,
+      "loss": 0.005,
+      "step": 49
+    },
+    {
+      "epoch": 40.0,
+      "grad_norm": 0.034359417855739594,
+      "learning_rate": 0.00010752688172043011,
+      "loss": 0.0065,
+      "step": 50
+    },
+    {
+      "epoch": 40.8,
+      "grad_norm": 0.05479728803038597,
+      "learning_rate": 0.0001053763440860215,
+      "loss": 0.0065,
+      "step": 51
+    },
+    {
+      "epoch": 41.6,
+      "grad_norm": 0.0569564513862133,
+      "learning_rate": 0.0001032258064516129,
+      "loss": 0.0046,
+      "step": 52
+    },
+    {
+      "epoch": 42.4,
+      "grad_norm": 0.08149953931570053,
+      "learning_rate": 0.0001010752688172043,
+      "loss": 0.0075,
+      "step": 53
+    },
+    {
+      "epoch": 43.2,
+      "grad_norm": 0.0535600371658802,
+      "learning_rate": 9.892473118279571e-05,
+      "loss": 0.006,
+      "step": 54
+    },
+    {
+      "epoch": 44.0,
+      "grad_norm": 0.03628645837306976,
+      "learning_rate": 9.677419354838711e-05,
+      "loss": 0.0053,
+      "step": 55
+    },
+    {
+      "epoch": 44.8,
+      "grad_norm": 0.041962940245866776,
+      "learning_rate": 9.46236559139785e-05,
+      "loss": 0.0061,
+      "step": 56
+    },
+    {
+      "epoch": 45.6,
+      "grad_norm": 0.04776445031166077,
+      "learning_rate": 9.247311827956989e-05,
+      "loss": 0.006,
+      "step": 57
+    },
+    {
+      "epoch": 46.4,
+      "grad_norm": 0.023789523169398308,
+      "learning_rate": 9.032258064516129e-05,
+      "loss": 0.005,
+      "step": 58
+    },
+    {
+      "epoch": 47.2,
+      "grad_norm": 0.0335339717566967,
+      "learning_rate": 8.81720430107527e-05,
+      "loss": 0.0057,
+      "step": 59
+    },
+    {
+      "epoch": 48.0,
+      "grad_norm": 0.03245672583580017,
+      "learning_rate": 8.60215053763441e-05,
+      "loss": 0.0056,
+      "step": 60
+    },
+    {
+      "epoch": 48.8,
+      "grad_norm": 0.028289852663874626,
+      "learning_rate": 8.387096774193549e-05,
+      "loss": 0.0048,
+      "step": 61
+    },
+    {
+      "epoch": 49.6,
+      "grad_norm": 0.04517865180969238,
+      "learning_rate": 8.172043010752689e-05,
+      "loss": 0.0063,
+      "step": 62
+    },
+    {
+      "epoch": 50.4,
+      "grad_norm": 0.043193262070417404,
+      "learning_rate": 7.956989247311829e-05,
+      "loss": 0.0047,
+      "step": 63
+    },
+    {
+      "epoch": 51.2,
+      "grad_norm": 0.06147604435682297,
+      "learning_rate": 7.741935483870968e-05,
+      "loss": 0.0062,
+      "step": 64
+    },
+    {
+      "epoch": 52.0,
+      "grad_norm": 0.034953437745571136,
+      "learning_rate": 7.526881720430108e-05,
+      "loss": 0.0055,
+      "step": 65
+    },
+    {
+      "epoch": 52.8,
+      "grad_norm": 0.01926315762102604,
+      "learning_rate": 7.311827956989248e-05,
+      "loss": 0.006,
+      "step": 66
+    },
+    {
+      "epoch": 53.6,
+      "grad_norm": 0.0263168103992939,
+      "learning_rate": 7.096774193548388e-05,
+      "loss": 0.0046,
+      "step": 67
+    },
+    {
+      "epoch": 54.4,
+      "grad_norm": 0.03226535767316818,
+      "learning_rate": 6.881720430107527e-05,
+      "loss": 0.0057,
+      "step": 68
+    },
+    {
+      "epoch": 55.2,
+      "grad_norm": 0.02902308478951454,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.0049,
+      "step": 69
+    },
+    {
+      "epoch": 56.0,
+      "grad_norm": 0.03194282203912735,
+      "learning_rate": 6.451612903225807e-05,
+      "loss": 0.0056,
+      "step": 70
+    },
+    {
+      "epoch": 56.8,
+      "grad_norm": 0.02228965424001217,
+      "learning_rate": 6.236559139784946e-05,
+      "loss": 0.0052,
+      "step": 71
+    },
+    {
+      "epoch": 57.6,
+      "grad_norm": 0.07022465765476227,
+      "learning_rate": 6.021505376344086e-05,
+      "loss": 0.0054,
+      "step": 72
+    },
+    {
+      "epoch": 58.4,
+      "grad_norm": 0.040501758456230164,
+      "learning_rate": 5.8064516129032266e-05,
+      "loss": 0.0054,
+      "step": 73
+    },
+    {
+      "epoch": 59.2,
+      "grad_norm": 0.02831866778433323,
+      "learning_rate": 5.5913978494623656e-05,
+      "loss": 0.005,
+      "step": 74
+    },
+    {
+      "epoch": 60.0,
+      "grad_norm": 0.021730881184339523,
+      "learning_rate": 5.3763440860215054e-05,
+      "loss": 0.0058,
+      "step": 75
+    },
+    {
+      "epoch": 60.8,
+      "grad_norm": 0.026638410985469818,
+      "learning_rate": 5.161290322580645e-05,
+      "loss": 0.0052,
+      "step": 76
+    },
+    {
+      "epoch": 61.6,
+      "grad_norm": 0.033660661429166794,
+      "learning_rate": 4.9462365591397855e-05,
+      "loss": 0.0053,
+      "step": 77
+    },
+    {
+      "epoch": 62.4,
+      "grad_norm": 0.03768754005432129,
+      "learning_rate": 4.731182795698925e-05,
+      "loss": 0.0062,
+      "step": 78
+    },
+    {
+      "epoch": 63.2,
+      "grad_norm": 0.02653289958834648,
+      "learning_rate": 4.516129032258064e-05,
+      "loss": 0.0051,
+      "step": 79
+    },
+    {
+      "epoch": 64.0,
+      "grad_norm": 0.030516313388943672,
+      "learning_rate": 4.301075268817205e-05,
+      "loss": 0.0047,
+      "step": 80
+    },
+    {
+      "epoch": 64.8,
+      "grad_norm": 0.01978660374879837,
+      "learning_rate": 4.0860215053763444e-05,
+      "loss": 0.0058,
+      "step": 81
+    },
+    {
+      "epoch": 65.6,
+      "grad_norm": 0.04382181912660599,
+      "learning_rate": 3.870967741935484e-05,
+      "loss": 0.0047,
+      "step": 82
+    },
+    {
+      "epoch": 66.4,
+      "grad_norm": 0.03583133593201637,
+      "learning_rate": 3.655913978494624e-05,
+      "loss": 0.0055,
+      "step": 83
+    },
+    {
+      "epoch": 67.2,
+      "grad_norm": 0.024245208129286766,
+      "learning_rate": 3.4408602150537636e-05,
+      "loss": 0.0047,
+      "step": 84
+    },
+    {
+      "epoch": 68.0,
+      "grad_norm": 0.018355444073677063,
+      "learning_rate": 3.2258064516129034e-05,
+      "loss": 0.0057,
+      "step": 85
+    },
+    {
+      "epoch": 68.8,
+      "grad_norm": 0.016613131389021873,
+      "learning_rate": 3.010752688172043e-05,
+      "loss": 0.0053,
+      "step": 86
+    },
+    {
+      "epoch": 69.6,
+      "grad_norm": 0.030019283294677734,
+      "learning_rate": 2.7956989247311828e-05,
+      "loss": 0.0044,
+      "step": 87
+    },
+    {
+      "epoch": 70.4,
+      "grad_norm": 0.03529616445302963,
+      "learning_rate": 2.5806451612903226e-05,
+      "loss": 0.0064,
+      "step": 88
+    },
+    {
+      "epoch": 71.2,
+      "grad_norm": 0.0599469393491745,
+      "learning_rate": 2.3655913978494626e-05,
+      "loss": 0.0059,
+      "step": 89
+    },
+    {
+      "epoch": 72.0,
+      "grad_norm": 0.026897842064499855,
+      "learning_rate": 2.1505376344086024e-05,
+      "loss": 0.0048,
+      "step": 90
+    },
+    {
+      "epoch": 72.8,
+      "grad_norm": 0.041727662086486816,
+      "learning_rate": 1.935483870967742e-05,
+      "loss": 0.005,
+      "step": 91
+    },
+    {
+      "epoch": 73.6,
+      "grad_norm": 0.05422442406415939,
+      "learning_rate": 1.7204301075268818e-05,
+      "loss": 0.0061,
+      "step": 92
+    },
+    {
+      "epoch": 74.4,
+      "grad_norm": 0.02461910806596279,
+      "learning_rate": 1.5053763440860215e-05,
+      "loss": 0.0045,
+      "step": 93
+    },
+    {
+      "epoch": 75.2,
+      "grad_norm": 0.030875274911522865,
+      "learning_rate": 1.2903225806451613e-05,
+      "loss": 0.0057,
+      "step": 94
+    },
+    {
+      "epoch": 76.0,
+      "grad_norm": 0.01852886937558651,
+      "learning_rate": 1.0752688172043012e-05,
+      "loss": 0.0044,
+      "step": 95
+    },
+    {
+      "epoch": 76.8,
+      "grad_norm": 0.025288186967372894,
+      "learning_rate": 8.602150537634409e-06,
+      "loss": 0.0061,
+      "step": 96
+    },
+    {
+      "epoch": 77.6,
+      "grad_norm": 0.024943077936768532,
+      "learning_rate": 6.451612903225806e-06,
+      "loss": 0.0043,
+      "step": 97
+    },
+    {
+      "epoch": 78.4,
+      "grad_norm": 0.027488090097904205,
+      "learning_rate": 4.3010752688172045e-06,
+      "loss": 0.0051,
+      "step": 98
+    },
+    {
+      "epoch": 79.2,
+      "grad_norm": 0.020602310076355934,
+      "learning_rate": 2.1505376344086023e-06,
+      "loss": 0.0059,
+      "step": 99
+    },
+    {
+      "epoch": 80.0,
+      "grad_norm": 0.022517595440149307,
+      "learning_rate": 0.0,
+      "loss": 0.0059,
+      "step": 100
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 100,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 100,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4.78205460086784e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:50637ac803c68c021245c732592f669baf306b89b539b39c77ea049b7f21bac6
+size 5112