Upload folder using huggingface_hub

Browse files

Files changed (14) hide show

107_128_e3_3e-5/adapter_config.json +39 -0
107_128_e3_3e-5/adapter_model.safetensors +3 -0
107_128_e3_3e-5/added_tokens.json +9 -0
107_128_e3_3e-5/all_results.json +9 -0
107_128_e3_3e-5/chat_template.jinja +62 -0
107_128_e3_3e-5/config.json +32 -0
107_128_e3_3e-5/merges.txt +0 -0
107_128_e3_3e-5/special_tokens_map.json +33 -0
107_128_e3_3e-5/tokenizer.json +0 -0
107_128_e3_3e-5/tokenizer_config.json +234 -0
107_128_e3_3e-5/train_results.json +9 -0
107_128_e3_3e-5/trainer_state.json +673 -0
107_128_e3_3e-5/training_args.bin +3 -0
107_128_e3_3e-5/vocab.json +0 -0

107_128_e3_3e-5/adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "ibm-granite/granite-3.3-8b-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 256,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "o_proj",
+    "gate_proj",
+    "k_proj",
+    "up_proj",
+    "q_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

107_128_e3_3e-5/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3441ebf5457b8a9e4fe485d145a4842ab70c788b63dcfb3aa02e8cbd6b1a70
+size 791751704

107_128_e3_3e-5/added_tokens.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "<|end_of_cite|>": 49156,
+  "<|end_of_plugin|>": 49158,
+  "<|end_of_role|>": 49153,
+  "<|start_of_cite|>": 49155,
+  "<|start_of_plugin|>": 49157,
+  "<|start_of_role|>": 49152,
+  "<|tool_call|>": 49154
+}

107_128_e3_3e-5/all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.0,
+    "total_flos": 6.581916459000136e+17,
+    "train_loss": 0.3717870048142427,
+    "train_runtime": 302.2852,
+    "train_samples": 4830,
+    "train_samples_per_second": 47.935,
+    "train_steps_per_second": 1.499
+}

107_128_e3_3e-5/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,62 @@

+{# Alias tools -> available_tools #}
+{%- if tools and not available_tools -%}
+    {%- set available_tools = tools -%}
+{%- endif -%}
+{%- if messages[0]['role'] == 'system' %}
+     {%- set system_message = messages[0]['content'] %}
+     {%- set loop_messages = messages[1:] %}
+ {%- else %}
+     {%- set system_message = "Knowledge Cutoff Date: April 2024.
+Today's Date: " + strftime_now('%B %d, %Y') + ".
+You are Granite, developed by IBM." %}
+     {%- if available_tools and documents %}
+         {%- set system_message = system_message + " You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.
+Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." %}
+     {%- elif available_tools %}
+         {%- set system_message = system_message + " You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request." %}
+     {%- elif documents %}
+         {%- set system_message = system_message + " Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." %}
+    {%- elif thinking %}
+    {%- set system_message = system_message + " You are a helpful AI assistant.
+Respond to every user query in a comprehensive and detailed way. You can write down your thoughts and reasoning process before responding. In the thought process, engage in a comprehensive cycle of analysis, summarization, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. In the response section, based on various attempts, explorations, and reflections from the thoughts section, systematically present the final solution that you deem correct. The response should summarize the thought process. Write your thoughts between <think></think> and write your response between <response></response> for each user query." %}
+     {%- else %}
+         {%- set system_message = system_message + " You are a helpful AI assistant." %}
+     {%- endif %}
+     {%- if 'citations' in controls and documents %}
+         {%- set system_message = system_message + '
+Use the symbols <|start_of_cite|> and <|end_of_cite|> to indicate when a fact comes from a document in the search result, e.g <|start_of_cite|> {document_id: 1}my fact <|end_of_cite|> for a fact from document 1. Afterwards, list all the citations with their corresponding documents in an ordered list.' %}
+     {%- endif %}
+     {%- if 'hallucinations' in controls and documents %}
+         {%- set system_message = system_message + '
+Finally, after the response is written, include a numbered list of sentences from the response with a corresponding risk value that are hallucinated and not based in the documents.' %}
+     {%- endif %}
+     {%- set loop_messages = messages %}
+ {%- endif %}
+ {{- '<|start_of_role|>system<|end_of_role|>' + system_message + '<|end_of_text|>
+' }}
+ {%- if available_tools %}
+     {{- '<|start_of_role|>available_tools<|end_of_role|>' }}
+     {{- available_tools | tojson(indent=4) }}
+     {{- '<|end_of_text|>
+' }}
+ {%- endif %}
+ {%- if documents %}
+     {%- for document in documents %}
+         {{- '<|start_of_role|>document {"document_id": "' + document['doc_id'] | string + '"}<|end_of_role|>
+' }}
+         {{- document['text'] }}
+         {{- '<|end_of_text|>
+' }}
+              {%- endfor %}
+ {%- endif %}
+ {%- for message in loop_messages %}
+     {{- '<|start_of_role|>' + message['role'] + '<|end_of_role|>' + message['content'] + '<|end_of_text|>
+' }}
+     {%- if loop.last and add_generation_prompt %}
+         {{- '<|start_of_role|>assistant' }}
+             {%- if controls %}
+                 {{- ' ' + controls | tojson()}}
+             {%- endif %}
+         {{- '<|end_of_role|>' }}
+     {%- endif %}
+ {%- endfor %}

107_128_e3_3e-5/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "architectures": [
+    "GraniteForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attention_multiplier": 0.0078125,
+  "bos_token_id": 0,
+  "embedding_multiplier": 12.0,
+  "eos_token_id": 0,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12800,
+  "logits_scaling": 16.0,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "granite",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 40,
+  "num_key_value_heads": 8,
+  "pad_token_id": 0,
+  "residual_multiplier": 0.22,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000000.0,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.52.4",
+  "use_cache": true,
+  "vocab_size": 49159
+}

107_128_e3_3e-5/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

107_128_e3_3e-5/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "additional_special_tokens": [
+    "<|start_of_role|>",
+    "<|end_of_role|>",
+    "<|tool_call|>",
+    "<|start_of_cite|>",
+    "<|end_of_cite|>",
+    "<|start_of_plugin|>",
+    "<|end_of_plugin|>"
+  ],
+  "bos_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|end_of_plugin|>",
+  "unk_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

107_128_e3_3e-5/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

107_128_e3_3e-5/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,234 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|end_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<fim_prefix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<fim_middle>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<fim_suffix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<fim_pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<filename>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<gh_stars>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<empty_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<commit_before>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<commit_msg>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "<commit_after>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "<reponame>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49152": {
+      "content": "<|start_of_role|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49153": {
+      "content": "<|end_of_role|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49154": {
+      "content": "<|tool_call|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49155": {
+      "content": "<|start_of_cite|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49156": {
+      "content": "<|end_of_cite|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49157": {
+      "content": "<|start_of_plugin|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49158": {
+      "content": "<|end_of_plugin|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|start_of_role|>",
+    "<|end_of_role|>",
+    "<|tool_call|>",
+    "<|start_of_cite|>",
+    "<|end_of_cite|>",
+    "<|start_of_plugin|>",
+    "<|end_of_plugin|>"
+  ],
+  "bos_token": "<|end_of_text|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|end_of_text|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 8192,
+  "pad_token": "<|end_of_plugin|>",
+  "padding_side": "left",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|end_of_text|>",
+  "vocab_size": 49152
+}

107_128_e3_3e-5/train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.0,
+    "total_flos": 6.581916459000136e+17,
+    "train_loss": 0.3717870048142427,
+    "train_runtime": 302.2852,
+    "train_samples": 4830,
+    "train_samples_per_second": 47.935,
+    "train_steps_per_second": 1.499
+}

107_128_e3_3e-5/trainer_state.json ADDED Viewed

	@@ -0,0 +1,673 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 453,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.033112582781456956,
+      "grad_norm": 2.674469232559204,
+      "learning_rate": 5.2173913043478265e-06,
+      "loss": 1.423,
+      "step": 5
+    },
+    {
+      "epoch": 0.06622516556291391,
+      "grad_norm": 1.0422779321670532,
+      "learning_rate": 1.173913043478261e-05,
+      "loss": 1.425,
+      "step": 10
+    },
+    {
+      "epoch": 0.09933774834437085,
+      "grad_norm": 0.519767701625824,
+      "learning_rate": 1.8260869565217393e-05,
+      "loss": 1.2741,
+      "step": 15
+    },
+    {
+      "epoch": 0.13245033112582782,
+      "grad_norm": 0.665399432182312,
+      "learning_rate": 2.4782608695652175e-05,
+      "loss": 1.2822,
+      "step": 20
+    },
+    {
+      "epoch": 0.16556291390728478,
+      "grad_norm": 0.4459891617298126,
+      "learning_rate": 2.999959966628585e-05,
+      "loss": 1.2438,
+      "step": 25
+    },
+    {
+      "epoch": 0.1986754966887417,
+      "grad_norm": 0.44003036618232727,
+      "learning_rate": 2.9985590229902075e-05,
+      "loss": 1.1643,
+      "step": 30
+    },
+    {
+      "epoch": 0.23178807947019867,
+      "grad_norm": 0.5263968110084534,
+      "learning_rate": 2.9951585471628692e-05,
+      "loss": 1.1186,
+      "step": 35
+    },
+    {
+      "epoch": 0.26490066225165565,
+      "grad_norm": 0.43545088171958923,
+      "learning_rate": 2.9897630764125496e-05,
+      "loss": 1.0956,
+      "step": 40
+    },
+    {
+      "epoch": 0.2980132450331126,
+      "grad_norm": 0.43640759587287903,
+      "learning_rate": 2.982379809933459e-05,
+      "loss": 1.0598,
+      "step": 45
+    },
+    {
+      "epoch": 0.33112582781456956,
+      "grad_norm": 0.5018694400787354,
+      "learning_rate": 2.9730185992421254e-05,
+      "loss": 1.0436,
+      "step": 50
+    },
+    {
+      "epoch": 0.36423841059602646,
+      "grad_norm": 0.46776172518730164,
+      "learning_rate": 2.9616919350324878e-05,
+      "loss": 0.9824,
+      "step": 55
+    },
+    {
+      "epoch": 0.3973509933774834,
+      "grad_norm": 0.5161912441253662,
+      "learning_rate": 2.9484149305095205e-05,
+      "loss": 0.9432,
+      "step": 60
+    },
+    {
+      "epoch": 0.4304635761589404,
+      "grad_norm": 0.5169657468795776,
+      "learning_rate": 2.9332053012236348e-05,
+      "loss": 0.8702,
+      "step": 65
+    },
+    {
+      "epoch": 0.46357615894039733,
+      "grad_norm": 0.5337439775466919,
+      "learning_rate": 2.9160833414327637e-05,
+      "loss": 0.8459,
+      "step": 70
+    },
+    {
+      "epoch": 0.4966887417218543,
+      "grad_norm": 0.58100426197052,
+      "learning_rate": 2.8970718970236647e-05,
+      "loss": 0.8072,
+      "step": 75
+    },
+    {
+      "epoch": 0.5298013245033113,
+      "grad_norm": 0.7042685747146606,
+      "learning_rate": 2.8761963350285812e-05,
+      "loss": 0.7613,
+      "step": 80
+    },
+    {
+      "epoch": 0.5629139072847682,
+      "grad_norm": 0.7149097323417664,
+      "learning_rate": 2.8534845097779284e-05,
+      "loss": 0.7864,
+      "step": 85
+    },
+    {
+      "epoch": 0.5960264900662252,
+      "grad_norm": 0.8003250956535339,
+      "learning_rate": 2.828966725734167e-05,
+      "loss": 0.6808,
+      "step": 90
+    },
+    {
+      "epoch": 0.6291390728476821,
+      "grad_norm": 0.8074373602867126,
+      "learning_rate": 2.8026756970564636e-05,
+      "loss": 0.6902,
+      "step": 95
+    },
+    {
+      "epoch": 0.6622516556291391,
+      "grad_norm": 0.7476047873497009,
+      "learning_rate": 2.7746465039500786e-05,
+      "loss": 0.6165,
+      "step": 100
+    },
+    {
+      "epoch": 0.695364238410596,
+      "grad_norm": 0.8559985756874084,
+      "learning_rate": 2.7449165458587357e-05,
+      "loss": 0.6168,
+      "step": 105
+    },
+    {
+      "epoch": 0.7284768211920529,
+      "grad_norm": 0.8710569143295288,
+      "learning_rate": 2.7135254915624213e-05,
+      "loss": 0.5953,
+      "step": 110
+    },
+    {
+      "epoch": 0.7615894039735099,
+      "grad_norm": 0.8935418725013733,
+      "learning_rate": 2.680515226247203e-05,
+      "loss": 0.5271,
+      "step": 115
+    },
+    {
+      "epoch": 0.7947019867549668,
+      "grad_norm": 0.890937864780426,
+      "learning_rate": 2.6459297956176887e-05,
+      "loss": 0.4906,
+      "step": 120
+    },
+    {
+      "epoch": 0.8278145695364238,
+      "grad_norm": 0.9725600481033325,
+      "learning_rate": 2.6098153471266992e-05,
+      "loss": 0.5357,
+      "step": 125
+    },
+    {
+      "epoch": 0.8609271523178808,
+      "grad_norm": 0.8549191951751709,
+      "learning_rate": 2.5722200684005718e-05,
+      "loss": 0.4919,
+      "step": 130
+    },
+    {
+      "epoch": 0.8940397350993378,
+      "grad_norm": 0.8652534484863281,
+      "learning_rate": 2.5331941229422532e-05,
+      "loss": 0.4868,
+      "step": 135
+    },
+    {
+      "epoch": 0.9271523178807947,
+      "grad_norm": 0.9563116431236267,
+      "learning_rate": 2.492789583197975e-05,
+      "loss": 0.4055,
+      "step": 140
+    },
+    {
+      "epoch": 0.9602649006622517,
+      "grad_norm": 0.9283990859985352,
+      "learning_rate": 2.4510603610768167e-05,
+      "loss": 0.427,
+      "step": 145
+    },
+    {
+      "epoch": 0.9933774834437086,
+      "grad_norm": 1.0428991317749023,
+      "learning_rate": 2.408062136015872e-05,
+      "loss": 0.3684,
+      "step": 150
+    },
+    {
+      "epoch": 1.0264900662251655,
+      "grad_norm": 0.8546621799468994,
+      "learning_rate": 2.363852280686994e-05,
+      "loss": 0.3329,
+      "step": 155
+    },
+    {
+      "epoch": 1.0596026490066226,
+      "grad_norm": 0.9985082745552063,
+      "learning_rate": 2.3184897844442498e-05,
+      "loss": 0.3369,
+      "step": 160
+    },
+    {
+      "epoch": 1.0927152317880795,
+      "grad_norm": 0.8879198431968689,
+      "learning_rate": 2.2720351746142368e-05,
+      "loss": 0.2891,
+      "step": 165
+    },
+    {
+      "epoch": 1.1258278145695364,
+      "grad_norm": 1.0966527462005615,
+      "learning_rate": 2.224550435734272e-05,
+      "loss": 0.2909,
+      "step": 170
+    },
+    {
+      "epoch": 1.1589403973509933,
+      "grad_norm": 0.998546302318573,
+      "learning_rate": 2.1760989268462184e-05,
+      "loss": 0.3399,
+      "step": 175
+    },
+    {
+      "epoch": 1.1920529801324504,
+      "grad_norm": 1.003989338874817,
+      "learning_rate": 2.1267452969563156e-05,
+      "loss": 0.29,
+      "step": 180
+    },
+    {
+      "epoch": 1.2251655629139073,
+      "grad_norm": 1.1075700521469116,
+      "learning_rate": 2.0765553987737942e-05,
+      "loss": 0.2797,
+      "step": 185
+    },
+    {
+      "epoch": 1.2582781456953642,
+      "grad_norm": 0.9683902263641357,
+      "learning_rate": 2.025596200843394e-05,
+      "loss": 0.268,
+      "step": 190
+    },
+    {
+      "epoch": 1.2913907284768211,
+      "grad_norm": 0.9466782808303833,
+      "learning_rate": 1.973935698189017e-05,
+      "loss": 0.2443,
+      "step": 195
+    },
+    {
+      "epoch": 1.3245033112582782,
+      "grad_norm": 1.0267927646636963,
+      "learning_rate": 1.9216428215877427e-05,
+      "loss": 0.2413,
+      "step": 200
+    },
+    {
+      "epoch": 1.3576158940397351,
+      "grad_norm": 0.8517641425132751,
+      "learning_rate": 1.86878734559527e-05,
+      "loss": 0.2484,
+      "step": 205
+    },
+    {
+      "epoch": 1.390728476821192,
+      "grad_norm": 0.9414626955986023,
+      "learning_rate": 1.8154397954454996e-05,
+      "loss": 0.2232,
+      "step": 210
+    },
+    {
+      "epoch": 1.423841059602649,
+      "grad_norm": 0.9601985216140747,
+      "learning_rate": 1.7616713529484835e-05,
+      "loss": 0.2094,
+      "step": 215
+    },
+    {
+      "epoch": 1.4569536423841059,
+      "grad_norm": 0.9515155553817749,
+      "learning_rate": 1.7075537615123045e-05,
+      "loss": 0.216,
+      "step": 220
+    },
+    {
+      "epoch": 1.490066225165563,
+      "grad_norm": 1.1002708673477173,
+      "learning_rate": 1.653159230415614e-05,
+      "loss": 0.1893,
+      "step": 225
+    },
+    {
+      "epoch": 1.5231788079470199,
+      "grad_norm": 0.9291001558303833,
+      "learning_rate": 1.5985603384585545e-05,
+      "loss": 0.1722,
+      "step": 230
+    },
+    {
+      "epoch": 1.5562913907284768,
+      "grad_norm": 0.9336148500442505,
+      "learning_rate": 1.54382993712063e-05,
+      "loss": 0.1896,
+      "step": 235
+    },
+    {
+      "epoch": 1.589403973509934,
+      "grad_norm": 1.026413083076477,
+      "learning_rate": 1.4890410533547405e-05,
+      "loss": 0.1721,
+      "step": 240
+    },
+    {
+      "epoch": 1.6225165562913908,
+      "grad_norm": 1.05998694896698,
+      "learning_rate": 1.4342667921470813e-05,
+      "loss": 0.1833,
+      "step": 245
+    },
+    {
+      "epoch": 1.6556291390728477,
+      "grad_norm": 0.8145864605903625,
+      "learning_rate": 1.3795802389729185e-05,
+      "loss": 0.1726,
+      "step": 250
+    },
+    {
+      "epoch": 1.6887417218543046,
+      "grad_norm": 0.9738907814025879,
+      "learning_rate": 1.3250543622784051e-05,
+      "loss": 0.1454,
+      "step": 255
+    },
+    {
+      "epoch": 1.7218543046357615,
+      "grad_norm": 0.9352027177810669,
+      "learning_rate": 1.2707619161185425e-05,
+      "loss": 0.1526,
+      "step": 260
+    },
+    {
+      "epoch": 1.7549668874172184,
+      "grad_norm": 1.009630799293518,
+      "learning_rate": 1.2167753430812096e-05,
+      "loss": 0.1452,
+      "step": 265
+    },
+    {
+      "epoch": 1.7880794701986755,
+      "grad_norm": 1.1254810094833374,
+      "learning_rate": 1.1631666776267804e-05,
+      "loss": 0.1537,
+      "step": 270
+    },
+    {
+      "epoch": 1.8211920529801324,
+      "grad_norm": 0.8762228488922119,
+      "learning_rate": 1.110007449972309e-05,
+      "loss": 0.1577,
+      "step": 275
+    },
+    {
+      "epoch": 1.8543046357615895,
+      "grad_norm": 0.7721819281578064,
+      "learning_rate": 1.0573685906485283e-05,
+      "loss": 0.1345,
+      "step": 280
+    },
+    {
+      "epoch": 1.8874172185430464,
+      "grad_norm": 0.9874298572540283,
+      "learning_rate": 1.0053203358570116e-05,
+      "loss": 0.1219,
+      "step": 285
+    },
+    {
+      "epoch": 1.9205298013245033,
+      "grad_norm": 0.7296121120452881,
+      "learning_rate": 9.5393213375378e-06,
+      "loss": 0.1226,
+      "step": 290
+    },
+    {
+      "epoch": 1.9536423841059603,
+      "grad_norm": 0.9243085980415344,
+      "learning_rate": 9.032725517844005e-06,
+      "loss": 0.1244,
+      "step": 295
+    },
+    {
+      "epoch": 1.9867549668874172,
+      "grad_norm": 0.84622722864151,
+      "learning_rate": 8.534091851942215e-06,
+      "loss": 0.1115,
+      "step": 300
+    },
+    {
+      "epoch": 2.019867549668874,
+      "grad_norm": 0.7075161933898926,
+      "learning_rate": 8.04408566835814e-06,
+      "loss": 0.1085,
+      "step": 305
+    },
+    {
+      "epoch": 2.052980132450331,
+      "grad_norm": 0.694309413433075,
+      "learning_rate": 7.563360783939724e-06,
+      "loss": 0.0907,
+      "step": 310
+    },
+    {
+      "epoch": 2.0860927152317883,
+      "grad_norm": 0.8202969431877136,
+      "learning_rate": 7.092558631467178e-06,
+      "loss": 0.098,
+      "step": 315
+    },
+    {
+      "epoch": 2.119205298013245,
+      "grad_norm": 0.6930757761001587,
+      "learning_rate": 6.632307403787139e-06,
+      "loss": 0.0892,
+      "step": 320
+    },
+    {
+      "epoch": 2.152317880794702,
+      "grad_norm": 0.6235249638557434,
+      "learning_rate": 6.1832212156129045e-06,
+      "loss": 0.0924,
+      "step": 325
+    },
+    {
+      "epoch": 2.185430463576159,
+      "grad_norm": 0.8508549332618713,
+      "learning_rate": 5.7458992841091544e-06,
+      "loss": 0.0987,
+      "step": 330
+    },
+    {
+      "epoch": 2.218543046357616,
+      "grad_norm": 0.6876497864723206,
+      "learning_rate": 5.320925129354552e-06,
+      "loss": 0.0911,
+      "step": 335
+    },
+    {
+      "epoch": 2.251655629139073,
+      "grad_norm": 0.6546669006347656,
+      "learning_rate": 4.908865795749e-06,
+      "loss": 0.0794,
+      "step": 340
+    },
+    {
+      "epoch": 2.2847682119205297,
+      "grad_norm": 0.6654724478721619,
+      "learning_rate": 4.5102710954044495e-06,
+      "loss": 0.0754,
+      "step": 345
+    },
+    {
+      "epoch": 2.3178807947019866,
+      "grad_norm": 0.8972987532615662,
+      "learning_rate": 4.125672874528797e-06,
+      "loss": 0.075,
+      "step": 350
+    },
+    {
+      "epoch": 2.3509933774834435,
+      "grad_norm": 0.6000303626060486,
+      "learning_rate": 3.755584303781763e-06,
+      "loss": 0.0794,
+      "step": 355
+    },
+    {
+      "epoch": 2.384105960264901,
+      "grad_norm": 0.6298790574073792,
+      "learning_rate": 3.4004991935496005e-06,
+      "loss": 0.0746,
+      "step": 360
+    },
+    {
+      "epoch": 2.4172185430463577,
+      "grad_norm": 0.641676127910614,
+      "learning_rate": 3.060891335052303e-06,
+      "loss": 0.076,
+      "step": 365
+    },
+    {
+      "epoch": 2.4503311258278146,
+      "grad_norm": 0.6149706244468689,
+      "learning_rate": 2.7372138681624246e-06,
+      "loss": 0.0724,
+      "step": 370
+    },
+    {
+      "epoch": 2.4834437086092715,
+      "grad_norm": 0.5954145193099976,
+      "learning_rate": 2.429898676779072e-06,
+      "loss": 0.085,
+      "step": 375
+    },
+    {
+      "epoch": 2.5165562913907285,
+      "grad_norm": 0.6012973189353943,
+      "learning_rate": 2.1393558125638067e-06,
+      "loss": 0.0815,
+      "step": 380
+    },
+    {
+      "epoch": 2.5496688741721854,
+      "grad_norm": 0.5832419991493225,
+      "learning_rate": 1.8659729478073768e-06,
+      "loss": 0.0744,
+      "step": 385
+    },
+    {
+      "epoch": 2.5827814569536423,
+      "grad_norm": 0.5609897971153259,
+      "learning_rate": 1.6101148581573276e-06,
+      "loss": 0.0722,
+      "step": 390
+    },
+    {
+      "epoch": 2.6158940397350996,
+      "grad_norm": 0.5528598427772522,
+      "learning_rate": 1.3721229358966547e-06,
+      "loss": 0.0705,
+      "step": 395
+    },
+    {
+      "epoch": 2.6490066225165565,
+      "grad_norm": 0.5859752893447876,
+      "learning_rate": 1.1523147344229717e-06,
+      "loss": 0.067,
+      "step": 400
+    },
+    {
+      "epoch": 2.6821192052980134,
+      "grad_norm": 0.6291056871414185,
+      "learning_rate": 9.509835445359738e-07,
+      "loss": 0.0736,
+      "step": 405
+    },
+    {
+      "epoch": 2.7152317880794703,
+      "grad_norm": 0.5751920938491821,
+      "learning_rate": 7.683980030985655e-07,
+      "loss": 0.0709,
+      "step": 410
+    },
+    {
+      "epoch": 2.748344370860927,
+      "grad_norm": 0.4950980246067047,
+      "learning_rate": 6.048017345938339e-07,
+      "loss": 0.0679,
+      "step": 415
+    },
+    {
+      "epoch": 2.781456953642384,
+      "grad_norm": 0.47585582733154297,
+      "learning_rate": 4.604130260560874e-07,
+      "loss": 0.0708,
+      "step": 420
+    },
+    {
+      "epoch": 2.814569536423841,
+      "grad_norm": 0.4346020221710205,
+      "learning_rate": 3.3542453580976795e-07,
+      "loss": 0.0665,
+      "step": 425
+    },
+    {
+      "epoch": 2.847682119205298,
+      "grad_norm": 0.6238631010055542,
+      "learning_rate": 2.300030364048139e-07,
+      "loss": 0.072,
+      "step": 430
+    },
+    {
+      "epoch": 2.880794701986755,
+      "grad_norm": 0.5560439825057983,
+      "learning_rate": 1.4428919209150294e-07,
+      "loss": 0.0646,
+      "step": 435
+    },
+    {
+      "epoch": 2.9139072847682117,
+      "grad_norm": 0.49565455317497253,
+      "learning_rate": 7.839737113168932e-08,
+      "loss": 0.07,
+      "step": 440
+    },
+    {
+      "epoch": 2.9470198675496686,
+      "grad_norm": 0.552863359451294,
+      "learning_rate": 3.241549319685622e-08,
+      "loss": 0.07,
+      "step": 445
+    },
+    {
+      "epoch": 2.980132450331126,
+      "grad_norm": 0.5463277101516724,
+      "learning_rate": 6.404912056610091e-09,
+      "loss": 0.0614,
+      "step": 450
+    },
+    {
+      "epoch": 3.0,
+      "step": 453,
+      "total_flos": 6.581916459000136e+17,
+      "train_loss": 0.3717870048142427,
+      "train_runtime": 302.2852,
+      "train_samples_per_second": 47.935,
+      "train_steps_per_second": 1.499
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 453,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 20000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6.581916459000136e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

107_128_e3_3e-5/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:30874acb043b7799c9cba323b842715e8140ecb1fb80602c4991278cb96be4a2
+size 8273

107_128_e3_3e-5/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff