Upload folder using huggingface_hub

Browse files

Files changed (14) hide show

111_128_e3_3e-5/adapter_config.json +39 -0
111_128_e3_3e-5/adapter_model.safetensors +3 -0
111_128_e3_3e-5/added_tokens.json +9 -0
111_128_e3_3e-5/all_results.json +9 -0
111_128_e3_3e-5/chat_template.jinja +62 -0
111_128_e3_3e-5/config.json +32 -0
111_128_e3_3e-5/merges.txt +0 -0
111_128_e3_3e-5/special_tokens_map.json +33 -0
111_128_e3_3e-5/tokenizer.json +0 -0
111_128_e3_3e-5/tokenizer_config.json +234 -0
111_128_e3_3e-5/train_results.json +9 -0
111_128_e3_3e-5/trainer_state.json +792 -0
111_128_e3_3e-5/training_args.bin +3 -0
111_128_e3_3e-5/vocab.json +0 -0

111_128_e3_3e-5/adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "ibm-granite/granite-3.3-8b-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 256,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "q_proj",
+    "v_proj",
+    "up_proj",
+    "down_proj",
+    "gate_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

111_128_e3_3e-5/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6bce77d98c92737ad2637763c0c1342ac90297f33b8618d73d489203335de9c1
+size 791751704

111_128_e3_3e-5/added_tokens.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "<|end_of_cite|>": 49156,
+  "<|end_of_plugin|>": 49158,
+  "<|end_of_role|>": 49153,
+  "<|start_of_cite|>": 49155,
+  "<|start_of_plugin|>": 49157,
+  "<|start_of_role|>": 49152,
+  "<|tool_call|>": 49154
+}

111_128_e3_3e-5/all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.0,
+    "total_flos": 6.759267158426911e+17,
+    "train_loss": 0.35524007510151057,
+    "train_runtime": 332.4898,
+    "train_samples": 5712,
+    "train_samples_per_second": 51.538,
+    "train_steps_per_second": 1.615
+}

111_128_e3_3e-5/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,62 @@

+{# Alias tools -> available_tools #}
+{%- if tools and not available_tools -%}
+    {%- set available_tools = tools -%}
+{%- endif -%}
+{%- if messages[0]['role'] == 'system' %}
+     {%- set system_message = messages[0]['content'] %}
+     {%- set loop_messages = messages[1:] %}
+ {%- else %}
+     {%- set system_message = "Knowledge Cutoff Date: April 2024.
+Today's Date: " + strftime_now('%B %d, %Y') + ".
+You are Granite, developed by IBM." %}
+     {%- if available_tools and documents %}
+         {%- set system_message = system_message + " You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.
+Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." %}
+     {%- elif available_tools %}
+         {%- set system_message = system_message + " You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request." %}
+     {%- elif documents %}
+         {%- set system_message = system_message + " Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." %}
+    {%- elif thinking %}
+    {%- set system_message = system_message + " You are a helpful AI assistant.
+Respond to every user query in a comprehensive and detailed way. You can write down your thoughts and reasoning process before responding. In the thought process, engage in a comprehensive cycle of analysis, summarization, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. In the response section, based on various attempts, explorations, and reflections from the thoughts section, systematically present the final solution that you deem correct. The response should summarize the thought process. Write your thoughts between <think></think> and write your response between <response></response> for each user query." %}
+     {%- else %}
+         {%- set system_message = system_message + " You are a helpful AI assistant." %}
+     {%- endif %}
+     {%- if 'citations' in controls and documents %}
+         {%- set system_message = system_message + '
+Use the symbols <|start_of_cite|> and <|end_of_cite|> to indicate when a fact comes from a document in the search result, e.g <|start_of_cite|> {document_id: 1}my fact <|end_of_cite|> for a fact from document 1. Afterwards, list all the citations with their corresponding documents in an ordered list.' %}
+     {%- endif %}
+     {%- if 'hallucinations' in controls and documents %}
+         {%- set system_message = system_message + '
+Finally, after the response is written, include a numbered list of sentences from the response with a corresponding risk value that are hallucinated and not based in the documents.' %}
+     {%- endif %}
+     {%- set loop_messages = messages %}
+ {%- endif %}
+ {{- '<|start_of_role|>system<|end_of_role|>' + system_message + '<|end_of_text|>
+' }}
+ {%- if available_tools %}
+     {{- '<|start_of_role|>available_tools<|end_of_role|>' }}
+     {{- available_tools | tojson(indent=4) }}
+     {{- '<|end_of_text|>
+' }}
+ {%- endif %}
+ {%- if documents %}
+     {%- for document in documents %}
+         {{- '<|start_of_role|>document {"document_id": "' + document['doc_id'] | string + '"}<|end_of_role|>
+' }}
+         {{- document['text'] }}
+         {{- '<|end_of_text|>
+' }}
+              {%- endfor %}
+ {%- endif %}
+ {%- for message in loop_messages %}
+     {{- '<|start_of_role|>' + message['role'] + '<|end_of_role|>' + message['content'] + '<|end_of_text|>
+' }}
+     {%- if loop.last and add_generation_prompt %}
+         {{- '<|start_of_role|>assistant' }}
+             {%- if controls %}
+                 {{- ' ' + controls | tojson()}}
+             {%- endif %}
+         {{- '<|end_of_role|>' }}
+     {%- endif %}
+ {%- endfor %}

111_128_e3_3e-5/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "architectures": [
+    "GraniteForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attention_multiplier": 0.0078125,
+  "bos_token_id": 0,
+  "embedding_multiplier": 12.0,
+  "eos_token_id": 0,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12800,
+  "logits_scaling": 16.0,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "granite",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 40,
+  "num_key_value_heads": 8,
+  "pad_token_id": 0,
+  "residual_multiplier": 0.22,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000000.0,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.52.4",
+  "use_cache": true,
+  "vocab_size": 49159
+}

111_128_e3_3e-5/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

111_128_e3_3e-5/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "additional_special_tokens": [
+    "<|start_of_role|>",
+    "<|end_of_role|>",
+    "<|tool_call|>",
+    "<|start_of_cite|>",
+    "<|end_of_cite|>",
+    "<|start_of_plugin|>",
+    "<|end_of_plugin|>"
+  ],
+  "bos_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|end_of_plugin|>",
+  "unk_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

111_128_e3_3e-5/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

111_128_e3_3e-5/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,234 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|end_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<fim_prefix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<fim_middle>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<fim_suffix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<fim_pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<filename>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<gh_stars>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<empty_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<commit_before>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<commit_msg>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "<commit_after>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "<reponame>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49152": {
+      "content": "<|start_of_role|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49153": {
+      "content": "<|end_of_role|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49154": {
+      "content": "<|tool_call|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49155": {
+      "content": "<|start_of_cite|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49156": {
+      "content": "<|end_of_cite|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49157": {
+      "content": "<|start_of_plugin|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49158": {
+      "content": "<|end_of_plugin|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|start_of_role|>",
+    "<|end_of_role|>",
+    "<|tool_call|>",
+    "<|start_of_cite|>",
+    "<|end_of_cite|>",
+    "<|start_of_plugin|>",
+    "<|end_of_plugin|>"
+  ],
+  "bos_token": "<|end_of_text|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|end_of_text|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 8192,
+  "pad_token": "<|end_of_plugin|>",
+  "padding_side": "left",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|end_of_text|>",
+  "vocab_size": 49152
+}

111_128_e3_3e-5/train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.0,
+    "total_flos": 6.759267158426911e+17,
+    "train_loss": 0.35524007510151057,
+    "train_runtime": 332.4898,
+    "train_samples": 5712,
+    "train_samples_per_second": 51.538,
+    "train_steps_per_second": 1.615
+}

111_128_e3_3e-5/trainer_state.json ADDED Viewed

	@@ -0,0 +1,792 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 537,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.028011204481792718,
+      "grad_norm": 2.40775990486145,
+      "learning_rate": 4.444444444444444e-06,
+      "loss": 1.4581,
+      "step": 5
+    },
+    {
+      "epoch": 0.056022408963585436,
+      "grad_norm": 1.115090250968933,
+      "learning_rate": 9.999999999999999e-06,
+      "loss": 1.4399,
+      "step": 10
+    },
+    {
+      "epoch": 0.08403361344537816,
+      "grad_norm": 0.5162553787231445,
+      "learning_rate": 1.5555555555555555e-05,
+      "loss": 1.344,
+      "step": 15
+    },
+    {
+      "epoch": 0.11204481792717087,
+      "grad_norm": 0.4640718996524811,
+      "learning_rate": 2.111111111111111e-05,
+      "loss": 1.3241,
+      "step": 20
+    },
+    {
+      "epoch": 0.1400560224089636,
+      "grad_norm": 0.49681198596954346,
+      "learning_rate": 2.6666666666666667e-05,
+      "loss": 1.2975,
+      "step": 25
+    },
+    {
+      "epoch": 0.16806722689075632,
+      "grad_norm": 0.46783947944641113,
+      "learning_rate": 2.999886165172246e-05,
+      "loss": 1.2227,
+      "step": 30
+    },
+    {
+      "epoch": 0.19607843137254902,
+      "grad_norm": 0.44543421268463135,
+      "learning_rate": 2.99860572177674e-05,
+      "loss": 1.2471,
+      "step": 35
+    },
+    {
+      "epoch": 0.22408963585434175,
+      "grad_norm": 0.4427475035190582,
+      "learning_rate": 2.9959037600786822e-05,
+      "loss": 1.1544,
+      "step": 40
+    },
+    {
+      "epoch": 0.25210084033613445,
+      "grad_norm": 0.4466404318809509,
+      "learning_rate": 2.99178284305241e-05,
+      "loss": 1.1098,
+      "step": 45
+    },
+    {
+      "epoch": 0.2801120448179272,
+      "grad_norm": 0.6529573202133179,
+      "learning_rate": 2.9862468796373404e-05,
+      "loss": 1.1062,
+      "step": 50
+    },
+    {
+      "epoch": 0.3081232492997199,
+      "grad_norm": 0.5325852036476135,
+      "learning_rate": 2.9793011210301036e-05,
+      "loss": 1.0315,
+      "step": 55
+    },
+    {
+      "epoch": 0.33613445378151263,
+      "grad_norm": 0.5306492447853088,
+      "learning_rate": 2.9709521557034668e-05,
+      "loss": 0.9935,
+      "step": 60
+    },
+    {
+      "epoch": 0.3641456582633053,
+      "grad_norm": 0.5105966925621033,
+      "learning_rate": 2.9612079031567654e-05,
+      "loss": 0.9316,
+      "step": 65
+    },
+    {
+      "epoch": 0.39215686274509803,
+      "grad_norm": 0.6060460209846497,
+      "learning_rate": 2.9500776064037813e-05,
+      "loss": 0.8918,
+      "step": 70
+    },
+    {
+      "epoch": 0.42016806722689076,
+      "grad_norm": 0.5678498148918152,
+      "learning_rate": 2.9375718232051835e-05,
+      "loss": 0.8628,
+      "step": 75
+    },
+    {
+      "epoch": 0.4481792717086835,
+      "grad_norm": 0.6445951461791992,
+      "learning_rate": 2.923702416053852e-05,
+      "loss": 0.8649,
+      "step": 80
+    },
+    {
+      "epoch": 0.47619047619047616,
+      "grad_norm": 0.6435570120811462,
+      "learning_rate": 2.908482540922585e-05,
+      "loss": 0.7912,
+      "step": 85
+    },
+    {
+      "epoch": 0.5042016806722689,
+      "grad_norm": 0.6712879538536072,
+      "learning_rate": 2.891926634784862e-05,
+      "loss": 0.7608,
+      "step": 90
+    },
+    {
+      "epoch": 0.5322128851540616,
+      "grad_norm": 0.868604302406311,
+      "learning_rate": 2.8740504019205006e-05,
+      "loss": 0.7475,
+      "step": 95
+    },
+    {
+      "epoch": 0.5602240896358543,
+      "grad_norm": 0.8204479217529297,
+      "learning_rate": 2.8548707990191933e-05,
+      "loss": 0.6944,
+      "step": 100
+    },
+    {
+      "epoch": 0.5882352941176471,
+      "grad_norm": 0.7384553551673889,
+      "learning_rate": 2.8344060190960646e-05,
+      "loss": 0.6628,
+      "step": 105
+    },
+    {
+      "epoch": 0.6162464985994398,
+      "grad_norm": 0.8601762652397156,
+      "learning_rate": 2.812675474234489e-05,
+      "loss": 0.7067,
+      "step": 110
+    },
+    {
+      "epoch": 0.6442577030812325,
+      "grad_norm": 0.8940423727035522,
+      "learning_rate": 2.7896997771725588e-05,
+      "loss": 0.6481,
+      "step": 115
+    },
+    {
+      "epoch": 0.6722689075630253,
+      "grad_norm": 0.9656548500061035,
+      "learning_rate": 2.76550072175065e-05,
+      "loss": 0.5928,
+      "step": 120
+    },
+    {
+      "epoch": 0.7002801120448179,
+      "grad_norm": 0.81816166639328,
+      "learning_rate": 2.7401012622386454e-05,
+      "loss": 0.5655,
+      "step": 125
+    },
+    {
+      "epoch": 0.7282913165266106,
+      "grad_norm": 0.9374477863311768,
+      "learning_rate": 2.7135254915624213e-05,
+      "loss": 0.5516,
+      "step": 130
+    },
+    {
+      "epoch": 0.7563025210084033,
+      "grad_norm": 0.9610002636909485,
+      "learning_rate": 2.6857986184502452e-05,
+      "loss": 0.5348,
+      "step": 135
+    },
+    {
+      "epoch": 0.7843137254901961,
+      "grad_norm": 0.8842208981513977,
+      "learning_rate": 2.6569469435207712e-05,
+      "loss": 0.446,
+      "step": 140
+    },
+    {
+      "epoch": 0.8123249299719888,
+      "grad_norm": 0.9869241118431091,
+      "learning_rate": 2.6269978343353103e-05,
+      "loss": 0.4654,
+      "step": 145
+    },
+    {
+      "epoch": 0.8403361344537815,
+      "grad_norm": 0.9669775366783142,
+      "learning_rate": 2.5959796994380397e-05,
+      "loss": 0.4819,
+      "step": 150
+    },
+    {
+      "epoch": 0.8683473389355743,
+      "grad_norm": 1.0169594287872314,
+      "learning_rate": 2.5639219614087803e-05,
+      "loss": 0.4374,
+      "step": 155
+    },
+    {
+      "epoch": 0.896358543417367,
+      "grad_norm": 0.8904645442962646,
+      "learning_rate": 2.530855028953894e-05,
+      "loss": 0.4245,
+      "step": 160
+    },
+    {
+      "epoch": 0.9243697478991597,
+      "grad_norm": 1.15822434425354,
+      "learning_rate": 2.496810268061787e-05,
+      "loss": 0.3903,
+      "step": 165
+    },
+    {
+      "epoch": 0.9523809523809523,
+      "grad_norm": 0.9854434728622437,
+      "learning_rate": 2.4618199722503676e-05,
+      "loss": 0.4288,
+      "step": 170
+    },
+    {
+      "epoch": 0.9803921568627451,
+      "grad_norm": 0.9128867983818054,
+      "learning_rate": 2.4259173319346894e-05,
+      "loss": 0.3667,
+      "step": 175
+    },
+    {
+      "epoch": 1.0056022408963585,
+      "grad_norm": 1.0020173788070679,
+      "learning_rate": 2.3891364029438323e-05,
+      "loss": 0.3706,
+      "step": 180
+    },
+    {
+      "epoch": 1.0336134453781514,
+      "grad_norm": 0.8739632964134216,
+      "learning_rate": 2.351512074216885e-05,
+      "loss": 0.2701,
+      "step": 185
+    },
+    {
+      "epoch": 1.061624649859944,
+      "grad_norm": 1.0513161420822144,
+      "learning_rate": 2.313080034708674e-05,
+      "loss": 0.2862,
+      "step": 190
+    },
+    {
+      "epoch": 1.0896358543417366,
+      "grad_norm": 1.0746172666549683,
+      "learning_rate": 2.273876739536627e-05,
+      "loss": 0.2913,
+      "step": 195
+    },
+    {
+      "epoch": 1.1176470588235294,
+      "grad_norm": 0.9751669764518738,
+      "learning_rate": 2.2339393754008854e-05,
+      "loss": 0.2737,
+      "step": 200
+    },
+    {
+      "epoch": 1.145658263305322,
+      "grad_norm": 1.0398063659667969,
+      "learning_rate": 2.19330582531047e-05,
+      "loss": 0.2649,
+      "step": 205
+    },
+    {
+      "epoch": 1.173669467787115,
+      "grad_norm": 1.0206215381622314,
+      "learning_rate": 2.1520146326489476e-05,
+      "loss": 0.2523,
+      "step": 210
+    },
+    {
+      "epoch": 1.2016806722689075,
+      "grad_norm": 1.0185209512710571,
+      "learning_rate": 2.1101049646137008e-05,
+      "loss": 0.244,
+      "step": 215
+    },
+    {
+      "epoch": 1.2296918767507004,
+      "grad_norm": 1.115013837814331,
+      "learning_rate": 2.0676165750634656e-05,
+      "loss": 0.2023,
+      "step": 220
+    },
+    {
+      "epoch": 1.257703081232493,
+      "grad_norm": 0.9772208333015442,
+      "learning_rate": 2.0245897668093917e-05,
+      "loss": 0.2084,
+      "step": 225
+    },
+    {
+      "epoch": 1.2857142857142856,
+      "grad_norm": 1.1734644174575806,
+      "learning_rate": 1.9810653533853826e-05,
+      "loss": 0.2245,
+      "step": 230
+    },
+    {
+      "epoch": 1.3137254901960784,
+      "grad_norm": 1.0134533643722534,
+      "learning_rate": 1.937084620333987e-05,
+      "loss": 0.1965,
+      "step": 235
+    },
+    {
+      "epoch": 1.3417366946778713,
+      "grad_norm": 1.1958626508712769,
+      "learning_rate": 1.8926892860445607e-05,
+      "loss": 0.2101,
+      "step": 240
+    },
+    {
+      "epoch": 1.3697478991596639,
+      "grad_norm": 0.9363793134689331,
+      "learning_rate": 1.847921462180847e-05,
+      "loss": 0.2012,
+      "step": 245
+    },
+    {
+      "epoch": 1.3977591036414565,
+      "grad_norm": 1.2724140882492065,
+      "learning_rate": 1.8028236137355154e-05,
+      "loss": 0.2683,
+      "step": 250
+    },
+    {
+      "epoch": 1.4257703081232493,
+      "grad_norm": 1.1918549537658691,
+      "learning_rate": 1.7574385187495396e-05,
+      "loss": 0.2037,
+      "step": 255
+    },
+    {
+      "epoch": 1.453781512605042,
+      "grad_norm": 1.0699115991592407,
+      "learning_rate": 1.7118092277346372e-05,
+      "loss": 0.1685,
+      "step": 260
+    },
+    {
+      "epoch": 1.4817927170868348,
+      "grad_norm": 0.9984414577484131,
+      "learning_rate": 1.6659790228372512e-05,
+      "loss": 0.1755,
+      "step": 265
+    },
+    {
+      "epoch": 1.5098039215686274,
+      "grad_norm": 0.944294810295105,
+      "learning_rate": 1.6199913767828126e-05,
+      "loss": 0.1869,
+      "step": 270
+    },
+    {
+      "epoch": 1.53781512605042,
+      "grad_norm": 1.014652132987976,
+      "learning_rate": 1.5738899116392254e-05,
+      "loss": 0.15,
+      "step": 275
+    },
+    {
+      "epoch": 1.5658263305322129,
+      "grad_norm": 1.103360652923584,
+      "learning_rate": 1.5277183574386947e-05,
+      "loss": 0.1487,
+      "step": 280
+    },
+    {
+      "epoch": 1.5938375350140057,
+      "grad_norm": 1.0107719898223877,
+      "learning_rate": 1.4815205106971424e-05,
+      "loss": 0.1562,
+      "step": 285
+    },
+    {
+      "epoch": 1.6218487394957983,
+      "grad_norm": 0.8109914064407349,
+      "learning_rate": 1.435340192870557e-05,
+      "loss": 0.1544,
+      "step": 290
+    },
+    {
+      "epoch": 1.649859943977591,
+      "grad_norm": 1.027950644493103,
+      "learning_rate": 1.3892212087876892e-05,
+      "loss": 0.1331,
+      "step": 295
+    },
+    {
+      "epoch": 1.6778711484593838,
+      "grad_norm": 0.8135820627212524,
+      "learning_rate": 1.3432073050985201e-05,
+      "loss": 0.1423,
+      "step": 300
+    },
+    {
+      "epoch": 1.7058823529411766,
+      "grad_norm": 0.8551586270332336,
+      "learning_rate": 1.297342128777911e-05,
+      "loss": 0.1354,
+      "step": 305
+    },
+    {
+      "epoch": 1.7338935574229692,
+      "grad_norm": 1.0230919122695923,
+      "learning_rate": 1.251669185723805e-05,
+      "loss": 0.1489,
+      "step": 310
+    },
+    {
+      "epoch": 1.7619047619047619,
+      "grad_norm": 0.9129012227058411,
+      "learning_rate": 1.2062317994892499e-05,
+      "loss": 0.116,
+      "step": 315
+    },
+    {
+      "epoch": 1.7899159663865545,
+      "grad_norm": 0.8860085010528564,
+      "learning_rate": 1.1610730701873788e-05,
+      "loss": 0.1143,
+      "step": 320
+    },
+    {
+      "epoch": 1.8179271708683473,
+      "grad_norm": 0.8771934509277344,
+      "learning_rate": 1.1162358336083466e-05,
+      "loss": 0.1234,
+      "step": 325
+    },
+    {
+      "epoch": 1.8459383753501402,
+      "grad_norm": 0.8374683856964111,
+      "learning_rate": 1.071762620586987e-05,
+      "loss": 0.1141,
+      "step": 330
+    },
+    {
+      "epoch": 1.8739495798319328,
+      "grad_norm": 1.1901110410690308,
+      "learning_rate": 1.0276956166597384e-05,
+      "loss": 0.1218,
+      "step": 335
+    },
+    {
+      "epoch": 1.9019607843137254,
+      "grad_norm": 1.016089916229248,
+      "learning_rate": 9.840766220491078e-06,
+      "loss": 0.1205,
+      "step": 340
+    },
+    {
+      "epoch": 1.9299719887955182,
+      "grad_norm": 0.8016822934150696,
+      "learning_rate": 9.40947012013629e-06,
+      "loss": 0.1089,
+      "step": 345
+    },
+    {
+      "epoch": 1.957983193277311,
+      "grad_norm": 0.7022793889045715,
+      "learning_rate": 8.98347697600922e-06,
+      "loss": 0.1014,
+      "step": 350
+    },
+    {
+      "epoch": 1.9859943977591037,
+      "grad_norm": 0.9170655608177185,
+      "learning_rate": 8.563190868410867e-06,
+      "loss": 0.0942,
+      "step": 355
+    },
+    {
+      "epoch": 2.011204481792717,
+      "grad_norm": 0.6768052577972412,
+      "learning_rate": 8.149010464172392e-06,
+      "loss": 0.097,
+      "step": 360
+    },
+    {
+      "epoch": 2.0392156862745097,
+      "grad_norm": 0.6589996814727783,
+      "learning_rate": 7.741328638495477e-06,
+      "loss": 0.0776,
+      "step": 365
+    },
+    {
+      "epoch": 2.0672268907563027,
+      "grad_norm": 0.7865478992462158,
+      "learning_rate": 7.340532102286399e-06,
+      "loss": 0.0714,
+      "step": 370
+    },
+    {
+      "epoch": 2.0952380952380953,
+      "grad_norm": 0.7239094376564026,
+      "learning_rate": 6.947001035337299e-06,
+      "loss": 0.0815,
+      "step": 375
+    },
+    {
+      "epoch": 2.123249299719888,
+      "grad_norm": 0.6165900826454163,
+      "learning_rate": 6.561108725702653e-06,
+      "loss": 0.0703,
+      "step": 380
+    },
+    {
+      "epoch": 2.1512605042016806,
+      "grad_norm": 0.4964495301246643,
+      "learning_rate": 6.1832212156129045e-06,
+      "loss": 0.0698,
+      "step": 385
+    },
+    {
+      "epoch": 2.179271708683473,
+      "grad_norm": 0.71706622838974,
+      "learning_rate": 5.813696954261253e-06,
+      "loss": 0.0808,
+      "step": 390
+    },
+    {
+      "epoch": 2.2072829131652663,
+      "grad_norm": 0.6850592494010925,
+      "learning_rate": 5.452886457792882e-06,
+      "loss": 0.0782,
+      "step": 395
+    },
+    {
+      "epoch": 2.235294117647059,
+      "grad_norm": 0.5670154094696045,
+      "learning_rate": 5.101131976819165e-06,
+      "loss": 0.0764,
+      "step": 400
+    },
+    {
+      "epoch": 2.2633053221288515,
+      "grad_norm": 0.5384048819541931,
+      "learning_rate": 4.7587671717722184e-06,
+      "loss": 0.0745,
+      "step": 405
+    },
+    {
+      "epoch": 2.291316526610644,
+      "grad_norm": 0.5550390481948853,
+      "learning_rate": 4.426116796407794e-06,
+      "loss": 0.07,
+      "step": 410
+    },
+    {
+      "epoch": 2.3193277310924367,
+      "grad_norm": 0.5582753419876099,
+      "learning_rate": 4.10349638975664e-06,
+      "loss": 0.0721,
+      "step": 415
+    },
+    {
+      "epoch": 2.34733893557423,
+      "grad_norm": 0.6903561949729919,
+      "learning_rate": 3.791211976816634e-06,
+      "loss": 0.0708,
+      "step": 420
+    },
+    {
+      "epoch": 2.3753501400560224,
+      "grad_norm": 0.6835063099861145,
+      "learning_rate": 3.489559778269545e-06,
+      "loss": 0.0664,
+      "step": 425
+    },
+    {
+      "epoch": 2.403361344537815,
+      "grad_norm": 0.6259592771530151,
+      "learning_rate": 3.198825929497752e-06,
+      "loss": 0.0761,
+      "step": 430
+    },
+    {
+      "epoch": 2.431372549019608,
+      "grad_norm": 0.6236569285392761,
+      "learning_rate": 2.919286209167511e-06,
+      "loss": 0.0688,
+      "step": 435
+    },
+    {
+      "epoch": 2.4593837535014007,
+      "grad_norm": 0.5482921004295349,
+      "learning_rate": 2.6512057776361935e-06,
+      "loss": 0.0691,
+      "step": 440
+    },
+    {
+      "epoch": 2.4873949579831933,
+      "grad_norm": 0.5397653579711914,
+      "learning_rate": 2.394838925431611e-06,
+      "loss": 0.0599,
+      "step": 445
+    },
+    {
+      "epoch": 2.515406162464986,
+      "grad_norm": 0.5335206389427185,
+      "learning_rate": 2.1504288320420613e-06,
+      "loss": 0.0638,
+      "step": 450
+    },
+    {
+      "epoch": 2.5434173669467786,
+      "grad_norm": 0.5270382761955261,
+      "learning_rate": 1.9182073352458375e-06,
+      "loss": 0.0596,
+      "step": 455
+    },
+    {
+      "epoch": 2.571428571428571,
+      "grad_norm": 0.5109578371047974,
+      "learning_rate": 1.6983947111990717e-06,
+      "loss": 0.0774,
+      "step": 460
+    },
+    {
+      "epoch": 2.5994397759103642,
+      "grad_norm": 0.5798667669296265,
+      "learning_rate": 1.4911994654904404e-06,
+      "loss": 0.0611,
+      "step": 465
+    },
+    {
+      "epoch": 2.627450980392157,
+      "grad_norm": 0.6446117162704468,
+      "learning_rate": 1.2968181353609854e-06,
+      "loss": 0.0652,
+      "step": 470
+    },
+    {
+      "epoch": 2.6554621848739495,
+      "grad_norm": 0.5747822523117065,
+      "learning_rate": 1.115435103276657e-06,
+      "loss": 0.0677,
+      "step": 475
+    },
+    {
+      "epoch": 2.6834733893557425,
+      "grad_norm": 0.572148323059082,
+      "learning_rate": 9.472224220303427e-07,
+      "loss": 0.0547,
+      "step": 480
+    },
+    {
+      "epoch": 2.711484593837535,
+      "grad_norm": 0.4652992784976959,
+      "learning_rate": 7.923396515393966e-07,
+      "loss": 0.066,
+      "step": 485
+    },
+    {
+      "epoch": 2.7394957983193278,
+      "grad_norm": 0.6926423907279968,
+      "learning_rate": 6.509337074933891e-07,
+      "loss": 0.0592,
+      "step": 490
+    },
+    {
+      "epoch": 2.7675070028011204,
+      "grad_norm": 0.5990536212921143,
+      "learning_rate": 5.231387219956645e-07,
+      "loss": 0.0714,
+      "step": 495
+    },
+    {
+      "epoch": 2.795518207282913,
+      "grad_norm": 0.5518683791160583,
+      "learning_rate": 4.090759163309282e-07,
+      "loss": 0.0636,
+      "step": 500
+    },
+    {
+      "epoch": 2.8235294117647056,
+      "grad_norm": 0.5012083053588867,
+      "learning_rate": 3.088534859795117e-07,
+      "loss": 0.0572,
+      "step": 505
+    },
+    {
+      "epoch": 2.8515406162464987,
+      "grad_norm": 0.6362314820289612,
+      "learning_rate": 2.2256649798740204e-07,
+      "loss": 0.0682,
+      "step": 510
+    },
+    {
+      "epoch": 2.8795518207282913,
+      "grad_norm": 0.5269286036491394,
+      "learning_rate": 1.5029680078939567e-07,
+      "loss": 0.0679,
+      "step": 515
+    },
+    {
+      "epoch": 2.907563025210084,
+      "grad_norm": 0.5155965685844421,
+      "learning_rate": 9.211294657089587e-08,
+      "loss": 0.0582,
+      "step": 520
+    },
+    {
+      "epoch": 2.935574229691877,
+      "grad_norm": 0.4053967297077179,
+      "learning_rate": 4.807012624201468e-08,
+      "loss": 0.0531,
+      "step": 525
+    },
+    {
+      "epoch": 2.9635854341736696,
+      "grad_norm": 0.5257096886634827,
+      "learning_rate": 1.8210117085651902e-08,
+      "loss": 0.0696,
+      "step": 530
+    },
+    {
+      "epoch": 2.991596638655462,
+      "grad_norm": 0.5298425555229187,
+      "learning_rate": 2.561243129209356e-09,
+      "loss": 0.062,
+      "step": 535
+    },
+    {
+      "epoch": 3.0,
+      "step": 537,
+      "total_flos": 6.759267158426911e+17,
+      "train_loss": 0.35524007510151057,
+      "train_runtime": 332.4898,
+      "train_samples_per_second": 51.538,
+      "train_steps_per_second": 1.615
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 537,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 20000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6.759267158426911e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

111_128_e3_3e-5/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca96e868ae4f4335ed46635ef5a94ef5af53fa05f6b3ac39137e242cfb64e51c
+size 8273

111_128_e3_3e-5/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff