Upload folder using huggingface_hub
- 26_128_e3_3e-5/adapter_config.json +39 -0
- 26_128_e3_3e-5/adapter_model.safetensors +3 -0
- 26_128_e3_3e-5/added_tokens.json +9 -0
- 26_128_e3_3e-5/all_results.json +9 -0
- 26_128_e3_3e-5/chat_template.jinja +62 -0
- 26_128_e3_3e-5/config.json +32 -0
- 26_128_e3_3e-5/merges.txt +0 -0
- 26_128_e3_3e-5/special_tokens_map.json +33 -0
- 26_128_e3_3e-5/tokenizer.json +0 -0
- 26_128_e3_3e-5/tokenizer_config.json +234 -0
- 26_128_e3_3e-5/train_results.json +9 -0
- 26_128_e3_3e-5/trainer_state.json +953 -0
- 26_128_e3_3e-5/training_args.bin +3 -0
- 26_128_e3_3e-5/vocab.json +0 -0
26_128_e3_3e-5/adapter_config.json
ADDED
@@ -0,0 +1,39 @@
{
    "alpha_pattern": {},
    "auto_mapping": null,
    "base_model_name_or_path": "ibm-granite/granite-3.3-8b-instruct",
    "bias": "none",
    "corda_config": null,
    "eva_config": null,
    "exclude_modules": null,
    "fan_in_fan_out": false,
    "inference_mode": true,
    "init_lora_weights": true,
    "layer_replication": null,
    "layers_pattern": null,
    "layers_to_transform": null,
    "loftq_config": {},
    "lora_alpha": 256,
    "lora_bias": false,
    "lora_dropout": 0.05,
    "megatron_config": null,
    "megatron_core": "megatron.core",
    "modules_to_save": null,
    "peft_type": "LORA",
    "r": 128,
    "rank_pattern": {},
    "revision": null,
    "target_modules": [
        "up_proj",
        "k_proj",
        "o_proj",
        "q_proj",
        "down_proj",
        "gate_proj",
        "v_proj"
    ],
    "task_type": "CAUSAL_LM",
    "trainable_token_indices": null,
    "use_dora": false,
    "use_rslora": false
}
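
This is a standard PEFT LoRA configuration: rank r=128, alpha 256, dropout 0.05, applied to every attention and MLP projection of ibm-granite/granite-3.3-8b-instruct. The folder name 26_128_e3_3e-5 presumably encodes the run id, the rank, 3 epochs, and the 3e-5 learning rate. A minimal loading sketch, assuming the uploaded folder has been downloaded locally; only standard transformers/peft calls are used:

# Minimal sketch: attach this LoRA adapter to the base model for inference.
# "26_128_e3_3e-5" is assumed to be a local copy of the uploaded folder.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "ibm-granite/granite-3.3-8b-instruct",
    torch_dtype=torch.bfloat16,  # matches "torch_dtype" in config.json below
    device_map="auto",
)
model = PeftModel.from_pretrained(base, "26_128_e3_3e-5")  # reads adapter_config.json
tokenizer = AutoTokenizer.from_pretrained("26_128_e3_3e-5")

inputs = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}],
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
print(tokenizer.decode(model.generate(inputs, max_new_tokens=64)[0]))

For deployment, model.merge_and_unload() folds the ~792 MB adapter back into the base weights so inference can run without the peft wrapper.
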
26_128_e3_3e-5/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c8687f657dc6f0c946ba4fffc153d5152e8019a74618caf348acfec567173c74
size 791751704
26_128_e3_3e-5/added_tokens.json
ADDED
@@ -0,0 +1,9 @@
{
    "<|end_of_cite|>": 49156,
    "<|end_of_plugin|>": 49158,
    "<|end_of_role|>": 49153,
    "<|start_of_cite|>": 49155,
    "<|start_of_plugin|>": 49157,
    "<|start_of_role|>": 49152,
    "<|tool_call|>": 49154
}
26_128_e3_3e-5/all_results.json
ADDED
@@ -0,0 +1,9 @@
{
    "epoch": 3.0,
    "total_flos": 9.398740177230234e+17,
    "train_loss": 0.5625836603138425,
    "train_runtime": 455.3956,
    "train_samples": 6963,
    "train_samples_per_second": 45.87,
    "train_steps_per_second": 1.436
}
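
The throughput numbers are self-consistent: 6963 samples for 3 epochs in a 455.4 s run is about 45.9 samples/s, matching train_samples_per_second. A quick check:

# Sanity check on the reported throughput (values copied from all_results.json).
samples, epochs, runtime = 6963, 3, 455.3956
print(samples * epochs / runtime)  # ~45.87, matches "train_samples_per_second"
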
26_128_e3_3e-5/chat_template.jinja
ADDED
@@ -0,0 +1,62 @@
{# Alias tools -> available_tools #}
{%- if tools and not available_tools -%}
{%- set available_tools = tools -%}
{%- endif -%}
{%- if messages[0]['role'] == 'system' %}
{%- set system_message = messages[0]['content'] %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set system_message = "Knowledge Cutoff Date: April 2024.
Today's Date: " + strftime_now('%B %d, %Y') + ".
You are Granite, developed by IBM." %}
{%- if available_tools and documents %}
{%- set system_message = system_message + " You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.
Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." %}
{%- elif available_tools %}
{%- set system_message = system_message + " You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request." %}
{%- elif documents %}
{%- set system_message = system_message + " Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." %}
{%- elif thinking %}
{%- set system_message = system_message + " You are a helpful AI assistant.
Respond to every user query in a comprehensive and detailed way. You can write down your thoughts and reasoning process before responding. In the thought process, engage in a comprehensive cycle of analysis, summarization, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. In the response section, based on various attempts, explorations, and reflections from the thoughts section, systematically present the final solution that you deem correct. The response should summarize the thought process. Write your thoughts between <think></think> and write your response between <response></response> for each user query." %}
{%- else %}
{%- set system_message = system_message + " You are a helpful AI assistant." %}
{%- endif %}
{%- if 'citations' in controls and documents %}
{%- set system_message = system_message + '
Use the symbols <|start_of_cite|> and <|end_of_cite|> to indicate when a fact comes from a document in the search result, e.g <|start_of_cite|> {document_id: 1}my fact <|end_of_cite|> for a fact from document 1. Afterwards, list all the citations with their corresponding documents in an ordered list.' %}
{%- endif %}
{%- if 'hallucinations' in controls and documents %}
{%- set system_message = system_message + '
Finally, after the response is written, include a numbered list of sentences from the response with a corresponding risk value that are hallucinated and not based in the documents.' %}
{%- endif %}
{%- set loop_messages = messages %}
{%- endif %}
{{- '<|start_of_role|>system<|end_of_role|>' + system_message + '<|end_of_text|>
' }}
{%- if available_tools %}
{{- '<|start_of_role|>available_tools<|end_of_role|>' }}
{{- available_tools | tojson(indent=4) }}
{{- '<|end_of_text|>
' }}
{%- endif %}
{%- if documents %}
{%- for document in documents %}
{{- '<|start_of_role|>document {"document_id": "' + document['doc_id'] | string + '"}<|end_of_role|>
' }}
{{- document['text'] }}
{{- '<|end_of_text|>
' }}
{%- endfor %}
{%- endif %}
{%- for message in loop_messages %}
{{- '<|start_of_role|>' + message['role'] + '<|end_of_role|>' + message['content'] + '<|end_of_text|>
' }}
{%- if loop.last and add_generation_prompt %}
{{- '<|start_of_role|>assistant' }}
{%- if controls %}
{{- ' ' + controls | tojson()}}
{%- endif %}
{{- '<|end_of_role|>' }}
{%- endif %}
{%- endfor %}
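
The template above recognizes several optional inputs besides messages: available_tools (aliased from tools), documents, thinking, and controls (with "citations" and "hallucinations" switches). A rendering sketch, assuming a recent transformers version that forwards extra apply_chat_template keyword arguments to the template; the folder path and the sample document are illustrative:

# Sketch: render the chat template with RAG documents and citation controls.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("26_128_e3_3e-5")  # local copy of this upload
text = tokenizer.apply_chat_template(
    [{"role": "user", "content": "When does the contract renew?"}],
    documents=[{"doc_id": 1, "text": "The contract renews annually unless cancelled."}],
    controls={"citations": True},  # enables the <|start_of_cite|> instructions above
    add_generation_prompt=True,
    tokenize=False,
)
print(text)  # ends with: <|start_of_role|>assistant {"citations": true}<|end_of_role|>
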
26_128_e3_3e-5/config.json
ADDED
@@ -0,0 +1,32 @@
{
    "architectures": [
        "GraniteForCausalLM"
    ],
    "attention_bias": false,
    "attention_dropout": 0.0,
    "attention_multiplier": 0.0078125,
    "bos_token_id": 0,
    "embedding_multiplier": 12.0,
    "eos_token_id": 0,
    "hidden_act": "silu",
    "hidden_size": 4096,
    "initializer_range": 0.02,
    "intermediate_size": 12800,
    "logits_scaling": 16.0,
    "max_position_embeddings": 131072,
    "mlp_bias": false,
    "model_type": "granite",
    "num_attention_heads": 32,
    "num_hidden_layers": 40,
    "num_key_value_heads": 8,
    "pad_token_id": 0,
    "residual_multiplier": 0.22,
    "rms_norm_eps": 1e-05,
    "rope_scaling": null,
    "rope_theta": 10000000.0,
    "tie_word_embeddings": true,
    "torch_dtype": "bfloat16",
    "transformers_version": "4.52.4",
    "use_cache": true,
    "vocab_size": 49159
}
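
One detail worth flagging in this config: attention_multiplier is exactly 1/128 = 1/(hidden_size / num_attention_heads), so Granite scales attention logits by 1/head_dim instead of the usual 1/sqrt(head_dim); embedding_multiplier, residual_multiplier, and logits_scaling appear to belong to the same muP-style reparameterization used by the Granite architecture. A quick arithmetic check:

# Consistency check on the attention scaling (numbers from config.json above).
hidden_size, num_attention_heads = 4096, 32
head_dim = hidden_size // num_attention_heads  # 128
assert 1 / head_dim == 0.0078125  # equals "attention_multiplier"
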
26_128_e3_3e-5/merges.txt
ADDED
The diff for this file is too large to render. See raw diff.
26_128_e3_3e-5/special_tokens_map.json
ADDED
@@ -0,0 +1,33 @@
{
    "additional_special_tokens": [
        "<|start_of_role|>",
        "<|end_of_role|>",
        "<|tool_call|>",
        "<|start_of_cite|>",
        "<|end_of_cite|>",
        "<|start_of_plugin|>",
        "<|end_of_plugin|>"
    ],
    "bos_token": {
        "content": "<|end_of_text|>",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "eos_token": {
        "content": "<|end_of_text|>",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "pad_token": "<|end_of_plugin|>",
    "unk_token": {
        "content": "<|end_of_text|>",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    }
}
26_128_e3_3e-5/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
26_128_e3_3e-5/tokenizer_config.json
ADDED
@@ -0,0 +1,234 @@
{
    "add_bos_token": false,
    "add_prefix_space": false,
    "added_tokens_decoder": {
        "0": {"content": "<|end_of_text|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "1": {"content": "<fim_prefix>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "2": {"content": "<fim_middle>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "3": {"content": "<fim_suffix>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "4": {"content": "<fim_pad>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "5": {"content": "<filename>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "6": {"content": "<gh_stars>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "7": {"content": "<issue_start>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "8": {"content": "<issue_comment>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "9": {"content": "<issue_closed>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "10": {"content": "<jupyter_start>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "11": {"content": "<jupyter_text>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "12": {"content": "<jupyter_code>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "13": {"content": "<jupyter_output>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "14": {"content": "<empty_output>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "15": {"content": "<commit_before>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "16": {"content": "<commit_msg>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "17": {"content": "<commit_after>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "18": {"content": "<reponame>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "49152": {"content": "<|start_of_role|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "49153": {"content": "<|end_of_role|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "49154": {"content": "<|tool_call|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "49155": {"content": "<|start_of_cite|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "49156": {"content": "<|end_of_cite|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "49157": {"content": "<|start_of_plugin|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
        "49158": {"content": "<|end_of_plugin|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}
    },
    "additional_special_tokens": [
        "<|start_of_role|>",
        "<|end_of_role|>",
        "<|tool_call|>",
        "<|start_of_cite|>",
        "<|end_of_cite|>",
        "<|start_of_plugin|>",
        "<|end_of_plugin|>"
    ],
    "bos_token": "<|end_of_text|>",
    "clean_up_tokenization_spaces": true,
    "eos_token": "<|end_of_text|>",
    "errors": "replace",
    "extra_special_tokens": {},
    "model_max_length": 8192,
    "pad_token": "<|end_of_plugin|>",
    "padding_side": "left",
    "tokenizer_class": "GPT2Tokenizer",
    "unk_token": "<|end_of_text|>",
    "vocab_size": 49152
}
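
Two practical points from this file: the pad token reuses <|end_of_plugin|> (id 49158) rather than a dedicated token, and padding_side is "left", which is the right setting for batched decoder-only generation. A small sketch to verify, with the local folder path assumed:

# Sketch: confirm pad token and left padding behave as configured.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("26_128_e3_3e-5")  # local copy of this upload
print(tok.pad_token, tok.pad_token_id)  # expected: <|end_of_plugin|> 49158
batch = tok(["hi", "a much longer prompt"], padding=True, return_tensors="pt")
print(batch["attention_mask"][0])  # zeros first: sequences are left-padded
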
26_128_e3_3e-5/train_results.json
ADDED
@@ -0,0 +1,9 @@
{
    "epoch": 3.0,
    "total_flos": 9.398740177230234e+17,
    "train_loss": 0.5625836603138425,
    "train_runtime": 455.3956,
    "train_samples": 6963,
    "train_samples_per_second": 45.87,
    "train_steps_per_second": 1.436
}
26_128_e3_3e-5/trainer_state.json
ADDED
@@ -0,0 +1,953 @@
{
    "best_global_step": null,
    "best_metric": null,
    "best_model_checkpoint": null,
    "epoch": 3.0,
    "eval_steps": 500,
    "global_step": 654,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
    "log_history": [
        {"epoch": 0.022935779816513763, "grad_norm": 3.154066324234009, "learning_rate": 3.6363636363636366e-06, "loss": 1.5926, "step": 5},
        {"epoch": 0.045871559633027525, "grad_norm": 1.382306694984436, "learning_rate": 8.181818181818181e-06, "loss": 1.4573, "step": 10},
        {"epoch": 0.06880733944954129, "grad_norm": 0.6333721280097961, "learning_rate": 1.2727272727272728e-05, "loss": 1.4168, "step": 15},
        {"epoch": 0.09174311926605505, "grad_norm": 0.5116958618164062, "learning_rate": 1.7272727272727274e-05, "loss": 1.3665, "step": 20},
        {"epoch": 0.11467889908256881, "grad_norm": 0.4115943908691406, "learning_rate": 2.1818181818181818e-05, "loss": 1.3116, "step": 25},
        {"epoch": 0.13761467889908258, "grad_norm": 0.36881327629089355, "learning_rate": 2.6363636363636365e-05, "loss": 1.2799, "step": 30},
        {"epoch": 0.16055045871559634, "grad_norm": 0.3722558617591858, "learning_rate": 2.9999808054965268e-05, "loss": 1.3145, "step": 35},
        {"epoch": 0.1834862385321101, "grad_norm": 0.36850863695144653, "learning_rate": 2.999309049453608e-05, "loss": 1.2359, "step": 40},
        {"epoch": 0.20642201834862386, "grad_norm": 0.3976966142654419, "learning_rate": 2.9976780594191328e-05, "loss": 1.2522, "step": 45},
        {"epoch": 0.22935779816513763, "grad_norm": 0.4128827452659607, "learning_rate": 2.9950888788744855e-05, "loss": 1.1857, "step": 50},
        {"epoch": 0.25229357798165136, "grad_norm": 0.3990103304386139, "learning_rate": 2.9915431643360378e-05, "loss": 1.1394, "step": 55},
        {"epoch": 0.27522935779816515, "grad_norm": 0.3858880400657654, "learning_rate": 2.9870431842953412e-05, "loss": 1.1781, "step": 60},
        {"epoch": 0.2981651376146789, "grad_norm": 0.40772372484207153, "learning_rate": 2.9815918177677778e-05, "loss": 1.1378, "step": 65},
        {"epoch": 0.3211009174311927, "grad_norm": 0.4397253692150116, "learning_rate": 2.9751925524506132e-05, "loss": 1.0961, "step": 70},
        {"epoch": 0.3440366972477064, "grad_norm": 0.4583319127559662, "learning_rate": 2.9678494824916264e-05, "loss": 1.0828, "step": 75},
        {"epoch": 0.3669724770642202, "grad_norm": 0.4776696562767029, "learning_rate": 2.959567305869736e-05, "loss": 1.0264, "step": 80},
        {"epoch": 0.38990825688073394, "grad_norm": 0.5306005477905273, "learning_rate": 2.950351321389309e-05, "loss": 1.0082, "step": 85},
        {"epoch": 0.41284403669724773, "grad_norm": 0.48584309220314026, "learning_rate": 2.9402074252900728e-05, "loss": 1.0188, "step": 90},
        {"epoch": 0.43577981651376146, "grad_norm": 0.5304965376853943, "learning_rate": 2.929142107474791e-05, "loss": 0.9997, "step": 95},
        {"epoch": 0.45871559633027525, "grad_norm": 0.49652519822120667, "learning_rate": 2.9171624473571312e-05, "loss": 0.9966, "step": 100},
        {"epoch": 0.481651376146789, "grad_norm": 0.5302298665046692, "learning_rate": 2.904276109332367e-05, "loss": 0.9857, "step": 105},
        {"epoch": 0.5045871559633027, "grad_norm": 0.5599017143249512, "learning_rate": 2.8904913378738203e-05, "loss": 0.9219, "step": 110},
        {"epoch": 0.5275229357798165, "grad_norm": 0.5480442047119141, "learning_rate": 2.8758169522581795e-05, "loss": 0.9678, "step": 115},
        {"epoch": 0.5504587155963303, "grad_norm": 0.6150339245796204, "learning_rate": 2.860262340923068e-05, "loss": 0.8796, "step": 120},
        {"epoch": 0.573394495412844, "grad_norm": 0.624284029006958, "learning_rate": 2.8438374554604693e-05, "loss": 0.8916, "step": 125},
        {"epoch": 0.5963302752293578, "grad_norm": 0.7007209658622742, "learning_rate": 2.8265528042498586e-05, "loss": 0.8468, "step": 130},
        {"epoch": 0.6192660550458715, "grad_norm": 0.7358646392822266, "learning_rate": 2.808419445735111e-05, "loss": 0.8617, "step": 135},
        {"epoch": 0.6422018348623854, "grad_norm": 0.6849257349967957, "learning_rate": 2.789448981349483e-05, "loss": 0.8376, "step": 140},
        {"epoch": 0.6651376146788991, "grad_norm": 0.6804253458976746, "learning_rate": 2.7696535480932016e-05, "loss": 0.786, "step": 145},
        {"epoch": 0.6880733944954128, "grad_norm": 0.6508814692497253, "learning_rate": 2.7490458107684067e-05, "loss": 0.7706, "step": 150},
        {"epoch": 0.7110091743119266, "grad_norm": 0.687238872051239, "learning_rate": 2.727638953876411e-05, "loss": 0.7694, "step": 155},
        {"epoch": 0.7339449541284404, "grad_norm": 0.786554217338562, "learning_rate": 2.7054466731824673e-05, "loss": 0.7852, "step": 160},
        {"epoch": 0.7568807339449541, "grad_norm": 0.7580022811889648, "learning_rate": 2.6824831669534373e-05, "loss": 0.724, "step": 165},
        {"epoch": 0.7798165137614679, "grad_norm": 0.7300242781639099, "learning_rate": 2.6587631268739665e-05, "loss": 0.7519, "step": 170},
        {"epoch": 0.8027522935779816, "grad_norm": 0.7774202823638916, "learning_rate": 2.634301728646978e-05, "loss": 0.7385, "step": 175},
        {"epoch": 0.8256880733944955, "grad_norm": 0.8375148773193359, "learning_rate": 2.6091146222845015e-05, "loss": 0.6821, "step": 180},
        {"epoch": 0.8486238532110092, "grad_norm": 0.8503871560096741, "learning_rate": 2.5832179220950454e-05, "loss": 0.7309, "step": 185},
        {"epoch": 0.8715596330275229, "grad_norm": 0.7620130181312561, "learning_rate": 2.5566281963739187e-05, "loss": 0.6918, "step": 190},
        {"epoch": 0.8944954128440367, "grad_norm": 0.8705322742462158, "learning_rate": 2.5293624568031008e-05, "loss": 0.6933, "step": 195},
        {"epoch": 0.9174311926605505, "grad_norm": 0.8482771515846252, "learning_rate": 2.501438147567438e-05, "loss": 0.643, "step": 200},
        {"epoch": 0.9403669724770642, "grad_norm": 0.829285204410553, "learning_rate": 2.4728731341941343e-05, "loss": 0.6769, "step": 205},
        {"epoch": 0.963302752293578, "grad_norm": 0.8753135204315186, "learning_rate": 2.4436856921226704e-05, "loss": 0.666, "step": 210},
        {"epoch": 0.9862385321100917, "grad_norm": 0.8907920718193054, "learning_rate": 2.4138944950124715e-05, "loss": 0.6832, "step": 215},
        {"epoch": 1.0091743119266054, "grad_norm": 0.8794606924057007, "learning_rate": 2.383518602795796e-05, "loss": 0.6227, "step": 220},
        {"epoch": 1.0321100917431192, "grad_norm": 0.8743161559104919, "learning_rate": 2.352577449483496e-05, "loss": 0.5444, "step": 225},
        {"epoch": 1.0550458715596331, "grad_norm": 1.0772712230682373, "learning_rate": 2.3210908307314477e-05, "loss": 0.5514, "step": 230},
        {"epoch": 1.0779816513761469, "grad_norm": 1.07927405834198, "learning_rate": 2.2890788911756068e-05, "loss": 0.5402, "step": 235},
        {"epoch": 1.1009174311926606, "grad_norm": 1.0382741689682007, "learning_rate": 2.2565621115437916e-05, "loss": 0.5429, "step": 240},
        {"epoch": 1.1238532110091743, "grad_norm": 0.9207555651664734, "learning_rate": 2.223561295552441e-05, "loss": 0.523, "step": 245},
        {"epoch": 1.146788990825688, "grad_norm": 1.0322675704956055, "learning_rate": 2.1900975565967284e-05, "loss": 0.5297, "step": 250},
        {"epoch": 1.1697247706422018, "grad_norm": 1.0103211402893066, "learning_rate": 2.156192304242548e-05, "loss": 0.5135, "step": 255},
        {"epoch": 1.1926605504587156, "grad_norm": 1.0103763341903687, "learning_rate": 2.121867230529018e-05, "loss": 0.4922, "step": 260},
        {"epoch": 1.2155963302752293, "grad_norm": 1.0744143724441528, "learning_rate": 2.087144296090259e-05, "loss": 0.55, "step": 265},
        {"epoch": 1.238532110091743, "grad_norm": 1.1584802865982056, "learning_rate": 2.052045716105331e-05, "loss": 0.4761, "step": 270},
        {"epoch": 1.261467889908257, "grad_norm": 1.0127031803131104, "learning_rate": 2.016593946085317e-05, "loss": 0.4475, "step": 275},
        {"epoch": 1.2844036697247707, "grad_norm": 1.0740467309951782, "learning_rate": 1.980811667506646e-05, "loss": 0.5215, "step": 280},
        {"epoch": 1.3073394495412844, "grad_norm": 0.977476179599762, "learning_rate": 1.9447217732998442e-05, "loss": 0.4857, "step": 285},
        {"epoch": 1.3302752293577982, "grad_norm": 1.150428295135498, "learning_rate": 1.908347353203007e-05, "loss": 0.5021, "step": 290},
        {"epoch": 1.353211009174312, "grad_norm": 1.0584716796875, "learning_rate": 1.8717116789893502e-05, "loss": 0.4824, "step": 295},
        {"epoch": 1.3761467889908257, "grad_norm": 1.034061312675476, "learning_rate": 1.834838189578303e-05, "loss": 0.471, "step": 300},
        {"epoch": 1.3990825688073394, "grad_norm": 1.000554084777832, "learning_rate": 1.7977504760396608e-05, "loss": 0.4937, "step": 305},
        {"epoch": 1.4220183486238533, "grad_norm": 1.0319263935089111, "learning_rate": 1.760472266500396e-05, "loss": 0.4243, "step": 310},
        {"epoch": 1.4449541284403669, "grad_norm": 0.9685313105583191, "learning_rate": 1.7230274109637782e-05, "loss": 0.4625, "step": 315},
        {"epoch": 1.4678899082568808, "grad_norm": 0.9785776734352112, "learning_rate": 1.6854398660505295e-05, "loss": 0.4959, "step": 320},
        {"epoch": 1.4908256880733946, "grad_norm": 1.1590365171432495, "learning_rate": 1.647733679671753e-05, "loss": 0.4479, "step": 325},
        {"epoch": 1.5137614678899083, "grad_norm": 1.0894136428833008, "learning_rate": 1.6099329756434703e-05, "loss": 0.462, "step": 330},
        {"epoch": 1.536697247706422, "grad_norm": 1.1440633535385132, "learning_rate": 1.5720619382525834e-05, "loss": 0.4497, "step": 335},
        {"epoch": 1.5596330275229358, "grad_norm": 1.1175260543823242, "learning_rate": 1.5341447967841584e-05, "loss": 0.4032, "step": 340},
        {"epoch": 1.5825688073394495, "grad_norm": 1.1592456102371216, "learning_rate": 1.4962058100199145e-05, "loss": 0.4312, "step": 345},
        {"epoch": 1.6055045871559632, "grad_norm": 1.189139723777771, "learning_rate": 1.4582692507178406e-05, "loss": 0.3975, "step": 350},
        {"epoch": 1.6284403669724772, "grad_norm": 1.070449948310852, "learning_rate": 1.4203593900828762e-05, "loss": 0.4215, "step": 355},
        {"epoch": 1.6513761467889907, "grad_norm": 1.0447947978973389, "learning_rate": 1.3825004822385772e-05, "loss": 0.4731, "step": 360},
        {"epoch": 1.6743119266055047, "grad_norm": 1.0117021799087524, "learning_rate": 1.34471674870972e-05, "loss": 0.3787, "step": 365},
        {"epoch": 1.6972477064220184, "grad_norm": 1.2419357299804688, "learning_rate": 1.3070323629257536e-05, "loss": 0.3781, "step": 370},
        {"epoch": 1.7201834862385321, "grad_norm": 1.2271087169647217, "learning_rate": 1.269471434755025e-05, "loss": 0.3987, "step": 375},
        {"epoch": 1.7431192660550459, "grad_norm": 1.140735149383545, "learning_rate": 1.2320579950796726e-05, "loss": 0.3971, "step": 380},
        {"epoch": 1.7660550458715596, "grad_norm": 1.060956358909607, "learning_rate": 1.1948159804210497e-05, "loss": 0.3798, "step": 385},
        {"epoch": 1.7889908256880735, "grad_norm": 1.0528767108917236, "learning_rate": 1.1577692176255207e-05, "loss": 0.36, "step": 390},
        {"epoch": 1.811926605504587, "grad_norm": 1.1098915338516235, "learning_rate": 1.120941408620426e-05, "loss": 0.3857, "step": 395},
        {"epoch": 1.834862385321101, "grad_norm": 1.1725845336914062, "learning_rate": 1.0843561152499672e-05, "loss": 0.3683, "step": 400},
        {"epoch": 1.8577981651376145, "grad_norm": 1.2150763273239136, "learning_rate": 1.0480367442007129e-05, "loss": 0.3606, "step": 405},
        {"epoch": 1.8807339449541285, "grad_norm": 1.368205189704895, "learning_rate": 1.0120065320263785e-05, "loss": 0.3812, "step": 410},
        {"epoch": 1.9036697247706422, "grad_norm": 1.1948931217193604, "learning_rate": 9.762885302814475e-06, "loss": 0.3724, "step": 415},
        {"epoch": 1.926605504587156, "grad_norm": 1.0344188213348389, "learning_rate": 9.409055907731541e-06, "loss": 0.3825, "step": 420},
        {"epoch": 1.9495412844036697, "grad_norm": 1.102097511291504, "learning_rate": 9.058803509412647e-06, "loss": 0.3708, "step": 425},
        {"epoch": 1.9724770642201834, "grad_norm": 1.058523178100586, "learning_rate": 8.712352193750044e-06, "loss": 0.3433, "step": 430},
        {"epoch": 1.9954128440366974, "grad_norm": 1.0777422189712524, "learning_rate": 8.369923614763957e-06, "loss": 0.3255, "step": 435},
        {"epoch": 2.018348623853211, "grad_norm": 1.1497981548309326, "learning_rate": 8.031736852791953e-06, "loss": 0.3166, "step": 440},
        {"epoch": 2.041284403669725, "grad_norm": 1.1676654815673828, "learning_rate": 7.698008274324768e-06, "loss": 0.2898, "step": 445},
        {"epoch": 2.0642201834862384, "grad_norm": 1.0730738639831543, "learning_rate": 7.3689513935785416e-06, "loss": 0.2679, "step": 450},
        {"epoch": 2.0871559633027523, "grad_norm": 1.1856460571289062, "learning_rate": 7.044776735891763e-06, "loss": 0.2965, "step": 455},
        {"epoch": 2.1100917431192663, "grad_norm": 1.1058369874954224, "learning_rate": 6.725691703034592e-06, "loss": 0.2826, "step": 460},
        {"epoch": 2.13302752293578, "grad_norm": 1.0041329860687256, "learning_rate": 6.411900440516469e-06, "loss": 0.2869, "step": 465},
        {"epoch": 2.1559633027522938, "grad_norm": 1.2383862733840942, "learning_rate": 6.1036037069770965e-06, "loss": 0.3092, "step": 470},
        {"epoch": 2.1788990825688073, "grad_norm": 1.1808902025222778, "learning_rate": 5.800998745744253e-06, "loss": 0.292, "step": 475},
        {"epoch": 2.2018348623853212, "grad_norm": 1.0828917026519775, "learning_rate": 5.5042791586407025e-06, "loss": 0.3025, "step": 480},
        {"epoch": 2.2247706422018347, "grad_norm": 1.155086874961853, "learning_rate": 5.213634782120758e-06, "loss": 0.2736, "step": 485},
        {"epoch": 2.2477064220183487, "grad_norm": 1.142809510231018, "learning_rate": 4.92925156581605e-06, "loss": 0.3002, "step": 490},
        {"epoch": 2.270642201834862, "grad_norm": 1.133955717086792, "learning_rate": 4.651311453567839e-06, "loss": 0.3127, "step": 495},
        {"epoch": 2.293577981651376, "grad_norm": 1.4609079360961914, "learning_rate": 4.379992267022286e-06, "loss": 0.2288, "step": 500},
        {"epoch": 2.31651376146789, "grad_norm": 1.1233643293380737, "learning_rate": 4.1154675918630144e-06, "loss": 0.3099, "step": 505},
        {"epoch": 2.3394495412844036, "grad_norm": 1.0275275707244873, "learning_rate": 3.85790666675373e-06, "loss": 0.2688, "step": 510},
        {"epoch": 2.3623853211009176, "grad_norm": 1.1538602113723755, "learning_rate": 3.6074742750620445e-06, "loss": 0.2466, "step": 515},
        {"epoch": 2.385321100917431, "grad_norm": 1.2466658353805542, "learning_rate": 3.3643306394337016e-06, "loss": 0.2737, "step": 520},
        {"epoch": 2.408256880733945, "grad_norm": 1.0603047609329224, "learning_rate": 3.1286313192847066e-06, "loss": 0.2376, "step": 525},
        {"epoch": 2.4311926605504586, "grad_norm": 1.094659447669983, "learning_rate": 2.9005271112768487e-06, "loss": 0.2497, "step": 530},
        {"epoch": 2.4541284403669725, "grad_norm": 1.0665955543518066, "learning_rate": 2.680163952840467e-06, "loss": 0.2424, "step": 535},
        {"epoch": 2.477064220183486, "grad_norm": 1.1814361810684204, "learning_rate": 2.467682828805956e-06, "loss": 0.2549, "step": 540},
        {"epoch": 2.5, "grad_norm": 1.1644728183746338, "learning_rate": 2.2632196812039553e-06, "loss": 0.2277, "step": 545},
        {"epoch": 2.522935779816514, "grad_norm": 1.0103965997695923, "learning_rate": 2.066905322291797e-06, "loss": 0.2634, "step": 550},
        {"epoch": 2.5458715596330275, "grad_norm": 1.1028698682785034, "learning_rate": 1.878865350861927e-06, "loss": 0.2628, "step": 555},
        {"epoch": 2.5688073394495414, "grad_norm": 1.044304609298706, "learning_rate": 1.6992200718857676e-06, "loss": 0.2447, "step": 560},
        {"epoch": 2.591743119266055, "grad_norm": 1.1257292032241821, "learning_rate": 1.5280844195445598e-06, "loss": 0.2747, "step": 565},
        {"epoch": 2.614678899082569, "grad_norm": 1.1923913955688477, "learning_rate": 1.365567883696282e-06, "loss": 0.2926, "step": 570},
        {"epoch": 2.6376146788990824, "grad_norm": 1.0788416862487793, "learning_rate": 1.2117744398257963e-06, "loss": 0.219, "step": 575},
        {"epoch": 2.6605504587155964, "grad_norm": 1.1790810823440552, "learning_rate": 1.066802482522995e-06, "loss": 0.2345, "step": 580},
        {"epoch": 2.68348623853211, "grad_norm": 1.0914078950881958, "learning_rate": 9.307447625315052e-07, "loss": 0.2356, "step": 585},
        {"epoch": 2.706422018348624, "grad_norm": 1.1160882711410522, "learning_rate": 8.03688327408253e-07, "loss": 0.2529, "step": 590},
        {"epoch": 2.729357798165138, "grad_norm": 1.0512644052505493, "learning_rate": 6.857144658318165e-07, "loss": 0.2619, "step": 595},
        {"epoch": 2.7522935779816513, "grad_norm": 1.1664880514144897, "learning_rate": 5.768986555952415e-07, "loss": 0.2482, "step": 600},
        {"epoch": 2.7752293577981653, "grad_norm": 1.0369032621383667, "learning_rate": 4.773105153165385e-07, "loss": 0.2486, "step": 605},
        {"epoch": 2.7981651376146788, "grad_norm": 1.0779132843017578, "learning_rate": 3.8701375989781517e-07, "loss": 0.3011, "step": 610},
        {"epoch": 2.8211009174311927, "grad_norm": 0.944064199924469, "learning_rate": 3.060661597614872e-07, "loss": 0.2465, "step": 615},
        {"epoch": 2.8440366972477067, "grad_norm": 1.0364296436309814, "learning_rate": 2.3451950388969912e-07, "loss": 0.2442, "step": 620},
        {"epoch": 2.86697247706422, "grad_norm": 0.9234054684638977, "learning_rate": 1.724195666905465e-07, "loss": 0.2219, "step": 625},
        {"epoch": 2.8899082568807337, "grad_norm": 1.1181162595748901, "learning_rate": 1.1980607871234894e-07, "loss": 0.2605, "step": 630},
        {"epoch": 2.9128440366972477, "grad_norm": 1.3363423347473145, "learning_rate": 7.671270122467822e-08, "loss": 0.253, "step": 635},
        {"epoch": 2.9357798165137616, "grad_norm": 1.0425448417663574, "learning_rate": 4.316700468241175e-08, "loss": 0.2552, "step": 640},
        {"epoch": 2.958715596330275, "grad_norm": 1.0840250253677368, "learning_rate": 1.9190451086599226e-08, "loss": 0.3126, "step": 645},
        {"epoch": 2.981651376146789, "grad_norm": 1.0141379833221436, "learning_rate": 4.798380253417944e-09, "loss": 0.2846, "step": 650},
        {"epoch": 3.0, "step": 654, "total_flos": 9.398740177230234e+17, "train_loss": 0.5625836603138425, "train_runtime": 455.3956, "train_samples_per_second": 45.87, "train_steps_per_second": 1.436}
    ],
    "logging_steps": 5,
    "max_steps": 654,
    "num_input_tokens_seen": 0,
    "num_train_epochs": 3,
    "save_steps": 20000,
    "stateful_callbacks": {
        "TrainerControl": {
            "args": {
                "should_epoch_stop": false,
                "should_evaluate": false,
                "should_log": false,
                "should_save": false,
                "should_training_stop": false
            },
            "attributes": {}
        }
    },
    "total_flos": 9.398740177230234e+17,
    "train_batch_size": 2,
    "trial_name": null,
    "trial_params": null
}
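
trainer_state.json preserves the full loss curve (logged every 5 steps across 654 steps). The step count also pins down the effective batch: 6963 samples x 3 epochs / 654 steps is roughly 32 sequences per optimizer step, so with train_batch_size 2 per device the remaining factor of 16 must come from gradient accumulation and/or data parallelism, which this file alone cannot distinguish. A minimal plotting sketch (matplotlib assumed, nothing model-specific):

# Minimal sketch: plot the training loss curve from trainer_state.json.
import json
import matplotlib.pyplot as plt

with open("26_128_e3_3e-5/trainer_state.json") as f:
    state = json.load(f)

logs = [e for e in state["log_history"] if "loss" in e]  # skips the final summary entry
plt.plot([e["step"] for e in logs], [e["loss"] for e in logs])
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("26_128_e3_3e-5: LoRA r=128, lr 3e-5, 3 epochs")
plt.show()
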
26_128_e3_3e-5/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:92d53c14c5c97a7b251af0ffa4ce680912cc0d28b94cb59fd2de50cb5fc1ccbe
size 8209
26_128_e3_3e-5/vocab.json
ADDED
The diff for this file is too large to render. See raw diff.