Upload folder using huggingface_hub

Browse files

Files changed (14) hide show

20_128_e3_3e-5/adapter_config.json +39 -0
20_128_e3_3e-5/adapter_model.safetensors +3 -0
20_128_e3_3e-5/added_tokens.json +9 -0
20_128_e3_3e-5/all_results.json +9 -0
20_128_e3_3e-5/chat_template.jinja +62 -0
20_128_e3_3e-5/config.json +32 -0
20_128_e3_3e-5/merges.txt +0 -0
20_128_e3_3e-5/special_tokens_map.json +33 -0
20_128_e3_3e-5/tokenizer.json +0 -0
20_128_e3_3e-5/tokenizer_config.json +234 -0
20_128_e3_3e-5/train_results.json +9 -0
20_128_e3_3e-5/trainer_state.json +806 -0
20_128_e3_3e-5/training_args.bin +3 -0
20_128_e3_3e-5/vocab.json +0 -0

20_128_e3_3e-5/adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "ibm-granite/granite-3.3-8b-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 256,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "up_proj",
+    "q_proj",
+    "gate_proj",
+    "v_proj",
+    "k_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

20_128_e3_3e-5/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c6c4bbf40d2ac5b2cbfc83a7f98311b53d039b049ba828e74fb86a1a1aa17734
+size 791751704

20_128_e3_3e-5/added_tokens.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "<|end_of_cite|>": 49156,
+  "<|end_of_plugin|>": 49158,
+  "<|end_of_role|>": 49153,
+  "<|start_of_cite|>": 49155,
+  "<|start_of_plugin|>": 49157,
+  "<|start_of_role|>": 49152,
+  "<|tool_call|>": 49154
+}

20_128_e3_3e-5/all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.0,
+    "total_flos": 7.459266976179814e+17,
+    "train_loss": 0.5250646652202572,
+    "train_runtime": 382.361,
+    "train_samples": 5847,
+    "train_samples_per_second": 45.875,
+    "train_steps_per_second": 1.436
+}

20_128_e3_3e-5/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,62 @@

+{# Alias tools -> available_tools #}
+{%- if tools and not available_tools -%}
+    {%- set available_tools = tools -%}
+{%- endif -%}
+{%- if messages[0]['role'] == 'system' %}
+     {%- set system_message = messages[0]['content'] %}
+     {%- set loop_messages = messages[1:] %}
+ {%- else %}
+     {%- set system_message = "Knowledge Cutoff Date: April 2024.
+Today's Date: " + strftime_now('%B %d, %Y') + ".
+You are Granite, developed by IBM." %}
+     {%- if available_tools and documents %}
+         {%- set system_message = system_message + " You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.
+Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." %}
+     {%- elif available_tools %}
+         {%- set system_message = system_message + " You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request." %}
+     {%- elif documents %}
+         {%- set system_message = system_message + " Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." %}
+    {%- elif thinking %}
+    {%- set system_message = system_message + " You are a helpful AI assistant.
+Respond to every user query in a comprehensive and detailed way. You can write down your thoughts and reasoning process before responding. In the thought process, engage in a comprehensive cycle of analysis, summarization, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. In the response section, based on various attempts, explorations, and reflections from the thoughts section, systematically present the final solution that you deem correct. The response should summarize the thought process. Write your thoughts between <think></think> and write your response between <response></response> for each user query." %}
+     {%- else %}
+         {%- set system_message = system_message + " You are a helpful AI assistant." %}
+     {%- endif %}
+     {%- if 'citations' in controls and documents %}
+         {%- set system_message = system_message + '
+Use the symbols <|start_of_cite|> and <|end_of_cite|> to indicate when a fact comes from a document in the search result, e.g <|start_of_cite|> {document_id: 1}my fact <|end_of_cite|> for a fact from document 1. Afterwards, list all the citations with their corresponding documents in an ordered list.' %}
+     {%- endif %}
+     {%- if 'hallucinations' in controls and documents %}
+         {%- set system_message = system_message + '
+Finally, after the response is written, include a numbered list of sentences from the response with a corresponding risk value that are hallucinated and not based in the documents.' %}
+     {%- endif %}
+     {%- set loop_messages = messages %}
+ {%- endif %}
+ {{- '<|start_of_role|>system<|end_of_role|>' + system_message + '<|end_of_text|>
+' }}
+ {%- if available_tools %}
+     {{- '<|start_of_role|>available_tools<|end_of_role|>' }}
+     {{- available_tools | tojson(indent=4) }}
+     {{- '<|end_of_text|>
+' }}
+ {%- endif %}
+ {%- if documents %}
+     {%- for document in documents %}
+         {{- '<|start_of_role|>document {"document_id": "' + document['doc_id'] | string + '"}<|end_of_role|>
+' }}
+         {{- document['text'] }}
+         {{- '<|end_of_text|>
+' }}
+              {%- endfor %}
+ {%- endif %}
+ {%- for message in loop_messages %}
+     {{- '<|start_of_role|>' + message['role'] + '<|end_of_role|>' + message['content'] + '<|end_of_text|>
+' }}
+     {%- if loop.last and add_generation_prompt %}
+         {{- '<|start_of_role|>assistant' }}
+             {%- if controls %}
+                 {{- ' ' + controls | tojson()}}
+             {%- endif %}
+         {{- '<|end_of_role|>' }}
+     {%- endif %}
+ {%- endfor %}

20_128_e3_3e-5/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "architectures": [
+    "GraniteForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attention_multiplier": 0.0078125,
+  "bos_token_id": 0,
+  "embedding_multiplier": 12.0,
+  "eos_token_id": 0,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12800,
+  "logits_scaling": 16.0,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "granite",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 40,
+  "num_key_value_heads": 8,
+  "pad_token_id": 0,
+  "residual_multiplier": 0.22,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000000.0,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.52.4",
+  "use_cache": true,
+  "vocab_size": 49159
+}

20_128_e3_3e-5/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

20_128_e3_3e-5/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "additional_special_tokens": [
+    "<|start_of_role|>",
+    "<|end_of_role|>",
+    "<|tool_call|>",
+    "<|start_of_cite|>",
+    "<|end_of_cite|>",
+    "<|start_of_plugin|>",
+    "<|end_of_plugin|>"
+  ],
+  "bos_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|end_of_plugin|>",
+  "unk_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

20_128_e3_3e-5/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

20_128_e3_3e-5/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,234 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|end_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<fim_prefix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<fim_middle>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<fim_suffix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<fim_pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<filename>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<gh_stars>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<empty_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<commit_before>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<commit_msg>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "<commit_after>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "<reponame>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49152": {
+      "content": "<|start_of_role|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49153": {
+      "content": "<|end_of_role|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49154": {
+      "content": "<|tool_call|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49155": {
+      "content": "<|start_of_cite|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49156": {
+      "content": "<|end_of_cite|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49157": {
+      "content": "<|start_of_plugin|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49158": {
+      "content": "<|end_of_plugin|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|start_of_role|>",
+    "<|end_of_role|>",
+    "<|tool_call|>",
+    "<|start_of_cite|>",
+    "<|end_of_cite|>",
+    "<|start_of_plugin|>",
+    "<|end_of_plugin|>"
+  ],
+  "bos_token": "<|end_of_text|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|end_of_text|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 8192,
+  "pad_token": "<|end_of_plugin|>",
+  "padding_side": "left",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|end_of_text|>",
+  "vocab_size": 49152
+}

20_128_e3_3e-5/train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.0,
+    "total_flos": 7.459266976179814e+17,
+    "train_loss": 0.5250646652202572,
+    "train_runtime": 382.361,
+    "train_samples": 5847,
+    "train_samples_per_second": 45.875,
+    "train_steps_per_second": 1.436
+}

20_128_e3_3e-5/trainer_state.json ADDED Viewed

	@@ -0,0 +1,806 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 549,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0273224043715847,
+      "grad_norm": 3.059643030166626,
+      "learning_rate": 4.2857142857142855e-06,
+      "loss": 1.5758,
+      "step": 5
+    },
+    {
+      "epoch": 0.0546448087431694,
+      "grad_norm": 1.2129096984863281,
+      "learning_rate": 9.642857142857144e-06,
+      "loss": 1.5243,
+      "step": 10
+    },
+    {
+      "epoch": 0.08196721311475409,
+      "grad_norm": 0.5296608209609985,
+      "learning_rate": 1.5e-05,
+      "loss": 1.3208,
+      "step": 15
+    },
+    {
+      "epoch": 0.1092896174863388,
+      "grad_norm": 0.5503635406494141,
+      "learning_rate": 2.0357142857142858e-05,
+      "loss": 1.3562,
+      "step": 20
+    },
+    {
+      "epoch": 0.1366120218579235,
+      "grad_norm": 0.4368098974227905,
+      "learning_rate": 2.5714285714285714e-05,
+      "loss": 1.2634,
+      "step": 25
+    },
+    {
+      "epoch": 0.16393442622950818,
+      "grad_norm": 0.4096377491950989,
+      "learning_rate": 2.9999727300559898e-05,
+      "loss": 1.2939,
+      "step": 30
+    },
+    {
+      "epoch": 0.1912568306010929,
+      "grad_norm": 0.3619536757469177,
+      "learning_rate": 2.9990183861225638e-05,
+      "loss": 1.1325,
+      "step": 35
+    },
+    {
+      "epoch": 0.2185792349726776,
+      "grad_norm": 0.4071503281593323,
+      "learning_rate": 2.996701536359701e-05,
+      "loss": 1.2214,
+      "step": 40
+    },
+    {
+      "epoch": 0.2459016393442623,
+      "grad_norm": 0.42328494787216187,
+      "learning_rate": 2.993024286626367e-05,
+      "loss": 1.1298,
+      "step": 45
+    },
+    {
+      "epoch": 0.273224043715847,
+      "grad_norm": 0.43418291211128235,
+      "learning_rate": 2.987989979292638e-05,
+      "loss": 1.0944,
+      "step": 50
+    },
+    {
+      "epoch": 0.3005464480874317,
+      "grad_norm": 0.4443665146827698,
+      "learning_rate": 2.981603190201714e-05,
+      "loss": 1.0722,
+      "step": 55
+    },
+    {
+      "epoch": 0.32786885245901637,
+      "grad_norm": 0.4118559956550598,
+      "learning_rate": 2.97386972451079e-05,
+      "loss": 1.0652,
+      "step": 60
+    },
+    {
+      "epoch": 0.3551912568306011,
+      "grad_norm": 0.5022803544998169,
+      "learning_rate": 2.9647966114145582e-05,
+      "loss": 1.0402,
+      "step": 65
+    },
+    {
+      "epoch": 0.3825136612021858,
+      "grad_norm": 0.468370646238327,
+      "learning_rate": 2.954392097756154e-05,
+      "loss": 1.0402,
+      "step": 70
+    },
+    {
+      "epoch": 0.4098360655737705,
+      "grad_norm": 0.46648314595222473,
+      "learning_rate": 2.9426656405313292e-05,
+      "loss": 0.9903,
+      "step": 75
+    },
+    {
+      "epoch": 0.4371584699453552,
+      "grad_norm": 0.4596513509750366,
+      "learning_rate": 2.9296278982926918e-05,
+      "loss": 1.0013,
+      "step": 80
+    },
+    {
+      "epoch": 0.4644808743169399,
+      "grad_norm": 0.4788200855255127,
+      "learning_rate": 2.9152907214618022e-05,
+      "loss": 1.0186,
+      "step": 85
+    },
+    {
+      "epoch": 0.4918032786885246,
+      "grad_norm": 0.5422234535217285,
+      "learning_rate": 2.8996671415579474e-05,
+      "loss": 0.9389,
+      "step": 90
+    },
+    {
+      "epoch": 0.5191256830601093,
+      "grad_norm": 0.5459036827087402,
+      "learning_rate": 2.8827713593533747e-05,
+      "loss": 0.925,
+      "step": 95
+    },
+    {
+      "epoch": 0.546448087431694,
+      "grad_norm": 0.6408877372741699,
+      "learning_rate": 2.864618731965754e-05,
+      "loss": 0.8163,
+      "step": 100
+    },
+    {
+      "epoch": 0.5737704918032787,
+      "grad_norm": 0.528777539730072,
+      "learning_rate": 2.845225758899605e-05,
+      "loss": 0.8996,
+      "step": 105
+    },
+    {
+      "epoch": 0.6010928961748634,
+      "grad_norm": 0.5720665454864502,
+      "learning_rate": 2.8246100670493656e-05,
+      "loss": 0.8686,
+      "step": 110
+    },
+    {
+      "epoch": 0.6284153005464481,
+      "grad_norm": 0.6229217052459717,
+      "learning_rate": 2.8027903946777434e-05,
+      "loss": 0.7646,
+      "step": 115
+    },
+    {
+      "epoch": 0.6557377049180327,
+      "grad_norm": 0.6496168375015259,
+      "learning_rate": 2.7797865743839104e-05,
+      "loss": 0.8618,
+      "step": 120
+    },
+    {
+      "epoch": 0.6830601092896175,
+      "grad_norm": 0.7382768988609314,
+      "learning_rate": 2.7556195150770166e-05,
+      "loss": 0.8082,
+      "step": 125
+    },
+    {
+      "epoch": 0.7103825136612022,
+      "grad_norm": 0.7171357870101929,
+      "learning_rate": 2.7303111829714135e-05,
+      "loss": 0.7585,
+      "step": 130
+    },
+    {
+      "epoch": 0.7377049180327869,
+      "grad_norm": 0.7362774610519409,
+      "learning_rate": 2.703884581620857e-05,
+      "loss": 0.7515,
+      "step": 135
+    },
+    {
+      "epoch": 0.7650273224043715,
+      "grad_norm": 0.7897721529006958,
+      "learning_rate": 2.6763637310098427e-05,
+      "loss": 0.7427,
+      "step": 140
+    },
+    {
+      "epoch": 0.7923497267759563,
+      "grad_norm": 0.6889026165008545,
+      "learning_rate": 2.6477736457210703e-05,
+      "loss": 0.7584,
+      "step": 145
+    },
+    {
+      "epoch": 0.819672131147541,
+      "grad_norm": 0.8032437562942505,
+      "learning_rate": 2.618140312198889e-05,
+      "loss": 0.6513,
+      "step": 150
+    },
+    {
+      "epoch": 0.8469945355191257,
+      "grad_norm": 0.834784746170044,
+      "learning_rate": 2.5874906651293877e-05,
+      "loss": 0.7104,
+      "step": 155
+    },
+    {
+      "epoch": 0.8743169398907104,
+      "grad_norm": 0.8231666088104248,
+      "learning_rate": 2.5558525629585913e-05,
+      "loss": 0.6427,
+      "step": 160
+    },
+    {
+      "epoch": 0.9016393442622951,
+      "grad_norm": 0.8814102411270142,
+      "learning_rate": 2.5232547625710324e-05,
+      "loss": 0.6995,
+      "step": 165
+    },
+    {
+      "epoch": 0.9289617486338798,
+      "grad_norm": 0.9681147336959839,
+      "learning_rate": 2.4897268931516927e-05,
+      "loss": 0.6363,
+      "step": 170
+    },
+    {
+      "epoch": 0.9562841530054644,
+      "grad_norm": 0.7128579616546631,
+      "learning_rate": 2.4552994292550906e-05,
+      "loss": 0.5636,
+      "step": 175
+    },
+    {
+      "epoch": 0.9836065573770492,
+      "grad_norm": 0.8019705414772034,
+      "learning_rate": 2.4200036631059834e-05,
+      "loss": 0.6638,
+      "step": 180
+    },
+    {
+      "epoch": 1.010928961748634,
+      "grad_norm": 0.8757464289665222,
+      "learning_rate": 2.383871676156858e-05,
+      "loss": 0.5886,
+      "step": 185
+    },
+    {
+      "epoch": 1.0382513661202186,
+      "grad_norm": 0.9065819978713989,
+      "learning_rate": 2.3469363099280743e-05,
+      "loss": 0.5288,
+      "step": 190
+    },
+    {
+      "epoch": 1.0655737704918034,
+      "grad_norm": 0.8466619253158569,
+      "learning_rate": 2.3092311361571534e-05,
+      "loss": 0.4916,
+      "step": 195
+    },
+    {
+      "epoch": 1.092896174863388,
+      "grad_norm": 0.9222168922424316,
+      "learning_rate": 2.2707904262843523e-05,
+      "loss": 0.5011,
+      "step": 200
+    },
+    {
+      "epoch": 1.1202185792349726,
+      "grad_norm": 0.9852139949798584,
+      "learning_rate": 2.2316491203022513e-05,
+      "loss": 0.4938,
+      "step": 205
+    },
+    {
+      "epoch": 1.1475409836065573,
+      "grad_norm": 0.8196564316749573,
+      "learning_rate": 2.191842794997677e-05,
+      "loss": 0.4555,
+      "step": 210
+    },
+    {
+      "epoch": 1.174863387978142,
+      "grad_norm": 0.8838884830474854,
+      "learning_rate": 2.1514076316148202e-05,
+      "loss": 0.4664,
+      "step": 215
+    },
+    {
+      "epoch": 1.2021857923497268,
+      "grad_norm": 0.9457610845565796,
+      "learning_rate": 2.1103803829689464e-05,
+      "loss": 0.4991,
+      "step": 220
+    },
+    {
+      "epoch": 1.2295081967213115,
+      "grad_norm": 1.2174898386001587,
+      "learning_rate": 2.0687983400405826e-05,
+      "loss": 0.4845,
+      "step": 225
+    },
+    {
+      "epoch": 1.2568306010928962,
+      "grad_norm": 0.9168224334716797,
+      "learning_rate": 2.0266992980805497e-05,
+      "loss": 0.4535,
+      "step": 230
+    },
+    {
+      "epoch": 1.2841530054644807,
+      "grad_norm": 0.9985936284065247,
+      "learning_rate": 1.9841215222566506e-05,
+      "loss": 0.3993,
+      "step": 235
+    },
+    {
+      "epoch": 1.3114754098360657,
+      "grad_norm": 0.8839482665061951,
+      "learning_rate": 1.9411037128732317e-05,
+      "loss": 0.4824,
+      "step": 240
+    },
+    {
+      "epoch": 1.3387978142076502,
+      "grad_norm": 1.0553685426712036,
+      "learning_rate": 1.897684970195238e-05,
+      "loss": 0.4493,
+      "step": 245
+    },
+    {
+      "epoch": 1.366120218579235,
+      "grad_norm": 1.0127540826797485,
+      "learning_rate": 1.8539047589087297e-05,
+      "loss": 0.3991,
+      "step": 250
+    },
+    {
+      "epoch": 1.3934426229508197,
+      "grad_norm": 0.8810409903526306,
+      "learning_rate": 1.8098028722501626e-05,
+      "loss": 0.4029,
+      "step": 255
+    },
+    {
+      "epoch": 1.4207650273224044,
+      "grad_norm": 0.9336234927177429,
+      "learning_rate": 1.7654193958370436e-05,
+      "loss": 0.4234,
+      "step": 260
+    },
+    {
+      "epoch": 1.4480874316939891,
+      "grad_norm": 1.0207489728927612,
+      "learning_rate": 1.720794671232827e-05,
+      "loss": 0.389,
+      "step": 265
+    },
+    {
+      "epoch": 1.4754098360655736,
+      "grad_norm": 1.0800464153289795,
+      "learning_rate": 1.675969259279177e-05,
+      "loss": 0.3892,
+      "step": 270
+    },
+    {
+      "epoch": 1.5027322404371586,
+      "grad_norm": 1.0996469259262085,
+      "learning_rate": 1.6309839032289168e-05,
+      "loss": 0.3738,
+      "step": 275
+    },
+    {
+      "epoch": 1.530054644808743,
+      "grad_norm": 1.0179123878479004,
+      "learning_rate": 1.5858794917131847e-05,
+      "loss": 0.3649,
+      "step": 280
+    },
+    {
+      "epoch": 1.5573770491803278,
+      "grad_norm": 0.9927940964698792,
+      "learning_rate": 1.540697021576443e-05,
+      "loss": 0.3488,
+      "step": 285
+    },
+    {
+      "epoch": 1.5846994535519126,
+      "grad_norm": 1.0678116083145142,
+      "learning_rate": 1.4954775606131366e-05,
+      "loss": 0.3859,
+      "step": 290
+    },
+    {
+      "epoch": 1.6120218579234973,
+      "grad_norm": 1.2027899026870728,
+      "learning_rate": 1.4502622102398571e-05,
+      "loss": 0.3532,
+      "step": 295
+    },
+    {
+      "epoch": 1.639344262295082,
+      "grad_norm": 1.0589059591293335,
+      "learning_rate": 1.4050920681369484e-05,
+      "loss": 0.3603,
+      "step": 300
+    },
+    {
+      "epoch": 1.6666666666666665,
+      "grad_norm": 1.070417881011963,
+      "learning_rate": 1.360008190893511e-05,
+      "loss": 0.3661,
+      "step": 305
+    },
+    {
+      "epoch": 1.6939890710382515,
+      "grad_norm": 0.9856560826301575,
+      "learning_rate": 1.3150515566897501e-05,
+      "loss": 0.3682,
+      "step": 310
+    },
+    {
+      "epoch": 1.721311475409836,
+      "grad_norm": 1.1567139625549316,
+      "learning_rate": 1.270263028050596e-05,
+      "loss": 0.3641,
+      "step": 315
+    },
+    {
+      "epoch": 1.748633879781421,
+      "grad_norm": 1.0516977310180664,
+      "learning_rate": 1.2256833147044474e-05,
+      "loss": 0.34,
+      "step": 320
+    },
+    {
+      "epoch": 1.7759562841530054,
+      "grad_norm": 0.9534119367599487,
+      "learning_rate": 1.1813529365807926e-05,
+      "loss": 0.3352,
+      "step": 325
+    },
+    {
+      "epoch": 1.8032786885245902,
+      "grad_norm": 1.036719799041748,
+      "learning_rate": 1.1373121869803509e-05,
+      "loss": 0.3692,
+      "step": 330
+    },
+    {
+      "epoch": 1.830601092896175,
+      "grad_norm": 1.1735156774520874,
+      "learning_rate": 1.0936010959511984e-05,
+      "loss": 0.3493,
+      "step": 335
+    },
+    {
+      "epoch": 1.8579234972677594,
+      "grad_norm": 1.076923131942749,
+      "learning_rate": 1.0502593939041751e-05,
+      "loss": 0.3312,
+      "step": 340
+    },
+    {
+      "epoch": 1.8852459016393444,
+      "grad_norm": 1.0882071256637573,
+      "learning_rate": 1.0073264755006438e-05,
+      "loss": 0.315,
+      "step": 345
+    },
+    {
+      "epoch": 1.9125683060109289,
+      "grad_norm": 1.015730857849121,
+      "learning_rate": 9.648413638454175e-06,
+      "loss": 0.3132,
+      "step": 350
+    },
+    {
+      "epoch": 1.9398907103825138,
+      "grad_norm": 1.2598564624786377,
+      "learning_rate": 9.22842675017412e-06,
+      "loss": 0.3075,
+      "step": 355
+    },
+    {
+      "epoch": 1.9672131147540983,
+      "grad_norm": 1.0782288312911987,
+      "learning_rate": 8.813685829702549e-06,
+      "loss": 0.316,
+      "step": 360
+    },
+    {
+      "epoch": 1.994535519125683,
+      "grad_norm": 0.9131107926368713,
+      "learning_rate": 8.404567848347521e-06,
+      "loss": 0.2865,
+      "step": 365
+    },
+    {
+      "epoch": 2.021857923497268,
+      "grad_norm": 1.101699709892273,
+      "learning_rate": 8.001444666547644e-06,
+      "loss": 0.2606,
+      "step": 370
+    },
+    {
+      "epoch": 2.0491803278688523,
+      "grad_norm": 1.0068752765655518,
+      "learning_rate": 7.604682695876156e-06,
+      "loss": 0.2284,
+      "step": 375
+    },
+    {
+      "epoch": 2.0765027322404372,
+      "grad_norm": 1.0016173124313354,
+      "learning_rate": 7.214642565997741e-06,
+      "loss": 0.2417,
+      "step": 380
+    },
+    {
+      "epoch": 2.1038251366120218,
+      "grad_norm": 0.8938846588134766,
+      "learning_rate": 6.83167879688066e-06,
+      "loss": 0.2091,
+      "step": 385
+    },
+    {
+      "epoch": 2.1311475409836067,
+      "grad_norm": 1.0374826192855835,
+      "learning_rate": 6.456139476562203e-06,
+      "loss": 0.2482,
+      "step": 390
+    },
+    {
+      "epoch": 2.158469945355191,
+      "grad_norm": 0.8814703822135925,
+      "learning_rate": 6.088365944760323e-06,
+      "loss": 0.2443,
+      "step": 395
+    },
+    {
+      "epoch": 2.185792349726776,
+      "grad_norm": 1.0764206647872925,
+      "learning_rate": 5.728692482619012e-06,
+      "loss": 0.237,
+      "step": 400
+    },
+    {
+      "epoch": 2.2131147540983607,
+      "grad_norm": 0.9924208521842957,
+      "learning_rate": 5.377446008869484e-06,
+      "loss": 0.2482,
+      "step": 405
+    },
+    {
+      "epoch": 2.240437158469945,
+      "grad_norm": 1.0087777376174927,
+      "learning_rate": 5.0349457826832804e-06,
+      "loss": 0.242,
+      "step": 410
+    },
+    {
+      "epoch": 2.26775956284153,
+      "grad_norm": 1.1189223527908325,
+      "learning_rate": 4.7015031134873315e-06,
+      "loss": 0.2294,
+      "step": 415
+    },
+    {
+      "epoch": 2.2950819672131146,
+      "grad_norm": 0.9861758947372437,
+      "learning_rate": 4.377421078004895e-06,
+      "loss": 0.2237,
+      "step": 420
+    },
+    {
+      "epoch": 2.3224043715846996,
+      "grad_norm": 1.0427005290985107,
+      "learning_rate": 4.062994244779387e-06,
+      "loss": 0.1875,
+      "step": 425
+    },
+    {
+      "epoch": 2.349726775956284,
+      "grad_norm": 1.306485891342163,
+      "learning_rate": 3.7585084064316085e-06,
+      "loss": 0.21,
+      "step": 430
+    },
+    {
+      "epoch": 2.3770491803278686,
+      "grad_norm": 0.9148208498954773,
+      "learning_rate": 3.46424031989369e-06,
+      "loss": 0.2213,
+      "step": 435
+    },
+    {
+      "epoch": 2.4043715846994536,
+      "grad_norm": 0.8958441019058228,
+      "learning_rate": 3.180457454855807e-06,
+      "loss": 0.2304,
+      "step": 440
+    },
+    {
+      "epoch": 2.431693989071038,
+      "grad_norm": 1.0531716346740723,
+      "learning_rate": 2.907417750654462e-06,
+      "loss": 0.2305,
+      "step": 445
+    },
+    {
+      "epoch": 2.459016393442623,
+      "grad_norm": 1.0453715324401855,
+      "learning_rate": 2.6453693818231173e-06,
+      "loss": 0.2092,
+      "step": 450
+    },
+    {
+      "epoch": 2.4863387978142075,
+      "grad_norm": 1.1073248386383057,
+      "learning_rate": 2.3945505325184473e-06,
+      "loss": 0.2013,
+      "step": 455
+    },
+    {
+      "epoch": 2.5136612021857925,
+      "grad_norm": 1.0848618745803833,
+      "learning_rate": 2.1551891800271096e-06,
+      "loss": 0.2312,
+      "step": 460
+    },
+    {
+      "epoch": 2.540983606557377,
+      "grad_norm": 1.0568619966506958,
+      "learning_rate": 1.927502887549872e-06,
+      "loss": 0.1919,
+      "step": 465
+    },
+    {
+      "epoch": 2.5683060109289615,
+      "grad_norm": 1.0123893022537231,
+      "learning_rate": 1.7116986064514828e-06,
+      "loss": 0.2132,
+      "step": 470
+    },
+    {
+      "epoch": 2.5956284153005464,
+      "grad_norm": 1.1613751649856567,
+      "learning_rate": 1.5079724881559243e-06,
+      "loss": 0.2043,
+      "step": 475
+    },
+    {
+      "epoch": 2.6229508196721314,
+      "grad_norm": 1.0579661130905151,
+      "learning_rate": 1.3165097058581055e-06,
+      "loss": 0.1938,
+      "step": 480
+    },
+    {
+      "epoch": 2.650273224043716,
+      "grad_norm": 0.9900656342506409,
+      "learning_rate": 1.1374842862140188e-06,
+      "loss": 0.2159,
+      "step": 485
+    },
+    {
+      "epoch": 2.6775956284153004,
+      "grad_norm": 1.1497286558151245,
+      "learning_rate": 9.710589511623196e-07,
+      "loss": 0.2086,
+      "step": 490
+    },
+    {
+      "epoch": 2.7049180327868854,
+      "grad_norm": 1.0409764051437378,
+      "learning_rate": 8.173849700211694e-07,
+      "loss": 0.2317,
+      "step": 495
+    },
+    {
+      "epoch": 2.73224043715847,
+      "grad_norm": 1.0730262994766235,
+      "learning_rate": 6.766020219946895e-07,
+      "loss": 0.2027,
+      "step": 500
+    },
+    {
+      "epoch": 2.7595628415300544,
+      "grad_norm": 0.965694785118103,
+      "learning_rate": 5.488380692140644e-07,
+      "loss": 0.213,
+      "step": 505
+    },
+    {
+      "epoch": 2.7868852459016393,
+      "grad_norm": 0.999904215335846,
+      "learning_rate": 4.3420924042865904e-07,
+      "loss": 0.2573,
+      "step": 510
+    },
+    {
+      "epoch": 2.8142076502732243,
+      "grad_norm": 0.9462453722953796,
+      "learning_rate": 3.3281972545287807e-07,
+      "loss": 0.204,
+      "step": 515
+    },
+    {
+      "epoch": 2.841530054644809,
+      "grad_norm": 1.0435760021209717,
+      "learning_rate": 2.4476168046471016e-07,
+      "loss": 0.2378,
+      "step": 520
+    },
+    {
+      "epoch": 2.8688524590163933,
+      "grad_norm": 1.0594007968902588,
+      "learning_rate": 1.7011514424202236e-07,
+      "loss": 0.2364,
+      "step": 525
+    },
+    {
+      "epoch": 2.8961748633879782,
+      "grad_norm": 1.1840893030166626,
+      "learning_rate": 1.0894796541275065e-07,
+      "loss": 0.2155,
+      "step": 530
+    },
+    {
+      "epoch": 2.9234972677595628,
+      "grad_norm": 1.0450780391693115,
+      "learning_rate": 6.131574078511226e-08,
+      "loss": 0.2193,
+      "step": 535
+    },
+    {
+      "epoch": 2.9508196721311473,
+      "grad_norm": 1.008967399597168,
+      "learning_rate": 2.7261764813880098e-08,
+      "loss": 0.2279,
+      "step": 540
+    },
+    {
+      "epoch": 2.978142076502732,
+      "grad_norm": 1.1028051376342773,
+      "learning_rate": 6.81699024867255e-09,
+      "loss": 0.2092,
+      "step": 545
+    },
+    {
+      "epoch": 3.0,
+      "step": 549,
+      "total_flos": 7.459266976179814e+17,
+      "train_loss": 0.5250646652202572,
+      "train_runtime": 382.361,
+      "train_samples_per_second": 45.875,
+      "train_steps_per_second": 1.436
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 549,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 20000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.459266976179814e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

20_128_e3_3e-5/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f764909ed45eade300d3d86fd061ec7a1c28e2ea058064ef5730dcf7e554c75
+size 8209

20_128_e3_3e-5/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff