Upload folder using huggingface_hub

Browse files

Files changed (14) hide show

124_128_e3_3e-5/adapter_config.json +39 -0
124_128_e3_3e-5/adapter_model.safetensors +3 -0
124_128_e3_3e-5/added_tokens.json +9 -0
124_128_e3_3e-5/all_results.json +9 -0
124_128_e3_3e-5/chat_template.jinja +62 -0
124_128_e3_3e-5/config.json +32 -0
124_128_e3_3e-5/merges.txt +0 -0
124_128_e3_3e-5/special_tokens_map.json +33 -0
124_128_e3_3e-5/tokenizer.json +0 -0
124_128_e3_3e-5/tokenizer_config.json +234 -0
124_128_e3_3e-5/train_results.json +9 -0
124_128_e3_3e-5/trainer_state.json +785 -0
124_128_e3_3e-5/training_args.bin +3 -0
124_128_e3_3e-5/vocab.json +0 -0

124_128_e3_3e-5/adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "ibm-granite/granite-3.3-8b-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 256,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "gate_proj",
+    "o_proj",
+    "q_proj",
+    "k_proj",
+    "down_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

124_128_e3_3e-5/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24aaac1c87d67fdab8340962701f5ee1168d8fbd4be19e6ee1ff8ffdba32c3b4
+size 791751704

124_128_e3_3e-5/added_tokens.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "<|end_of_cite|>": 49156,
+  "<|end_of_plugin|>": 49158,
+  "<|end_of_role|>": 49153,
+  "<|start_of_cite|>": 49155,
+  "<|start_of_plugin|>": 49157,
+  "<|start_of_role|>": 49152,
+  "<|tool_call|>": 49154
+}

124_128_e3_3e-5/all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.0,
+    "total_flos": 7.772178482829722e+17,
+    "train_loss": 0.6026598820079132,
+    "train_runtime": 371.2709,
+    "train_samples": 5691,
+    "train_samples_per_second": 45.985,
+    "train_steps_per_second": 1.438
+}

124_128_e3_3e-5/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,62 @@

+{# Alias tools -> available_tools #}
+{%- if tools and not available_tools -%}
+    {%- set available_tools = tools -%}
+{%- endif -%}
+{%- if messages[0]['role'] == 'system' %}
+     {%- set system_message = messages[0]['content'] %}
+     {%- set loop_messages = messages[1:] %}
+ {%- else %}
+     {%- set system_message = "Knowledge Cutoff Date: April 2024.
+Today's Date: " + strftime_now('%B %d, %Y') + ".
+You are Granite, developed by IBM." %}
+     {%- if available_tools and documents %}
+         {%- set system_message = system_message + " You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.
+Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." %}
+     {%- elif available_tools %}
+         {%- set system_message = system_message + " You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request." %}
+     {%- elif documents %}
+         {%- set system_message = system_message + " Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." %}
+    {%- elif thinking %}
+    {%- set system_message = system_message + " You are a helpful AI assistant.
+Respond to every user query in a comprehensive and detailed way. You can write down your thoughts and reasoning process before responding. In the thought process, engage in a comprehensive cycle of analysis, summarization, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. In the response section, based on various attempts, explorations, and reflections from the thoughts section, systematically present the final solution that you deem correct. The response should summarize the thought process. Write your thoughts between <think></think> and write your response between <response></response> for each user query." %}
+     {%- else %}
+         {%- set system_message = system_message + " You are a helpful AI assistant." %}
+     {%- endif %}
+     {%- if 'citations' in controls and documents %}
+         {%- set system_message = system_message + '
+Use the symbols <|start_of_cite|> and <|end_of_cite|> to indicate when a fact comes from a document in the search result, e.g <|start_of_cite|> {document_id: 1}my fact <|end_of_cite|> for a fact from document 1. Afterwards, list all the citations with their corresponding documents in an ordered list.' %}
+     {%- endif %}
+     {%- if 'hallucinations' in controls and documents %}
+         {%- set system_message = system_message + '
+Finally, after the response is written, include a numbered list of sentences from the response with a corresponding risk value that are hallucinated and not based in the documents.' %}
+     {%- endif %}
+     {%- set loop_messages = messages %}
+ {%- endif %}
+ {{- '<|start_of_role|>system<|end_of_role|>' + system_message + '<|end_of_text|>
+' }}
+ {%- if available_tools %}
+     {{- '<|start_of_role|>available_tools<|end_of_role|>' }}
+     {{- available_tools | tojson(indent=4) }}
+     {{- '<|end_of_text|>
+' }}
+ {%- endif %}
+ {%- if documents %}
+     {%- for document in documents %}
+         {{- '<|start_of_role|>document {"document_id": "' + document['doc_id'] | string + '"}<|end_of_role|>
+' }}
+         {{- document['text'] }}
+         {{- '<|end_of_text|>
+' }}
+              {%- endfor %}
+ {%- endif %}
+ {%- for message in loop_messages %}
+     {{- '<|start_of_role|>' + message['role'] + '<|end_of_role|>' + message['content'] + '<|end_of_text|>
+' }}
+     {%- if loop.last and add_generation_prompt %}
+         {{- '<|start_of_role|>assistant' }}
+             {%- if controls %}
+                 {{- ' ' + controls | tojson()}}
+             {%- endif %}
+         {{- '<|end_of_role|>' }}
+     {%- endif %}
+ {%- endfor %}

124_128_e3_3e-5/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "architectures": [
+    "GraniteForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attention_multiplier": 0.0078125,
+  "bos_token_id": 0,
+  "embedding_multiplier": 12.0,
+  "eos_token_id": 0,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12800,
+  "logits_scaling": 16.0,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "granite",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 40,
+  "num_key_value_heads": 8,
+  "pad_token_id": 0,
+  "residual_multiplier": 0.22,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000000.0,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.52.4",
+  "use_cache": true,
+  "vocab_size": 49159
+}

124_128_e3_3e-5/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

124_128_e3_3e-5/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "additional_special_tokens": [
+    "<|start_of_role|>",
+    "<|end_of_role|>",
+    "<|tool_call|>",
+    "<|start_of_cite|>",
+    "<|end_of_cite|>",
+    "<|start_of_plugin|>",
+    "<|end_of_plugin|>"
+  ],
+  "bos_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|end_of_plugin|>",
+  "unk_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

124_128_e3_3e-5/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

124_128_e3_3e-5/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,234 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|end_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<fim_prefix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<fim_middle>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<fim_suffix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<fim_pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<filename>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<gh_stars>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<empty_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<commit_before>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<commit_msg>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "<commit_after>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "<reponame>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49152": {
+      "content": "<|start_of_role|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49153": {
+      "content": "<|end_of_role|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49154": {
+      "content": "<|tool_call|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49155": {
+      "content": "<|start_of_cite|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49156": {
+      "content": "<|end_of_cite|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49157": {
+      "content": "<|start_of_plugin|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49158": {
+      "content": "<|end_of_plugin|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|start_of_role|>",
+    "<|end_of_role|>",
+    "<|tool_call|>",
+    "<|start_of_cite|>",
+    "<|end_of_cite|>",
+    "<|start_of_plugin|>",
+    "<|end_of_plugin|>"
+  ],
+  "bos_token": "<|end_of_text|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|end_of_text|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 8192,
+  "pad_token": "<|end_of_plugin|>",
+  "padding_side": "left",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|end_of_text|>",
+  "vocab_size": 49152
+}

124_128_e3_3e-5/train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.0,
+    "total_flos": 7.772178482829722e+17,
+    "train_loss": 0.6026598820079132,
+    "train_runtime": 371.2709,
+    "train_samples": 5691,
+    "train_samples_per_second": 45.985,
+    "train_steps_per_second": 1.438
+}

124_128_e3_3e-5/trainer_state.json ADDED Viewed

	@@ -0,0 +1,785 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 534,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.028089887640449437,
+      "grad_norm": 2.933321714401245,
+      "learning_rate": 4.444444444444444e-06,
+      "loss": 1.5235,
+      "step": 5
+    },
+    {
+      "epoch": 0.056179775280898875,
+      "grad_norm": 1.0247745513916016,
+      "learning_rate": 9.999999999999999e-06,
+      "loss": 1.5466,
+      "step": 10
+    },
+    {
+      "epoch": 0.08426966292134831,
+      "grad_norm": 0.4670858085155487,
+      "learning_rate": 1.5555555555555555e-05,
+      "loss": 1.4287,
+      "step": 15
+    },
+    {
+      "epoch": 0.11235955056179775,
+      "grad_norm": 0.4169785678386688,
+      "learning_rate": 2.111111111111111e-05,
+      "loss": 1.3436,
+      "step": 20
+    },
+    {
+      "epoch": 0.1404494382022472,
+      "grad_norm": 0.399252712726593,
+      "learning_rate": 2.6666666666666667e-05,
+      "loss": 1.3524,
+      "step": 25
+    },
+    {
+      "epoch": 0.16853932584269662,
+      "grad_norm": 0.3836585581302643,
+      "learning_rate": 2.99988481404614e-05,
+      "loss": 1.2411,
+      "step": 30
+    },
+    {
+      "epoch": 0.19662921348314608,
+      "grad_norm": 0.3541490435600281,
+      "learning_rate": 2.9985891752198826e-05,
+      "loss": 1.2758,
+      "step": 35
+    },
+    {
+      "epoch": 0.2247191011235955,
+      "grad_norm": 0.3229026794433594,
+      "learning_rate": 2.9958551628493237e-05,
+      "loss": 1.2259,
+      "step": 40
+    },
+    {
+      "epoch": 0.25280898876404495,
+      "grad_norm": 0.3473356068134308,
+      "learning_rate": 2.9916854010899866e-05,
+      "loss": 1.2085,
+      "step": 45
+    },
+    {
+      "epoch": 0.2808988764044944,
+      "grad_norm": 0.3754599094390869,
+      "learning_rate": 2.9860838921561555e-05,
+      "loss": 1.1445,
+      "step": 50
+    },
+    {
+      "epoch": 0.3089887640449438,
+      "grad_norm": 0.4058438837528229,
+      "learning_rate": 2.9790560124794702e-05,
+      "loss": 1.1215,
+      "step": 55
+    },
+    {
+      "epoch": 0.33707865168539325,
+      "grad_norm": 0.38481196761131287,
+      "learning_rate": 2.970608507548533e-05,
+      "loss": 1.0949,
+      "step": 60
+    },
+    {
+      "epoch": 0.3651685393258427,
+      "grad_norm": 0.3943042755126953,
+      "learning_rate": 2.96074948543446e-05,
+      "loss": 1.0958,
+      "step": 65
+    },
+    {
+      "epoch": 0.39325842696629215,
+      "grad_norm": 0.42663338780403137,
+      "learning_rate": 2.9494884090086088e-05,
+      "loss": 1.0759,
+      "step": 70
+    },
+    {
+      "epoch": 0.42134831460674155,
+      "grad_norm": 0.4838952124118805,
+      "learning_rate": 2.9368360868599364e-05,
+      "loss": 1.0741,
+      "step": 75
+    },
+    {
+      "epoch": 0.449438202247191,
+      "grad_norm": 0.3960781693458557,
+      "learning_rate": 2.922804662920718e-05,
+      "loss": 1.0279,
+      "step": 80
+    },
+    {
+      "epoch": 0.47752808988764045,
+      "grad_norm": 0.46060478687286377,
+      "learning_rate": 2.907407604810578e-05,
+      "loss": 1.0478,
+      "step": 85
+    },
+    {
+      "epoch": 0.5056179775280899,
+      "grad_norm": 0.5021286606788635,
+      "learning_rate": 2.8906596909100155e-05,
+      "loss": 1.0486,
+      "step": 90
+    },
+    {
+      "epoch": 0.5337078651685393,
+      "grad_norm": 0.49248790740966797,
+      "learning_rate": 2.8725769961758478e-05,
+      "loss": 0.9593,
+      "step": 95
+    },
+    {
+      "epoch": 0.5617977528089888,
+      "grad_norm": 0.5105310082435608,
+      "learning_rate": 2.8531768767121656e-05,
+      "loss": 0.9373,
+      "step": 100
+    },
+    {
+      "epoch": 0.5898876404494382,
+      "grad_norm": 0.5544092059135437,
+      "learning_rate": 2.8324779531116275e-05,
+      "loss": 0.9431,
+      "step": 105
+    },
+    {
+      "epoch": 0.6179775280898876,
+      "grad_norm": 0.5785091519355774,
+      "learning_rate": 2.8105000925830673e-05,
+      "loss": 0.9614,
+      "step": 110
+    },
+    {
+      "epoch": 0.6460674157303371,
+      "grad_norm": 0.610243558883667,
+      "learning_rate": 2.787264389882586e-05,
+      "loss": 0.8706,
+      "step": 115
+    },
+    {
+      "epoch": 0.6741573033707865,
+      "grad_norm": 0.6222745776176453,
+      "learning_rate": 2.76279314706641e-05,
+      "loss": 0.8835,
+      "step": 120
+    },
+    {
+      "epoch": 0.702247191011236,
+      "grad_norm": 0.5865106582641602,
+      "learning_rate": 2.7371098520849705e-05,
+      "loss": 0.8389,
+      "step": 125
+    },
+    {
+      "epoch": 0.7303370786516854,
+      "grad_norm": 0.6899399757385254,
+      "learning_rate": 2.710239156238732e-05,
+      "loss": 0.8392,
+      "step": 130
+    },
+    {
+      "epoch": 0.7584269662921348,
+      "grad_norm": 0.7009347677230835,
+      "learning_rate": 2.6822068505174208e-05,
+      "loss": 0.8117,
+      "step": 135
+    },
+    {
+      "epoch": 0.7865168539325843,
+      "grad_norm": 0.746788501739502,
+      "learning_rate": 2.6530398408453572e-05,
+      "loss": 0.8723,
+      "step": 140
+    },
+    {
+      "epoch": 0.8146067415730337,
+      "grad_norm": 0.7709099054336548,
+      "learning_rate": 2.6227661222566516e-05,
+      "loss": 0.843,
+      "step": 145
+    },
+    {
+      "epoch": 0.8426966292134831,
+      "grad_norm": 0.8254103064537048,
+      "learning_rate": 2.5914147520250565e-05,
+      "loss": 0.8102,
+      "step": 150
+    },
+    {
+      "epoch": 0.8707865168539326,
+      "grad_norm": 0.7346197366714478,
+      "learning_rate": 2.5590158217742565e-05,
+      "loss": 0.7474,
+      "step": 155
+    },
+    {
+      "epoch": 0.898876404494382,
+      "grad_norm": 0.7145342826843262,
+      "learning_rate": 2.5256004285953737e-05,
+      "loss": 0.7609,
+      "step": 160
+    },
+    {
+      "epoch": 0.9269662921348315,
+      "grad_norm": 0.7305863499641418,
+      "learning_rate": 2.4912006451994048e-05,
+      "loss": 0.7553,
+      "step": 165
+    },
+    {
+      "epoch": 0.9550561797752809,
+      "grad_norm": 0.9788331389427185,
+      "learning_rate": 2.455849489133244e-05,
+      "loss": 0.7209,
+      "step": 170
+    },
+    {
+      "epoch": 0.9831460674157303,
+      "grad_norm": 0.9055688977241516,
+      "learning_rate": 2.4195808910888293e-05,
+      "loss": 0.7511,
+      "step": 175
+    },
+    {
+      "epoch": 1.0112359550561798,
+      "grad_norm": 0.8753438591957092,
+      "learning_rate": 2.382429662335839e-05,
+      "loss": 0.6044,
+      "step": 180
+    },
+    {
+      "epoch": 1.0393258426966292,
+      "grad_norm": 0.8324143290519714,
+      "learning_rate": 2.3444314613091934e-05,
+      "loss": 0.6126,
+      "step": 185
+    },
+    {
+      "epoch": 1.0674157303370786,
+      "grad_norm": 0.852999210357666,
+      "learning_rate": 2.3056227593834306e-05,
+      "loss": 0.6519,
+      "step": 190
+    },
+    {
+      "epoch": 1.095505617977528,
+      "grad_norm": 0.9365205764770508,
+      "learning_rate": 2.266040805866807e-05,
+      "loss": 0.6106,
+      "step": 195
+    },
+    {
+      "epoch": 1.1235955056179776,
+      "grad_norm": 0.8534255623817444,
+      "learning_rate": 2.2257235922487262e-05,
+      "loss": 0.5575,
+      "step": 200
+    },
+    {
+      "epoch": 1.151685393258427,
+      "grad_norm": 0.94826740026474,
+      "learning_rate": 2.1847098157348053e-05,
+      "loss": 0.5962,
+      "step": 205
+    },
+    {
+      "epoch": 1.1797752808988764,
+      "grad_norm": 0.9749745726585388,
+      "learning_rate": 2.1430388421045812e-05,
+      "loss": 0.5716,
+      "step": 210
+    },
+    {
+      "epoch": 1.2078651685393258,
+      "grad_norm": 0.934945285320282,
+      "learning_rate": 2.1007506679275126e-05,
+      "loss": 0.5944,
+      "step": 215
+    },
+    {
+      "epoch": 1.2359550561797752,
+      "grad_norm": 1.1705440282821655,
+      "learning_rate": 2.0578858821735304e-05,
+      "loss": 0.5572,
+      "step": 220
+    },
+    {
+      "epoch": 1.2640449438202248,
+      "grad_norm": 0.9548543691635132,
+      "learning_rate": 2.0144856272549985e-05,
+      "loss": 0.5227,
+      "step": 225
+    },
+    {
+      "epoch": 1.2921348314606742,
+      "grad_norm": 0.9868324398994446,
+      "learning_rate": 1.9705915595374663e-05,
+      "loss": 0.5357,
+      "step": 230
+    },
+    {
+      "epoch": 1.3202247191011236,
+      "grad_norm": 0.9227640628814697,
+      "learning_rate": 1.9262458093571193e-05,
+      "loss": 0.5537,
+      "step": 235
+    },
+    {
+      "epoch": 1.348314606741573,
+      "grad_norm": 0.9637176990509033,
+      "learning_rate": 1.8814909405833065e-05,
+      "loss": 0.5202,
+      "step": 240
+    },
+    {
+      "epoch": 1.3764044943820224,
+      "grad_norm": 0.877534031867981,
+      "learning_rate": 1.8363699097649528e-05,
+      "loss": 0.499,
+      "step": 245
+    },
+    {
+      "epoch": 1.404494382022472,
+      "grad_norm": 1.0015568733215332,
+      "learning_rate": 1.7909260249000692e-05,
+      "loss": 0.512,
+      "step": 250
+    },
+    {
+      "epoch": 1.4325842696629214,
+      "grad_norm": 1.1195361614227295,
+      "learning_rate": 1.7452029038679395e-05,
+      "loss": 0.4542,
+      "step": 255
+    },
+    {
+      "epoch": 1.4606741573033708,
+      "grad_norm": 1.0795161724090576,
+      "learning_rate": 1.6992444325638727e-05,
+      "loss": 0.4947,
+      "step": 260
+    },
+    {
+      "epoch": 1.4887640449438202,
+      "grad_norm": 1.0410566329956055,
+      "learning_rate": 1.6530947227767125e-05,
+      "loss": 0.4977,
+      "step": 265
+    },
+    {
+      "epoch": 1.5168539325842696,
+      "grad_norm": 1.0502032041549683,
+      "learning_rate": 1.6067980698495272e-05,
+      "loss": 0.4646,
+      "step": 270
+    },
+    {
+      "epoch": 1.5449438202247192,
+      "grad_norm": 1.010963797569275,
+      "learning_rate": 1.5603989101641228e-05,
+      "loss": 0.4899,
+      "step": 275
+    },
+    {
+      "epoch": 1.5730337078651684,
+      "grad_norm": 1.216365933418274,
+      "learning_rate": 1.5139417784901836e-05,
+      "loss": 0.4949,
+      "step": 280
+    },
+    {
+      "epoch": 1.601123595505618,
+      "grad_norm": 1.0427546501159668,
+      "learning_rate": 1.4674712652399767e-05,
+      "loss": 0.4236,
+      "step": 285
+    },
+    {
+      "epoch": 1.6292134831460674,
+      "grad_norm": 1.1208810806274414,
+      "learning_rate": 1.421031973669656e-05,
+      "loss": 0.449,
+      "step": 290
+    },
+    {
+      "epoch": 1.6573033707865168,
+      "grad_norm": 1.000199317932129,
+      "learning_rate": 1.374668477068228e-05,
+      "loss": 0.4321,
+      "step": 295
+    },
+    {
+      "epoch": 1.6853932584269664,
+      "grad_norm": 1.0883219242095947,
+      "learning_rate": 1.3284252759752915e-05,
+      "loss": 0.4243,
+      "step": 300
+    },
+    {
+      "epoch": 1.7134831460674156,
+      "grad_norm": 1.221422553062439,
+      "learning_rate": 1.2823467554685946e-05,
+      "loss": 0.4262,
+      "step": 305
+    },
+    {
+      "epoch": 1.7415730337078652,
+      "grad_norm": 1.062383770942688,
+      "learning_rate": 1.2364771425624213e-05,
+      "loss": 0.4324,
+      "step": 310
+    },
+    {
+      "epoch": 1.7696629213483146,
+      "grad_norm": 1.0984909534454346,
+      "learning_rate": 1.1908604637576845e-05,
+      "loss": 0.4441,
+      "step": 315
+    },
+    {
+      "epoch": 1.797752808988764,
+      "grad_norm": 1.070756196975708,
+      "learning_rate": 1.1455405027844796e-05,
+      "loss": 0.4236,
+      "step": 320
+    },
+    {
+      "epoch": 1.8258426966292136,
+      "grad_norm": 1.1225444078445435,
+      "learning_rate": 1.1005607585776527e-05,
+      "loss": 0.3783,
+      "step": 325
+    },
+    {
+      "epoch": 1.8539325842696628,
+      "grad_norm": 1.0719342231750488,
+      "learning_rate": 1.055964403525717e-05,
+      "loss": 0.3877,
+      "step": 330
+    },
+    {
+      "epoch": 1.8820224719101124,
+      "grad_norm": 1.072813868522644,
+      "learning_rate": 1.0117942420332035e-05,
+      "loss": 0.4238,
+      "step": 335
+    },
+    {
+      "epoch": 1.9101123595505618,
+      "grad_norm": 1.0331416130065918,
+      "learning_rate": 9.680926694361966e-06,
+      "loss": 0.4149,
+      "step": 340
+    },
+    {
+      "epoch": 1.9382022471910112,
+      "grad_norm": 1.0326926708221436,
+      "learning_rate": 9.249016313105144e-06,
+      "loss": 0.3891,
+      "step": 345
+    },
+    {
+      "epoch": 1.9662921348314608,
+      "grad_norm": 1.1697996854782104,
+      "learning_rate": 8.822625832115668e-06,
+      "loss": 0.3767,
+      "step": 350
+    },
+    {
+      "epoch": 1.99438202247191,
+      "grad_norm": 1.1903326511383057,
+      "learning_rate": 8.402164508845516e-06,
+      "loss": 0.3897,
+      "step": 355
+    },
+    {
+      "epoch": 2.0224719101123596,
+      "grad_norm": 1.1597716808319092,
+      "learning_rate": 7.988035909831648e-06,
+      "loss": 0.3235,
+      "step": 360
+    },
+    {
+      "epoch": 2.050561797752809,
+      "grad_norm": 1.0445481538772583,
+      "learning_rate": 7.58063752334546e-06,
+      "loss": 0.3219,
+      "step": 365
+    },
+    {
+      "epoch": 2.0786516853932584,
+      "grad_norm": 1.16510808467865,
+      "learning_rate": 7.180360377876125e-06,
+      "loss": 0.3238,
+      "step": 370
+    },
+    {
+      "epoch": 2.106741573033708,
+      "grad_norm": 1.1520670652389526,
+      "learning_rate": 6.787588666814321e-06,
+      "loss": 0.3411,
+      "step": 375
+    },
+    {
+      "epoch": 2.134831460674157,
+      "grad_norm": 1.144171953201294,
+      "learning_rate": 6.402699379696258e-06,
+      "loss": 0.3128,
+      "step": 380
+    },
+    {
+      "epoch": 2.162921348314607,
+      "grad_norm": 1.1329914331436157,
+      "learning_rate": 6.026061940362187e-06,
+      "loss": 0.3414,
+      "step": 385
+    },
+    {
+      "epoch": 2.191011235955056,
+      "grad_norm": 1.0946805477142334,
+      "learning_rate": 5.658037852376591e-06,
+      "loss": 0.3092,
+      "step": 390
+    },
+    {
+      "epoch": 2.2191011235955056,
+      "grad_norm": 1.1616297960281372,
+      "learning_rate": 5.29898035205038e-06,
+      "loss": 0.3008,
+      "step": 395
+    },
+    {
+      "epoch": 2.247191011235955,
+      "grad_norm": 1.211364507675171,
+      "learning_rate": 4.949234069398165e-06,
+      "loss": 0.2606,
+      "step": 400
+    },
+    {
+      "epoch": 2.2752808988764044,
+      "grad_norm": 1.2527793645858765,
+      "learning_rate": 4.609134697356009e-06,
+      "loss": 0.2933,
+      "step": 405
+    },
+    {
+      "epoch": 2.303370786516854,
+      "grad_norm": 1.1577643156051636,
+      "learning_rate": 4.279008669577158e-06,
+      "loss": 0.3032,
+      "step": 410
+    },
+    {
+      "epoch": 2.331460674157303,
+      "grad_norm": 1.058172583580017,
+      "learning_rate": 3.959172847114991e-06,
+      "loss": 0.3028,
+      "step": 415
+    },
+    {
+      "epoch": 2.359550561797753,
+      "grad_norm": 1.1726353168487549,
+      "learning_rate": 3.64993421429394e-06,
+      "loss": 0.2877,
+      "step": 420
+    },
+    {
+      "epoch": 2.3876404494382024,
+      "grad_norm": 1.3179293870925903,
+      "learning_rate": 3.3515895840602487e-06,
+      "loss": 0.2959,
+      "step": 425
+    },
+    {
+      "epoch": 2.4157303370786516,
+      "grad_norm": 1.0723299980163574,
+      "learning_rate": 3.0644253130954747e-06,
+      "loss": 0.3026,
+      "step": 430
+    },
+    {
+      "epoch": 2.443820224719101,
+      "grad_norm": 1.1094893217086792,
+      "learning_rate": 2.7887170269660146e-06,
+      "loss": 0.3088,
+      "step": 435
+    },
+    {
+      "epoch": 2.4719101123595504,
+      "grad_norm": 1.1231456995010376,
+      "learning_rate": 2.5247293555726244e-06,
+      "loss": 0.3197,
+      "step": 440
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 1.1325987577438354,
+      "learning_rate": 2.2727156791537544e-06,
+      "loss": 0.2729,
+      "step": 445
+    },
+    {
+      "epoch": 2.5280898876404496,
+      "grad_norm": 1.0779447555541992,
+      "learning_rate": 2.032917885086519e-06,
+      "loss": 0.2694,
+      "step": 450
+    },
+    {
+      "epoch": 2.556179775280899,
+      "grad_norm": 1.1157567501068115,
+      "learning_rate": 1.8055661357187225e-06,
+      "loss": 0.3103,
+      "step": 455
+    },
+    {
+      "epoch": 2.5842696629213484,
+      "grad_norm": 1.0660185813903809,
+      "learning_rate": 1.5908786474548004e-06,
+      "loss": 0.2576,
+      "step": 460
+    },
+    {
+      "epoch": 2.6123595505617976,
+      "grad_norm": 1.0340101718902588,
+      "learning_rate": 1.3890614813076852e-06,
+      "loss": 0.2666,
+      "step": 465
+    },
+    {
+      "epoch": 2.640449438202247,
+      "grad_norm": 1.0444116592407227,
+      "learning_rate": 1.2003083451176366e-06,
+      "loss": 0.3032,
+      "step": 470
+    },
+    {
+      "epoch": 2.668539325842697,
+      "grad_norm": 1.0766863822937012,
+      "learning_rate": 1.0248004076278905e-06,
+      "loss": 0.2981,
+      "step": 475
+    },
+    {
+      "epoch": 2.696629213483146,
+      "grad_norm": 1.247438907623291,
+      "learning_rate": 8.627061245955519e-07,
+      "loss": 0.3061,
+      "step": 480
+    },
+    {
+      "epoch": 2.7247191011235956,
+      "grad_norm": 1.0808175802230835,
+      "learning_rate": 7.141810771046486e-07,
+      "loss": 0.3033,
+      "step": 485
+    },
+    {
+      "epoch": 2.752808988764045,
+      "grad_norm": 1.0937455892562866,
+      "learning_rate": 5.793678222365433e-07,
+      "loss": 0.2883,
+      "step": 490
+    },
+    {
+      "epoch": 2.7808988764044944,
+      "grad_norm": 1.0853956937789917,
+      "learning_rate": 4.5839575624100627e-07,
+      "loss": 0.2905,
+      "step": 495
+    },
+    {
+      "epoch": 2.808988764044944,
+      "grad_norm": 1.0512863397598267,
+      "learning_rate": 3.513809903393167e-07,
+      "loss": 0.2586,
+      "step": 500
+    },
+    {
+      "epoch": 2.837078651685393,
+      "grad_norm": 1.1749273538589478,
+      "learning_rate": 2.5842623927856244e-07,
+      "loss": 0.2863,
+      "step": 505
+    },
+    {
+      "epoch": 2.865168539325843,
+      "grad_norm": 1.0883651971817017,
+      "learning_rate": 1.796207227441332e-07,
+      "loss": 0.253,
+      "step": 510
+    },
+    {
+      "epoch": 2.893258426966292,
+      "grad_norm": 1.1884039640426636,
+      "learning_rate": 1.1504007972502284e-07,
+      "loss": 0.303,
+      "step": 515
+    },
+    {
+      "epoch": 2.9213483146067416,
+      "grad_norm": 1.0880359411239624,
+      "learning_rate": 6.474629591412651e-08,
+      "loss": 0.2631,
+      "step": 520
+    },
+    {
+      "epoch": 2.949438202247191,
+      "grad_norm": 1.1617555618286133,
+      "learning_rate": 2.8787644213233432e-08,
+      "loss": 0.2995,
+      "step": 525
+    },
+    {
+      "epoch": 2.9775280898876404,
+      "grad_norm": 1.2445521354675293,
+      "learning_rate": 7.198638399802948e-09,
+      "loss": 0.2847,
+      "step": 530
+    },
+    {
+      "epoch": 3.0,
+      "step": 534,
+      "total_flos": 7.772178482829722e+17,
+      "train_loss": 0.6026598820079132,
+      "train_runtime": 371.2709,
+      "train_samples_per_second": 45.985,
+      "train_steps_per_second": 1.438
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 534,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 20000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.772178482829722e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

124_128_e3_3e-5/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:418a29f82bf5232a92ccc9131623c76be12e66cbcaf05a11085cc2ef171de419
+size 8273

124_128_e3_3e-5/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff