Upload folder using huggingface_hub
- 123_128_e3_3e-5/adapter_config.json +39 -0
- 123_128_e3_3e-5/adapter_model.safetensors +3 -0
- 123_128_e3_3e-5/added_tokens.json +9 -0
- 123_128_e3_3e-5/all_results.json +9 -0
- 123_128_e3_3e-5/chat_template.jinja +62 -0
- 123_128_e3_3e-5/config.json +32 -0
- 123_128_e3_3e-5/merges.txt +0 -0
- 123_128_e3_3e-5/special_tokens_map.json +33 -0
- 123_128_e3_3e-5/tokenizer.json +0 -0
- 123_128_e3_3e-5/tokenizer_config.json +234 -0
- 123_128_e3_3e-5/train_results.json +9 -0
- 123_128_e3_3e-5/trainer_state.json +764 -0
- 123_128_e3_3e-5/training_args.bin +3 -0
- 123_128_e3_3e-5/vocab.json +0 -0
123_128_e3_3e-5/adapter_config.json
ADDED
@@ -0,0 +1,39 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "ibm-granite/granite-3.3-8b-instruct",
  "bias": "none",
  "corda_config": null,
  "eva_config": null,
  "exclude_modules": null,
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 256,
  "lora_bias": false,
  "lora_dropout": 0.05,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 128,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "q_proj",
    "gate_proj",
    "v_proj",
    "up_proj",
    "down_proj",
    "k_proj",
    "o_proj"
  ],
  "task_type": "CAUSAL_LM",
  "trainable_token_indices": null,
  "use_dora": false,
  "use_rslora": false
}
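This config lines up with the folder name: LoRA rank r = 128 with lora_alpha = 256, and (per the results files below) 3 epochs at a peak learning rate of about 3e-5. A minimal loading sketch follows, assuming the folder is available locally under its name here and that transformers and peft are installed; the sketch is illustrative and not part of the upload itself.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Base model named in adapter_config.json; bfloat16 matches config.json.
base = AutoModelForCausalLM.from_pretrained(
    "ibm-granite/granite-3.3-8b-instruct",
    torch_dtype=torch.bfloat16,
)
# Attach the LoRA adapter saved in this folder (assumed local path).
model = PeftModel.from_pretrained(base, "123_128_e3_3e-5")
tokenizer = AutoTokenizer.from_pretrained("123_128_e3_3e-5")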
123_128_e3_3e-5/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:556ca830cad1df262cfe00bc6b6c2bd3dfe47d3387def37b687477d81d7c18f4
size 791751704
123_128_e3_3e-5/added_tokens.json
ADDED
@@ -0,0 +1,9 @@
{
  "<|end_of_cite|>": 49156,
  "<|end_of_plugin|>": 49158,
  "<|end_of_role|>": 49153,
  "<|start_of_cite|>": 49155,
  "<|start_of_plugin|>": 49157,
  "<|start_of_role|>": 49152,
  "<|tool_call|>": 49154
}
123_128_e3_3e-5/all_results.json
ADDED
@@ -0,0 +1,9 @@
{
  "epoch": 3.0,
  "total_flos": 6.754583228155167e+17,
  "train_loss": 0.3721794698517336,
  "train_runtime": 322.2859,
  "train_samples": 5497,
  "train_samples_per_second": 51.169,
  "train_steps_per_second": 1.601
}
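As a quick consistency check on these numbers: 51.169 samples/s × 322.2859 s ≈ 16,491 ≈ 5497 train samples × 3 epochs, and 1.601 steps/s × 322.2859 s ≈ 516 steps, matching the global_step recorded in trainer_state.json below.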
123_128_e3_3e-5/chat_template.jinja
ADDED
@@ -0,0 +1,62 @@
{# Alias tools -> available_tools #}
{%- if tools and not available_tools -%}
{%- set available_tools = tools -%}
{%- endif -%}
{%- if messages[0]['role'] == 'system' %}
{%- set system_message = messages[0]['content'] %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set system_message = "Knowledge Cutoff Date: April 2024.
Today's Date: " + strftime_now('%B %d, %Y') + ".
You are Granite, developed by IBM." %}
{%- if available_tools and documents %}
{%- set system_message = system_message + " You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.
Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." %}
{%- elif available_tools %}
{%- set system_message = system_message + " You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request." %}
{%- elif documents %}
{%- set system_message = system_message + " Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." %}
{%- elif thinking %}
{%- set system_message = system_message + " You are a helpful AI assistant.
Respond to every user query in a comprehensive and detailed way. You can write down your thoughts and reasoning process before responding. In the thought process, engage in a comprehensive cycle of analysis, summarization, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. In the response section, based on various attempts, explorations, and reflections from the thoughts section, systematically present the final solution that you deem correct. The response should summarize the thought process. Write your thoughts between <think></think> and write your response between <response></response> for each user query." %}
{%- else %}
{%- set system_message = system_message + " You are a helpful AI assistant." %}
{%- endif %}
{%- if 'citations' in controls and documents %}
{%- set system_message = system_message + '
Use the symbols <|start_of_cite|> and <|end_of_cite|> to indicate when a fact comes from a document in the search result, e.g <|start_of_cite|> {document_id: 1}my fact <|end_of_cite|> for a fact from document 1. Afterwards, list all the citations with their corresponding documents in an ordered list.' %}
{%- endif %}
{%- if 'hallucinations' in controls and documents %}
{%- set system_message = system_message + '
Finally, after the response is written, include a numbered list of sentences from the response with a corresponding risk value that are hallucinated and not based in the documents.' %}
{%- endif %}
{%- set loop_messages = messages %}
{%- endif %}
{{- '<|start_of_role|>system<|end_of_role|>' + system_message + '<|end_of_text|>
' }}
{%- if available_tools %}
{{- '<|start_of_role|>available_tools<|end_of_role|>' }}
{{- available_tools | tojson(indent=4) }}
{{- '<|end_of_text|>
' }}
{%- endif %}
{%- if documents %}
{%- for document in documents %}
{{- '<|start_of_role|>document {"document_id": "' + document['doc_id'] | string + '"}<|end_of_role|>
' }}
{{- document['text'] }}
{{- '<|end_of_text|>
' }}
{%- endfor %}
{%- endif %}
{%- for message in loop_messages %}
{{- '<|start_of_role|>' + message['role'] + '<|end_of_role|>' + message['content'] + '<|end_of_text|>
' }}
{%- if loop.last and add_generation_prompt %}
{{- '<|start_of_role|>assistant' }}
{%- if controls %}
{{- ' ' + controls | tojson()}}
{%- endif %}
{{- '<|end_of_role|>' }}
{%- endif %}
{%- endfor %}
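A minimal rendering sketch for this template, assuming the tokenizer is loaded from this folder (recent transformers versions pick up chat_template.jinja automatically, and their template environment provides strftime_now):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("123_128_e3_3e-5")  # assumed local path
messages = [{"role": "user", "content": "What is LoRA?"}]
# add_generation_prompt=True appends the open assistant turn at the end of
# the template, optionally tagged with the `controls` JSON.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)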
123_128_e3_3e-5/config.json
ADDED
@@ -0,0 +1,32 @@
{
  "architectures": [
    "GraniteForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "attention_multiplier": 0.0078125,
  "bos_token_id": 0,
  "embedding_multiplier": 12.0,
  "eos_token_id": 0,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 12800,
  "logits_scaling": 16.0,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "granite",
  "num_attention_heads": 32,
  "num_hidden_layers": 40,
  "num_key_value_heads": 8,
  "pad_token_id": 0,
  "residual_multiplier": 0.22,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.52.4",
  "use_cache": true,
  "vocab_size": 49159
}
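Note that vocab_size 49159 is consistent with the tokenizer files in this folder: 49152 base vocabulary entries (vocab_size in tokenizer_config.json) plus the 7 special tokens with ids 49152-49158 listed in added_tokens.json.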
123_128_e3_3e-5/merges.txt
ADDED
The diff for this file is too large to render.
123_128_e3_3e-5/special_tokens_map.json
ADDED
@@ -0,0 +1,33 @@
{
  "additional_special_tokens": [
    "<|start_of_role|>",
    "<|end_of_role|>",
    "<|tool_call|>",
    "<|start_of_cite|>",
    "<|end_of_cite|>",
    "<|start_of_plugin|>",
    "<|end_of_plugin|>"
  ],
  "bos_token": {
    "content": "<|end_of_text|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|end_of_text|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "<|end_of_plugin|>",
  "unk_token": {
    "content": "<|end_of_text|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
123_128_e3_3e-5/tokenizer.json
ADDED
The diff for this file is too large to render.
123_128_e3_3e-5/tokenizer_config.json
ADDED
@@ -0,0 +1,234 @@
{
  "add_bos_token": false,
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<|end_of_text|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<fim_prefix>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "<fim_middle>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<fim_suffix>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "4": {
      "content": "<fim_pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "5": {
      "content": "<filename>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "6": {
      "content": "<gh_stars>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "7": {
      "content": "<issue_start>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "8": {
      "content": "<issue_comment>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "9": {
      "content": "<issue_closed>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "10": {
      "content": "<jupyter_start>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "11": {
      "content": "<jupyter_text>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "12": {
      "content": "<jupyter_code>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "13": {
      "content": "<jupyter_output>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "14": {
      "content": "<empty_output>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "15": {
      "content": "<commit_before>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "16": {
      "content": "<commit_msg>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "17": {
      "content": "<commit_after>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "18": {
      "content": "<reponame>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49152": {
      "content": "<|start_of_role|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49153": {
      "content": "<|end_of_role|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49154": {
      "content": "<|tool_call|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49155": {
      "content": "<|start_of_cite|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49156": {
      "content": "<|end_of_cite|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49157": {
      "content": "<|start_of_plugin|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49158": {
      "content": "<|end_of_plugin|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<|start_of_role|>",
    "<|end_of_role|>",
    "<|tool_call|>",
    "<|start_of_cite|>",
    "<|end_of_cite|>",
    "<|start_of_plugin|>",
    "<|end_of_plugin|>"
  ],
  "bos_token": "<|end_of_text|>",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|end_of_text|>",
  "errors": "replace",
  "extra_special_tokens": {},
  "model_max_length": 8192,
  "pad_token": "<|end_of_plugin|>",
  "padding_side": "left",
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|end_of_text|>",
  "vocab_size": 49152
}
123_128_e3_3e-5/train_results.json
ADDED
@@ -0,0 +1,9 @@
{
  "epoch": 3.0,
  "total_flos": 6.754583228155167e+17,
  "train_loss": 0.3721794698517336,
  "train_runtime": 322.2859,
  "train_samples": 5497,
  "train_samples_per_second": 51.169,
  "train_steps_per_second": 1.601
}
123_128_e3_3e-5/trainer_state.json
ADDED
@@ -0,0 +1,764 @@
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 516,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.029069767441860465,
      "grad_norm": 2.533512830734253,
      "learning_rate": 4.615384615384616e-06,
      "loss": 1.5308,
      "step": 5
    },
    {
      "epoch": 0.05813953488372093,
      "grad_norm": 0.9892963171005249,
      "learning_rate": 1.0384615384615384e-05,
      "loss": 1.4852,
      "step": 10
    },
    {
      "epoch": 0.0872093023255814,
      "grad_norm": 0.46061113476753235,
      "learning_rate": 1.6153846153846154e-05,
      "loss": 1.3645,
      "step": 15
    },
    {
      "epoch": 0.11627906976744186,
      "grad_norm": 0.45335695147514343,
      "learning_rate": 2.1923076923076924e-05,
      "loss": 1.3947,
      "step": 20
    },
    {
      "epoch": 0.14534883720930233,
      "grad_norm": 0.4039892554283142,
      "learning_rate": 2.7692307692307694e-05,
      "loss": 1.3418,
      "step": 25
    },
    {
      "epoch": 0.1744186046511628,
      "grad_norm": 0.4111834764480591,
      "learning_rate": 2.999722541541585e-05,
      "loss": 1.3239,
      "step": 30
    },
    {
      "epoch": 0.20348837209302326,
      "grad_norm": 0.411482036113739,
      "learning_rate": 2.9980273337750767e-05,
      "loss": 1.2673,
      "step": 35
    },
    {
      "epoch": 0.23255813953488372,
      "grad_norm": 0.40074753761291504,
      "learning_rate": 2.9947928016585273e-05,
      "loss": 1.1851,
      "step": 40
    },
    {
      "epoch": 0.2616279069767442,
      "grad_norm": 0.4646604657173157,
      "learning_rate": 2.9900222688923374e-05,
      "loss": 1.1745,
      "step": 45
    },
    {
      "epoch": 0.29069767441860467,
      "grad_norm": 0.49917590618133545,
      "learning_rate": 2.9837206375214043e-05,
      "loss": 1.1324,
      "step": 50
    },
    {
      "epoch": 0.31976744186046513,
      "grad_norm": 0.5163657069206238,
      "learning_rate": 2.9758943828979444e-05,
      "loss": 1.0559,
      "step": 55
    },
    {
      "epoch": 0.3488372093023256,
      "grad_norm": 0.4930253028869629,
      "learning_rate": 2.9665515470276274e-05,
      "loss": 1.0013,
      "step": 60
    },
    {
      "epoch": 0.37790697674418605,
      "grad_norm": 0.5615187883377075,
      "learning_rate": 2.955701730305872e-05,
      "loss": 1.0037,
      "step": 65
    },
    {
      "epoch": 0.4069767441860465,
      "grad_norm": 0.6281460523605347,
      "learning_rate": 2.9433560816527933e-05,
      "loss": 0.9161,
      "step": 70
    },
    {
      "epoch": 0.436046511627907,
      "grad_norm": 0.5863350033760071,
      "learning_rate": 2.929527287056931e-05,
      "loss": 0.8844,
      "step": 75
    },
    {
      "epoch": 0.46511627906976744,
      "grad_norm": 0.668615996837616,
      "learning_rate": 2.9142295565395394e-05,
      "loss": 0.8538,
      "step": 80
    },
    {
      "epoch": 0.4941860465116279,
      "grad_norm": 0.6634598970413208,
      "learning_rate": 2.8974786095528312e-05,
      "loss": 0.7883,
      "step": 85
    },
    {
      "epoch": 0.5232558139534884,
      "grad_norm": 0.7066566944122314,
      "learning_rate": 2.8792916588271762e-05,
      "loss": 0.7728,
      "step": 90
    },
    {
      "epoch": 0.5523255813953488,
      "grad_norm": 0.7743446230888367,
      "learning_rate": 2.8596873926838572e-05,
      "loss": 0.7769,
      "step": 95
    },
    {
      "epoch": 0.5813953488372093,
      "grad_norm": 0.7685606479644775,
      "learning_rate": 2.838685955831559e-05,
      "loss": 0.7206,
      "step": 100
    },
    {
      "epoch": 0.6104651162790697,
      "grad_norm": 0.8624259829521179,
      "learning_rate": 2.8163089286663147e-05,
      "loss": 0.6435,
      "step": 105
    },
    {
      "epoch": 0.6395348837209303,
      "grad_norm": 0.8765841126441956,
      "learning_rate": 2.7925793050961927e-05,
      "loss": 0.6203,
      "step": 110
    },
    {
      "epoch": 0.6686046511627907,
      "grad_norm": 0.8885002136230469,
      "learning_rate": 2.767521468913502e-05,
      "loss": 0.6139,
      "step": 115
    },
    {
      "epoch": 0.6976744186046512,
      "grad_norm": 0.7977299094200134,
      "learning_rate": 2.741161168738795e-05,
      "loss": 0.5973,
      "step": 120
    },
    {
      "epoch": 0.7267441860465116,
      "grad_norm": 0.9100492596626282,
      "learning_rate": 2.7135254915624213e-05,
      "loss": 0.555,
      "step": 125
    },
    {
      "epoch": 0.7558139534883721,
      "grad_norm": 1.0174195766448975,
      "learning_rate": 2.6846428349108137e-05,
      "loss": 0.5854,
      "step": 130
    },
    {
      "epoch": 0.7848837209302325,
      "grad_norm": 0.9298872351646423,
      "learning_rate": 2.6545428776661092e-05,
      "loss": 0.5579,
      "step": 135
    },
    {
      "epoch": 0.813953488372093,
      "grad_norm": 0.8672717213630676,
      "learning_rate": 2.6232565495690922e-05,
      "loss": 0.4883,
      "step": 140
    },
    {
      "epoch": 0.8430232558139535,
      "grad_norm": 1.0029618740081787,
      "learning_rate": 2.590815999436796e-05,
      "loss": 0.4803,
      "step": 145
    },
    {
      "epoch": 0.872093023255814,
      "grad_norm": 0.9330613017082214,
      "learning_rate": 2.5572545621274177e-05,
      "loss": 0.4539,
      "step": 150
    },
    {
      "epoch": 0.9011627906976745,
      "grad_norm": 1.1058361530303955,
      "learning_rate": 2.5226067242864992e-05,
      "loss": 0.4532,
      "step": 155
    },
    {
      "epoch": 0.9302325581395349,
      "grad_norm": 1.0333130359649658,
      "learning_rate": 2.4869080889095693e-05,
      "loss": 0.4231,
      "step": 160
    },
    {
      "epoch": 0.9593023255813954,
      "grad_norm": 1.0879161357879639,
      "learning_rate": 2.4501953387576552e-05,
      "loss": 0.3813,
      "step": 165
    },
    {
      "epoch": 0.9883720930232558,
      "grad_norm": 1.0834219455718994,
      "learning_rate": 2.41250619866327e-05,
      "loss": 0.3975,
      "step": 170
    },
    {
      "epoch": 1.0174418604651163,
      "grad_norm": 0.9528940320014954,
      "learning_rate": 2.3738793967655943e-05,
      "loss": 0.3438,
      "step": 175
    },
    {
      "epoch": 1.0465116279069768,
      "grad_norm": 1.069732904434204,
      "learning_rate": 2.3343546247146972e-05,
      "loss": 0.3326,
      "step": 180
    },
    {
      "epoch": 1.0755813953488371,
      "grad_norm": 0.9847567081451416,
      "learning_rate": 2.2939724968856813e-05,
      "loss": 0.3279,
      "step": 185
    },
    {
      "epoch": 1.1046511627906976,
      "grad_norm": 1.0055873394012451,
      "learning_rate": 2.2527745086446666e-05,
      "loss": 0.2819,
      "step": 190
    },
    {
      "epoch": 1.1337209302325582,
      "grad_norm": 0.8243905305862427,
      "learning_rate": 2.210802993709498e-05,
      "loss": 0.2573,
      "step": 195
    },
    {
      "epoch": 1.1627906976744187,
      "grad_norm": 1.0015650987625122,
      "learning_rate": 2.1681010806489887e-05,
      "loss": 0.2651,
      "step": 200
    },
    {
      "epoch": 1.191860465116279,
      "grad_norm": 1.1682273149490356,
      "learning_rate": 2.1247126485653987e-05,
      "loss": 0.266,
      "step": 205
    },
    {
      "epoch": 1.2209302325581395,
      "grad_norm": 0.9905490279197693,
      "learning_rate": 2.0806822820056922e-05,
      "loss": 0.2466,
      "step": 210
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.029731273651123,
      "learning_rate": 2.0360552251479015e-05,
      "loss": 0.2605,
      "step": 215
    },
    {
      "epoch": 1.2790697674418605,
      "grad_norm": 1.0552231073379517,
      "learning_rate": 1.9908773353096754e-05,
      "loss": 0.2483,
      "step": 220
    },
    {
      "epoch": 1.308139534883721,
      "grad_norm": 0.9601156115531921,
      "learning_rate": 1.9451950358267858e-05,
      "loss": 0.2225,
      "step": 225
    },
    {
      "epoch": 1.3372093023255813,
      "grad_norm": 0.9771324396133423,
      "learning_rate": 1.8990552683500128e-05,
      "loss": 0.2278,
      "step": 230
    },
    {
      "epoch": 1.3662790697674418,
      "grad_norm": 1.280364751815796,
      "learning_rate": 1.852505444609423e-05,
      "loss": 0.2204,
      "step": 235
    },
    {
      "epoch": 1.3953488372093024,
      "grad_norm": 1.026919960975647,
      "learning_rate": 1.8055933976956134e-05,
      "loss": 0.2072,
      "step": 240
    },
    {
      "epoch": 1.4244186046511627,
      "grad_norm": 1.2836418151855469,
      "learning_rate": 1.7583673329079786e-05,
      "loss": 0.2071,
      "step": 245
    },
    {
      "epoch": 1.4534883720930232,
      "grad_norm": 1.0000046491622925,
      "learning_rate": 1.7108757782205044e-05,
      "loss": 0.1866,
      "step": 250
    },
    {
      "epoch": 1.4825581395348837,
      "grad_norm": 0.9951073527336121,
      "learning_rate": 1.6631675344159966e-05,
      "loss": 0.1701,
      "step": 255
    },
    {
      "epoch": 1.5116279069767442,
      "grad_norm": 1.1564542055130005,
      "learning_rate": 1.615291624939975e-05,
      "loss": 0.2049,
      "step": 260
    },
    {
      "epoch": 1.5406976744186047,
      "grad_norm": 0.952828586101532,
      "learning_rate": 1.5672972455257726e-05,
      "loss": 0.1791,
      "step": 265
    },
    {
      "epoch": 1.5697674418604652,
      "grad_norm": 1.0791171789169312,
      "learning_rate": 1.519233713642591e-05,
      "loss": 0.156,
      "step": 270
    },
    {
      "epoch": 1.5988372093023255,
      "grad_norm": 0.9374697208404541,
      "learning_rate": 1.4711504178184694e-05,
      "loss": 0.1691,
      "step": 275
    },
    {
      "epoch": 1.627906976744186,
      "grad_norm": 0.850075900554657,
      "learning_rate": 1.4230967668902314e-05,
      "loss": 0.1511,
      "step": 280
    },
    {
      "epoch": 1.6569767441860463,
      "grad_norm": 0.9074893593788147,
      "learning_rate": 1.3751221392325662e-05,
      "loss": 0.1438,
      "step": 285
    },
    {
      "epoch": 1.6860465116279069,
      "grad_norm": 1.0882065296173096,
      "learning_rate": 1.327275832018411e-05,
      "loss": 0.1606,
      "step": 290
    },
    {
      "epoch": 1.7151162790697674,
      "grad_norm": 0.9048277735710144,
      "learning_rate": 1.2796070105627732e-05,
      "loss": 0.1421,
      "step": 295
    },
    {
      "epoch": 1.744186046511628,
      "grad_norm": 1.0059200525283813,
      "learning_rate": 1.2321646578020452e-05,
      "loss": 0.1357,
      "step": 300
    },
    {
      "epoch": 1.7732558139534884,
      "grad_norm": 0.9617013335227966,
      "learning_rate": 1.184997523960726e-05,
      "loss": 0.1501,
      "step": 305
    },
    {
      "epoch": 1.802325581395349,
      "grad_norm": 0.8497111201286316,
      "learning_rate": 1.1381540764572711e-05,
      "loss": 0.1204,
      "step": 310
    },
    {
      "epoch": 1.8313953488372094,
      "grad_norm": 1.0311341285705566,
      "learning_rate": 1.0916824501005434e-05,
      "loss": 0.1329,
      "step": 315
    },
    {
      "epoch": 1.8604651162790697,
      "grad_norm": 0.8731353282928467,
      "learning_rate": 1.0456303976280419e-05,
      "loss": 0.1238,
      "step": 320
    },
    {
      "epoch": 1.8895348837209303,
      "grad_norm": 0.8247725367546082,
      "learning_rate": 1.0000452406367376e-05,
      "loss": 0.1193,
      "step": 325
    },
    {
      "epoch": 1.9186046511627906,
      "grad_norm": 0.819237470626831,
      "learning_rate": 9.54973820956933e-06,
      "loss": 0.1043,
      "step": 330
    },
    {
      "epoch": 1.947674418604651,
      "grad_norm": 1.0489195585250854,
      "learning_rate": 9.104624525191147e-06,
      "loss": 0.106,
      "step": 335
    },
    {
      "epoch": 1.9767441860465116,
      "grad_norm": 0.9402236938476562,
      "learning_rate": 8.665568737632606e-06,
      "loss": 0.1141,
      "step": 340
    },
    {
      "epoch": 2.005813953488372,
      "grad_norm": 0.7371605038642883,
      "learning_rate": 8.23302200639498e-06,
      "loss": 0.0995,
      "step": 345
    },
    {
      "epoch": 2.0348837209302326,
      "grad_norm": 0.7095077633857727,
      "learning_rate": 7.807428802484194e-06,
      "loss": 0.0854,
      "step": 350
    },
    {
      "epoch": 2.063953488372093,
      "grad_norm": 0.6523789167404175,
      "learning_rate": 7.38922645168677e-06,
      "loss": 0.0771,
      "step": 355
    },
    {
      "epoch": 2.0930232558139537,
      "grad_norm": 0.7162448167800903,
      "learning_rate": 6.978844685188054e-06,
      "loss": 0.0924,
      "step": 360
    },
    {
      "epoch": 2.1220930232558137,
      "grad_norm": 0.8413357734680176,
      "learning_rate": 6.576705197994378e-06,
      "loss": 0.0797,
      "step": 365
    },
    {
      "epoch": 2.1511627906976742,
      "grad_norm": 0.7490615248680115,
      "learning_rate": 6.1832212156129045e-06,
      "loss": 0.0768,
      "step": 370
    },
    {
      "epoch": 2.1802325581395348,
      "grad_norm": 0.7371830940246582,
      "learning_rate": 5.798797069434505e-06,
      "loss": 0.0698,
      "step": 375
    },
    {
      "epoch": 2.2093023255813953,
      "grad_norm": 0.5536708831787109,
      "learning_rate": 5.423827781255919e-06,
      "loss": 0.0767,
      "step": 380
    },
    {
      "epoch": 2.238372093023256,
      "grad_norm": 0.8051421642303467,
      "learning_rate": 5.058698657368156e-06,
      "loss": 0.0769,
      "step": 385
    },
    {
      "epoch": 2.2674418604651163,
      "grad_norm": 0.8469139933586121,
      "learning_rate": 4.7037848926282465e-06,
      "loss": 0.0707,
      "step": 390
    },
    {
      "epoch": 2.296511627906977,
      "grad_norm": 0.6710076332092285,
      "learning_rate": 4.3594511849211325e-06,
      "loss": 0.0758,
      "step": 395
    },
    {
      "epoch": 2.3255813953488373,
      "grad_norm": 0.9827410578727722,
      "learning_rate": 4.026051360407979e-06,
      "loss": 0.0699,
      "step": 400
    },
    {
      "epoch": 2.354651162790698,
      "grad_norm": 0.6657628417015076,
      "learning_rate": 3.7039280099458373e-06,
      "loss": 0.0698,
      "step": 405
    },
    {
      "epoch": 2.383720930232558,
      "grad_norm": 0.6634133458137512,
      "learning_rate": 3.393412137052397e-06,
      "loss": 0.067,
      "step": 410
    },
    {
      "epoch": 2.4127906976744184,
      "grad_norm": 0.629249095916748,
      "learning_rate": 3.0948228177775166e-06,
      "loss": 0.0686,
      "step": 415
    },
    {
      "epoch": 2.441860465116279,
      "grad_norm": 0.6132029891014099,
      "learning_rate": 2.808466872830961e-06,
      "loss": 0.0713,
      "step": 420
    },
    {
      "epoch": 2.4709302325581395,
      "grad_norm": 0.6225571036338806,
      "learning_rate": 2.534638552303441e-06,
      "loss": 0.0694,
      "step": 425
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.5198988318443298,
      "learning_rate": 2.2736192333047158e-06,
      "loss": 0.0658,
      "step": 430
    },
    {
      "epoch": 2.5290697674418605,
      "grad_norm": 0.6030337810516357,
      "learning_rate": 2.0256771308295974e-06,
      "loss": 0.0728,
      "step": 435
    },
    {
      "epoch": 2.558139534883721,
      "grad_norm": 0.6326708793640137,
      "learning_rate": 1.79106702214893e-06,
      "loss": 0.0694,
      "step": 440
    },
    {
      "epoch": 2.5872093023255816,
      "grad_norm": 0.5493963360786438,
      "learning_rate": 1.5700299850087046e-06,
      "loss": 0.0684,
      "step": 445
    },
    {
      "epoch": 2.616279069767442,
      "grad_norm": 0.950859010219574,
      "learning_rate": 1.3627931499063762e-06,
      "loss": 0.0624,
      "step": 450
    },
    {
      "epoch": 2.645348837209302,
      "grad_norm": 0.6443171501159668,
      "learning_rate": 1.1695694666989426e-06,
      "loss": 0.0696,
      "step": 455
    },
    {
      "epoch": 2.6744186046511627,
      "grad_norm": 0.5071727633476257,
      "learning_rate": 9.905574857825533e-07,
      "loss": 0.0649,
      "step": 460
    },
    {
      "epoch": 2.703488372093023,
      "grad_norm": 0.6271297931671143,
      "learning_rate": 8.25941154068563e-07,
      "loss": 0.0645,
      "step": 465
    },
    {
      "epoch": 2.7325581395348837,
      "grad_norm": 0.5594238638877869,
      "learning_rate": 6.75889625965645e-07,
      "loss": 0.0633,
      "step": 470
    },
    {
      "epoch": 2.761627906976744,
      "grad_norm": 0.49989861249923706,
      "learning_rate": 5.405570895622014e-07,
      "loss": 0.0668,
      "step": 475
    },
    {
      "epoch": 2.7906976744186047,
      "grad_norm": 0.4707631468772888,
      "learning_rate": 4.200826081876824e-07,
      "loss": 0.0621,
      "step": 480
    },
    {
      "epoch": 2.8197674418604652,
      "grad_norm": 0.5463367104530334,
      "learning_rate": 3.1458997751560694e-07,
      "loss": 0.0655,
      "step": 485
    },
    {
      "epoch": 2.8488372093023253,
      "grad_norm": 0.5609475374221802,
      "learning_rate": 2.2418759835514892e-07,
      "loss": 0.0768,
      "step": 490
    },
    {
      "epoch": 2.8779069767441863,
      "grad_norm": 0.47383224964141846,
      "learning_rate": 1.4896836526197277e-07,
      "loss": 0.0606,
      "step": 495
    },
    {
      "epoch": 2.9069767441860463,
      "grad_norm": 0.48377734422683716,
      "learning_rate": 8.900957108279484e-08,
      "loss": 0.0646,
      "step": 500
    },
    {
      "epoch": 2.936046511627907,
      "grad_norm": 0.5261256098747253,
      "learning_rate": 4.43728275317723e-08,
      "loss": 0.06,
      "step": 505
    },
    {
      "epoch": 2.9651162790697674,
      "grad_norm": 0.6682769656181335,
      "learning_rate": 1.510400188028116e-08,
      "loss": 0.0679,
      "step": 510
    },
    {
      "epoch": 2.994186046511628,
      "grad_norm": 0.6152586340904236,
      "learning_rate": 1.2331698252110445e-09,
      "loss": 0.0699,
      "step": 515
    },
    {
      "epoch": 3.0,
      "step": 516,
      "total_flos": 6.754583228155167e+17,
      "train_loss": 0.3721794698517336,
      "train_runtime": 322.2859,
      "train_samples_per_second": 51.169,
      "train_steps_per_second": 1.601
    }
  ],
  "logging_steps": 5,
  "max_steps": 516,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 20000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.754583228155167e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
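The step count also hints at the effective batch size: 5497 samples × 3 epochs / 516 steps ≈ 32 samples per optimizer step. With the train_batch_size of 2 recorded above, that suggests a combined factor of about 16 from gradient accumulation and/or data-parallel workers, though neither is stated in these files.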
123_128_e3_3e-5/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fd5c545e551a389cbaf69f71c3168302dead4e43baa11d393d2ce41d817e7d53
size 8273
123_128_e3_3e-5/vocab.json
ADDED
The diff for this file is too large to render.