Raiff1982 committed
Commit ada1d8c · verified · 1 parent: 6e9d86f

Upload folder using huggingface_hub

Files changed (40)
  1. .gitattributes +4 -0
  2. consciousness/README.md +62 -0
  3. consciousness/adapter_config.json +43 -0
  4. consciousness/adapter_model.safetensors +3 -0
  5. consciousness/chat_template.jinja +109 -0
  6. consciousness/checkpoint-1000/README.md +209 -0
  7. consciousness/checkpoint-1000/adapter_config.json +43 -0
  8. consciousness/checkpoint-1000/adapter_model.safetensors +3 -0
  9. consciousness/checkpoint-1000/chat_template.jinja +109 -0
  10. consciousness/checkpoint-1000/optimizer.pt +3 -0
  11. consciousness/checkpoint-1000/rng_state.pth +3 -0
  12. consciousness/checkpoint-1000/scheduler.pt +3 -0
  13. consciousness/checkpoint-1000/tokenizer.json +3 -0
  14. consciousness/checkpoint-1000/tokenizer_config.json +14 -0
  15. consciousness/checkpoint-1000/trainer_state.json +1034 -0
  16. consciousness/checkpoint-1000/training_args.bin +3 -0
  17. consciousness/checkpoint-1125/README.md +209 -0
  18. consciousness/checkpoint-1125/adapter_config.json +43 -0
  19. consciousness/checkpoint-1125/adapter_model.safetensors +3 -0
  20. consciousness/checkpoint-1125/chat_template.jinja +109 -0
  21. consciousness/checkpoint-1125/optimizer.pt +3 -0
  22. consciousness/checkpoint-1125/rng_state.pth +3 -0
  23. consciousness/checkpoint-1125/scheduler.pt +3 -0
  24. consciousness/checkpoint-1125/tokenizer.json +3 -0
  25. consciousness/checkpoint-1125/tokenizer_config.json +14 -0
  26. consciousness/checkpoint-1125/trainer_state.json +1154 -0
  27. consciousness/checkpoint-1125/training_args.bin +3 -0
  28. consciousness/checkpoint-500/README.md +209 -0
  29. consciousness/checkpoint-500/adapter_config.json +43 -0
  30. consciousness/checkpoint-500/adapter_model.safetensors +3 -0
  31. consciousness/checkpoint-500/chat_template.jinja +109 -0
  32. consciousness/checkpoint-500/optimizer.pt +3 -0
  33. consciousness/checkpoint-500/rng_state.pth +3 -0
  34. consciousness/checkpoint-500/scheduler.pt +3 -0
  35. consciousness/checkpoint-500/tokenizer.json +3 -0
  36. consciousness/checkpoint-500/tokenizer_config.json +14 -0
  37. consciousness/checkpoint-500/trainer_state.json +534 -0
  38. consciousness/checkpoint-500/training_args.bin +3 -0
  39. consciousness/tokenizer.json +3 -0
  40. consciousness/tokenizer_config.json +14 -0
.gitattributes CHANGED
@@ -49,3 +49,7 @@ philosophy/tokenizer.json filter=lfs diff=lfs merge=lfs -text
49
  quantum/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
50
  quantum/checkpoint-750/tokenizer.json filter=lfs diff=lfs merge=lfs -text
51
  quantum/tokenizer.json filter=lfs diff=lfs merge=lfs -text
52
+ consciousness/checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
53
+ consciousness/checkpoint-1125/tokenizer.json filter=lfs diff=lfs merge=lfs -text
54
+ consciousness/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
55
+ consciousness/tokenizer.json filter=lfs diff=lfs merge=lfs -text
consciousness/README.md ADDED
@@ -0,0 +1,62 @@
1
+ ---
2
+ base_model: meta-llama/Llama-3.1-8B-Instruct
3
+ library_name: peft
4
+ model_name: consciousness
5
+ tags:
6
+ - base_model:adapter:meta-llama/Llama-3.1-8B-Instruct
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ licence: license
12
+ pipeline_tag: text-generation
13
+ ---
14
+
15
+ # Model Card for consciousness
16
+
17
+ This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct).
18
+ It has been trained using [TRL](https://github.com/huggingface/trl).
19
+
20
+ ## Quick start
21
+
22
+ ```python
23
+ from transformers import pipeline
24
+
25
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
26
+ generator = pipeline("text-generation", model="consciousness", device="cuda")  # placeholder: point this at the adapter folder or its Hub repo id
27
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
28
+ print(output["generated_text"])
29
+ ```
30
+
31
+ ## Training procedure
32
+
33
+
34
+
35
+
36
+
37
+ This model was trained with SFT.
38
+
39
+ ### Framework versions
40
+
41
+ - PEFT 0.18.1
42
+ - TRL: 0.29.0
43
+ - Transformers: 5.3.0
44
+ - Pytorch: 2.10.0
45
+ - Datasets: 4.6.1
46
+ - Tokenizers: 0.22.2
47
+
48
+ ## Citations
49
+
50
+
51
+
52
+ Cite TRL as:
53
+
54
+ ```bibtex
55
+ @software{vonwerra2020trl,
56
+ title = {{TRL: Transformers Reinforcement Learning}},
57
+ author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
58
+ license = {Apache-2.0},
59
+ url = {https://github.com/huggingface/trl},
60
+ year = {2020}
61
+ }
62
+ ```
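The Quick start above leaves the `model` argument as a placeholder. A minimal PEFT-based sketch for loading this adapter on top of the base model (the local `./consciousness` path and the prompt are assumptions, not part of the commit):

```python
# Sketch: apply the LoRA adapter in consciousness/ to the base model with PEFT.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct", torch_dtype="auto", device_map="auto"
)
model = PeftModel.from_pretrained(base, "./consciousness")  # folder with adapter_config.json + adapter_model.safetensors
tokenizer = AutoTokenizer.from_pretrained("./consciousness")

messages = [{"role": "user", "content": "If you had a time machine, would you visit the past or the future?"}]
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(inputs, max_new_tokens=128)[0], skip_special_tokens=True))
```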
consciousness/adapter_config.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "q_proj",
33
+ "v_proj",
34
+ "k_proj",
35
+ "o_proj"
36
+ ],
37
+ "target_parameters": null,
38
+ "task_type": "CAUSAL_LM",
39
+ "trainable_token_indices": null,
40
+ "use_dora": false,
41
+ "use_qalora": false,
42
+ "use_rslora": false
43
+ }
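For reference, a hedged sketch of the `LoraConfig` that corresponds to this `adapter_config.json`, e.g. for re-running the SFT job (the surrounding training code is assumed, not shipped in this commit):

```python
# Sketch: LoraConfig mirroring consciousness/adapter_config.json.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,                       # "r": 16
    lora_alpha=32,              # "lora_alpha": 32
    lora_dropout=0.05,          # "lora_dropout": 0.05
    bias="none",                # "bias": "none"
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
```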
consciousness/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66930f755168eb2cfd2ae1b754fcc51080acdc1050a221f1373d5ff234b23bb6
3
+ size 27297544
consciousness/chat_template.jinja ADDED
@@ -0,0 +1,109 @@
1
+ {{- bos_token }}
2
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {%- if not date_string is defined %}
9
+ {%- set date_string = "26 Jul 2024" %}
10
+ {%- endif %}
11
+ {%- if not tools is defined %}
12
+ {%- set tools = none %}
13
+ {%- endif %}
14
+
15
+ {#- This block extracts the system message, so we can slot it into the right place. #}
16
+ {%- if messages[0]['role'] == 'system' %}
17
+ {%- set system_message = messages[0]['content']|trim %}
18
+ {%- set messages = messages[1:] %}
19
+ {%- else %}
20
+ {%- set system_message = "" %}
21
+ {%- endif %}
22
+
23
+ {#- System message + builtin tools #}
24
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
25
+ {%- if builtin_tools is defined or tools is not none %}
26
+ {{- "Environment: ipython\n" }}
27
+ {%- endif %}
28
+ {%- if builtin_tools is defined %}
29
+ {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}}
30
+ {%- endif %}
31
+ {{- "Cutting Knowledge Date: December 2023\n" }}
32
+ {{- "Today Date: " + date_string + "\n\n" }}
33
+ {%- if tools is not none and not tools_in_user_message %}
34
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
35
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
36
+ {{- "Do not use variables.\n\n" }}
37
+ {%- for t in tools %}
38
+ {{- t | tojson(indent=4) }}
39
+ {{- "\n\n" }}
40
+ {%- endfor %}
41
+ {%- endif %}
42
+ {{- system_message }}
43
+ {{- "<|eot_id|>" }}
44
+
45
+ {#- Custom tools are passed in a user message with some extra guidance #}
46
+ {%- if tools_in_user_message and not tools is none %}
47
+ {#- Extract the first user message so we can plug it in here #}
48
+ {%- if messages | length != 0 %}
49
+ {%- set first_user_message = messages[0]['content']|trim %}
50
+ {%- set messages = messages[1:] %}
51
+ {%- else %}
52
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
53
+ {%- endif %}
54
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
55
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
56
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
57
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
58
+ {{- "Do not use variables.\n\n" }}
59
+ {%- for t in tools %}
60
+ {{- t | tojson(indent=4) }}
61
+ {{- "\n\n" }}
62
+ {%- endfor %}
63
+ {{- first_user_message + "<|eot_id|>"}}
64
+ {%- endif %}
65
+
66
+ {%- for message in messages %}
67
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
68
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
69
+ {%- elif 'tool_calls' in message %}
70
+ {%- if not message.tool_calls|length == 1 %}
71
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
72
+ {%- endif %}
73
+ {%- set tool_call = message.tool_calls[0].function %}
74
+ {%- if builtin_tools is defined and tool_call.name in builtin_tools %}
75
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
76
+ {{- "<|python_tag|>" + tool_call.name + ".call(" }}
77
+ {%- for arg_name, arg_val in tool_call.arguments | items %}
78
+ {{- arg_name + '="' + arg_val + '"' }}
79
+ {%- if not loop.last %}
80
+ {{- ", " }}
81
+ {%- endif %}
82
+ {%- endfor %}
83
+ {{- ")" }}
84
+ {%- else %}
85
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
86
+ {{- '{"name": "' + tool_call.name + '", ' }}
87
+ {{- '"parameters": ' }}
88
+ {{- tool_call.arguments | tojson }}
89
+ {{- "}" }}
90
+ {%- endif %}
91
+ {%- if builtin_tools is defined %}
92
+ {#- This means we're in ipython mode #}
93
+ {{- "<|eom_id|>" }}
94
+ {%- else %}
95
+ {{- "<|eot_id|>" }}
96
+ {%- endif %}
97
+ {%- elif message.role == "tool" or message.role == "ipython" %}
98
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
99
+ {%- if message.content is mapping or message.content is iterable %}
100
+ {{- message.content | tojson }}
101
+ {%- else %}
102
+ {{- message.content }}
103
+ {%- endif %}
104
+ {{- "<|eot_id|>" }}
105
+ {%- endif %}
106
+ {%- endfor %}
107
+ {%- if add_generation_prompt %}
108
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
109
+ {%- endif %}
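This is the stock Llama 3.1 chat template: system header with knowledge-cutoff and date lines, optional tool-call handling, and `<|eot_id|>` / `<|eom_id|>` terminators. A small sketch of how it is rendered through the repo's tokenizer (the local path and the messages are assumptions):

```python
# Sketch: render a conversation with the chat template shipped in this folder.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./consciousness")  # assumed local checkout of this repo
messages = [
    {"role": "system", "content": "You are a reflective assistant."},
    {"role": "user", "content": "Summarize the idea of emergence in one sentence."},
]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
print(prompt)  # starts with <|begin_of_text|><|start_header_id|>system<|end_header_id|> ...
```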
consciousness/checkpoint-1000/README.md ADDED
@@ -0,0 +1,209 @@
1
+ ---
2
+ base_model: meta-llama/Llama-3.1-8B-Instruct
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:meta-llama/Llama-3.1-8B-Instruct
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
consciousness/checkpoint-1000/adapter_config.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "q_proj",
33
+ "v_proj",
34
+ "k_proj",
35
+ "o_proj"
36
+ ],
37
+ "target_parameters": null,
38
+ "task_type": "CAUSAL_LM",
39
+ "trainable_token_indices": null,
40
+ "use_dora": false,
41
+ "use_qalora": false,
42
+ "use_rslora": false
43
+ }
consciousness/checkpoint-1000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78d689a4a2c11ef70aa9a78685560bcb7511741f7dd40c730cfbd0688db400ca
3
+ size 27297544
consciousness/checkpoint-1000/chat_template.jinja ADDED
@@ -0,0 +1,109 @@
1
+ {{- bos_token }}
2
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {%- if not date_string is defined %}
9
+ {%- set date_string = "26 Jul 2024" %}
10
+ {%- endif %}
11
+ {%- if not tools is defined %}
12
+ {%- set tools = none %}
13
+ {%- endif %}
14
+
15
+ {#- This block extracts the system message, so we can slot it into the right place. #}
16
+ {%- if messages[0]['role'] == 'system' %}
17
+ {%- set system_message = messages[0]['content']|trim %}
18
+ {%- set messages = messages[1:] %}
19
+ {%- else %}
20
+ {%- set system_message = "" %}
21
+ {%- endif %}
22
+
23
+ {#- System message + builtin tools #}
24
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
25
+ {%- if builtin_tools is defined or tools is not none %}
26
+ {{- "Environment: ipython\n" }}
27
+ {%- endif %}
28
+ {%- if builtin_tools is defined %}
29
+ {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}}
30
+ {%- endif %}
31
+ {{- "Cutting Knowledge Date: December 2023\n" }}
32
+ {{- "Today Date: " + date_string + "\n\n" }}
33
+ {%- if tools is not none and not tools_in_user_message %}
34
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
35
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
36
+ {{- "Do not use variables.\n\n" }}
37
+ {%- for t in tools %}
38
+ {{- t | tojson(indent=4) }}
39
+ {{- "\n\n" }}
40
+ {%- endfor %}
41
+ {%- endif %}
42
+ {{- system_message }}
43
+ {{- "<|eot_id|>" }}
44
+
45
+ {#- Custom tools are passed in a user message with some extra guidance #}
46
+ {%- if tools_in_user_message and not tools is none %}
47
+ {#- Extract the first user message so we can plug it in here #}
48
+ {%- if messages | length != 0 %}
49
+ {%- set first_user_message = messages[0]['content']|trim %}
50
+ {%- set messages = messages[1:] %}
51
+ {%- else %}
52
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
53
+ {%- endif %}
54
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
55
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
56
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
57
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
58
+ {{- "Do not use variables.\n\n" }}
59
+ {%- for t in tools %}
60
+ {{- t | tojson(indent=4) }}
61
+ {{- "\n\n" }}
62
+ {%- endfor %}
63
+ {{- first_user_message + "<|eot_id|>"}}
64
+ {%- endif %}
65
+
66
+ {%- for message in messages %}
67
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
68
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
69
+ {%- elif 'tool_calls' in message %}
70
+ {%- if not message.tool_calls|length == 1 %}
71
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
72
+ {%- endif %}
73
+ {%- set tool_call = message.tool_calls[0].function %}
74
+ {%- if builtin_tools is defined and tool_call.name in builtin_tools %}
75
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
76
+ {{- "<|python_tag|>" + tool_call.name + ".call(" }}
77
+ {%- for arg_name, arg_val in tool_call.arguments | items %}
78
+ {{- arg_name + '="' + arg_val + '"' }}
79
+ {%- if not loop.last %}
80
+ {{- ", " }}
81
+ {%- endif %}
82
+ {%- endfor %}
83
+ {{- ")" }}
84
+ {%- else %}
85
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
86
+ {{- '{"name": "' + tool_call.name + '", ' }}
87
+ {{- '"parameters": ' }}
88
+ {{- tool_call.arguments | tojson }}
89
+ {{- "}" }}
90
+ {%- endif %}
91
+ {%- if builtin_tools is defined %}
92
+ {#- This means we're in ipython mode #}
93
+ {{- "<|eom_id|>" }}
94
+ {%- else %}
95
+ {{- "<|eot_id|>" }}
96
+ {%- endif %}
97
+ {%- elif message.role == "tool" or message.role == "ipython" %}
98
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
99
+ {%- if message.content is mapping or message.content is iterable %}
100
+ {{- message.content | tojson }}
101
+ {%- else %}
102
+ {{- message.content }}
103
+ {%- endif %}
104
+ {{- "<|eot_id|>" }}
105
+ {%- endif %}
106
+ {%- endfor %}
107
+ {%- if add_generation_prompt %}
108
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
109
+ {%- endif %}
consciousness/checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3905673d0212fe2aa3ed16181c33da0e6644dbcab057c5b331bfde9b981f9b09
3
+ size 54745547
consciousness/checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a0e97a0b18c6c63524b5e2d5d036565a39ab9e6e15437df6c458f81b3b9ce7
3
+ size 14645
consciousness/checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ede3d2a514005ed80690b07770eb75aab9fd0b335517babd631dfbc1716d09fd
3
+ size 1465
consciousness/checkpoint-1000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
consciousness/checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin_of_text|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|eot_id|>",
6
+ "is_local": false,
7
+ "model_input_names": [
8
+ "input_ids",
9
+ "attention_mask"
10
+ ],
11
+ "model_max_length": 131072,
12
+ "pad_token": "<|eot_id|>",
13
+ "tokenizer_class": "TokenizersBackend"
14
+ }
consciousness/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,1034 @@
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.6666666666666665,
6
+ "eval_steps": 500,
7
+ "global_step": 1000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "entropy": 2.8901569664478304,
14
+ "epoch": 0.02666666666666667,
15
+ "grad_norm": 0.267578125,
16
+ "learning_rate": 5.294117647058824e-05,
17
+ "loss": 2.9965847015380858,
18
+ "mean_token_accuracy": 0.4477638125419617,
19
+ "num_tokens": 57327.0,
20
+ "step": 10
21
+ },
22
+ {
23
+ "entropy": 2.431850343942642,
24
+ "epoch": 0.05333333333333334,
25
+ "grad_norm": 0.255859375,
26
+ "learning_rate": 0.00011176470588235294,
27
+ "loss": 2.636348533630371,
28
+ "mean_token_accuracy": 0.4853471860289574,
29
+ "num_tokens": 115513.0,
30
+ "step": 20
31
+ },
32
+ {
33
+ "entropy": 2.0503339916467667,
34
+ "epoch": 0.08,
35
+ "grad_norm": 0.2734375,
36
+ "learning_rate": 0.00017058823529411766,
37
+ "loss": 1.9743734359741212,
38
+ "mean_token_accuracy": 0.5749435268342495,
39
+ "num_tokens": 173246.0,
40
+ "step": 30
41
+ },
42
+ {
43
+ "entropy": 1.4122730612754821,
44
+ "epoch": 0.10666666666666667,
45
+ "grad_norm": 0.357421875,
46
+ "learning_rate": 0.00019908340971585702,
47
+ "loss": 1.3748690605163574,
48
+ "mean_token_accuracy": 0.6867235794663429,
49
+ "num_tokens": 231278.0,
50
+ "step": 40
51
+ },
52
+ {
53
+ "entropy": 1.0256530404090882,
54
+ "epoch": 0.13333333333333333,
55
+ "grad_norm": 0.359375,
56
+ "learning_rate": 0.00019725022914757106,
57
+ "loss": 0.9704485893249511,
58
+ "mean_token_accuracy": 0.7766638442873954,
59
+ "num_tokens": 288393.0,
60
+ "step": 50
61
+ },
62
+ {
63
+ "entropy": 0.7456447497010231,
64
+ "epoch": 0.16,
65
+ "grad_norm": 0.328125,
66
+ "learning_rate": 0.00019541704857928507,
67
+ "loss": 0.6671554565429687,
68
+ "mean_token_accuracy": 0.845753838121891,
69
+ "num_tokens": 346339.0,
70
+ "step": 60
71
+ },
72
+ {
73
+ "entropy": 0.537605831772089,
74
+ "epoch": 0.18666666666666668,
75
+ "grad_norm": 0.328125,
76
+ "learning_rate": 0.00019358386801099912,
77
+ "loss": 0.4658506393432617,
78
+ "mean_token_accuracy": 0.8909643113613128,
79
+ "num_tokens": 404015.0,
80
+ "step": 70
81
+ },
82
+ {
83
+ "entropy": 0.37200461626052855,
84
+ "epoch": 0.21333333333333335,
85
+ "grad_norm": 0.283203125,
86
+ "learning_rate": 0.0001917506874427131,
87
+ "loss": 0.3085629940032959,
88
+ "mean_token_accuracy": 0.9280218213796616,
89
+ "num_tokens": 461115.0,
90
+ "step": 80
91
+ },
92
+ {
93
+ "entropy": 0.2607572071254253,
94
+ "epoch": 0.24,
95
+ "grad_norm": 0.267578125,
96
+ "learning_rate": 0.00018991750687442712,
97
+ "loss": 0.21192119121551514,
98
+ "mean_token_accuracy": 0.9511988922953606,
99
+ "num_tokens": 518926.0,
100
+ "step": 90
101
+ },
102
+ {
103
+ "entropy": 0.21099306046962737,
104
+ "epoch": 0.26666666666666666,
105
+ "grad_norm": 0.2021484375,
106
+ "learning_rate": 0.00018808432630614116,
107
+ "loss": 0.1747212290763855,
108
+ "mean_token_accuracy": 0.9576459184288979,
109
+ "num_tokens": 577120.0,
110
+ "step": 100
111
+ },
112
+ {
113
+ "entropy": 0.1670930277556181,
114
+ "epoch": 0.29333333333333333,
115
+ "grad_norm": 0.2431640625,
116
+ "learning_rate": 0.00018625114573785518,
117
+ "loss": 0.14222853183746337,
118
+ "mean_token_accuracy": 0.9636133790016175,
119
+ "num_tokens": 635382.0,
120
+ "step": 110
121
+ },
122
+ {
123
+ "entropy": 0.15717535726726056,
124
+ "epoch": 0.32,
125
+ "grad_norm": 0.12890625,
126
+ "learning_rate": 0.00018441796516956922,
127
+ "loss": 0.13076614141464232,
128
+ "mean_token_accuracy": 0.9650501102209091,
129
+ "num_tokens": 692813.0,
130
+ "step": 120
131
+ },
132
+ {
133
+ "entropy": 0.14444066677242517,
134
+ "epoch": 0.3466666666666667,
135
+ "grad_norm": 0.1640625,
136
+ "learning_rate": 0.00018258478460128323,
137
+ "loss": 0.11674572229385376,
138
+ "mean_token_accuracy": 0.9665236040949822,
139
+ "num_tokens": 750815.0,
140
+ "step": 130
141
+ },
142
+ {
143
+ "entropy": 0.1316229362040758,
144
+ "epoch": 0.37333333333333335,
145
+ "grad_norm": 0.177734375,
146
+ "learning_rate": 0.00018075160403299728,
147
+ "loss": 0.10633546113967896,
148
+ "mean_token_accuracy": 0.9686767488718033,
149
+ "num_tokens": 809607.0,
150
+ "step": 140
151
+ },
152
+ {
153
+ "entropy": 0.12511782981455327,
154
+ "epoch": 0.4,
155
+ "grad_norm": 0.1103515625,
156
+ "learning_rate": 0.0001789184234647113,
157
+ "loss": 0.10267382860183716,
158
+ "mean_token_accuracy": 0.9692023977637291,
159
+ "num_tokens": 867374.0,
160
+ "step": 150
161
+ },
162
+ {
163
+ "entropy": 0.1147347992286086,
164
+ "epoch": 0.4266666666666667,
165
+ "grad_norm": 0.10302734375,
166
+ "learning_rate": 0.0001770852428964253,
167
+ "loss": 0.09604376554489136,
168
+ "mean_token_accuracy": 0.9704543471336364,
169
+ "num_tokens": 925873.0,
170
+ "step": 160
171
+ },
172
+ {
173
+ "entropy": 0.11136603765189648,
174
+ "epoch": 0.4533333333333333,
175
+ "grad_norm": 0.12158203125,
176
+ "learning_rate": 0.00017525206232813932,
177
+ "loss": 0.09679205417633056,
178
+ "mean_token_accuracy": 0.9701795622706413,
179
+ "num_tokens": 983653.0,
180
+ "step": 170
181
+ },
182
+ {
183
+ "entropy": 0.10962173249572515,
184
+ "epoch": 0.48,
185
+ "grad_norm": 0.10986328125,
186
+ "learning_rate": 0.00017341888175985334,
187
+ "loss": 0.09156813025474549,
188
+ "mean_token_accuracy": 0.971354915201664,
189
+ "num_tokens": 1041996.0,
190
+ "step": 180
191
+ },
192
+ {
193
+ "entropy": 0.10233879294246435,
194
+ "epoch": 0.5066666666666667,
195
+ "grad_norm": 0.2431640625,
196
+ "learning_rate": 0.00017158570119156738,
197
+ "loss": 0.09264941811561585,
198
+ "mean_token_accuracy": 0.9716016605496407,
199
+ "num_tokens": 1100105.0,
200
+ "step": 190
201
+ },
202
+ {
203
+ "entropy": 0.10874027330428362,
204
+ "epoch": 0.5333333333333333,
205
+ "grad_norm": 0.103515625,
206
+ "learning_rate": 0.0001697525206232814,
207
+ "loss": 0.09393113255500793,
208
+ "mean_token_accuracy": 0.9707169815897941,
209
+ "num_tokens": 1157940.0,
210
+ "step": 200
211
+ },
212
+ {
213
+ "entropy": 0.10715384036302567,
214
+ "epoch": 0.56,
215
+ "grad_norm": 0.0927734375,
216
+ "learning_rate": 0.00016791934005499544,
217
+ "loss": 0.08979941606521606,
218
+ "mean_token_accuracy": 0.9710627257823944,
219
+ "num_tokens": 1216048.0,
220
+ "step": 210
221
+ },
222
+ {
223
+ "entropy": 0.09897389095276594,
224
+ "epoch": 0.5866666666666667,
225
+ "grad_norm": 0.1005859375,
226
+ "learning_rate": 0.00016608615948670945,
227
+ "loss": 0.08646941781044007,
228
+ "mean_token_accuracy": 0.9712389498949051,
229
+ "num_tokens": 1274357.0,
230
+ "step": 220
231
+ },
232
+ {
233
+ "entropy": 0.09603469483554364,
234
+ "epoch": 0.6133333333333333,
235
+ "grad_norm": 0.09716796875,
236
+ "learning_rate": 0.0001642529789184235,
237
+ "loss": 0.08556437492370605,
238
+ "mean_token_accuracy": 0.9711127072572708,
239
+ "num_tokens": 1332152.0,
240
+ "step": 230
241
+ },
242
+ {
243
+ "entropy": 0.09263445399701595,
244
+ "epoch": 0.64,
245
+ "grad_norm": 0.1396484375,
246
+ "learning_rate": 0.0001624197983501375,
247
+ "loss": 0.08339133858680725,
248
+ "mean_token_accuracy": 0.9719239071011543,
249
+ "num_tokens": 1389574.0,
250
+ "step": 240
251
+ },
252
+ {
253
+ "entropy": 0.09686502479016781,
254
+ "epoch": 0.6666666666666666,
255
+ "grad_norm": 0.0712890625,
256
+ "learning_rate": 0.00016058661778185152,
257
+ "loss": 0.08281562328338624,
258
+ "mean_token_accuracy": 0.9720177337527275,
259
+ "num_tokens": 1447409.0,
260
+ "step": 250
261
+ },
262
+ {
263
+ "entropy": 0.08692479655146598,
264
+ "epoch": 0.6933333333333334,
265
+ "grad_norm": 0.0830078125,
266
+ "learning_rate": 0.00015875343721356554,
267
+ "loss": 0.08078550696372985,
268
+ "mean_token_accuracy": 0.9726089149713516,
269
+ "num_tokens": 1506058.0,
270
+ "step": 260
271
+ },
272
+ {
273
+ "entropy": 0.09034751150757074,
274
+ "epoch": 0.72,
275
+ "grad_norm": 0.08935546875,
276
+ "learning_rate": 0.00015692025664527955,
277
+ "loss": 0.08023000955581665,
278
+ "mean_token_accuracy": 0.9725215956568718,
279
+ "num_tokens": 1563817.0,
280
+ "step": 270
281
+ },
282
+ {
283
+ "entropy": 0.08724061641842126,
284
+ "epoch": 0.7466666666666667,
285
+ "grad_norm": 0.09423828125,
286
+ "learning_rate": 0.0001550870760769936,
287
+ "loss": 0.0812032699584961,
288
+ "mean_token_accuracy": 0.9722012594342232,
289
+ "num_tokens": 1621737.0,
290
+ "step": 280
291
+ },
292
+ {
293
+ "entropy": 0.08758355937898159,
294
+ "epoch": 0.7733333333333333,
295
+ "grad_norm": 0.2099609375,
296
+ "learning_rate": 0.0001532538955087076,
297
+ "loss": 0.08142906427383423,
298
+ "mean_token_accuracy": 0.9718389093875885,
299
+ "num_tokens": 1679970.0,
300
+ "step": 290
301
+ },
302
+ {
303
+ "entropy": 0.08803936429321765,
304
+ "epoch": 0.8,
305
+ "grad_norm": 0.12451171875,
306
+ "learning_rate": 0.00015142071494042165,
307
+ "loss": 0.0806335985660553,
308
+ "mean_token_accuracy": 0.9723069176077843,
309
+ "num_tokens": 1738304.0,
310
+ "step": 300
311
+ },
312
+ {
313
+ "entropy": 0.0896342158317566,
314
+ "epoch": 0.8266666666666667,
315
+ "grad_norm": 0.059814453125,
316
+ "learning_rate": 0.00014958753437213567,
317
+ "loss": 0.08014391660690308,
318
+ "mean_token_accuracy": 0.9721709281206131,
319
+ "num_tokens": 1795881.0,
320
+ "step": 310
321
+ },
322
+ {
323
+ "entropy": 0.08054284229874611,
324
+ "epoch": 0.8533333333333334,
325
+ "grad_norm": 0.09033203125,
326
+ "learning_rate": 0.00014775435380384968,
327
+ "loss": 0.07684423327445984,
328
+ "mean_token_accuracy": 0.9731693744659424,
329
+ "num_tokens": 1854853.0,
330
+ "step": 320
331
+ },
332
+ {
333
+ "entropy": 0.0840398171916604,
334
+ "epoch": 0.88,
335
+ "grad_norm": 0.05224609375,
336
+ "learning_rate": 0.00014592117323556373,
337
+ "loss": 0.07634277939796448,
338
+ "mean_token_accuracy": 0.9732364892959595,
339
+ "num_tokens": 1912939.0,
340
+ "step": 330
341
+ },
342
+ {
343
+ "entropy": 0.08260406106710434,
344
+ "epoch": 0.9066666666666666,
345
+ "grad_norm": 0.072265625,
346
+ "learning_rate": 0.00014408799266727771,
347
+ "loss": 0.076292884349823,
348
+ "mean_token_accuracy": 0.9736541777849197,
349
+ "num_tokens": 1971345.0,
350
+ "step": 340
351
+ },
352
+ {
353
+ "entropy": 0.08077720124274493,
354
+ "epoch": 0.9333333333333333,
355
+ "grad_norm": 0.057373046875,
356
+ "learning_rate": 0.00014225481209899176,
357
+ "loss": 0.07518362402915954,
358
+ "mean_token_accuracy": 0.9735523566603661,
359
+ "num_tokens": 2029618.0,
360
+ "step": 350
361
+ },
362
+ {
363
+ "entropy": 0.0814354794099927,
364
+ "epoch": 0.96,
365
+ "grad_norm": 0.087890625,
366
+ "learning_rate": 0.00014042163153070577,
367
+ "loss": 0.07500824928283692,
368
+ "mean_token_accuracy": 0.9733900666236878,
369
+ "num_tokens": 2088198.0,
370
+ "step": 360
371
+ },
372
+ {
373
+ "entropy": 0.08081495910882949,
374
+ "epoch": 0.9866666666666667,
375
+ "grad_norm": 0.05810546875,
376
+ "learning_rate": 0.0001385884509624198,
377
+ "loss": 0.07559239268302917,
378
+ "mean_token_accuracy": 0.9732825100421906,
379
+ "num_tokens": 2145822.0,
380
+ "step": 370
381
+ },
382
+ {
383
+ "entropy": 0.08157326076179743,
384
+ "epoch": 1.0133333333333334,
385
+ "grad_norm": 0.058837890625,
386
+ "learning_rate": 0.00013675527039413383,
387
+ "loss": 0.07452890872955323,
388
+ "mean_token_accuracy": 0.9733605772256851,
389
+ "num_tokens": 2203248.0,
390
+ "step": 380
391
+ },
392
+ {
393
+ "entropy": 0.07517405189573764,
394
+ "epoch": 1.04,
395
+ "grad_norm": 0.087890625,
396
+ "learning_rate": 0.00013492208982584784,
397
+ "loss": 0.07157951593399048,
398
+ "mean_token_accuracy": 0.9741677790880203,
399
+ "num_tokens": 2261444.0,
400
+ "step": 390
401
+ },
402
+ {
403
+ "entropy": 0.07766247931867838,
404
+ "epoch": 1.0666666666666667,
405
+ "grad_norm": 0.060302734375,
406
+ "learning_rate": 0.00013308890925756189,
407
+ "loss": 0.07183201909065247,
408
+ "mean_token_accuracy": 0.9740341395139694,
409
+ "num_tokens": 2319551.0,
410
+ "step": 400
411
+ },
412
+ {
413
+ "entropy": 0.07695812471210957,
414
+ "epoch": 1.0933333333333333,
415
+ "grad_norm": 0.054443359375,
416
+ "learning_rate": 0.0001312557286892759,
417
+ "loss": 0.07332680225372315,
418
+ "mean_token_accuracy": 0.9733265534043312,
419
+ "num_tokens": 2377422.0,
420
+ "step": 410
421
+ },
422
+ {
423
+ "entropy": 0.07884457465261221,
424
+ "epoch": 1.12,
425
+ "grad_norm": 0.060791015625,
426
+ "learning_rate": 0.00012942254812098992,
427
+ "loss": 0.07316585779190063,
428
+ "mean_token_accuracy": 0.973577855527401,
429
+ "num_tokens": 2435382.0,
430
+ "step": 420
431
+ },
432
+ {
433
+ "entropy": 0.07917917389422655,
434
+ "epoch": 1.1466666666666667,
435
+ "grad_norm": 0.0771484375,
436
+ "learning_rate": 0.00012758936755270393,
437
+ "loss": 0.07189081907272339,
438
+ "mean_token_accuracy": 0.9741694211959839,
439
+ "num_tokens": 2493780.0,
440
+ "step": 430
441
+ },
442
+ {
443
+ "entropy": 0.07554319184273481,
444
+ "epoch": 1.1733333333333333,
445
+ "grad_norm": 0.126953125,
446
+ "learning_rate": 0.00012575618698441797,
447
+ "loss": 0.07339509725570678,
448
+ "mean_token_accuracy": 0.9734218120574951,
449
+ "num_tokens": 2551587.0,
450
+ "step": 440
451
+ },
452
+ {
453
+ "entropy": 0.0771414702758193,
454
+ "epoch": 1.2,
455
+ "grad_norm": 0.052490234375,
456
+ "learning_rate": 0.000123923006416132,
457
+ "loss": 0.07223436832427979,
458
+ "mean_token_accuracy": 0.9731230854988098,
459
+ "num_tokens": 2609738.0,
460
+ "step": 450
461
+ },
462
+ {
463
+ "entropy": 0.07702515590935946,
464
+ "epoch": 1.2266666666666666,
465
+ "grad_norm": 0.05078125,
466
+ "learning_rate": 0.00012208982584784603,
467
+ "loss": 0.07126941084861756,
468
+ "mean_token_accuracy": 0.9743727937340736,
469
+ "num_tokens": 2667570.0,
470
+ "step": 460
471
+ },
472
+ {
473
+ "entropy": 0.0751312056556344,
474
+ "epoch": 1.2533333333333334,
475
+ "grad_norm": 0.052490234375,
476
+ "learning_rate": 0.00012025664527956005,
477
+ "loss": 0.07185030579566956,
478
+ "mean_token_accuracy": 0.9739165529608727,
479
+ "num_tokens": 2725220.0,
480
+ "step": 470
481
+ },
482
+ {
483
+ "entropy": 0.07488212268799543,
484
+ "epoch": 1.28,
485
+ "grad_norm": 0.0517578125,
486
+ "learning_rate": 0.00011842346471127406,
487
+ "loss": 0.07148469686508178,
488
+ "mean_token_accuracy": 0.973349143564701,
489
+ "num_tokens": 2782737.0,
490
+ "step": 480
491
+ },
492
+ {
493
+ "entropy": 0.07398118702694774,
494
+ "epoch": 1.3066666666666666,
495
+ "grad_norm": 0.044677734375,
496
+ "learning_rate": 0.00011659028414298809,
497
+ "loss": 0.06911076903343201,
498
+ "mean_token_accuracy": 0.9739873677492141,
499
+ "num_tokens": 2841536.0,
500
+ "step": 490
501
+ },
502
+ {
503
+ "entropy": 0.07522298116236925,
504
+ "epoch": 1.3333333333333333,
505
+ "grad_norm": 0.05908203125,
506
+ "learning_rate": 0.0001147571035747021,
507
+ "loss": 0.06964495182037353,
508
+ "mean_token_accuracy": 0.9739961415529251,
509
+ "num_tokens": 2899599.0,
510
+ "step": 500
511
+ },
512
+ {
513
+ "entropy": 0.07469812557101249,
514
+ "epoch": 1.3599999999999999,
515
+ "grad_norm": 0.134765625,
516
+ "learning_rate": 0.00011292392300641615,
517
+ "loss": 0.0720504343509674,
518
+ "mean_token_accuracy": 0.973115186393261,
519
+ "num_tokens": 2956977.0,
520
+ "step": 510
521
+ },
522
+ {
523
+ "entropy": 0.07730768620967865,
524
+ "epoch": 1.3866666666666667,
525
+ "grad_norm": 0.0966796875,
526
+ "learning_rate": 0.00011109074243813016,
527
+ "loss": 0.07261049151420593,
528
+ "mean_token_accuracy": 0.9735311016440391,
529
+ "num_tokens": 3014397.0,
530
+ "step": 520
531
+ },
532
+ {
533
+ "entropy": 0.07650617882609367,
534
+ "epoch": 1.4133333333333333,
535
+ "grad_norm": 0.059814453125,
536
+ "learning_rate": 0.00010925756186984419,
537
+ "loss": 0.07154079675674438,
538
+ "mean_token_accuracy": 0.9735050886869431,
539
+ "num_tokens": 3072014.0,
540
+ "step": 530
541
+ },
542
+ {
543
+ "entropy": 0.07418479155749083,
544
+ "epoch": 1.44,
545
+ "grad_norm": 0.08447265625,
546
+ "learning_rate": 0.0001074243813015582,
547
+ "loss": 0.07014204263687134,
548
+ "mean_token_accuracy": 0.9742872670292855,
549
+ "num_tokens": 3129542.0,
550
+ "step": 540
551
+ },
552
+ {
553
+ "entropy": 0.07493350077420473,
554
+ "epoch": 1.4666666666666668,
555
+ "grad_norm": 0.06396484375,
556
+ "learning_rate": 0.00010559120073327222,
557
+ "loss": 0.0693817377090454,
558
+ "mean_token_accuracy": 0.9750317439436913,
559
+ "num_tokens": 3188756.0,
560
+ "step": 550
561
+ },
562
+ {
563
+ "entropy": 0.07290575439110399,
564
+ "epoch": 1.4933333333333334,
565
+ "grad_norm": 0.0634765625,
566
+ "learning_rate": 0.00010375802016498626,
567
+ "loss": 0.06914764046669006,
568
+ "mean_token_accuracy": 0.9740337684750557,
569
+ "num_tokens": 3246755.0,
570
+ "step": 560
571
+ },
572
+ {
573
+ "entropy": 0.07299449313431979,
574
+ "epoch": 1.52,
575
+ "grad_norm": 0.056396484375,
576
+ "learning_rate": 0.00010192483959670028,
577
+ "loss": 0.06928544640541076,
578
+ "mean_token_accuracy": 0.9739615619182587,
579
+ "num_tokens": 3304396.0,
580
+ "step": 570
581
+ },
582
+ {
583
+ "entropy": 0.07399061964824796,
584
+ "epoch": 1.5466666666666666,
585
+ "grad_norm": 0.049560546875,
586
+ "learning_rate": 0.0001000916590284143,
587
+ "loss": 0.07088688611984253,
588
+ "mean_token_accuracy": 0.9734108299016953,
589
+ "num_tokens": 3361947.0,
590
+ "step": 580
591
+ },
592
+ {
593
+ "entropy": 0.07375452127307654,
594
+ "epoch": 1.5733333333333333,
595
+ "grad_norm": 0.056884765625,
596
+ "learning_rate": 9.825847846012832e-05,
597
+ "loss": 0.06905483603477477,
598
+ "mean_token_accuracy": 0.9750793874263763,
599
+ "num_tokens": 3420088.0,
600
+ "step": 590
601
+ },
602
+ {
603
+ "entropy": 0.0736119981855154,
604
+ "epoch": 1.6,
605
+ "grad_norm": 0.07958984375,
606
+ "learning_rate": 9.642529789184235e-05,
607
+ "loss": 0.06965676546096802,
608
+ "mean_token_accuracy": 0.9740386828780174,
609
+ "num_tokens": 3478106.0,
610
+ "step": 600
611
+ },
612
+ {
613
+ "entropy": 0.07634057383984327,
614
+ "epoch": 1.6266666666666667,
615
+ "grad_norm": 0.045166015625,
616
+ "learning_rate": 9.459211732355638e-05,
617
+ "loss": 0.07061071991920471,
618
+ "mean_token_accuracy": 0.9740972384810448,
619
+ "num_tokens": 3535104.0,
620
+ "step": 610
621
+ },
622
+ {
623
+ "entropy": 0.07195411194115878,
624
+ "epoch": 1.6533333333333333,
625
+ "grad_norm": 0.061279296875,
626
+ "learning_rate": 9.27589367552704e-05,
627
+ "loss": 0.06899864077568055,
628
+ "mean_token_accuracy": 0.9742538690567016,
629
+ "num_tokens": 3592771.0,
630
+ "step": 620
631
+ },
632
+ {
633
+ "entropy": 0.07130216900259256,
634
+ "epoch": 1.6800000000000002,
635
+ "grad_norm": 0.05322265625,
636
+ "learning_rate": 9.092575618698442e-05,
637
+ "loss": 0.06816592216491699,
638
+ "mean_token_accuracy": 0.9751273840665817,
639
+ "num_tokens": 3651210.0,
640
+ "step": 630
641
+ },
642
+ {
643
+ "entropy": 0.07459970507770777,
644
+ "epoch": 1.7066666666666666,
645
+ "grad_norm": 0.08154296875,
646
+ "learning_rate": 8.909257561869845e-05,
647
+ "loss": 0.06940353512763978,
648
+ "mean_token_accuracy": 0.9741591110825538,
649
+ "num_tokens": 3709024.0,
650
+ "step": 640
651
+ },
652
+ {
653
+ "entropy": 0.07277811467647552,
654
+ "epoch": 1.7333333333333334,
655
+ "grad_norm": 0.06298828125,
656
+ "learning_rate": 8.725939505041248e-05,
657
+ "loss": 0.06912165284156799,
658
+ "mean_token_accuracy": 0.9736517399549485,
659
+ "num_tokens": 3766449.0,
660
+ "step": 650
661
+ },
662
+ {
663
+ "entropy": 0.07142239715903997,
664
+ "epoch": 1.76,
665
+ "grad_norm": 0.053466796875,
666
+ "learning_rate": 8.54262144821265e-05,
667
+ "loss": 0.0682906985282898,
668
+ "mean_token_accuracy": 0.9748834028840065,
669
+ "num_tokens": 3824090.0,
670
+ "step": 660
671
+ },
672
+ {
673
+ "entropy": 0.07114961184561253,
674
+ "epoch": 1.7866666666666666,
675
+ "grad_norm": 0.048828125,
676
+ "learning_rate": 8.359303391384051e-05,
677
+ "loss": 0.06772947311401367,
678
+ "mean_token_accuracy": 0.9746391758322716,
679
+ "num_tokens": 3882786.0,
680
+ "step": 670
681
+ },
682
+ {
683
+ "entropy": 0.07262304350733757,
684
+ "epoch": 1.8133333333333335,
685
+ "grad_norm": 0.0849609375,
686
+ "learning_rate": 8.175985334555454e-05,
687
+ "loss": 0.06904927492141724,
688
+ "mean_token_accuracy": 0.9745182231068611,
689
+ "num_tokens": 3940648.0,
690
+ "step": 680
691
+ },
692
+ {
693
+ "entropy": 0.07172201108187437,
694
+ "epoch": 1.8399999999999999,
695
+ "grad_norm": 0.08984375,
696
+ "learning_rate": 7.992667277726857e-05,
697
+ "loss": 0.0677194595336914,
698
+ "mean_token_accuracy": 0.974525648355484,
699
+ "num_tokens": 3998798.0,
700
+ "step": 690
701
+ },
702
+ {
703
+ "entropy": 0.07090398538857698,
704
+ "epoch": 1.8666666666666667,
705
+ "grad_norm": 0.051025390625,
706
+ "learning_rate": 7.809349220898258e-05,
707
+ "loss": 0.06749570369720459,
708
+ "mean_token_accuracy": 0.9741235420107841,
709
+ "num_tokens": 4056793.0,
710
+ "step": 700
711
+ },
712
+ {
713
+ "entropy": 0.07037429772317409,
714
+ "epoch": 1.8933333333333333,
715
+ "grad_norm": 0.053955078125,
716
+ "learning_rate": 7.626031164069661e-05,
717
+ "loss": 0.06616277694702148,
718
+ "mean_token_accuracy": 0.9749814510345459,
719
+ "num_tokens": 4115690.0,
720
+ "step": 710
721
+ },
722
+ {
723
+ "entropy": 0.06948063550516963,
724
+ "epoch": 1.92,
725
+ "grad_norm": 0.0810546875,
726
+ "learning_rate": 7.442713107241064e-05,
727
+ "loss": 0.06898298859596252,
728
+ "mean_token_accuracy": 0.9742853432893753,
729
+ "num_tokens": 4173841.0,
730
+ "step": 720
731
+ },
732
+ {
733
+ "entropy": 0.07196591291576623,
734
+ "epoch": 1.9466666666666668,
735
+ "grad_norm": 0.047607421875,
736
+ "learning_rate": 7.259395050412467e-05,
737
+ "loss": 0.06768189072608947,
738
+ "mean_token_accuracy": 0.9750556230545044,
739
+ "num_tokens": 4232146.0,
740
+ "step": 730
741
+ },
742
+ {
743
+ "entropy": 0.07190824458375573,
744
+ "epoch": 1.9733333333333334,
745
+ "grad_norm": 0.07275390625,
746
+ "learning_rate": 7.076076993583868e-05,
747
+ "loss": 0.06733205318450927,
748
+ "mean_token_accuracy": 0.9746471583843231,
749
+ "num_tokens": 4290020.0,
750
+ "step": 740
751
+ },
752
+ {
753
+ "entropy": 0.07098262775689364,
754
+ "epoch": 2.0,
755
+ "grad_norm": 0.046630859375,
756
+ "learning_rate": 6.89275893675527e-05,
757
+ "loss": 0.06668331623077392,
758
+ "mean_token_accuracy": 0.9744203120470047,
759
+ "num_tokens": 4348788.0,
760
+ "step": 750
761
+ },
762
+ {
763
+ "entropy": 0.07027366831898689,
764
+ "epoch": 2.026666666666667,
765
+ "grad_norm": 0.04931640625,
766
+ "learning_rate": 6.709440879926673e-05,
767
+ "loss": 0.06592612862586975,
768
+ "mean_token_accuracy": 0.974977059662342,
769
+ "num_tokens": 4406426.0,
770
+ "step": 760
771
+ },
772
+ {
773
+ "entropy": 0.06931058187037706,
774
+ "epoch": 2.0533333333333332,
775
+ "grad_norm": 0.05615234375,
776
+ "learning_rate": 6.526122823098076e-05,
777
+ "loss": 0.06590970754623413,
778
+ "mean_token_accuracy": 0.9754465237259865,
779
+ "num_tokens": 4464145.0,
780
+ "step": 770
781
+ },
782
+ {
783
+ "entropy": 0.06888462873175741,
784
+ "epoch": 2.08,
785
+ "grad_norm": 0.0771484375,
786
+ "learning_rate": 6.342804766269478e-05,
787
+ "loss": 0.06574443578720093,
788
+ "mean_token_accuracy": 0.9753611847758293,
789
+ "num_tokens": 4522337.0,
790
+ "step": 780
791
+ },
792
+ {
793
+ "entropy": 0.06854705391451717,
794
+ "epoch": 2.1066666666666665,
795
+ "grad_norm": 0.048583984375,
796
+ "learning_rate": 6.15948670944088e-05,
797
+ "loss": 0.06521100401878357,
798
+ "mean_token_accuracy": 0.9753493323922158,
799
+ "num_tokens": 4580367.0,
800
+ "step": 790
801
+ },
802
+ {
803
+ "entropy": 0.07078330684453249,
804
+ "epoch": 2.1333333333333333,
805
+ "grad_norm": 0.05615234375,
806
+ "learning_rate": 5.976168652612283e-05,
807
+ "loss": 0.06622718572616577,
808
+ "mean_token_accuracy": 0.9753074139356613,
809
+ "num_tokens": 4637987.0,
810
+ "step": 800
811
+ },
812
+ {
813
+ "entropy": 0.06828645439818501,
814
+ "epoch": 2.16,
815
+ "grad_norm": 0.05322265625,
816
+ "learning_rate": 5.792850595783685e-05,
817
+ "loss": 0.06564919948577881,
818
+ "mean_token_accuracy": 0.9755483835935592,
819
+ "num_tokens": 4695886.0,
820
+ "step": 810
821
+ },
822
+ {
823
+ "entropy": 0.06908007161691784,
824
+ "epoch": 2.1866666666666665,
825
+ "grad_norm": 0.053955078125,
826
+ "learning_rate": 5.6095325389550866e-05,
827
+ "loss": 0.06564045548439026,
828
+ "mean_token_accuracy": 0.9750929772853851,
829
+ "num_tokens": 4753433.0,
830
+ "step": 820
831
+ },
832
+ {
833
+ "entropy": 0.0687640338204801,
834
+ "epoch": 2.2133333333333334,
835
+ "grad_norm": 0.052001953125,
836
+ "learning_rate": 5.4262144821264894e-05,
837
+ "loss": 0.06568140983581543,
838
+ "mean_token_accuracy": 0.9756649106740951,
839
+ "num_tokens": 4811459.0,
840
+ "step": 830
841
+ },
842
+ {
843
+ "entropy": 0.06845789151266217,
844
+ "epoch": 2.24,
845
+ "grad_norm": 0.05322265625,
846
+ "learning_rate": 5.2428964252978916e-05,
847
+ "loss": 0.0644676923751831,
848
+ "mean_token_accuracy": 0.975266519188881,
849
+ "num_tokens": 4870185.0,
850
+ "step": 840
851
+ },
852
+ {
853
+ "entropy": 0.06863211318850518,
854
+ "epoch": 2.2666666666666666,
855
+ "grad_norm": 0.04541015625,
856
+ "learning_rate": 5.0595783684692945e-05,
857
+ "loss": 0.06450478434562683,
858
+ "mean_token_accuracy": 0.9765662357211113,
859
+ "num_tokens": 4928705.0,
860
+ "step": 850
861
+ },
862
+ {
863
+ "entropy": 0.06972924629226326,
864
+ "epoch": 2.2933333333333334,
865
+ "grad_norm": 0.052001953125,
866
+ "learning_rate": 4.876260311640697e-05,
867
+ "loss": 0.06663312911987304,
868
+ "mean_token_accuracy": 0.974696435034275,
869
+ "num_tokens": 4985443.0,
870
+ "step": 860
871
+ },
872
+ {
873
+ "entropy": 0.06926036775112152,
874
+ "epoch": 2.32,
875
+ "grad_norm": 0.050537109375,
876
+ "learning_rate": 4.6929422548120995e-05,
877
+ "loss": 0.06713547110557556,
878
+ "mean_token_accuracy": 0.974915811419487,
879
+ "num_tokens": 5042822.0,
880
+ "step": 870
881
+ },
882
+ {
883
+ "entropy": 0.07052302733063698,
884
+ "epoch": 2.3466666666666667,
885
+ "grad_norm": 0.0634765625,
886
+ "learning_rate": 4.509624197983501e-05,
887
+ "loss": 0.06567599177360535,
888
+ "mean_token_accuracy": 0.9748982191085815,
889
+ "num_tokens": 5100175.0,
890
+ "step": 880
891
+ },
892
+ {
893
+ "entropy": 0.06812258837744593,
894
+ "epoch": 2.3733333333333335,
895
+ "grad_norm": 0.046875,
896
+ "learning_rate": 4.326306141154904e-05,
897
+ "loss": 0.06469246745109558,
898
+ "mean_token_accuracy": 0.9756363064050675,
899
+ "num_tokens": 5158712.0,
900
+ "step": 890
901
+ },
902
+ {
903
+ "entropy": 0.06816195128485561,
904
+ "epoch": 2.4,
905
+ "grad_norm": 0.052734375,
906
+ "learning_rate": 4.142988084326306e-05,
907
+ "loss": 0.06546497344970703,
908
+ "mean_token_accuracy": 0.9757738158106803,
909
+ "num_tokens": 5217131.0,
910
+ "step": 900
911
+ },
912
+ {
913
+ "entropy": 0.069792415574193,
914
+ "epoch": 2.4266666666666667,
915
+ "grad_norm": 0.05078125,
916
+ "learning_rate": 3.959670027497709e-05,
917
+ "loss": 0.0649182915687561,
918
+ "mean_token_accuracy": 0.9754065230488778,
919
+ "num_tokens": 5275013.0,
920
+ "step": 910
921
+ },
922
+ {
923
+ "entropy": 0.06883814567700028,
924
+ "epoch": 2.453333333333333,
925
+ "grad_norm": 0.056640625,
926
+ "learning_rate": 3.776351970669111e-05,
927
+ "loss": 0.06510300636291504,
928
+ "mean_token_accuracy": 0.9752438068389893,
929
+ "num_tokens": 5332790.0,
930
+ "step": 920
931
+ },
932
+ {
933
+ "entropy": 0.06875044060871005,
934
+ "epoch": 2.48,
935
+ "grad_norm": 0.049072265625,
936
+ "learning_rate": 3.593033913840513e-05,
937
+ "loss": 0.06483979225158691,
938
+ "mean_token_accuracy": 0.9757223874330521,
939
+ "num_tokens": 5390822.0,
940
+ "step": 930
941
+ },
942
+ {
943
+ "entropy": 0.06840683752670884,
944
+ "epoch": 2.506666666666667,
945
+ "grad_norm": 0.06005859375,
946
+ "learning_rate": 3.409715857011916e-05,
947
+ "loss": 0.06430425643920898,
948
+ "mean_token_accuracy": 0.9757388934493065,
949
+ "num_tokens": 5449491.0,
950
+ "step": 940
951
+ },
952
+ {
953
+ "entropy": 0.06754063200205565,
954
+ "epoch": 2.533333333333333,
955
+ "grad_norm": 0.05078125,
956
+ "learning_rate": 3.2263978001833184e-05,
957
+ "loss": 0.06348671317100525,
958
+ "mean_token_accuracy": 0.9755341604351997,
959
+ "num_tokens": 5508033.0,
960
+ "step": 950
961
+ },
962
+ {
963
+ "entropy": 0.0688040841370821,
964
+ "epoch": 2.56,
965
+ "grad_norm": 0.052734375,
966
+ "learning_rate": 3.0430797433547202e-05,
967
+ "loss": 0.065876704454422,
968
+ "mean_token_accuracy": 0.9748074486851692,
969
+ "num_tokens": 5565759.0,
970
+ "step": 960
971
+ },
972
+ {
973
+ "entropy": 0.06747948992997407,
974
+ "epoch": 2.586666666666667,
975
+ "grad_norm": 0.04833984375,
976
+ "learning_rate": 2.8597616865261228e-05,
977
+ "loss": 0.06365298628807067,
978
+ "mean_token_accuracy": 0.976527401804924,
979
+ "num_tokens": 5624012.0,
980
+ "step": 970
981
+ },
982
+ {
983
+ "entropy": 0.06841521579772233,
984
+ "epoch": 2.6133333333333333,
985
+ "grad_norm": 0.0654296875,
986
+ "learning_rate": 2.6764436296975253e-05,
987
+ "loss": 0.063433438539505,
988
+ "mean_token_accuracy": 0.975967101752758,
989
+ "num_tokens": 5682704.0,
990
+ "step": 980
991
+ },
992
+ {
993
+ "entropy": 0.06835865909233689,
994
+ "epoch": 2.64,
995
+ "grad_norm": 0.052001953125,
996
+ "learning_rate": 2.4931255728689275e-05,
997
+ "loss": 0.06502929329872131,
998
+ "mean_token_accuracy": 0.9752402231097221,
999
+ "num_tokens": 5740681.0,
1000
+ "step": 990
1001
+ },
1002
+ {
1003
+ "entropy": 0.06940433531999587,
1004
+ "epoch": 2.6666666666666665,
1005
+ "grad_norm": 0.05908203125,
1006
+ "learning_rate": 2.30980751604033e-05,
1007
+ "loss": 0.06487542390823364,
1008
+ "mean_token_accuracy": 0.975363838672638,
1009
+ "num_tokens": 5797760.0,
1010
+ "step": 1000
1011
+ }
1012
+ ],
1013
+ "logging_steps": 10,
1014
+ "max_steps": 1125,
1015
+ "num_input_tokens_seen": 0,
1016
+ "num_train_epochs": 3,
1017
+ "save_steps": 500,
1018
+ "stateful_callbacks": {
1019
+ "TrainerControl": {
1020
+ "args": {
1021
+ "should_epoch_stop": false,
1022
+ "should_evaluate": false,
1023
+ "should_log": false,
1024
+ "should_save": true,
1025
+ "should_training_stop": false
1026
+ },
1027
+ "attributes": {}
1028
+ }
1029
+ },
1030
+ "total_flos": 2.7209408134397952e+17,
1031
+ "train_batch_size": 2,
1032
+ "trial_name": null,
1033
+ "trial_params": null
1034
+ }
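The trainer_state.json above is plain JSON, so the logged metrics can be pulled out directly. A minimal sketch (the relative path is assumed from this repository layout):

```python
import json

# Path assumed from this repository layout; adjust to your local clone.
with open("consciousness/checkpoint-1000/trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]
print("global_step:", state["global_step"])
print("final loss:", history[-1]["loss"])
print("final mean_token_accuracy:", history[-1]["mean_token_accuracy"])
```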
consciousness/checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64a765ba4e23e22cae3a67629a6fda1378e5af8f1478b2252bd6159d00e12541
3
+ size 5649
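training_args.bin, like the other binary blobs in this commit, is stored as a Git LFS pointer, so the diff only shows the version/oid/size stanza rather than the payload. A hedged sketch for resolving the real file with huggingface_hub (the repo_id is a placeholder, not taken from this commit):

```python
from huggingface_hub import hf_hub_download

# repo_id is a placeholder; use the actual model repository this commit belongs to.
local_path = hf_hub_download(
    repo_id="<user>/<repo>",
    filename="consciousness/checkpoint-1000/training_args.bin",
)
print(local_path)  # cached path of the resolved binary, not the LFS pointer text
```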
consciousness/checkpoint-1125/README.md ADDED
@@ -0,0 +1,209 @@
1
+ ---
2
+ base_model: meta-llama/Llama-3.1-8B-Instruct
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:meta-llama/Llama-3.1-8B-Instruct
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
consciousness/checkpoint-1125/adapter_config.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "q_proj",
33
+ "v_proj",
34
+ "k_proj",
35
+ "o_proj"
36
+ ],
37
+ "target_parameters": null,
38
+ "task_type": "CAUSAL_LM",
39
+ "trainable_token_indices": null,
40
+ "use_dora": false,
41
+ "use_qalora": false,
42
+ "use_rslora": false
43
+ }
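This adapter_config.json describes a rank-16, alpha-32 LoRA over the attention projections (q/k/v/o) of Llama-3.1-8B-Instruct. A minimal loading sketch with PEFT (the local checkpoint path, bfloat16, and device_map="auto" via accelerate are assumptions, not read from this commit):

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")

# Attach the rank-16 LoRA weights from this checkpoint directory.
model = PeftModel.from_pretrained(base, "consciousness/checkpoint-1125")
model.eval()
```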
consciousness/checkpoint-1125/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66930f755168eb2cfd2ae1b754fcc51080acdc1050a221f1373d5ff234b23bb6
3
+ size 27297544
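The ~27 MB safetensors size is consistent with the adapter configuration above. A rough check, assuming the standard Llama-3.1-8B shapes (32 decoder layers, hidden size 4096, 8 KV heads) and 2-byte weights; the small remainder is the safetensors header:

```python
# Back-of-the-envelope size check for the LoRA adapter.
r, layers, hidden, kv_dim = 16, 32, 4096, 1024  # kv_dim = 8 KV heads * 128 head dim
per_layer = (
    r * (hidden + hidden)    # q_proj: A is (r x 4096), B is (4096 x r)
    + r * (hidden + kv_dim)  # k_proj
    + r * (hidden + kv_dim)  # v_proj
    + r * (hidden + hidden)  # o_proj
)
total = per_layer * layers
print(total, total * 2)  # ~13.6M LoRA parameters, ~27.3 MB at 2 bytes each
```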
consciousness/checkpoint-1125/chat_template.jinja ADDED
@@ -0,0 +1,109 @@
1
+ {{- bos_token }}
2
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {%- if not date_string is defined %}
9
+ {%- set date_string = "26 Jul 2024" %}
10
+ {%- endif %}
11
+ {%- if not tools is defined %}
12
+ {%- set tools = none %}
13
+ {%- endif %}
14
+
15
+ {#- This block extracts the system message, so we can slot it into the right place. #}
16
+ {%- if messages[0]['role'] == 'system' %}
17
+ {%- set system_message = messages[0]['content']|trim %}
18
+ {%- set messages = messages[1:] %}
19
+ {%- else %}
20
+ {%- set system_message = "" %}
21
+ {%- endif %}
22
+
23
+ {#- System message + builtin tools #}
24
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
25
+ {%- if builtin_tools is defined or tools is not none %}
26
+ {{- "Environment: ipython\n" }}
27
+ {%- endif %}
28
+ {%- if builtin_tools is defined %}
29
+ {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}}
30
+ {%- endif %}
31
+ {{- "Cutting Knowledge Date: December 2023\n" }}
32
+ {{- "Today Date: " + date_string + "\n\n" }}
33
+ {%- if tools is not none and not tools_in_user_message %}
34
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
35
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
36
+ {{- "Do not use variables.\n\n" }}
37
+ {%- for t in tools %}
38
+ {{- t | tojson(indent=4) }}
39
+ {{- "\n\n" }}
40
+ {%- endfor %}
41
+ {%- endif %}
42
+ {{- system_message }}
43
+ {{- "<|eot_id|>" }}
44
+
45
+ {#- Custom tools are passed in a user message with some extra guidance #}
46
+ {%- if tools_in_user_message and not tools is none %}
47
+ {#- Extract the first user message so we can plug it in here #}
48
+ {%- if messages | length != 0 %}
49
+ {%- set first_user_message = messages[0]['content']|trim %}
50
+ {%- set messages = messages[1:] %}
51
+ {%- else %}
52
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
53
+ {%- endif %}
54
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
55
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
56
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
57
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
58
+ {{- "Do not use variables.\n\n" }}
59
+ {%- for t in tools %}
60
+ {{- t | tojson(indent=4) }}
61
+ {{- "\n\n" }}
62
+ {%- endfor %}
63
+ {{- first_user_message + "<|eot_id|>"}}
64
+ {%- endif %}
65
+
66
+ {%- for message in messages %}
67
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
68
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
69
+ {%- elif 'tool_calls' in message %}
70
+ {%- if not message.tool_calls|length == 1 %}
71
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
72
+ {%- endif %}
73
+ {%- set tool_call = message.tool_calls[0].function %}
74
+ {%- if builtin_tools is defined and tool_call.name in builtin_tools %}
75
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
76
+ {{- "<|python_tag|>" + tool_call.name + ".call(" }}
77
+ {%- for arg_name, arg_val in tool_call.arguments | items %}
78
+ {{- arg_name + '="' + arg_val + '"' }}
79
+ {%- if not loop.last %}
80
+ {{- ", " }}
81
+ {%- endif %}
82
+ {%- endfor %}
83
+ {{- ")" }}
84
+ {%- else %}
85
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
86
+ {{- '{"name": "' + tool_call.name + '", ' }}
87
+ {{- '"parameters": ' }}
88
+ {{- tool_call.arguments | tojson }}
89
+ {{- "}" }}
90
+ {%- endif %}
91
+ {%- if builtin_tools is defined %}
92
+ {#- This means we're in ipython mode #}
93
+ {{- "<|eom_id|>" }}
94
+ {%- else %}
95
+ {{- "<|eot_id|>" }}
96
+ {%- endif %}
97
+ {%- elif message.role == "tool" or message.role == "ipython" %}
98
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
99
+ {%- if message.content is mapping or message.content is iterable %}
100
+ {{- message.content | tojson }}
101
+ {%- else %}
102
+ {{- message.content }}
103
+ {%- endif %}
104
+ {{- "<|eot_id|>" }}
105
+ {%- endif %}
106
+ {%- endfor %}
107
+ {%- if add_generation_prompt %}
108
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
109
+ {%- endif %}
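This is the stock Llama-3.1 chat template, including the tool-calling branches. A short rendering sketch (assumes a transformers version recent enough to read chat_template.jinja from the checkpoint directory; the messages are illustrative):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("consciousness/checkpoint-1125")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)  # rendered <|start_header_id|>... prompt ending with the assistant header
```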
consciousness/checkpoint-1125/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6f71682a7cc7ff19459018202fe6c95a67a1f308af06926e14f039c612e6e27
3
+ size 54745547
consciousness/checkpoint-1125/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d7c2bfaf680528eb393bf7b7edd31580bea5bb0bfa92242241681bd71bd2442
3
+ size 14645
consciousness/checkpoint-1125/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb2a37c55dd5d7928c5b2c15b7d4f650fade3ddb7af6dc8961ca05874b789488
3
+ size 1465
consciousness/checkpoint-1125/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
consciousness/checkpoint-1125/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin_of_text|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|eot_id|>",
6
+ "is_local": false,
7
+ "model_input_names": [
8
+ "input_ids",
9
+ "attention_mask"
10
+ ],
11
+ "model_max_length": 131072,
12
+ "pad_token": "<|eot_id|>",
13
+ "tokenizer_class": "TokenizersBackend"
14
+ }
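Note that pad_token is set to the same string as eos_token ("<|eot_id|>"), a common choice for Llama fine-tunes. A quick sanity check after loading (local path assumed):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("consciousness/checkpoint-1125")
print(tok.eos_token, tok.pad_token)  # both "<|eot_id|>" per this config
print(tok.model_max_length)          # 131072
```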
consciousness/checkpoint-1125/trainer_state.json ADDED
@@ -0,0 +1,1154 @@
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1125,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "entropy": 2.8901569664478304,
14
+ "epoch": 0.02666666666666667,
15
+ "grad_norm": 0.267578125,
16
+ "learning_rate": 5.294117647058824e-05,
17
+ "loss": 2.9965847015380858,
18
+ "mean_token_accuracy": 0.4477638125419617,
19
+ "num_tokens": 57327.0,
20
+ "step": 10
21
+ },
22
+ {
23
+ "entropy": 2.431850343942642,
24
+ "epoch": 0.05333333333333334,
25
+ "grad_norm": 0.255859375,
26
+ "learning_rate": 0.00011176470588235294,
27
+ "loss": 2.636348533630371,
28
+ "mean_token_accuracy": 0.4853471860289574,
29
+ "num_tokens": 115513.0,
30
+ "step": 20
31
+ },
32
+ {
33
+ "entropy": 2.0503339916467667,
34
+ "epoch": 0.08,
35
+ "grad_norm": 0.2734375,
36
+ "learning_rate": 0.00017058823529411766,
37
+ "loss": 1.9743734359741212,
38
+ "mean_token_accuracy": 0.5749435268342495,
39
+ "num_tokens": 173246.0,
40
+ "step": 30
41
+ },
42
+ {
43
+ "entropy": 1.4122730612754821,
44
+ "epoch": 0.10666666666666667,
45
+ "grad_norm": 0.357421875,
46
+ "learning_rate": 0.00019908340971585702,
47
+ "loss": 1.3748690605163574,
48
+ "mean_token_accuracy": 0.6867235794663429,
49
+ "num_tokens": 231278.0,
50
+ "step": 40
51
+ },
52
+ {
53
+ "entropy": 1.0256530404090882,
54
+ "epoch": 0.13333333333333333,
55
+ "grad_norm": 0.359375,
56
+ "learning_rate": 0.00019725022914757106,
57
+ "loss": 0.9704485893249511,
58
+ "mean_token_accuracy": 0.7766638442873954,
59
+ "num_tokens": 288393.0,
60
+ "step": 50
61
+ },
62
+ {
63
+ "entropy": 0.7456447497010231,
64
+ "epoch": 0.16,
65
+ "grad_norm": 0.328125,
66
+ "learning_rate": 0.00019541704857928507,
67
+ "loss": 0.6671554565429687,
68
+ "mean_token_accuracy": 0.845753838121891,
69
+ "num_tokens": 346339.0,
70
+ "step": 60
71
+ },
72
+ {
73
+ "entropy": 0.537605831772089,
74
+ "epoch": 0.18666666666666668,
75
+ "grad_norm": 0.328125,
76
+ "learning_rate": 0.00019358386801099912,
77
+ "loss": 0.4658506393432617,
78
+ "mean_token_accuracy": 0.8909643113613128,
79
+ "num_tokens": 404015.0,
80
+ "step": 70
81
+ },
82
+ {
83
+ "entropy": 0.37200461626052855,
84
+ "epoch": 0.21333333333333335,
85
+ "grad_norm": 0.283203125,
86
+ "learning_rate": 0.0001917506874427131,
87
+ "loss": 0.3085629940032959,
88
+ "mean_token_accuracy": 0.9280218213796616,
89
+ "num_tokens": 461115.0,
90
+ "step": 80
91
+ },
92
+ {
93
+ "entropy": 0.2607572071254253,
94
+ "epoch": 0.24,
95
+ "grad_norm": 0.267578125,
96
+ "learning_rate": 0.00018991750687442712,
97
+ "loss": 0.21192119121551514,
98
+ "mean_token_accuracy": 0.9511988922953606,
99
+ "num_tokens": 518926.0,
100
+ "step": 90
101
+ },
102
+ {
103
+ "entropy": 0.21099306046962737,
104
+ "epoch": 0.26666666666666666,
105
+ "grad_norm": 0.2021484375,
106
+ "learning_rate": 0.00018808432630614116,
107
+ "loss": 0.1747212290763855,
108
+ "mean_token_accuracy": 0.9576459184288979,
109
+ "num_tokens": 577120.0,
110
+ "step": 100
111
+ },
112
+ {
113
+ "entropy": 0.1670930277556181,
114
+ "epoch": 0.29333333333333333,
115
+ "grad_norm": 0.2431640625,
116
+ "learning_rate": 0.00018625114573785518,
117
+ "loss": 0.14222853183746337,
118
+ "mean_token_accuracy": 0.9636133790016175,
119
+ "num_tokens": 635382.0,
120
+ "step": 110
121
+ },
122
+ {
123
+ "entropy": 0.15717535726726056,
124
+ "epoch": 0.32,
125
+ "grad_norm": 0.12890625,
126
+ "learning_rate": 0.00018441796516956922,
127
+ "loss": 0.13076614141464232,
128
+ "mean_token_accuracy": 0.9650501102209091,
129
+ "num_tokens": 692813.0,
130
+ "step": 120
131
+ },
132
+ {
133
+ "entropy": 0.14444066677242517,
134
+ "epoch": 0.3466666666666667,
135
+ "grad_norm": 0.1640625,
136
+ "learning_rate": 0.00018258478460128323,
137
+ "loss": 0.11674572229385376,
138
+ "mean_token_accuracy": 0.9665236040949822,
139
+ "num_tokens": 750815.0,
140
+ "step": 130
141
+ },
142
+ {
143
+ "entropy": 0.1316229362040758,
144
+ "epoch": 0.37333333333333335,
145
+ "grad_norm": 0.177734375,
146
+ "learning_rate": 0.00018075160403299728,
147
+ "loss": 0.10633546113967896,
148
+ "mean_token_accuracy": 0.9686767488718033,
149
+ "num_tokens": 809607.0,
150
+ "step": 140
151
+ },
152
+ {
153
+ "entropy": 0.12511782981455327,
154
+ "epoch": 0.4,
155
+ "grad_norm": 0.1103515625,
156
+ "learning_rate": 0.0001789184234647113,
157
+ "loss": 0.10267382860183716,
158
+ "mean_token_accuracy": 0.9692023977637291,
159
+ "num_tokens": 867374.0,
160
+ "step": 150
161
+ },
162
+ {
163
+ "entropy": 0.1147347992286086,
164
+ "epoch": 0.4266666666666667,
165
+ "grad_norm": 0.10302734375,
166
+ "learning_rate": 0.0001770852428964253,
167
+ "loss": 0.09604376554489136,
168
+ "mean_token_accuracy": 0.9704543471336364,
169
+ "num_tokens": 925873.0,
170
+ "step": 160
171
+ },
172
+ {
173
+ "entropy": 0.11136603765189648,
174
+ "epoch": 0.4533333333333333,
175
+ "grad_norm": 0.12158203125,
176
+ "learning_rate": 0.00017525206232813932,
177
+ "loss": 0.09679205417633056,
178
+ "mean_token_accuracy": 0.9701795622706413,
179
+ "num_tokens": 983653.0,
180
+ "step": 170
181
+ },
182
+ {
183
+ "entropy": 0.10962173249572515,
184
+ "epoch": 0.48,
185
+ "grad_norm": 0.10986328125,
186
+ "learning_rate": 0.00017341888175985334,
187
+ "loss": 0.09156813025474549,
188
+ "mean_token_accuracy": 0.971354915201664,
189
+ "num_tokens": 1041996.0,
190
+ "step": 180
191
+ },
192
+ {
193
+ "entropy": 0.10233879294246435,
194
+ "epoch": 0.5066666666666667,
195
+ "grad_norm": 0.2431640625,
196
+ "learning_rate": 0.00017158570119156738,
197
+ "loss": 0.09264941811561585,
198
+ "mean_token_accuracy": 0.9716016605496407,
199
+ "num_tokens": 1100105.0,
200
+ "step": 190
201
+ },
202
+ {
203
+ "entropy": 0.10874027330428362,
204
+ "epoch": 0.5333333333333333,
205
+ "grad_norm": 0.103515625,
206
+ "learning_rate": 0.0001697525206232814,
207
+ "loss": 0.09393113255500793,
208
+ "mean_token_accuracy": 0.9707169815897941,
209
+ "num_tokens": 1157940.0,
210
+ "step": 200
211
+ },
212
+ {
213
+ "entropy": 0.10715384036302567,
214
+ "epoch": 0.56,
215
+ "grad_norm": 0.0927734375,
216
+ "learning_rate": 0.00016791934005499544,
217
+ "loss": 0.08979941606521606,
218
+ "mean_token_accuracy": 0.9710627257823944,
219
+ "num_tokens": 1216048.0,
220
+ "step": 210
221
+ },
222
+ {
223
+ "entropy": 0.09897389095276594,
224
+ "epoch": 0.5866666666666667,
225
+ "grad_norm": 0.1005859375,
226
+ "learning_rate": 0.00016608615948670945,
227
+ "loss": 0.08646941781044007,
228
+ "mean_token_accuracy": 0.9712389498949051,
229
+ "num_tokens": 1274357.0,
230
+ "step": 220
231
+ },
232
+ {
233
+ "entropy": 0.09603469483554364,
234
+ "epoch": 0.6133333333333333,
235
+ "grad_norm": 0.09716796875,
236
+ "learning_rate": 0.0001642529789184235,
237
+ "loss": 0.08556437492370605,
238
+ "mean_token_accuracy": 0.9711127072572708,
239
+ "num_tokens": 1332152.0,
240
+ "step": 230
241
+ },
242
+ {
243
+ "entropy": 0.09263445399701595,
244
+ "epoch": 0.64,
245
+ "grad_norm": 0.1396484375,
246
+ "learning_rate": 0.0001624197983501375,
247
+ "loss": 0.08339133858680725,
248
+ "mean_token_accuracy": 0.9719239071011543,
249
+ "num_tokens": 1389574.0,
250
+ "step": 240
251
+ },
252
+ {
253
+ "entropy": 0.09686502479016781,
254
+ "epoch": 0.6666666666666666,
255
+ "grad_norm": 0.0712890625,
256
+ "learning_rate": 0.00016058661778185152,
257
+ "loss": 0.08281562328338624,
258
+ "mean_token_accuracy": 0.9720177337527275,
259
+ "num_tokens": 1447409.0,
260
+ "step": 250
261
+ },
262
+ {
263
+ "entropy": 0.08692479655146598,
264
+ "epoch": 0.6933333333333334,
265
+ "grad_norm": 0.0830078125,
266
+ "learning_rate": 0.00015875343721356554,
267
+ "loss": 0.08078550696372985,
268
+ "mean_token_accuracy": 0.9726089149713516,
269
+ "num_tokens": 1506058.0,
270
+ "step": 260
271
+ },
272
+ {
273
+ "entropy": 0.09034751150757074,
274
+ "epoch": 0.72,
275
+ "grad_norm": 0.08935546875,
276
+ "learning_rate": 0.00015692025664527955,
277
+ "loss": 0.08023000955581665,
278
+ "mean_token_accuracy": 0.9725215956568718,
279
+ "num_tokens": 1563817.0,
280
+ "step": 270
281
+ },
282
+ {
283
+ "entropy": 0.08724061641842126,
284
+ "epoch": 0.7466666666666667,
285
+ "grad_norm": 0.09423828125,
286
+ "learning_rate": 0.0001550870760769936,
287
+ "loss": 0.0812032699584961,
288
+ "mean_token_accuracy": 0.9722012594342232,
289
+ "num_tokens": 1621737.0,
290
+ "step": 280
291
+ },
292
+ {
293
+ "entropy": 0.08758355937898159,
294
+ "epoch": 0.7733333333333333,
295
+ "grad_norm": 0.2099609375,
296
+ "learning_rate": 0.0001532538955087076,
297
+ "loss": 0.08142906427383423,
298
+ "mean_token_accuracy": 0.9718389093875885,
299
+ "num_tokens": 1679970.0,
300
+ "step": 290
301
+ },
302
+ {
303
+ "entropy": 0.08803936429321765,
304
+ "epoch": 0.8,
305
+ "grad_norm": 0.12451171875,
306
+ "learning_rate": 0.00015142071494042165,
307
+ "loss": 0.0806335985660553,
308
+ "mean_token_accuracy": 0.9723069176077843,
309
+ "num_tokens": 1738304.0,
310
+ "step": 300
311
+ },
312
+ {
313
+ "entropy": 0.0896342158317566,
314
+ "epoch": 0.8266666666666667,
315
+ "grad_norm": 0.059814453125,
316
+ "learning_rate": 0.00014958753437213567,
317
+ "loss": 0.08014391660690308,
318
+ "mean_token_accuracy": 0.9721709281206131,
319
+ "num_tokens": 1795881.0,
320
+ "step": 310
321
+ },
322
+ {
323
+ "entropy": 0.08054284229874611,
324
+ "epoch": 0.8533333333333334,
325
+ "grad_norm": 0.09033203125,
326
+ "learning_rate": 0.00014775435380384968,
327
+ "loss": 0.07684423327445984,
328
+ "mean_token_accuracy": 0.9731693744659424,
329
+ "num_tokens": 1854853.0,
330
+ "step": 320
331
+ },
332
+ {
333
+ "entropy": 0.0840398171916604,
334
+ "epoch": 0.88,
335
+ "grad_norm": 0.05224609375,
336
+ "learning_rate": 0.00014592117323556373,
337
+ "loss": 0.07634277939796448,
338
+ "mean_token_accuracy": 0.9732364892959595,
339
+ "num_tokens": 1912939.0,
340
+ "step": 330
341
+ },
342
+ {
343
+ "entropy": 0.08260406106710434,
344
+ "epoch": 0.9066666666666666,
345
+ "grad_norm": 0.072265625,
346
+ "learning_rate": 0.00014408799266727771,
347
+ "loss": 0.076292884349823,
348
+ "mean_token_accuracy": 0.9736541777849197,
349
+ "num_tokens": 1971345.0,
350
+ "step": 340
351
+ },
352
+ {
353
+ "entropy": 0.08077720124274493,
354
+ "epoch": 0.9333333333333333,
355
+ "grad_norm": 0.057373046875,
356
+ "learning_rate": 0.00014225481209899176,
357
+ "loss": 0.07518362402915954,
358
+ "mean_token_accuracy": 0.9735523566603661,
359
+ "num_tokens": 2029618.0,
360
+ "step": 350
361
+ },
362
+ {
363
+ "entropy": 0.0814354794099927,
364
+ "epoch": 0.96,
365
+ "grad_norm": 0.087890625,
366
+ "learning_rate": 0.00014042163153070577,
367
+ "loss": 0.07500824928283692,
368
+ "mean_token_accuracy": 0.9733900666236878,
369
+ "num_tokens": 2088198.0,
370
+ "step": 360
371
+ },
372
+ {
373
+ "entropy": 0.08081495910882949,
374
+ "epoch": 0.9866666666666667,
375
+ "grad_norm": 0.05810546875,
376
+ "learning_rate": 0.0001385884509624198,
377
+ "loss": 0.07559239268302917,
378
+ "mean_token_accuracy": 0.9732825100421906,
379
+ "num_tokens": 2145822.0,
380
+ "step": 370
381
+ },
382
+ {
383
+ "entropy": 0.08157326076179743,
384
+ "epoch": 1.0133333333333334,
385
+ "grad_norm": 0.058837890625,
386
+ "learning_rate": 0.00013675527039413383,
387
+ "loss": 0.07452890872955323,
388
+ "mean_token_accuracy": 0.9733605772256851,
389
+ "num_tokens": 2203248.0,
390
+ "step": 380
391
+ },
392
+ {
393
+ "entropy": 0.07517405189573764,
394
+ "epoch": 1.04,
395
+ "grad_norm": 0.087890625,
396
+ "learning_rate": 0.00013492208982584784,
397
+ "loss": 0.07157951593399048,
398
+ "mean_token_accuracy": 0.9741677790880203,
399
+ "num_tokens": 2261444.0,
400
+ "step": 390
401
+ },
402
+ {
403
+ "entropy": 0.07766247931867838,
404
+ "epoch": 1.0666666666666667,
405
+ "grad_norm": 0.060302734375,
406
+ "learning_rate": 0.00013308890925756189,
407
+ "loss": 0.07183201909065247,
408
+ "mean_token_accuracy": 0.9740341395139694,
409
+ "num_tokens": 2319551.0,
410
+ "step": 400
411
+ },
412
+ {
413
+ "entropy": 0.07695812471210957,
414
+ "epoch": 1.0933333333333333,
415
+ "grad_norm": 0.054443359375,
416
+ "learning_rate": 0.0001312557286892759,
417
+ "loss": 0.07332680225372315,
418
+ "mean_token_accuracy": 0.9733265534043312,
419
+ "num_tokens": 2377422.0,
420
+ "step": 410
421
+ },
422
+ {
423
+ "entropy": 0.07884457465261221,
424
+ "epoch": 1.12,
425
+ "grad_norm": 0.060791015625,
426
+ "learning_rate": 0.00012942254812098992,
427
+ "loss": 0.07316585779190063,
428
+ "mean_token_accuracy": 0.973577855527401,
429
+ "num_tokens": 2435382.0,
430
+ "step": 420
431
+ },
432
+ {
433
+ "entropy": 0.07917917389422655,
434
+ "epoch": 1.1466666666666667,
435
+ "grad_norm": 0.0771484375,
436
+ "learning_rate": 0.00012758936755270393,
437
+ "loss": 0.07189081907272339,
438
+ "mean_token_accuracy": 0.9741694211959839,
439
+ "num_tokens": 2493780.0,
440
+ "step": 430
441
+ },
442
+ {
443
+ "entropy": 0.07554319184273481,
444
+ "epoch": 1.1733333333333333,
445
+ "grad_norm": 0.126953125,
446
+ "learning_rate": 0.00012575618698441797,
447
+ "loss": 0.07339509725570678,
448
+ "mean_token_accuracy": 0.9734218120574951,
449
+ "num_tokens": 2551587.0,
450
+ "step": 440
451
+ },
452
+ {
453
+ "entropy": 0.0771414702758193,
454
+ "epoch": 1.2,
455
+ "grad_norm": 0.052490234375,
456
+ "learning_rate": 0.000123923006416132,
457
+ "loss": 0.07223436832427979,
458
+ "mean_token_accuracy": 0.9731230854988098,
459
+ "num_tokens": 2609738.0,
460
+ "step": 450
461
+ },
462
+ {
463
+ "entropy": 0.07702515590935946,
464
+ "epoch": 1.2266666666666666,
465
+ "grad_norm": 0.05078125,
466
+ "learning_rate": 0.00012208982584784603,
467
+ "loss": 0.07126941084861756,
468
+ "mean_token_accuracy": 0.9743727937340736,
469
+ "num_tokens": 2667570.0,
470
+ "step": 460
471
+ },
472
+ {
473
+ "entropy": 0.0751312056556344,
474
+ "epoch": 1.2533333333333334,
475
+ "grad_norm": 0.052490234375,
476
+ "learning_rate": 0.00012025664527956005,
477
+ "loss": 0.07185030579566956,
478
+ "mean_token_accuracy": 0.9739165529608727,
479
+ "num_tokens": 2725220.0,
480
+ "step": 470
481
+ },
482
+ {
483
+ "entropy": 0.07488212268799543,
484
+ "epoch": 1.28,
485
+ "grad_norm": 0.0517578125,
486
+ "learning_rate": 0.00011842346471127406,
487
+ "loss": 0.07148469686508178,
488
+ "mean_token_accuracy": 0.973349143564701,
489
+ "num_tokens": 2782737.0,
490
+ "step": 480
491
+ },
492
+ {
493
+ "entropy": 0.07398118702694774,
494
+ "epoch": 1.3066666666666666,
495
+ "grad_norm": 0.044677734375,
496
+ "learning_rate": 0.00011659028414298809,
497
+ "loss": 0.06911076903343201,
498
+ "mean_token_accuracy": 0.9739873677492141,
499
+ "num_tokens": 2841536.0,
500
+ "step": 490
501
+ },
502
+ {
503
+ "entropy": 0.07522298116236925,
504
+ "epoch": 1.3333333333333333,
505
+ "grad_norm": 0.05908203125,
506
+ "learning_rate": 0.0001147571035747021,
507
+ "loss": 0.06964495182037353,
508
+ "mean_token_accuracy": 0.9739961415529251,
509
+ "num_tokens": 2899599.0,
510
+ "step": 500
511
+ },
512
+ {
513
+ "entropy": 0.07469812557101249,
514
+ "epoch": 1.3599999999999999,
515
+ "grad_norm": 0.134765625,
516
+ "learning_rate": 0.00011292392300641615,
517
+ "loss": 0.0720504343509674,
518
+ "mean_token_accuracy": 0.973115186393261,
519
+ "num_tokens": 2956977.0,
520
+ "step": 510
521
+ },
522
+ {
523
+ "entropy": 0.07730768620967865,
524
+ "epoch": 1.3866666666666667,
525
+ "grad_norm": 0.0966796875,
526
+ "learning_rate": 0.00011109074243813016,
527
+ "loss": 0.07261049151420593,
528
+ "mean_token_accuracy": 0.9735311016440391,
529
+ "num_tokens": 3014397.0,
530
+ "step": 520
531
+ },
532
+ {
533
+ "entropy": 0.07650617882609367,
534
+ "epoch": 1.4133333333333333,
535
+ "grad_norm": 0.059814453125,
536
+ "learning_rate": 0.00010925756186984419,
537
+ "loss": 0.07154079675674438,
538
+ "mean_token_accuracy": 0.9735050886869431,
539
+ "num_tokens": 3072014.0,
540
+ "step": 530
541
+ },
542
+ {
543
+ "entropy": 0.07418479155749083,
544
+ "epoch": 1.44,
545
+ "grad_norm": 0.08447265625,
546
+ "learning_rate": 0.0001074243813015582,
547
+ "loss": 0.07014204263687134,
548
+ "mean_token_accuracy": 0.9742872670292855,
549
+ "num_tokens": 3129542.0,
550
+ "step": 540
551
+ },
552
+ {
553
+ "entropy": 0.07493350077420473,
554
+ "epoch": 1.4666666666666668,
555
+ "grad_norm": 0.06396484375,
556
+ "learning_rate": 0.00010559120073327222,
557
+ "loss": 0.0693817377090454,
558
+ "mean_token_accuracy": 0.9750317439436913,
559
+ "num_tokens": 3188756.0,
560
+ "step": 550
561
+ },
562
+ {
563
+ "entropy": 0.07290575439110399,
564
+ "epoch": 1.4933333333333334,
565
+ "grad_norm": 0.0634765625,
566
+ "learning_rate": 0.00010375802016498626,
567
+ "loss": 0.06914764046669006,
568
+ "mean_token_accuracy": 0.9740337684750557,
569
+ "num_tokens": 3246755.0,
570
+ "step": 560
571
+ },
572
+ {
573
+ "entropy": 0.07299449313431979,
574
+ "epoch": 1.52,
575
+ "grad_norm": 0.056396484375,
576
+ "learning_rate": 0.00010192483959670028,
577
+ "loss": 0.06928544640541076,
578
+ "mean_token_accuracy": 0.9739615619182587,
579
+ "num_tokens": 3304396.0,
580
+ "step": 570
581
+ },
582
+ {
583
+ "entropy": 0.07399061964824796,
584
+ "epoch": 1.5466666666666666,
585
+ "grad_norm": 0.049560546875,
586
+ "learning_rate": 0.0001000916590284143,
587
+ "loss": 0.07088688611984253,
588
+ "mean_token_accuracy": 0.9734108299016953,
589
+ "num_tokens": 3361947.0,
590
+ "step": 580
591
+ },
592
+ {
593
+ "entropy": 0.07375452127307654,
594
+ "epoch": 1.5733333333333333,
595
+ "grad_norm": 0.056884765625,
596
+ "learning_rate": 9.825847846012832e-05,
597
+ "loss": 0.06905483603477477,
598
+ "mean_token_accuracy": 0.9750793874263763,
599
+ "num_tokens": 3420088.0,
600
+ "step": 590
601
+ },
602
+ {
603
+ "entropy": 0.0736119981855154,
604
+ "epoch": 1.6,
605
+ "grad_norm": 0.07958984375,
606
+ "learning_rate": 9.642529789184235e-05,
607
+ "loss": 0.06965676546096802,
608
+ "mean_token_accuracy": 0.9740386828780174,
609
+ "num_tokens": 3478106.0,
610
+ "step": 600
611
+ },
612
+ {
613
+ "entropy": 0.07634057383984327,
614
+ "epoch": 1.6266666666666667,
615
+ "grad_norm": 0.045166015625,
616
+ "learning_rate": 9.459211732355638e-05,
617
+ "loss": 0.07061071991920471,
618
+ "mean_token_accuracy": 0.9740972384810448,
619
+ "num_tokens": 3535104.0,
620
+ "step": 610
621
+ },
622
+ {
623
+ "entropy": 0.07195411194115878,
624
+ "epoch": 1.6533333333333333,
625
+ "grad_norm": 0.061279296875,
626
+ "learning_rate": 9.27589367552704e-05,
627
+ "loss": 0.06899864077568055,
628
+ "mean_token_accuracy": 0.9742538690567016,
629
+ "num_tokens": 3592771.0,
630
+ "step": 620
631
+ },
632
+ {
633
+ "entropy": 0.07130216900259256,
634
+ "epoch": 1.6800000000000002,
635
+ "grad_norm": 0.05322265625,
636
+ "learning_rate": 9.092575618698442e-05,
637
+ "loss": 0.06816592216491699,
638
+ "mean_token_accuracy": 0.9751273840665817,
639
+ "num_tokens": 3651210.0,
640
+ "step": 630
641
+ },
642
+ {
643
+ "entropy": 0.07459970507770777,
644
+ "epoch": 1.7066666666666666,
645
+ "grad_norm": 0.08154296875,
646
+ "learning_rate": 8.909257561869845e-05,
647
+ "loss": 0.06940353512763978,
648
+ "mean_token_accuracy": 0.9741591110825538,
649
+ "num_tokens": 3709024.0,
650
+ "step": 640
651
+ },
652
+ {
653
+ "entropy": 0.07277811467647552,
654
+ "epoch": 1.7333333333333334,
655
+ "grad_norm": 0.06298828125,
656
+ "learning_rate": 8.725939505041248e-05,
657
+ "loss": 0.06912165284156799,
658
+ "mean_token_accuracy": 0.9736517399549485,
659
+ "num_tokens": 3766449.0,
660
+ "step": 650
661
+ },
662
+ {
663
+ "entropy": 0.07142239715903997,
664
+ "epoch": 1.76,
665
+ "grad_norm": 0.053466796875,
666
+ "learning_rate": 8.54262144821265e-05,
667
+ "loss": 0.0682906985282898,
668
+ "mean_token_accuracy": 0.9748834028840065,
669
+ "num_tokens": 3824090.0,
670
+ "step": 660
671
+ },
672
+ {
673
+ "entropy": 0.07114961184561253,
674
+ "epoch": 1.7866666666666666,
675
+ "grad_norm": 0.048828125,
676
+ "learning_rate": 8.359303391384051e-05,
677
+ "loss": 0.06772947311401367,
678
+ "mean_token_accuracy": 0.9746391758322716,
679
+ "num_tokens": 3882786.0,
680
+ "step": 670
681
+ },
682
+ {
683
+ "entropy": 0.07262304350733757,
684
+ "epoch": 1.8133333333333335,
685
+ "grad_norm": 0.0849609375,
686
+ "learning_rate": 8.175985334555454e-05,
687
+ "loss": 0.06904927492141724,
688
+ "mean_token_accuracy": 0.9745182231068611,
689
+ "num_tokens": 3940648.0,
690
+ "step": 680
691
+ },
692
+ {
693
+ "entropy": 0.07172201108187437,
694
+ "epoch": 1.8399999999999999,
695
+ "grad_norm": 0.08984375,
696
+ "learning_rate": 7.992667277726857e-05,
697
+ "loss": 0.0677194595336914,
698
+ "mean_token_accuracy": 0.974525648355484,
699
+ "num_tokens": 3998798.0,
700
+ "step": 690
701
+ },
702
+ {
703
+ "entropy": 0.07090398538857698,
704
+ "epoch": 1.8666666666666667,
705
+ "grad_norm": 0.051025390625,
706
+ "learning_rate": 7.809349220898258e-05,
707
+ "loss": 0.06749570369720459,
708
+ "mean_token_accuracy": 0.9741235420107841,
709
+ "num_tokens": 4056793.0,
710
+ "step": 700
711
+ },
712
+ {
713
+ "entropy": 0.07037429772317409,
714
+ "epoch": 1.8933333333333333,
715
+ "grad_norm": 0.053955078125,
716
+ "learning_rate": 7.626031164069661e-05,
717
+ "loss": 0.06616277694702148,
718
+ "mean_token_accuracy": 0.9749814510345459,
719
+ "num_tokens": 4115690.0,
720
+ "step": 710
721
+ },
722
+ {
723
+ "entropy": 0.06948063550516963,
724
+ "epoch": 1.92,
725
+ "grad_norm": 0.0810546875,
726
+ "learning_rate": 7.442713107241064e-05,
727
+ "loss": 0.06898298859596252,
728
+ "mean_token_accuracy": 0.9742853432893753,
729
+ "num_tokens": 4173841.0,
730
+ "step": 720
731
+ },
732
+ {
733
+ "entropy": 0.07196591291576623,
734
+ "epoch": 1.9466666666666668,
735
+ "grad_norm": 0.047607421875,
736
+ "learning_rate": 7.259395050412467e-05,
737
+ "loss": 0.06768189072608947,
738
+ "mean_token_accuracy": 0.9750556230545044,
739
+ "num_tokens": 4232146.0,
740
+ "step": 730
741
+ },
742
+ {
743
+ "entropy": 0.07190824458375573,
744
+ "epoch": 1.9733333333333334,
745
+ "grad_norm": 0.07275390625,
746
+ "learning_rate": 7.076076993583868e-05,
747
+ "loss": 0.06733205318450927,
748
+ "mean_token_accuracy": 0.9746471583843231,
749
+ "num_tokens": 4290020.0,
750
+ "step": 740
751
+ },
752
+ {
753
+ "entropy": 0.07098262775689364,
754
+ "epoch": 2.0,
755
+ "grad_norm": 0.046630859375,
756
+ "learning_rate": 6.89275893675527e-05,
757
+ "loss": 0.06668331623077392,
758
+ "mean_token_accuracy": 0.9744203120470047,
759
+ "num_tokens": 4348788.0,
760
+ "step": 750
761
+ },
762
+ {
763
+ "entropy": 0.07027366831898689,
764
+ "epoch": 2.026666666666667,
765
+ "grad_norm": 0.04931640625,
766
+ "learning_rate": 6.709440879926673e-05,
767
+ "loss": 0.06592612862586975,
768
+ "mean_token_accuracy": 0.974977059662342,
769
+ "num_tokens": 4406426.0,
770
+ "step": 760
771
+ },
772
+ {
773
+ "entropy": 0.06931058187037706,
774
+ "epoch": 2.0533333333333332,
775
+ "grad_norm": 0.05615234375,
776
+ "learning_rate": 6.526122823098076e-05,
777
+ "loss": 0.06590970754623413,
778
+ "mean_token_accuracy": 0.9754465237259865,
779
+ "num_tokens": 4464145.0,
780
+ "step": 770
781
+ },
782
+ {
783
+ "entropy": 0.06888462873175741,
784
+ "epoch": 2.08,
785
+ "grad_norm": 0.0771484375,
786
+ "learning_rate": 6.342804766269478e-05,
787
+ "loss": 0.06574443578720093,
788
+ "mean_token_accuracy": 0.9753611847758293,
789
+ "num_tokens": 4522337.0,
790
+ "step": 780
791
+ },
792
+ {
793
+ "entropy": 0.06854705391451717,
794
+ "epoch": 2.1066666666666665,
795
+ "grad_norm": 0.048583984375,
796
+ "learning_rate": 6.15948670944088e-05,
797
+ "loss": 0.06521100401878357,
798
+ "mean_token_accuracy": 0.9753493323922158,
799
+ "num_tokens": 4580367.0,
800
+ "step": 790
801
+ },
802
+ {
803
+ "entropy": 0.07078330684453249,
804
+ "epoch": 2.1333333333333333,
805
+ "grad_norm": 0.05615234375,
806
+ "learning_rate": 5.976168652612283e-05,
807
+ "loss": 0.06622718572616577,
808
+ "mean_token_accuracy": 0.9753074139356613,
809
+ "num_tokens": 4637987.0,
810
+ "step": 800
811
+ },
812
+ {
813
+ "entropy": 0.06828645439818501,
814
+ "epoch": 2.16,
815
+ "grad_norm": 0.05322265625,
816
+ "learning_rate": 5.792850595783685e-05,
817
+ "loss": 0.06564919948577881,
818
+ "mean_token_accuracy": 0.9755483835935592,
819
+ "num_tokens": 4695886.0,
820
+ "step": 810
821
+ },
822
+ {
823
+ "entropy": 0.06908007161691784,
824
+ "epoch": 2.1866666666666665,
825
+ "grad_norm": 0.053955078125,
826
+ "learning_rate": 5.6095325389550866e-05,
827
+ "loss": 0.06564045548439026,
828
+ "mean_token_accuracy": 0.9750929772853851,
829
+ "num_tokens": 4753433.0,
830
+ "step": 820
831
+ },
832
+ {
833
+ "entropy": 0.0687640338204801,
834
+ "epoch": 2.2133333333333334,
835
+ "grad_norm": 0.052001953125,
836
+ "learning_rate": 5.4262144821264894e-05,
837
+ "loss": 0.06568140983581543,
838
+ "mean_token_accuracy": 0.9756649106740951,
839
+ "num_tokens": 4811459.0,
840
+ "step": 830
841
+ },
842
+ {
843
+ "entropy": 0.06845789151266217,
844
+ "epoch": 2.24,
845
+ "grad_norm": 0.05322265625,
846
+ "learning_rate": 5.2428964252978916e-05,
847
+ "loss": 0.0644676923751831,
848
+ "mean_token_accuracy": 0.975266519188881,
849
+ "num_tokens": 4870185.0,
850
+ "step": 840
851
+ },
852
+ {
853
+ "entropy": 0.06863211318850518,
854
+ "epoch": 2.2666666666666666,
855
+ "grad_norm": 0.04541015625,
856
+ "learning_rate": 5.0595783684692945e-05,
857
+ "loss": 0.06450478434562683,
858
+ "mean_token_accuracy": 0.9765662357211113,
859
+ "num_tokens": 4928705.0,
860
+ "step": 850
861
+ },
862
+ {
863
+ "entropy": 0.06972924629226326,
864
+ "epoch": 2.2933333333333334,
865
+ "grad_norm": 0.052001953125,
866
+ "learning_rate": 4.876260311640697e-05,
867
+ "loss": 0.06663312911987304,
868
+ "mean_token_accuracy": 0.974696435034275,
869
+ "num_tokens": 4985443.0,
870
+ "step": 860
871
+ },
872
+ {
873
+ "entropy": 0.06926036775112152,
874
+ "epoch": 2.32,
875
+ "grad_norm": 0.050537109375,
876
+ "learning_rate": 4.6929422548120995e-05,
877
+ "loss": 0.06713547110557556,
878
+ "mean_token_accuracy": 0.974915811419487,
879
+ "num_tokens": 5042822.0,
880
+ "step": 870
881
+ },
882
+ {
883
+ "entropy": 0.07052302733063698,
884
+ "epoch": 2.3466666666666667,
885
+ "grad_norm": 0.0634765625,
886
+ "learning_rate": 4.509624197983501e-05,
887
+ "loss": 0.06567599177360535,
888
+ "mean_token_accuracy": 0.9748982191085815,
889
+ "num_tokens": 5100175.0,
890
+ "step": 880
891
+ },
892
+ {
893
+ "entropy": 0.06812258837744593,
894
+ "epoch": 2.3733333333333335,
895
+ "grad_norm": 0.046875,
896
+ "learning_rate": 4.326306141154904e-05,
897
+ "loss": 0.06469246745109558,
898
+ "mean_token_accuracy": 0.9756363064050675,
899
+ "num_tokens": 5158712.0,
900
+ "step": 890
901
+ },
902
+ {
903
+ "entropy": 0.06816195128485561,
904
+ "epoch": 2.4,
905
+ "grad_norm": 0.052734375,
906
+ "learning_rate": 4.142988084326306e-05,
907
+ "loss": 0.06546497344970703,
908
+ "mean_token_accuracy": 0.9757738158106803,
909
+ "num_tokens": 5217131.0,
910
+ "step": 900
911
+ },
912
+ {
913
+ "entropy": 0.069792415574193,
914
+ "epoch": 2.4266666666666667,
915
+ "grad_norm": 0.05078125,
916
+ "learning_rate": 3.959670027497709e-05,
917
+ "loss": 0.0649182915687561,
918
+ "mean_token_accuracy": 0.9754065230488778,
919
+ "num_tokens": 5275013.0,
920
+ "step": 910
921
+ },
922
+ {
923
+ "entropy": 0.06883814567700028,
924
+ "epoch": 2.453333333333333,
925
+ "grad_norm": 0.056640625,
926
+ "learning_rate": 3.776351970669111e-05,
927
+ "loss": 0.06510300636291504,
928
+ "mean_token_accuracy": 0.9752438068389893,
929
+ "num_tokens": 5332790.0,
930
+ "step": 920
931
+ },
932
+ {
933
+ "entropy": 0.06875044060871005,
934
+ "epoch": 2.48,
935
+ "grad_norm": 0.049072265625,
936
+ "learning_rate": 3.593033913840513e-05,
937
+ "loss": 0.06483979225158691,
938
+ "mean_token_accuracy": 0.9757223874330521,
939
+ "num_tokens": 5390822.0,
940
+ "step": 930
941
+ },
942
+ {
943
+ "entropy": 0.06840683752670884,
944
+ "epoch": 2.506666666666667,
945
+ "grad_norm": 0.06005859375,
946
+ "learning_rate": 3.409715857011916e-05,
947
+ "loss": 0.06430425643920898,
948
+ "mean_token_accuracy": 0.9757388934493065,
949
+ "num_tokens": 5449491.0,
950
+ "step": 940
951
+ },
952
+ {
953
+ "entropy": 0.06754063200205565,
954
+ "epoch": 2.533333333333333,
955
+ "grad_norm": 0.05078125,
956
+ "learning_rate": 3.2263978001833184e-05,
957
+ "loss": 0.06348671317100525,
958
+ "mean_token_accuracy": 0.9755341604351997,
959
+ "num_tokens": 5508033.0,
960
+ "step": 950
961
+ },
962
+ {
963
+ "entropy": 0.0688040841370821,
964
+ "epoch": 2.56,
965
+ "grad_norm": 0.052734375,
966
+ "learning_rate": 3.0430797433547202e-05,
967
+ "loss": 0.065876704454422,
968
+ "mean_token_accuracy": 0.9748074486851692,
969
+ "num_tokens": 5565759.0,
970
+ "step": 960
971
+ },
972
+ {
973
+ "entropy": 0.06747948992997407,
974
+ "epoch": 2.586666666666667,
975
+ "grad_norm": 0.04833984375,
976
+ "learning_rate": 2.8597616865261228e-05,
977
+ "loss": 0.06365298628807067,
978
+ "mean_token_accuracy": 0.976527401804924,
979
+ "num_tokens": 5624012.0,
980
+ "step": 970
981
+ },
982
+ {
983
+ "entropy": 0.06841521579772233,
984
+ "epoch": 2.6133333333333333,
985
+ "grad_norm": 0.0654296875,
986
+ "learning_rate": 2.6764436296975253e-05,
987
+ "loss": 0.063433438539505,
988
+ "mean_token_accuracy": 0.975967101752758,
989
+ "num_tokens": 5682704.0,
990
+ "step": 980
991
+ },
992
+ {
993
+ "entropy": 0.06835865909233689,
994
+ "epoch": 2.64,
995
+ "grad_norm": 0.052001953125,
996
+ "learning_rate": 2.4931255728689275e-05,
997
+ "loss": 0.06502929329872131,
998
+ "mean_token_accuracy": 0.9752402231097221,
999
+ "num_tokens": 5740681.0,
1000
+ "step": 990
1001
+ },
1002
+ {
1003
+ "entropy": 0.06940433531999587,
1004
+ "epoch": 2.6666666666666665,
1005
+ "grad_norm": 0.05908203125,
1006
+ "learning_rate": 2.30980751604033e-05,
1007
+ "loss": 0.06487542390823364,
1008
+ "mean_token_accuracy": 0.975363838672638,
1009
+ "num_tokens": 5797760.0,
1010
+ "step": 1000
1011
+ },
1012
+ {
1013
+ "entropy": 0.0681289511732757,
1014
+ "epoch": 2.6933333333333334,
1015
+ "grad_norm": 0.058349609375,
1016
+ "learning_rate": 2.1264894592117325e-05,
1017
+ "loss": 0.06304082870483399,
1018
+ "mean_token_accuracy": 0.9760710150003433,
1019
+ "num_tokens": 5856063.0,
1020
+ "step": 1010
1021
+ },
1022
+ {
1023
+ "entropy": 0.06688070669770241,
1024
+ "epoch": 2.7199999999999998,
1025
+ "grad_norm": 0.055908203125,
1026
+ "learning_rate": 1.943171402383135e-05,
1027
+ "loss": 0.06357985734939575,
1028
+ "mean_token_accuracy": 0.9757649004459381,
1029
+ "num_tokens": 5915064.0,
1030
+ "step": 1020
1031
+ },
1032
+ {
1033
+ "entropy": 0.06751234699040651,
1034
+ "epoch": 2.7466666666666666,
1035
+ "grad_norm": 0.046630859375,
1036
+ "learning_rate": 1.7598533455545372e-05,
1037
+ "loss": 0.06384648084640503,
1038
+ "mean_token_accuracy": 0.9755441978573799,
1039
+ "num_tokens": 5973236.0,
1040
+ "step": 1030
1041
+ },
1042
+ {
1043
+ "entropy": 0.06810427764430642,
1044
+ "epoch": 2.7733333333333334,
1045
+ "grad_norm": 0.0537109375,
1046
+ "learning_rate": 1.5765352887259398e-05,
1047
+ "loss": 0.06439932584762573,
1048
+ "mean_token_accuracy": 0.9754612877964973,
1049
+ "num_tokens": 6030961.0,
1050
+ "step": 1040
1051
+ },
1052
+ {
1053
+ "entropy": 0.06740497639402747,
1054
+ "epoch": 2.8,
1055
+ "grad_norm": 0.060791015625,
1056
+ "learning_rate": 1.393217231897342e-05,
1057
+ "loss": 0.0649307906627655,
1058
+ "mean_token_accuracy": 0.9751472353935242,
1059
+ "num_tokens": 6088421.0,
1060
+ "step": 1050
1061
+ },
1062
+ {
1063
+ "entropy": 0.0683750979602337,
1064
+ "epoch": 2.8266666666666667,
1065
+ "grad_norm": 0.0517578125,
1066
+ "learning_rate": 1.2098991750687445e-05,
1067
+ "loss": 0.06393700838088989,
1068
+ "mean_token_accuracy": 0.9756790235638618,
1069
+ "num_tokens": 6146038.0,
1070
+ "step": 1060
1071
+ },
1072
+ {
1073
+ "entropy": 0.06746620442718268,
1074
+ "epoch": 2.8533333333333335,
1075
+ "grad_norm": 0.06103515625,
1076
+ "learning_rate": 1.0265811182401468e-05,
1077
+ "loss": 0.06306450963020324,
1078
+ "mean_token_accuracy": 0.9759816557168961,
1079
+ "num_tokens": 6204853.0,
1080
+ "step": 1070
1081
+ },
1082
+ {
1083
+ "entropy": 0.06763323042541743,
1084
+ "epoch": 2.88,
1085
+ "grad_norm": 0.08984375,
1086
+ "learning_rate": 8.43263061411549e-06,
1087
+ "loss": 0.06345561742782593,
1088
+ "mean_token_accuracy": 0.976481594145298,
1089
+ "num_tokens": 6263092.0,
1090
+ "step": 1080
1091
+ },
1092
+ {
1093
+ "entropy": 0.06731683500111103,
1094
+ "epoch": 2.9066666666666667,
1095
+ "grad_norm": 0.055419921875,
1096
+ "learning_rate": 6.599450045829514e-06,
1097
+ "loss": 0.063571298122406,
1098
+ "mean_token_accuracy": 0.9759738191962242,
1099
+ "num_tokens": 6321218.0,
1100
+ "step": 1090
1101
+ },
1102
+ {
1103
+ "entropy": 0.06766533879563212,
1104
+ "epoch": 2.9333333333333336,
1105
+ "grad_norm": 0.054931640625,
1106
+ "learning_rate": 4.766269477543538e-06,
1107
+ "loss": 0.06393821239471435,
1108
+ "mean_token_accuracy": 0.97576145529747,
1109
+ "num_tokens": 6379366.0,
1110
+ "step": 1100
1111
+ },
1112
+ {
1113
+ "entropy": 0.06769317779690028,
1114
+ "epoch": 2.96,
1115
+ "grad_norm": 0.052490234375,
1116
+ "learning_rate": 2.933088909257562e-06,
1117
+ "loss": 0.06439238786697388,
1118
+ "mean_token_accuracy": 0.9759220287203789,
1119
+ "num_tokens": 6437208.0,
1120
+ "step": 1110
1121
+ },
1122
+ {
1123
+ "entropy": 0.06784211797639728,
1124
+ "epoch": 2.986666666666667,
1125
+ "grad_norm": 0.0556640625,
1126
+ "learning_rate": 1.0999083409715858e-06,
1127
+ "loss": 0.06470418572425843,
1128
+ "mean_token_accuracy": 0.9759309142827988,
1129
+ "num_tokens": 6494923.0,
1130
+ "step": 1120
1131
+ }
1132
+ ],
1133
+ "logging_steps": 10,
1134
+ "max_steps": 1125,
1135
+ "num_input_tokens_seen": 0,
1136
+ "num_train_epochs": 3,
1137
+ "save_steps": 500,
1138
+ "stateful_callbacks": {
1139
+ "TrainerControl": {
1140
+ "args": {
1141
+ "should_epoch_stop": false,
1142
+ "should_evaluate": false,
1143
+ "should_log": false,
1144
+ "should_save": true,
1145
+ "should_training_stop": true
1146
+ },
1147
+ "attributes": {}
1148
+ }
1149
+ },
1150
+ "total_flos": 3.0619076360380416e+17,
1151
+ "train_batch_size": 2,
1152
+ "trial_name": null,
1153
+ "trial_params": null
1154
+ }
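The learning_rate values in this log are consistent with a linear warmup followed by linear decay to zero at max_steps=1125, with a peak near 2e-4 and roughly 34 warmup steps; these numbers are inferred from the log, not read from training_args.bin. A small sketch to compare:

```python
def linear_warmup_decay(step, peak=2e-4, warmup=34, total=1125):
    """Linear warmup to `peak`, then linear decay to zero at `total` steps."""
    if step < warmup:
        return peak * step / warmup
    return peak * (total - step) / (total - warmup)

# The value logged at global step N lines up with the schedule evaluated at N - 1;
# e.g. step 900 logs ~4.14299e-05 in the state above.
print(linear_warmup_decay(899))
```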
consciousness/checkpoint-1125/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64a765ba4e23e22cae3a67629a6fda1378e5af8f1478b2252bd6159d00e12541
3
+ size 5649
consciousness/checkpoint-500/README.md ADDED
@@ -0,0 +1,209 @@
1
+ ---
2
+ base_model: meta-llama/Llama-3.1-8B-Instruct
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:meta-llama/Llama-3.1-8B-Instruct
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
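The "How to Get Started with the Model" section in the card above is still a placeholder. A minimal getting-started sketch, assuming the adapter files have been downloaded to a local `consciousness/` directory (the path is illustrative, not taken from the card):

```python
# Sketch: attach the LoRA adapter to its Llama-3.1-8B-Instruct base model.
# "consciousness" below is a local-directory assumption, not a published model id.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model = PeftModel.from_pretrained(base, "consciousness")  # adapter directory
tokenizer = AutoTokenizer.from_pretrained("consciousness")

messages = [{"role": "user", "content": "Hello!"}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
print(tokenizer.decode(model.generate(inputs, max_new_tokens=64)[0]))
```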
consciousness/checkpoint-500/adapter_config.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "q_proj",
33
+ "v_proj",
34
+ "k_proj",
35
+ "o_proj"
36
+ ],
37
+ "target_parameters": null,
38
+ "task_type": "CAUSAL_LM",
39
+ "trainable_token_indices": null,
40
+ "use_dora": false,
41
+ "use_qalora": false,
42
+ "use_rslora": false
43
+ }
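For reference, the adapter_config.json above corresponds to roughly the following PEFT configuration at training time, a sketch reconstructed from the values shown rather than the original training script:

```python
# Sketch: the LoraConfig implied by the adapter_config.json values above.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,                     # LoRA rank
    lora_alpha=32,            # scaling factor
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],  # attention projections
    task_type="CAUSAL_LM",
)
```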
consciousness/checkpoint-500/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:208e9b171d54a26b9a6bf3e6539d569970cca468b3cceb4c86b9c82889f10567
3
+ size 27297544
consciousness/checkpoint-500/chat_template.jinja ADDED
@@ -0,0 +1,109 @@
1
+ {{- bos_token }}
2
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {%- if not date_string is defined %}
9
+ {%- set date_string = "26 Jul 2024" %}
10
+ {%- endif %}
11
+ {%- if not tools is defined %}
12
+ {%- set tools = none %}
13
+ {%- endif %}
14
+
15
+ {#- This block extracts the system message, so we can slot it into the right place. #}
16
+ {%- if messages[0]['role'] == 'system' %}
17
+ {%- set system_message = messages[0]['content']|trim %}
18
+ {%- set messages = messages[1:] %}
19
+ {%- else %}
20
+ {%- set system_message = "" %}
21
+ {%- endif %}
22
+
23
+ {#- System message + builtin tools #}
24
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
25
+ {%- if builtin_tools is defined or tools is not none %}
26
+ {{- "Environment: ipython\n" }}
27
+ {%- endif %}
28
+ {%- if builtin_tools is defined %}
29
+ {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}}
30
+ {%- endif %}
31
+ {{- "Cutting Knowledge Date: December 2023\n" }}
32
+ {{- "Today Date: " + date_string + "\n\n" }}
33
+ {%- if tools is not none and not tools_in_user_message %}
34
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
35
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
36
+ {{- "Do not use variables.\n\n" }}
37
+ {%- for t in tools %}
38
+ {{- t | tojson(indent=4) }}
39
+ {{- "\n\n" }}
40
+ {%- endfor %}
41
+ {%- endif %}
42
+ {{- system_message }}
43
+ {{- "<|eot_id|>" }}
44
+
45
+ {#- Custom tools are passed in a user message with some extra guidance #}
46
+ {%- if tools_in_user_message and not tools is none %}
47
+ {#- Extract the first user message so we can plug it in here #}
48
+ {%- if messages | length != 0 %}
49
+ {%- set first_user_message = messages[0]['content']|trim %}
50
+ {%- set messages = messages[1:] %}
51
+ {%- else %}
52
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
53
+ {%- endif %}
54
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
55
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
56
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
57
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
58
+ {{- "Do not use variables.\n\n" }}
59
+ {%- for t in tools %}
60
+ {{- t | tojson(indent=4) }}
61
+ {{- "\n\n" }}
62
+ {%- endfor %}
63
+ {{- first_user_message + "<|eot_id|>"}}
64
+ {%- endif %}
65
+
66
+ {%- for message in messages %}
67
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
68
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
69
+ {%- elif 'tool_calls' in message %}
70
+ {%- if not message.tool_calls|length == 1 %}
71
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
72
+ {%- endif %}
73
+ {%- set tool_call = message.tool_calls[0].function %}
74
+ {%- if builtin_tools is defined and tool_call.name in builtin_tools %}
75
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
76
+ {{- "<|python_tag|>" + tool_call.name + ".call(" }}
77
+ {%- for arg_name, arg_val in tool_call.arguments | items %}
78
+ {{- arg_name + '="' + arg_val + '"' }}
79
+ {%- if not loop.last %}
80
+ {{- ", " }}
81
+ {%- endif %}
82
+ {%- endfor %}
83
+ {{- ")" }}
84
+ {%- else %}
85
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
86
+ {{- '{"name": "' + tool_call.name + '", ' }}
87
+ {{- '"parameters": ' }}
88
+ {{- tool_call.arguments | tojson }}
89
+ {{- "}" }}
90
+ {%- endif %}
91
+ {%- if builtin_tools is defined %}
92
+ {#- This means we're in ipython mode #}
93
+ {{- "<|eom_id|>" }}
94
+ {%- else %}
95
+ {{- "<|eot_id|>" }}
96
+ {%- endif %}
97
+ {%- elif message.role == "tool" or message.role == "ipython" %}
98
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
99
+ {%- if message.content is mapping or message.content is iterable %}
100
+ {{- message.content | tojson }}
101
+ {%- else %}
102
+ {{- message.content }}
103
+ {%- endif %}
104
+ {{- "<|eot_id|>" }}
105
+ {%- endif %}
106
+ {%- endfor %}
107
+ {%- if add_generation_prompt %}
108
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
109
+ {%- endif %}
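The Jinja file above is the stock Llama 3.1 chat template (system header, optional tool definitions, <|eot_id|> terminators). A small sketch of how it is applied through the tokenizer; the message contents are illustrative:

```python
# Sketch: render the chat template above into a prompt string without generating.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},  # illustrative
    {"role": "user", "content": "Summarize LoRA in one sentence."},
]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)  # shows the <|start_header_id|>...<|eot_id|> formatting
```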
consciousness/checkpoint-500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a00fb9968a64e95dfd14a8da47f012b55f5086b758d77bd8b0935845daa3e582
3
+ size 54745547
consciousness/checkpoint-500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03588b766dd784331959c80d527c519c2e449bde648d7f2e90539f9a580bd9d2
3
+ size 14645
consciousness/checkpoint-500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5f2b5fa48c7f09e4487186c3527dd0cb37a3de8892b16ebca696ce3df604cb5
3
+ size 1465
consciousness/checkpoint-500/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
consciousness/checkpoint-500/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin_of_text|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|eot_id|>",
6
+ "is_local": false,
7
+ "model_input_names": [
8
+ "input_ids",
9
+ "attention_mask"
10
+ ],
11
+ "model_max_length": 131072,
12
+ "pad_token": "<|eot_id|>",
13
+ "tokenizer_class": "TokenizersBackend"
14
+ }
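Note that pad_token is set to the same string as eos_token (<|eot_id|>), a common choice for causal-LM fine-tuning when the base tokenizer ships without a dedicated pad token. A short sketch of what that means when batching; the example strings are illustrative:

```python
# Sketch: with pad_token == eos_token, padded positions reuse the eos id,
# so the attention mask (and label masking during SFT) must hide the padding.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
tokenizer.pad_token = tokenizer.eos_token  # mirrors the config above

batch = tokenizer(
    ["short", "a somewhat longer example"], padding=True, return_tensors="pt"
)
print(batch["input_ids"].shape, batch["attention_mask"])  # 0s mark padded positions
```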
consciousness/checkpoint-500/trainer_state.json ADDED
@@ -0,0 +1,534 @@
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.3333333333333333,
6
+ "eval_steps": 500,
7
+ "global_step": 500,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "entropy": 2.8901569664478304,
14
+ "epoch": 0.02666666666666667,
15
+ "grad_norm": 0.267578125,
16
+ "learning_rate": 5.294117647058824e-05,
17
+ "loss": 2.9965847015380858,
18
+ "mean_token_accuracy": 0.4477638125419617,
19
+ "num_tokens": 57327.0,
20
+ "step": 10
21
+ },
22
+ {
23
+ "entropy": 2.431850343942642,
24
+ "epoch": 0.05333333333333334,
25
+ "grad_norm": 0.255859375,
26
+ "learning_rate": 0.00011176470588235294,
27
+ "loss": 2.636348533630371,
28
+ "mean_token_accuracy": 0.4853471860289574,
29
+ "num_tokens": 115513.0,
30
+ "step": 20
31
+ },
32
+ {
33
+ "entropy": 2.0503339916467667,
34
+ "epoch": 0.08,
35
+ "grad_norm": 0.2734375,
36
+ "learning_rate": 0.00017058823529411766,
37
+ "loss": 1.9743734359741212,
38
+ "mean_token_accuracy": 0.5749435268342495,
39
+ "num_tokens": 173246.0,
40
+ "step": 30
41
+ },
42
+ {
43
+ "entropy": 1.4122730612754821,
44
+ "epoch": 0.10666666666666667,
45
+ "grad_norm": 0.357421875,
46
+ "learning_rate": 0.00019908340971585702,
47
+ "loss": 1.3748690605163574,
48
+ "mean_token_accuracy": 0.6867235794663429,
49
+ "num_tokens": 231278.0,
50
+ "step": 40
51
+ },
52
+ {
53
+ "entropy": 1.0256530404090882,
54
+ "epoch": 0.13333333333333333,
55
+ "grad_norm": 0.359375,
56
+ "learning_rate": 0.00019725022914757106,
57
+ "loss": 0.9704485893249511,
58
+ "mean_token_accuracy": 0.7766638442873954,
59
+ "num_tokens": 288393.0,
60
+ "step": 50
61
+ },
62
+ {
63
+ "entropy": 0.7456447497010231,
64
+ "epoch": 0.16,
65
+ "grad_norm": 0.328125,
66
+ "learning_rate": 0.00019541704857928507,
67
+ "loss": 0.6671554565429687,
68
+ "mean_token_accuracy": 0.845753838121891,
69
+ "num_tokens": 346339.0,
70
+ "step": 60
71
+ },
72
+ {
73
+ "entropy": 0.537605831772089,
74
+ "epoch": 0.18666666666666668,
75
+ "grad_norm": 0.328125,
76
+ "learning_rate": 0.00019358386801099912,
77
+ "loss": 0.4658506393432617,
78
+ "mean_token_accuracy": 0.8909643113613128,
79
+ "num_tokens": 404015.0,
80
+ "step": 70
81
+ },
82
+ {
83
+ "entropy": 0.37200461626052855,
84
+ "epoch": 0.21333333333333335,
85
+ "grad_norm": 0.283203125,
86
+ "learning_rate": 0.0001917506874427131,
87
+ "loss": 0.3085629940032959,
88
+ "mean_token_accuracy": 0.9280218213796616,
89
+ "num_tokens": 461115.0,
90
+ "step": 80
91
+ },
92
+ {
93
+ "entropy": 0.2607572071254253,
94
+ "epoch": 0.24,
95
+ "grad_norm": 0.267578125,
96
+ "learning_rate": 0.00018991750687442712,
97
+ "loss": 0.21192119121551514,
98
+ "mean_token_accuracy": 0.9511988922953606,
99
+ "num_tokens": 518926.0,
100
+ "step": 90
101
+ },
102
+ {
103
+ "entropy": 0.21099306046962737,
104
+ "epoch": 0.26666666666666666,
105
+ "grad_norm": 0.2021484375,
106
+ "learning_rate": 0.00018808432630614116,
107
+ "loss": 0.1747212290763855,
108
+ "mean_token_accuracy": 0.9576459184288979,
109
+ "num_tokens": 577120.0,
110
+ "step": 100
111
+ },
112
+ {
113
+ "entropy": 0.1670930277556181,
114
+ "epoch": 0.29333333333333333,
115
+ "grad_norm": 0.2431640625,
116
+ "learning_rate": 0.00018625114573785518,
117
+ "loss": 0.14222853183746337,
118
+ "mean_token_accuracy": 0.9636133790016175,
119
+ "num_tokens": 635382.0,
120
+ "step": 110
121
+ },
122
+ {
123
+ "entropy": 0.15717535726726056,
124
+ "epoch": 0.32,
125
+ "grad_norm": 0.12890625,
126
+ "learning_rate": 0.00018441796516956922,
127
+ "loss": 0.13076614141464232,
128
+ "mean_token_accuracy": 0.9650501102209091,
129
+ "num_tokens": 692813.0,
130
+ "step": 120
131
+ },
132
+ {
133
+ "entropy": 0.14444066677242517,
134
+ "epoch": 0.3466666666666667,
135
+ "grad_norm": 0.1640625,
136
+ "learning_rate": 0.00018258478460128323,
137
+ "loss": 0.11674572229385376,
138
+ "mean_token_accuracy": 0.9665236040949822,
139
+ "num_tokens": 750815.0,
140
+ "step": 130
141
+ },
142
+ {
143
+ "entropy": 0.1316229362040758,
144
+ "epoch": 0.37333333333333335,
145
+ "grad_norm": 0.177734375,
146
+ "learning_rate": 0.00018075160403299728,
147
+ "loss": 0.10633546113967896,
148
+ "mean_token_accuracy": 0.9686767488718033,
149
+ "num_tokens": 809607.0,
150
+ "step": 140
151
+ },
152
+ {
153
+ "entropy": 0.12511782981455327,
154
+ "epoch": 0.4,
155
+ "grad_norm": 0.1103515625,
156
+ "learning_rate": 0.0001789184234647113,
157
+ "loss": 0.10267382860183716,
158
+ "mean_token_accuracy": 0.9692023977637291,
159
+ "num_tokens": 867374.0,
160
+ "step": 150
161
+ },
162
+ {
163
+ "entropy": 0.1147347992286086,
164
+ "epoch": 0.4266666666666667,
165
+ "grad_norm": 0.10302734375,
166
+ "learning_rate": 0.0001770852428964253,
167
+ "loss": 0.09604376554489136,
168
+ "mean_token_accuracy": 0.9704543471336364,
169
+ "num_tokens": 925873.0,
170
+ "step": 160
171
+ },
172
+ {
173
+ "entropy": 0.11136603765189648,
174
+ "epoch": 0.4533333333333333,
175
+ "grad_norm": 0.12158203125,
176
+ "learning_rate": 0.00017525206232813932,
177
+ "loss": 0.09679205417633056,
178
+ "mean_token_accuracy": 0.9701795622706413,
179
+ "num_tokens": 983653.0,
180
+ "step": 170
181
+ },
182
+ {
183
+ "entropy": 0.10962173249572515,
184
+ "epoch": 0.48,
185
+ "grad_norm": 0.10986328125,
186
+ "learning_rate": 0.00017341888175985334,
187
+ "loss": 0.09156813025474549,
188
+ "mean_token_accuracy": 0.971354915201664,
189
+ "num_tokens": 1041996.0,
190
+ "step": 180
191
+ },
192
+ {
193
+ "entropy": 0.10233879294246435,
194
+ "epoch": 0.5066666666666667,
195
+ "grad_norm": 0.2431640625,
196
+ "learning_rate": 0.00017158570119156738,
197
+ "loss": 0.09264941811561585,
198
+ "mean_token_accuracy": 0.9716016605496407,
199
+ "num_tokens": 1100105.0,
200
+ "step": 190
201
+ },
202
+ {
203
+ "entropy": 0.10874027330428362,
204
+ "epoch": 0.5333333333333333,
205
+ "grad_norm": 0.103515625,
206
+ "learning_rate": 0.0001697525206232814,
207
+ "loss": 0.09393113255500793,
208
+ "mean_token_accuracy": 0.9707169815897941,
209
+ "num_tokens": 1157940.0,
210
+ "step": 200
211
+ },
212
+ {
213
+ "entropy": 0.10715384036302567,
214
+ "epoch": 0.56,
215
+ "grad_norm": 0.0927734375,
216
+ "learning_rate": 0.00016791934005499544,
217
+ "loss": 0.08979941606521606,
218
+ "mean_token_accuracy": 0.9710627257823944,
219
+ "num_tokens": 1216048.0,
220
+ "step": 210
221
+ },
222
+ {
223
+ "entropy": 0.09897389095276594,
224
+ "epoch": 0.5866666666666667,
225
+ "grad_norm": 0.1005859375,
226
+ "learning_rate": 0.00016608615948670945,
227
+ "loss": 0.08646941781044007,
228
+ "mean_token_accuracy": 0.9712389498949051,
229
+ "num_tokens": 1274357.0,
230
+ "step": 220
231
+ },
232
+ {
233
+ "entropy": 0.09603469483554364,
234
+ "epoch": 0.6133333333333333,
235
+ "grad_norm": 0.09716796875,
236
+ "learning_rate": 0.0001642529789184235,
237
+ "loss": 0.08556437492370605,
238
+ "mean_token_accuracy": 0.9711127072572708,
239
+ "num_tokens": 1332152.0,
240
+ "step": 230
241
+ },
242
+ {
243
+ "entropy": 0.09263445399701595,
244
+ "epoch": 0.64,
245
+ "grad_norm": 0.1396484375,
246
+ "learning_rate": 0.0001624197983501375,
247
+ "loss": 0.08339133858680725,
248
+ "mean_token_accuracy": 0.9719239071011543,
249
+ "num_tokens": 1389574.0,
250
+ "step": 240
251
+ },
252
+ {
253
+ "entropy": 0.09686502479016781,
254
+ "epoch": 0.6666666666666666,
255
+ "grad_norm": 0.0712890625,
256
+ "learning_rate": 0.00016058661778185152,
257
+ "loss": 0.08281562328338624,
258
+ "mean_token_accuracy": 0.9720177337527275,
259
+ "num_tokens": 1447409.0,
260
+ "step": 250
261
+ },
262
+ {
263
+ "entropy": 0.08692479655146598,
264
+ "epoch": 0.6933333333333334,
265
+ "grad_norm": 0.0830078125,
266
+ "learning_rate": 0.00015875343721356554,
267
+ "loss": 0.08078550696372985,
268
+ "mean_token_accuracy": 0.9726089149713516,
269
+ "num_tokens": 1506058.0,
270
+ "step": 260
271
+ },
272
+ {
273
+ "entropy": 0.09034751150757074,
274
+ "epoch": 0.72,
275
+ "grad_norm": 0.08935546875,
276
+ "learning_rate": 0.00015692025664527955,
277
+ "loss": 0.08023000955581665,
278
+ "mean_token_accuracy": 0.9725215956568718,
279
+ "num_tokens": 1563817.0,
280
+ "step": 270
281
+ },
282
+ {
283
+ "entropy": 0.08724061641842126,
284
+ "epoch": 0.7466666666666667,
285
+ "grad_norm": 0.09423828125,
286
+ "learning_rate": 0.0001550870760769936,
287
+ "loss": 0.0812032699584961,
288
+ "mean_token_accuracy": 0.9722012594342232,
289
+ "num_tokens": 1621737.0,
290
+ "step": 280
291
+ },
292
+ {
293
+ "entropy": 0.08758355937898159,
294
+ "epoch": 0.7733333333333333,
295
+ "grad_norm": 0.2099609375,
296
+ "learning_rate": 0.0001532538955087076,
297
+ "loss": 0.08142906427383423,
298
+ "mean_token_accuracy": 0.9718389093875885,
299
+ "num_tokens": 1679970.0,
300
+ "step": 290
301
+ },
302
+ {
303
+ "entropy": 0.08803936429321765,
304
+ "epoch": 0.8,
305
+ "grad_norm": 0.12451171875,
306
+ "learning_rate": 0.00015142071494042165,
307
+ "loss": 0.0806335985660553,
308
+ "mean_token_accuracy": 0.9723069176077843,
309
+ "num_tokens": 1738304.0,
310
+ "step": 300
311
+ },
312
+ {
313
+ "entropy": 0.0896342158317566,
314
+ "epoch": 0.8266666666666667,
315
+ "grad_norm": 0.059814453125,
316
+ "learning_rate": 0.00014958753437213567,
317
+ "loss": 0.08014391660690308,
318
+ "mean_token_accuracy": 0.9721709281206131,
319
+ "num_tokens": 1795881.0,
320
+ "step": 310
321
+ },
322
+ {
323
+ "entropy": 0.08054284229874611,
324
+ "epoch": 0.8533333333333334,
325
+ "grad_norm": 0.09033203125,
326
+ "learning_rate": 0.00014775435380384968,
327
+ "loss": 0.07684423327445984,
328
+ "mean_token_accuracy": 0.9731693744659424,
329
+ "num_tokens": 1854853.0,
330
+ "step": 320
331
+ },
332
+ {
333
+ "entropy": 0.0840398171916604,
334
+ "epoch": 0.88,
335
+ "grad_norm": 0.05224609375,
336
+ "learning_rate": 0.00014592117323556373,
337
+ "loss": 0.07634277939796448,
338
+ "mean_token_accuracy": 0.9732364892959595,
339
+ "num_tokens": 1912939.0,
340
+ "step": 330
341
+ },
342
+ {
343
+ "entropy": 0.08260406106710434,
344
+ "epoch": 0.9066666666666666,
345
+ "grad_norm": 0.072265625,
346
+ "learning_rate": 0.00014408799266727771,
347
+ "loss": 0.076292884349823,
348
+ "mean_token_accuracy": 0.9736541777849197,
349
+ "num_tokens": 1971345.0,
350
+ "step": 340
351
+ },
352
+ {
353
+ "entropy": 0.08077720124274493,
354
+ "epoch": 0.9333333333333333,
355
+ "grad_norm": 0.057373046875,
356
+ "learning_rate": 0.00014225481209899176,
357
+ "loss": 0.07518362402915954,
358
+ "mean_token_accuracy": 0.9735523566603661,
359
+ "num_tokens": 2029618.0,
360
+ "step": 350
361
+ },
362
+ {
363
+ "entropy": 0.0814354794099927,
364
+ "epoch": 0.96,
365
+ "grad_norm": 0.087890625,
366
+ "learning_rate": 0.00014042163153070577,
367
+ "loss": 0.07500824928283692,
368
+ "mean_token_accuracy": 0.9733900666236878,
369
+ "num_tokens": 2088198.0,
370
+ "step": 360
371
+ },
372
+ {
373
+ "entropy": 0.08081495910882949,
374
+ "epoch": 0.9866666666666667,
375
+ "grad_norm": 0.05810546875,
376
+ "learning_rate": 0.0001385884509624198,
377
+ "loss": 0.07559239268302917,
378
+ "mean_token_accuracy": 0.9732825100421906,
379
+ "num_tokens": 2145822.0,
380
+ "step": 370
381
+ },
382
+ {
383
+ "entropy": 0.08157326076179743,
384
+ "epoch": 1.0133333333333334,
385
+ "grad_norm": 0.058837890625,
386
+ "learning_rate": 0.00013675527039413383,
387
+ "loss": 0.07452890872955323,
388
+ "mean_token_accuracy": 0.9733605772256851,
389
+ "num_tokens": 2203248.0,
390
+ "step": 380
391
+ },
392
+ {
393
+ "entropy": 0.07517405189573764,
394
+ "epoch": 1.04,
395
+ "grad_norm": 0.087890625,
396
+ "learning_rate": 0.00013492208982584784,
397
+ "loss": 0.07157951593399048,
398
+ "mean_token_accuracy": 0.9741677790880203,
399
+ "num_tokens": 2261444.0,
400
+ "step": 390
401
+ },
402
+ {
403
+ "entropy": 0.07766247931867838,
404
+ "epoch": 1.0666666666666667,
405
+ "grad_norm": 0.060302734375,
406
+ "learning_rate": 0.00013308890925756189,
407
+ "loss": 0.07183201909065247,
408
+ "mean_token_accuracy": 0.9740341395139694,
409
+ "num_tokens": 2319551.0,
410
+ "step": 400
411
+ },
412
+ {
413
+ "entropy": 0.07695812471210957,
414
+ "epoch": 1.0933333333333333,
415
+ "grad_norm": 0.054443359375,
416
+ "learning_rate": 0.0001312557286892759,
417
+ "loss": 0.07332680225372315,
418
+ "mean_token_accuracy": 0.9733265534043312,
419
+ "num_tokens": 2377422.0,
420
+ "step": 410
421
+ },
422
+ {
423
+ "entropy": 0.07884457465261221,
424
+ "epoch": 1.12,
425
+ "grad_norm": 0.060791015625,
426
+ "learning_rate": 0.00012942254812098992,
427
+ "loss": 0.07316585779190063,
428
+ "mean_token_accuracy": 0.973577855527401,
429
+ "num_tokens": 2435382.0,
430
+ "step": 420
431
+ },
432
+ {
433
+ "entropy": 0.07917917389422655,
434
+ "epoch": 1.1466666666666667,
435
+ "grad_norm": 0.0771484375,
436
+ "learning_rate": 0.00012758936755270393,
437
+ "loss": 0.07189081907272339,
438
+ "mean_token_accuracy": 0.9741694211959839,
439
+ "num_tokens": 2493780.0,
440
+ "step": 430
441
+ },
442
+ {
443
+ "entropy": 0.07554319184273481,
444
+ "epoch": 1.1733333333333333,
445
+ "grad_norm": 0.126953125,
446
+ "learning_rate": 0.00012575618698441797,
447
+ "loss": 0.07339509725570678,
448
+ "mean_token_accuracy": 0.9734218120574951,
449
+ "num_tokens": 2551587.0,
450
+ "step": 440
451
+ },
452
+ {
453
+ "entropy": 0.0771414702758193,
454
+ "epoch": 1.2,
455
+ "grad_norm": 0.052490234375,
456
+ "learning_rate": 0.000123923006416132,
457
+ "loss": 0.07223436832427979,
458
+ "mean_token_accuracy": 0.9731230854988098,
459
+ "num_tokens": 2609738.0,
460
+ "step": 450
461
+ },
462
+ {
463
+ "entropy": 0.07702515590935946,
464
+ "epoch": 1.2266666666666666,
465
+ "grad_norm": 0.05078125,
466
+ "learning_rate": 0.00012208982584784603,
467
+ "loss": 0.07126941084861756,
468
+ "mean_token_accuracy": 0.9743727937340736,
469
+ "num_tokens": 2667570.0,
470
+ "step": 460
471
+ },
472
+ {
473
+ "entropy": 0.0751312056556344,
474
+ "epoch": 1.2533333333333334,
475
+ "grad_norm": 0.052490234375,
476
+ "learning_rate": 0.00012025664527956005,
477
+ "loss": 0.07185030579566956,
478
+ "mean_token_accuracy": 0.9739165529608727,
479
+ "num_tokens": 2725220.0,
480
+ "step": 470
481
+ },
482
+ {
483
+ "entropy": 0.07488212268799543,
484
+ "epoch": 1.28,
485
+ "grad_norm": 0.0517578125,
486
+ "learning_rate": 0.00011842346471127406,
487
+ "loss": 0.07148469686508178,
488
+ "mean_token_accuracy": 0.973349143564701,
489
+ "num_tokens": 2782737.0,
490
+ "step": 480
491
+ },
492
+ {
493
+ "entropy": 0.07398118702694774,
494
+ "epoch": 1.3066666666666666,
495
+ "grad_norm": 0.044677734375,
496
+ "learning_rate": 0.00011659028414298809,
497
+ "loss": 0.06911076903343201,
498
+ "mean_token_accuracy": 0.9739873677492141,
499
+ "num_tokens": 2841536.0,
500
+ "step": 490
501
+ },
502
+ {
503
+ "entropy": 0.07522298116236925,
504
+ "epoch": 1.3333333333333333,
505
+ "grad_norm": 0.05908203125,
506
+ "learning_rate": 0.0001147571035747021,
507
+ "loss": 0.06964495182037353,
508
+ "mean_token_accuracy": 0.9739961415529251,
509
+ "num_tokens": 2899599.0,
510
+ "step": 500
511
+ }
512
+ ],
513
+ "logging_steps": 10,
514
+ "max_steps": 1125,
515
+ "num_input_tokens_seen": 0,
516
+ "num_train_epochs": 3,
517
+ "save_steps": 500,
518
+ "stateful_callbacks": {
519
+ "TrainerControl": {
520
+ "args": {
521
+ "should_epoch_stop": false,
522
+ "should_evaluate": false,
523
+ "should_log": false,
524
+ "should_save": true,
525
+ "should_training_stop": false
526
+ },
527
+ "attributes": {}
528
+ }
529
+ },
530
+ "total_flos": 1.3611705346695168e+17,
531
+ "train_batch_size": 2,
532
+ "trial_name": null,
533
+ "trial_params": null
534
+ }
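The trainer_state.json above logs one entry every 10 steps (loss, mean token accuracy, entropy, learning rate, tokens seen). A quick sketch for pulling the loss curve out of a downloaded copy of this file; the local path is an assumption:

```python
# Sketch: extract the training-loss curve from trainer_state.json.
import json

with open("consciousness/checkpoint-500/trainer_state.json") as f:  # local copy assumed
    state = json.load(f)

steps = [e["step"] for e in state["log_history"] if "loss" in e]
losses = [e["loss"] for e in state["log_history"] if "loss" in e]
print(f"{len(steps)} points, loss {losses[0]:.2f} -> {losses[-1]:.2f}")
```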
consciousness/checkpoint-500/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64a765ba4e23e22cae3a67629a6fda1378e5af8f1478b2252bd6159d00e12541
3
+ size 5649
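training_args.bin is the pickled TrainingArguments object saved by the Trainer. If you need the exact hyperparameters used, it can be loaded back with torch; note that this unpickles arbitrary Python objects, so only do it for files you trust, and the local path below is an assumption:

```python
# Sketch: inspect the saved TrainingArguments (weights_only=False unpickles
# arbitrary objects, so the file must be trusted).
import torch

args = torch.load(
    "consciousness/checkpoint-500/training_args.bin", weights_only=False
)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```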
consciousness/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
consciousness/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin_of_text|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|eot_id|>",
6
+ "is_local": false,
7
+ "model_input_names": [
8
+ "input_ids",
9
+ "attention_mask"
10
+ ],
11
+ "model_max_length": 131072,
12
+ "pad_token": "<|eot_id|>",
13
+ "tokenizer_class": "TokenizersBackend"
14
+ }
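The LFS pointers show the same sha256 (6b9e4e7f...) for the top-level tokenizer.json and the checkpoint copies, i.e. the tokenizer was not changed during training. A small sketch to confirm that on downloaded files; the paths assume a local copy of the repo layout:

```python
# Sketch: confirm two downloaded tokenizer.json copies are byte-identical
# by hashing them, matching the identical LFS oids in the pointers above.
import hashlib
from pathlib import Path

def sha256(path: str) -> str:
    return hashlib.sha256(Path(path).read_bytes()).hexdigest()

print(sha256("consciousness/tokenizer.json") ==
      sha256("consciousness/checkpoint-500/tokenizer.json"))  # expected: True
```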