AndriLawrence commited on Nov 1, 2025

Commit

3c341b2

verified ·

1 Parent(s): 07ac6c5

[auto] Update adapter/

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

adapter/README.md +62 -0
adapter/adapter_config.json +42 -0
adapter/adapter_model.safetensors +3 -0
adapter/added_tokens.json +13 -0
adapter/chat_template.jinja +8 -0
adapter/checkpoint-119/README.md +209 -0
adapter/checkpoint-119/adapter_config.json +42 -0
adapter/checkpoint-119/adapter_model.safetensors +3 -0
adapter/checkpoint-119/added_tokens.json +13 -0
adapter/checkpoint-119/chat_template.jinja +8 -0
adapter/checkpoint-119/optimizer.pt +3 -0
adapter/checkpoint-119/rng_state.pth +3 -0
adapter/checkpoint-119/scaler.pt +3 -0
adapter/checkpoint-119/scheduler.pt +3 -0
adapter/checkpoint-119/special_tokens_map.json +30 -0
adapter/checkpoint-119/tokenizer.json +0 -0
adapter/checkpoint-119/tokenizer.model +3 -0
adapter/checkpoint-119/tokenizer_config.json +131 -0
adapter/checkpoint-119/trainer_state.json +54 -0
adapter/checkpoint-119/training_args.bin +3 -0
adapter/checkpoint-238/README.md +209 -0
adapter/checkpoint-238/adapter_config.json +42 -0
adapter/checkpoint-238/adapter_model.safetensors +3 -0
adapter/checkpoint-238/added_tokens.json +13 -0
adapter/checkpoint-238/chat_template.jinja +8 -0
adapter/checkpoint-238/optimizer.pt +3 -0
adapter/checkpoint-238/rng_state.pth +3 -0
adapter/checkpoint-238/scaler.pt +3 -0
adapter/checkpoint-238/scheduler.pt +3 -0
adapter/checkpoint-238/special_tokens_map.json +30 -0
adapter/checkpoint-238/tokenizer.json +0 -0
adapter/checkpoint-238/tokenizer.model +3 -0
adapter/checkpoint-238/tokenizer_config.json +131 -0
adapter/checkpoint-238/trainer_state.json +74 -0
adapter/checkpoint-238/training_args.bin +3 -0
adapter/checkpoint-357/README.md +209 -0
adapter/checkpoint-357/adapter_config.json +42 -0
adapter/checkpoint-357/adapter_model.safetensors +3 -0
adapter/checkpoint-357/added_tokens.json +13 -0
adapter/checkpoint-357/chat_template.jinja +8 -0
adapter/checkpoint-357/optimizer.pt +3 -0
adapter/checkpoint-357/rng_state.pth +3 -0
adapter/checkpoint-357/scaler.pt +3 -0
adapter/checkpoint-357/scheduler.pt +3 -0
adapter/checkpoint-357/special_tokens_map.json +30 -0
adapter/checkpoint-357/tokenizer.json +0 -0
adapter/checkpoint-357/tokenizer.model +3 -0
adapter/checkpoint-357/tokenizer_config.json +131 -0
adapter/checkpoint-357/trainer_state.json +104 -0
adapter/checkpoint-357/training_args.bin +3 -0

adapter/README.md ADDED Viewed

	@@ -0,0 +1,62 @@

+---
+base_model: microsoft/Phi-3-mini-4k-instruct
+library_name: peft
+model_name: adapter
+tags:
+- base_model:adapter:microsoft/Phi-3-mini-4k-instruct
+- lora
+- sft
+- transformers
+- trl
+licence: license
+pipeline_tag: text-generation
+---
+# Model Card for adapter
+This model is a fine-tuned version of [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="None", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+This model was trained with SFT.
+### Framework versions
+- PEFT 0.17.1
+- TRL: 0.24.0
+- Transformers: 4.57.1
+- Pytorch: 2.8.0+cu126
+- Datasets: 4.0.0
+- Tokenizers: 0.22.1
+## Citations
+Cite TRL as:
+```bibtex
+@misc{vonwerra2022trl,
+	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
+	year         = 2020,
+	journal      = {GitHub repository},
+	publisher    = {GitHub},
+	howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```

adapter/adapter_config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 192,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 96,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj",
+    "gate_proj",
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "k_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

adapter/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3d5d69ede43a495f9732237c17232e69e7ed7ef301473bb50c5510c9e2e7523
+size 213926712

adapter/added_tokens.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "<|assistant|>": 32001,
+  "<|endoftext|>": 32000,
+  "<|end|>": 32007,
+  "<|placeholder1|>": 32002,
+  "<|placeholder2|>": 32003,
+  "<|placeholder3|>": 32004,
+  "<|placeholder4|>": 32005,
+  "<|placeholder5|>": 32008,
+  "<|placeholder6|>": 32009,
+  "<|system|>": 32006,
+  "<|user|>": 32010
+}

adapter/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,8 @@

+{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>
+' + message['content'] + '<|end|>
+'}}{% elif message['role'] == 'user' %}{{'<|user|>
+' + message['content'] + '<|end|>
+'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>
+' + message['content'] + '<|end|>
+'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>
+' }}{% else %}{{ eos_token }}{% endif %}

adapter/checkpoint-119/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: microsoft/Phi-3-mini-4k-instruct
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:microsoft/Phi-3-mini-4k-instruct
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

adapter/checkpoint-119/adapter_config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 192,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 96,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj",
+    "gate_proj",
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "k_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

adapter/checkpoint-119/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e83ed11637d6fbfc99cea8bf03da17b887409d7c6d5c2ef2858e5904085ac615
+size 213926712

adapter/checkpoint-119/added_tokens.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "<|assistant|>": 32001,
+  "<|endoftext|>": 32000,
+  "<|end|>": 32007,
+  "<|placeholder1|>": 32002,
+  "<|placeholder2|>": 32003,
+  "<|placeholder3|>": 32004,
+  "<|placeholder4|>": 32005,
+  "<|placeholder5|>": 32008,
+  "<|placeholder6|>": 32009,
+  "<|system|>": 32006,
+  "<|user|>": 32010
+}

adapter/checkpoint-119/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,8 @@

+{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>
+' + message['content'] + '<|end|>
+'}}{% elif message['role'] == 'user' %}{{'<|user|>
+' + message['content'] + '<|end|>
+'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>
+' + message['content'] + '<|end|>
+'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>
+' }}{% else %}{{ eos_token }}{% endif %}

adapter/checkpoint-119/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd89bc1b43686d9d15e572b652042c946d062296a4cbf73071e227160168de1d
+size 427927627

adapter/checkpoint-119/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a6b53332fd42b2155076da130565f288d9e3de0f66568dc6d23d5246e323932
+size 14645

adapter/checkpoint-119/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:39c6df766dff8c38df284e1e007782618a42ba539ea56db9178f70fddf1dff8d
+size 1383

adapter/checkpoint-119/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9de59e668a0590547a148ffdfc8f1df58d7b661067fcf00ab8213c11a6a86fe3
+size 1465

adapter/checkpoint-119/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

adapter/checkpoint-119/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

adapter/checkpoint-119/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

adapter/checkpoint-119/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,131 @@

+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "32000": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32001": {
+      "content": "<|assistant|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32002": {
+      "content": "<|placeholder1|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32003": {
+      "content": "<|placeholder2|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32004": {
+      "content": "<|placeholder3|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32005": {
+      "content": "<|placeholder4|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32006": {
+      "content": "<|system|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "<|end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "<|placeholder5|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "<|placeholder6|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "<|user|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 4096,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "left",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

adapter/checkpoint-119/trainer_state.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 119,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.2289654366672038,
+      "epoch": 0.42016806722689076,
+      "grad_norm": 0.6204772591590881,
+      "learning_rate": 2.9115977529165132e-05,
+      "loss": 1.3159,
+      "mean_token_accuracy": 0.7482603192329407,
+      "num_tokens": 23880.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.23407331008464097,
+      "epoch": 0.8403361344537815,
+      "grad_norm": 0.5559602975845337,
+      "learning_rate": 2.5461164569035244e-05,
+      "loss": 0.2882,
+      "mean_token_accuracy": 0.9380387860536575,
+      "num_tokens": 46973.0,
+      "step": 100
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 357,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1252283765852160.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

adapter/checkpoint-119/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cbe7279a74cafd876acae2e314504077100f9c173b602acb7a5c3abf72f205af
+size 6225

adapter/checkpoint-238/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: microsoft/Phi-3-mini-4k-instruct
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:microsoft/Phi-3-mini-4k-instruct
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

adapter/checkpoint-238/adapter_config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 192,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 96,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj",
+    "gate_proj",
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "k_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

adapter/checkpoint-238/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e78b8069f801f2696a092e0641dea29f0e2922cfabb518715b631717adee1d4e
+size 213926712

adapter/checkpoint-238/added_tokens.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "<|assistant|>": 32001,
+  "<|endoftext|>": 32000,
+  "<|end|>": 32007,
+  "<|placeholder1|>": 32002,
+  "<|placeholder2|>": 32003,
+  "<|placeholder3|>": 32004,
+  "<|placeholder4|>": 32005,
+  "<|placeholder5|>": 32008,
+  "<|placeholder6|>": 32009,
+  "<|system|>": 32006,
+  "<|user|>": 32010
+}

adapter/checkpoint-238/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,8 @@

+{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>
+' + message['content'] + '<|end|>
+'}}{% elif message['role'] == 'user' %}{{'<|user|>
+' + message['content'] + '<|end|>
+'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>
+' + message['content'] + '<|end|>
+'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>
+' }}{% else %}{{ eos_token }}{% endif %}

adapter/checkpoint-238/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e14513502fe2f15a74a032515912b9ab17f73264786ca8f8a3bbb5e7f442cfbf
+size 427927627

adapter/checkpoint-238/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef62eff921bc97c7324c3ea0bdbaf59577c09cb1d37b851ca9b2bdc2e07a1952
+size 14645

adapter/checkpoint-238/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13f7e91a3385cb740845b5b833c2fe09e67e4d93bbbf140a8170671b30608215
+size 1383

adapter/checkpoint-238/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c9bbf4b8cde06dedb8d0080a7827e3582f2ea1194e18d147a698f636941a2a2
+size 1465

adapter/checkpoint-238/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

adapter/checkpoint-238/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

adapter/checkpoint-238/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

adapter/checkpoint-238/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,131 @@

+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "32000": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32001": {
+      "content": "<|assistant|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32002": {
+      "content": "<|placeholder1|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32003": {
+      "content": "<|placeholder2|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32004": {
+      "content": "<|placeholder3|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32005": {
+      "content": "<|placeholder4|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32006": {
+      "content": "<|system|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "<|end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "<|placeholder5|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "<|placeholder6|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "<|user|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 4096,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "left",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

adapter/checkpoint-238/trainer_state.json ADDED Viewed

	@@ -0,0 +1,74 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 238,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.2289654366672038,
+      "epoch": 0.42016806722689076,
+      "grad_norm": 0.6204772591590881,
+      "learning_rate": 2.9115977529165132e-05,
+      "loss": 1.3159,
+      "mean_token_accuracy": 0.7482603192329407,
+      "num_tokens": 23880.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.23407331008464097,
+      "epoch": 0.8403361344537815,
+      "grad_norm": 0.5559602975845337,
+      "learning_rate": 2.5461164569035244e-05,
+      "loss": 0.2882,
+      "mean_token_accuracy": 0.9380387860536575,
+      "num_tokens": 46973.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.1401429148344323,
+      "epoch": 1.2605042016806722,
+      "grad_norm": 1.0606675148010254,
+      "learning_rate": 1.9687036349108852e-05,
+      "loss": 0.1391,
+      "mean_token_accuracy": 0.960659795999527,
+      "num_tokens": 69681.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.11909225001931191,
+      "epoch": 1.680672268907563,
+      "grad_norm": 0.7996079325675964,
+      "learning_rate": 1.2963366843187444e-05,
+      "loss": 0.1028,
+      "mean_token_accuracy": 0.9667794328927993,
+      "num_tokens": 93399.0,
+      "step": 200
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 357,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2505655035924480.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

adapter/checkpoint-238/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cbe7279a74cafd876acae2e314504077100f9c173b602acb7a5c3abf72f205af
+size 6225

adapter/checkpoint-357/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: microsoft/Phi-3-mini-4k-instruct
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:microsoft/Phi-3-mini-4k-instruct
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

adapter/checkpoint-357/adapter_config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 192,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 96,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj",
+    "gate_proj",
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "k_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

adapter/checkpoint-357/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3d5d69ede43a495f9732237c17232e69e7ed7ef301473bb50c5510c9e2e7523
+size 213926712

adapter/checkpoint-357/added_tokens.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "<|assistant|>": 32001,
+  "<|endoftext|>": 32000,
+  "<|end|>": 32007,
+  "<|placeholder1|>": 32002,
+  "<|placeholder2|>": 32003,
+  "<|placeholder3|>": 32004,
+  "<|placeholder4|>": 32005,
+  "<|placeholder5|>": 32008,
+  "<|placeholder6|>": 32009,
+  "<|system|>": 32006,
+  "<|user|>": 32010
+}

adapter/checkpoint-357/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,8 @@

+{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>
+' + message['content'] + '<|end|>
+'}}{% elif message['role'] == 'user' %}{{'<|user|>
+' + message['content'] + '<|end|>
+'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>
+' + message['content'] + '<|end|>
+'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>
+' }}{% else %}{{ eos_token }}{% endif %}

adapter/checkpoint-357/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d69522bebdb1c85a95bf5d45dd36c5e7ef220b2993de72ca28c5b10846d6553
+size 427927627

adapter/checkpoint-357/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:742d28a280d205ead0c96d4ac700da25876529d93e6688bfe9237e7c55c60d0b
+size 14645

adapter/checkpoint-357/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:238d2655fb1dcb33f9e8c2680c1a46d6d40181bfa7a6d21817a3567c922a5f96
+size 1383

adapter/checkpoint-357/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:496e375e8973aa520d9f111802da6e016006f91bb784d3d3d8d1f1cd3bd2c3d5
+size 1465

adapter/checkpoint-357/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

adapter/checkpoint-357/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

adapter/checkpoint-357/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

adapter/checkpoint-357/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,131 @@

+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "32000": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32001": {
+      "content": "<|assistant|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32002": {
+      "content": "<|placeholder1|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32003": {
+      "content": "<|placeholder2|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32004": {
+      "content": "<|placeholder3|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32005": {
+      "content": "<|placeholder4|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32006": {
+      "content": "<|system|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "<|end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "<|placeholder5|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "<|placeholder6|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "<|user|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 4096,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "left",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

adapter/checkpoint-357/trainer_state.json ADDED Viewed

	@@ -0,0 +1,104 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 357,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.2289654366672038,
+      "epoch": 0.42016806722689076,
+      "grad_norm": 0.6204772591590881,
+      "learning_rate": 2.9115977529165132e-05,
+      "loss": 1.3159,
+      "mean_token_accuracy": 0.7482603192329407,
+      "num_tokens": 23880.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.23407331008464097,
+      "epoch": 0.8403361344537815,
+      "grad_norm": 0.5559602975845337,
+      "learning_rate": 2.5461164569035244e-05,
+      "loss": 0.2882,
+      "mean_token_accuracy": 0.9380387860536575,
+      "num_tokens": 46973.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.1401429148344323,
+      "epoch": 1.2605042016806722,
+      "grad_norm": 1.0606675148010254,
+      "learning_rate": 1.9687036349108852e-05,
+      "loss": 0.1391,
+      "mean_token_accuracy": 0.960659795999527,
+      "num_tokens": 69681.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.11909225001931191,
+      "epoch": 1.680672268907563,
+      "grad_norm": 0.7996079325675964,
+      "learning_rate": 1.2963366843187444e-05,
+      "loss": 0.1028,
+      "mean_token_accuracy": 0.9667794328927993,
+      "num_tokens": 93399.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.07826383059844375,
+      "epoch": 2.100840336134454,
+      "grad_norm": 0.6310080289840698,
+      "learning_rate": 6.652296498607628e-06,
+      "loss": 0.0764,
+      "mean_token_accuracy": 0.9723758244514465,
+      "num_tokens": 116439.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.0685878100991249,
+      "epoch": 2.5210084033613445,
+      "grad_norm": 0.47998136281967163,
+      "learning_rate": 2.03237777467277e-06,
+      "loss": 0.0602,
+      "mean_token_accuracy": 0.9756964665651321,
+      "num_tokens": 139302.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.07313475079834461,
+      "epoch": 2.9411764705882355,
+      "grad_norm": 0.612311065196991,
+      "learning_rate": 3.9554675135183293e-08,
+      "loss": 0.0597,
+      "mean_token_accuracy": 0.9755178612470626,
+      "num_tokens": 162932.0,
+      "step": 350
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 357,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3757848176424960.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

adapter/checkpoint-357/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cbe7279a74cafd876acae2e314504077100f9c173b602acb7a5c3abf72f205af
+size 6225