Upload folder using huggingface_hub
Browse files
- README.md +68 -0
- checkpoint-100/config.json +31 -0
- checkpoint-100/generation_config.json +9 -0
- checkpoint-100/model.safetensors +3 -0
- checkpoint-100/optimizer.pt +3 -0
- checkpoint-100/rng_state.pth +3 -0
- checkpoint-100/scheduler.pt +3 -0
- checkpoint-100/special_tokens_map.json +24 -0
- checkpoint-100/tokenizer.json +0 -0
- checkpoint-100/tokenizer_config.json +46 -0
- checkpoint-100/trainer_state.json +184 -0
- checkpoint-100/training_args.bin +3 -0
- checkpoint-114/config.json +31 -0
- checkpoint-114/generation_config.json +9 -0
- checkpoint-114/model.safetensors +3 -0
- checkpoint-114/optimizer.pt +3 -0
- checkpoint-114/rng_state.pth +3 -0
- checkpoint-114/scheduler.pt +3 -0
- checkpoint-114/special_tokens_map.json +24 -0
- checkpoint-114/tokenizer.json +0 -0
- checkpoint-114/tokenizer_config.json +46 -0
- checkpoint-114/trainer_state.json +199 -0
- checkpoint-114/training_args.bin +3 -0
- config.json +31 -0
- generation_config.json +9 -0
- model.safetensors +3 -0
- special_tokens_map.json +24 -0
- tokenizer.json +0 -0
- tokenizer_config.json +46 -0
- training_args.bin +3 -0
README.md
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: transformers
|
| 3 |
+
model_name: dpo
|
| 4 |
+
tags:
|
| 5 |
+
- generated_from_trainer
|
| 6 |
+
- trl
|
| 7 |
+
- dpo
|
| 8 |
+
licence: license
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# Model Card for dpo
|
| 12 |
+
|
| 13 |
+
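This model is a fine-tuned version of a base checkpoint whose Hub id was not recorded in this card; the model config describes it as NBR-500, a ~500M-parameter Llama-architecture model for Brazilian Portuguese.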
|
| 14 |
+
It has been trained using [TRL](https://github.com/huggingface/trl).
|
| 15 |
+
|
| 16 |
+
## Quick start
|
| 17 |
+
|
| 18 |
+
```python
|
| 19 |
+
from transformers import pipeline
|
| 20 |
+
|
| 21 |
+
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
|
| 22 |
+
generator = pipeline("text-generation", model="<path-or-hub-id-of-this-model>", device="cuda")
|
| 23 |
+
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
|
| 24 |
+
print(output["generated_text"])
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
## Training procedure
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
|
| 33 |
+
|
| 34 |
+
### Framework versions
|
| 35 |
+
|
| 36 |
+
- TRL: 0.26.2
|
| 37 |
+
- Transformers: 4.57.3
|
| 38 |
+
- Pytorch: 2.6.0+cu124
|
| 39 |
+
- Datasets: 4.4.2
|
| 40 |
+
- Tokenizers: 0.22.1
|
| 41 |
+
|
| 42 |
+
## Citations
|
| 43 |
+
|
| 44 |
+
Cite DPO as:
|
| 45 |
+
|
| 46 |
+
```bibtex
|
| 47 |
+
@inproceedings{rafailov2023direct,
|
| 48 |
+
title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}},
|
| 49 |
+
author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn},
|
| 50 |
+
year = 2023,
|
| 51 |
+
booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
|
| 52 |
+
url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html},
|
| 53 |
+
editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
|
| 54 |
+
}
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
Cite TRL as:
|
| 58 |
+
|
| 59 |
+
```bibtex
|
| 60 |
+
@misc{vonwerra2022trl,
|
| 61 |
+
title = {{TRL: Transformer Reinforcement Learning}},
|
| 62 |
+
author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
|
| 63 |
+
year = 2020,
|
| 64 |
+
journal = {GitHub repository},
|
| 65 |
+
publisher = {GitHub},
|
| 66 |
+
howpublished = {\url{https://github.com/huggingface/trl}}
|
| 67 |
+
}
|
| 68 |
+
```
|
checkpoint-100/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_comment": "NBR-500: ~500M par\u00e2metros para portugu\u00eas brasileiro",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"LlamaForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_bias": false,
|
| 7 |
+
"attention_dropout": 0.0,
|
| 8 |
+
"bos_token_id": 1,
|
| 9 |
+
"dtype": "bfloat16",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 4096,
|
| 16 |
+
"max_position_embeddings": 2048,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"num_attention_heads": 16,
|
| 20 |
+
"num_hidden_layers": 24,
|
| 21 |
+
"num_key_value_heads": 16,
|
| 22 |
+
"pad_token_id": 2,
|
| 23 |
+
"pretraining_tp": 1,
|
| 24 |
+
"rms_norm_eps": 1e-05,
|
| 25 |
+
"rope_scaling": null,
|
| 26 |
+
"rope_theta": 10000.0,
|
| 27 |
+
"tie_word_embeddings": false,
|
| 28 |
+
"transformers_version": "4.57.3",
|
| 29 |
+
"use_cache": false,
|
| 30 |
+
"vocab_size": 32000
|
| 31 |
+
}
|
checkpoint-100/generation_config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 1,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
2
|
| 6 |
+
],
|
| 7 |
+
"pad_token_id": 2,
|
| 8 |
+
"transformers_version": "4.57.3"
|
| 9 |
+
}
|
checkpoint-100/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a992f659b95ebdee2e0cb46d7b5fc233803353d14eef4ccf33a1dab5b438d4b1
|
| 3 |
+
size 936503664
|
checkpoint-100/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b11b3fa85bccc03d9bd12799fec6b94d0eec821dc7fa4874f5a95d795ea0aabd
|
| 3 |
+
size 1873142010
|
checkpoint-100/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69
|
| 3 |
+
size 14244
|
checkpoint-100/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aaed6329260e258afe9ac442743aa37f2be3d46edb6a545c5a761ef3faeeaa70
|
| 3 |
+
size 1064
|
checkpoint-100/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
checkpoint-100/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-100/tokenizer_config.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"0": {
|
| 6 |
+
"content": "<pad>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"1": {
|
| 14 |
+
"content": "<s>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"2": {
|
| 22 |
+
"content": "</s>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"3": {
|
| 30 |
+
"content": "<unk>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
}
|
| 37 |
+
},
|
| 38 |
+
"bos_token": "<s>",
|
| 39 |
+
"clean_up_tokenization_spaces": false,
|
| 40 |
+
"eos_token": "</s>",
|
| 41 |
+
"extra_special_tokens": {},
|
| 42 |
+
"model_max_length": 2048,
|
| 43 |
+
"pad_token": "</s>",
|
| 44 |
+
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 45 |
+
"unk_token": "<unk>"
|
| 46 |
+
}
|
checkpoint-100/trainer_state.json
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.8839779005524862,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 100,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.08839779005524862,
|
| 14 |
+
"grad_norm": 13.0625,
|
| 15 |
+
"learning_rate": 4.605263157894737e-06,
|
| 16 |
+
"logits/chosen": -3.266218900680542,
|
| 17 |
+
"logits/rejected": -3.173306703567505,
|
| 18 |
+
"logps/chosen": -169.16183471679688,
|
| 19 |
+
"logps/rejected": -190.1395263671875,
|
| 20 |
+
"loss": 0.6924,
|
| 21 |
+
"rewards/accuracies": 0.48124998807907104,
|
| 22 |
+
"rewards/chosen": 0.0008526706951670349,
|
| 23 |
+
"rewards/margins": 0.0017210483783856034,
|
| 24 |
+
"rewards/rejected": -0.0008683774503879249,
|
| 25 |
+
"step": 10
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"epoch": 0.17679558011049723,
|
| 29 |
+
"grad_norm": 12.625,
|
| 30 |
+
"learning_rate": 4.166666666666667e-06,
|
| 31 |
+
"logits/chosen": -3.263390302658081,
|
| 32 |
+
"logits/rejected": -3.2149460315704346,
|
| 33 |
+
"logps/chosen": -162.94313049316406,
|
| 34 |
+
"logps/rejected": -179.63369750976562,
|
| 35 |
+
"loss": 0.6946,
|
| 36 |
+
"rewards/accuracies": 0.4625000059604645,
|
| 37 |
+
"rewards/chosen": -0.0010867499513551593,
|
| 38 |
+
"rewards/margins": -0.002635319484397769,
|
| 39 |
+
"rewards/rejected": 0.0015485694166272879,
|
| 40 |
+
"step": 20
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"epoch": 0.26519337016574585,
|
| 44 |
+
"grad_norm": 13.625,
|
| 45 |
+
"learning_rate": 3.728070175438597e-06,
|
| 46 |
+
"logits/chosen": -3.299316883087158,
|
| 47 |
+
"logits/rejected": -3.243040084838867,
|
| 48 |
+
"logps/chosen": -168.03802490234375,
|
| 49 |
+
"logps/rejected": -191.59165954589844,
|
| 50 |
+
"loss": 0.6908,
|
| 51 |
+
"rewards/accuracies": 0.5562499761581421,
|
| 52 |
+
"rewards/chosen": 0.005441132001578808,
|
| 53 |
+
"rewards/margins": 0.004950051195919514,
|
| 54 |
+
"rewards/rejected": 0.0004910803982056677,
|
| 55 |
+
"step": 30
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"epoch": 0.35359116022099446,
|
| 59 |
+
"grad_norm": 12.8125,
|
| 60 |
+
"learning_rate": 3.289473684210527e-06,
|
| 61 |
+
"logits/chosen": -3.3205394744873047,
|
| 62 |
+
"logits/rejected": -3.2248268127441406,
|
| 63 |
+
"logps/chosen": -163.4181365966797,
|
| 64 |
+
"logps/rejected": -190.28494262695312,
|
| 65 |
+
"loss": 0.692,
|
| 66 |
+
"rewards/accuracies": 0.4937500059604645,
|
| 67 |
+
"rewards/chosen": 0.0033660412300378084,
|
| 68 |
+
"rewards/margins": 0.0026486157439649105,
|
| 69 |
+
"rewards/rejected": 0.0007174253696575761,
|
| 70 |
+
"step": 40
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"epoch": 0.4419889502762431,
|
| 74 |
+
"grad_norm": 13.4375,
|
| 75 |
+
"learning_rate": 2.8508771929824565e-06,
|
| 76 |
+
"logits/chosen": -3.266845226287842,
|
| 77 |
+
"logits/rejected": -3.239501953125,
|
| 78 |
+
"logps/chosen": -165.8727569580078,
|
| 79 |
+
"logps/rejected": -180.970703125,
|
| 80 |
+
"loss": 0.69,
|
| 81 |
+
"rewards/accuracies": 0.5562499761581421,
|
| 82 |
+
"rewards/chosen": 0.002517760032787919,
|
| 83 |
+
"rewards/margins": 0.006597781088203192,
|
| 84 |
+
"rewards/rejected": -0.004080021288245916,
|
| 85 |
+
"step": 50
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"epoch": 0.5303867403314917,
|
| 89 |
+
"grad_norm": 13.0,
|
| 90 |
+
"learning_rate": 2.412280701754386e-06,
|
| 91 |
+
"logits/chosen": -3.300370454788208,
|
| 92 |
+
"logits/rejected": -3.2336509227752686,
|
| 93 |
+
"logps/chosen": -166.43919372558594,
|
| 94 |
+
"logps/rejected": -177.72369384765625,
|
| 95 |
+
"loss": 0.6912,
|
| 96 |
+
"rewards/accuracies": 0.5249999761581421,
|
| 97 |
+
"rewards/chosen": 0.0010731505462899804,
|
| 98 |
+
"rewards/margins": 0.004260644782334566,
|
| 99 |
+
"rewards/rejected": -0.0031874938867986202,
|
| 100 |
+
"step": 60
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.6187845303867403,
|
| 104 |
+
"grad_norm": 13.1875,
|
| 105 |
+
"learning_rate": 1.973684210526316e-06,
|
| 106 |
+
"logits/chosen": -3.292241334915161,
|
| 107 |
+
"logits/rejected": -3.263917922973633,
|
| 108 |
+
"logps/chosen": -166.0662841796875,
|
| 109 |
+
"logps/rejected": -179.9429168701172,
|
| 110 |
+
"loss": 0.6886,
|
| 111 |
+
"rewards/accuracies": 0.550000011920929,
|
| 112 |
+
"rewards/chosen": 0.004152910318225622,
|
| 113 |
+
"rewards/margins": 0.009452776983380318,
|
| 114 |
+
"rewards/rejected": -0.005299866199493408,
|
| 115 |
+
"step": 70
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.7071823204419889,
|
| 119 |
+
"grad_norm": 13.1875,
|
| 120 |
+
"learning_rate": 1.5350877192982458e-06,
|
| 121 |
+
"logits/chosen": -3.2774970531463623,
|
| 122 |
+
"logits/rejected": -3.2063546180725098,
|
| 123 |
+
"logps/chosen": -166.04795837402344,
|
| 124 |
+
"logps/rejected": -189.38204956054688,
|
| 125 |
+
"loss": 0.6917,
|
| 126 |
+
"rewards/accuracies": 0.574999988079071,
|
| 127 |
+
"rewards/chosen": 0.002573251724243164,
|
| 128 |
+
"rewards/margins": 0.00316311651840806,
|
| 129 |
+
"rewards/rejected": -0.0005898644449189305,
|
| 130 |
+
"step": 80
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"epoch": 0.7955801104972375,
|
| 134 |
+
"grad_norm": 11.75,
|
| 135 |
+
"learning_rate": 1.0964912280701756e-06,
|
| 136 |
+
"logits/chosen": -3.3311314582824707,
|
| 137 |
+
"logits/rejected": -3.2796638011932373,
|
| 138 |
+
"logps/chosen": -157.93948364257812,
|
| 139 |
+
"logps/rejected": -183.2423095703125,
|
| 140 |
+
"loss": 0.6909,
|
| 141 |
+
"rewards/accuracies": 0.5874999761581421,
|
| 142 |
+
"rewards/chosen": 0.002239528112113476,
|
| 143 |
+
"rewards/margins": 0.00483693415299058,
|
| 144 |
+
"rewards/rejected": -0.002597406040877104,
|
| 145 |
+
"step": 90
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"epoch": 0.8839779005524862,
|
| 149 |
+
"grad_norm": 14.75,
|
| 150 |
+
"learning_rate": 6.578947368421053e-07,
|
| 151 |
+
"logits/chosen": -3.2761390209198,
|
| 152 |
+
"logits/rejected": -3.2539830207824707,
|
| 153 |
+
"logps/chosen": -178.083984375,
|
| 154 |
+
"logps/rejected": -185.00401306152344,
|
| 155 |
+
"loss": 0.6913,
|
| 156 |
+
"rewards/accuracies": 0.5625,
|
| 157 |
+
"rewards/chosen": 0.001332092098891735,
|
| 158 |
+
"rewards/margins": 0.004107826389372349,
|
| 159 |
+
"rewards/rejected": -0.0027757335919886827,
|
| 160 |
+
"step": 100
|
| 161 |
+
}
|
| 162 |
+
],
|
| 163 |
+
"logging_steps": 10,
|
| 164 |
+
"max_steps": 114,
|
| 165 |
+
"num_input_tokens_seen": 0,
|
| 166 |
+
"num_train_epochs": 1,
|
| 167 |
+
"save_steps": 100,
|
| 168 |
+
"stateful_callbacks": {
|
| 169 |
+
"TrainerControl": {
|
| 170 |
+
"args": {
|
| 171 |
+
"should_epoch_stop": false,
|
| 172 |
+
"should_evaluate": false,
|
| 173 |
+
"should_log": false,
|
| 174 |
+
"should_save": true,
|
| 175 |
+
"should_training_stop": false
|
| 176 |
+
},
|
| 177 |
+
"attributes": {}
|
| 178 |
+
}
|
| 179 |
+
},
|
| 180 |
+
"total_flos": 0.0,
|
| 181 |
+
"train_batch_size": 2,
|
| 182 |
+
"trial_name": null,
|
| 183 |
+
"trial_params": null
|
| 184 |
+
}
|
checkpoint-100/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52ae26ca728b9c55a1db652b7133c7e2be09b11b9b63ff4c0d770c9a77cac8da
|
| 3 |
+
size 6328
|
checkpoint-114/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_comment": "NBR-500: ~500M par\u00e2metros para portugu\u00eas brasileiro",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"LlamaForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_bias": false,
|
| 7 |
+
"attention_dropout": 0.0,
|
| 8 |
+
"bos_token_id": 1,
|
| 9 |
+
"dtype": "bfloat16",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 4096,
|
| 16 |
+
"max_position_embeddings": 2048,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"num_attention_heads": 16,
|
| 20 |
+
"num_hidden_layers": 24,
|
| 21 |
+
"num_key_value_heads": 16,
|
| 22 |
+
"pad_token_id": 2,
|
| 23 |
+
"pretraining_tp": 1,
|
| 24 |
+
"rms_norm_eps": 1e-05,
|
| 25 |
+
"rope_scaling": null,
|
| 26 |
+
"rope_theta": 10000.0,
|
| 27 |
+
"tie_word_embeddings": false,
|
| 28 |
+
"transformers_version": "4.57.3",
|
| 29 |
+
"use_cache": false,
|
| 30 |
+
"vocab_size": 32000
|
| 31 |
+
}
|
checkpoint-114/generation_config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 1,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
2
|
| 6 |
+
],
|
| 7 |
+
"pad_token_id": 2,
|
| 8 |
+
"transformers_version": "4.57.3"
|
| 9 |
+
}
|
checkpoint-114/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcf10b593b7eb0fd168419507c47e04f486dd08b2eaa751599a696af802e75a3
|
| 3 |
+
size 936503664
|
checkpoint-114/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81e3ea8f3a9eeab81011ff7ee56184b2cce918dc116baf95cf5d9bf957767f79
|
| 3 |
+
size 1873142010
|
checkpoint-114/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69
|
| 3 |
+
size 14244
|
checkpoint-114/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e605dc9a220a2c38aa0574c6768d6a81a37743a7d5cfd9324ba647b6b06737c2
|
| 3 |
+
size 1064
|
checkpoint-114/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
checkpoint-114/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-114/tokenizer_config.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"0": {
|
| 6 |
+
"content": "<pad>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"1": {
|
| 14 |
+
"content": "<s>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"2": {
|
| 22 |
+
"content": "</s>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"3": {
|
| 30 |
+
"content": "<unk>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
}
|
| 37 |
+
},
|
| 38 |
+
"bos_token": "<s>",
|
| 39 |
+
"clean_up_tokenization_spaces": false,
|
| 40 |
+
"eos_token": "</s>",
|
| 41 |
+
"extra_special_tokens": {},
|
| 42 |
+
"model_max_length": 2048,
|
| 43 |
+
"pad_token": "</s>",
|
| 44 |
+
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 45 |
+
"unk_token": "<unk>"
|
| 46 |
+
}
|
checkpoint-114/trainer_state.json
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 114,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.08839779005524862,
|
| 14 |
+
"grad_norm": 13.0625,
|
| 15 |
+
"learning_rate": 4.605263157894737e-06,
|
| 16 |
+
"logits/chosen": -3.266218900680542,
|
| 17 |
+
"logits/rejected": -3.173306703567505,
|
| 18 |
+
"logps/chosen": -169.16183471679688,
|
| 19 |
+
"logps/rejected": -190.1395263671875,
|
| 20 |
+
"loss": 0.6924,
|
| 21 |
+
"rewards/accuracies": 0.48124998807907104,
|
| 22 |
+
"rewards/chosen": 0.0008526706951670349,
|
| 23 |
+
"rewards/margins": 0.0017210483783856034,
|
| 24 |
+
"rewards/rejected": -0.0008683774503879249,
|
| 25 |
+
"step": 10
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"epoch": 0.17679558011049723,
|
| 29 |
+
"grad_norm": 12.625,
|
| 30 |
+
"learning_rate": 4.166666666666667e-06,
|
| 31 |
+
"logits/chosen": -3.263390302658081,
|
| 32 |
+
"logits/rejected": -3.2149460315704346,
|
| 33 |
+
"logps/chosen": -162.94313049316406,
|
| 34 |
+
"logps/rejected": -179.63369750976562,
|
| 35 |
+
"loss": 0.6946,
|
| 36 |
+
"rewards/accuracies": 0.4625000059604645,
|
| 37 |
+
"rewards/chosen": -0.0010867499513551593,
|
| 38 |
+
"rewards/margins": -0.002635319484397769,
|
| 39 |
+
"rewards/rejected": 0.0015485694166272879,
|
| 40 |
+
"step": 20
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"epoch": 0.26519337016574585,
|
| 44 |
+
"grad_norm": 13.625,
|
| 45 |
+
"learning_rate": 3.728070175438597e-06,
|
| 46 |
+
"logits/chosen": -3.299316883087158,
|
| 47 |
+
"logits/rejected": -3.243040084838867,
|
| 48 |
+
"logps/chosen": -168.03802490234375,
|
| 49 |
+
"logps/rejected": -191.59165954589844,
|
| 50 |
+
"loss": 0.6908,
|
| 51 |
+
"rewards/accuracies": 0.5562499761581421,
|
| 52 |
+
"rewards/chosen": 0.005441132001578808,
|
| 53 |
+
"rewards/margins": 0.004950051195919514,
|
| 54 |
+
"rewards/rejected": 0.0004910803982056677,
|
| 55 |
+
"step": 30
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"epoch": 0.35359116022099446,
|
| 59 |
+
"grad_norm": 12.8125,
|
| 60 |
+
"learning_rate": 3.289473684210527e-06,
|
| 61 |
+
"logits/chosen": -3.3205394744873047,
|
| 62 |
+
"logits/rejected": -3.2248268127441406,
|
| 63 |
+
"logps/chosen": -163.4181365966797,
|
| 64 |
+
"logps/rejected": -190.28494262695312,
|
| 65 |
+
"loss": 0.692,
|
| 66 |
+
"rewards/accuracies": 0.4937500059604645,
|
| 67 |
+
"rewards/chosen": 0.0033660412300378084,
|
| 68 |
+
"rewards/margins": 0.0026486157439649105,
|
| 69 |
+
"rewards/rejected": 0.0007174253696575761,
|
| 70 |
+
"step": 40
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"epoch": 0.4419889502762431,
|
| 74 |
+
"grad_norm": 13.4375,
|
| 75 |
+
"learning_rate": 2.8508771929824565e-06,
|
| 76 |
+
"logits/chosen": -3.266845226287842,
|
| 77 |
+
"logits/rejected": -3.239501953125,
|
| 78 |
+
"logps/chosen": -165.8727569580078,
|
| 79 |
+
"logps/rejected": -180.970703125,
|
| 80 |
+
"loss": 0.69,
|
| 81 |
+
"rewards/accuracies": 0.5562499761581421,
|
| 82 |
+
"rewards/chosen": 0.002517760032787919,
|
| 83 |
+
"rewards/margins": 0.006597781088203192,
|
| 84 |
+
"rewards/rejected": -0.004080021288245916,
|
| 85 |
+
"step": 50
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"epoch": 0.5303867403314917,
|
| 89 |
+
"grad_norm": 13.0,
|
| 90 |
+
"learning_rate": 2.412280701754386e-06,
|
| 91 |
+
"logits/chosen": -3.300370454788208,
|
| 92 |
+
"logits/rejected": -3.2336509227752686,
|
| 93 |
+
"logps/chosen": -166.43919372558594,
|
| 94 |
+
"logps/rejected": -177.72369384765625,
|
| 95 |
+
"loss": 0.6912,
|
| 96 |
+
"rewards/accuracies": 0.5249999761581421,
|
| 97 |
+
"rewards/chosen": 0.0010731505462899804,
|
| 98 |
+
"rewards/margins": 0.004260644782334566,
|
| 99 |
+
"rewards/rejected": -0.0031874938867986202,
|
| 100 |
+
"step": 60
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.6187845303867403,
|
| 104 |
+
"grad_norm": 13.1875,
|
| 105 |
+
"learning_rate": 1.973684210526316e-06,
|
| 106 |
+
"logits/chosen": -3.292241334915161,
|
| 107 |
+
"logits/rejected": -3.263917922973633,
|
| 108 |
+
"logps/chosen": -166.0662841796875,
|
| 109 |
+
"logps/rejected": -179.9429168701172,
|
| 110 |
+
"loss": 0.6886,
|
| 111 |
+
"rewards/accuracies": 0.550000011920929,
|
| 112 |
+
"rewards/chosen": 0.004152910318225622,
|
| 113 |
+
"rewards/margins": 0.009452776983380318,
|
| 114 |
+
"rewards/rejected": -0.005299866199493408,
|
| 115 |
+
"step": 70
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.7071823204419889,
|
| 119 |
+
"grad_norm": 13.1875,
|
| 120 |
+
"learning_rate": 1.5350877192982458e-06,
|
| 121 |
+
"logits/chosen": -3.2774970531463623,
|
| 122 |
+
"logits/rejected": -3.2063546180725098,
|
| 123 |
+
"logps/chosen": -166.04795837402344,
|
| 124 |
+
"logps/rejected": -189.38204956054688,
|
| 125 |
+
"loss": 0.6917,
|
| 126 |
+
"rewards/accuracies": 0.574999988079071,
|
| 127 |
+
"rewards/chosen": 0.002573251724243164,
|
| 128 |
+
"rewards/margins": 0.00316311651840806,
|
| 129 |
+
"rewards/rejected": -0.0005898644449189305,
|
| 130 |
+
"step": 80
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"epoch": 0.7955801104972375,
|
| 134 |
+
"grad_norm": 11.75,
|
| 135 |
+
"learning_rate": 1.0964912280701756e-06,
|
| 136 |
+
"logits/chosen": -3.3311314582824707,
|
| 137 |
+
"logits/rejected": -3.2796638011932373,
|
| 138 |
+
"logps/chosen": -157.93948364257812,
|
| 139 |
+
"logps/rejected": -183.2423095703125,
|
| 140 |
+
"loss": 0.6909,
|
| 141 |
+
"rewards/accuracies": 0.5874999761581421,
|
| 142 |
+
"rewards/chosen": 0.002239528112113476,
|
| 143 |
+
"rewards/margins": 0.00483693415299058,
|
| 144 |
+
"rewards/rejected": -0.002597406040877104,
|
| 145 |
+
"step": 90
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"epoch": 0.8839779005524862,
|
| 149 |
+
"grad_norm": 14.75,
|
| 150 |
+
"learning_rate": 6.578947368421053e-07,
|
| 151 |
+
"logits/chosen": -3.2761390209198,
|
| 152 |
+
"logits/rejected": -3.2539830207824707,
|
| 153 |
+
"logps/chosen": -178.083984375,
|
| 154 |
+
"logps/rejected": -185.00401306152344,
|
| 155 |
+
"loss": 0.6913,
|
| 156 |
+
"rewards/accuracies": 0.5625,
|
| 157 |
+
"rewards/chosen": 0.001332092098891735,
|
| 158 |
+
"rewards/margins": 0.004107826389372349,
|
| 159 |
+
"rewards/rejected": -0.0027757335919886827,
|
| 160 |
+
"step": 100
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"epoch": 0.9723756906077348,
|
| 164 |
+
"grad_norm": 13.3125,
|
| 165 |
+
"learning_rate": 2.192982456140351e-07,
|
| 166 |
+
"logits/chosen": -3.2679781913757324,
|
| 167 |
+
"logits/rejected": -3.1474175453186035,
|
| 168 |
+
"logps/chosen": -170.65786743164062,
|
| 169 |
+
"logps/rejected": -183.07583618164062,
|
| 170 |
+
"loss": 0.692,
|
| 171 |
+
"rewards/accuracies": 0.550000011920929,
|
| 172 |
+
"rewards/chosen": 0.001837458461523056,
|
| 173 |
+
"rewards/margins": 0.0027389838360249996,
|
| 174 |
+
"rewards/rejected": -0.0009015247924253345,
|
| 175 |
+
"step": 110
|
| 176 |
+
}
|
| 177 |
+
],
|
| 178 |
+
"logging_steps": 10,
|
| 179 |
+
"max_steps": 114,
|
| 180 |
+
"num_input_tokens_seen": 0,
|
| 181 |
+
"num_train_epochs": 1,
|
| 182 |
+
"save_steps": 100,
|
| 183 |
+
"stateful_callbacks": {
|
| 184 |
+
"TrainerControl": {
|
| 185 |
+
"args": {
|
| 186 |
+
"should_epoch_stop": false,
|
| 187 |
+
"should_evaluate": false,
|
| 188 |
+
"should_log": false,
|
| 189 |
+
"should_save": true,
|
| 190 |
+
"should_training_stop": true
|
| 191 |
+
},
|
| 192 |
+
"attributes": {}
|
| 193 |
+
}
|
| 194 |
+
},
|
| 195 |
+
"total_flos": 0.0,
|
| 196 |
+
"train_batch_size": 2,
|
| 197 |
+
"trial_name": null,
|
| 198 |
+
"trial_params": null
|
| 199 |
+
}
|
checkpoint-114/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52ae26ca728b9c55a1db652b7133c7e2be09b11b9b63ff4c0d770c9a77cac8da
|
| 3 |
+
size 6328
|
config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_comment": "NBR-500: ~500M par\u00e2metros para portugu\u00eas brasileiro",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"LlamaForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_bias": false,
|
| 7 |
+
"attention_dropout": 0.0,
|
| 8 |
+
"bos_token_id": 1,
|
| 9 |
+
"dtype": "bfloat16",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 4096,
|
| 16 |
+
"max_position_embeddings": 2048,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"num_attention_heads": 16,
|
| 20 |
+
"num_hidden_layers": 24,
|
| 21 |
+
"num_key_value_heads": 16,
|
| 22 |
+
"pad_token_id": 2,
|
| 23 |
+
"pretraining_tp": 1,
|
| 24 |
+
"rms_norm_eps": 1e-05,
|
| 25 |
+
"rope_scaling": null,
|
| 26 |
+
"rope_theta": 10000.0,
|
| 27 |
+
"tie_word_embeddings": false,
|
| 28 |
+
"transformers_version": "4.57.3",
|
| 29 |
+
"use_cache": false,
|
| 30 |
+
"vocab_size": 32000
|
| 31 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 1,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
2
|
| 6 |
+
],
|
| 7 |
+
"pad_token_id": 2,
|
| 8 |
+
"transformers_version": "4.57.3"
|
| 9 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcf10b593b7eb0fd168419507c47e04f486dd08b2eaa751599a696af802e75a3
|
| 3 |
+
size 936503664
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"0": {
|
| 6 |
+
"content": "<pad>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"1": {
|
| 14 |
+
"content": "<s>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"2": {
|
| 22 |
+
"content": "</s>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"3": {
|
| 30 |
+
"content": "<unk>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
}
|
| 37 |
+
},
|
| 38 |
+
"bos_token": "<s>",
|
| 39 |
+
"clean_up_tokenization_spaces": false,
|
| 40 |
+
"eos_token": "</s>",
|
| 41 |
+
"extra_special_tokens": {},
|
| 42 |
+
"model_max_length": 2048,
|
| 43 |
+
"pad_token": "</s>",
|
| 44 |
+
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 45 |
+
"unk_token": "<unk>"
|
| 46 |
+
}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52ae26ca728b9c55a1db652b7133c7e2be09b11b9b63ff4c0d770c9a77cac8da
|
| 3 |
+
size 6328
|