limajr committed on
Commit
e15164d
·
verified ·
1 Parent(s): e9ab503

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ model_name: dpo
4
+ tags:
5
+ - generated_from_trainer
6
+ - trl
7
+ - dpo
8
+ licence: license
9
+ ---
10
+
11
+ # Model Card for dpo
12
+
13
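+ This model is a fine-tuned version of a base model whose Hub ID was not recorded at training time (the card template rendered it as `None`); the bundled `config.json` describes it as NBR-500, a ~500M-parameter Llama-architecture model for Brazilian Portuguese.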
14
+ It has been trained using [TRL](https://github.com/huggingface/trl).
15
+
16
+ ## Quick start
17
+
18
+ ```python
19
+ from transformers import pipeline
20
+
21
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
22
+ generator = pipeline("text-generation", model="<hub-repo-id-or-local-path-of-this-model>", device="cuda")
23
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
24
+ print(output["generated_text"])
25
+ ```
26
+
27
+ ## Training procedure
28
+
29
+
30
+
31
+
32
+ This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
33
+
34
+ ### Framework versions
35
+
36
+ - TRL: 0.26.2
37
+ - Transformers: 4.57.3
38
+ - Pytorch: 2.6.0+cu124
39
+ - Datasets: 4.4.2
40
+ - Tokenizers: 0.22.1
41
+
42
+ ## Citations
43
+
44
+ Cite DPO as:
45
+
46
+ ```bibtex
47
+ @inproceedings{rafailov2023direct,
48
+ title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}},
49
+ author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn},
50
+ year = 2023,
51
+ booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
52
+ url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html},
53
+ editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
54
+ }
55
+ ```
56
+
57
+ Cite TRL as:
58
+
59
+ ```bibtex
60
+ @misc{vonwerra2022trl,
61
+ title = {{TRL: Transformer Reinforcement Learning}},
62
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
63
+ year = 2020,
64
+ journal = {GitHub repository},
65
+ publisher = {GitHub},
66
+ howpublished = {\url{https://github.com/huggingface/trl}}
67
+ }
68
+ ```
checkpoint-100/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_comment": "NBR-500: ~500M par\u00e2metros para portugu\u00eas brasileiro",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "dtype": "bfloat16",
10
+ "eos_token_id": 2,
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 4096,
16
+ "max_position_embeddings": 2048,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 16,
20
+ "num_hidden_layers": 24,
21
+ "num_key_value_heads": 16,
22
+ "pad_token_id": 2,
23
+ "pretraining_tp": 1,
24
+ "rms_norm_eps": 1e-05,
25
+ "rope_scaling": null,
26
+ "rope_theta": 10000.0,
27
+ "tie_word_embeddings": false,
28
+ "transformers_version": "4.57.3",
29
+ "use_cache": false,
30
+ "vocab_size": 32000
31
+ }
checkpoint-100/generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": [
5
+ 2
6
+ ],
7
+ "pad_token_id": 2,
8
+ "transformers_version": "4.57.3"
9
+ }
checkpoint-100/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a992f659b95ebdee2e0cb46d7b5fc233803353d14eef4ccf33a1dab5b438d4b1
3
+ size 936503664
checkpoint-100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b11b3fa85bccc03d9bd12799fec6b94d0eec821dc7fa4874f5a95d795ea0aabd
3
+ size 1873142010
checkpoint-100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69
3
+ size 14244
checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaed6329260e258afe9ac442743aa37f2be3d46edb6a545c5a761ef3faeeaa70
3
+ size 1064
checkpoint-100/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-100/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-100/tokenizer_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<pad>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "3": {
30
+ "content": "<unk>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ }
37
+ },
38
+ "bos_token": "<s>",
39
+ "clean_up_tokenization_spaces": false,
40
+ "eos_token": "</s>",
41
+ "extra_special_tokens": {},
42
+ "model_max_length": 2048,
43
+ "pad_token": "</s>",
44
+ "tokenizer_class": "PreTrainedTokenizerFast",
45
+ "unk_token": "<unk>"
46
+ }
checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.8839779005524862,
6
+ "eval_steps": 500,
7
+ "global_step": 100,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.08839779005524862,
14
+ "grad_norm": 13.0625,
15
+ "learning_rate": 4.605263157894737e-06,
16
+ "logits/chosen": -3.266218900680542,
17
+ "logits/rejected": -3.173306703567505,
18
+ "logps/chosen": -169.16183471679688,
19
+ "logps/rejected": -190.1395263671875,
20
+ "loss": 0.6924,
21
+ "rewards/accuracies": 0.48124998807907104,
22
+ "rewards/chosen": 0.0008526706951670349,
23
+ "rewards/margins": 0.0017210483783856034,
24
+ "rewards/rejected": -0.0008683774503879249,
25
+ "step": 10
26
+ },
27
+ {
28
+ "epoch": 0.17679558011049723,
29
+ "grad_norm": 12.625,
30
+ "learning_rate": 4.166666666666667e-06,
31
+ "logits/chosen": -3.263390302658081,
32
+ "logits/rejected": -3.2149460315704346,
33
+ "logps/chosen": -162.94313049316406,
34
+ "logps/rejected": -179.63369750976562,
35
+ "loss": 0.6946,
36
+ "rewards/accuracies": 0.4625000059604645,
37
+ "rewards/chosen": -0.0010867499513551593,
38
+ "rewards/margins": -0.002635319484397769,
39
+ "rewards/rejected": 0.0015485694166272879,
40
+ "step": 20
41
+ },
42
+ {
43
+ "epoch": 0.26519337016574585,
44
+ "grad_norm": 13.625,
45
+ "learning_rate": 3.728070175438597e-06,
46
+ "logits/chosen": -3.299316883087158,
47
+ "logits/rejected": -3.243040084838867,
48
+ "logps/chosen": -168.03802490234375,
49
+ "logps/rejected": -191.59165954589844,
50
+ "loss": 0.6908,
51
+ "rewards/accuracies": 0.5562499761581421,
52
+ "rewards/chosen": 0.005441132001578808,
53
+ "rewards/margins": 0.004950051195919514,
54
+ "rewards/rejected": 0.0004910803982056677,
55
+ "step": 30
56
+ },
57
+ {
58
+ "epoch": 0.35359116022099446,
59
+ "grad_norm": 12.8125,
60
+ "learning_rate": 3.289473684210527e-06,
61
+ "logits/chosen": -3.3205394744873047,
62
+ "logits/rejected": -3.2248268127441406,
63
+ "logps/chosen": -163.4181365966797,
64
+ "logps/rejected": -190.28494262695312,
65
+ "loss": 0.692,
66
+ "rewards/accuracies": 0.4937500059604645,
67
+ "rewards/chosen": 0.0033660412300378084,
68
+ "rewards/margins": 0.0026486157439649105,
69
+ "rewards/rejected": 0.0007174253696575761,
70
+ "step": 40
71
+ },
72
+ {
73
+ "epoch": 0.4419889502762431,
74
+ "grad_norm": 13.4375,
75
+ "learning_rate": 2.8508771929824565e-06,
76
+ "logits/chosen": -3.266845226287842,
77
+ "logits/rejected": -3.239501953125,
78
+ "logps/chosen": -165.8727569580078,
79
+ "logps/rejected": -180.970703125,
80
+ "loss": 0.69,
81
+ "rewards/accuracies": 0.5562499761581421,
82
+ "rewards/chosen": 0.002517760032787919,
83
+ "rewards/margins": 0.006597781088203192,
84
+ "rewards/rejected": -0.004080021288245916,
85
+ "step": 50
86
+ },
87
+ {
88
+ "epoch": 0.5303867403314917,
89
+ "grad_norm": 13.0,
90
+ "learning_rate": 2.412280701754386e-06,
91
+ "logits/chosen": -3.300370454788208,
92
+ "logits/rejected": -3.2336509227752686,
93
+ "logps/chosen": -166.43919372558594,
94
+ "logps/rejected": -177.72369384765625,
95
+ "loss": 0.6912,
96
+ "rewards/accuracies": 0.5249999761581421,
97
+ "rewards/chosen": 0.0010731505462899804,
98
+ "rewards/margins": 0.004260644782334566,
99
+ "rewards/rejected": -0.0031874938867986202,
100
+ "step": 60
101
+ },
102
+ {
103
+ "epoch": 0.6187845303867403,
104
+ "grad_norm": 13.1875,
105
+ "learning_rate": 1.973684210526316e-06,
106
+ "logits/chosen": -3.292241334915161,
107
+ "logits/rejected": -3.263917922973633,
108
+ "logps/chosen": -166.0662841796875,
109
+ "logps/rejected": -179.9429168701172,
110
+ "loss": 0.6886,
111
+ "rewards/accuracies": 0.550000011920929,
112
+ "rewards/chosen": 0.004152910318225622,
113
+ "rewards/margins": 0.009452776983380318,
114
+ "rewards/rejected": -0.005299866199493408,
115
+ "step": 70
116
+ },
117
+ {
118
+ "epoch": 0.7071823204419889,
119
+ "grad_norm": 13.1875,
120
+ "learning_rate": 1.5350877192982458e-06,
121
+ "logits/chosen": -3.2774970531463623,
122
+ "logits/rejected": -3.2063546180725098,
123
+ "logps/chosen": -166.04795837402344,
124
+ "logps/rejected": -189.38204956054688,
125
+ "loss": 0.6917,
126
+ "rewards/accuracies": 0.574999988079071,
127
+ "rewards/chosen": 0.002573251724243164,
128
+ "rewards/margins": 0.00316311651840806,
129
+ "rewards/rejected": -0.0005898644449189305,
130
+ "step": 80
131
+ },
132
+ {
133
+ "epoch": 0.7955801104972375,
134
+ "grad_norm": 11.75,
135
+ "learning_rate": 1.0964912280701756e-06,
136
+ "logits/chosen": -3.3311314582824707,
137
+ "logits/rejected": -3.2796638011932373,
138
+ "logps/chosen": -157.93948364257812,
139
+ "logps/rejected": -183.2423095703125,
140
+ "loss": 0.6909,
141
+ "rewards/accuracies": 0.5874999761581421,
142
+ "rewards/chosen": 0.002239528112113476,
143
+ "rewards/margins": 0.00483693415299058,
144
+ "rewards/rejected": -0.002597406040877104,
145
+ "step": 90
146
+ },
147
+ {
148
+ "epoch": 0.8839779005524862,
149
+ "grad_norm": 14.75,
150
+ "learning_rate": 6.578947368421053e-07,
151
+ "logits/chosen": -3.2761390209198,
152
+ "logits/rejected": -3.2539830207824707,
153
+ "logps/chosen": -178.083984375,
154
+ "logps/rejected": -185.00401306152344,
155
+ "loss": 0.6913,
156
+ "rewards/accuracies": 0.5625,
157
+ "rewards/chosen": 0.001332092098891735,
158
+ "rewards/margins": 0.004107826389372349,
159
+ "rewards/rejected": -0.0027757335919886827,
160
+ "step": 100
161
+ }
162
+ ],
163
+ "logging_steps": 10,
164
+ "max_steps": 114,
165
+ "num_input_tokens_seen": 0,
166
+ "num_train_epochs": 1,
167
+ "save_steps": 100,
168
+ "stateful_callbacks": {
169
+ "TrainerControl": {
170
+ "args": {
171
+ "should_epoch_stop": false,
172
+ "should_evaluate": false,
173
+ "should_log": false,
174
+ "should_save": true,
175
+ "should_training_stop": false
176
+ },
177
+ "attributes": {}
178
+ }
179
+ },
180
+ "total_flos": 0.0,
181
+ "train_batch_size": 2,
182
+ "trial_name": null,
183
+ "trial_params": null
184
+ }
checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52ae26ca728b9c55a1db652b7133c7e2be09b11b9b63ff4c0d770c9a77cac8da
3
+ size 6328
checkpoint-114/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_comment": "NBR-500: ~500M par\u00e2metros para portugu\u00eas brasileiro",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "dtype": "bfloat16",
10
+ "eos_token_id": 2,
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 4096,
16
+ "max_position_embeddings": 2048,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 16,
20
+ "num_hidden_layers": 24,
21
+ "num_key_value_heads": 16,
22
+ "pad_token_id": 2,
23
+ "pretraining_tp": 1,
24
+ "rms_norm_eps": 1e-05,
25
+ "rope_scaling": null,
26
+ "rope_theta": 10000.0,
27
+ "tie_word_embeddings": false,
28
+ "transformers_version": "4.57.3",
29
+ "use_cache": false,
30
+ "vocab_size": 32000
31
+ }
checkpoint-114/generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": [
5
+ 2
6
+ ],
7
+ "pad_token_id": 2,
8
+ "transformers_version": "4.57.3"
9
+ }
checkpoint-114/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcf10b593b7eb0fd168419507c47e04f486dd08b2eaa751599a696af802e75a3
3
+ size 936503664
checkpoint-114/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81e3ea8f3a9eeab81011ff7ee56184b2cce918dc116baf95cf5d9bf957767f79
3
+ size 1873142010
checkpoint-114/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69
3
+ size 14244
checkpoint-114/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e605dc9a220a2c38aa0574c6768d6a81a37743a7d5cfd9324ba647b6b06737c2
3
+ size 1064
checkpoint-114/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-114/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-114/tokenizer_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<pad>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "3": {
30
+ "content": "<unk>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ }
37
+ },
38
+ "bos_token": "<s>",
39
+ "clean_up_tokenization_spaces": false,
40
+ "eos_token": "</s>",
41
+ "extra_special_tokens": {},
42
+ "model_max_length": 2048,
43
+ "pad_token": "</s>",
44
+ "tokenizer_class": "PreTrainedTokenizerFast",
45
+ "unk_token": "<unk>"
46
+ }
checkpoint-114/trainer_state.json ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 114,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.08839779005524862,
14
+ "grad_norm": 13.0625,
15
+ "learning_rate": 4.605263157894737e-06,
16
+ "logits/chosen": -3.266218900680542,
17
+ "logits/rejected": -3.173306703567505,
18
+ "logps/chosen": -169.16183471679688,
19
+ "logps/rejected": -190.1395263671875,
20
+ "loss": 0.6924,
21
+ "rewards/accuracies": 0.48124998807907104,
22
+ "rewards/chosen": 0.0008526706951670349,
23
+ "rewards/margins": 0.0017210483783856034,
24
+ "rewards/rejected": -0.0008683774503879249,
25
+ "step": 10
26
+ },
27
+ {
28
+ "epoch": 0.17679558011049723,
29
+ "grad_norm": 12.625,
30
+ "learning_rate": 4.166666666666667e-06,
31
+ "logits/chosen": -3.263390302658081,
32
+ "logits/rejected": -3.2149460315704346,
33
+ "logps/chosen": -162.94313049316406,
34
+ "logps/rejected": -179.63369750976562,
35
+ "loss": 0.6946,
36
+ "rewards/accuracies": 0.4625000059604645,
37
+ "rewards/chosen": -0.0010867499513551593,
38
+ "rewards/margins": -0.002635319484397769,
39
+ "rewards/rejected": 0.0015485694166272879,
40
+ "step": 20
41
+ },
42
+ {
43
+ "epoch": 0.26519337016574585,
44
+ "grad_norm": 13.625,
45
+ "learning_rate": 3.728070175438597e-06,
46
+ "logits/chosen": -3.299316883087158,
47
+ "logits/rejected": -3.243040084838867,
48
+ "logps/chosen": -168.03802490234375,
49
+ "logps/rejected": -191.59165954589844,
50
+ "loss": 0.6908,
51
+ "rewards/accuracies": 0.5562499761581421,
52
+ "rewards/chosen": 0.005441132001578808,
53
+ "rewards/margins": 0.004950051195919514,
54
+ "rewards/rejected": 0.0004910803982056677,
55
+ "step": 30
56
+ },
57
+ {
58
+ "epoch": 0.35359116022099446,
59
+ "grad_norm": 12.8125,
60
+ "learning_rate": 3.289473684210527e-06,
61
+ "logits/chosen": -3.3205394744873047,
62
+ "logits/rejected": -3.2248268127441406,
63
+ "logps/chosen": -163.4181365966797,
64
+ "logps/rejected": -190.28494262695312,
65
+ "loss": 0.692,
66
+ "rewards/accuracies": 0.4937500059604645,
67
+ "rewards/chosen": 0.0033660412300378084,
68
+ "rewards/margins": 0.0026486157439649105,
69
+ "rewards/rejected": 0.0007174253696575761,
70
+ "step": 40
71
+ },
72
+ {
73
+ "epoch": 0.4419889502762431,
74
+ "grad_norm": 13.4375,
75
+ "learning_rate": 2.8508771929824565e-06,
76
+ "logits/chosen": -3.266845226287842,
77
+ "logits/rejected": -3.239501953125,
78
+ "logps/chosen": -165.8727569580078,
79
+ "logps/rejected": -180.970703125,
80
+ "loss": 0.69,
81
+ "rewards/accuracies": 0.5562499761581421,
82
+ "rewards/chosen": 0.002517760032787919,
83
+ "rewards/margins": 0.006597781088203192,
84
+ "rewards/rejected": -0.004080021288245916,
85
+ "step": 50
86
+ },
87
+ {
88
+ "epoch": 0.5303867403314917,
89
+ "grad_norm": 13.0,
90
+ "learning_rate": 2.412280701754386e-06,
91
+ "logits/chosen": -3.300370454788208,
92
+ "logits/rejected": -3.2336509227752686,
93
+ "logps/chosen": -166.43919372558594,
94
+ "logps/rejected": -177.72369384765625,
95
+ "loss": 0.6912,
96
+ "rewards/accuracies": 0.5249999761581421,
97
+ "rewards/chosen": 0.0010731505462899804,
98
+ "rewards/margins": 0.004260644782334566,
99
+ "rewards/rejected": -0.0031874938867986202,
100
+ "step": 60
101
+ },
102
+ {
103
+ "epoch": 0.6187845303867403,
104
+ "grad_norm": 13.1875,
105
+ "learning_rate": 1.973684210526316e-06,
106
+ "logits/chosen": -3.292241334915161,
107
+ "logits/rejected": -3.263917922973633,
108
+ "logps/chosen": -166.0662841796875,
109
+ "logps/rejected": -179.9429168701172,
110
+ "loss": 0.6886,
111
+ "rewards/accuracies": 0.550000011920929,
112
+ "rewards/chosen": 0.004152910318225622,
113
+ "rewards/margins": 0.009452776983380318,
114
+ "rewards/rejected": -0.005299866199493408,
115
+ "step": 70
116
+ },
117
+ {
118
+ "epoch": 0.7071823204419889,
119
+ "grad_norm": 13.1875,
120
+ "learning_rate": 1.5350877192982458e-06,
121
+ "logits/chosen": -3.2774970531463623,
122
+ "logits/rejected": -3.2063546180725098,
123
+ "logps/chosen": -166.04795837402344,
124
+ "logps/rejected": -189.38204956054688,
125
+ "loss": 0.6917,
126
+ "rewards/accuracies": 0.574999988079071,
127
+ "rewards/chosen": 0.002573251724243164,
128
+ "rewards/margins": 0.00316311651840806,
129
+ "rewards/rejected": -0.0005898644449189305,
130
+ "step": 80
131
+ },
132
+ {
133
+ "epoch": 0.7955801104972375,
134
+ "grad_norm": 11.75,
135
+ "learning_rate": 1.0964912280701756e-06,
136
+ "logits/chosen": -3.3311314582824707,
137
+ "logits/rejected": -3.2796638011932373,
138
+ "logps/chosen": -157.93948364257812,
139
+ "logps/rejected": -183.2423095703125,
140
+ "loss": 0.6909,
141
+ "rewards/accuracies": 0.5874999761581421,
142
+ "rewards/chosen": 0.002239528112113476,
143
+ "rewards/margins": 0.00483693415299058,
144
+ "rewards/rejected": -0.002597406040877104,
145
+ "step": 90
146
+ },
147
+ {
148
+ "epoch": 0.8839779005524862,
149
+ "grad_norm": 14.75,
150
+ "learning_rate": 6.578947368421053e-07,
151
+ "logits/chosen": -3.2761390209198,
152
+ "logits/rejected": -3.2539830207824707,
153
+ "logps/chosen": -178.083984375,
154
+ "logps/rejected": -185.00401306152344,
155
+ "loss": 0.6913,
156
+ "rewards/accuracies": 0.5625,
157
+ "rewards/chosen": 0.001332092098891735,
158
+ "rewards/margins": 0.004107826389372349,
159
+ "rewards/rejected": -0.0027757335919886827,
160
+ "step": 100
161
+ },
162
+ {
163
+ "epoch": 0.9723756906077348,
164
+ "grad_norm": 13.3125,
165
+ "learning_rate": 2.192982456140351e-07,
166
+ "logits/chosen": -3.2679781913757324,
167
+ "logits/rejected": -3.1474175453186035,
168
+ "logps/chosen": -170.65786743164062,
169
+ "logps/rejected": -183.07583618164062,
170
+ "loss": 0.692,
171
+ "rewards/accuracies": 0.550000011920929,
172
+ "rewards/chosen": 0.001837458461523056,
173
+ "rewards/margins": 0.0027389838360249996,
174
+ "rewards/rejected": -0.0009015247924253345,
175
+ "step": 110
176
+ }
177
+ ],
178
+ "logging_steps": 10,
179
+ "max_steps": 114,
180
+ "num_input_tokens_seen": 0,
181
+ "num_train_epochs": 1,
182
+ "save_steps": 100,
183
+ "stateful_callbacks": {
184
+ "TrainerControl": {
185
+ "args": {
186
+ "should_epoch_stop": false,
187
+ "should_evaluate": false,
188
+ "should_log": false,
189
+ "should_save": true,
190
+ "should_training_stop": true
191
+ },
192
+ "attributes": {}
193
+ }
194
+ },
195
+ "total_flos": 0.0,
196
+ "train_batch_size": 2,
197
+ "trial_name": null,
198
+ "trial_params": null
199
+ }
checkpoint-114/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52ae26ca728b9c55a1db652b7133c7e2be09b11b9b63ff4c0d770c9a77cac8da
3
+ size 6328
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_comment": "NBR-500: ~500M par\u00e2metros para portugu\u00eas brasileiro",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "dtype": "bfloat16",
10
+ "eos_token_id": 2,
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 4096,
16
+ "max_position_embeddings": 2048,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 16,
20
+ "num_hidden_layers": 24,
21
+ "num_key_value_heads": 16,
22
+ "pad_token_id": 2,
23
+ "pretraining_tp": 1,
24
+ "rms_norm_eps": 1e-05,
25
+ "rope_scaling": null,
26
+ "rope_theta": 10000.0,
27
+ "tie_word_embeddings": false,
28
+ "transformers_version": "4.57.3",
29
+ "use_cache": false,
30
+ "vocab_size": 32000
31
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": [
5
+ 2
6
+ ],
7
+ "pad_token_id": 2,
8
+ "transformers_version": "4.57.3"
9
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcf10b593b7eb0fd168419507c47e04f486dd08b2eaa751599a696af802e75a3
3
+ size 936503664
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<pad>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "3": {
30
+ "content": "<unk>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ }
37
+ },
38
+ "bos_token": "<s>",
39
+ "clean_up_tokenization_spaces": false,
40
+ "eos_token": "</s>",
41
+ "extra_special_tokens": {},
42
+ "model_max_length": 2048,
43
+ "pad_token": "</s>",
44
+ "tokenizer_class": "PreTrainedTokenizerFast",
45
+ "unk_token": "<unk>"
46
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52ae26ca728b9c55a1db652b7133c7e2be09b11b9b63ff4c0d770c9a77cac8da
3
+ size 6328