Training in progress, epoch 1
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +4 -0
- config.json +35 -0
- model.safetensors +3 -0
- run-0/checkpoint-3032/config.json +35 -0
- run-0/checkpoint-3032/model.safetensors +3 -0
- run-0/checkpoint-3032/optimizer.pt +3 -0
- run-0/checkpoint-3032/rng_state.pth +3 -0
- run-0/checkpoint-3032/scaler.pt +3 -0
- run-0/checkpoint-3032/scheduler.pt +3 -0
- run-0/checkpoint-3032/sentencepiece.bpe.model +3 -0
- run-0/checkpoint-3032/special_tokens_map.json +15 -0
- run-0/checkpoint-3032/tokenizer.json +3 -0
- run-0/checkpoint-3032/tokenizer_config.json +55 -0
- run-0/checkpoint-3032/trainer_state.json +64 -0
- run-0/checkpoint-3032/training_args.bin +3 -0
- run-12/checkpoint-500/config.json +27 -0
- run-12/checkpoint-500/model.safetensors +3 -0
- run-12/checkpoint-500/optimizer.pt +3 -0
- run-12/checkpoint-500/rng_state.pth +3 -0
- run-12/checkpoint-500/scaler.pt +3 -0
- run-12/checkpoint-500/scheduler.pt +3 -0
- run-12/checkpoint-500/sentencepiece.bpe.model +3 -0
- run-12/checkpoint-500/special_tokens_map.json +15 -0
- run-12/checkpoint-500/tokenizer.json +3 -0
- run-12/checkpoint-500/tokenizer_config.json +55 -0
- run-12/checkpoint-500/trainer_state.json +208 -0
- run-12/checkpoint-500/training_args.bin +3 -0
- run-2/checkpoint-500/config.json +35 -0
- run-2/checkpoint-500/model.safetensors +3 -0
- run-2/checkpoint-500/optimizer.pt +3 -0
- run-2/checkpoint-500/rng_state.pth +3 -0
- run-2/checkpoint-500/scaler.pt +3 -0
- run-2/checkpoint-500/scheduler.pt +3 -0
- run-2/checkpoint-500/sentencepiece.bpe.model +3 -0
- run-2/checkpoint-500/special_tokens_map.json +15 -0
- run-2/checkpoint-500/tokenizer.json +3 -0
- run-2/checkpoint-500/tokenizer_config.json +55 -0
- run-2/checkpoint-500/trainer_state.json +208 -0
- run-2/checkpoint-500/training_args.bin +3 -0
- runs/May11_07-22-34_falcon-02/events.out.tfevents.1746944563.falcon-02.556678.0 +3 -0
- runs/May11_07-22-34_falcon-02/events.out.tfevents.1746944615.falcon-02.556678.1 +3 -0
- runs/May11_07-22-34_falcon-02/events.out.tfevents.1746944739.falcon-02.556678.2 +3 -0
- runs/May11_07-22-34_falcon-02/events.out.tfevents.1746944791.falcon-02.556678.3 +3 -0
- runs/May11_07-22-34_falcon-02/events.out.tfevents.1746944868.falcon-02.556678.4 +3 -0
- runs/May11_07-22-34_falcon-02/events.out.tfevents.1746944977.falcon-02.556678.5 +3 -0
- runs/May11_07-22-34_falcon-02/events.out.tfevents.1746945041.falcon-02.556678.6 +3 -0
- runs/May11_07-22-34_falcon-02/events.out.tfevents.1746945058.falcon-02.556678.7 +3 -0
- runs/May11_07-22-34_falcon-02/events.out.tfevents.1746945074.falcon-02.556678.8 +3 -0
- runs/May11_07-22-34_falcon-02/events.out.tfevents.1746945088.falcon-02.556678.9 +3 -0
- runs/May11_07-22-34_falcon-02/events.out.tfevents.1746945104.falcon-02.556678.10 +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
run-0/checkpoint-3032/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
run-12/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
run-2/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"XLMRobertaForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "not sexist",
|
| 14 |
+
"1": "sexist"
|
| 15 |
+
},
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 4096,
|
| 18 |
+
"label2id": {
|
| 19 |
+
"not sexist": 0,
|
| 20 |
+
"sexist": 1
|
| 21 |
+
},
|
| 22 |
+
"layer_norm_eps": 1e-05,
|
| 23 |
+
"max_position_embeddings": 514,
|
| 24 |
+
"model_type": "xlm-roberta",
|
| 25 |
+
"num_attention_heads": 16,
|
| 26 |
+
"num_hidden_layers": 24,
|
| 27 |
+
"output_past": true,
|
| 28 |
+
"pad_token_id": 1,
|
| 29 |
+
"position_embedding_type": "absolute",
|
| 30 |
+
"torch_dtype": "float32",
|
| 31 |
+
"transformers_version": "4.51.3",
|
| 32 |
+
"type_vocab_size": 1,
|
| 33 |
+
"use_cache": true,
|
| 34 |
+
"vocab_size": 250002
|
| 35 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cc03c5173b360ca67ebd2dcab9a11b585688af8205dcaadbc304aeb34e07b2b
|
| 3 |
+
size 2239618672
|
run-0/checkpoint-3032/config.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"XLMRobertaForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "not sexist",
|
| 14 |
+
"1": "sexist"
|
| 15 |
+
},
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 4096,
|
| 18 |
+
"label2id": {
|
| 19 |
+
"not sexist": 0,
|
| 20 |
+
"sexist": 1
|
| 21 |
+
},
|
| 22 |
+
"layer_norm_eps": 1e-05,
|
| 23 |
+
"max_position_embeddings": 514,
|
| 24 |
+
"model_type": "xlm-roberta",
|
| 25 |
+
"num_attention_heads": 16,
|
| 26 |
+
"num_hidden_layers": 24,
|
| 27 |
+
"output_past": true,
|
| 28 |
+
"pad_token_id": 1,
|
| 29 |
+
"position_embedding_type": "absolute",
|
| 30 |
+
"torch_dtype": "float32",
|
| 31 |
+
"transformers_version": "4.51.3",
|
| 32 |
+
"type_vocab_size": 1,
|
| 33 |
+
"use_cache": true,
|
| 34 |
+
"vocab_size": 250002
|
| 35 |
+
}
|
run-0/checkpoint-3032/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cc03c5173b360ca67ebd2dcab9a11b585688af8205dcaadbc304aeb34e07b2b
|
| 3 |
+
size 2239618672
|
run-0/checkpoint-3032/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:462e34503d7ef75f6b00a1bbce572f5085a1c9b2a53a68173d8a2df1d49dc7fa
|
| 3 |
+
size 4352331
|
run-0/checkpoint-3032/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43573145e13dc140110e6eab09890508e8b3cb7b8e476824893201bf02519e7c
|
| 3 |
+
size 14645
|
run-0/checkpoint-3032/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32721d3593cfb1ca3bdeee455f8bca855bf389eeeb500ad3da6bf1df546d6f0f
|
| 3 |
+
size 1383
|
run-0/checkpoint-3032/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a57d356c1a53e074e2470da1ef03af144caf6a442b86cdf421fb45dcd0dc87f3
|
| 3 |
+
size 1465
|
run-0/checkpoint-3032/sentencepiece.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
| 3 |
+
size 5069051
|
run-0/checkpoint-3032/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"cls_token": "<s>",
|
| 4 |
+
"eos_token": "</s>",
|
| 5 |
+
"mask_token": {
|
| 6 |
+
"content": "<mask>",
|
| 7 |
+
"lstrip": true,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"pad_token": "<pad>",
|
| 13 |
+
"sep_token": "</s>",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
run-0/checkpoint-3032/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c7a0e0871aad3996728f46860ee352dd6ec57264dae6319c75acba465f93d55
|
| 3 |
+
size 17082999
|
run-0/checkpoint-3032/tokenizer_config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"250001": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": true,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "<s>",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "<s>",
|
| 47 |
+
"eos_token": "</s>",
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "<mask>",
|
| 50 |
+
"model_max_length": 512,
|
| 51 |
+
"pad_token": "<pad>",
|
| 52 |
+
"sep_token": "</s>",
|
| 53 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 54 |
+
"unk_token": "<unk>"
|
| 55 |
+
}
|
run-0/checkpoint-3032/trainer_state.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 3032,
|
| 3 |
+
"best_metric": 0.33899504600141545,
|
| 4 |
+
"best_model_checkpoint": "./xlm-all/run-0/checkpoint-3032",
|
| 5 |
+
"epoch": 1.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 3032,
|
| 8 |
+
"is_hyper_param_search": true,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.0,
|
| 14 |
+
"grad_norm": 9.779642105102539,
|
| 15 |
+
"learning_rate": 1.8643519897391662e-05,
|
| 16 |
+
"loss": 0.7018,
|
| 17 |
+
"step": 3032
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.0,
|
| 21 |
+
"eval_f1": 0.33899504600141545,
|
| 22 |
+
"eval_loss": 0.7100579142570496,
|
| 23 |
+
"eval_runtime": 10.182,
|
| 24 |
+
"eval_samples_per_second": 183.46,
|
| 25 |
+
"eval_steps_per_second": 45.865,
|
| 26 |
+
"step": 3032
|
| 27 |
+
}
|
| 28 |
+
],
|
| 29 |
+
"logging_steps": 500,
|
| 30 |
+
"max_steps": 24256,
|
| 31 |
+
"num_input_tokens_seen": 0,
|
| 32 |
+
"num_train_epochs": 8,
|
| 33 |
+
"save_steps": 500,
|
| 34 |
+
"stateful_callbacks": {
|
| 35 |
+
"EarlyStoppingCallback": {
|
| 36 |
+
"args": {
|
| 37 |
+
"early_stopping_patience": 3,
|
| 38 |
+
"early_stopping_threshold": 0.0
|
| 39 |
+
},
|
| 40 |
+
"attributes": {
|
| 41 |
+
"early_stopping_patience_counter": 0
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"TrainerControl": {
|
| 45 |
+
"args": {
|
| 46 |
+
"should_epoch_stop": false,
|
| 47 |
+
"should_evaluate": false,
|
| 48 |
+
"should_log": false,
|
| 49 |
+
"should_save": true,
|
| 50 |
+
"should_training_stop": false
|
| 51 |
+
},
|
| 52 |
+
"attributes": {}
|
| 53 |
+
}
|
| 54 |
+
},
|
| 55 |
+
"total_flos": 4238423840415744.0,
|
| 56 |
+
"train_batch_size": 1,
|
| 57 |
+
"trial_name": null,
|
| 58 |
+
"trial_params": {
|
| 59 |
+
"learning_rate": 2.1300858155022713e-05,
|
| 60 |
+
"num_train_epochs": 8,
|
| 61 |
+
"per_device_train_batch_size": 1,
|
| 62 |
+
"weight_decay": 0.01912682401845879
|
| 63 |
+
}
|
| 64 |
+
}
|
run-0/checkpoint-3032/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:363fa8cab07af263bfe2ce4fc5547f865fc88e22d75a7d7f77858446d6d2fdb9
|
| 3 |
+
size 5713
|
run-12/checkpoint-500/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"XLMRobertaForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 4096,
|
| 14 |
+
"layer_norm_eps": 1e-05,
|
| 15 |
+
"max_position_embeddings": 514,
|
| 16 |
+
"model_type": "xlm-roberta",
|
| 17 |
+
"num_attention_heads": 16,
|
| 18 |
+
"num_hidden_layers": 24,
|
| 19 |
+
"output_past": true,
|
| 20 |
+
"pad_token_id": 1,
|
| 21 |
+
"position_embedding_type": "absolute",
|
| 22 |
+
"torch_dtype": "float32",
|
| 23 |
+
"transformers_version": "4.51.3",
|
| 24 |
+
"type_vocab_size": 1,
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"vocab_size": 250002
|
| 27 |
+
}
|
run-12/checkpoint-500/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd0bb97a408c6e2bc59d600861bab0584cdd19abfef5783fbaf60e1d06cb4d43
|
| 3 |
+
size 2239618672
|
run-12/checkpoint-500/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f5b929d3708018d81a4ec578e1e32e6aad9658e6202f07047478cc4e36f1a98
|
| 3 |
+
size 4479478370
|
run-12/checkpoint-500/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f224bd7745d146fc6fd23dd483ea1bbc47ebe0edc32b876a06e77a6f6958544
|
| 3 |
+
size 14645
|
run-12/checkpoint-500/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fbfec9c1604d855c11bf7fee15bf45b9bde326223f67131018d8f94ee0c4416
|
| 3 |
+
size 1383
|
run-12/checkpoint-500/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cba22734b9c47917b883d00be31ed32bbdea045ddbcb206acccd37ff1e77108
|
| 3 |
+
size 1465
|
run-12/checkpoint-500/sentencepiece.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
| 3 |
+
size 5069051
|
run-12/checkpoint-500/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"cls_token": "<s>",
|
| 4 |
+
"eos_token": "</s>",
|
| 5 |
+
"mask_token": {
|
| 6 |
+
"content": "<mask>",
|
| 7 |
+
"lstrip": true,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"pad_token": "<pad>",
|
| 13 |
+
"sep_token": "</s>",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
run-12/checkpoint-500/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c7a0e0871aad3996728f46860ee352dd6ec57264dae6319c75acba465f93d55
|
| 3 |
+
size 17082999
|
run-12/checkpoint-500/tokenizer_config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"250001": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": true,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "<s>",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "<s>",
|
| 47 |
+
"eos_token": "</s>",
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "<mask>",
|
| 50 |
+
"model_max_length": 512,
|
| 51 |
+
"pad_token": "<pad>",
|
| 52 |
+
"sep_token": "</s>",
|
| 53 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 54 |
+
"unk_token": "<unk>"
|
| 55 |
+
}
|
run-12/checkpoint-500/trainer_state.json
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 450,
|
| 3 |
+
"best_metric": 0.8391053291622452,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.32981530343007914,
|
| 6 |
+
"eval_steps": 50,
|
| 7 |
+
"global_step": 500,
|
| 8 |
+
"is_hyper_param_search": true,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.032981530343007916,
|
| 14 |
+
"grad_norm": 15.073063850402832,
|
| 15 |
+
"learning_rate": 1.0481442147752927e-05,
|
| 16 |
+
"loss": 0.7017,
|
| 17 |
+
"step": 50
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.032981530343007916,
|
| 21 |
+
"eval_f1": 0.5537185581303228,
|
| 22 |
+
"eval_loss": 0.6697196364402771,
|
| 23 |
+
"eval_runtime": 4.8576,
|
| 24 |
+
"eval_samples_per_second": 384.551,
|
| 25 |
+
"eval_steps_per_second": 24.086,
|
| 26 |
+
"step": 50
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 0.06596306068601583,
|
| 30 |
+
"grad_norm": 12.232400894165039,
|
| 31 |
+
"learning_rate": 1.0367361591280272e-05,
|
| 32 |
+
"loss": 0.6578,
|
| 33 |
+
"step": 100
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"epoch": 0.06596306068601583,
|
| 37 |
+
"eval_f1": 0.6990423298725792,
|
| 38 |
+
"eval_loss": 0.6165490746498108,
|
| 39 |
+
"eval_runtime": 4.8456,
|
| 40 |
+
"eval_samples_per_second": 385.501,
|
| 41 |
+
"eval_steps_per_second": 24.145,
|
| 42 |
+
"step": 100
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"epoch": 0.09894459102902374,
|
| 46 |
+
"grad_norm": 3.7829582691192627,
|
| 47 |
+
"learning_rate": 1.0250952860185725e-05,
|
| 48 |
+
"loss": 0.6224,
|
| 49 |
+
"step": 150
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"epoch": 0.09894459102902374,
|
| 53 |
+
"eval_f1": 0.7200410324019838,
|
| 54 |
+
"eval_loss": 0.5773984789848328,
|
| 55 |
+
"eval_runtime": 4.846,
|
| 56 |
+
"eval_samples_per_second": 385.472,
|
| 57 |
+
"eval_steps_per_second": 24.144,
|
| 58 |
+
"step": 150
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.13192612137203166,
|
| 62 |
+
"grad_norm": 10.99559497833252,
|
| 63 |
+
"learning_rate": 1.013687230371307e-05,
|
| 64 |
+
"loss": 0.5958,
|
| 65 |
+
"step": 200
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.13192612137203166,
|
| 69 |
+
"eval_f1": 0.7867794893545026,
|
| 70 |
+
"eval_loss": 0.5273851156234741,
|
| 71 |
+
"eval_runtime": 4.8539,
|
| 72 |
+
"eval_samples_per_second": 384.841,
|
| 73 |
+
"eval_steps_per_second": 24.104,
|
| 74 |
+
"step": 200
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"epoch": 0.16490765171503957,
|
| 78 |
+
"grad_norm": 89.5782699584961,
|
| 79 |
+
"learning_rate": 1.0020463572618525e-05,
|
| 80 |
+
"loss": 0.5523,
|
| 81 |
+
"step": 250
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"epoch": 0.16490765171503957,
|
| 85 |
+
"eval_f1": 0.8000186185067958,
|
| 86 |
+
"eval_loss": 0.5776695609092712,
|
| 87 |
+
"eval_runtime": 4.8517,
|
| 88 |
+
"eval_samples_per_second": 385.021,
|
| 89 |
+
"eval_steps_per_second": 24.115,
|
| 90 |
+
"step": 250
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"epoch": 0.19788918205804748,
|
| 94 |
+
"grad_norm": 25.763916015625,
|
| 95 |
+
"learning_rate": 9.906383016145868e-06,
|
| 96 |
+
"loss": 0.5392,
|
| 97 |
+
"step": 300
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"epoch": 0.19788918205804748,
|
| 101 |
+
"eval_f1": 0.7131010817115004,
|
| 102 |
+
"eval_loss": 0.67691570520401,
|
| 103 |
+
"eval_runtime": 4.8697,
|
| 104 |
+
"eval_samples_per_second": 383.594,
|
| 105 |
+
"eval_steps_per_second": 24.026,
|
| 106 |
+
"step": 300
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"epoch": 0.23087071240105542,
|
| 110 |
+
"grad_norm": 15.219924926757812,
|
| 111 |
+
"learning_rate": 9.789974285051322e-06,
|
| 112 |
+
"loss": 0.5264,
|
| 113 |
+
"step": 350
|
| 114 |
+
},
|
| 115 |
+
{
|
| 116 |
+
"epoch": 0.23087071240105542,
|
| 117 |
+
"eval_f1": 0.8238222304011777,
|
| 118 |
+
"eval_loss": 0.47358641028404236,
|
| 119 |
+
"eval_runtime": 4.8732,
|
| 120 |
+
"eval_samples_per_second": 383.324,
|
| 121 |
+
"eval_steps_per_second": 24.009,
|
| 122 |
+
"step": 350
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 0.2638522427440633,
|
| 126 |
+
"grad_norm": 86.06718444824219,
|
| 127 |
+
"learning_rate": 9.673565553956777e-06,
|
| 128 |
+
"loss": 0.5024,
|
| 129 |
+
"step": 400
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 0.2638522427440633,
|
| 133 |
+
"eval_f1": 0.8296470392671125,
|
| 134 |
+
"eval_loss": 0.5718086957931519,
|
| 135 |
+
"eval_runtime": 4.8555,
|
| 136 |
+
"eval_samples_per_second": 384.715,
|
| 137 |
+
"eval_steps_per_second": 24.096,
|
| 138 |
+
"step": 400
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"epoch": 0.29683377308707126,
|
| 142 |
+
"grad_norm": 33.91449737548828,
|
| 143 |
+
"learning_rate": 9.55715682286223e-06,
|
| 144 |
+
"loss": 0.4662,
|
| 145 |
+
"step": 450
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"epoch": 0.29683377308707126,
|
| 149 |
+
"eval_f1": 0.8391053291622452,
|
| 150 |
+
"eval_loss": 0.46681010723114014,
|
| 151 |
+
"eval_runtime": 4.8544,
|
| 152 |
+
"eval_samples_per_second": 384.808,
|
| 153 |
+
"eval_steps_per_second": 24.102,
|
| 154 |
+
"step": 450
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"epoch": 0.32981530343007914,
|
| 158 |
+
"grad_norm": 24.058568954467773,
|
| 159 |
+
"learning_rate": 9.440748091767686e-06,
|
| 160 |
+
"loss": 0.5321,
|
| 161 |
+
"step": 500
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"epoch": 0.32981530343007914,
|
| 165 |
+
"eval_f1": 0.8283790177730616,
|
| 166 |
+
"eval_loss": 0.5165355801582336,
|
| 167 |
+
"eval_runtime": 4.8573,
|
| 168 |
+
"eval_samples_per_second": 384.579,
|
| 169 |
+
"eval_steps_per_second": 24.088,
|
| 170 |
+
"step": 500
|
| 171 |
+
}
|
| 172 |
+
],
|
| 173 |
+
"logging_steps": 50,
|
| 174 |
+
"max_steps": 4548,
|
| 175 |
+
"num_input_tokens_seen": 0,
|
| 176 |
+
"num_train_epochs": 3,
|
| 177 |
+
"save_steps": 500,
|
| 178 |
+
"stateful_callbacks": {
|
| 179 |
+
"EarlyStoppingCallback": {
|
| 180 |
+
"args": {
|
| 181 |
+
"early_stopping_patience": 3,
|
| 182 |
+
"early_stopping_threshold": 0.0
|
| 183 |
+
},
|
| 184 |
+
"attributes": {
|
| 185 |
+
"early_stopping_patience_counter": 1
|
| 186 |
+
}
|
| 187 |
+
},
|
| 188 |
+
"TrainerControl": {
|
| 189 |
+
"args": {
|
| 190 |
+
"should_epoch_stop": false,
|
| 191 |
+
"should_evaluate": false,
|
| 192 |
+
"should_log": false,
|
| 193 |
+
"should_save": true,
|
| 194 |
+
"should_training_stop": false
|
| 195 |
+
},
|
| 196 |
+
"attributes": {}
|
| 197 |
+
}
|
| 198 |
+
},
|
| 199 |
+
"total_flos": 1397897044992000.0,
|
| 200 |
+
"train_batch_size": 8,
|
| 201 |
+
"trial_name": null,
|
| 202 |
+
"trial_params": {
|
| 203 |
+
"learning_rate": 1.0588538180359909e-05,
|
| 204 |
+
"num_train_epochs": 3,
|
| 205 |
+
"per_device_train_batch_size": 8,
|
| 206 |
+
"weight_decay": 0.03407948058217602
|
| 207 |
+
}
|
| 208 |
+
}
|
run-12/checkpoint-500/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c296ab74dc3a225baa76195c87e77c70aae20136b09890596615bc64e19d627b
|
| 3 |
+
size 5777
|
run-2/checkpoint-500/config.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"XLMRobertaForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "not sexist",
|
| 14 |
+
"1": "sexist"
|
| 15 |
+
},
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 4096,
|
| 18 |
+
"label2id": {
|
| 19 |
+
"not sexist": 0,
|
| 20 |
+
"sexist": 1
|
| 21 |
+
},
|
| 22 |
+
"layer_norm_eps": 1e-05,
|
| 23 |
+
"max_position_embeddings": 514,
|
| 24 |
+
"model_type": "xlm-roberta",
|
| 25 |
+
"num_attention_heads": 16,
|
| 26 |
+
"num_hidden_layers": 24,
|
| 27 |
+
"output_past": true,
|
| 28 |
+
"pad_token_id": 1,
|
| 29 |
+
"position_embedding_type": "absolute",
|
| 30 |
+
"torch_dtype": "float32",
|
| 31 |
+
"transformers_version": "4.51.3",
|
| 32 |
+
"type_vocab_size": 1,
|
| 33 |
+
"use_cache": true,
|
| 34 |
+
"vocab_size": 250002
|
| 35 |
+
}
|
run-2/checkpoint-500/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ba112cc745e854908f362f7faefca7bd4d18007615f73561aa079c7f25e633a
|
| 3 |
+
size 2239618672
|
run-2/checkpoint-500/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9806e18df9460573a9240fd2f85d6ac4b6eb35cf9b67a11fee6f3c79f63706b
|
| 3 |
+
size 4352331
|
run-2/checkpoint-500/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1942a5da586c2a0a19239d8e57166a2c72fbc86a5216d92c1a8d0b7c4ff84f05
|
| 3 |
+
size 14645
|
run-2/checkpoint-500/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a5fcc66c7adcba5cfe142542fca6035ccc365bbdf901ef681e6536d85dfc96f
|
| 3 |
+
size 1383
|
run-2/checkpoint-500/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb025a871a3d3dcf6babe0c13b54c8746095bcb99f3cc26ae245f44033cf777e
|
| 3 |
+
size 1465
|
run-2/checkpoint-500/sentencepiece.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
| 3 |
+
size 5069051
|
run-2/checkpoint-500/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"cls_token": "<s>",
|
| 4 |
+
"eos_token": "</s>",
|
| 5 |
+
"mask_token": {
|
| 6 |
+
"content": "<mask>",
|
| 7 |
+
"lstrip": true,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"pad_token": "<pad>",
|
| 13 |
+
"sep_token": "</s>",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
run-2/checkpoint-500/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
|
| 3 |
+
size 17082734
|
run-2/checkpoint-500/tokenizer_config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"250001": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": true,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "<s>",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "<s>",
|
| 47 |
+
"eos_token": "</s>",
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "<mask>",
|
| 50 |
+
"model_max_length": 512,
|
| 51 |
+
"pad_token": "<pad>",
|
| 52 |
+
"sep_token": "</s>",
|
| 53 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 54 |
+
"unk_token": "<unk>"
|
| 55 |
+
}
|
run-2/checkpoint-500/trainer_state.json
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 500,
|
| 3 |
+
"best_metric": 0.8490071509894094,
|
| 4 |
+
"best_model_checkpoint": "./xlm-all/run-2/checkpoint-500",
|
| 5 |
+
"epoch": 0.6596306068601583,
|
| 6 |
+
"eval_steps": 50,
|
| 7 |
+
"global_step": 500,
|
| 8 |
+
"is_hyper_param_search": true,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.06596306068601583,
|
| 14 |
+
"grad_norm": 10.362518310546875,
|
| 15 |
+
"learning_rate": 1.4492402520151846e-05,
|
| 16 |
+
"loss": 0.694,
|
| 17 |
+
"step": 50
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.06596306068601583,
|
| 21 |
+
"eval_f1": 0.4743595515735446,
|
| 22 |
+
"eval_loss": 0.6772196888923645,
|
| 23 |
+
"eval_runtime": 37.1764,
|
| 24 |
+
"eval_samples_per_second": 50.247,
|
| 25 |
+
"eval_steps_per_second": 50.247,
|
| 26 |
+
"step": 50
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 0.13192612137203166,
|
| 30 |
+
"grad_norm": 8.178845405578613,
|
| 31 |
+
"learning_rate": 1.4389725720009073e-05,
|
| 32 |
+
"loss": 0.6465,
|
| 33 |
+
"step": 100
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"epoch": 0.13192612137203166,
|
| 37 |
+
"eval_f1": 0.7500369139904024,
|
| 38 |
+
"eval_loss": 0.5915173292160034,
|
| 39 |
+
"eval_runtime": 37.3774,
|
| 40 |
+
"eval_samples_per_second": 49.977,
|
| 41 |
+
"eval_steps_per_second": 49.977,
|
| 42 |
+
"step": 100
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"epoch": 0.19788918205804748,
|
| 46 |
+
"grad_norm": 15.20943546295166,
|
| 47 |
+
"learning_rate": 1.428277071986035e-05,
|
| 48 |
+
"loss": 0.6062,
|
| 49 |
+
"step": 150
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"epoch": 0.19788918205804748,
|
| 53 |
+
"eval_f1": 0.8016847838630017,
|
| 54 |
+
"eval_loss": 0.5473856329917908,
|
| 55 |
+
"eval_runtime": 37.4663,
|
| 56 |
+
"eval_samples_per_second": 49.858,
|
| 57 |
+
"eval_steps_per_second": 49.858,
|
| 58 |
+
"step": 150
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.2638522427440633,
|
| 62 |
+
"grad_norm": 125.6336898803711,
|
| 63 |
+
"learning_rate": 1.4175815719711629e-05,
|
| 64 |
+
"loss": 0.5423,
|
| 65 |
+
"step": 200
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.2638522427440633,
|
| 69 |
+
"eval_f1": 0.8237314037413463,
|
| 70 |
+
"eval_loss": 0.4995958209037781,
|
| 71 |
+
"eval_runtime": 37.5101,
|
| 72 |
+
"eval_samples_per_second": 49.8,
|
| 73 |
+
"eval_steps_per_second": 49.8,
|
| 74 |
+
"step": 200
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"epoch": 0.32981530343007914,
|
| 78 |
+
"grad_norm": 14.32999038696289,
|
| 79 |
+
"learning_rate": 1.4073138919568856e-05,
|
| 80 |
+
"loss": 0.5334,
|
| 81 |
+
"step": 250
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"epoch": 0.32981530343007914,
|
| 85 |
+
"eval_f1": 0.8198898071625345,
|
| 86 |
+
"eval_loss": 0.49298107624053955,
|
| 87 |
+
"eval_runtime": 37.1619,
|
| 88 |
+
"eval_samples_per_second": 50.266,
|
| 89 |
+
"eval_steps_per_second": 50.266,
|
| 90 |
+
"step": 250
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"epoch": 0.39577836411609496,
|
| 94 |
+
"grad_norm": 27.561870574951172,
|
| 95 |
+
"learning_rate": 1.3966183919420133e-05,
|
| 96 |
+
"loss": 0.5223,
|
| 97 |
+
"step": 300
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"epoch": 0.39577836411609496,
|
| 101 |
+
"eval_f1": 0.8261666781553733,
|
| 102 |
+
"eval_loss": 0.4933040738105774,
|
| 103 |
+
"eval_runtime": 37.2817,
|
| 104 |
+
"eval_samples_per_second": 50.105,
|
| 105 |
+
"eval_steps_per_second": 50.105,
|
| 106 |
+
"step": 300
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"epoch": 0.46174142480211083,
|
| 110 |
+
"grad_norm": 17.7518253326416,
|
| 111 |
+
"learning_rate": 1.3859228919271411e-05,
|
| 112 |
+
"loss": 0.5568,
|
| 113 |
+
"step": 350
|
| 114 |
+
},
|
| 115 |
+
{
|
| 116 |
+
"epoch": 0.46174142480211083,
|
| 117 |
+
"eval_f1": 0.7932735150594417,
|
| 118 |
+
"eval_loss": 0.5218016505241394,
|
| 119 |
+
"eval_runtime": 37.2531,
|
| 120 |
+
"eval_samples_per_second": 50.143,
|
| 121 |
+
"eval_steps_per_second": 50.143,
|
| 122 |
+
"step": 350
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 0.5277044854881267,
|
| 126 |
+
"grad_norm": 8.947376251220703,
|
| 127 |
+
"learning_rate": 1.375227391912269e-05,
|
| 128 |
+
"loss": 0.4753,
|
| 129 |
+
"step": 400
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 0.5277044854881267,
|
| 133 |
+
"eval_f1": 0.8261835490810062,
|
| 134 |
+
"eval_loss": 0.46639615297317505,
|
| 135 |
+
"eval_runtime": 36.9348,
|
| 136 |
+
"eval_samples_per_second": 50.576,
|
| 137 |
+
"eval_steps_per_second": 50.576,
|
| 138 |
+
"step": 400
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"epoch": 0.5936675461741425,
|
| 142 |
+
"grad_norm": 9.670042991638184,
|
| 143 |
+
"learning_rate": 1.3645318918973967e-05,
|
| 144 |
+
"loss": 0.5012,
|
| 145 |
+
"step": 450
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"epoch": 0.5936675461741425,
|
| 149 |
+
"eval_f1": 0.8392116896888902,
|
| 150 |
+
"eval_loss": 0.45583826303482056,
|
| 151 |
+
"eval_runtime": 37.4674,
|
| 152 |
+
"eval_samples_per_second": 49.857,
|
| 153 |
+
"eval_steps_per_second": 49.857,
|
| 154 |
+
"step": 450
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"epoch": 0.6596306068601583,
|
| 158 |
+
"grad_norm": 33.995361328125,
|
| 159 |
+
"learning_rate": 1.3538363918825243e-05,
|
| 160 |
+
"loss": 0.4731,
|
| 161 |
+
"step": 500
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"epoch": 0.6596306068601583,
|
| 165 |
+
"eval_f1": 0.8490071509894094,
|
| 166 |
+
"eval_loss": 0.4522168040275574,
|
| 167 |
+
"eval_runtime": 37.674,
|
| 168 |
+
"eval_samples_per_second": 49.583,
|
| 169 |
+
"eval_steps_per_second": 49.583,
|
| 170 |
+
"step": 500
|
| 171 |
+
}
|
| 172 |
+
],
|
| 173 |
+
"logging_steps": 50,
|
| 174 |
+
"max_steps": 6822,
|
| 175 |
+
"num_input_tokens_seen": 0,
|
| 176 |
+
"num_train_epochs": 9,
|
| 177 |
+
"save_steps": 500,
|
| 178 |
+
"stateful_callbacks": {
|
| 179 |
+
"EarlyStoppingCallback": {
|
| 180 |
+
"args": {
|
| 181 |
+
"early_stopping_patience": 3,
|
| 182 |
+
"early_stopping_threshold": 0.0
|
| 183 |
+
},
|
| 184 |
+
"attributes": {
|
| 185 |
+
"early_stopping_patience_counter": 0
|
| 186 |
+
}
|
| 187 |
+
},
|
| 188 |
+
"TrainerControl": {
|
| 189 |
+
"args": {
|
| 190 |
+
"should_epoch_stop": false,
|
| 191 |
+
"should_evaluate": false,
|
| 192 |
+
"should_log": false,
|
| 193 |
+
"should_save": true,
|
| 194 |
+
"should_training_stop": false
|
| 195 |
+
},
|
| 196 |
+
"attributes": {}
|
| 197 |
+
}
|
| 198 |
+
},
|
| 199 |
+
"total_flos": 2795794089984000.0,
|
| 200 |
+
"train_batch_size": 4,
|
| 201 |
+
"trial_name": null,
|
| 202 |
+
"trial_params": {
|
| 203 |
+
"learning_rate": 1.4592940220291645e-05,
|
| 204 |
+
"num_train_epochs": 9,
|
| 205 |
+
"per_device_train_batch_size": 4,
|
| 206 |
+
"weight_decay": 0.07364830079588867
|
| 207 |
+
}
|
| 208 |
+
}
|
run-2/checkpoint-500/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c223988c5a783e1bdd88ad218a1ff0ba50cc8d55b49d745846b9ced8975099f
|
| 3 |
+
size 5713
|
runs/May11_07-22-34_falcon-02/events.out.tfevents.1746944563.falcon-02.556678.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c14b07978a57838667a2b4ed9ef1cedb8f34aeb2dd775ef3c22a638d39b4dc7
|
| 3 |
+
size 7463
|
runs/May11_07-22-34_falcon-02/events.out.tfevents.1746944615.falcon-02.556678.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08301bb5ec677a547173970f4b0459ad7b00df17330e7e2177326fbe8c93335e
|
| 3 |
+
size 9575
|
runs/May11_07-22-34_falcon-02/events.out.tfevents.1746944739.falcon-02.556678.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7036470b244ba7f5d9eccc364e3e1f5c699d7ddf27a00ef6f071dba867ba3966
|
| 3 |
+
size 7462
|
runs/May11_07-22-34_falcon-02/events.out.tfevents.1746944791.falcon-02.556678.3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc7bb8817033713426ab136781d6aaeb6421ebdf5b251ec00e53779b8e769dc7
|
| 3 |
+
size 8519
|
runs/May11_07-22-34_falcon-02/events.out.tfevents.1746944868.falcon-02.556678.4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5a055dbace1a809bafe43279671e8618fb3548386732a7cc5fd92fd0bde3676
|
| 3 |
+
size 9047
|
runs/May11_07-22-34_falcon-02/events.out.tfevents.1746944977.falcon-02.556678.5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f46e5dc315e376f75ef509bfee2340a1696f32eaafe9dd5a14b19123cde1353b
|
| 3 |
+
size 7990
|
runs/May11_07-22-34_falcon-02/events.out.tfevents.1746945041.falcon-02.556678.6
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:409c2f24effb3ae5182391f3a5e4a51e90efe804e52b8636e83a7e5f37ac1381
|
| 3 |
+
size 5534
|
runs/May11_07-22-34_falcon-02/events.out.tfevents.1746945058.falcon-02.556678.7
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94b5366f3b763ea753b0ceab57b76df6d9b832d0e61f1d2883f9f2c07a5346a1
|
| 3 |
+
size 5536
|
runs/May11_07-22-34_falcon-02/events.out.tfevents.1746945074.falcon-02.556678.8
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6571247dd9621d4a1eee48042a633a324fc213d210a3d943fd81b8ffe63da9e4
|
| 3 |
+
size 5534
|
runs/May11_07-22-34_falcon-02/events.out.tfevents.1746945088.falcon-02.556678.9
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:670b986206c0eb565d4df77f7e557c5e1d45dd0ab785a26e29ae437e74bb7565
|
| 3 |
+
size 5536
|
runs/May11_07-22-34_falcon-02/events.out.tfevents.1746945104.falcon-02.556678.10
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98d4c2653f6162f2d4fce58bd34893462bb60d530ce50eedc3ef7719e6fbea2f
|
| 3 |
+
size 5534
|