Upload folder using huggingface_hub
Browse files- config.json +45 -0
- hyperparameters.csv +11 -0
- optimizer.pt +3 -0
- pytorch_model.bin +3 -0
- scheduler.pt +3 -0
- trainer_state.json +67 -0
- training_args.bin +3 -0
- training_resume.json +29 -0
config.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "dccuchile/distilbert-base-spanish-uncased",
|
| 3 |
+
"activation": "gelu",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"DistilBertForSequenceClassification"
|
| 6 |
+
],
|
| 7 |
+
"attention_dropout": 0.1,
|
| 8 |
+
"dim": 768,
|
| 9 |
+
"dropout": 0.1,
|
| 10 |
+
"hidden_dim": 3072,
|
| 11 |
+
"id2label": {
|
| 12 |
+
"0": "LABEL_0",
|
| 13 |
+
"1": "LABEL_1",
|
| 14 |
+
"2": "LABEL_2",
|
| 15 |
+
"3": "LABEL_3",
|
| 16 |
+
"4": "LABEL_4",
|
| 17 |
+
"5": "LABEL_5",
|
| 18 |
+
"6": "LABEL_6",
|
| 19 |
+
"7": "LABEL_7"
|
| 20 |
+
},
|
| 21 |
+
"initializer_range": 0.02,
|
| 22 |
+
"label2id": {
|
| 23 |
+
"LABEL_0": 0,
|
| 24 |
+
"LABEL_1": 1,
|
| 25 |
+
"LABEL_2": 2,
|
| 26 |
+
"LABEL_3": 3,
|
| 27 |
+
"LABEL_4": 4,
|
| 28 |
+
"LABEL_5": 5,
|
| 29 |
+
"LABEL_6": 6,
|
| 30 |
+
"LABEL_7": 7
|
| 31 |
+
},
|
| 32 |
+
"max_position_embeddings": 512,
|
| 33 |
+
"model_type": "distilbert",
|
| 34 |
+
"n_heads": 12,
|
| 35 |
+
"n_layers": 6,
|
| 36 |
+
"pad_token_id": 0,
|
| 37 |
+
"problem_type": "single_label_classification",
|
| 38 |
+
"qa_dropout": 0.1,
|
| 39 |
+
"seq_classif_dropout": 0.2,
|
| 40 |
+
"sinusoidal_pos_embds": true,
|
| 41 |
+
"tie_weights_": true,
|
| 42 |
+
"torch_dtype": "float32",
|
| 43 |
+
"transformers_version": "4.23.1",
|
| 44 |
+
"vocab_size": 31002
|
| 45 |
+
}
|
hyperparameters.csv
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
objective,best,learning_rate,num_train_epochs,per_device_train_batch_size,warmup_steps,weight_decay,time_this_iter_s
|
| 2 |
+
0.597362118441995,False,3.271499237806267e-05,4,16,1000,0.2565075619288112,144.12740564346313
|
| 3 |
+
0.5198963371642762,False,1.0741466184541341e-05,5,16,0,0.07599321456170617,144.151517868042
|
| 4 |
+
0.5946714730314795,False,3.37620447413037e-05,3,8,500,0.18998600872372765,155.90862655639648
|
| 5 |
+
0.4574393106598997,False,1.4671776366845966e-05,1,16,500,0.21137985700516712,145.89185881614685
|
| 6 |
+
0.4709334976994894,False,1.046796947096866e-05,2,16,0,0.026531140748479454,145.94519090652466
|
| 7 |
+
0.47061894490157463,False,4.9540747889715425e-05,1,16,1000,0.06647603436044074,145.85856461524963
|
| 8 |
+
0.4375637251729597,False,2.580653331424728e-05,1,16,1000,0.07837433494112092,145.91601490974426
|
| 9 |
+
0.46680378064714356,False,4.168611764617624e-05,1,16,1000,0.2422358346736178,145.84486627578735
|
| 10 |
+
0.5901139866275458,False,2.7145037133288976e-05,3,8,1000,0.17082315068595946,155.86436223983765
|
| 11 |
+
0.6201819619499414,True,4.985384913085322e-05,3,8,500,0.20061124234729208,155.91449403762817
|
optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d8a6c2ddf65034f3989bda9b15c32daa8aa2071501d20802433fd66d47b8b70
|
| 3 |
+
size 535566213
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:117117900b2bb5f28c4fc851057856cdaa1cb842e1e0da8648c99381fad01238
|
| 3 |
+
size 269348525
|
scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b318a21d72fee272d9435783a30338821657728fd9fe2d7bee71bf08b42c3a6
|
| 3 |
+
size 627
|
trainer_state.json
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 0.8988341093063354,
|
| 3 |
+
"best_model_checkpoint": "./results/run-d045ac8a/checkpoint-1764",
|
| 4 |
+
"epoch": 3.0,
|
| 5 |
+
"global_step": 2646,
|
| 6 |
+
"is_hyper_param_search": true,
|
| 7 |
+
"is_local_process_zero": true,
|
| 8 |
+
"is_world_process_zero": true,
|
| 9 |
+
"log_history": [
|
| 10 |
+
{
|
| 11 |
+
"epoch": 0.0,
|
| 12 |
+
"learning_rate": 9.970769826170644e-08,
|
| 13 |
+
"loss": 2.104,
|
| 14 |
+
"step": 1
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"epoch": 1.0,
|
| 18 |
+
"eval_f1": 0.5132287012657224,
|
| 19 |
+
"eval_loss": 0.9356955289840698,
|
| 20 |
+
"eval_runtime": 3.9376,
|
| 21 |
+
"eval_samples_per_second": 597.314,
|
| 22 |
+
"eval_steps_per_second": 74.664,
|
| 23 |
+
"step": 882
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 1.13,
|
| 27 |
+
"learning_rate": 3.823832044239721e-05,
|
| 28 |
+
"loss": 1.1539,
|
| 29 |
+
"step": 1000
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"epoch": 2.0,
|
| 33 |
+
"eval_f1": 0.6100360403521365,
|
| 34 |
+
"eval_loss": 0.8988341093063354,
|
| 35 |
+
"eval_runtime": 3.9403,
|
| 36 |
+
"eval_samples_per_second": 596.902,
|
| 37 |
+
"eval_steps_per_second": 74.613,
|
| 38 |
+
"step": 1764
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 2.27,
|
| 42 |
+
"learning_rate": 1.5007263065485174e-05,
|
| 43 |
+
"loss": 0.6207,
|
| 44 |
+
"step": 2000
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 3.0,
|
| 48 |
+
"eval_f1": 0.6201819619499414,
|
| 49 |
+
"eval_loss": 1.0120480060577393,
|
| 50 |
+
"eval_runtime": 3.9533,
|
| 51 |
+
"eval_samples_per_second": 594.945,
|
| 52 |
+
"eval_steps_per_second": 74.368,
|
| 53 |
+
"step": 2646
|
| 54 |
+
}
|
| 55 |
+
],
|
| 56 |
+
"max_steps": 2646,
|
| 57 |
+
"num_train_epochs": 3,
|
| 58 |
+
"total_flos": 1840940131235520.0,
|
| 59 |
+
"trial_name": null,
|
| 60 |
+
"trial_params": {
|
| 61 |
+
"learning_rate": 4.985384913085322e-05,
|
| 62 |
+
"num_train_epochs": 3,
|
| 63 |
+
"per_device_train_batch_size": 8,
|
| 64 |
+
"warmup_steps": 500,
|
| 65 |
+
"weight_decay": 0.20061124234729208
|
| 66 |
+
}
|
| 67 |
+
}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe8e42a83d520066827abee6a387e6325c3b69fb81ae5e2db9f9fdb4257dcb74
|
| 3 |
+
size 3387
|
training_resume.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"batch_train_size": 16,
|
| 3 |
+
"batch_val_size": 32,
|
| 4 |
+
"corpus": "text",
|
| 5 |
+
"dataset": "safercity",
|
| 6 |
+
"epochs": [
|
| 7 |
+
1,
|
| 8 |
+
2,
|
| 9 |
+
3,
|
| 10 |
+
4,
|
| 11 |
+
5
|
| 12 |
+
],
|
| 13 |
+
"folder": "distilbeto-base",
|
| 14 |
+
"iterations": 10,
|
| 15 |
+
"labels": 8,
|
| 16 |
+
"model": "transformers",
|
| 17 |
+
"original_pretrained_model": "dccuchile/distilbert-base-spanish-uncased",
|
| 18 |
+
"pretrained_model": "../assets/safercity/text/models/distilbeto-base",
|
| 19 |
+
"resources_per_trial": {
|
| 20 |
+
"gpu": 1
|
| 21 |
+
},
|
| 22 |
+
"run_id": "d045ac8a",
|
| 23 |
+
"task": "",
|
| 24 |
+
"task_type": "classification",
|
| 25 |
+
"tokenizer_field": "tweet",
|
| 26 |
+
"tokenizer_model": "dccuchile/distilbert-base-spanish-uncased",
|
| 27 |
+
"warmup_steps": 500,
|
| 28 |
+
"weight_decay": 0.01
|
| 29 |
+
}
|