jaydubya committed on
Commit
5be45df
·
verified ·
1 Parent(s): 8345ea1

Upload folder using huggingface_hub

Browse files
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./results/Entity_Action",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
+ "layer_norm_eps": 1e-07,
13
+ "max_position_embeddings": 512,
14
+ "max_relative_positions": -1,
15
+ "model_type": "deberta-v2",
16
+ "norm_rel_ebd": "layer_norm",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 6,
19
+ "pad_token_id": 0,
20
+ "pooler_dropout": 0,
21
+ "pooler_hidden_act": "gelu",
22
+ "pooler_hidden_size": 768,
23
+ "pos_att_type": [
24
+ "p2c",
25
+ "c2p"
26
+ ],
27
+ "position_biased_input": false,
28
+ "position_buckets": 256,
29
+ "relative_attention": true,
30
+ "share_att_key": true,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.42.4",
33
+ "type_vocab_size": 0,
34
+ "vocab_size": 128100
35
+ }
confusion_matrix_artefacts.xlsx ADDED
Binary file (6.53 kB). View file
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:896acd461faa67488cd14bc0fd13c2328e196de5796aadc582f0569e30c0dd3e
3
+ size 567598552
optuna_trial_output.txt ADDED
File without changes
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "sp_model_kwargs": {},
54
+ "split_by_punct": false,
55
+ "tokenizer_class": "DebertaV2Tokenizer",
56
+ "unk_token": "[UNK]",
57
+ "vocab_type": "spm"
58
+ }
training_log.txt ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Input data filename: project-30-at-2024-08-27-13-17-2b8a675a_ind_theme_split_27_aug_andtitledescr.json
2
+ Training date and time: 2024-08-27 14:57:34.590724
3
+
4
+ Training parameters:
5
+ output_dir: ./results/Entity_Action
6
+ overwrite_output_dir: False
7
+ do_train: False
8
+ do_eval: True
9
+ do_predict: False
10
+ eval_strategy: epoch
11
+ prediction_loss_only: False
12
+ per_device_train_batch_size: 8
13
+ per_device_eval_batch_size: 8
14
+ per_gpu_train_batch_size: None
15
+ per_gpu_eval_batch_size: None
16
+ gradient_accumulation_steps: 1
17
+ eval_accumulation_steps: None
18
+ eval_delay: 0
19
+ learning_rate: 4.270726644478662e-05
20
+ weight_decay: 0.21946446535298075
21
+ adam_beta1: 0.9
22
+ adam_beta2: 0.999
23
+ adam_epsilon: 1e-08
24
+ max_grad_norm: 1.0
25
+ num_train_epochs: 5
26
+ max_steps: -1
27
+ lr_scheduler_type: linear
28
+ lr_scheduler_kwargs: {}
29
+ warmup_ratio: 0.0
30
+ warmup_steps: 0
31
+ log_level: passive
32
+ log_level_replica: warning
33
+ log_on_each_node: True
34
+ logging_dir: ./logs
35
+ logging_strategy: epoch
36
+ logging_first_step: False
37
+ logging_steps: 10
38
+ logging_nan_inf_filter: True
39
+ save_strategy: epoch
40
+ save_steps: 500
41
+ save_total_limit: None
42
+ save_safetensors: True
43
+ save_on_each_node: False
44
+ save_only_model: False
45
+ restore_callback_states_from_checkpoint: False
46
+ no_cuda: False
47
+ use_cpu: False
48
+ use_mps_device: False
49
+ seed: 42
50
+ data_seed: None
51
+ jit_mode_eval: False
52
+ use_ipex: False
53
+ bf16: False
54
+ fp16: False
55
+ fp16_opt_level: O1
56
+ half_precision_backend: auto
57
+ bf16_full_eval: False
58
+ fp16_full_eval: False
59
+ tf32: None
60
+ local_rank: 0
61
+ ddp_backend: None
62
+ tpu_num_cores: None
63
+ tpu_metrics_debug: False
64
+ debug: []
65
+ dataloader_drop_last: False
66
+ eval_steps: None
67
+ dataloader_num_workers: 0
68
+ dataloader_prefetch_factor: None
69
+ past_index: -1
70
+ run_name: ./results/Entity_Action
71
+ disable_tqdm: False
72
+ remove_unused_columns: False
73
+ label_names: None
74
+ load_best_model_at_end: True
75
+ metric_for_best_model: eval_accuracy
76
+ greater_is_better: True
77
+ ignore_data_skip: False
78
+ fsdp: []
79
+ fsdp_min_num_params: 0
80
+ fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
81
+ fsdp_transformer_layer_cls_to_wrap: None
82
+ accelerator_config: AcceleratorConfig(split_batches=False, dispatch_batches=None, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False)
83
+ deepspeed: None
84
+ label_smoothing_factor: 0.0
85
+ optim: adamw_torch
86
+ optim_args: None
87
+ adafactor: False
88
+ group_by_length: False
89
+ length_column_name: length
90
+ report_to: ['tensorboard']
91
+ ddp_find_unused_parameters: None
92
+ ddp_bucket_cap_mb: None
93
+ ddp_broadcast_buffers: None
94
+ dataloader_pin_memory: True
95
+ dataloader_persistent_workers: False
96
+ skip_memory_metrics: True
97
+ use_legacy_prediction_loop: False
98
+ push_to_hub: False
99
+ resume_from_checkpoint: None
100
+ hub_model_id: None
101
+ hub_strategy: every_save
102
+ hub_token: None
103
+ hub_private_repo: False
104
+ hub_always_push: False
105
+ gradient_checkpointing: False
106
+ gradient_checkpointing_kwargs: None
107
+ include_inputs_for_metrics: False
108
+ eval_do_concat_batches: True
109
+ fp16_backend: auto
110
+ evaluation_strategy: None
111
+ push_to_hub_model_id: None
112
+ push_to_hub_organization: None
113
+ push_to_hub_token: None
114
+ mp_parameters:
115
+ auto_find_batch_size: False
116
+ full_determinism: False
117
+ torchdynamo: None
118
+ ray_scope: last
119
+ ddp_timeout: 1800
120
+ torch_compile: False
121
+ torch_compile_backend: None
122
+ torch_compile_mode: None
123
+ dispatch_batches: None
124
+ split_batches: None
125
+ include_tokens_per_second: False
126
+ include_num_input_tokens_seen: False
127
+ neftune_noise_alpha: None
128
+ optim_target_modules: None
129
+ batch_eval_metrics: False
130
+ eval_on_start: False
131
+ distributed_state: Distributed environment: NO
132
+ Num processes: 1
133
+ Process index: 0
134
+ Local process index: 0
135
+ Device: cuda
136
+
137
+ _n_gpu: 1
138
+ __cached__setup_devices: cuda:0
139
+ deepspeed_plugin: None
140
+
141
+ Best Hyperparameters:
142
+ learning_rate: 4.270726644478662e-05
143
+ num_train_epochs: 5
144
+ weight_decay: 0.21946446535298075
145
+
146
+ Training metrics:
147
+ train_runtime: 672.3301
148
+ train_samples_per_second: 18.414
149
+ train_steps_per_second: 2.305
150
+ total_flos: 1640004811530240.0
151
+ train_loss: 0.12886674880981444
152
+ epoch: 5.0
153
+
154
+ All Epoch Metrics:
155
+ Epoch Training Loss Validation Loss Accuracy Precision Recall F1 Bce Loss Runtime Samples Per Second Steps Per Second
156
+ 1.0 0.3189 0.1607036143541336 0.9532258064516129 0.9599056603773585 0.9713603818615751 0.9655990510083036 0.16070359953727179 672.3301 18.414 2.305
157
+ 2.0 0.1528 0.11495514214038849 0.9725806451612903 0.9808612440191388 0.9785202863961814 0.9796893667861409 0.1149551347974206 672.3301 18.414 2.305
158
+ 3.0 0.0751 0.10520578175783157 0.9790322580645161 0.981042654028436 0.9880668257756563 0.9845422116527943 0.10520577938253993 672.3301 18.414 2.305
159
+ 4.0 0.0623 0.08832962810993195 0.9854838709677419 0.9904306220095693 0.9880668257756563 0.9892473118279569 0.08832962441818693 672.3301 18.414 2.305
160
+ 5.0 0.0353 0.08785796910524368 0.9838709677419355 0.9904076738609112 0.9856801909307876 0.9880382775119617 0.08785795159062294 672.3301 18.414 2.305