alex2awesome committed on
Commit
8c394a2
·
1 Parent(s): 45aed31
big-bird__loss-window-2__all_results.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_e": 0.0,
4
+ "eval_f1": 0.0019538069857852587,
5
+ "eval_loss": 7.420947551727295,
6
+ "eval_runtime": 82.0111,
7
+ "eval_samples": 3576,
8
+ "eval_samples_per_second": 43.604,
9
+ "eval_steps_per_second": 43.604,
10
+ "test_e": 0.5774608501118568,
11
+ "test_f1": 0.532088049820999,
12
+ "test_loss": 3.9567770957946777,
13
+ "test_runtime": 81.5031,
14
+ "test_samples_per_second": 43.876,
15
+ "test_steps_per_second": 43.876,
16
+ "train_loss": 6.972438398601898,
17
+ "train_runtime": 3412.8799,
18
+ "train_samples": 10138,
19
+ "train_samples_per_second": 8.912,
20
+ "train_steps_per_second": 8.912
21
+ }
big-bird__loss-window-2__config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bigbird-roberta-base",
3
+ "architectures": [
4
+ "QAModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "attention_type": "original_full",
8
+ "block_size": 64,
9
+ "bos_token_id": 1,
10
+ "classifier_dropout": null,
11
+ "eos_token_id": 2,
12
+ "freeze_layers": null,
13
+ "gradient_checkpointing": false,
14
+ "hidden_act": "gelu_new",
15
+ "hidden_dropout_prob": 0.1,
16
+ "hidden_size": 768,
17
+ "include_nones_as_positives": false,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "layer_norm_eps": 1e-12,
21
+ "max_position_embeddings": 4096,
22
+ "model_type": "big_bird",
23
+ "num_attention_heads": 12,
24
+ "num_hidden_layers": 12,
25
+ "num_random_blocks": 3,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "qa_head": {},
29
+ "rescale_embeddings": false,
30
+ "sep_token_id": 66,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.12.2",
33
+ "type_vocab_size": 2,
34
+ "use_bias": true,
35
+ "use_cache": true,
36
+ "vocab_size": 50358
37
+ }
big-bird__loss-window-2__prediction_output.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
big-bird__qa-model__all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_e": 0.0,
3
+ "eval_f1": 0.0026853597694833906,
4
+ "eval_loss": 7.507201671600342,
5
+ "eval_runtime": 82.025,
6
+ "eval_samples_per_second": 43.596,
7
+ "eval_steps_per_second": 43.596
8
+ }
big-bird__qa-model__roberta-large__all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_e": 0.0,
3
+ "eval_f1": 0.0025534260097588124,
4
+ "eval_loss": 7.366059303283691,
5
+ "eval_runtime": 216.7168,
6
+ "eval_samples_per_second": 16.501,
7
+ "eval_steps_per_second": 16.501
8
+ }
big-bird__salience-model__all_results.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_e": 0.0,
4
+ "eval_f1": 0.0019965838147566663,
5
+ "eval_loss": 7.391232013702393,
6
+ "eval_runtime": 157.8075,
7
+ "eval_samples": 3576,
8
+ "eval_samples_per_second": 22.661,
9
+ "eval_steps_per_second": 22.661,
10
+ "test_e": 0.5914429530201343,
11
+ "test_f1": 0.5363322750706369,
12
+ "test_loss": 3.4301834106445312,
13
+ "test_runtime": 157.9622,
14
+ "test_samples_per_second": 22.638,
15
+ "test_steps_per_second": 22.638,
16
+ "train_loss": 1.4217724323805765,
17
+ "train_runtime": 5802.1755,
18
+ "train_samples": 10138,
19
+ "train_samples_per_second": 5.242,
20
+ "train_steps_per_second": 5.242
21
+ }
big-bird__salience-model__augmented-data__all_results.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_e": 0.0,
4
+ "eval_f1": 0.0018995468801266367,
5
+ "eval_loss": 7.458363056182861,
6
+ "eval_runtime": 156.6623,
7
+ "eval_samples": 3576,
8
+ "eval_samples_per_second": 22.826,
9
+ "eval_steps_per_second": 22.826,
10
+ "test_e": 0.5584451901565995,
11
+ "test_f1": 0.5157599466272186,
12
+ "test_loss": 3.5660927295684814,
13
+ "test_runtime": 156.6477,
14
+ "test_samples_per_second": 22.828,
15
+ "test_steps_per_second": 22.828,
16
+ "train_loss": 1.9798311326282656,
17
+ "train_runtime": 7385.0011,
18
+ "train_samples": 13605,
19
+ "train_samples_per_second": 5.527,
20
+ "train_steps_per_second": 5.527
21
+ }
big-bird__salience-model__augmented-data__config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bigbird-roberta-base",
3
+ "architectures": [
4
+ "QAModelWithSalience"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "attention_type": "original_full",
8
+ "block_size": 64,
9
+ "bos_token_id": 1,
10
+ "classifier_dropout": null,
11
+ "eos_token_id": 2,
12
+ "freeze_layers": null,
13
+ "gradient_checkpointing": false,
14
+ "hidden_act": "gelu_new",
15
+ "hidden_dropout_prob": 0.1,
16
+ "hidden_size": 768,
17
+ "include_nones_as_positives": false,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "layer_norm_eps": 1e-12,
21
+ "max_position_embeddings": 4096,
22
+ "model_type": "big_bird",
23
+ "num_attention_heads": 12,
24
+ "num_hidden_layers": 12,
25
+ "num_random_blocks": 3,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "qa_head": {},
29
+ "rescale_embeddings": false,
30
+ "sep_token_id": 66,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.12.2",
33
+ "type_vocab_size": 2,
34
+ "use_bias": true,
35
+ "use_cache": true,
36
+ "vocab_size": 50358
37
+ }
big-bird__salience-model__augmented-data__prediction_output.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
big-bird__salience-model__config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bigbird-roberta-base",
3
+ "architectures": [
4
+ "QAModelWithSalience"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "attention_type": "original_full",
8
+ "block_size": 64,
9
+ "bos_token_id": 1,
10
+ "classifier_dropout": null,
11
+ "eos_token_id": 2,
12
+ "freeze_layers": null,
13
+ "gradient_checkpointing": false,
14
+ "hidden_act": "gelu_new",
15
+ "hidden_dropout_prob": 0.1,
16
+ "hidden_size": 768,
17
+ "include_nones_as_positives": false,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "layer_norm_eps": 1e-12,
21
+ "max_position_embeddings": 4096,
22
+ "model_type": "big_bird",
23
+ "num_attention_heads": 12,
24
+ "num_hidden_layers": 12,
25
+ "num_random_blocks": 3,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "qa_head": {},
29
+ "rescale_embeddings": false,
30
+ "sep_token_id": 66,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.12.2",
33
+ "type_vocab_size": 2,
34
+ "use_bias": true,
35
+ "use_cache": true,
36
+ "vocab_size": 50358
37
+ }
quote-attribution-qa__big-bird-base__all_results.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_e": 0.0011185682326621924,
4
+ "eval_f1": 0.0030897725029911207,
5
+ "eval_loss": 7.352117538452148,
6
+ "eval_runtime": 82.3342,
7
+ "eval_samples": 3576,
8
+ "eval_samples_per_second": 43.433,
9
+ "eval_steps_per_second": 43.433,
10
+ "test_e": 0.5970357941834452,
11
+ "test_f1": 0.5385527992826207,
12
+ "test_loss": 3.199585199356079,
13
+ "test_runtime": 81.8465,
14
+ "test_samples_per_second": 43.692,
15
+ "test_steps_per_second": 43.692,
16
+ "train_loss": 1.5040293447896245,
17
+ "train_runtime": 3366.7923,
18
+ "train_samples": 10138,
19
+ "train_samples_per_second": 9.034,
20
+ "train_steps_per_second": 9.034
21
+ }
quote-attribution-qa__big-bird-base__config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bigbird-roberta-base",
3
+ "architectures": [
4
+ "QAModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "attention_type": "original_full",
8
+ "block_size": 64,
9
+ "bos_token_id": 1,
10
+ "classifier_dropout": null,
11
+ "eos_token_id": 2,
12
+ "frozen_layers": null,
13
+ "gradient_checkpointing": false,
14
+ "hidden_act": "gelu_new",
15
+ "hidden_dropout_prob": 0.1,
16
+ "hidden_size": 768,
17
+ "include_nones_as_positives": false,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "layer_norm_eps": 1e-12,
21
+ "max_position_embeddings": 4096,
22
+ "model_type": "big_bird",
23
+ "num_attention_heads": 12,
24
+ "num_hidden_layers": 12,
25
+ "num_random_blocks": 3,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "qa_head": {},
29
+ "rescale_embeddings": false,
30
+ "sep_token_id": 66,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.12.2",
33
+ "type_vocab_size": 2,
34
+ "use_bias": true,
35
+ "use_cache": true,
36
+ "vocab_size": 50358
37
+ }
quote-detection__roberta-base-sentence__all_results.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_f1": 0.6645335958651243,
4
+ "eval_loss": 0.6792270531125815,
5
+ "eval_runtime": 1.8612,
6
+ "eval_samples": 72,
7
+ "eval_samples_per_second": 38.686,
8
+ "eval_steps_per_second": 38.686,
9
+ "test_f1": 0.8574610244988863,
10
+ "test_loss": 0.41005876731686347,
11
+ "test_runtime": 1.8863,
12
+ "test_samples_per_second": 38.171,
13
+ "test_steps_per_second": 38.171,
14
+ "train_loss": 0.34470706577448884,
15
+ "train_runtime": 143.1766,
16
+ "train_samples": 516,
17
+ "train_samples_per_second": 10.812,
18
+ "train_steps_per_second": 10.812
19
+ }
quote-detection__roberta-base-sentence__config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SentenceClassificationModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classification_head": {
8
+ "num_labels": 1,
9
+ "pooling_method": "average"
10
+ },
11
+ "classifier_dropout": null,
12
+ "eos_token_id": 2,
13
+ "frozen_layers": [
14
+ 0,
15
+ 1,
16
+ 2,
17
+ 3,
18
+ 4,
19
+ 5,
20
+ 6,
21
+ 7,
22
+ 8
23
+ ],
24
+ "hidden_act": "gelu",
25
+ "hidden_dropout_prob": 0.1,
26
+ "hidden_size": 768,
27
+ "initializer_range": 0.02,
28
+ "intermediate_size": 3072,
29
+ "layer_norm_eps": 1e-05,
30
+ "max_position_embeddings": 514,
31
+ "model_type": "roberta",
32
+ "num_attention_heads": 12,
33
+ "num_hidden_layers": 12,
34
+ "pad_token_id": 1,
35
+ "position_embedding_type": "absolute",
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.12.2",
38
+ "type_vocab_size": 1,
39
+ "use_cache": true,
40
+ "vocab_size": 50265
41
+ }