Commit ·
8c394a2
1
Parent(s): 45aed31
- big-bird__loss-window-2__all_results.json +21 -0
- big-bird__loss-window-2__config.json +37 -0
- big-bird__loss-window-2__prediction_output.jsonl +0 -0
- big-bird__qa-model__all_results.json +8 -0
- big-bird__qa-model__roberta-large__all_results.json +8 -0
- big-bird__salience-model__all_results.json +21 -0
- big-bird__salience-model__augmented-data__all_results.json +21 -0
- big-bird__salience-model__augmented-data__config.json +37 -0
- big-bird__salience-model__augmented-data__prediction_output.jsonl +0 -0
- big-bird__salience-model__config.json +37 -0
- quote-attribution-qa__big-bird-base__all_results.json +21 -0
- quote-attribution-qa__big-bird-base__config.json +37 -0
- quote-detection__roberta-base-sentence__all_results.json +19 -0
- quote-detection__roberta-base-sentence__config.json +41 -0
big-bird__loss-window-2__all_results.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 3.0,
|
| 3 |
+
"eval_e": 0.0,
|
| 4 |
+
"eval_f1": 0.0019538069857852587,
|
| 5 |
+
"eval_loss": 7.420947551727295,
|
| 6 |
+
"eval_runtime": 82.0111,
|
| 7 |
+
"eval_samples": 3576,
|
| 8 |
+
"eval_samples_per_second": 43.604,
|
| 9 |
+
"eval_steps_per_second": 43.604,
|
| 10 |
+
"test_e": 0.5774608501118568,
|
| 11 |
+
"test_f1": 0.532088049820999,
|
| 12 |
+
"test_loss": 3.9567770957946777,
|
| 13 |
+
"test_runtime": 81.5031,
|
| 14 |
+
"test_samples_per_second": 43.876,
|
| 15 |
+
"test_steps_per_second": 43.876,
|
| 16 |
+
"train_loss": 6.972438398601898,
|
| 17 |
+
"train_runtime": 3412.8799,
|
| 18 |
+
"train_samples": 10138,
|
| 19 |
+
"train_samples_per_second": 8.912,
|
| 20 |
+
"train_steps_per_second": 8.912
|
| 21 |
+
}
|
big-bird__loss-window-2__config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "google/bigbird-roberta-base",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"QAModel"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"attention_type": "original_full",
|
| 8 |
+
"block_size": 64,
|
| 9 |
+
"bos_token_id": 1,
|
| 10 |
+
"classifier_dropout": null,
|
| 11 |
+
"eos_token_id": 2,
|
| 12 |
+
"freeze_layers": null,
|
| 13 |
+
"gradient_checkpointing": false,
|
| 14 |
+
"hidden_act": "gelu_new",
|
| 15 |
+
"hidden_dropout_prob": 0.1,
|
| 16 |
+
"hidden_size": 768,
|
| 17 |
+
"include_nones_as_positives": false,
|
| 18 |
+
"initializer_range": 0.02,
|
| 19 |
+
"intermediate_size": 3072,
|
| 20 |
+
"layer_norm_eps": 1e-12,
|
| 21 |
+
"max_position_embeddings": 4096,
|
| 22 |
+
"model_type": "big_bird",
|
| 23 |
+
"num_attention_heads": 12,
|
| 24 |
+
"num_hidden_layers": 12,
|
| 25 |
+
"num_random_blocks": 3,
|
| 26 |
+
"pad_token_id": 0,
|
| 27 |
+
"position_embedding_type": "absolute",
|
| 28 |
+
"qa_head": {},
|
| 29 |
+
"rescale_embeddings": false,
|
| 30 |
+
"sep_token_id": 66,
|
| 31 |
+
"torch_dtype": "float32",
|
| 32 |
+
"transformers_version": "4.12.2",
|
| 33 |
+
"type_vocab_size": 2,
|
| 34 |
+
"use_bias": true,
|
| 35 |
+
"use_cache": true,
|
| 36 |
+
"vocab_size": 50358
|
| 37 |
+
}
|
big-bird__loss-window-2__prediction_output.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
big-bird__qa-model__all_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"eval_e": 0.0,
|
| 3 |
+
"eval_f1": 0.0026853597694833906,
|
| 4 |
+
"eval_loss": 7.507201671600342,
|
| 5 |
+
"eval_runtime": 82.025,
|
| 6 |
+
"eval_samples_per_second": 43.596,
|
| 7 |
+
"eval_steps_per_second": 43.596
|
| 8 |
+
}
|
big-bird__qa-model__roberta-large__all_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"eval_e": 0.0,
|
| 3 |
+
"eval_f1": 0.0025534260097588124,
|
| 4 |
+
"eval_loss": 7.366059303283691,
|
| 5 |
+
"eval_runtime": 216.7168,
|
| 6 |
+
"eval_samples_per_second": 16.501,
|
| 7 |
+
"eval_steps_per_second": 16.501
|
| 8 |
+
}
|
big-bird__salience-model__all_results.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 3.0,
|
| 3 |
+
"eval_e": 0.0,
|
| 4 |
+
"eval_f1": 0.0019965838147566663,
|
| 5 |
+
"eval_loss": 7.391232013702393,
|
| 6 |
+
"eval_runtime": 157.8075,
|
| 7 |
+
"eval_samples": 3576,
|
| 8 |
+
"eval_samples_per_second": 22.661,
|
| 9 |
+
"eval_steps_per_second": 22.661,
|
| 10 |
+
"test_e": 0.5914429530201343,
|
| 11 |
+
"test_f1": 0.5363322750706369,
|
| 12 |
+
"test_loss": 3.4301834106445312,
|
| 13 |
+
"test_runtime": 157.9622,
|
| 14 |
+
"test_samples_per_second": 22.638,
|
| 15 |
+
"test_steps_per_second": 22.638,
|
| 16 |
+
"train_loss": 1.4217724323805765,
|
| 17 |
+
"train_runtime": 5802.1755,
|
| 18 |
+
"train_samples": 10138,
|
| 19 |
+
"train_samples_per_second": 5.242,
|
| 20 |
+
"train_steps_per_second": 5.242
|
| 21 |
+
}
|
big-bird__salience-model__augmented-data__all_results.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 3.0,
|
| 3 |
+
"eval_e": 0.0,
|
| 4 |
+
"eval_f1": 0.0018995468801266367,
|
| 5 |
+
"eval_loss": 7.458363056182861,
|
| 6 |
+
"eval_runtime": 156.6623,
|
| 7 |
+
"eval_samples": 3576,
|
| 8 |
+
"eval_samples_per_second": 22.826,
|
| 9 |
+
"eval_steps_per_second": 22.826,
|
| 10 |
+
"test_e": 0.5584451901565995,
|
| 11 |
+
"test_f1": 0.5157599466272186,
|
| 12 |
+
"test_loss": 3.5660927295684814,
|
| 13 |
+
"test_runtime": 156.6477,
|
| 14 |
+
"test_samples_per_second": 22.828,
|
| 15 |
+
"test_steps_per_second": 22.828,
|
| 16 |
+
"train_loss": 1.9798311326282656,
|
| 17 |
+
"train_runtime": 7385.0011,
|
| 18 |
+
"train_samples": 13605,
|
| 19 |
+
"train_samples_per_second": 5.527,
|
| 20 |
+
"train_steps_per_second": 5.527
|
| 21 |
+
}
|
big-bird__salience-model__augmented-data__config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "google/bigbird-roberta-base",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"QAModelWithSalience"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"attention_type": "original_full",
|
| 8 |
+
"block_size": 64,
|
| 9 |
+
"bos_token_id": 1,
|
| 10 |
+
"classifier_dropout": null,
|
| 11 |
+
"eos_token_id": 2,
|
| 12 |
+
"freeze_layers": null,
|
| 13 |
+
"gradient_checkpointing": false,
|
| 14 |
+
"hidden_act": "gelu_new",
|
| 15 |
+
"hidden_dropout_prob": 0.1,
|
| 16 |
+
"hidden_size": 768,
|
| 17 |
+
"include_nones_as_positives": false,
|
| 18 |
+
"initializer_range": 0.02,
|
| 19 |
+
"intermediate_size": 3072,
|
| 20 |
+
"layer_norm_eps": 1e-12,
|
| 21 |
+
"max_position_embeddings": 4096,
|
| 22 |
+
"model_type": "big_bird",
|
| 23 |
+
"num_attention_heads": 12,
|
| 24 |
+
"num_hidden_layers": 12,
|
| 25 |
+
"num_random_blocks": 3,
|
| 26 |
+
"pad_token_id": 0,
|
| 27 |
+
"position_embedding_type": "absolute",
|
| 28 |
+
"qa_head": {},
|
| 29 |
+
"rescale_embeddings": false,
|
| 30 |
+
"sep_token_id": 66,
|
| 31 |
+
"torch_dtype": "float32",
|
| 32 |
+
"transformers_version": "4.12.2",
|
| 33 |
+
"type_vocab_size": 2,
|
| 34 |
+
"use_bias": true,
|
| 35 |
+
"use_cache": true,
|
| 36 |
+
"vocab_size": 50358
|
| 37 |
+
}
|
big-bird__salience-model__augmented-data__prediction_output.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
big-bird__salience-model__config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "google/bigbird-roberta-base",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"QAModelWithSalience"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"attention_type": "original_full",
|
| 8 |
+
"block_size": 64,
|
| 9 |
+
"bos_token_id": 1,
|
| 10 |
+
"classifier_dropout": null,
|
| 11 |
+
"eos_token_id": 2,
|
| 12 |
+
"freeze_layers": null,
|
| 13 |
+
"gradient_checkpointing": false,
|
| 14 |
+
"hidden_act": "gelu_new",
|
| 15 |
+
"hidden_dropout_prob": 0.1,
|
| 16 |
+
"hidden_size": 768,
|
| 17 |
+
"include_nones_as_positives": false,
|
| 18 |
+
"initializer_range": 0.02,
|
| 19 |
+
"intermediate_size": 3072,
|
| 20 |
+
"layer_norm_eps": 1e-12,
|
| 21 |
+
"max_position_embeddings": 4096,
|
| 22 |
+
"model_type": "big_bird",
|
| 23 |
+
"num_attention_heads": 12,
|
| 24 |
+
"num_hidden_layers": 12,
|
| 25 |
+
"num_random_blocks": 3,
|
| 26 |
+
"pad_token_id": 0,
|
| 27 |
+
"position_embedding_type": "absolute",
|
| 28 |
+
"qa_head": {},
|
| 29 |
+
"rescale_embeddings": false,
|
| 30 |
+
"sep_token_id": 66,
|
| 31 |
+
"torch_dtype": "float32",
|
| 32 |
+
"transformers_version": "4.12.2",
|
| 33 |
+
"type_vocab_size": 2,
|
| 34 |
+
"use_bias": true,
|
| 35 |
+
"use_cache": true,
|
| 36 |
+
"vocab_size": 50358
|
| 37 |
+
}
|
quote-attribution-qa__big-bird-base__all_results.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 3.0,
|
| 3 |
+
"eval_e": 0.0011185682326621924,
|
| 4 |
+
"eval_f1": 0.0030897725029911207,
|
| 5 |
+
"eval_loss": 7.352117538452148,
|
| 6 |
+
"eval_runtime": 82.3342,
|
| 7 |
+
"eval_samples": 3576,
|
| 8 |
+
"eval_samples_per_second": 43.433,
|
| 9 |
+
"eval_steps_per_second": 43.433,
|
| 10 |
+
"test_e": 0.5970357941834452,
|
| 11 |
+
"test_f1": 0.5385527992826207,
|
| 12 |
+
"test_loss": 3.199585199356079,
|
| 13 |
+
"test_runtime": 81.8465,
|
| 14 |
+
"test_samples_per_second": 43.692,
|
| 15 |
+
"test_steps_per_second": 43.692,
|
| 16 |
+
"train_loss": 1.5040293447896245,
|
| 17 |
+
"train_runtime": 3366.7923,
|
| 18 |
+
"train_samples": 10138,
|
| 19 |
+
"train_samples_per_second": 9.034,
|
| 20 |
+
"train_steps_per_second": 9.034
|
| 21 |
+
}
|
quote-attribution-qa__big-bird-base__config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "google/bigbird-roberta-base",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"QAModel"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"attention_type": "original_full",
|
| 8 |
+
"block_size": 64,
|
| 9 |
+
"bos_token_id": 1,
|
| 10 |
+
"classifier_dropout": null,
|
| 11 |
+
"eos_token_id": 2,
|
| 12 |
+
"frozen_layers": null,
|
| 13 |
+
"gradient_checkpointing": false,
|
| 14 |
+
"hidden_act": "gelu_new",
|
| 15 |
+
"hidden_dropout_prob": 0.1,
|
| 16 |
+
"hidden_size": 768,
|
| 17 |
+
"include_nones_as_positives": false,
|
| 18 |
+
"initializer_range": 0.02,
|
| 19 |
+
"intermediate_size": 3072,
|
| 20 |
+
"layer_norm_eps": 1e-12,
|
| 21 |
+
"max_position_embeddings": 4096,
|
| 22 |
+
"model_type": "big_bird",
|
| 23 |
+
"num_attention_heads": 12,
|
| 24 |
+
"num_hidden_layers": 12,
|
| 25 |
+
"num_random_blocks": 3,
|
| 26 |
+
"pad_token_id": 0,
|
| 27 |
+
"position_embedding_type": "absolute",
|
| 28 |
+
"qa_head": {},
|
| 29 |
+
"rescale_embeddings": false,
|
| 30 |
+
"sep_token_id": 66,
|
| 31 |
+
"torch_dtype": "float32",
|
| 32 |
+
"transformers_version": "4.12.2",
|
| 33 |
+
"type_vocab_size": 2,
|
| 34 |
+
"use_bias": true,
|
| 35 |
+
"use_cache": true,
|
| 36 |
+
"vocab_size": 50358
|
| 37 |
+
}
|
quote-detection__roberta-base-sentence__all_results.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 3.0,
|
| 3 |
+
"eval_f1": 0.6645335958651243,
|
| 4 |
+
"eval_loss": 0.6792270531125815,
|
| 5 |
+
"eval_runtime": 1.8612,
|
| 6 |
+
"eval_samples": 72,
|
| 7 |
+
"eval_samples_per_second": 38.686,
|
| 8 |
+
"eval_steps_per_second": 38.686,
|
| 9 |
+
"test_f1": 0.8574610244988863,
|
| 10 |
+
"test_loss": 0.41005876731686347,
|
| 11 |
+
"test_runtime": 1.8863,
|
| 12 |
+
"test_samples_per_second": 38.171,
|
| 13 |
+
"test_steps_per_second": 38.171,
|
| 14 |
+
"train_loss": 0.34470706577448884,
|
| 15 |
+
"train_runtime": 143.1766,
|
| 16 |
+
"train_samples": 516,
|
| 17 |
+
"train_samples_per_second": 10.812,
|
| 18 |
+
"train_steps_per_second": 10.812
|
| 19 |
+
}
|
quote-detection__roberta-base-sentence__config.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"SentenceClassificationModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classification_head": {
|
| 8 |
+
"num_labels": 1,
|
| 9 |
+
"pooling_method": "average"
|
| 10 |
+
},
|
| 11 |
+
"classifier_dropout": null,
|
| 12 |
+
"eos_token_id": 2,
|
| 13 |
+
"frozen_layers": [
|
| 14 |
+
0,
|
| 15 |
+
1,
|
| 16 |
+
2,
|
| 17 |
+
3,
|
| 18 |
+
4,
|
| 19 |
+
5,
|
| 20 |
+
6,
|
| 21 |
+
7,
|
| 22 |
+
8
|
| 23 |
+
],
|
| 24 |
+
"hidden_act": "gelu",
|
| 25 |
+
"hidden_dropout_prob": 0.1,
|
| 26 |
+
"hidden_size": 768,
|
| 27 |
+
"initializer_range": 0.02,
|
| 28 |
+
"intermediate_size": 3072,
|
| 29 |
+
"layer_norm_eps": 1e-05,
|
| 30 |
+
"max_position_embeddings": 514,
|
| 31 |
+
"model_type": "roberta",
|
| 32 |
+
"num_attention_heads": 12,
|
| 33 |
+
"num_hidden_layers": 12,
|
| 34 |
+
"pad_token_id": 1,
|
| 35 |
+
"position_embedding_type": "absolute",
|
| 36 |
+
"torch_dtype": "float32",
|
| 37 |
+
"transformers_version": "4.12.2",
|
| 38 |
+
"type_vocab_size": 1,
|
| 39 |
+
"use_cache": true,
|
| 40 |
+
"vocab_size": 50265
|
| 41 |
+
}
|