| [[36m2024-05-28 23:33:58,508[39m][[34m__main__[39m][[32mINFO[39m] - Save taskmodule to /home/arne/projects/pie-document-level/models/dataset-sciarg/task-ner_re/v0.3/2024-05-28_23-33-46 [push_to_hub=False] | |
| [[36m2024-05-28 23:33:58,512[39m][[34m__main__[39m][[32mINFO[39m] - Starting training! | |
| [[36m2024-05-28 23:33:59,216[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A02 to token based documents, missed annotations (disable this message with verbose=False): | |
| { | |
| "binary_relations": "{BinaryRelation(head=LabeledSpan(start=23576, end=23668, label='own_claim', score=1.0), tail=LabeledSpan(start=24841, end=25014, label='own_claim', score=1.0), label='semantically_same', score=1.0)}" | |
| } | |
| [[36m2024-05-28 23:33:59,348[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A06 to token based documents, missed annotations (disable this message with verbose=False): | |
| { | |
| "binary_relations": "{BinaryRelation(head=LabeledSpan(start=38474, end=38482, label='own_claim', score=1.0), tail=LabeledSpan(start=39445, end=39547, label='own_claim', score=1.0), label='parts_of_same', score=1.0)}" | |
| } | |
| [[36m2024-05-28 23:33:59,567[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A13 to token based documents, missed annotations (disable this message with verbose=False): | |
| { | |
| "binary_relations": "{BinaryRelation(head=LabeledSpan(start=36143, end=36172, label='own_claim', score=1.0), tail=LabeledSpan(start=37284, end=37312, label='own_claim', score=1.0), label='parts_of_same', score=1.0)}" | |
| } | |
| encode inputs: 37%|ββββββββββββββββββββββββββββββββββββββββββββββββββ | 10/27 [00:01<00:02, 7.38it/s] | |
| [[36m2024-05-28 23:34:00,685[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A22 to token based documents, missed annotations (disable this message with verbose=False): | |
| { | |
| "binary_relations": "{BinaryRelation(head=LabeledSpan(start=34492, end=34542, label='background_claim', score=1.0), tail=LabeledSpan(start=33705, end=33750, label='background_claim', score=1.0), label='supports', score=1.0)}" | |
| } | |
| [[36m2024-05-28 23:34:01,674[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A01 to token based documents, missed annotations (disable this message with verbose=False): | |
| { | |
| "binary_relations": "{BinaryRelation(head=LabeledSpan(start=9590, end=9619, label='own_claim', score=1.0), tail=LabeledSpan(start=15220, end=15338, label='own_claim', score=1.0), label='semantically_same', score=1.0), BinaryRelation(head=LabeledSpan(start=15220, end=15338, label='own_claim', score=1.0), tail=LabeledSpan(start=15478, end=15498, label='own_claim', score=1.0), label='semantically_same', score=1.0), BinaryRelation(head=LabeledSpan(start=17950, end=17980, label='own_claim', score=1.0), tail=LabeledSpan(start=28960, end=29042, label='own_claim', score=1.0), label='semantically_same', score=1.0), BinaryRelation(head=LabeledSpan(start=5866, end=5919, label='own_claim', score=1.0), tail=LabeledSpan(start=9263, end=9308, label='own_claim', score=1.0), label='semantically_same', score=1.0)}" | |
| } | |
| [[36m2024-05-28 23:34:01,906[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A20 to token based documents, missed annotations (disable this message with verbose=False): | |
| { | |
| "binary_relations": "{BinaryRelation(head=LabeledSpan(start=10323, end=10462, label='own_claim', score=1.0), tail=LabeledSpan(start=11788, end=11920, label='own_claim', score=1.0), label='semantically_same', score=1.0)}" | |
| } | |
| [[36m2024-05-28 23:34:02,269[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A29 to token based documents, missed annotations (disable this message with verbose=False): | |
| { | |
| "binary_relations": "{BinaryRelation(head=LabeledSpan(start=19620, end=19626, label='data', score=1.0), tail=LabeledSpan(start=19919, end=19964, label='own_claim', score=1.0), label='supports', score=1.0)}" | |
| encode inputs: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 27/27 [00:03<00:00, 7.83it/s] | |
| encode targets: 0%| | 0/537 [00:00<?, ?it/s] | |
| encode targets: 82%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 442/537 [00:02<00:00, 238.39it/s] | |
| [[36m2024-05-28 23:34:04,875[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A04 to token based documents, missed annotations (disable this message with verbose=False): | |
| { | |
| "binary_relations": "{BinaryRelation(head=LabeledSpan(start=3205, end=3270, label='background_claim', score=1.0), tail=LabeledSpan(start=7724, end=7814, label='background_claim', score=1.0), label='semantically_same', score=1.0)}" | |
| } | |
| βββββββ³βββββββββββββββββββββββββββββββββββββββββββββββββββββββ³ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ³βββββββββ | |
| β[1m [22mβ[1m Name [22mβ[1m Type [22mβ[1m Params [22mβ | |
| β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ© | |
| β 0 β metric_val β WrappedLayerMetricsWithUnbatchAndDecodeWithErrorsFunction β 0 β | |
| β 1 β metric_val.layer_metrics β ModuleDict β 0 β | |
| β 2 β metric_val.layer_metrics.labeled_spans β PrecisionRecallAndF1ForLabeledAnnotations β 0 β | |
| β 3 β metric_val.layer_metrics.binary_relations β PrecisionRecallAndF1ForLabeledAnnotations β 0 β | |
| β 4 β metric_test β WrappedLayerMetricsWithUnbatchAndDecodeWithErrorsFunction β 0 β | |
| β 5 β metric_test.layer_metrics β ModuleDict β 0 β | |
| β 6 β metric_test.layer_metrics.labeled_spans β PrecisionRecallAndF1ForLabeledAnnotations β 0 β | |
| β 7 β metric_test.layer_metrics.binary_relations β PrecisionRecallAndF1ForLabeledAnnotations β 0 β | |
| β 8 β metric_train β WrappedLayerMetricsWithUnbatchAndDecodeWithErrorsFunction β 0 β | |
| β 9 β metric_train.layer_metrics β ModuleDict β 0 β | |
| β 10 β metric_train.layer_metrics.labeled_spans β PrecisionRecallAndF1ForLabeledAnnotations β 0 β | |
| β 11 β metric_train.layer_metrics.binary_relations β PrecisionRecallAndF1ForLabeledAnnotations β 0 β | |
| β 12 β model β BartAsPointerNetwork β 141 M β | |
| β 13 β model.model β BartModelWithDecoderPositionIds β 139 M β | |
| β 14 β model.model.shared β Embedding β 38.6 M β | |
| β 15 β model.model.encoder β BartEncoder β 81.9 M β | |
| β 16 β model.model.encoder.embed_positions β BartLearnedPositionalEmbedding β 787 K β | |
| β 17 β model.model.encoder.layers β ModuleList β 42.5 M β | |
| β 18 β model.model.encoder.layers.0 β BartEncoderLayer β 7.1 M β | |
| β 19 β model.model.encoder.layers.0.self_attn β BartSdpaAttention β 2.4 M β | |
| β 20 β model.model.encoder.layers.0.self_attn.k_proj β Linear β 590 K β | |
| β 21 β model.model.encoder.layers.0.self_attn.v_proj β Linear β 590 K β | |
| β 22 β model.model.encoder.layers.0.self_attn.q_proj β Linear β 590 K β | |
| β 23 β model.model.encoder.layers.0.self_attn.out_proj β Linear β 590 K β | |
| β 24 β model.model.encoder.layers.0.self_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 25 β model.model.encoder.layers.0.activation_fn β GELUActivation β 0 β | |
| β 26 β model.model.encoder.layers.0.fc1 β Linear β 2.4 M β | |
| β 27 β model.model.encoder.layers.0.fc2 β Linear β 2.4 M β | |
| β 28 β model.model.encoder.layers.0.final_layer_norm β LayerNorm β 1.5 K β | |
| β 29 β model.model.encoder.layers.1 β BartEncoderLayer β 7.1 M β | |
| β 30 β model.model.encoder.layers.1.self_attn β BartSdpaAttention β 2.4 M β | |
| β 31 β model.model.encoder.layers.1.self_attn.k_proj β Linear β 590 K β | |
| β 32 β model.model.encoder.layers.1.self_attn.v_proj β Linear β 590 K β | |
| β 33 β model.model.encoder.layers.1.self_attn.q_proj β Linear β 590 K β | |
| β 34 β model.model.encoder.layers.1.self_attn.out_proj β Linear β 590 K β | |
| β 35 β model.model.encoder.layers.1.self_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 36 β model.model.encoder.layers.1.activation_fn β GELUActivation β 0 β | |
| β 37 β model.model.encoder.layers.1.fc1 β Linear β 2.4 M β | |
| β 38 β model.model.encoder.layers.1.fc2 β Linear β 2.4 M β | |
| β 39 β model.model.encoder.layers.1.final_layer_norm β LayerNorm β 1.5 K β | |
| β 40 β model.model.encoder.layers.2 β BartEncoderLayer β 7.1 M β | |
| β 41 β model.model.encoder.layers.2.self_attn β BartSdpaAttention β 2.4 M β | |
| β 42 β model.model.encoder.layers.2.self_attn.k_proj β Linear β 590 K β | |
| β 43 β model.model.encoder.layers.2.self_attn.v_proj β Linear β 590 K β | |
| β 44 β model.model.encoder.layers.2.self_attn.q_proj β Linear β 590 K β | |
| β 45 β model.model.encoder.layers.2.self_attn.out_proj β Linear β 590 K β | |
| β 46 β model.model.encoder.layers.2.self_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 47 β model.model.encoder.layers.2.activation_fn β GELUActivation β 0 β | |
| β 48 β model.model.encoder.layers.2.fc1 β Linear β 2.4 M β | |
| β 49 β model.model.encoder.layers.2.fc2 β Linear β 2.4 M β | |
| β 50 β model.model.encoder.layers.2.final_layer_norm β LayerNorm β 1.5 K β | |
| β 51 β model.model.encoder.layers.3 β BartEncoderLayer β 7.1 M β | |
| β 52 β model.model.encoder.layers.3.self_attn β BartSdpaAttention β 2.4 M β | |
| β 53 β model.model.encoder.layers.3.self_attn.k_proj β Linear β 590 K β | |
| β 54 β model.model.encoder.layers.3.self_attn.v_proj β Linear β 590 K β | |
| β 55 β model.model.encoder.layers.3.self_attn.q_proj β Linear β 590 K β | |
| β 56 β model.model.encoder.layers.3.self_attn.out_proj β Linear β 590 K β | |
| β 57 β model.model.encoder.layers.3.self_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 58 β model.model.encoder.layers.3.activation_fn β GELUActivation β 0 β | |
| β 59 β model.model.encoder.layers.3.fc1 β Linear β 2.4 M β | |
| β 60 β model.model.encoder.layers.3.fc2 β Linear β 2.4 M β | |
| β 61 β model.model.encoder.layers.3.final_layer_norm β LayerNorm β 1.5 K β | |
| β 62 β model.model.encoder.layers.4 β BartEncoderLayer β 7.1 M β | |
| β 63 β model.model.encoder.layers.4.self_attn β BartSdpaAttention β 2.4 M β | |
| β 64 β model.model.encoder.layers.4.self_attn.k_proj β Linear β 590 K β | |
| β 65 β model.model.encoder.layers.4.self_attn.v_proj β Linear β 590 K β | |
| β 66 β model.model.encoder.layers.4.self_attn.q_proj β Linear β 590 K β | |
| β 67 β model.model.encoder.layers.4.self_attn.out_proj β Linear β 590 K β | |
| β 68 β model.model.encoder.layers.4.self_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 69 β model.model.encoder.layers.4.activation_fn β GELUActivation β 0 β | |
| β 70 β model.model.encoder.layers.4.fc1 β Linear β 2.4 M β | |
| β 71 β model.model.encoder.layers.4.fc2 β Linear β 2.4 M β | |
| β 72 β model.model.encoder.layers.4.final_layer_norm β LayerNorm β 1.5 K β | |
| β 73 β model.model.encoder.layers.5 β BartEncoderLayer β 7.1 M β | |
| β 74 β model.model.encoder.layers.5.self_attn β BartSdpaAttention β 2.4 M β | |
| β 75 β model.model.encoder.layers.5.self_attn.k_proj β Linear β 590 K β | |
| β 76 β model.model.encoder.layers.5.self_attn.v_proj β Linear β 590 K β | |
| β 77 β model.model.encoder.layers.5.self_attn.q_proj β Linear β 590 K β | |
| β 78 β model.model.encoder.layers.5.self_attn.out_proj β Linear β 590 K β | |
| β 79 β model.model.encoder.layers.5.self_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 80 β model.model.encoder.layers.5.activation_fn β GELUActivation β 0 β | |
| β 81 β model.model.encoder.layers.5.fc1 β Linear β 2.4 M β | |
| β 82 β model.model.encoder.layers.5.fc2 β Linear β 2.4 M β | |
| β 83 β model.model.encoder.layers.5.final_layer_norm β LayerNorm β 1.5 K β | |
| β 84 β model.model.encoder.layernorm_embedding β LayerNorm β 1.5 K β | |
| β 85 β model.model.decoder β BartDecoderWithPositionIds β 96.1 M β | |
| β 86 β model.model.decoder.embed_positions β BartLearnedPositionalEmbeddingWithPositionIds β 787 K β | |
| β 87 β model.model.decoder.layers β ModuleList β 56.7 M β | |
| β 88 β model.model.decoder.layers.0 β BartDecoderLayer β 9.5 M β | |
| β 89 β model.model.decoder.layers.0.self_attn β BartSdpaAttention β 2.4 M β | |
| β 90 β model.model.decoder.layers.0.self_attn.k_proj β Linear β 590 K β | |
| β 91 β model.model.decoder.layers.0.self_attn.v_proj β Linear β 590 K β | |
| β 92 β model.model.decoder.layers.0.self_attn.q_proj β Linear β 590 K β | |
| β 93 β model.model.decoder.layers.0.self_attn.out_proj β Linear β 590 K β | |
| β 94 β model.model.decoder.layers.0.activation_fn β GELUActivation β 0 β | |
| β 95 β model.model.decoder.layers.0.self_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 96 β model.model.decoder.layers.0.encoder_attn β BartSdpaAttention β 2.4 M β | |
| β 97 β model.model.decoder.layers.0.encoder_attn.k_proj β Linear β 590 K β | |
| β 98 β model.model.decoder.layers.0.encoder_attn.v_proj β Linear β 590 K β | |
| β 99 β model.model.decoder.layers.0.encoder_attn.q_proj β Linear β 590 K β | |
| β 100 β model.model.decoder.layers.0.encoder_attn.out_proj β Linear β 590 K β | |
| β 101 β model.model.decoder.layers.0.encoder_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 102 β model.model.decoder.layers.0.fc1 β Linear β 2.4 M β | |
| β 103 β model.model.decoder.layers.0.fc2 β Linear β 2.4 M β | |
| β 104 β model.model.decoder.layers.0.final_layer_norm β LayerNorm β 1.5 K β | |
| β 105 β model.model.decoder.layers.1 β BartDecoderLayer β 9.5 M β | |
| β 106 β model.model.decoder.layers.1.self_attn β BartSdpaAttention β 2.4 M β | |
| β 107 β model.model.decoder.layers.1.self_attn.k_proj β Linear β 590 K β | |
| β 108 β model.model.decoder.layers.1.self_attn.v_proj β Linear β 590 K β | |
| β 109 β model.model.decoder.layers.1.self_attn.q_proj β Linear β 590 K β | |
| β 110 β model.model.decoder.layers.1.self_attn.out_proj β Linear β 590 K β | |
| β 111 β model.model.decoder.layers.1.activation_fn β GELUActivation β 0 β | |
| β 112 β model.model.decoder.layers.1.self_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 113 β model.model.decoder.layers.1.encoder_attn β BartSdpaAttention β 2.4 M β | |
| β 114 β model.model.decoder.layers.1.encoder_attn.k_proj β Linear β 590 K β | |
| β 115 β model.model.decoder.layers.1.encoder_attn.v_proj β Linear β 590 K β | |
| β 116 β model.model.decoder.layers.1.encoder_attn.q_proj β Linear β 590 K β | |
| β 117 β model.model.decoder.layers.1.encoder_attn.out_proj β Linear β 590 K β | |
| β 118 β model.model.decoder.layers.1.encoder_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 119 β model.model.decoder.layers.1.fc1 β Linear β 2.4 M β | |
| β 120 β model.model.decoder.layers.1.fc2 β Linear β 2.4 M β | |
| β 121 β model.model.decoder.layers.1.final_layer_norm β LayerNorm β 1.5 K β | |
| β 122 β model.model.decoder.layers.2 β BartDecoderLayer β 9.5 M β | |
| β 123 β model.model.decoder.layers.2.self_attn β BartSdpaAttention β 2.4 M β | |
| β 124 β model.model.decoder.layers.2.self_attn.k_proj β Linear β 590 K β | |
| β 125 β model.model.decoder.layers.2.self_attn.v_proj β Linear β 590 K β | |
| β 126 β model.model.decoder.layers.2.self_attn.q_proj β Linear β 590 K β | |
| β 127 β model.model.decoder.layers.2.self_attn.out_proj β Linear β 590 K β | |
| β 128 β model.model.decoder.layers.2.activation_fn β GELUActivation β 0 β | |
| β 129 β model.model.decoder.layers.2.self_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 130 β model.model.decoder.layers.2.encoder_attn β BartSdpaAttention β 2.4 M β | |
| β 131 β model.model.decoder.layers.2.encoder_attn.k_proj β Linear β 590 K β | |
| β 132 β model.model.decoder.layers.2.encoder_attn.v_proj β Linear β 590 K β | |
| β 133 β model.model.decoder.layers.2.encoder_attn.q_proj β Linear β 590 K β | |
| β 134 β model.model.decoder.layers.2.encoder_attn.out_proj β Linear β 590 K β | |
| β 135 β model.model.decoder.layers.2.encoder_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 136 β model.model.decoder.layers.2.fc1 β Linear β 2.4 M β | |
| β 137 β model.model.decoder.layers.2.fc2 β Linear β 2.4 M β | |
| β 138 β model.model.decoder.layers.2.final_layer_norm β LayerNorm β 1.5 K β | |
| β 139 β model.model.decoder.layers.3 β BartDecoderLayer β 9.5 M β | |
| β 140 β model.model.decoder.layers.3.self_attn β BartSdpaAttention β 2.4 M β | |
| β 141 β model.model.decoder.layers.3.self_attn.k_proj β Linear β 590 K β | |
| β 142 β model.model.decoder.layers.3.self_attn.v_proj β Linear β 590 K β | |
| β 143 β model.model.decoder.layers.3.self_attn.q_proj β Linear β 590 K β | |
| β 144 β model.model.decoder.layers.3.self_attn.out_proj β Linear β 590 K β | |
| β 145 β model.model.decoder.layers.3.activation_fn β GELUActivation β 0 β | |
| β 146 β model.model.decoder.layers.3.self_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 147 β model.model.decoder.layers.3.encoder_attn β BartSdpaAttention β 2.4 M β | |
| β 148 β model.model.decoder.layers.3.encoder_attn.k_proj β Linear β 590 K β | |
| β 149 β model.model.decoder.layers.3.encoder_attn.v_proj β Linear β 590 K β | |
| β 150 β model.model.decoder.layers.3.encoder_attn.q_proj β Linear β 590 K β | |
| β 151 β model.model.decoder.layers.3.encoder_attn.out_proj β Linear β 590 K β | |
| β 152 β model.model.decoder.layers.3.encoder_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 153 β model.model.decoder.layers.3.fc1 β Linear β 2.4 M β | |
| β 154 β model.model.decoder.layers.3.fc2 β Linear β 2.4 M β | |
| β 155 β model.model.decoder.layers.3.final_layer_norm β LayerNorm β 1.5 K β | |
| β 156 β model.model.decoder.layers.4 β BartDecoderLayer β 9.5 M β | |
| β 157 β model.model.decoder.layers.4.self_attn β BartSdpaAttention β 2.4 M β | |
| β 158 β model.model.decoder.layers.4.self_attn.k_proj β Linear β 590 K β | |
| β 159 β model.model.decoder.layers.4.self_attn.v_proj β Linear β 590 K β | |
| β 160 β model.model.decoder.layers.4.self_attn.q_proj β Linear β 590 K β | |
| β 161 β model.model.decoder.layers.4.self_attn.out_proj β Linear β 590 K β | |
| β 162 β model.model.decoder.layers.4.activation_fn β GELUActivation β 0 β | |
| β 163 β model.model.decoder.layers.4.self_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 164 β model.model.decoder.layers.4.encoder_attn β BartSdpaAttention β 2.4 M β | |
| β 165 β model.model.decoder.layers.4.encoder_attn.k_proj β Linear β 590 K β | |
| β 166 β model.model.decoder.layers.4.encoder_attn.v_proj β Linear β 590 K β | |
| β 167 β model.model.decoder.layers.4.encoder_attn.q_proj β Linear β 590 K β | |
| β 168 β model.model.decoder.layers.4.encoder_attn.out_proj β Linear β 590 K β | |
| β 169 β model.model.decoder.layers.4.encoder_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 170 β model.model.decoder.layers.4.fc1 β Linear β 2.4 M β | |
| β 171 β model.model.decoder.layers.4.fc2 β Linear β 2.4 M β | |
| β 172 β model.model.decoder.layers.4.final_layer_norm β LayerNorm β 1.5 K β | |
| β 173 β model.model.decoder.layers.5 β BartDecoderLayer β 9.5 M β | |
| β 174 β model.model.decoder.layers.5.self_attn β BartSdpaAttention β 2.4 M β | |
| β 175 β model.model.decoder.layers.5.self_attn.k_proj β Linear β 590 K β | |
| β 176 β model.model.decoder.layers.5.self_attn.v_proj β Linear β 590 K β | |
| β 177 β model.model.decoder.layers.5.self_attn.q_proj β Linear β 590 K β | |
| β 178 β model.model.decoder.layers.5.self_attn.out_proj β Linear β 590 K β | |
| β 179 β model.model.decoder.layers.5.activation_fn β GELUActivation β 0 β | |
| β 180 β model.model.decoder.layers.5.self_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 181 β model.model.decoder.layers.5.encoder_attn β BartSdpaAttention β 2.4 M β | |
| β 182 β model.model.decoder.layers.5.encoder_attn.k_proj β Linear β 590 K β | |
| β 183 β model.model.decoder.layers.5.encoder_attn.v_proj β Linear β 590 K β | |
| β 184 β model.model.decoder.layers.5.encoder_attn.q_proj β Linear β 590 K β | |
| β 185 β model.model.decoder.layers.5.encoder_attn.out_proj β Linear β 590 K β | |
| β 186 β model.model.decoder.layers.5.encoder_attn_layer_norm β LayerNorm β 1.5 K β | |
| β 187 β model.model.decoder.layers.5.fc1 β Linear β 2.4 M β | |
| β 188 β model.model.decoder.layers.5.fc2 β Linear β 2.4 M β | |
| β 189 β model.model.decoder.layers.5.final_layer_norm β LayerNorm β 1.5 K β | |
| β 190 β model.model.decoder.layernorm_embedding β LayerNorm β 1.5 K β | |
| β 191 β model.pointer_head β PointerHead β 41.0 M β | |
| β 192 β model.pointer_head.encoder_mlp β Sequential β 1.2 M β | |
| β 193 β model.pointer_head.encoder_mlp.0 β Linear β 590 K β | |
| β 194 β model.pointer_head.encoder_mlp.1 β Dropout β 0 β | |
| β 195 β model.pointer_head.encoder_mlp.2 β ReLU β 0 β | |
| β 196 β model.pointer_head.encoder_mlp.3 β Linear β 590 K β | |
| β 197 β model.pointer_head.constraints_encoder_mlp β Sequential β 1.2 M β | |
| β 198 β model.pointer_head.constraints_encoder_mlp.0 β Linear β 590 K β | |
| β 199 β model.pointer_head.constraints_encoder_mlp.1 β Dropout β 0 β | |
| β 200 β model.pointer_head.constraints_encoder_mlp.2 β ReLU β 0 β | |
| β 201 β model.pointer_head.constraints_encoder_mlp.3 β Linear β 590 K β | |
| βββββββ΄βββββββββββββββββββββββββββββββββββββββββββββββββββββββ΄ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ΄βββββββββ | |
| [1mTrainable params[22m: 141 M | |
| [1mNon-trainable params[22m: 0 | |
| [1mTotal params[22m: 141 M | |
| [1mTotal estimated model params size (MB)[22m: 567 | |
| /home/arne/miniconda3/envs/pie-document-level/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers | |
| which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance. | |
| /home/arne/miniconda3/envs/pie-document-level/lib/python3.9/site-packages/transformers/generation/utils.py:1197: UserWarning: You have modified the pretrained model configuration to | |
| control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see | |
| https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration ) | |
| encode targets: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 537/537 [00:02<00:00, 222.45it/s] | |
| encode inputs: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 4/4 [00:00<00:00, 10.53it/s] | |
| encode targets: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 72/72 [00:00<00:00, 231.24it/s] | |
| LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] | |
| Loading `train_dataloader` to estimate number of stepping batches. | |
| /home/arne/miniconda3/envs/pie-document-level/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance. | |
| [37mEpoch 0/149[39m [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 5.348 | |
| Epoch 1/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 4.651 loss/val: 5.071 loss/train_epoch: 5.349 | |
| Epoch 2/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.84it/s[39m [37mv_num: 91di loss/train_step: 5.185 loss/val: 3.677 loss/train_epoch: 4.854 | |
| Epoch 3/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 3.813 loss/val: 3.471 loss/train_epoch: 4.415 | |
| Epoch 4/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 2.717 loss/val: 2.595 loss/train_epoch: 4.252 | |
| Epoch 5/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 3.089 loss/val: 2.219 loss/train_epoch: 3.127 | |
| Epoch 6/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 3.199 loss/val: 2.075 loss/train_epoch: 2.683 | |
| Epoch 7/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 2.419 loss/val: 1.917 loss/train_epoch: 2.521 | |
| Epoch 8/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 2.774 loss/val: 1.801 loss/train_epoch: 2.188 | |
| Epoch 9/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 0.161 loss/val: 1.802 loss/train_epoch: 2.138 | |
| Epoch 10/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.95it/s[39m [37mv_num: 91di loss/train_step: 2.037 loss/val: 1.605 loss/train_epoch: 2.026 | |
| Epoch 11/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.88it/s[39m [37mv_num: 91di loss/train_step: 2.007 loss/val: 1.410 loss/train_epoch: 1.764 | |
| Epoch 12/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.76it/s[39m [37mv_num: 91di loss/train_step: 1.451 loss/val: 1.374 loss/train_epoch: 1.689 | |
| Epoch 13/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.92it/s[39m [37mv_num: 91di loss/train_step: 0.793 loss/val: 1.232 loss/train_epoch: 1.381 | |
| Epoch 14/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 1.384 loss/val: 1.248 loss/train_epoch: 1.386 | |
| Epoch 15/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.90it/s[39m [37mv_num: 91di loss/train_step: 1.072 loss/val: 1.184 loss/train_epoch: 1.141 | |
| Epoch 16/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.98it/s[39m [37mv_num: 91di loss/train_step: 0.182 loss/val: 1.105 loss/train_epoch: 1.041 | |
| Epoch 17/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 0.782 loss/val: 1.127 loss/train_epoch: 0.931 | |
| Epoch 18/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.84it/s[39m [37mv_num: 91di loss/train_step: 0.855 loss/val: 1.123 loss/train_epoch: 0.865 | |
| Epoch 19/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.64it/s[39m [37mv_num: 91di loss/train_step: 0.745 loss/val: 1.117 loss/train_epoch: 0.771 | |
| Epoch 20/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m6.08it/s[39m [37mv_num: 91di loss/train_step: 1.241 loss/val: 1.065 loss/train_epoch: 0.750 | |
| Epoch 21/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.81it/s[39m [37mv_num: 91di loss/train_step: 0.645 loss/val: 1.074 loss/train_epoch: 0.737 | |
| Epoch 22/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.75it/s[39m [37mv_num: 91di loss/train_step: 0.422 loss/val: 1.103 loss/train_epoch: 0.682 | |
| Epoch 23/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 0.125 loss/val: 1.159 loss/train_epoch: 0.613 | |
| Epoch 24/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.679 loss/val: 1.104 loss/train_epoch: 0.603 | |
| Epoch 25/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 0.399 loss/val: 1.202 loss/train_epoch: 0.538 | |
| Epoch 26/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 0.559 loss/val: 1.146 loss/train_epoch: 0.538 | |
| Epoch 27/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.75it/s[39m [37mv_num: 91di loss/train_step: 0.696 loss/val: 1.154 loss/train_epoch: 0.492 | |
| Epoch 28/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 0.314 loss/val: 1.223 loss/train_epoch: 0.452 | |
| Epoch 29/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.84it/s[39m [37mv_num: 91di loss/train_step: 0.831 loss/val: 1.245 loss/train_epoch: 0.417 | |
| Epoch 30/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.74it/s[39m [37mv_num: 91di loss/train_step: 0.603 loss/val: 1.228 loss/train_epoch: 0.441 | |
| Epoch 31/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.166 loss/val: 1.257 loss/train_epoch: 0.410 | |
| Epoch 32/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.77it/s[39m [37mv_num: 91di loss/train_step: 0.080 loss/val: 1.371 loss/train_epoch: 0.369 | |
| Epoch 33/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.90it/s[39m [37mv_num: 91di loss/train_step: 0.463 loss/val: 1.380 loss/train_epoch: 0.347 | |
| Epoch 34/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.98it/s[39m [37mv_num: 91di loss/train_step: 0.130 loss/val: 1.409 loss/train_epoch: 0.338 | |
| Epoch 35/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.98it/s[39m [37mv_num: 91di loss/train_step: 0.221 loss/val: 1.524 loss/train_epoch: 0.316 | |
| Epoch 36/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.90it/s[39m [37mv_num: 91di loss/train_step: 0.195 loss/val: 1.582 loss/train_epoch: 0.303 | |
| Epoch 37/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 0.112 loss/val: 1.523 loss/train_epoch: 0.303 | |
| Epoch 38/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.649 loss/val: 1.394 loss/train_epoch: 0.303 | |
| Epoch 39/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.77it/s[39m [37mv_num: 91di loss/train_step: 0.247 loss/val: 1.421 loss/train_epoch: 0.293 | |
| Epoch 40/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.74it/s[39m [37mv_num: 91di loss/train_step: 0.129 loss/val: 1.397 loss/train_epoch: 0.283 | |
| Epoch 41/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 0.269 loss/val: 1.445 loss/train_epoch: 0.247 | |
| Epoch 42/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.187 loss/val: 1.516 loss/train_epoch: 0.245 | |
| Epoch 43/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.94it/s[39m [37mv_num: 91di loss/train_step: 0.422 loss/val: 1.480 loss/train_epoch: 0.238 | |
| Epoch 44/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.315 loss/val: 1.524 loss/train_epoch: 0.227 | |
| Epoch 45/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.94it/s[39m [37mv_num: 91di loss/train_step: 0.203 loss/val: 1.493 loss/train_epoch: 0.223 | |
| Epoch 46/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.097 loss/val: 1.513 loss/train_epoch: 0.228 | |
| Epoch 47/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.202 loss/val: 1.626 loss/train_epoch: 0.215 | |
| Epoch 48/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.198 loss/val: 1.830 loss/train_epoch: 0.196 | |
| Epoch 49/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 0.238 loss/val: 1.544 loss/train_epoch: 0.193 | |
| Epoch 50/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.71it/s[39m [37mv_num: 91di loss/train_step: 0.188 loss/val: 1.625 loss/train_epoch: 0.183 | |
| Epoch 51/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 0.089 loss/val: 1.623 loss/train_epoch: 0.197 | |
| Epoch 52/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 0.090 loss/val: 1.546 loss/train_epoch: 0.212 | |
| Epoch 53/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 0.216 loss/val: 1.505 loss/train_epoch: 0.199 | |
| Epoch 54/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 0.201 loss/val: 1.678 loss/train_epoch: 0.162 | |
| Epoch 55/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.90it/s[39m [37mv_num: 91di loss/train_step: 0.064 loss/val: 1.652 loss/train_epoch: 0.154 | |
| Epoch 56/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.041 loss/val: 1.553 loss/train_epoch: 0.157 | |
| Epoch 57/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.217 loss/val: 1.675 loss/train_epoch: 0.148 | |
| Epoch 58/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.75it/s[39m [37mv_num: 91di loss/train_step: 0.113 loss/val: 1.723 loss/train_epoch: 0.139 | |
| Epoch 59/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.77it/s[39m [37mv_num: 91di loss/train_step: 0.104 loss/val: 1.795 loss/train_epoch: 0.136 | |
| Epoch 60/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.069 loss/val: 1.769 loss/train_epoch: 0.133 | |
| Epoch 61/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m6.06it/s[39m [37mv_num: 91di loss/train_step: 0.127 loss/val: 1.640 loss/train_epoch: 0.133 | |
| Epoch 62/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 0.246 loss/val: 1.746 loss/train_epoch: 0.146 | |
| Epoch 63/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.629 loss/val: 1.645 loss/train_epoch: 0.148 | |
| Epoch 64/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.80it/s[39m [37mv_num: 91di loss/train_step: 0.111 loss/val: 1.741 loss/train_epoch: 0.131 | |
| Epoch 65/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.233 loss/val: 1.862 loss/train_epoch: 0.121 | |
| Epoch 66/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.96it/s[39m [37mv_num: 91di loss/train_step: 0.073 loss/val: 1.758 loss/train_epoch: 0.145 | |
| Epoch 67/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.99it/s[39m [37mv_num: 91di loss/train_step: 0.028 loss/val: 1.794 loss/train_epoch: 0.137 | |
| Epoch 68/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.026 loss/val: 1.717 loss/train_epoch: 0.132 | |
| Epoch 69/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.123 loss/val: 1.874 loss/train_epoch: 0.113 | |
| Epoch 70/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.80it/s[39m [37mv_num: 91di loss/train_step: 0.045 loss/val: 1.794 loss/train_epoch: 0.124 | |
| Epoch 71/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.100 loss/val: 1.785 loss/train_epoch: 0.119 | |
| Epoch 72/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.034 loss/val: 1.798 loss/train_epoch: 0.107 | |
| Epoch 73/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.105 loss/val: 1.776 loss/train_epoch: 0.105 | |
| Epoch 74/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.93it/s[39m [37mv_num: 91di loss/train_step: 0.168 loss/val: 1.799 loss/train_epoch: 0.129 | |
| Epoch 75/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.065 loss/val: 1.889 loss/train_epoch: 0.105 | |
| Epoch 76/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.035 loss/val: 1.877 loss/train_epoch: 0.109 | |
| Epoch 77/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.078 loss/val: 1.928 loss/train_epoch: 0.097 | |
| Epoch 78/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.88it/s[39m [37mv_num: 91di loss/train_step: 0.131 loss/val: 1.931 loss/train_epoch: 0.109 | |
| Epoch 79/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.70it/s[39m [37mv_num: 91di loss/train_step: 0.099 loss/val: 1.792 loss/train_epoch: 0.114 | |
| Epoch 80/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.74it/s[39m [37mv_num: 91di loss/train_step: 0.059 loss/val: 1.861 loss/train_epoch: 0.114 | |
| Epoch 81/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 0.023 loss/val: 1.913 loss/train_epoch: 0.104 | |
| Epoch 82/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.044 loss/val: 1.905 loss/train_epoch: 0.089 | |
| Epoch 83/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.74it/s[39m [37mv_num: 91di loss/train_step: 0.072 loss/val: 1.943 loss/train_epoch: 0.091 | |
| Epoch 84/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 0.083 loss/val: 1.943 loss/train_epoch: 0.089 | |
| Epoch 85/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.80it/s[39m [37mv_num: 91di loss/train_step: 0.039 loss/val: 1.942 loss/train_epoch: 0.100 | |
| Epoch 86/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.040 loss/val: 1.911 loss/train_epoch: 0.080 | |
| Epoch 87/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.76it/s[39m [37mv_num: 91di loss/train_step: 0.056 loss/val: 1.917 loss/train_epoch: 0.088 | |
| Epoch 88/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 0.113 loss/val: 1.911 loss/train_epoch: 0.084 | |
| Epoch 89/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.81it/s[39m [37mv_num: 91di loss/train_step: 0.035 loss/val: 1.832 loss/train_epoch: 0.096 | |
| Epoch 90/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.96it/s[39m [37mv_num: 91di loss/train_step: 0.200 loss/val: 1.940 loss/train_epoch: 0.078 | |
| Epoch 91/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.84it/s[39m [37mv_num: 91di loss/train_step: 0.037 loss/val: 1.810 loss/train_epoch: 0.152 | |
| Epoch 92/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.061 loss/val: 1.882 loss/train_epoch: 0.097 | |
| Epoch 93/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.032 loss/val: 1.924 loss/train_epoch: 0.086 | |
| Epoch 94/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 0.065 loss/val: 1.964 loss/train_epoch: 0.088 | |
| Epoch 95/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 0.175 loss/val: 2.014 loss/train_epoch: 0.072 | |
| Epoch 96/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.098 loss/val: 2.006 loss/train_epoch: 0.082 | |
| Epoch 97/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.039 loss/val: 2.039 loss/train_epoch: 0.073 | |
| Epoch 98/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.75it/s[39m [37mv_num: 91di loss/train_step: 0.183 loss/val: 2.132 loss/train_epoch: 0.087 | |
| Epoch 99/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.369 loss/val: 2.105 loss/train_epoch: 0.076 | |
| Epoch 100/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.70it/s[39m [37mv_num: 91di loss/train_step: 0.041 loss/val: 2.049 loss/train_epoch: 0.076 | |
| Epoch 101/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.94it/s[39m [37mv_num: 91di loss/train_step: 0.036 loss/val: 2.143 loss/train_epoch: 0.077 | |
| Epoch 102/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.77it/s[39m [37mv_num: 91di loss/train_step: 0.081 loss/val: 2.091 loss/train_epoch: 0.075 | |
| Epoch 103/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.75it/s[39m [37mv_num: 91di loss/train_step: 0.033 loss/val: 2.147 loss/train_epoch: 0.076 | |
| Epoch 104/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.88it/s[39m [37mv_num: 91di loss/train_step: 0.132 loss/val: 2.083 loss/train_epoch: 0.081 | |
| Epoch 105/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.73it/s[39m [37mv_num: 91di loss/train_step: 0.021 loss/val: 2.136 loss/train_epoch: 0.079 | |
| Epoch 106/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.77it/s[39m [37mv_num: 91di loss/train_step: 0.175 loss/val: 2.173 loss/train_epoch: 0.068 | |
| Epoch 107/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.96it/s[39m [37mv_num: 91di loss/train_step: 0.032 loss/val: 2.089 loss/train_epoch: 0.075 | |
| Epoch 108/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.84it/s[39m [37mv_num: 91di loss/train_step: 0.038 loss/val: 2.221 loss/train_epoch: 0.071 | |
| Epoch 109/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.046 loss/val: 2.048 loss/train_epoch: 0.077 | |
| Epoch 110/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 0.033 loss/val: 2.112 loss/train_epoch: 0.069 | |
| Epoch 111/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.84it/s[39m [37mv_num: 91di loss/train_step: 0.043 loss/val: 2.121 loss/train_epoch: 0.068 | |
| Epoch 112/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.63it/s[39m [37mv_num: 91di loss/train_step: 0.033 loss/val: 2.148 loss/train_epoch: 0.072 | |
| Epoch 113/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.80it/s[39m [37mv_num: 91di loss/train_step: 0.035 loss/val: 2.160 loss/train_epoch: 0.068 | |
| Epoch 114/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.88it/s[39m [37mv_num: 91di loss/train_step: 0.047 loss/val: 2.233 loss/train_epoch: 0.075 | |
| Epoch 115/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 0.056 loss/val: 2.143 loss/train_epoch: 0.066 | |
| Epoch 116/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.77it/s[39m [37mv_num: 91di loss/train_step: 0.047 loss/val: 2.137 loss/train_epoch: 0.064 | |
| Epoch 117/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.065 loss/val: 2.219 loss/train_epoch: 0.061 | |
| Epoch 118/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.99it/s[39m [37mv_num: 91di loss/train_step: 0.022 loss/val: 2.201 loss/train_epoch: 0.072 | |
| Epoch 119/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.68it/s[39m [37mv_num: 91di loss/train_step: 0.023 loss/val: 2.236 loss/train_epoch: 0.070 | |
| Epoch 120/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.71it/s[39m [37mv_num: 91di loss/train_step: 0.030 loss/val: 2.254 loss/train_epoch: 0.066 | |
| Epoch 121/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.052 loss/val: 2.163 loss/train_epoch: 0.071 | |
| Epoch 122/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.70it/s[39m [37mv_num: 91di loss/train_step: 0.055 loss/val: 2.212 loss/train_epoch: 0.075 | |
| Epoch 123/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 0.026 loss/val: 2.267 loss/train_epoch: 0.067 | |
| Epoch 124/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.92it/s[39m [37mv_num: 91di loss/train_step: 0.020 loss/val: 2.247 loss/train_epoch: 0.071 | |
| Epoch 125/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.072 loss/val: 2.169 loss/train_epoch: 0.070 | |
| Epoch 126/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.685 loss/val: 2.217 loss/train_epoch: 0.063 | |
| Epoch 127/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.046 loss/val: 2.225 loss/train_epoch: 0.065 | |
| Epoch 128/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 0.021 loss/val: 2.262 loss/train_epoch: 0.059 | |
| Epoch 129/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.030 loss/val: 2.257 loss/train_epoch: 0.060 | |
| Epoch 130/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.74it/s[39m [37mv_num: 91di loss/train_step: 0.038 loss/val: 2.274 loss/train_epoch: 0.059 | |
| Epoch 131/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.030 loss/val: 2.288 loss/train_epoch: 0.059 | |
| Epoch 132/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.413 loss/val: 2.227 loss/train_epoch: 0.064 | |
| Epoch 133/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.023 loss/val: 2.291 loss/train_epoch: 0.061 | |
| Epoch 134/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.76it/s[39m [37mv_num: 91di loss/train_step: 0.038 loss/val: 2.346 loss/train_epoch: 0.058 | |
| Epoch 135/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.98it/s[39m [37mv_num: 91di loss/train_step: 0.040 loss/val: 2.342 loss/train_epoch: 0.056 | |
| Epoch 136/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.024 loss/val: 2.382 loss/train_epoch: 0.055 | |
| Epoch 137/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.88it/s[39m [37mv_num: 91di loss/train_step: 0.060 loss/val: 2.370 loss/train_epoch: 0.060 | |
| Epoch 138/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.80it/s[39m [37mv_num: 91di loss/train_step: 0.029 loss/val: 2.350 loss/train_epoch: 0.060 | |
| Epoch 139/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.97it/s[39m [37mv_num: 91di loss/train_step: 0.020 loss/val: 2.357 loss/train_epoch: 0.061 | |
| Epoch 140/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.018 loss/val: 2.340 loss/train_epoch: 0.074 | |
| Epoch 141/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 0.038 loss/val: 2.392 loss/train_epoch: 0.056 | |
| Epoch 142/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.92it/s[39m [37mv_num: 91di loss/train_step: 0.023 loss/val: 2.362 loss/train_epoch: 0.062 | |
| Epoch 143/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.98it/s[39m [37mv_num: 91di loss/train_step: 0.057 loss/val: 2.396 loss/train_epoch: 0.068 | |
| Epoch 144/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 0.053 loss/val: 2.407 loss/train_epoch: 0.064 | |
| Epoch 145/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.181 loss/val: 2.387 loss/train_epoch: 0.061 | |
| Epoch 146/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.031 loss/val: 2.404 loss/train_epoch: 0.065 | |
| Epoch 147/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.028 loss/val: 2.356 loss/train_epoch: 0.052 | |
| Epoch 148/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 0.030 loss/val: 2.376 loss/train_epoch: 0.058 | |
| Epoch 149/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 0.028 loss/val: 2.393 loss/train_epoch: 0.051 | |
| [37mValidation[39m [38mβββββββββββββββββββββββββββββββββββββΈβββ[39m [37m33/36 [39m [38m0:00:59 β’ 0:00:06[39m [38m0.58it/s | |
| `Trainer.fit` stopped: `max_epochs=150` reached. | |
| /home/arne/miniconda3/envs/pie-document-level/lib/python3.9/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. | |
| Epoch 149/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 0.028 loss/val: 2.378 loss/train_epoch: 0.059 | |
| [?25h[[36m2024-05-29 04:19:31,092[39m][[34m__main__[39m][[32mINFO[39m] - Best ckpt path: /mnt/data/experiments/pie-document-level/logs/training/multiruns/dataset-sciarg/task-ner_re/v0.3/2024-05-28_18-12-11/1/checkpoints/epoch_146.ckpt | |
| Some weights of BartAsPointerNetwork were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['pointer_head.constraints_encoder_mlp.0.bias', 'pointer_head.constraints_encoder_mlp.0.weight', 'pointer_head.constraints_encoder_mlp.3.bias', 'pointer_head.constraints_encoder_mlp.3.weight', 'pointer_head.decoder_position_id_pattern', 'pointer_head.encoder_mlp.0.bias', 'pointer_head.encoder_mlp.0.weight', 'pointer_head.encoder_mlp.3.bias', 'pointer_head.encoder_mlp.3.weight', 'pointer_head.target2token_id'] | |
| You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. | |
| encode inputs: 50%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2/4 [00:00<00:00, 10.79it/s] | |
| [[36m2024-05-29 04:19:34,073[39m][[34m__main__[39m][[32mINFO[39m] - Save model to /home/arne/projects/pie-document-level/models/dataset-sciarg/task-ner_re/v0.3/2024-05-28_23-33-46 [push_to_hub=False] | |
| [[36m2024-05-29 04:19:34,508[39m][[34m__main__[39m][[32mINFO[39m] - Starting validation! | |
| [[36m2024-05-29 04:19:34,626[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A04 to token based documents, missed annotations (disable this message with verbose=False): | |
| { | |
| "binary_relations": "{BinaryRelation(head=LabeledSpan(start=3205, end=3270, label='background_claim', score=1.0), tail=LabeledSpan(start=7724, end=7814, label='background_claim', score=1.0), label='semantically_same', score=1.0)}" | |
| encode inputs: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 4/4 [00:00<00:00, 10.55it/s] | |
| encode targets: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 72/72 [00:00<00:00, 238.68it/s] | |
| Restoring states from the checkpoint path at /mnt/data/experiments/pie-document-level/logs/training/multiruns/dataset-sciarg/task-ner_re/v0.3/2024-05-28_18-12-11/1/checkpoints/epoch_146.ckpt | |
| LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] | |
| Loaded model weights from the checkpoint at /mnt/data/experiments/pie-document-level/logs/training/multiruns/dataset-sciarg/task-ner_re/v0.3/2024-05-28_18-12-11/1/checkpoints/epoch_146.ckpt | |
| [37mValidation[39m [38mβββββββββββββββββββββββββββββββββββββΈβββ[39m [37m33/36[39m [38m0:00:57 β’ 0:00:05[39m [38m0.64it/s | |
| βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ³ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| β[1m Validate metric [22mβ[1m DataLoader 0 [22mβ | |
| β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ© | |
| β[36m loss/val [39mβ[35m 2.3559770584106445 [39mβ | |
| β[36m metric/binary_relations/contradicts/f1/val [39mβ[35m 0.20481927692890167 [39mβ | |
| β[36m metric/binary_relations/contradicts/precision/val [39mβ[35m 0.2266666740179062 [39mβ | |
| β[36m metric/binary_relations/contradicts/recall/val [39mβ[35m 0.18681319057941437 [39mβ | |
| β[36m metric/binary_relations/macro/f1/val [39mβ[35m 0.16949915885925293 [39mβ | |
| β[36m metric/binary_relations/macro/precision/val [39mβ[35m 0.1830090433359146 [39mβ | |
| β[36m metric/binary_relations/macro/recall/val [39mβ[35m 0.15798917412757874 [39mβ | |
| β[36m metric/binary_relations/micro/f1/val [39mβ[35m 0.23862887918949127 [39mβ | |
| β[36m metric/binary_relations/micro/precision/val [39mβ[35m 0.25931233167648315 [39mβ | |
| β[36m metric/binary_relations/micro/recall/val [39mβ[35m 0.22100122272968292 [39mβ | |
| β[36m metric/binary_relations/parts_of_same/f1/val [39mβ[35m 0.22608695924282074 [39mβ | |
| β[36m metric/binary_relations/parts_of_same/precision/val [39mβ[35m 0.23636363446712494 [39mβ | |
| β[36m metric/binary_relations/parts_of_same/recall/val [39mβ[35m 0.21666666865348816 [39mβ | |
| β[36m metric/binary_relations/semantically_same/f1/val [39mβ[35m 0.0 [39mβ | |
| β[36m metric/binary_relations/semantically_same/precision/val [39mβ[35m 0.0 [39mβ | |
| β[36m metric/binary_relations/semantically_same/recall/val [39mβ[35m 0.0 [39mβ | |
| β[36m metric/binary_relations/supports/f1/val [39mβ[35m 0.2470904141664505 [39mβ | |
| β[36m metric/binary_relations/supports/precision/val [39mβ[35m 0.26900583505630493 [39mβ | |
| β[36m metric/binary_relations/supports/recall/val [39mβ[35m 0.22847682237625122 [39mβ | |
| β[36m metric/decoding_errors/all/val [39mβ[35m 0.0 [39mβ | |
| β[36m metric/decoding_errors/correct/val [39mβ[35m 1.0 [39mβ | |
| β[36m metric/exact_encoding_matches/val [39mβ[35m 0.3611111044883728 [39mβ | |
| β[36m metric/labeled_spans/background_claim/f1/val [39mβ[35m 0.5077399015426636 [39mβ | |
| β[36m metric/labeled_spans/background_claim/precision/val [39mβ[35m 0.4984802305698395 [39mβ | |
| β[36m metric/labeled_spans/background_claim/recall/val [39mβ[35m 0.5173501372337341 [39mβ | |
| β[36m metric/labeled_spans/data/f1/val [39mβ[35m 0.536285400390625 [39mβ | |
| β[36m metric/labeled_spans/data/precision/val [39mβ[35m 0.5797872543334961 [39mβ | |
| β[36m metric/labeled_spans/data/recall/val [39mβ[35m 0.4988558292388916 [39mβ | |
| β[36m metric/labeled_spans/macro/f1/val [39mβ[35m 0.49655672907829285 [39mβ | |
| β[36m metric/labeled_spans/macro/precision/val [39mβ[35m 0.4986630082130432 [39mβ | |
| β[36m metric/labeled_spans/macro/recall/val [39mβ[35m 0.49792489409446716 [39mβ | |
| β[36m metric/labeled_spans/micro/f1/val [39mβ[35m 0.4843537509441376 [39mβ | |
| β[36m metric/labeled_spans/micro/precision/val [39mβ[35m 0.4762541949748993 [39mβ | |
| β[36m metric/labeled_spans/micro/recall/val [39mβ[35m 0.49273356795310974 [39mβ | |
| β[36m metric/labeled_spans/own_claim/f1/val [39mβ[35m 0.4456448256969452 [39mβ | |
| β[36m metric/labeled_spans/own_claim/precision/val [39mβ[35m 0.4177215099334717 [39mβ | |
| β[36m metric/labeled_spans/own_claim/recall/val [39mβ[35m 0.47756874561309814 [39mβ | |
| βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ΄ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| [37mValidation[39m [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m36/36[39m [38m0:01:00 β’ 0:00:00[39m [38m0.64it/s | |
| encode inputs: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 9/9 [00:01<00:00, 7.49it/s] | |
| encode targets: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 165/165 [00:00<00:00, 257.10it/s] | |
| Restoring states from the checkpoint path at /mnt/data/experiments/pie-document-level/logs/training/multiruns/dataset-sciarg/task-ner_re/v0.3/2024-05-28_18-12-11/1/checkpoints/epoch_146.ckpt | |
| [[36m2024-05-29 04:20:37,543[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A35 to token based documents, missed annotations (disable this message with verbose=False): | |
| { | |
| "binary_relations": "{BinaryRelation(head=LabeledSpan(start=20713, end=20836, label='own_claim', score=1.0), tail=LabeledSpan(start=19655, end=19670, label='own_claim', score=1.0), label='contradicts', score=1.0)}" | |
| } | |
| [[36m2024-05-29 04:20:38,104[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A40 to token based documents, missed annotations (disable this message with verbose=False): | |
| { | |
| "binary_relations": "{BinaryRelation(head=LabeledSpan(start=16497, end=16501, label='data', score=1.0), tail=LabeledSpan(start=17415, end=17613, label='background_claim', score=1.0), label='supports', score=1.0)}" | |
| } | |
| LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] | |
| Loaded model weights from the checkpoint at /mnt/data/experiments/pie-document-level/logs/training/multiruns/dataset-sciarg/task-ner_re/v0.3/2024-05-28_18-12-11/1/checkpoints/epoch_146.ckpt | |
| /home/arne/miniconda3/envs/pie-document-level/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance. | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ³βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| β[1m Test metric [22mβ[1m DataLoader 0 [22mβ | |
| β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ© | |
| β[36m loss/test [39mβ[35m 3.026346206665039 [39mβ | |
| β[36m metric/binary_relations/contradicts/f1/test [39mβ[35m 0.22545455396175385 [39mβ | |
| β[36m metric/binary_relations/contradicts/precision/test [39mβ[35m 0.2246376872062683 [39mβ | |
| β[36m metric/binary_relations/contradicts/recall/test [39mβ[35m 0.22627736628055573 [39mβ | |
| β[36m metric/binary_relations/macro/f1/test [39mβ[35m 0.2669767737388611 [39mβ | |
| β[36m metric/binary_relations/macro/precision/test [39mβ[35m 0.24492204189300537 [39mβ | |
| β[36m metric/binary_relations/macro/recall/test [39mβ[35m 0.31745821237564087 [39mβ | |
| β[36m metric/binary_relations/micro/f1/test [39mβ[35m 0.21676044166088104 [39mβ | |
| β[36m metric/binary_relations/micro/precision/test [39mβ[35m 0.22932331264019012 [39mβ | |
| β[36m metric/binary_relations/micro/recall/test [39mβ[35m 0.20550252497196198 [39mβ | |
| β[36m metric/binary_relations/parts_of_same/f1/test [39mβ[35m 0.17359857261180878 [39mβ | |
| β[36m metric/binary_relations/parts_of_same/precision/test [39mβ[35m 0.1818181872367859 [39mβ | |
| β[36m metric/binary_relations/parts_of_same/recall/test [39mβ[35m 0.1660899668931961 [39mβ | |
| β[36m metric/binary_relations/semantically_same/f1/test [39mβ[35m 0.444444477558136 [39mβ | |
| β[36m metric/binary_relations/semantically_same/precision/test [39mβ[35m 0.3333333432674408 [39mβ | |
| β[36m metric/binary_relations/semantically_same/recall/test [39mβ[35m 0.6666666865348816 [39mβ | |
| β[36m metric/binary_relations/supports/f1/test [39mβ[35m 0.22440946102142334 [39mβ | |
| β[36m metric/binary_relations/supports/precision/test [39mβ[35m 0.23989899456501007 [39mβ | |
| β[36m metric/binary_relations/supports/recall/test [39mβ[35m 0.21079881489276886 [39mβ | |
| β[36m metric/decoding_errors/all/test [39mβ[35m 0.20856545865535736 [39mβ | |
| β[36m metric/decoding_errors/correct/test [39mβ[35m 0.7914345264434814 [39mβ | |
| β[36m metric/decoding_errors/index/test [39mβ[35m 0.00034818940912373364 [39mβ | |
| β[36m metric/decoding_errors/len/test [39mβ[35m 0.20821726322174072 [39mβ | |
| β[36m metric/exact_encoding_matches/test [39mβ[35m 0.34545454382896423 [39mβ | |
| β[36m metric/labeled_spans/background_claim/f1/test [39mβ[35m 0.42137405276298523 [39mβ | |
| β[36m metric/labeled_spans/background_claim/precision/test [39mβ[35m 0.4502446949481964 [39mβ | |
| β[36m metric/labeled_spans/background_claim/recall/test [39mβ[35m 0.3959827721118927 [39mβ | |
| β[36m metric/labeled_spans/data/f1/test [39mβ[35m 0.5156335234642029 [39mβ | |
| β[36m metric/labeled_spans/data/precision/test [39mβ[35m 0.5328798294067383 [39mβ | |
| β[36m metric/labeled_spans/data/recall/test [39mβ[35m 0.4994686543941498 [39mβ | |
| β[36m metric/labeled_spans/macro/f1/test [39mβ[35m 0.43463802337646484 [39mβ | |
| β[36m metric/labeled_spans/macro/precision/test [39mβ[35m 0.4366190433502197 [39mβ | |
| β[36m metric/labeled_spans/macro/recall/test [39mβ[35m 0.43793198466300964 [39mβ | |
| β[36m metric/labeled_spans/micro/f1/test [39mβ[35m 0.42222580313682556 [39mβ | |
| β[36m metric/labeled_spans/micro/precision/test [39mβ[35m 0.40691158175468445 [39mβ | |
| β[36m metric/labeled_spans/micro/recall/test [39mβ[35m 0.4387378394603729 [39mβ | |
| β[36m metric/labeled_spans/own_claim/f1/test [39mβ[35m 0.36690646409988403 [39mβ | |
| β[36m metric/labeled_spans/own_claim/precision/test [39mβ[35m 0.32673266530036926 [39mβ | |
| β[36m metric/labeled_spans/own_claim/recall/test [39mβ[35m 0.41834452748298645 [39mβ | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ΄βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| [37mTesting[39m [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m83/83[39m [38m0:02:19 β’ 0:00:00[39m [38m0.66it/s | |
| [?25h[[36m2024-05-29 04:22:59,545[39m][[34msrc.utils.logging_utils[39m][[32mINFO[39m] - Closing loggers... | |
| [[36m2024-05-29 04:22:59,545[39m][[34msrc.utils.logging_utils[39m][[32mINFO[39m] - Closing wandb! |