Model save

Files changed (7) hide show

README.md CHANGED Viewed

@@ -21,13 +21,13 @@ model-index:
       split: validation
     metrics:
     - type: f1
-      value: 0.299975408003577
       name: Null F1
     - type: accuracy
-      value: 0.8425411096652076
       name: Ud Jaccard
     - type: accuracy
-      value: 0.7888746761126963
       name: Eud Jaccard
 ---

       split: validation
     metrics:
     - type: f1
+      value: 0.2499731726074437
       name: Null F1
     - type: accuracy
+      value: 0.8431713191455759
       name: Ud Jaccard
     - type: accuracy
+      value: 0.7898003415210824
       name: Eud Jaccard
 ---

config.json CHANGED Viewed

@@ -9,7 +9,7 @@
   },
   "consecutive_null_limit": 3,
   "custom_pipelines": {
-    "cobald-parsing": {
       "impl": "pipeline.ConlluTokenClassificationPipeline",
       "pt": "CobaldParser"
     }

   },
   "consecutive_null_limit": 3,
   "custom_pipelines": {
+    "conllu-parsing": {
       "impl": "pipeline.ConlluTokenClassificationPipeline",
       "pt": "CobaldParser"
     }

configuration.py CHANGED Viewed

@@ -40,7 +40,7 @@ class CobaldParserConfig(PretrainedConfig):
         # HACK: Tell HF hub about custom pipeline.
         # It should not be hardcoded like this, but other workarounds are worse imo.
         self.custom_pipelines = {
-            "cobald-parsing": {
                 "impl": "pipeline.ConlluTokenClassificationPipeline",
                 "pt": "CobaldParser",
             }

         # HACK: Tell HF hub about custom pipeline.
         # It should not be hardcoded like this, but other workarounds are worse imo.
         self.custom_pipelines = {
+            "conllu-parsing": {
                 "impl": "pipeline.ConlluTokenClassificationPipeline",
                 "pt": "CobaldParser",
             }

dependency_classifier.py CHANGED Viewed

@@ -135,14 +135,15 @@ class DependencyHead(DependencyHeadBase):
         padding_mask: BoolTensor # [batch_size, seq_len, seq_len]
     ) -> Tensor:
-        # if self.training:
-        #     # During training, use fast greedy decoding.
-        #     # - [batch_size, seq_len]
-        #     pred_arcs_seq = s_arc.argmax(dim=1)
-        # else:
-        #     # During inference, decode Maximum Spanning Tree.
-        #     pred_arcs_seq = self._mst_decode(s_arc, padding_mask)
-        pred_arcs_seq = s_arc.argmax(dim=1)
         # Upscale arcs sequence of shape [batch_size, seq_len]
         # to matrix of shape [batch_size, seq_len, seq_len].

         padding_mask: BoolTensor # [batch_size, seq_len, seq_len]
     ) -> Tensor:
+        if self.training:
+            # During training, use fast greedy decoding.
+            # - [batch_size, seq_len]
+            pred_arcs_seq = s_arc.argmax(dim=1)
+        else:
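+            # FIXME: MST decoding is disabled (see the commented-out call below),
+            # so inference currently uses the same greedy decoding as training.
+            # Intended behavior: during inference, decode Maximum Spanning Tree.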
+            # pred_arcs_seq = self._mst_decode(s_arc, padding_mask)
+            pred_arcs_seq = s_arc.argmax(dim=1)
         # Upscale arcs sequence of shape [batch_size, seq_len]
         # to matrix of shape [batch_size, seq_len, seq_len].

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6efb7359f986fa0c921dbe5897e6d61de5352983917beac6b6a6039aa182e210
 size 1147244460

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e57c51c3b8efeb92a3214f10fb3f6e924f324f1d4e6d8f972c85055f68f5a23
 size 1147244460

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f27e710fddb0a40260d69d44da519d4e2e797e87a327e2dee5a897b1fe73e0a
 size 5777

 version https://git-lfs.github.com/spec/v1
+oid sha256:bc955910612be351d925d633c8959f5fd6f13ed42a6b3673dffa16f82b33d63f
 size 5777

utils.py CHANGED Viewed

@@ -57,9 +57,12 @@ def add_nulls(sentences: list[list[str]], counting_mask) -> list[list[str]]:
     Return a copy of sentences with nulls restored according to counting masks.
     """
     sentences_with_nulls = []
-    for sentence, counting_mask in zip(sentences, counting_mask):
         sentence_with_nulls = []
-        for word, n_nulls_to_insert in zip(sentence, counting_mask):
             sentence_with_nulls.append(word)
             sentence_with_nulls.extend(["#NULL"] * n_nulls_to_insert)
         sentences_with_nulls.append(sentence_with_nulls)

     Return a copy of sentences with nulls restored according to counting masks.
     """
     sentences_with_nulls = []
+    for sentence, counting_mask in zip(sentences, counting_mask, strict=True):
         sentence_with_nulls = []
+        assert 0 < len(counting_mask)
+        # Account for leading (CLS) auxiliary token.
+        sentence_with_nulls.extend(["#NULL"] * counting_mask[0])
+        for word, n_nulls_to_insert in zip(sentence, counting_mask[1:], strict=True):
             sentence_with_nulls.append(word)
             sentence_with_nulls.extend(["#NULL"] * n_nulls_to_insert)
         sentences_with_nulls.append(sentence_with_nulls)