fortvivlan committed on
Commit
6fa452b
·
verified ·
1 Parent(s): 7838e84

Model save

Browse files
README.md CHANGED
@@ -21,13 +21,13 @@ model-index:
21
  split: validation
22
  metrics:
23
  - type: f1
24
- value: 0.299975408003577
25
  name: Null F1
26
  - type: accuracy
27
- value: 0.8425411096652076
28
  name: Ud Jaccard
29
  - type: accuracy
30
- value: 0.7888746761126963
31
  name: Eud Jaccard
32
  ---
33
 
 
21
  split: validation
22
  metrics:
23
  - type: f1
24
+ value: 0.2499731726074437
25
  name: Null F1
26
  - type: accuracy
27
+ value: 0.8431713191455759
28
  name: Ud Jaccard
29
  - type: accuracy
30
+ value: 0.7898003415210824
31
  name: Eud Jaccard
32
  ---
33
 
config.json CHANGED
@@ -9,7 +9,7 @@
9
  },
10
  "consecutive_null_limit": 3,
11
  "custom_pipelines": {
12
- "cobald-parsing": {
13
  "impl": "pipeline.ConlluTokenClassificationPipeline",
14
  "pt": "CobaldParser"
15
  }
 
9
  },
10
  "consecutive_null_limit": 3,
11
  "custom_pipelines": {
12
+ "conllu-parsing": {
13
  "impl": "pipeline.ConlluTokenClassificationPipeline",
14
  "pt": "CobaldParser"
15
  }
configuration.py CHANGED
@@ -40,7 +40,7 @@ class CobaldParserConfig(PretrainedConfig):
40
  # HACK: Tell HF hub about custom pipeline.
41
  # It should not be hardcoded like this, but other workarounds are worse imo.
42
  self.custom_pipelines = {
43
- "cobald-parsing": {
44
  "impl": "pipeline.ConlluTokenClassificationPipeline",
45
  "pt": "CobaldParser",
46
  }
 
40
  # HACK: Tell HF hub about custom pipeline.
41
  # It should not be hardcoded like this, but other workarounds are worse imo.
42
  self.custom_pipelines = {
43
+ "conllu-parsing": {
44
  "impl": "pipeline.ConlluTokenClassificationPipeline",
45
  "pt": "CobaldParser",
46
  }
dependency_classifier.py CHANGED
@@ -135,14 +135,15 @@ class DependencyHead(DependencyHeadBase):
135
  padding_mask: BoolTensor # [batch_size, seq_len, seq_len]
136
  ) -> Tensor:
137
 
138
- # if self.training:
139
- # # During training, use fast greedy decoding.
140
- # # - [batch_size, seq_len]
141
- # pred_arcs_seq = s_arc.argmax(dim=1)
142
- # else:
143
- # # During inference, decode Maximum Spanning Tree.
144
- # pred_arcs_seq = self._mst_decode(s_arc, padding_mask)
145
- pred_arcs_seq = s_arc.argmax(dim=1)
 
146
 
147
  # Upscale arcs sequence of shape [batch_size, seq_len]
148
  # to matrix of shape [batch_size, seq_len, seq_len].
 
135
  padding_mask: BoolTensor # [batch_size, seq_len, seq_len]
136
  ) -> Tensor:
137
 
138
+ if self.training:
139
+ # During training, use fast greedy decoding.
140
+ # - [batch_size, seq_len]
141
+ pred_arcs_seq = s_arc.argmax(dim=1)
142
+ else:
143
+ # FIXME
144
+ # During inference, decode Maximum Spanning Tree.
145
+ # pred_arcs_seq = self._mst_decode(s_arc, padding_mask)
146
+ pred_arcs_seq = s_arc.argmax(dim=1)
147
 
148
  # Upscale arcs sequence of shape [batch_size, seq_len]
149
  # to matrix of shape [batch_size, seq_len, seq_len].
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6efb7359f986fa0c921dbe5897e6d61de5352983917beac6b6a6039aa182e210
3
  size 1147244460
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e57c51c3b8efeb92a3214f10fb3f6e924f324f1d4e6d8f972c85055f68f5a23
3
  size 1147244460
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f27e710fddb0a40260d69d44da519d4e2e797e87a327e2dee5a897b1fe73e0a
3
  size 5777
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc955910612be351d925d633c8959f5fd6f13ed42a6b3673dffa16f82b33d63f
3
  size 5777
utils.py CHANGED
@@ -57,9 +57,12 @@ def add_nulls(sentences: list[list[str]], counting_mask) -> list[list[str]]:
57
  Return a copy of sentences with nulls restored according to counting masks.
58
  """
59
  sentences_with_nulls = []
60
- for sentence, counting_mask in zip(sentences, counting_mask):
61
  sentence_with_nulls = []
62
- for word, n_nulls_to_insert in zip(sentence, counting_mask):
 
 
 
63
  sentence_with_nulls.append(word)
64
  sentence_with_nulls.extend(["#NULL"] * n_nulls_to_insert)
65
  sentences_with_nulls.append(sentence_with_nulls)
 
57
  Return a copy of sentences with nulls restored according to counting masks.
58
  """
59
  sentences_with_nulls = []
60
+ for sentence, counting_mask in zip(sentences, counting_mask, strict=True):
61
  sentence_with_nulls = []
62
+ assert 0 < len(counting_mask)
63
+ # Account for leading (CLS) auxiliary token.
64
+ sentence_with_nulls.extend(["#NULL"] * counting_mask[0])
65
+ for word, n_nulls_to_insert in zip(sentence, counting_mask[1:], strict=True):
66
  sentence_with_nulls.append(word)
67
  sentence_with_nulls.extend(["#NULL"] * n_nulls_to_insert)
68
  sentences_with_nulls.append(sentence_with_nulls)