fortvivlan committed on
Commit
1e142f5
·
verified ·
1 Parent(s): 49371b3

Upload ConlluTokenClassificationPipeline

Browse files
Files changed (2) hide show
  1. config.json +10 -0
  2. pipeline.py +6 -0
config.json CHANGED
@@ -8,6 +8,16 @@
8
  "AutoModel": "modeling_parser.CobaldParser"
9
  },
10
  "consecutive_null_limit": 3,
 
 
 
 
 
 
 
 
 
 
11
  "deepslot_classifier_hidden_size": 256,
12
  "dependency_classifier_hidden_size": 128,
13
  "dropout": 0.1,
 
8
  "AutoModel": "modeling_parser.CobaldParser"
9
  },
10
  "consecutive_null_limit": 3,
11
+ "custom_pipelines": {
12
+ "conllu-parsing": {
13
+ "impl": "pipeline.ConlluTokenClassificationPipeline",
14
+ "pt": [
15
+ "AutoModel"
16
+ ],
17
+ "tf": [],
18
+ "type": "text"
19
+ }
20
+ },
21
  "deepslot_classifier_hidden_size": 256,
22
  "dependency_classifier_hidden_size": 128,
23
  "dropout": 0.1,
pipeline.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  from transformers import Pipeline
2
 
3
  from src.lemmatize_helper import reconstruct_lemma
@@ -15,6 +17,7 @@ class ConlluTokenClassificationPipeline(Pipeline):
15
  self.tokenizer = tokenizer
16
  self.sentenizer = sentenizer
17
 
 
18
  def _sanitize_parameters(self, output_format: str = 'list', **kwargs):
19
  if output_format not in ['list', 'str']:
20
  raise ValueError(
@@ -23,6 +26,7 @@ class ConlluTokenClassificationPipeline(Pipeline):
23
  # capture output_format for postprocessing
24
  return {}, {}, {'output_format': output_format}
25
 
 
26
  def preprocess(self, inputs: str) -> dict:
27
  if not isinstance(inputs, str):
28
  raise ValueError("pipeline input must be string (text)")
@@ -36,9 +40,11 @@ class ConlluTokenClassificationPipeline(Pipeline):
36
  self._texts = sentences
37
  return {"words": words}
38
 
 
39
  def _forward(self, model_inputs: dict) -> dict:
40
  return self.model(**model_inputs, inference_mode=True)
41
 
 
42
  def postprocess(self, model_outputs: dict, output_format: str) -> list[dict] | str:
43
  sentences = self._decode_model_output(model_outputs)
44
  # Format sentences into CoNLL-U string if requested.
 
1
+ from typing import override
2
+
3
  from transformers import Pipeline
4
 
5
  from src.lemmatize_helper import reconstruct_lemma
 
17
  self.tokenizer = tokenizer
18
  self.sentenizer = sentenizer
19
 
20
+ @override
21
  def _sanitize_parameters(self, output_format: str = 'list', **kwargs):
22
  if output_format not in ['list', 'str']:
23
  raise ValueError(
 
26
  # capture output_format for postprocessing
27
  return {}, {}, {'output_format': output_format}
28
 
29
+ @override
30
  def preprocess(self, inputs: str) -> dict:
31
  if not isinstance(inputs, str):
32
  raise ValueError("pipeline input must be string (text)")
 
40
  self._texts = sentences
41
  return {"words": words}
42
 
43
+ @override
44
  def _forward(self, model_inputs: dict) -> dict:
45
  return self.model(**model_inputs, inference_mode=True)
46
 
47
+ @override
48
  def postprocess(self, model_outputs: dict, output_format: str) -> list[dict] | str:
49
  sentences = self._decode_model_output(model_outputs)
50
  # Format sentences into CoNLL-U string if requested.