HoneyTian committed on
Commit
1e90767
·
1 Parent(s): f25ad53
examples/sound_classification_by_lstm/step_6_export_onnx_model.py CHANGED
@@ -14,14 +14,13 @@ import onnxruntime as ort
14
  import torch
15
 
16
  from toolbox.torch.utils.data.vocabulary import Vocabulary
17
- from toolbox.torchaudio.models.lstm_audio_classifier.modeling_lstm_audio_classifier import WaveClassifierPreprocessExport, WaveClassifierExport
18
 
19
 
20
  def get_args():
21
  parser = argparse.ArgumentParser()
22
  parser.add_argument("--vocabulary_dir", default="file_dir/best/vocabulary", type=str)
23
  parser.add_argument("--model_dir", default="file_dir/best", type=str)
24
- parser.add_argument("--onnx_preprocess_file", default="preprocess.onnx", type=str)
25
  parser.add_argument("--onnx_model_file", default="model.onnx", type=str)
26
 
27
  args = parser.parse_args()
@@ -46,7 +45,6 @@ def logging_config():
46
  def main():
47
  args = get_args()
48
 
49
- onnx_preprocess_file = Path(args.onnx_preprocess_file)
50
  onnx_model_file = Path(args.onnx_model_file)
51
 
52
  logger = logging_config()
@@ -57,13 +55,6 @@ def main():
57
  logger.info("prepare vocabulary, model")
58
  vocabulary = Vocabulary.from_files(args.vocabulary_dir)
59
 
60
- model_preprocess_export = WaveClassifierPreprocessExport.from_pretrained(
61
- pretrained_model_name_or_path=args.model_dir,
62
- num_labels=vocabulary.get_vocab_size(namespace="labels")
63
- )
64
- model_preprocess_export.to(device)
65
- model_preprocess_export.eval()
66
-
67
  model_export = WaveClassifierExport.from_pretrained(
68
  pretrained_model_name_or_path=args.model_dir,
69
  num_labels=vocabulary.get_vocab_size(namespace="labels")
@@ -78,33 +69,8 @@ def main():
78
  waveform = torch.unsqueeze(waveform, dim=0)
79
  waveform = waveform.to(device)
80
 
81
- logger.info("export onnx preprocess models")
82
- torch.onnx.export(model_preprocess_export,
83
- args=(waveform,),
84
- f=onnx_preprocess_file.as_posix(),
85
- input_names=["inputs"],
86
- output_names=["spec"],
87
- dynamic_axes={
88
- "inputs": {1: "num_samples"},
89
- }
90
- )
91
-
92
- preprocess_ort_session = ort.InferenceSession(onnx_preprocess_file.as_posix())
93
- input_feed = {
94
- "inputs": waveform.numpy(),
95
- }
96
- output_names = [
97
- "spec",
98
- ]
99
- outputs = preprocess_ort_session.run(output_names, input_feed)
100
- spec = outputs[0]
101
- # shape = [b, t, f]
102
-
103
  logger.info("export onnx models")
104
 
105
- inputs = spec
106
- inputs = torch.tensor(inputs, dtype=torch.float32)
107
-
108
  lstm_layer_param = model_export.config.lstm_layer_param
109
  num_layers = lstm_layer_param["num_layers"]
110
  hidden_size = lstm_layer_param["hidden_size"]
@@ -112,7 +78,7 @@ def main():
112
  c = torch.rand(size=(num_layers, 1, hidden_size), dtype=torch.float32)
113
 
114
  torch.onnx.export(model_export,
115
- args=(inputs, h, c),
116
  f=onnx_model_file.as_posix(),
117
  input_names=["inputs", "h", "c"],
118
  output_names=[
@@ -125,7 +91,7 @@ def main():
125
 
126
  model_ort_session = ort.InferenceSession(onnx_model_file.as_posix())
127
  input_feed = {
128
- "inputs": inputs.numpy(),
129
  "h": h.numpy(),
130
  "c": c.numpy(),
131
  }
 
14
  import torch
15
 
16
  from toolbox.torch.utils.data.vocabulary import Vocabulary
17
+ from toolbox.torchaudio.models.lstm_audio_classifier.modeling_lstm_audio_classifier import WaveClassifierExport
18
 
19
 
20
  def get_args():
21
  parser = argparse.ArgumentParser()
22
  parser.add_argument("--vocabulary_dir", default="file_dir/best/vocabulary", type=str)
23
  parser.add_argument("--model_dir", default="file_dir/best", type=str)
 
24
  parser.add_argument("--onnx_model_file", default="model.onnx", type=str)
25
 
26
  args = parser.parse_args()
 
45
  def main():
46
  args = get_args()
47
 
 
48
  onnx_model_file = Path(args.onnx_model_file)
49
 
50
  logger = logging_config()
 
55
  logger.info("prepare vocabulary, model")
56
  vocabulary = Vocabulary.from_files(args.vocabulary_dir)
57
 
 
 
 
 
 
 
 
58
  model_export = WaveClassifierExport.from_pretrained(
59
  pretrained_model_name_or_path=args.model_dir,
60
  num_labels=vocabulary.get_vocab_size(namespace="labels")
 
69
  waveform = torch.unsqueeze(waveform, dim=0)
70
  waveform = waveform.to(device)
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  logger.info("export onnx models")
73
 
 
 
 
74
  lstm_layer_param = model_export.config.lstm_layer_param
75
  num_layers = lstm_layer_param["num_layers"]
76
  hidden_size = lstm_layer_param["hidden_size"]
 
78
  c = torch.rand(size=(num_layers, 1, hidden_size), dtype=torch.float32)
79
 
80
  torch.onnx.export(model_export,
81
+ args=(waveform, h, c),
82
  f=onnx_model_file.as_posix(),
83
  input_names=["inputs", "h", "c"],
84
  output_names=[
 
91
 
92
  model_ort_session = ort.InferenceSession(onnx_model_file.as_posix())
93
  input_feed = {
94
+ "inputs": waveform.numpy(),
95
  "h": h.numpy(),
96
  "c": c.numpy(),
97
  }
examples/sound_classification_by_lstm/step_9_evaluation_onnx_model.py CHANGED
@@ -40,7 +40,6 @@ def get_args():
40
  parser.add_argument("--dataset", default="evaluation.xlsx", type=str)
41
  parser.add_argument("--vocabulary_dir", default="vocabulary", type=str)
42
  parser.add_argument("--model_dir", default="best", type=str)
43
- parser.add_argument("--onnx_preprocess_file", default="preprocess.onnx", type=str)
44
  parser.add_argument("--onnx_model_file", default="model.onnx", type=str)
45
  parser.add_argument("--output_file", default="evaluation_onnx.xlsx", type=str)
46
  # parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu", type=str)
@@ -70,23 +69,17 @@ def main():
70
  f_zip.extractall(path=out_root)
71
  tgt_path = out_root / model_file.stem
72
  config_file = tgt_path / "config.yaml"
73
- onnx_preprocess_file = tgt_path / "preprocess.onnx"
74
  onnx_model_file = tgt_path / "model.onnx"
75
  vocab_path = tgt_path / "vocabulary"
76
  evaluation_file = tgt_path / "evaluation.xlsx"
77
  else:
78
  config_file = model_dir / "config.yaml"
79
- onnx_preprocess_file = Path(args.onnx_preprocess_file)
80
  onnx_model_file = Path(args.onnx_model_file)
81
  vocab_path = Path(args.vocabulary_dir)
82
  evaluation_file = Path(args.dataset)
83
 
84
  config = WaveClassifierConfig.from_pretrained(config_file.as_posix())
85
- preprocess_ort_session = ort.InferenceSession(
86
- onnx_preprocess_file.as_posix(),
87
- providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
88
- )
89
- model_ort_session = ort.InferenceSession(
90
  onnx_model_file.as_posix(),
91
  providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
92
  )
@@ -132,25 +125,16 @@ def main():
132
  end = begin + 4000
133
 
134
  waveform_ = waveform[:, begin: end]
135
- input_feed = {
136
- "inputs": waveform_.numpy(),
137
- }
138
- output_names = [
139
- "spec",
140
- ]
141
- outputs = preprocess_ort_session.run(output_names, input_feed)
142
- # shape = [b, t, f]
143
- inputs: np.ndarray = outputs[0]
144
 
145
  input_feed = {
146
- "inputs": inputs,
147
  "h": h,
148
  "c": c,
149
  }
150
  output_names = [
151
  "logits", "new_h", "new_c"
152
  ]
153
- logits, new_h, new_c = model_ort_session.run(output_names, input_feed)
154
  # print(f"logits: {logits.shape}")
155
  # print(f"new_h: {new_h.shape}")
156
  # print(f"new_c: {new_c.shape}")
 
40
  parser.add_argument("--dataset", default="evaluation.xlsx", type=str)
41
  parser.add_argument("--vocabulary_dir", default="vocabulary", type=str)
42
  parser.add_argument("--model_dir", default="best", type=str)
 
43
  parser.add_argument("--onnx_model_file", default="model.onnx", type=str)
44
  parser.add_argument("--output_file", default="evaluation_onnx.xlsx", type=str)
45
  # parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu", type=str)
 
69
  f_zip.extractall(path=out_root)
70
  tgt_path = out_root / model_file.stem
71
  config_file = tgt_path / "config.yaml"
 
72
  onnx_model_file = tgt_path / "model.onnx"
73
  vocab_path = tgt_path / "vocabulary"
74
  evaluation_file = tgt_path / "evaluation.xlsx"
75
  else:
76
  config_file = model_dir / "config.yaml"
 
77
  onnx_model_file = Path(args.onnx_model_file)
78
  vocab_path = Path(args.vocabulary_dir)
79
  evaluation_file = Path(args.dataset)
80
 
81
  config = WaveClassifierConfig.from_pretrained(config_file.as_posix())
82
+ ort_session = ort.InferenceSession(
 
 
 
 
83
  onnx_model_file.as_posix(),
84
  providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
85
  )
 
125
  end = begin + 4000
126
 
127
  waveform_ = waveform[:, begin: end]
 
 
 
 
 
 
 
 
 
128
 
129
  input_feed = {
130
+ "inputs": waveform_.numpy(),
131
  "h": h,
132
  "c": c,
133
  }
134
  output_names = [
135
  "logits", "new_h", "new_c"
136
  ]
137
+ logits, new_h, new_c = ort_session.run(output_names, input_feed)
138
  # print(f"logits: {logits.shape}")
139
  # print(f"new_h: {new_h.shape}")
140
  # print(f"new_c: {new_c.shape}")
toolbox/torchaudio/models/lstm_audio_classifier/modeling_lstm_audio_classifier.py CHANGED
@@ -308,12 +308,14 @@ class WaveClassifierPretrainedModel(WaveClassifier):
308
  return save_directory
309
 
310
 
311
- class WaveClassifierPreprocessExport(WaveClassifierPretrainedModel):
312
  def __init__(self, config: WaveClassifierConfig):
313
- super(WaveClassifierPreprocessExport, self).__init__(config=config)
314
 
315
  def forward(self,
316
  inputs: torch.Tensor,
 
 
317
  ):
318
  # x: [b, num_samples]
319
  x = inputs
@@ -327,22 +329,8 @@ class WaveClassifierPreprocessExport(WaveClassifierPretrainedModel):
327
  # shape = [b, t, mel_bins]
328
  spec = x + 1e-6
329
  spec = spec.log()
330
-
331
- # shape = [b, t, f]
332
- return spec
333
-
334
-
335
- class WaveClassifierExport(WaveClassifierPretrainedModel):
336
- def __init__(self, config: WaveClassifierConfig):
337
- super(WaveClassifierExport, self).__init__(config=config)
338
-
339
- def forward(self,
340
- inputs: torch.Tensor,
341
- h: torch.Tensor = None,
342
- c: torch.Tensor = None,
343
- ):
344
- # inputs shape = [b, t, f]
345
- features, h, c = self.wave_encoder.lstm_layer.forward(inputs, h=h, c=c)
346
  # features: shape, [b, t, hidden_size]
347
  # h: shape, [num_layers, b, hidden_size]
348
  # c: shape, [num_layers, b, hidden_size]
@@ -358,10 +346,8 @@ class WaveClassifierExport(WaveClassifierPretrainedModel):
358
  def main():
359
  config = WaveClassifierConfig.from_pretrained("examples/lstm_classifier.yaml")
360
  model = WaveClassifierPretrainedModel(config)
361
- model_preprocess = WaveClassifierPreprocessExport(config)
362
  model_export = WaveClassifierExport(config)
363
  model.eval()
364
- model_preprocess.eval()
365
  model_export.eval()
366
 
367
  inputs = torch.rand(size=(1, 16000), dtype=torch.float32)
@@ -369,8 +355,7 @@ def main():
369
  logits = model.forward(inputs)
370
  print(logits)
371
 
372
- spec = model_preprocess.forward(inputs)
373
- logits, h, c = model_export.forward(spec)
374
 
375
  return
376
 
 
308
  return save_directory
309
 
310
 
311
+ class WaveClassifierExport(WaveClassifierPretrainedModel):
312
  def __init__(self, config: WaveClassifierConfig):
313
+ super(WaveClassifierExport, self).__init__(config=config)
314
 
315
  def forward(self,
316
  inputs: torch.Tensor,
317
+ h: torch.Tensor = None,
318
+ c: torch.Tensor = None,
319
  ):
320
  # x: [b, num_samples]
321
  x = inputs
 
329
  # shape = [b, t, mel_bins]
330
  spec = x + 1e-6
331
  spec = spec.log()
332
+ # spec shape = [b, t, f]
333
+ features, h, c = self.wave_encoder.lstm_layer.forward(spec, h=h, c=c)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  # features: shape, [b, t, hidden_size]
335
  # h: shape, [num_layers, b, hidden_size]
336
  # c: shape, [num_layers, b, hidden_size]
 
346
  def main():
347
  config = WaveClassifierConfig.from_pretrained("examples/lstm_classifier.yaml")
348
  model = WaveClassifierPretrainedModel(config)
 
349
  model_export = WaveClassifierExport(config)
350
  model.eval()
 
351
  model_export.eval()
352
 
353
  inputs = torch.rand(size=(1, 16000), dtype=torch.float32)
 
355
  logits = model.forward(inputs)
356
  print(logits)
357
 
358
+ logits, h, c = model_export.forward(inputs)
 
359
 
360
  return
361
 
voicemail-es-mx-2-l3-ch64-lstm.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4875557ee77aa194cba21c84bb577383ae5d6aab53a424f1253bb84e2253049e
3
+ size 4528606