niobures commited on
Commit
3322099
·
verified ·
1 Parent(s): 16d64c8

CREPE (models_onnx)

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ models/onnx/ailia-models/code/test.wav filter=lfs diff=lfs merge=lfs -text
models/onnx/ailia-models/code/README.md ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CREPE Pitch Tracker
2
+
3
+ ## Input
4
+
5
+ Audio file
6
+
7
+ (Audio from https://github.com/maxrmorrison/torchcrepe/blob/master/tests/assets/test.wav)
8
+
9
+ ## Output
10
+
11
+ Pitch (F0) per 10ms
12
+
13
+ full model
14
+
15
+ ![output_full](output_full.png)
16
+
17
+ tiny model
18
+
19
+ ![output_tiny](output_tiny.png)
20
+
21
+ ## Requirements
22
+
23
+ This model requires additional modules.
24
+ ```
25
+ pip3 install librosa
26
+ pip3 install soundfile
27
+ ```
28
+
29
+ ## Usage
30
+ Automatically downloads the onnx and prototxt files on the first run.
31
+ It is necessary to be connected to the Internet while downloading.
32
+
33
+ For the sample wav,
34
+ ```bash
35
+ $ python3 crepe.py
36
+ ```
37
+
38
+ If you want to specify the audio, put the file path after the `--input` option.
39
+ ```bash
40
+ $ python3 crepe.py --input AUDIO_FILE
41
+ ```
42
+
43
+ Use the `--f0_method` option to select the pitch estimation model. You can choose `crepe` or `crepe_tiny` (the default is `crepe_tiny`).
44
+
45
+ ```bash
46
+ $ python3 crepe.py --f0_method crepe_tiny
47
+ ```
48
+
49
+ If you specify the `--evaluate` option, the result is plotted together with the f0 estimated by pyworld (harvest) for comparison. This requires the `pyworld` package.
50
+
51
+ ```bash
52
+ $ python3 crepe.py --f0_method crepe_tiny --evaluate
53
+ ```
54
+
55
+ ## Reference
56
+
57
+ - [crepe](https://github.com/marl/crepe/)
58
+ - [torchcrepe](https://github.com/maxrmorrison/torchcrepe)
59
+
60
+ ## Framework
61
+
62
+ PyTorch
63
+
64
+ ## Model Format
65
+
66
+ ONNX opset=11
67
+
68
+ ## Netron
69
+
70
+ - [crepe.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/rvc/crepe.onnx.prototxt)
71
+ - [crepe_tiny.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/rvc/crepe_tiny.onnx.prototxt)
models/onnx/ailia-models/code/crepe.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import time
3
+ from logging import getLogger
4
+
5
+ import numpy as np
6
+ import scipy.signal as signal
7
+ from PIL import Image
8
+ import librosa
9
+ import soundfile as sf
10
+ import matplotlib.pyplot as plt
11
+
12
+ import ailia
13
+
14
+ # import original modules
15
+ sys.path.append('../../util')
16
+ from microphone_utils import start_microphone_input # noqa
17
+ from model_utils import check_and_download_models # noqa
18
+ from arg_utils import get_base_parser, get_savepath, update_parser # noqa
19
+
20
+ # crepe util
21
+ import mod_crepe
22
+ from mod_crepe import WEIGHT_CREPE_PATH, MODEL_CREPE_PATH, WEIGHT_CREPE_TINY_PATH, MODEL_CREPE_TINY_PATH
23
+
24
+ flg_ffmpeg = False
25
+
26
+ if flg_ffmpeg:
27
+ import ffmpeg
28
+
29
+ logger = getLogger(__name__)
30
+
31
+ # ======================
32
+ # Parameters
33
+ # ======================
34
+
35
+ REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/rvc/'
36
+
37
+ SAMPLE_RATE = 16000
38
+
39
+ WAV_PATH = 'test.wav'
40
+ FIG_PATH = "output.png"
41
+
42
+ # ======================
43
+ # Argument Parser Config
44
+ # ======================
45
+
46
# Build the command-line interface on top of the shared base parser, using
# the bundled sample wav as default input and output.png as default output.
parser = get_base_parser('Crepe', WAV_PATH, FIG_PATH, input_ftype='audio')

# Which CREPE variant to run; the tiny model is the default.
parser.add_argument(
    '--f0_method',
    choices=("crepe", "crepe_tiny"),
    default="crepe_tiny",
    help='Select the pitch extraction algorithm',
)

# Run the model through onnxruntime instead of ailia.
parser.add_argument('--onnx', action='store_true', help='execute onnxruntime version.')

# Additionally plot the pyworld "harvest" estimate as a reference curve.
parser.add_argument('--evaluate', action='store_true', help='evaluate with harvest.')

args = update_parser(parser)
64
+
65
+
66
+ # ======================
67
+ # Secondary Functions
68
+ # ======================
69
+
70
def load_audio(file: str, sr: int = SAMPLE_RATE):
    """Read an audio file and return its samples at sampling rate ``sr``.

    Arguments
        file (str)
            Path of the audio file to read
        sr (int)
            Target sampling rate in Hz

    Returns
        audio (np.ndarray)
            The decoded (and, if necessary, resampled) waveform
    """
    if not flg_ffmpeg:
        # librosa path: read at the file's native rate, then resample only
        # when that rate differs from the requested one.
        waveform, native_sr = librosa.load(file, sr=None)
        needs_resample = native_sr is not None and native_sr != sr
        if needs_resample:
            waveform = librosa.resample(waveform, orig_sr=native_sr, target_sr=sr)
        return waveform

    # ffmpeg path, adapted from
    # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
    # A subprocess decodes the file, down-mixes to one channel and resamples.
    # Requires the ffmpeg CLI and the `ffmpeg-python` package.
    stream = ffmpeg.input(file, threads=0)
    stream = stream.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
    raw, _ = stream.run(
        cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)

    return np.frombuffer(raw, np.float32).flatten()
88
+
89
+
90
+ # ======================
91
+ # Main functions
92
+ # ======================
93
+
94
def get_f0(
        f0_method,
        window,
        x,
        p_len,
):
    """Estimate the fundamental frequency of ``x`` with the chosen method.

    Arguments
        f0_method (str)
            One of "harvest", "crepe" or "crepe_tiny"
        window (int)
            Hop length in samples handed to the CREPE predictor
        x (np.ndarray)
            The audio samples
        p_len
            Accepted for interface compatibility; not used here

    Returns
        f0 (np.ndarray)
            The estimated pitch track

    Raises
        ValueError
            If ``f0_method`` is not one of the supported names
    """
    sr = SAMPLE_RATE
    f0_min, f0_max = 50, 1100

    if f0_method in ("crepe", "crepe_tiny"):
        import mod_crepe

        # Pick a batch size that doesn't cause memory errors on your gpu
        batch_size = 512
        batched_audio = np.copy(x)[None]
        f0, confidence = mod_crepe.predict(
            batched_audio,
            sr,
            window,
            f0_min,
            f0_max,
            batch_size=batch_size,
            return_periodicity=True,
        )
        # Smooth both tracks, then zero the pitch where the smoothed
        # periodicity falls below 0.1.
        confidence = mod_crepe.median(confidence, 3)
        f0 = mod_crepe.mean(f0, 3)
        f0[confidence < 0.1] = 0
        return f0[0]

    if f0_method != "harvest":
        raise ValueError("f0_method: %s" % f0_method)

    import pyworld

    samples = x.astype(np.double)
    f0, timestamps = pyworld.harvest(
        samples,
        fs=sr,
        f0_ceil=f0_max,
        f0_floor=f0_min,
        frame_period=10,
    )
    f0 = pyworld.stonemask(samples, f0, timestamps, sr)

    # The filter radius is fixed at 3, so the 3-tap median filter always runs.
    return signal.medfilt(f0, 3)
145
+
146
+
147
def predict(audio, model, f0_method):
    """Normalize the audio level and estimate its pitch track.

    Arguments
        audio (np.ndarray)
            The audio samples; left unmodified by this function
        model
            Accepted for interface compatibility; not used here
        f0_method (str)
            Pitch extraction method forwarded to ``get_f0``

    Returns
        pitch (np.ndarray)
            The pitch track returned by ``get_f0``
    """
    # Scale down only when the peak exceeds 0.95. Use an out-of-place
    # division so the caller's array is not silently rescaled.
    audio_max = np.abs(audio).max() / 0.95
    if audio_max > 1:
        audio = audio / audio_max

    # Hop length in samples, and the number of whole hops in the signal.
    window = 160
    p_len = audio.shape[0] // window

    pitch = get_f0(
        f0_method,
        window,
        audio,
        p_len,
    )

    return pitch
163
+
164
+
165
+
166
def recognize_from_audio(models):
    """Run pitch estimation on every input file and save a plot for each.

    Arguments
        models
            Model handle forwarded to ``predict``
    """
    # input audio loop
    for audio_path in args.input:
        logger.info(audio_path)

        # prepare input data
        audio = load_audio(audio_path, SAMPLE_RATE)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            start = int(round(time.time() * 1000))
            output = predict(audio, models, args.f0_method)
            end = int(round(time.time() * 1000))
            estimation_time = (end - start)
            logger.info(f'\ttotal processing time {estimation_time} ms')
        else:
            output = predict(audio, models, args.f0_method)

        # reference data
        harvest = predict(audio, models, "harvest") if args.evaluate else None

        duration = audio.shape[0] / SAMPLE_RATE

        # plot
        fig = plt.figure()
        try:
            ax = fig.add_subplot()
            ax.plot(
                np.linspace(0, duration, output.shape[0]),
                output, label=args.f0_method)

            if harvest is not None:
                # The reference gets a time axis sized to its own length, so
                # a frame-count difference from the CREPE output cannot
                # break plotting.
                ax.plot(
                    np.linspace(0, duration, harvest.shape[0]),
                    harvest, label="harvest", linestyle="dashed")

            ax.set_xlabel("sec")
            ax.set_ylabel("f0 (hz)")
            ax.legend()

            # save result, logging only after the file is written
            savepath = get_savepath(args.savepath, audio_path, ext='.png')
            fig.savefig(savepath)
            logger.info(f'saved at : {savepath}')
        finally:
            # Release the figure; otherwise one accumulates per input file.
            plt.close(fig)

    logger.info('Script finished successfully.')
215
+
216
+
217
def main():
    """Download the selected model if needed, load it and process the inputs."""
    if args.f0_method == "crepe_tiny":
        check_and_download_models(WEIGHT_CREPE_TINY_PATH, MODEL_CREPE_TINY_PATH, REMOTE_PATH)
    else:
        check_and_download_models(WEIGHT_CREPE_PATH, MODEL_CREPE_PATH, REMOTE_PATH)

    env_id = args.env_id

    f0_model = mod_crepe.load_model(env_id, args.onnx, args.f0_method == "crepe_tiny")

    # Profile mode is only switched on for the ailia backend, matching the
    # guard already used for the summary printed below.
    profiling = args.profile and not args.onnx
    if profiling:
        f0_model.set_profile_mode(True)

    recognize_from_audio(f0_model)

    if profiling:
        print("--- profile f0_model")
        print(f0_model.get_summary())
        print("")
237
+
238
+
239
+ if __name__ == '__main__':
240
+ main()
models/onnx/ailia-models/code/mod_crepe.py ADDED
@@ -0,0 +1,471 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+
3
+ import numpy as np
4
+ import scipy
5
+ import librosa
6
+
7
+ import ailia
8
+ from functional import im2col
9
+ from math_utils import softmax
10
+
11
# Model files for the full CREPE network
WEIGHT_CREPE_PATH = "crepe.onnx"
MODEL_CREPE_PATH = "crepe.onnx.prototxt"

# Model files for the tiny CREPE network
WEIGHT_CREPE_TINY_PATH = "crepe_tiny.onnx"
MODEL_CREPE_TINY_PATH = "crepe_tiny.onnx.prototxt"

CENTS_PER_BIN = 20  # cents
MAX_FMAX = 2006.  # hz
PITCH_BINS = 360  # number of pitch bins in the network output
SAMPLE_RATE = 16000  # hz
WINDOW_SIZE = 1024  # samples
UNVOICED = np.nan
23
+
24
+
25
def load_model(env_id=0, flg_onnx=False, tiny=False):
    """Create the CREPE network and register it on ``infer``.

    Arguments
        env_id (int)
            ailia execution environment id (used by the ailia backend only)
        flg_onnx (bool)
            Use onnxruntime instead of ailia
        tiny (bool)
            Load the tiny model instead of the full one

    Returns
        model
            The created ailia.Net or onnxruntime.InferenceSession
    """
    if tiny:
        model_path, weight_path = MODEL_CREPE_TINY_PATH, WEIGHT_CREPE_TINY_PATH
    else:
        model_path, weight_path = MODEL_CREPE_PATH, WEIGHT_CREPE_PATH

    if flg_onnx:
        import onnxruntime
        # NOTE(review): CPU is listed before CUDA here; confirm this
        # provider order is intended.
        providers = ["CPUExecutionProvider", "CUDAExecutionProvider"]
        model = onnxruntime.InferenceSession(weight_path, providers=providers)
    else:
        model = ailia.Net(model_path, weight_path, env_id=env_id)

    # infer() reads the backend flag and the model from its own attributes.
    infer.flg_onnx = flg_onnx
    infer.model = model
    return model
43
+
44
+
45
+ ###############################################################################
46
+ # Probability sequence decoding methods
47
+ ###############################################################################
48
+
49
def viterbi(logits):
    """Decode pitch bins from network output with Viterbi decoding.

    Returns
        bins (np.ndarray), frequency (np.ndarray)
            The decoded pitch bins and their frequencies in Hz
    """
    # Build the transition matrix once and cache it on the function object.
    if not hasattr(viterbi, 'transition'):
        idx = np.arange(360)
        distance = np.abs(idx[None, :] - idx[:, None])
        # Triangular weighting: only bins fewer than 12 apart get weight.
        weights = np.maximum(12 - distance, 0)
        viterbi.transition = weights / weights.sum(axis=1, keepdims=True)

    # Normalize logits
    sequences = softmax(logits, axis=1)

    # Decode each sequence in the batch independently
    decoded = [
        librosa.sequence.viterbi(sequence, viterbi.transition).astype(np.int64)
        for sequence in sequences
    ]
    bins = np.array(decoded)

    # Convert to frequency in Hz
    return bins, bins_to_frequency(bins)
68
+
69
+
70
+ ###############################################################################
71
+ # Crepe pitch prediction
72
+ ###############################################################################
73
+
74
def predict(
        audio,
        sample_rate,
        hop_length=None,
        fmin=50.,
        fmax=MAX_FMAX,
        decoder=viterbi,
        return_periodicity=False,
        batch_size=None,
        pad=True):
    """Performs pitch estimation

    Arguments
        audio (np.ndarray [shape=(1, time)])
            The audio signal
        sample_rate (int)
            The sampling rate in Hz
        hop_length (int)
            The hop_length in samples
        fmin (float)
            The minimum allowable frequency in Hz
        fmax (float)
            The maximum allowable frequency in Hz
        decoder (function)
            The decoder used to turn probabilities into pitch bins
        return_periodicity (bool)
            Whether to also return the network confidence
        batch_size (int)
            The number of frames per batch
        pad (bool)
            Whether to zero-pad the audio

    Returns
        pitch (np.ndarray [shape=(1, 1 + int(time // hop_length))])
        (Optional) periodicity (np.ndarray
            [shape=(1, 1 + int(time // hop_length))])
    """
    outputs = []

    # Each iteration handles one batch of preprocessed frames.
    for frames in preprocess(audio, sample_rate, hop_length, batch_size, pad):
        # Infer independent probabilities for each pitch bin
        probabilities = infer(frames)

        # shape=(batch, 360, time / hop_length)
        probabilities = probabilities.reshape(
            audio.shape[0], -1, PITCH_BINS).transpose(0, 2, 1)

        # Convert probabilities to F0 (and periodicity, if requested)
        outputs.append(postprocess(
            probabilities, fmin, fmax,
            decoder, return_periodicity))

    if not return_periodicity:
        # Pitch only: join the batches along time
        return np.concatenate(outputs, axis=1)

    # Split pitch and periodicity, then join each along time
    pitch, periodicity = zip(*outputs)
    return np.concatenate(pitch, axis=1), np.concatenate(periodicity, axis=1)
142
+
143
+
144
+ ###############################################################################
145
+ # Components for step-by-step prediction
146
+ ###############################################################################
147
+
148
def infer(frame):
    """Run the network on one batch of frames and return its first output.

    The model is loaded with default settings on first use if ``load_model``
    has not been called yet.
    """
    if not hasattr(infer, 'model'):
        load_model()

    net = infer.model

    # feedforward through whichever backend was loaded
    if infer.flg_onnx:
        output = net.run(None, {'input': frame})
    else:
        output = net.predict([frame])

    return output[0]
162
+
163
+
164
def postprocess(
        probabilities,
        fmin=0.,
        fmax=MAX_FMAX,
        decoder=viterbi,
        return_periodicity=False):
    """Convert model output to F0 and periodicity

    Arguments
        probabilities (np.ndarray [shape=(1, 360, time / hop_length)])
            The probabilities for each pitch bin inferred by the network.
            Modified in place: bins outside [fmin, fmax] are set to -inf.
        fmin (float)
            The minimum allowable frequency in Hz
        fmax (float)
            The maximum allowable frequency in Hz
        decoder (function)
            Maps the probabilities to (bins, pitch)
        return_periodicity (bool)
            Whether to also return the network confidence

    Returns
        pitch (np.ndarray [shape=(1, 1 + int(time // hop_length))])
        (Optional) periodicity (np.ndarray
            [shape=(1, 1 + int(time // hop_length))])
    """
    # Convert frequency range to pitch bin range
    lowest_bin = frequency_to_bins(np.array(fmin))
    highest_bin = frequency_to_bins(np.array(fmax), np.ceil)

    # Remove frequencies outside of allowable range
    excluded = -float('inf')
    probabilities[:, :lowest_bin] = excluded
    probabilities[:, highest_bin:] = excluded

    # Decode pitch bins and frequencies with the supplied decoder
    bins, pitch = decoder(probabilities)

    if return_periodicity:
        # Compute periodicity from probabilities and decoded pitch bins
        return pitch, periodicity(probabilities, bins)

    return pitch
205
+
206
+
207
def preprocess(
        audio,
        sample_rate,
        hop_length=None,
        batch_size=None,
        pad=True):
    """Convert audio to model input

    Generator yielding one batch of normalized frames at a time.

    Arguments
        audio (np.ndarray [shape=(1, time)])
            The audio signals
        sample_rate (int)
            The sampling rate in Hz
        hop_length (int)
            The hop_length in samples
        batch_size (int)
            The number of frames per batch
        pad (bool)
            Whether to zero-pad the audio

    Returns
        frames (np.ndarray [shape=(1 + int(time // hop_length), 1024)])
    """
    # Default hop length of 10 ms
    if hop_length is None:
        hop_length = sample_rate // 100

    # Resample
    if sample_rate != SAMPLE_RATE:
        # We have to use resampy if we want numbers to match Crepe
        import resampy

        audio = resampy.resample(audio[0], sample_rate, SAMPLE_RATE)[None]
        hop_length = int(hop_length * SAMPLE_RATE / sample_rate)

    # Count the frames and, if requested, pad half a window on each side
    if pad:
        total_frames = 1 + int(audio.shape[1] // hop_length)
        half_window = WINDOW_SIZE // 2
        audio = np.pad(audio, ((0, 0), (half_window, half_window)))
    else:
        total_frames = 1 + int((audio.shape[1] - WINDOW_SIZE) // hop_length)

    # Default to running all frames in a single batch
    if batch_size is None:
        batch_size = total_frames

    # The framing operator is the same for every batch
    unfold = functools.partial(
        im2col, filters=(1, WINDOW_SIZE), stride=(1, hop_length))

    # Generate batches
    for first_frame in range(0, total_frames, batch_size):
        # Sample range covering this batch of frames
        start = max(0, first_frame * hop_length)
        end = min(
            audio.shape[1],
            (first_frame + batch_size - 1) * hop_length + WINDOW_SIZE)

        # Chunk
        frames, *_ = unfold(audio[:, None, None, start:end])
        frames = frames.astype(np.float32)

        # shape=(1 + int(time / hop_length), 1024)
        frames = frames[None].transpose(0, 2, 1).reshape(-1, WINDOW_SIZE)

        # Mean-center
        frames -= np.mean(frames, axis=1, keepdims=True)

        # Scale
        # Note: during silent frames, this produces very large values. But
        # this seems to be what the network expects.
        std = np.std(frames, axis=1, keepdims=True)
        frames /= np.where(std > 1e-10, std, 1e-10)

        yield frames
286
+
287
+
288
+ ###############################################################################
289
+ # Pitch unit conversions
290
+ ###############################################################################
291
+
292
def bins_to_cents(bins):
    """Map pitch bins to cents, with dither applied to the result."""
    base_cents = 1997.3794084376191
    exact = CENTS_PER_BIN * bins + base_cents

    # Trade quantization error for noise
    return dither(exact)
298
+
299
+
300
def bins_to_frequency(bins):
    """Map pitch bins to frequency in Hz, going through cents."""
    cents = bins_to_cents(bins)
    return cents_to_frequency(cents)
303
+
304
+
305
def cents_to_bins(cents, quantize_fn=np.floor):
    """Map cents to integer pitch bins, rounding with ``quantize_fn``."""
    relative_cents = cents - 1997.3794084376191
    fractional_bins = relative_cents / CENTS_PER_BIN
    return quantize_fn(fractional_bins).astype(int)
309
+
310
+
311
def cents_to_frequency(cents):
    """Map cents to frequency in Hz (0 cents is 10 Hz, 1200 cents per octave)."""
    octaves = cents / 1200
    return 10 * 2 ** octaves
314
+
315
+
316
def frequency_to_bins(frequency, quantize_fn=np.floor):
    """Map frequency in Hz to pitch bins, rounding with ``quantize_fn``."""
    cents = frequency_to_cents(frequency)
    return cents_to_bins(cents, quantize_fn)
319
+
320
+
321
def frequency_to_cents(frequency):
    """Map frequency in Hz to cents relative to 10 Hz."""
    ratio = frequency / 10.
    return 1200 * np.log2(ratio)
324
+
325
+
326
+ ###############################################################################
327
+ # Utilities
328
+ ###############################################################################
329
+
330
def periodicity(probabilities, bins):
    """Computes the periodicity from the network output and pitch bins"""
    # shape=(batch * time / hop_length, 360)
    stacked_probs = probabilities.transpose(0, 2, 1).reshape(-1, PITCH_BINS)

    # shape=(batch * time / hop_length, 1)
    stacked_bins = bins.reshape(-1, 1).astype(np.int64)

    # For each frame, pick the network output at the decoded pitch bin.
    # The result is float64, as in the per-row loop this replaces.
    picked = np.take_along_axis(
        stacked_probs, stacked_bins, axis=1).astype(np.float64)

    # shape=(batch, time / hop_length)
    return picked.reshape(probabilities.shape[0], probabilities.shape[2])
346
+
347
+
348
def dither(cents):
    """Dither the predicted pitch in cents to remove quantization error

    Adds random noise drawn from a symmetric triangular distribution over
    [-CENTS_PER_BIN, +CENTS_PER_BIN], so results differ from call to call.

    Arguments
        cents (np.ndarray)
            Pitch values in cents

    Returns
        dithered (np.ndarray)
            ``cents`` plus noise of the same shape
    """
    # Import the submodule explicitly: a bare `import scipy` is not
    # guaranteed to make `scipy.stats` available as an attribute.
    from scipy import stats

    noise = stats.triang.rvs(
        c=0.5,
        loc=-CENTS_PER_BIN,
        scale=2 * CENTS_PER_BIN,
        size=cents.shape)
    return cents + noise
356
+
357
+
358
+ ###############################################################################
359
+ # Sequence filters
360
+ ###############################################################################
361
+
362
def mean(signals, win_length=9):
    """Average filtering for signals containing nan values

    NaN entries are left out of both the window sum and the window count.
    Windows are zero-padded at the edges. Outputs that come out as exactly
    zero are returned as NaN.

    Arguments
        signals (np.ndarray (shape=(batch, time)))
            The signals to filter
        win_length
            The size of the analysis window

    Returns
        filtered (np.ndarray (shape=(batch, time)))
    """

    assert signals.ndim == 2, "Input tensor must have 2 dimensions (batch_size, width)"

    half = win_length // 2
    box = np.ones(win_length)

    def _window_sums(rows):
        # Zero-pad each row, then slide a box of ones over it.
        padded = np.pad(
            rows, ((0, 0), (half, half)), mode='constant', constant_values=0)
        return np.array([np.convolve(row, box, mode='valid') for row in padded])

    # Replace NaNs with zeros so they do not contribute to the sums
    is_valid = ~np.isnan(signals)
    zero_filled = np.where(is_valid, signals, np.zeros(signals.shape))

    # Sum of valid values per window
    totals = _window_sums(zero_filled)

    # Number of valid values per window, at least 1 to avoid division by zero
    counts = np.clip(_window_sums(is_valid.astype(float)), 1, None)

    # Masked average
    averaged = totals / counts

    # Fill zero values with NaNs
    averaged[averaged == 0] = float("nan")

    return averaged
408
+
409
+
410
def median(signals, win_length):
    """Median filtering for signals containing nan values

    For every position, the median is taken over the valid (non-NaN,
    non-padding) values in the surrounding window. With an even number of
    valid values the lower of the two middle values is used. Positions
    whose window holds no valid value yield NaN.

    Arguments
        signals (np.ndarray (shape=(batch, time)))
            The signals to filter
        win_length
            The size of the analysis window

    Returns
        filtered (np.ndarray (shape=(batch, time)))
    """

    assert signals.ndim == 2, "Input tensor must have 2 dimensions (batch_size, width)"
    signals = np.expand_dims(signals, axis=1)

    mask = ~np.isnan(signals)
    masked_x = np.where(mask, signals, np.zeros(signals.shape))
    padding = win_length // 2
    n_frames = masked_x.shape[-1]

    # Pad both arrays along time. The padded samples are marked invalid in
    # the mask, so their values never take part in the median.
    x = np.pad(masked_x, ((0, 0), (0, 0), (padding, padding)), mode="reflect")
    mask = np.pad(
        mask, ((0, 0), (0, 0), (padding, padding)),
        mode="constant", constant_values=False)

    # Gather all sliding windows at once: shape=(batch, 1, time, win_length)
    window_idx = np.arange(n_frames)[:, None] + np.arange(win_length)[None, :]
    x = x[:, :, window_idx]
    mask = mask[:, :, window_idx]

    # Push invalid entries to the end of each sorted window
    x_masked = np.where(mask, x.astype(np.float32), np.float32("inf"))
    x_sorted = np.sort(x_masked, axis=-1)

    # Compute the count of non-masked (valid) values
    valid_count = np.sum(mask, axis=-1)

    # Calculate the index of the median value for each pooling window
    median_idx = np.clip((valid_count - 1) // 2, 0, None)

    # Gather the median with each batch row's own index. The previous code
    # indexed with batch 0's median position for every row.
    median_pooled = np.take_along_axis(
        x_sorted, median_idx[..., None], axis=-1)[..., 0]

    # Fill infinite values with NaNs
    median_pooled[np.isinf(median_pooled)] = float("nan")

    return np.squeeze(median_pooled, axis=1)
models/onnx/ailia-models/code/output_full.png ADDED
models/onnx/ailia-models/code/output_tiny.png ADDED
models/onnx/ailia-models/code/test.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20bdf667c6b606917159f62c2b728f5836de53079830216735b459bc6aad2e8
3
+ size 882044
models/onnx/ailia-models/crepe.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:124679dba8e591eb2e8b97e338184c824300f4cc100b8e4036f4ef7afccaa9aa
3
+ size 88991558
models/onnx/ailia-models/crepe.onnx.prototxt ADDED
@@ -0,0 +1,2108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ir_version: 6
2
+ producer_name: "pytorch"
3
+ producer_version: "2.1.0"
4
+ model_version: 0
5
+ graph {
6
+ name: "torch_jit"
7
+ node {
8
+ input: "input"
9
+ output: "/Unsqueeze_output_0"
10
+ name: "/Unsqueeze"
11
+ op_type: "Unsqueeze"
12
+ attribute {
13
+ name: "axes"
14
+ ints: 1
15
+ type: INTS
16
+ }
17
+ }
18
+ node {
19
+ input: "/Unsqueeze_output_0"
20
+ output: "/Unsqueeze_1_output_0"
21
+ name: "/Unsqueeze_1"
22
+ op_type: "Unsqueeze"
23
+ attribute {
24
+ name: "axes"
25
+ ints: 3
26
+ type: INTS
27
+ }
28
+ }
29
+ node {
30
+ output: "/Constant_output_0"
31
+ name: "/Constant"
32
+ op_type: "Constant"
33
+ attribute {
34
+ name: "value"
35
+ t {
36
+ dims: 1
37
+ data_type: 7
38
+ data_location: 0
39
+ }
40
+ type: TENSOR
41
+ }
42
+ }
43
+ node {
44
+ output: "/Constant_1_output_0"
45
+ name: "/Constant_1"
46
+ op_type: "Constant"
47
+ attribute {
48
+ name: "value"
49
+ t {
50
+ dims: 4
51
+ data_type: 7
52
+ data_location: 0
53
+ }
54
+ type: TENSOR
55
+ }
56
+ }
57
+ node {
58
+ input: "/Constant_output_0"
59
+ output: "/ConstantOfShape_output_0"
60
+ name: "/ConstantOfShape"
61
+ op_type: "ConstantOfShape"
62
+ attribute {
63
+ name: "value"
64
+ t {
65
+ dims: 1
66
+ data_type: 7
67
+ raw_data: "\000\000\000\000\000\000\000\000"
68
+ }
69
+ type: TENSOR
70
+ }
71
+ }
72
+ node {
73
+ input: "/Constant_1_output_0"
74
+ input: "/ConstantOfShape_output_0"
75
+ output: "/Concat_output_0"
76
+ name: "/Concat"
77
+ op_type: "Concat"
78
+ attribute {
79
+ name: "axis"
80
+ i: 0
81
+ type: INT
82
+ }
83
+ }
84
+ node {
85
+ output: "/Constant_2_output_0"
86
+ name: "/Constant_2"
87
+ op_type: "Constant"
88
+ attribute {
89
+ name: "value"
90
+ t {
91
+ dims: 2
92
+ data_type: 7
93
+ data_location: 0
94
+ }
95
+ type: TENSOR
96
+ }
97
+ }
98
+ node {
99
+ input: "/Concat_output_0"
100
+ input: "/Constant_2_output_0"
101
+ output: "/Reshape_output_0"
102
+ name: "/Reshape"
103
+ op_type: "Reshape"
104
+ }
105
+ node {
106
+ output: "/Constant_3_output_0"
107
+ name: "/Constant_3"
108
+ op_type: "Constant"
109
+ attribute {
110
+ name: "value"
111
+ t {
112
+ dims: 1
113
+ data_type: 7
114
+ data_location: 0
115
+ }
116
+ type: TENSOR
117
+ }
118
+ }
119
+ node {
120
+ output: "/Constant_4_output_0"
121
+ name: "/Constant_4"
122
+ op_type: "Constant"
123
+ attribute {
124
+ name: "value"
125
+ t {
126
+ dims: 1
127
+ data_type: 7
128
+ data_location: 0
129
+ }
130
+ type: TENSOR
131
+ }
132
+ }
133
+ node {
134
+ output: "/Constant_5_output_0"
135
+ name: "/Constant_5"
136
+ op_type: "Constant"
137
+ attribute {
138
+ name: "value"
139
+ t {
140
+ dims: 1
141
+ data_type: 7
142
+ data_location: 0
143
+ }
144
+ type: TENSOR
145
+ }
146
+ }
147
+ node {
148
+ output: "/Constant_6_output_0"
149
+ name: "/Constant_6"
150
+ op_type: "Constant"
151
+ attribute {
152
+ name: "value"
153
+ t {
154
+ dims: 1
155
+ data_type: 7
156
+ data_location: 0
157
+ }
158
+ type: TENSOR
159
+ }
160
+ }
161
+ node {
162
+ input: "/Reshape_output_0"
163
+ input: "/Constant_4_output_0"
164
+ input: "/Constant_5_output_0"
165
+ input: "/Constant_3_output_0"
166
+ input: "/Constant_6_output_0"
167
+ output: "/Slice_output_0"
168
+ name: "/Slice"
169
+ op_type: "Slice"
170
+ }
171
+ node {
172
+ input: "/Slice_output_0"
173
+ output: "/Transpose_output_0"
174
+ name: "/Transpose"
175
+ op_type: "Transpose"
176
+ attribute {
177
+ name: "perm"
178
+ ints: 1
179
+ ints: 0
180
+ type: INTS
181
+ }
182
+ }
183
+ node {
184
+ output: "/Constant_7_output_0"
185
+ name: "/Constant_7"
186
+ op_type: "Constant"
187
+ attribute {
188
+ name: "value"
189
+ t {
190
+ dims: 1
191
+ data_type: 7
192
+ data_location: 0
193
+ }
194
+ type: TENSOR
195
+ }
196
+ }
197
+ node {
198
+ input: "/Transpose_output_0"
199
+ input: "/Constant_7_output_0"
200
+ output: "/Reshape_1_output_0"
201
+ name: "/Reshape_1"
202
+ op_type: "Reshape"
203
+ }
204
+ node {
205
+ input: "/Reshape_1_output_0"
206
+ output: "/Cast_output_0"
207
+ name: "/Cast"
208
+ op_type: "Cast"
209
+ attribute {
210
+ name: "to"
211
+ i: 7
212
+ type: INT
213
+ }
214
+ }
215
+ node {
216
+ input: "/Unsqueeze_1_output_0"
217
+ input: "/Cast_output_0"
218
+ input: ""
219
+ output: "/Pad_output_0"
220
+ name: "/Pad"
221
+ op_type: "Pad"
222
+ attribute {
223
+ name: "mode"
224
+ s: "constant"
225
+ type: STRING
226
+ }
227
+ }
228
+ node {
229
+ input: "/Pad_output_0"
230
+ input: "conv1.weight"
231
+ input: "conv1.bias"
232
+ output: "/conv1/Conv_output_0"
233
+ name: "/conv1/Conv"
234
+ op_type: "Conv"
235
+ attribute {
236
+ name: "dilations"
237
+ ints: 1
238
+ ints: 1
239
+ type: INTS
240
+ }
241
+ attribute {
242
+ name: "group"
243
+ i: 1
244
+ type: INT
245
+ }
246
+ attribute {
247
+ name: "kernel_shape"
248
+ ints: 512
249
+ ints: 1
250
+ type: INTS
251
+ }
252
+ attribute {
253
+ name: "pads"
254
+ ints: 0
255
+ ints: 0
256
+ ints: 0
257
+ ints: 0
258
+ type: INTS
259
+ }
260
+ attribute {
261
+ name: "strides"
262
+ ints: 4
263
+ ints: 1
264
+ type: INTS
265
+ }
266
+ }
267
+ node {
268
+ input: "/conv1/Conv_output_0"
269
+ output: "/Relu_output_0"
270
+ name: "/Relu"
271
+ op_type: "Relu"
272
+ }
273
+ node {
274
+ input: "/Relu_output_0"
275
+ input: "conv1_BN.weight"
276
+ input: "conv1_BN.bias"
277
+ input: "conv1_BN.running_mean"
278
+ input: "conv1_BN.running_var"
279
+ output: "/conv1_BN/BatchNormalization_output_0"
280
+ name: "/conv1_BN/BatchNormalization"
281
+ op_type: "BatchNormalization"
282
+ attribute {
283
+ name: "epsilon"
284
+ f: 0.0010000000474974513
285
+ type: FLOAT
286
+ }
287
+ attribute {
288
+ name: "momentum"
289
+ f: 1.0
290
+ type: FLOAT
291
+ }
292
+ }
293
+ node {
294
+ input: "/conv1_BN/BatchNormalization_output_0"
295
+ output: "/MaxPool_output_0"
296
+ name: "/MaxPool"
297
+ op_type: "MaxPool"
298
+ attribute {
299
+ name: "ceil_mode"
300
+ i: 0
301
+ type: INT
302
+ }
303
+ attribute {
304
+ name: "kernel_shape"
305
+ ints: 2
306
+ ints: 1
307
+ type: INTS
308
+ }
309
+ attribute {
310
+ name: "pads"
311
+ ints: 0
312
+ ints: 0
313
+ ints: 0
314
+ ints: 0
315
+ type: INTS
316
+ }
317
+ attribute {
318
+ name: "strides"
319
+ ints: 2
320
+ ints: 1
321
+ type: INTS
322
+ }
323
+ }
324
+ node {
325
+ output: "/Constant_8_output_0"
326
+ name: "/Constant_8"
327
+ op_type: "Constant"
328
+ attribute {
329
+ name: "value"
330
+ t {
331
+ dims: 1
332
+ data_type: 7
333
+ data_location: 0
334
+ }
335
+ type: TENSOR
336
+ }
337
+ }
338
+ node {
339
+ output: "/Constant_9_output_0"
340
+ name: "/Constant_9"
341
+ op_type: "Constant"
342
+ attribute {
343
+ name: "value"
344
+ t {
345
+ dims: 4
346
+ data_type: 7
347
+ data_location: 0
348
+ }
349
+ type: TENSOR
350
+ }
351
+ }
352
+ node {
353
+ input: "/Constant_8_output_0"
354
+ output: "/ConstantOfShape_1_output_0"
355
+ name: "/ConstantOfShape_1"
356
+ op_type: "ConstantOfShape"
357
+ attribute {
358
+ name: "value"
359
+ t {
360
+ dims: 1
361
+ data_type: 7
362
+ raw_data: "\000\000\000\000\000\000\000\000"
363
+ }
364
+ type: TENSOR
365
+ }
366
+ }
367
+ node {
368
+ input: "/Constant_9_output_0"
369
+ input: "/ConstantOfShape_1_output_0"
370
+ output: "/Concat_1_output_0"
371
+ name: "/Concat_1"
372
+ op_type: "Concat"
373
+ attribute {
374
+ name: "axis"
375
+ i: 0
376
+ type: INT
377
+ }
378
+ }
379
+ node {
380
+ output: "/Constant_10_output_0"
381
+ name: "/Constant_10"
382
+ op_type: "Constant"
383
+ attribute {
384
+ name: "value"
385
+ t {
386
+ dims: 2
387
+ data_type: 7
388
+ data_location: 0
389
+ }
390
+ type: TENSOR
391
+ }
392
+ }
393
+ node {
394
+ input: "/Concat_1_output_0"
395
+ input: "/Constant_10_output_0"
396
+ output: "/Reshape_2_output_0"
397
+ name: "/Reshape_2"
398
+ op_type: "Reshape"
399
+ }
400
+ node {
401
+ output: "/Constant_11_output_0"
402
+ name: "/Constant_11"
403
+ op_type: "Constant"
404
+ attribute {
405
+ name: "value"
406
+ t {
407
+ dims: 1
408
+ data_type: 7
409
+ data_location: 0
410
+ }
411
+ type: TENSOR
412
+ }
413
+ }
414
+ node {
415
+ output: "/Constant_12_output_0"
416
+ name: "/Constant_12"
417
+ op_type: "Constant"
418
+ attribute {
419
+ name: "value"
420
+ t {
421
+ dims: 1
422
+ data_type: 7
423
+ data_location: 0
424
+ }
425
+ type: TENSOR
426
+ }
427
+ }
428
+ node {
429
+ output: "/Constant_13_output_0"
430
+ name: "/Constant_13"
431
+ op_type: "Constant"
432
+ attribute {
433
+ name: "value"
434
+ t {
435
+ dims: 1
436
+ data_type: 7
437
+ data_location: 0
438
+ }
439
+ type: TENSOR
440
+ }
441
+ }
442
+ node {
443
+ output: "/Constant_14_output_0"
444
+ name: "/Constant_14"
445
+ op_type: "Constant"
446
+ attribute {
447
+ name: "value"
448
+ t {
449
+ dims: 1
450
+ data_type: 7
451
+ data_location: 0
452
+ }
453
+ type: TENSOR
454
+ }
455
+ }
456
+ node {
457
+ input: "/Reshape_2_output_0"
458
+ input: "/Constant_12_output_0"
459
+ input: "/Constant_13_output_0"
460
+ input: "/Constant_11_output_0"
461
+ input: "/Constant_14_output_0"
462
+ output: "/Slice_1_output_0"
463
+ name: "/Slice_1"
464
+ op_type: "Slice"
465
+ }
466
+ node {
467
+ input: "/Slice_1_output_0"
468
+ output: "/Transpose_1_output_0"
469
+ name: "/Transpose_1"
470
+ op_type: "Transpose"
471
+ attribute {
472
+ name: "perm"
473
+ ints: 1
474
+ ints: 0
475
+ type: INTS
476
+ }
477
+ }
478
+ node {
479
+ output: "/Constant_15_output_0"
480
+ name: "/Constant_15"
481
+ op_type: "Constant"
482
+ attribute {
483
+ name: "value"
484
+ t {
485
+ dims: 1
486
+ data_type: 7
487
+ data_location: 0
488
+ }
489
+ type: TENSOR
490
+ }
491
+ }
492
+ node {
493
+ input: "/Transpose_1_output_0"
494
+ input: "/Constant_15_output_0"
495
+ output: "/Reshape_3_output_0"
496
+ name: "/Reshape_3"
497
+ op_type: "Reshape"
498
+ }
499
+ node {
500
+ input: "/Reshape_3_output_0"
501
+ output: "/Cast_1_output_0"
502
+ name: "/Cast_1"
503
+ op_type: "Cast"
504
+ attribute {
505
+ name: "to"
506
+ i: 7
507
+ type: INT
508
+ }
509
+ }
510
+ node {
511
+ input: "/MaxPool_output_0"
512
+ input: "/Cast_1_output_0"
513
+ input: ""
514
+ output: "/Pad_1_output_0"
515
+ name: "/Pad_1"
516
+ op_type: "Pad"
517
+ attribute {
518
+ name: "mode"
519
+ s: "constant"
520
+ type: STRING
521
+ }
522
+ }
523
+ node {
524
+ input: "/Pad_1_output_0"
525
+ input: "conv2.weight"
526
+ input: "conv2.bias"
527
+ output: "/conv2/Conv_output_0"
528
+ name: "/conv2/Conv"
529
+ op_type: "Conv"
530
+ attribute {
531
+ name: "dilations"
532
+ ints: 1
533
+ ints: 1
534
+ type: INTS
535
+ }
536
+ attribute {
537
+ name: "group"
538
+ i: 1
539
+ type: INT
540
+ }
541
+ attribute {
542
+ name: "kernel_shape"
543
+ ints: 64
544
+ ints: 1
545
+ type: INTS
546
+ }
547
+ attribute {
548
+ name: "pads"
549
+ ints: 0
550
+ ints: 0
551
+ ints: 0
552
+ ints: 0
553
+ type: INTS
554
+ }
555
+ attribute {
556
+ name: "strides"
557
+ ints: 1
558
+ ints: 1
559
+ type: INTS
560
+ }
561
+ }
562
+ node {
563
+ input: "/conv2/Conv_output_0"
564
+ output: "/Relu_1_output_0"
565
+ name: "/Relu_1"
566
+ op_type: "Relu"
567
+ }
568
+ node {
569
+ input: "/Relu_1_output_0"
570
+ input: "conv2_BN.weight"
571
+ input: "conv2_BN.bias"
572
+ input: "conv2_BN.running_mean"
573
+ input: "conv2_BN.running_var"
574
+ output: "/conv2_BN/BatchNormalization_output_0"
575
+ name: "/conv2_BN/BatchNormalization"
576
+ op_type: "BatchNormalization"
577
+ attribute {
578
+ name: "epsilon"
579
+ f: 0.0010000000474974513
580
+ type: FLOAT
581
+ }
582
+ attribute {
583
+ name: "momentum"
584
+ f: 1.0
585
+ type: FLOAT
586
+ }
587
+ }
588
+ node {
589
+ input: "/conv2_BN/BatchNormalization_output_0"
590
+ output: "/MaxPool_1_output_0"
591
+ name: "/MaxPool_1"
592
+ op_type: "MaxPool"
593
+ attribute {
594
+ name: "ceil_mode"
595
+ i: 0
596
+ type: INT
597
+ }
598
+ attribute {
599
+ name: "kernel_shape"
600
+ ints: 2
601
+ ints: 1
602
+ type: INTS
603
+ }
604
+ attribute {
605
+ name: "pads"
606
+ ints: 0
607
+ ints: 0
608
+ ints: 0
609
+ ints: 0
610
+ type: INTS
611
+ }
612
+ attribute {
613
+ name: "strides"
614
+ ints: 2
615
+ ints: 1
616
+ type: INTS
617
+ }
618
+ }
619
+ node {
620
+ output: "/Constant_16_output_0"
621
+ name: "/Constant_16"
622
+ op_type: "Constant"
623
+ attribute {
624
+ name: "value"
625
+ t {
626
+ dims: 1
627
+ data_type: 7
628
+ data_location: 0
629
+ }
630
+ type: TENSOR
631
+ }
632
+ }
633
+ node {
634
+ output: "/Constant_17_output_0"
635
+ name: "/Constant_17"
636
+ op_type: "Constant"
637
+ attribute {
638
+ name: "value"
639
+ t {
640
+ dims: 4
641
+ data_type: 7
642
+ data_location: 0
643
+ }
644
+ type: TENSOR
645
+ }
646
+ }
647
+ node {
648
+ input: "/Constant_16_output_0"
649
+ output: "/ConstantOfShape_2_output_0"
650
+ name: "/ConstantOfShape_2"
651
+ op_type: "ConstantOfShape"
652
+ attribute {
653
+ name: "value"
654
+ t {
655
+ dims: 1
656
+ data_type: 7
657
+ raw_data: "\000\000\000\000\000\000\000\000"
658
+ }
659
+ type: TENSOR
660
+ }
661
+ }
662
+ node {
663
+ input: "/Constant_17_output_0"
664
+ input: "/ConstantOfShape_2_output_0"
665
+ output: "/Concat_2_output_0"
666
+ name: "/Concat_2"
667
+ op_type: "Concat"
668
+ attribute {
669
+ name: "axis"
670
+ i: 0
671
+ type: INT
672
+ }
673
+ }
674
+ node {
675
+ output: "/Constant_18_output_0"
676
+ name: "/Constant_18"
677
+ op_type: "Constant"
678
+ attribute {
679
+ name: "value"
680
+ t {
681
+ dims: 2
682
+ data_type: 7
683
+ data_location: 0
684
+ }
685
+ type: TENSOR
686
+ }
687
+ }
688
+ node {
689
+ input: "/Concat_2_output_0"
690
+ input: "/Constant_18_output_0"
691
+ output: "/Reshape_4_output_0"
692
+ name: "/Reshape_4"
693
+ op_type: "Reshape"
694
+ }
695
+ node {
696
+ output: "/Constant_19_output_0"
697
+ name: "/Constant_19"
698
+ op_type: "Constant"
699
+ attribute {
700
+ name: "value"
701
+ t {
702
+ dims: 1
703
+ data_type: 7
704
+ data_location: 0
705
+ }
706
+ type: TENSOR
707
+ }
708
+ }
709
+ node {
710
+ output: "/Constant_20_output_0"
711
+ name: "/Constant_20"
712
+ op_type: "Constant"
713
+ attribute {
714
+ name: "value"
715
+ t {
716
+ dims: 1
717
+ data_type: 7
718
+ data_location: 0
719
+ }
720
+ type: TENSOR
721
+ }
722
+ }
723
+ node {
724
+ output: "/Constant_21_output_0"
725
+ name: "/Constant_21"
726
+ op_type: "Constant"
727
+ attribute {
728
+ name: "value"
729
+ t {
730
+ dims: 1
731
+ data_type: 7
732
+ data_location: 0
733
+ }
734
+ type: TENSOR
735
+ }
736
+ }
737
+ node {
738
+ output: "/Constant_22_output_0"
739
+ name: "/Constant_22"
740
+ op_type: "Constant"
741
+ attribute {
742
+ name: "value"
743
+ t {
744
+ dims: 1
745
+ data_type: 7
746
+ data_location: 0
747
+ }
748
+ type: TENSOR
749
+ }
750
+ }
751
+ node {
752
+ input: "/Reshape_4_output_0"
753
+ input: "/Constant_20_output_0"
754
+ input: "/Constant_21_output_0"
755
+ input: "/Constant_19_output_0"
756
+ input: "/Constant_22_output_0"
757
+ output: "/Slice_2_output_0"
758
+ name: "/Slice_2"
759
+ op_type: "Slice"
760
+ }
761
+ node {
762
+ input: "/Slice_2_output_0"
763
+ output: "/Transpose_2_output_0"
764
+ name: "/Transpose_2"
765
+ op_type: "Transpose"
766
+ attribute {
767
+ name: "perm"
768
+ ints: 1
769
+ ints: 0
770
+ type: INTS
771
+ }
772
+ }
773
+ node {
774
+ output: "/Constant_23_output_0"
775
+ name: "/Constant_23"
776
+ op_type: "Constant"
777
+ attribute {
778
+ name: "value"
779
+ t {
780
+ dims: 1
781
+ data_type: 7
782
+ data_location: 0
783
+ }
784
+ type: TENSOR
785
+ }
786
+ }
787
+ node {
788
+ input: "/Transpose_2_output_0"
789
+ input: "/Constant_23_output_0"
790
+ output: "/Reshape_5_output_0"
791
+ name: "/Reshape_5"
792
+ op_type: "Reshape"
793
+ }
794
+ node {
795
+ input: "/Reshape_5_output_0"
796
+ output: "/Cast_2_output_0"
797
+ name: "/Cast_2"
798
+ op_type: "Cast"
799
+ attribute {
800
+ name: "to"
801
+ i: 7
802
+ type: INT
803
+ }
804
+ }
805
+ node {
806
+ input: "/MaxPool_1_output_0"
807
+ input: "/Cast_2_output_0"
808
+ input: ""
809
+ output: "/Pad_2_output_0"
810
+ name: "/Pad_2"
811
+ op_type: "Pad"
812
+ attribute {
813
+ name: "mode"
814
+ s: "constant"
815
+ type: STRING
816
+ }
817
+ }
818
+ node {
819
+ input: "/Pad_2_output_0"
820
+ input: "conv3.weight"
821
+ input: "conv3.bias"
822
+ output: "/conv3/Conv_output_0"
823
+ name: "/conv3/Conv"
824
+ op_type: "Conv"
825
+ attribute {
826
+ name: "dilations"
827
+ ints: 1
828
+ ints: 1
829
+ type: INTS
830
+ }
831
+ attribute {
832
+ name: "group"
833
+ i: 1
834
+ type: INT
835
+ }
836
+ attribute {
837
+ name: "kernel_shape"
838
+ ints: 64
839
+ ints: 1
840
+ type: INTS
841
+ }
842
+ attribute {
843
+ name: "pads"
844
+ ints: 0
845
+ ints: 0
846
+ ints: 0
847
+ ints: 0
848
+ type: INTS
849
+ }
850
+ attribute {
851
+ name: "strides"
852
+ ints: 1
853
+ ints: 1
854
+ type: INTS
855
+ }
856
+ }
857
+ node {
858
+ input: "/conv3/Conv_output_0"
859
+ output: "/Relu_2_output_0"
860
+ name: "/Relu_2"
861
+ op_type: "Relu"
862
+ }
863
+ node {
864
+ input: "/Relu_2_output_0"
865
+ input: "conv3_BN.weight"
866
+ input: "conv3_BN.bias"
867
+ input: "conv3_BN.running_mean"
868
+ input: "conv3_BN.running_var"
869
+ output: "/conv3_BN/BatchNormalization_output_0"
870
+ name: "/conv3_BN/BatchNormalization"
871
+ op_type: "BatchNormalization"
872
+ attribute {
873
+ name: "epsilon"
874
+ f: 0.0010000000474974513
875
+ type: FLOAT
876
+ }
877
+ attribute {
878
+ name: "momentum"
879
+ f: 1.0
880
+ type: FLOAT
881
+ }
882
+ }
883
+ node {
884
+ input: "/conv3_BN/BatchNormalization_output_0"
885
+ output: "/MaxPool_2_output_0"
886
+ name: "/MaxPool_2"
887
+ op_type: "MaxPool"
888
+ attribute {
889
+ name: "ceil_mode"
890
+ i: 0
891
+ type: INT
892
+ }
893
+ attribute {
894
+ name: "kernel_shape"
895
+ ints: 2
896
+ ints: 1
897
+ type: INTS
898
+ }
899
+ attribute {
900
+ name: "pads"
901
+ ints: 0
902
+ ints: 0
903
+ ints: 0
904
+ ints: 0
905
+ type: INTS
906
+ }
907
+ attribute {
908
+ name: "strides"
909
+ ints: 2
910
+ ints: 1
911
+ type: INTS
912
+ }
913
+ }
914
+ node {
915
+ output: "/Constant_24_output_0"
916
+ name: "/Constant_24"
917
+ op_type: "Constant"
918
+ attribute {
919
+ name: "value"
920
+ t {
921
+ dims: 1
922
+ data_type: 7
923
+ data_location: 0
924
+ }
925
+ type: TENSOR
926
+ }
927
+ }
928
+ node {
929
+ output: "/Constant_25_output_0"
930
+ name: "/Constant_25"
931
+ op_type: "Constant"
932
+ attribute {
933
+ name: "value"
934
+ t {
935
+ dims: 4
936
+ data_type: 7
937
+ data_location: 0
938
+ }
939
+ type: TENSOR
940
+ }
941
+ }
942
+ node {
943
+ input: "/Constant_24_output_0"
944
+ output: "/ConstantOfShape_3_output_0"
945
+ name: "/ConstantOfShape_3"
946
+ op_type: "ConstantOfShape"
947
+ attribute {
948
+ name: "value"
949
+ t {
950
+ dims: 1
951
+ data_type: 7
952
+ raw_data: "\000\000\000\000\000\000\000\000"
953
+ }
954
+ type: TENSOR
955
+ }
956
+ }
957
+ node {
958
+ input: "/Constant_25_output_0"
959
+ input: "/ConstantOfShape_3_output_0"
960
+ output: "/Concat_3_output_0"
961
+ name: "/Concat_3"
962
+ op_type: "Concat"
963
+ attribute {
964
+ name: "axis"
965
+ i: 0
966
+ type: INT
967
+ }
968
+ }
969
+ node {
970
+ output: "/Constant_26_output_0"
971
+ name: "/Constant_26"
972
+ op_type: "Constant"
973
+ attribute {
974
+ name: "value"
975
+ t {
976
+ dims: 2
977
+ data_type: 7
978
+ data_location: 0
979
+ }
980
+ type: TENSOR
981
+ }
982
+ }
983
+ node {
984
+ input: "/Concat_3_output_0"
985
+ input: "/Constant_26_output_0"
986
+ output: "/Reshape_6_output_0"
987
+ name: "/Reshape_6"
988
+ op_type: "Reshape"
989
+ }
990
+ node {
991
+ output: "/Constant_27_output_0"
992
+ name: "/Constant_27"
993
+ op_type: "Constant"
994
+ attribute {
995
+ name: "value"
996
+ t {
997
+ dims: 1
998
+ data_type: 7
999
+ data_location: 0
1000
+ }
1001
+ type: TENSOR
1002
+ }
1003
+ }
1004
+ node {
1005
+ output: "/Constant_28_output_0"
1006
+ name: "/Constant_28"
1007
+ op_type: "Constant"
1008
+ attribute {
1009
+ name: "value"
1010
+ t {
1011
+ dims: 1
1012
+ data_type: 7
1013
+ data_location: 0
1014
+ }
1015
+ type: TENSOR
1016
+ }
1017
+ }
1018
+ node {
1019
+ output: "/Constant_29_output_0"
1020
+ name: "/Constant_29"
1021
+ op_type: "Constant"
1022
+ attribute {
1023
+ name: "value"
1024
+ t {
1025
+ dims: 1
1026
+ data_type: 7
1027
+ data_location: 0
1028
+ }
1029
+ type: TENSOR
1030
+ }
1031
+ }
1032
+ node {
1033
+ output: "/Constant_30_output_0"
1034
+ name: "/Constant_30"
1035
+ op_type: "Constant"
1036
+ attribute {
1037
+ name: "value"
1038
+ t {
1039
+ dims: 1
1040
+ data_type: 7
1041
+ data_location: 0
1042
+ }
1043
+ type: TENSOR
1044
+ }
1045
+ }
1046
+ node {
1047
+ input: "/Reshape_6_output_0"
1048
+ input: "/Constant_28_output_0"
1049
+ input: "/Constant_29_output_0"
1050
+ input: "/Constant_27_output_0"
1051
+ input: "/Constant_30_output_0"
1052
+ output: "/Slice_3_output_0"
1053
+ name: "/Slice_3"
1054
+ op_type: "Slice"
1055
+ }
1056
+ node {
1057
+ input: "/Slice_3_output_0"
1058
+ output: "/Transpose_3_output_0"
1059
+ name: "/Transpose_3"
1060
+ op_type: "Transpose"
1061
+ attribute {
1062
+ name: "perm"
1063
+ ints: 1
1064
+ ints: 0
1065
+ type: INTS
1066
+ }
1067
+ }
1068
+ node {
1069
+ output: "/Constant_31_output_0"
1070
+ name: "/Constant_31"
1071
+ op_type: "Constant"
1072
+ attribute {
1073
+ name: "value"
1074
+ t {
1075
+ dims: 1
1076
+ data_type: 7
1077
+ data_location: 0
1078
+ }
1079
+ type: TENSOR
1080
+ }
1081
+ }
1082
+ node {
1083
+ input: "/Transpose_3_output_0"
1084
+ input: "/Constant_31_output_0"
1085
+ output: "/Reshape_7_output_0"
1086
+ name: "/Reshape_7"
1087
+ op_type: "Reshape"
1088
+ }
1089
+ node {
1090
+ input: "/Reshape_7_output_0"
1091
+ output: "/Cast_3_output_0"
1092
+ name: "/Cast_3"
1093
+ op_type: "Cast"
1094
+ attribute {
1095
+ name: "to"
1096
+ i: 7
1097
+ type: INT
1098
+ }
1099
+ }
1100
+ node {
1101
+ input: "/MaxPool_2_output_0"
1102
+ input: "/Cast_3_output_0"
1103
+ input: ""
1104
+ output: "/Pad_3_output_0"
1105
+ name: "/Pad_3"
1106
+ op_type: "Pad"
1107
+ attribute {
1108
+ name: "mode"
1109
+ s: "constant"
1110
+ type: STRING
1111
+ }
1112
+ }
1113
+ node {
1114
+ input: "/Pad_3_output_0"
1115
+ input: "conv4.weight"
1116
+ input: "conv4.bias"
1117
+ output: "/conv4/Conv_output_0"
1118
+ name: "/conv4/Conv"
1119
+ op_type: "Conv"
1120
+ attribute {
1121
+ name: "dilations"
1122
+ ints: 1
1123
+ ints: 1
1124
+ type: INTS
1125
+ }
1126
+ attribute {
1127
+ name: "group"
1128
+ i: 1
1129
+ type: INT
1130
+ }
1131
+ attribute {
1132
+ name: "kernel_shape"
1133
+ ints: 64
1134
+ ints: 1
1135
+ type: INTS
1136
+ }
1137
+ attribute {
1138
+ name: "pads"
1139
+ ints: 0
1140
+ ints: 0
1141
+ ints: 0
1142
+ ints: 0
1143
+ type: INTS
1144
+ }
1145
+ attribute {
1146
+ name: "strides"
1147
+ ints: 1
1148
+ ints: 1
1149
+ type: INTS
1150
+ }
1151
+ }
1152
+ node {
1153
+ input: "/conv4/Conv_output_0"
1154
+ output: "/Relu_3_output_0"
1155
+ name: "/Relu_3"
1156
+ op_type: "Relu"
1157
+ }
1158
+ node {
1159
+ input: "/Relu_3_output_0"
1160
+ input: "conv4_BN.weight"
1161
+ input: "conv4_BN.bias"
1162
+ input: "conv4_BN.running_mean"
1163
+ input: "conv4_BN.running_var"
1164
+ output: "/conv4_BN/BatchNormalization_output_0"
1165
+ name: "/conv4_BN/BatchNormalization"
1166
+ op_type: "BatchNormalization"
1167
+ attribute {
1168
+ name: "epsilon"
1169
+ f: 0.0010000000474974513
1170
+ type: FLOAT
1171
+ }
1172
+ attribute {
1173
+ name: "momentum"
1174
+ f: 1.0
1175
+ type: FLOAT
1176
+ }
1177
+ }
1178
+ node {
1179
+ input: "/conv4_BN/BatchNormalization_output_0"
1180
+ output: "/MaxPool_3_output_0"
1181
+ name: "/MaxPool_3"
1182
+ op_type: "MaxPool"
1183
+ attribute {
1184
+ name: "ceil_mode"
1185
+ i: 0
1186
+ type: INT
1187
+ }
1188
+ attribute {
1189
+ name: "kernel_shape"
1190
+ ints: 2
1191
+ ints: 1
1192
+ type: INTS
1193
+ }
1194
+ attribute {
1195
+ name: "pads"
1196
+ ints: 0
1197
+ ints: 0
1198
+ ints: 0
1199
+ ints: 0
1200
+ type: INTS
1201
+ }
1202
+ attribute {
1203
+ name: "strides"
1204
+ ints: 2
1205
+ ints: 1
1206
+ type: INTS
1207
+ }
1208
+ }
1209
+ node {
1210
+ output: "/Constant_32_output_0"
1211
+ name: "/Constant_32"
1212
+ op_type: "Constant"
1213
+ attribute {
1214
+ name: "value"
1215
+ t {
1216
+ dims: 1
1217
+ data_type: 7
1218
+ data_location: 0
1219
+ }
1220
+ type: TENSOR
1221
+ }
1222
+ }
1223
+ node {
1224
+ output: "/Constant_33_output_0"
1225
+ name: "/Constant_33"
1226
+ op_type: "Constant"
1227
+ attribute {
1228
+ name: "value"
1229
+ t {
1230
+ dims: 4
1231
+ data_type: 7
1232
+ data_location: 0
1233
+ }
1234
+ type: TENSOR
1235
+ }
1236
+ }
1237
+ node {
1238
+ input: "/Constant_32_output_0"
1239
+ output: "/ConstantOfShape_4_output_0"
1240
+ name: "/ConstantOfShape_4"
1241
+ op_type: "ConstantOfShape"
1242
+ attribute {
1243
+ name: "value"
1244
+ t {
1245
+ dims: 1
1246
+ data_type: 7
1247
+ raw_data: "\000\000\000\000\000\000\000\000"
1248
+ }
1249
+ type: TENSOR
1250
+ }
1251
+ }
1252
+ node {
1253
+ input: "/Constant_33_output_0"
1254
+ input: "/ConstantOfShape_4_output_0"
1255
+ output: "/Concat_4_output_0"
1256
+ name: "/Concat_4"
1257
+ op_type: "Concat"
1258
+ attribute {
1259
+ name: "axis"
1260
+ i: 0
1261
+ type: INT
1262
+ }
1263
+ }
1264
+ node {
1265
+ output: "/Constant_34_output_0"
1266
+ name: "/Constant_34"
1267
+ op_type: "Constant"
1268
+ attribute {
1269
+ name: "value"
1270
+ t {
1271
+ dims: 2
1272
+ data_type: 7
1273
+ data_location: 0
1274
+ }
1275
+ type: TENSOR
1276
+ }
1277
+ }
1278
+ node {
1279
+ input: "/Concat_4_output_0"
1280
+ input: "/Constant_34_output_0"
1281
+ output: "/Reshape_8_output_0"
1282
+ name: "/Reshape_8"
1283
+ op_type: "Reshape"
1284
+ }
1285
+ node {
1286
+ output: "/Constant_35_output_0"
1287
+ name: "/Constant_35"
1288
+ op_type: "Constant"
1289
+ attribute {
1290
+ name: "value"
1291
+ t {
1292
+ dims: 1
1293
+ data_type: 7
1294
+ data_location: 0
1295
+ }
1296
+ type: TENSOR
1297
+ }
1298
+ }
1299
+ node {
1300
+ output: "/Constant_36_output_0"
1301
+ name: "/Constant_36"
1302
+ op_type: "Constant"
1303
+ attribute {
1304
+ name: "value"
1305
+ t {
1306
+ dims: 1
1307
+ data_type: 7
1308
+ data_location: 0
1309
+ }
1310
+ type: TENSOR
1311
+ }
1312
+ }
1313
+ node {
1314
+ output: "/Constant_37_output_0"
1315
+ name: "/Constant_37"
1316
+ op_type: "Constant"
1317
+ attribute {
1318
+ name: "value"
1319
+ t {
1320
+ dims: 1
1321
+ data_type: 7
1322
+ data_location: 0
1323
+ }
1324
+ type: TENSOR
1325
+ }
1326
+ }
1327
+ node {
1328
+ output: "/Constant_38_output_0"
1329
+ name: "/Constant_38"
1330
+ op_type: "Constant"
1331
+ attribute {
1332
+ name: "value"
1333
+ t {
1334
+ dims: 1
1335
+ data_type: 7
1336
+ data_location: 0
1337
+ }
1338
+ type: TENSOR
1339
+ }
1340
+ }
1341
+ node {
1342
+ input: "/Reshape_8_output_0"
1343
+ input: "/Constant_36_output_0"
1344
+ input: "/Constant_37_output_0"
1345
+ input: "/Constant_35_output_0"
1346
+ input: "/Constant_38_output_0"
1347
+ output: "/Slice_4_output_0"
1348
+ name: "/Slice_4"
1349
+ op_type: "Slice"
1350
+ }
1351
+ node {
1352
+ input: "/Slice_4_output_0"
1353
+ output: "/Transpose_4_output_0"
1354
+ name: "/Transpose_4"
1355
+ op_type: "Transpose"
1356
+ attribute {
1357
+ name: "perm"
1358
+ ints: 1
1359
+ ints: 0
1360
+ type: INTS
1361
+ }
1362
+ }
1363
+ node {
1364
+ output: "/Constant_39_output_0"
1365
+ name: "/Constant_39"
1366
+ op_type: "Constant"
1367
+ attribute {
1368
+ name: "value"
1369
+ t {
1370
+ dims: 1
1371
+ data_type: 7
1372
+ data_location: 0
1373
+ }
1374
+ type: TENSOR
1375
+ }
1376
+ }
1377
+ node {
1378
+ input: "/Transpose_4_output_0"
1379
+ input: "/Constant_39_output_0"
1380
+ output: "/Reshape_9_output_0"
1381
+ name: "/Reshape_9"
1382
+ op_type: "Reshape"
1383
+ }
1384
+ node {
1385
+ input: "/Reshape_9_output_0"
1386
+ output: "/Cast_4_output_0"
1387
+ name: "/Cast_4"
1388
+ op_type: "Cast"
1389
+ attribute {
1390
+ name: "to"
1391
+ i: 7
1392
+ type: INT
1393
+ }
1394
+ }
1395
+ node {
1396
+ input: "/MaxPool_3_output_0"
1397
+ input: "/Cast_4_output_0"
1398
+ input: ""
1399
+ output: "/Pad_4_output_0"
1400
+ name: "/Pad_4"
1401
+ op_type: "Pad"
1402
+ attribute {
1403
+ name: "mode"
1404
+ s: "constant"
1405
+ type: STRING
1406
+ }
1407
+ }
1408
+ node {
1409
+ input: "/Pad_4_output_0"
1410
+ input: "conv5.weight"
1411
+ input: "conv5.bias"
1412
+ output: "/conv5/Conv_output_0"
1413
+ name: "/conv5/Conv"
1414
+ op_type: "Conv"
1415
+ attribute {
1416
+ name: "dilations"
1417
+ ints: 1
1418
+ ints: 1
1419
+ type: INTS
1420
+ }
1421
+ attribute {
1422
+ name: "group"
1423
+ i: 1
1424
+ type: INT
1425
+ }
1426
+ attribute {
1427
+ name: "kernel_shape"
1428
+ ints: 64
1429
+ ints: 1
1430
+ type: INTS
1431
+ }
1432
+ attribute {
1433
+ name: "pads"
1434
+ ints: 0
1435
+ ints: 0
1436
+ ints: 0
1437
+ ints: 0
1438
+ type: INTS
1439
+ }
1440
+ attribute {
1441
+ name: "strides"
1442
+ ints: 1
1443
+ ints: 1
1444
+ type: INTS
1445
+ }
1446
+ }
1447
+ node {
1448
+ input: "/conv5/Conv_output_0"
1449
+ output: "/Relu_4_output_0"
1450
+ name: "/Relu_4"
1451
+ op_type: "Relu"
1452
+ }
1453
+ node {
1454
+ input: "/Relu_4_output_0"
1455
+ input: "conv5_BN.weight"
1456
+ input: "conv5_BN.bias"
1457
+ input: "conv5_BN.running_mean"
1458
+ input: "conv5_BN.running_var"
1459
+ output: "/conv5_BN/BatchNormalization_output_0"
1460
+ name: "/conv5_BN/BatchNormalization"
1461
+ op_type: "BatchNormalization"
1462
+ attribute {
1463
+ name: "epsilon"
1464
+ f: 0.0010000000474974513
1465
+ type: FLOAT
1466
+ }
1467
+ attribute {
1468
+ name: "momentum"
1469
+ f: 1.0
1470
+ type: FLOAT
1471
+ }
1472
+ }
1473
+ node {
1474
+ input: "/conv5_BN/BatchNormalization_output_0"
1475
+ output: "/MaxPool_4_output_0"
1476
+ name: "/MaxPool_4"
1477
+ op_type: "MaxPool"
1478
+ attribute {
1479
+ name: "ceil_mode"
1480
+ i: 0
1481
+ type: INT
1482
+ }
1483
+ attribute {
1484
+ name: "kernel_shape"
1485
+ ints: 2
1486
+ ints: 1
1487
+ type: INTS
1488
+ }
1489
+ attribute {
1490
+ name: "pads"
1491
+ ints: 0
1492
+ ints: 0
1493
+ ints: 0
1494
+ ints: 0
1495
+ type: INTS
1496
+ }
1497
+ attribute {
1498
+ name: "strides"
1499
+ ints: 2
1500
+ ints: 1
1501
+ type: INTS
1502
+ }
1503
+ }
1504
+ node {
1505
+ output: "/Constant_40_output_0"
1506
+ name: "/Constant_40"
1507
+ op_type: "Constant"
1508
+ attribute {
1509
+ name: "value"
1510
+ t {
1511
+ dims: 1
1512
+ data_type: 7
1513
+ data_location: 0
1514
+ }
1515
+ type: TENSOR
1516
+ }
1517
+ }
1518
+ node {
1519
+ output: "/Constant_41_output_0"
1520
+ name: "/Constant_41"
1521
+ op_type: "Constant"
1522
+ attribute {
1523
+ name: "value"
1524
+ t {
1525
+ dims: 4
1526
+ data_type: 7
1527
+ data_location: 0
1528
+ }
1529
+ type: TENSOR
1530
+ }
1531
+ }
1532
+ node {
1533
+ input: "/Constant_40_output_0"
1534
+ output: "/ConstantOfShape_5_output_0"
1535
+ name: "/ConstantOfShape_5"
1536
+ op_type: "ConstantOfShape"
1537
+ attribute {
1538
+ name: "value"
1539
+ t {
1540
+ dims: 1
1541
+ data_type: 7
1542
+ raw_data: "\000\000\000\000\000\000\000\000"
1543
+ }
1544
+ type: TENSOR
1545
+ }
1546
+ }
1547
+ node {
1548
+ input: "/Constant_41_output_0"
1549
+ input: "/ConstantOfShape_5_output_0"
1550
+ output: "/Concat_5_output_0"
1551
+ name: "/Concat_5"
1552
+ op_type: "Concat"
1553
+ attribute {
1554
+ name: "axis"
1555
+ i: 0
1556
+ type: INT
1557
+ }
1558
+ }
1559
+ node {
1560
+ output: "/Constant_42_output_0"
1561
+ name: "/Constant_42"
1562
+ op_type: "Constant"
1563
+ attribute {
1564
+ name: "value"
1565
+ t {
1566
+ dims: 2
1567
+ data_type: 7
1568
+ data_location: 0
1569
+ }
1570
+ type: TENSOR
1571
+ }
1572
+ }
1573
+ node {
1574
+ input: "/Concat_5_output_0"
1575
+ input: "/Constant_42_output_0"
1576
+ output: "/Reshape_10_output_0"
1577
+ name: "/Reshape_10"
1578
+ op_type: "Reshape"
1579
+ }
1580
+ node {
1581
+ output: "/Constant_43_output_0"
1582
+ name: "/Constant_43"
1583
+ op_type: "Constant"
1584
+ attribute {
1585
+ name: "value"
1586
+ t {
1587
+ dims: 1
1588
+ data_type: 7
1589
+ data_location: 0
1590
+ }
1591
+ type: TENSOR
1592
+ }
1593
+ }
1594
+ node {
1595
+ output: "/Constant_44_output_0"
1596
+ name: "/Constant_44"
1597
+ op_type: "Constant"
1598
+ attribute {
1599
+ name: "value"
1600
+ t {
1601
+ dims: 1
1602
+ data_type: 7
1603
+ data_location: 0
1604
+ }
1605
+ type: TENSOR
1606
+ }
1607
+ }
1608
+ node {
1609
+ output: "/Constant_45_output_0"
1610
+ name: "/Constant_45"
1611
+ op_type: "Constant"
1612
+ attribute {
1613
+ name: "value"
1614
+ t {
1615
+ dims: 1
1616
+ data_type: 7
1617
+ data_location: 0
1618
+ }
1619
+ type: TENSOR
1620
+ }
1621
+ }
1622
+ node {
1623
+ output: "/Constant_46_output_0"
1624
+ name: "/Constant_46"
1625
+ op_type: "Constant"
1626
+ attribute {
1627
+ name: "value"
1628
+ t {
1629
+ dims: 1
1630
+ data_type: 7
1631
+ data_location: 0
1632
+ }
1633
+ type: TENSOR
1634
+ }
1635
+ }
1636
+ node {
1637
+ input: "/Reshape_10_output_0"
1638
+ input: "/Constant_44_output_0"
1639
+ input: "/Constant_45_output_0"
1640
+ input: "/Constant_43_output_0"
1641
+ input: "/Constant_46_output_0"
1642
+ output: "/Slice_5_output_0"
1643
+ name: "/Slice_5"
1644
+ op_type: "Slice"
1645
+ }
1646
+ node {
1647
+ input: "/Slice_5_output_0"
1648
+ output: "/Transpose_5_output_0"
1649
+ name: "/Transpose_5"
1650
+ op_type: "Transpose"
1651
+ attribute {
1652
+ name: "perm"
1653
+ ints: 1
1654
+ ints: 0
1655
+ type: INTS
1656
+ }
1657
+ }
1658
+ node {
1659
+ output: "/Constant_47_output_0"
1660
+ name: "/Constant_47"
1661
+ op_type: "Constant"
1662
+ attribute {
1663
+ name: "value"
1664
+ t {
1665
+ dims: 1
1666
+ data_type: 7
1667
+ data_location: 0
1668
+ }
1669
+ type: TENSOR
1670
+ }
1671
+ }
1672
+ node {
1673
+ input: "/Transpose_5_output_0"
1674
+ input: "/Constant_47_output_0"
1675
+ output: "/Reshape_11_output_0"
1676
+ name: "/Reshape_11"
1677
+ op_type: "Reshape"
1678
+ }
1679
+ node {
1680
+ input: "/Reshape_11_output_0"
1681
+ output: "/Cast_5_output_0"
1682
+ name: "/Cast_5"
1683
+ op_type: "Cast"
1684
+ attribute {
1685
+ name: "to"
1686
+ i: 7
1687
+ type: INT
1688
+ }
1689
+ }
1690
+ node {
1691
+ input: "/MaxPool_4_output_0"
1692
+ input: "/Cast_5_output_0"
1693
+ input: ""
1694
+ output: "/Pad_5_output_0"
1695
+ name: "/Pad_5"
1696
+ op_type: "Pad"
1697
+ attribute {
1698
+ name: "mode"
1699
+ s: "constant"
1700
+ type: STRING
1701
+ }
1702
+ }
1703
+ node {
1704
+ input: "/Pad_5_output_0"
1705
+ input: "conv6.weight"
1706
+ input: "conv6.bias"
1707
+ output: "/conv6/Conv_output_0"
1708
+ name: "/conv6/Conv"
1709
+ op_type: "Conv"
1710
+ attribute {
1711
+ name: "dilations"
1712
+ ints: 1
1713
+ ints: 1
1714
+ type: INTS
1715
+ }
1716
+ attribute {
1717
+ name: "group"
1718
+ i: 1
1719
+ type: INT
1720
+ }
1721
+ attribute {
1722
+ name: "kernel_shape"
1723
+ ints: 64
1724
+ ints: 1
1725
+ type: INTS
1726
+ }
1727
+ attribute {
1728
+ name: "pads"
1729
+ ints: 0
1730
+ ints: 0
1731
+ ints: 0
1732
+ ints: 0
1733
+ type: INTS
1734
+ }
1735
+ attribute {
1736
+ name: "strides"
1737
+ ints: 1
1738
+ ints: 1
1739
+ type: INTS
1740
+ }
1741
+ }
1742
+ node {
1743
+ input: "/conv6/Conv_output_0"
1744
+ output: "/Relu_5_output_0"
1745
+ name: "/Relu_5"
1746
+ op_type: "Relu"
1747
+ }
1748
+ node {
1749
+ input: "/Relu_5_output_0"
1750
+ input: "conv6_BN.weight"
1751
+ input: "conv6_BN.bias"
1752
+ input: "conv6_BN.running_mean"
1753
+ input: "conv6_BN.running_var"
1754
+ output: "/conv6_BN/BatchNormalization_output_0"
1755
+ name: "/conv6_BN/BatchNormalization"
1756
+ op_type: "BatchNormalization"
1757
+ attribute {
1758
+ name: "epsilon"
1759
+ f: 0.0010000000474974513
1760
+ type: FLOAT
1761
+ }
1762
+ attribute {
1763
+ name: "momentum"
1764
+ f: 1.0
1765
+ type: FLOAT
1766
+ }
1767
+ }
1768
+ node {
1769
+ input: "/conv6_BN/BatchNormalization_output_0"
1770
+ output: "/MaxPool_5_output_0"
1771
+ name: "/MaxPool_5"
1772
+ op_type: "MaxPool"
1773
+ attribute {
1774
+ name: "ceil_mode"
1775
+ i: 0
1776
+ type: INT
1777
+ }
1778
+ attribute {
1779
+ name: "kernel_shape"
1780
+ ints: 2
1781
+ ints: 1
1782
+ type: INTS
1783
+ }
1784
+ attribute {
1785
+ name: "pads"
1786
+ ints: 0
1787
+ ints: 0
1788
+ ints: 0
1789
+ ints: 0
1790
+ type: INTS
1791
+ }
1792
+ attribute {
1793
+ name: "strides"
1794
+ ints: 2
1795
+ ints: 1
1796
+ type: INTS
1797
+ }
1798
+ }
1799
+ node {
1800
+ input: "/MaxPool_5_output_0"
1801
+ output: "/Transpose_6_output_0"
1802
+ name: "/Transpose_6"
1803
+ op_type: "Transpose"
1804
+ attribute {
1805
+ name: "perm"
1806
+ ints: 0
1807
+ ints: 2
1808
+ ints: 1
1809
+ ints: 3
1810
+ type: INTS
1811
+ }
1812
+ }
1813
+ node {
1814
+ output: "/Constant_48_output_0"
1815
+ name: "/Constant_48"
1816
+ op_type: "Constant"
1817
+ attribute {
1818
+ name: "value"
1819
+ t {
1820
+ dims: 2
1821
+ data_type: 7
1822
+ data_location: 0
1823
+ }
1824
+ type: TENSOR
1825
+ }
1826
+ }
1827
+ node {
1828
+ input: "/Transpose_6_output_0"
1829
+ input: "/Constant_48_output_0"
1830
+ output: "/Reshape_12_output_0"
1831
+ name: "/Reshape_12"
1832
+ op_type: "Reshape"
1833
+ }
1834
+ node {
1835
+ input: "/Reshape_12_output_0"
1836
+ input: "classifier.weight"
1837
+ input: "classifier.bias"
1838
+ output: "/classifier/Gemm_output_0"
1839
+ name: "/classifier/Gemm"
1840
+ op_type: "Gemm"
1841
+ attribute {
1842
+ name: "alpha"
1843
+ f: 1.0
1844
+ type: FLOAT
1845
+ }
1846
+ attribute {
1847
+ name: "beta"
1848
+ f: 1.0
1849
+ type: FLOAT
1850
+ }
1851
+ attribute {
1852
+ name: "transB"
1853
+ i: 1
1854
+ type: INT
1855
+ }
1856
+ }
1857
+ node {
1858
+ input: "/classifier/Gemm_output_0"
1859
+ output: "output"
1860
+ name: "/Sigmoid"
1861
+ op_type: "Sigmoid"
1862
+ }
1863
+ initializer {
1864
+ dims: 1024
1865
+ dims: 1
1866
+ dims: 512
1867
+ dims: 1
1868
+ data_type: 1
1869
+ name: "conv1.weight"
1870
+ }
1871
+ initializer {
1872
+ dims: 1024
1873
+ data_type: 1
1874
+ name: "conv1.bias"
1875
+ }
1876
+ initializer {
1877
+ dims: 1024
1878
+ data_type: 1
1879
+ name: "conv1_BN.weight"
1880
+ }
1881
+ initializer {
1882
+ dims: 1024
1883
+ data_type: 1
1884
+ name: "conv1_BN.bias"
1885
+ }
1886
+ initializer {
1887
+ dims: 1024
1888
+ data_type: 1
1889
+ name: "conv1_BN.running_mean"
1890
+ }
1891
+ initializer {
1892
+ dims: 1024
1893
+ data_type: 1
1894
+ name: "conv1_BN.running_var"
1895
+ }
1896
+ initializer {
1897
+ dims: 128
1898
+ dims: 1024
1899
+ dims: 64
1900
+ dims: 1
1901
+ data_type: 1
1902
+ name: "conv2.weight"
1903
+ }
1904
+ initializer {
1905
+ dims: 128
1906
+ data_type: 1
1907
+ name: "conv2.bias"
1908
+ }
1909
+ initializer {
1910
+ dims: 128
1911
+ data_type: 1
1912
+ name: "conv2_BN.weight"
1913
+ }
1914
+ initializer {
1915
+ dims: 128
1916
+ data_type: 1
1917
+ name: "conv2_BN.bias"
1918
+ }
1919
+ initializer {
1920
+ dims: 128
1921
+ data_type: 1
1922
+ name: "conv2_BN.running_mean"
1923
+ }
1924
+ initializer {
1925
+ dims: 128
1926
+ data_type: 1
1927
+ name: "conv2_BN.running_var"
1928
+ }
1929
+ initializer {
1930
+ dims: 128
1931
+ dims: 128
1932
+ dims: 64
1933
+ dims: 1
1934
+ data_type: 1
1935
+ name: "conv3.weight"
1936
+ }
1937
+ initializer {
1938
+ dims: 128
1939
+ data_type: 1
1940
+ name: "conv3.bias"
1941
+ }
1942
+ initializer {
1943
+ dims: 128
1944
+ data_type: 1
1945
+ name: "conv3_BN.weight"
1946
+ }
1947
+ initializer {
1948
+ dims: 128
1949
+ data_type: 1
1950
+ name: "conv3_BN.bias"
1951
+ }
1952
+ initializer {
1953
+ dims: 128
1954
+ data_type: 1
1955
+ name: "conv3_BN.running_mean"
1956
+ }
1957
+ initializer {
1958
+ dims: 128
1959
+ data_type: 1
1960
+ name: "conv3_BN.running_var"
1961
+ }
1962
+ initializer {
1963
+ dims: 128
1964
+ dims: 128
1965
+ dims: 64
1966
+ dims: 1
1967
+ data_type: 1
1968
+ name: "conv4.weight"
1969
+ }
1970
+ initializer {
1971
+ dims: 128
1972
+ data_type: 1
1973
+ name: "conv4.bias"
1974
+ }
1975
+ initializer {
1976
+ dims: 128
1977
+ data_type: 1
1978
+ name: "conv4_BN.weight"
1979
+ }
1980
+ initializer {
1981
+ dims: 128
1982
+ data_type: 1
1983
+ name: "conv4_BN.bias"
1984
+ }
1985
+ initializer {
1986
+ dims: 128
1987
+ data_type: 1
1988
+ name: "conv4_BN.running_mean"
1989
+ }
1990
+ initializer {
1991
+ dims: 128
1992
+ data_type: 1
1993
+ name: "conv4_BN.running_var"
1994
+ }
1995
+ initializer {
1996
+ dims: 256
1997
+ dims: 128
1998
+ dims: 64
1999
+ dims: 1
2000
+ data_type: 1
2001
+ name: "conv5.weight"
2002
+ }
2003
+ initializer {
2004
+ dims: 256
2005
+ data_type: 1
2006
+ name: "conv5.bias"
2007
+ }
2008
+ initializer {
2009
+ dims: 256
2010
+ data_type: 1
2011
+ name: "conv5_BN.weight"
2012
+ }
2013
+ initializer {
2014
+ dims: 256
2015
+ data_type: 1
2016
+ name: "conv5_BN.bias"
2017
+ }
2018
+ initializer {
2019
+ dims: 256
2020
+ data_type: 1
2021
+ name: "conv5_BN.running_mean"
2022
+ }
2023
+ initializer {
2024
+ dims: 256
2025
+ data_type: 1
2026
+ name: "conv5_BN.running_var"
2027
+ }
2028
+ initializer {
2029
+ dims: 512
2030
+ dims: 256
2031
+ dims: 64
2032
+ dims: 1
2033
+ data_type: 1
2034
+ name: "conv6.weight"
2035
+ }
2036
+ initializer {
2037
+ dims: 512
2038
+ data_type: 1
2039
+ name: "conv6.bias"
2040
+ }
2041
+ initializer {
2042
+ dims: 512
2043
+ data_type: 1
2044
+ name: "conv6_BN.weight"
2045
+ }
2046
+ initializer {
2047
+ dims: 512
2048
+ data_type: 1
2049
+ name: "conv6_BN.bias"
2050
+ }
2051
+ initializer {
2052
+ dims: 512
2053
+ data_type: 1
2054
+ name: "conv6_BN.running_mean"
2055
+ }
2056
+ initializer {
2057
+ dims: 512
2058
+ data_type: 1
2059
+ name: "conv6_BN.running_var"
2060
+ }
2061
+ initializer {
2062
+ dims: 360
2063
+ dims: 2048
2064
+ data_type: 1
2065
+ name: "classifier.weight"
2066
+ }
2067
+ initializer {
2068
+ dims: 360
2069
+ data_type: 1
2070
+ name: "classifier.bias"
2071
+ }
2072
+ input {
2073
+ name: "input"
2074
+ type {
2075
+ tensor_type {
2076
+ elem_type: 1
2077
+ shape {
2078
+ dim {
2079
+ dim_param: "n"
2080
+ }
2081
+ dim {
2082
+ dim_value: 1024
2083
+ }
2084
+ }
2085
+ }
2086
+ }
2087
+ }
2088
+ output {
2089
+ name: "output"
2090
+ type {
2091
+ tensor_type {
2092
+ elem_type: 1
2093
+ shape {
2094
+ dim {
2095
+ dim_param: "Sigmoidoutput_dim_0"
2096
+ }
2097
+ dim {
2098
+ dim_value: 360
2099
+ }
2100
+ }
2101
+ }
2102
+ }
2103
+ }
2104
+ }
2105
+ opset_import {
2106
+ domain: ""
2107
+ version: 11
2108
+ }
models/onnx/ailia-models/crepe_tiny.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fd045b75d2b0d08fe7bb2711e466a4e76b625714f07ea3aabedb50598d7428e
3
+ size 2193941
models/onnx/ailia-models/crepe_tiny.onnx.prototxt ADDED
The diff for this file is too large to render. See raw diff
 
models/onnx/ailia-models/source.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ https://github.com/axinc-ai/ailia-models/tree/master/audio_processing/rvc
2
+
3
+ https://storage.googleapis.com/ailia-models/rvc/crepe.onnx
4
+ https://storage.googleapis.com/ailia-models/rvc/crepe.onnx.prototxt
5
+
6
+ https://storage.googleapis.com/ailia-models/rvc/crepe_tiny.onnx
7
+ https://storage.googleapis.com/ailia-models/rvc/crepe_tiny.onnx.prototxt