niobures commited on
Commit
7900a1d
·
verified ·
1 Parent(s): 021a33b

AutoSpeech (code, models, paper)

Browse files
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ AutoSpeech.[[:space:]]Neural[[:space:]]Architecture[[:space:]]Search[[:space:]]for[[:space:]]Speaker[[:space:]]Recognition.pdf filter=lfs diff=lfs merge=lfs -text
37
+ models/ailia-models/code/wav/id10270/8jEAjG6SegY/00008.wav filter=lfs diff=lfs merge=lfs -text
38
+ models/ailia-models/code/wav/id10270/x6uYqmx31kE/00001.wav filter=lfs diff=lfs merge=lfs -text
39
+ models/ailia-models/code/wav/id10282/zGjY8J48FoE/00004.wav filter=lfs diff=lfs merge=lfs -text
40
+ models/ailia-models/code/wav/id10283/oGZsanLiXsY/00004.wav filter=lfs diff=lfs merge=lfs -text
41
+ models/ailia-models/code/wav/id10291/oLJhH76d0kU/00004.wav filter=lfs diff=lfs merge=lfs -text
AutoSpeech. Neural Architecture Search for Speaker Recognition.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a2ef48b4fed9b975f1fb880217aa862151cc230eb2b1ecbb45f7a05a5356c77
3
+ size 373725
code/AutoSpeech.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23b17c5470de997f9462b903a574fe907de8167fad10de06fc0aaa120a597260
3
+ size 421245
code/Modified-AutoSpeech.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f6d4df6efc193a453d687fc872fb3a24b76ab2e0c6d1b30bd8cb7defdca4f93
3
+ size 554076
models/ailia-models/code/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2020 VITA-Group
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
models/ailia-models/code/README.md ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AutoSpeech
2
+
3
+ ## Input
4
+
5
+ Audio file
6
+ ```
7
+ Wav file from The VoxCeleb1 Dataset https://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1.html
8
+
9
+ Default input: wav/id10283/oGZsanLiXsY/00004.wav
10
+ ```
11
+
12
+ Please download the test data set (https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_test_wav.zip) to check various data.
13
+
14
+ ## Output
15
+
16
+ - Identification mode
17
+ Top 5 label.
18
+ ```
19
+ Top5: id10283, id11084, id10200, id11064, id10404
20
+ ```
21
+
22
+ - Verification mode
23
+ Degree of similarity.
24
+ ```
25
+ similar: 0.42575997
26
+ verification: match (threshold: 0.260)
27
+ ```
28
+
29
+ ## Usage
30
+ Automatically downloads the onnx and prototxt files on the first run.
31
+ It is necessary to be connected to the Internet while downloading.
32
+
33
+ For the sample wav,
34
+ ```bash
35
+ $ python3 auto_speech.py
36
+ ```
37
+ It outputs top 5 label. (identification mode)
38
+
39
+ If you want to specify the input file, put the path after the `--input` option.
40
+ ```bash
41
+ $ python3 auto_speech.py --input wav/id10283/oGZsanLiXsY/00004.wav
42
+ ```
43
+
44
+ When two files are specified with the `--input1` and `--input2` options,
45
+ check if two audio files belong to the same person. (verification mode)
46
+ ```bash
47
+ $ python3 auto_speech.py --input1 wav/id10270/8jEAjG6SegY/00008.wav --input2 wav/id10270/x6uYqmx31kE/00001.wav
48
+ ```
49
+
50
+ ## Reference
51
+
52
+ [AutoSpeech: Neural Architecture Search for Speaker Recognition](https://github.com/VITA-Group/AutoSpeech)
53
+
54
+ ## Framework
55
+
56
+ Pytorch
57
+
58
+ ## Model Format
59
+
60
+ ONNX opset=11
61
+
62
+ ## Netron
63
+
64
+ [proposed_iden.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/auto_speech/proposed_iden.onnx.prototxt)
65
+ [proposed_classifier.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/auto_speech/proposed_classifier.onnx.prototxt)
66
+ [proposed_veri.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/auto_speech/proposed_veri.onnx.prototxt)
models/ailia-models/code/VoxCeleb1_ids.txt ADDED
@@ -0,0 +1,1251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id10001
2
+ id10002
3
+ id10003
4
+ id10004
5
+ id10005
6
+ id10006
7
+ id10007
8
+ id10008
9
+ id10009
10
+ id10010
11
+ id10011
12
+ id10012
13
+ id10013
14
+ id10014
15
+ id10015
16
+ id10016
17
+ id10017
18
+ id10018
19
+ id10019
20
+ id10020
21
+ id10021
22
+ id10022
23
+ id10023
24
+ id10024
25
+ id10025
26
+ id10026
27
+ id10027
28
+ id10028
29
+ id10029
30
+ id10030
31
+ id10031
32
+ id10032
33
+ id10033
34
+ id10034
35
+ id10035
36
+ id10036
37
+ id10037
38
+ id10038
39
+ id10039
40
+ id10040
41
+ id10041
42
+ id10042
43
+ id10043
44
+ id10044
45
+ id10045
46
+ id10046
47
+ id10047
48
+ id10048
49
+ id10049
50
+ id10050
51
+ id10051
52
+ id10052
53
+ id10053
54
+ id10054
55
+ id10055
56
+ id10056
57
+ id10057
58
+ id10058
59
+ id10059
60
+ id10060
61
+ id10061
62
+ id10062
63
+ id10063
64
+ id10064
65
+ id10065
66
+ id10066
67
+ id10067
68
+ id10068
69
+ id10069
70
+ id10070
71
+ id10071
72
+ id10072
73
+ id10073
74
+ id10074
75
+ id10075
76
+ id10076
77
+ id10077
78
+ id10078
79
+ id10079
80
+ id10080
81
+ id10081
82
+ id10082
83
+ id10083
84
+ id10084
85
+ id10085
86
+ id10086
87
+ id10087
88
+ id10088
89
+ id10089
90
+ id10090
91
+ id10091
92
+ id10092
93
+ id10093
94
+ id10094
95
+ id10095
96
+ id10096
97
+ id10097
98
+ id10098
99
+ id10099
100
+ id10100
101
+ id10101
102
+ id10102
103
+ id10103
104
+ id10104
105
+ id10105
106
+ id10106
107
+ id10107
108
+ id10108
109
+ id10109
110
+ id10110
111
+ id10111
112
+ id10112
113
+ id10113
114
+ id10114
115
+ id10115
116
+ id10116
117
+ id10117
118
+ id10118
119
+ id10119
120
+ id10120
121
+ id10121
122
+ id10122
123
+ id10123
124
+ id10124
125
+ id10125
126
+ id10126
127
+ id10127
128
+ id10128
129
+ id10129
130
+ id10130
131
+ id10131
132
+ id10132
133
+ id10133
134
+ id10134
135
+ id10135
136
+ id10136
137
+ id10137
138
+ id10138
139
+ id10139
140
+ id10140
141
+ id10141
142
+ id10142
143
+ id10143
144
+ id10144
145
+ id10145
146
+ id10146
147
+ id10147
148
+ id10148
149
+ id10149
150
+ id10150
151
+ id10151
152
+ id10152
153
+ id10153
154
+ id10154
155
+ id10155
156
+ id10156
157
+ id10157
158
+ id10158
159
+ id10159
160
+ id10160
161
+ id10161
162
+ id10162
163
+ id10163
164
+ id10164
165
+ id10165
166
+ id10166
167
+ id10167
168
+ id10168
169
+ id10169
170
+ id10170
171
+ id10171
172
+ id10172
173
+ id10173
174
+ id10174
175
+ id10175
176
+ id10176
177
+ id10177
178
+ id10178
179
+ id10179
180
+ id10180
181
+ id10181
182
+ id10182
183
+ id10183
184
+ id10184
185
+ id10185
186
+ id10186
187
+ id10187
188
+ id10188
189
+ id10189
190
+ id10190
191
+ id10191
192
+ id10192
193
+ id10193
194
+ id10194
195
+ id10195
196
+ id10196
197
+ id10197
198
+ id10198
199
+ id10199
200
+ id10200
201
+ id10201
202
+ id10202
203
+ id10203
204
+ id10204
205
+ id10205
206
+ id10206
207
+ id10207
208
+ id10208
209
+ id10209
210
+ id10210
211
+ id10211
212
+ id10212
213
+ id10213
214
+ id10214
215
+ id10215
216
+ id10216
217
+ id10217
218
+ id10218
219
+ id10219
220
+ id10220
221
+ id10221
222
+ id10222
223
+ id10223
224
+ id10224
225
+ id10225
226
+ id10226
227
+ id10227
228
+ id10228
229
+ id10229
230
+ id10230
231
+ id10231
232
+ id10232
233
+ id10233
234
+ id10234
235
+ id10235
236
+ id10236
237
+ id10237
238
+ id10238
239
+ id10239
240
+ id10240
241
+ id10241
242
+ id10242
243
+ id10243
244
+ id10244
245
+ id10245
246
+ id10246
247
+ id10247
248
+ id10248
249
+ id10249
250
+ id10250
251
+ id10251
252
+ id10252
253
+ id10253
254
+ id10254
255
+ id10255
256
+ id10256
257
+ id10257
258
+ id10258
259
+ id10259
260
+ id10260
261
+ id10261
262
+ id10262
263
+ id10263
264
+ id10264
265
+ id10265
266
+ id10266
267
+ id10267
268
+ id10268
269
+ id10269
270
+ id10270
271
+ id10271
272
+ id10272
273
+ id10273
274
+ id10274
275
+ id10275
276
+ id10276
277
+ id10277
278
+ id10278
279
+ id10279
280
+ id10280
281
+ id10281
282
+ id10282
283
+ id10283
284
+ id10284
285
+ id10285
286
+ id10286
287
+ id10287
288
+ id10288
289
+ id10289
290
+ id10290
291
+ id10291
292
+ id10292
293
+ id10293
294
+ id10294
295
+ id10295
296
+ id10296
297
+ id10297
298
+ id10298
299
+ id10299
300
+ id10300
301
+ id10301
302
+ id10302
303
+ id10303
304
+ id10304
305
+ id10305
306
+ id10306
307
+ id10307
308
+ id10308
309
+ id10309
310
+ id10310
311
+ id10311
312
+ id10312
313
+ id10313
314
+ id10314
315
+ id10315
316
+ id10316
317
+ id10317
318
+ id10318
319
+ id10319
320
+ id10320
321
+ id10321
322
+ id10322
323
+ id10323
324
+ id10324
325
+ id10325
326
+ id10326
327
+ id10327
328
+ id10328
329
+ id10329
330
+ id10330
331
+ id10331
332
+ id10332
333
+ id10333
334
+ id10334
335
+ id10335
336
+ id10336
337
+ id10337
338
+ id10338
339
+ id10339
340
+ id10340
341
+ id10341
342
+ id10342
343
+ id10343
344
+ id10344
345
+ id10345
346
+ id10346
347
+ id10347
348
+ id10348
349
+ id10349
350
+ id10350
351
+ id10351
352
+ id10352
353
+ id10353
354
+ id10354
355
+ id10355
356
+ id10356
357
+ id10357
358
+ id10358
359
+ id10359
360
+ id10360
361
+ id10361
362
+ id10362
363
+ id10363
364
+ id10364
365
+ id10365
366
+ id10366
367
+ id10367
368
+ id10368
369
+ id10369
370
+ id10370
371
+ id10371
372
+ id10372
373
+ id10373
374
+ id10374
375
+ id10375
376
+ id10376
377
+ id10377
378
+ id10378
379
+ id10379
380
+ id10380
381
+ id10381
382
+ id10382
383
+ id10383
384
+ id10384
385
+ id10385
386
+ id10386
387
+ id10387
388
+ id10388
389
+ id10389
390
+ id10390
391
+ id10391
392
+ id10392
393
+ id10393
394
+ id10394
395
+ id10395
396
+ id10396
397
+ id10397
398
+ id10398
399
+ id10399
400
+ id10400
401
+ id10401
402
+ id10402
403
+ id10403
404
+ id10404
405
+ id10405
406
+ id10406
407
+ id10407
408
+ id10408
409
+ id10409
410
+ id10410
411
+ id10411
412
+ id10412
413
+ id10413
414
+ id10414
415
+ id10415
416
+ id10416
417
+ id10417
418
+ id10418
419
+ id10419
420
+ id10420
421
+ id10421
422
+ id10422
423
+ id10423
424
+ id10424
425
+ id10425
426
+ id10426
427
+ id10427
428
+ id10428
429
+ id10429
430
+ id10430
431
+ id10431
432
+ id10432
433
+ id10433
434
+ id10434
435
+ id10435
436
+ id10436
437
+ id10437
438
+ id10438
439
+ id10439
440
+ id10440
441
+ id10441
442
+ id10442
443
+ id10443
444
+ id10444
445
+ id10445
446
+ id10446
447
+ id10447
448
+ id10448
449
+ id10449
450
+ id10450
451
+ id10451
452
+ id10452
453
+ id10453
454
+ id10454
455
+ id10455
456
+ id10456
457
+ id10457
458
+ id10458
459
+ id10459
460
+ id10460
461
+ id10461
462
+ id10462
463
+ id10463
464
+ id10464
465
+ id10465
466
+ id10466
467
+ id10467
468
+ id10468
469
+ id10469
470
+ id10470
471
+ id10471
472
+ id10472
473
+ id10473
474
+ id10474
475
+ id10475
476
+ id10476
477
+ id10477
478
+ id10478
479
+ id10479
480
+ id10480
481
+ id10481
482
+ id10482
483
+ id10483
484
+ id10484
485
+ id10485
486
+ id10486
487
+ id10487
488
+ id10488
489
+ id10489
490
+ id10490
491
+ id10491
492
+ id10492
493
+ id10493
494
+ id10494
495
+ id10495
496
+ id10496
497
+ id10497
498
+ id10498
499
+ id10499
500
+ id10500
501
+ id10501
502
+ id10502
503
+ id10503
504
+ id10504
505
+ id10505
506
+ id10506
507
+ id10507
508
+ id10508
509
+ id10509
510
+ id10510
511
+ id10511
512
+ id10512
513
+ id10513
514
+ id10514
515
+ id10515
516
+ id10516
517
+ id10517
518
+ id10518
519
+ id10519
520
+ id10520
521
+ id10521
522
+ id10522
523
+ id10523
524
+ id10524
525
+ id10525
526
+ id10526
527
+ id10527
528
+ id10528
529
+ id10529
530
+ id10530
531
+ id10531
532
+ id10532
533
+ id10533
534
+ id10534
535
+ id10535
536
+ id10536
537
+ id10537
538
+ id10538
539
+ id10539
540
+ id10540
541
+ id10541
542
+ id10542
543
+ id10543
544
+ id10544
545
+ id10545
546
+ id10546
547
+ id10547
548
+ id10548
549
+ id10549
550
+ id10550
551
+ id10551
552
+ id10552
553
+ id10553
554
+ id10554
555
+ id10555
556
+ id10556
557
+ id10557
558
+ id10558
559
+ id10559
560
+ id10560
561
+ id10561
562
+ id10562
563
+ id10563
564
+ id10564
565
+ id10565
566
+ id10566
567
+ id10567
568
+ id10568
569
+ id10569
570
+ id10570
571
+ id10571
572
+ id10572
573
+ id10573
574
+ id10574
575
+ id10575
576
+ id10576
577
+ id10577
578
+ id10578
579
+ id10579
580
+ id10580
581
+ id10581
582
+ id10582
583
+ id10583
584
+ id10584
585
+ id10585
586
+ id10586
587
+ id10587
588
+ id10588
589
+ id10589
590
+ id10590
591
+ id10591
592
+ id10592
593
+ id10593
594
+ id10594
595
+ id10595
596
+ id10596
597
+ id10597
598
+ id10598
599
+ id10599
600
+ id10600
601
+ id10601
602
+ id10602
603
+ id10603
604
+ id10604
605
+ id10605
606
+ id10606
607
+ id10607
608
+ id10608
609
+ id10609
610
+ id10610
611
+ id10611
612
+ id10612
613
+ id10613
614
+ id10614
615
+ id10615
616
+ id10616
617
+ id10617
618
+ id10618
619
+ id10619
620
+ id10620
621
+ id10621
622
+ id10622
623
+ id10623
624
+ id10624
625
+ id10625
626
+ id10626
627
+ id10627
628
+ id10628
629
+ id10629
630
+ id10630
631
+ id10631
632
+ id10632
633
+ id10633
634
+ id10634
635
+ id10635
636
+ id10636
637
+ id10637
638
+ id10638
639
+ id10639
640
+ id10640
641
+ id10641
642
+ id10642
643
+ id10643
644
+ id10644
645
+ id10645
646
+ id10646
647
+ id10647
648
+ id10648
649
+ id10649
650
+ id10650
651
+ id10651
652
+ id10652
653
+ id10653
654
+ id10654
655
+ id10655
656
+ id10656
657
+ id10657
658
+ id10658
659
+ id10659
660
+ id10660
661
+ id10661
662
+ id10662
663
+ id10663
664
+ id10664
665
+ id10665
666
+ id10666
667
+ id10667
668
+ id10668
669
+ id10669
670
+ id10670
671
+ id10671
672
+ id10672
673
+ id10673
674
+ id10674
675
+ id10675
676
+ id10676
677
+ id10677
678
+ id10678
679
+ id10679
680
+ id10680
681
+ id10681
682
+ id10682
683
+ id10683
684
+ id10684
685
+ id10685
686
+ id10686
687
+ id10687
688
+ id10688
689
+ id10689
690
+ id10690
691
+ id10691
692
+ id10692
693
+ id10693
694
+ id10694
695
+ id10695
696
+ id10696
697
+ id10697
698
+ id10698
699
+ id10699
700
+ id10700
701
+ id10701
702
+ id10702
703
+ id10703
704
+ id10704
705
+ id10705
706
+ id10706
707
+ id10707
708
+ id10708
709
+ id10709
710
+ id10710
711
+ id10711
712
+ id10712
713
+ id10713
714
+ id10714
715
+ id10715
716
+ id10716
717
+ id10717
718
+ id10718
719
+ id10719
720
+ id10720
721
+ id10721
722
+ id10722
723
+ id10723
724
+ id10724
725
+ id10725
726
+ id10726
727
+ id10727
728
+ id10728
729
+ id10729
730
+ id10730
731
+ id10731
732
+ id10732
733
+ id10733
734
+ id10734
735
+ id10735
736
+ id10736
737
+ id10737
738
+ id10738
739
+ id10739
740
+ id10740
741
+ id10741
742
+ id10742
743
+ id10743
744
+ id10744
745
+ id10745
746
+ id10746
747
+ id10747
748
+ id10748
749
+ id10749
750
+ id10750
751
+ id10751
752
+ id10752
753
+ id10753
754
+ id10754
755
+ id10755
756
+ id10756
757
+ id10757
758
+ id10758
759
+ id10759
760
+ id10760
761
+ id10761
762
+ id10762
763
+ id10763
764
+ id10764
765
+ id10765
766
+ id10766
767
+ id10767
768
+ id10768
769
+ id10769
770
+ id10770
771
+ id10771
772
+ id10772
773
+ id10773
774
+ id10774
775
+ id10775
776
+ id10776
777
+ id10777
778
+ id10778
779
+ id10779
780
+ id10780
781
+ id10781
782
+ id10782
783
+ id10783
784
+ id10784
785
+ id10785
786
+ id10786
787
+ id10787
788
+ id10788
789
+ id10789
790
+ id10790
791
+ id10791
792
+ id10792
793
+ id10793
794
+ id10794
795
+ id10795
796
+ id10796
797
+ id10797
798
+ id10798
799
+ id10799
800
+ id10800
801
+ id10801
802
+ id10802
803
+ id10803
804
+ id10804
805
+ id10805
806
+ id10806
807
+ id10807
808
+ id10808
809
+ id10809
810
+ id10810
811
+ id10811
812
+ id10812
813
+ id10813
814
+ id10814
815
+ id10815
816
+ id10816
817
+ id10817
818
+ id10818
819
+ id10819
820
+ id10820
821
+ id10821
822
+ id10822
823
+ id10823
824
+ id10824
825
+ id10825
826
+ id10826
827
+ id10827
828
+ id10828
829
+ id10829
830
+ id10830
831
+ id10831
832
+ id10832
833
+ id10833
834
+ id10834
835
+ id10835
836
+ id10836
837
+ id10837
838
+ id10838
839
+ id10839
840
+ id10840
841
+ id10841
842
+ id10842
843
+ id10843
844
+ id10844
845
+ id10845
846
+ id10846
847
+ id10847
848
+ id10848
849
+ id10849
850
+ id10850
851
+ id10851
852
+ id10852
853
+ id10853
854
+ id10854
855
+ id10855
856
+ id10856
857
+ id10857
858
+ id10858
859
+ id10859
860
+ id10860
861
+ id10861
862
+ id10862
863
+ id10863
864
+ id10864
865
+ id10865
866
+ id10866
867
+ id10867
868
+ id10868
869
+ id10869
870
+ id10870
871
+ id10871
872
+ id10872
873
+ id10873
874
+ id10874
875
+ id10875
876
+ id10876
877
+ id10877
878
+ id10878
879
+ id10879
880
+ id10880
881
+ id10881
882
+ id10882
883
+ id10883
884
+ id10884
885
+ id10885
886
+ id10886
887
+ id10887
888
+ id10888
889
+ id10889
890
+ id10890
891
+ id10891
892
+ id10892
893
+ id10893
894
+ id10894
895
+ id10895
896
+ id10896
897
+ id10897
898
+ id10898
899
+ id10899
900
+ id10900
901
+ id10901
902
+ id10902
903
+ id10903
904
+ id10904
905
+ id10905
906
+ id10906
907
+ id10907
908
+ id10908
909
+ id10909
910
+ id10910
911
+ id10911
912
+ id10912
913
+ id10913
914
+ id10914
915
+ id10915
916
+ id10916
917
+ id10917
918
+ id10918
919
+ id10919
920
+ id10920
921
+ id10921
922
+ id10922
923
+ id10923
924
+ id10924
925
+ id10925
926
+ id10926
927
+ id10927
928
+ id10928
929
+ id10929
930
+ id10930
931
+ id10931
932
+ id10932
933
+ id10933
934
+ id10934
935
+ id10935
936
+ id10936
937
+ id10937
938
+ id10938
939
+ id10939
940
+ id10940
941
+ id10941
942
+ id10942
943
+ id10943
944
+ id10944
945
+ id10945
946
+ id10946
947
+ id10947
948
+ id10948
949
+ id10949
950
+ id10950
951
+ id10951
952
+ id10952
953
+ id10953
954
+ id10954
955
+ id10955
956
+ id10956
957
+ id10957
958
+ id10958
959
+ id10959
960
+ id10960
961
+ id10961
962
+ id10962
963
+ id10963
964
+ id10964
965
+ id10965
966
+ id10966
967
+ id10967
968
+ id10968
969
+ id10969
970
+ id10970
971
+ id10971
972
+ id10972
973
+ id10973
974
+ id10974
975
+ id10975
976
+ id10976
977
+ id10977
978
+ id10978
979
+ id10979
980
+ id10980
981
+ id10981
982
+ id10982
983
+ id10983
984
+ id10984
985
+ id10985
986
+ id10986
987
+ id10987
988
+ id10988
989
+ id10989
990
+ id10990
991
+ id10991
992
+ id10992
993
+ id10993
994
+ id10994
995
+ id10995
996
+ id10996
997
+ id10997
998
+ id10998
999
+ id10999
1000
+ id11000
1001
+ id11001
1002
+ id11002
1003
+ id11003
1004
+ id11004
1005
+ id11005
1006
+ id11006
1007
+ id11007
1008
+ id11008
1009
+ id11009
1010
+ id11010
1011
+ id11011
1012
+ id11012
1013
+ id11013
1014
+ id11014
1015
+ id11015
1016
+ id11016
1017
+ id11017
1018
+ id11018
1019
+ id11019
1020
+ id11020
1021
+ id11021
1022
+ id11022
1023
+ id11023
1024
+ id11024
1025
+ id11025
1026
+ id11026
1027
+ id11027
1028
+ id11028
1029
+ id11029
1030
+ id11030
1031
+ id11031
1032
+ id11032
1033
+ id11033
1034
+ id11034
1035
+ id11035
1036
+ id11036
1037
+ id11037
1038
+ id11038
1039
+ id11039
1040
+ id11040
1041
+ id11041
1042
+ id11042
1043
+ id11043
1044
+ id11044
1045
+ id11045
1046
+ id11046
1047
+ id11047
1048
+ id11048
1049
+ id11049
1050
+ id11050
1051
+ id11051
1052
+ id11052
1053
+ id11053
1054
+ id11054
1055
+ id11055
1056
+ id11056
1057
+ id11057
1058
+ id11058
1059
+ id11059
1060
+ id11060
1061
+ id11061
1062
+ id11062
1063
+ id11063
1064
+ id11064
1065
+ id11065
1066
+ id11066
1067
+ id11067
1068
+ id11068
1069
+ id11069
1070
+ id11070
1071
+ id11071
1072
+ id11072
1073
+ id11073
1074
+ id11074
1075
+ id11075
1076
+ id11076
1077
+ id11077
1078
+ id11078
1079
+ id11079
1080
+ id11080
1081
+ id11081
1082
+ id11082
1083
+ id11083
1084
+ id11084
1085
+ id11085
1086
+ id11086
1087
+ id11087
1088
+ id11088
1089
+ id11089
1090
+ id11090
1091
+ id11091
1092
+ id11092
1093
+ id11093
1094
+ id11094
1095
+ id11095
1096
+ id11096
1097
+ id11097
1098
+ id11098
1099
+ id11099
1100
+ id11100
1101
+ id11101
1102
+ id11102
1103
+ id11103
1104
+ id11104
1105
+ id11105
1106
+ id11106
1107
+ id11107
1108
+ id11108
1109
+ id11109
1110
+ id11110
1111
+ id11111
1112
+ id11112
1113
+ id11113
1114
+ id11114
1115
+ id11115
1116
+ id11116
1117
+ id11117
1118
+ id11118
1119
+ id11119
1120
+ id11120
1121
+ id11121
1122
+ id11122
1123
+ id11123
1124
+ id11124
1125
+ id11125
1126
+ id11126
1127
+ id11127
1128
+ id11128
1129
+ id11129
1130
+ id11130
1131
+ id11131
1132
+ id11132
1133
+ id11133
1134
+ id11134
1135
+ id11135
1136
+ id11136
1137
+ id11137
1138
+ id11138
1139
+ id11139
1140
+ id11140
1141
+ id11141
1142
+ id11142
1143
+ id11143
1144
+ id11144
1145
+ id11145
1146
+ id11146
1147
+ id11147
1148
+ id11148
1149
+ id11149
1150
+ id11150
1151
+ id11151
1152
+ id11152
1153
+ id11153
1154
+ id11154
1155
+ id11155
1156
+ id11156
1157
+ id11157
1158
+ id11158
1159
+ id11159
1160
+ id11160
1161
+ id11161
1162
+ id11162
1163
+ id11163
1164
+ id11164
1165
+ id11165
1166
+ id11166
1167
+ id11167
1168
+ id11168
1169
+ id11169
1170
+ id11170
1171
+ id11171
1172
+ id11172
1173
+ id11173
1174
+ id11174
1175
+ id11175
1176
+ id11176
1177
+ id11177
1178
+ id11178
1179
+ id11179
1180
+ id11180
1181
+ id11181
1182
+ id11182
1183
+ id11183
1184
+ id11184
1185
+ id11185
1186
+ id11186
1187
+ id11187
1188
+ id11188
1189
+ id11189
1190
+ id11190
1191
+ id11191
1192
+ id11192
1193
+ id11193
1194
+ id11194
1195
+ id11195
1196
+ id11196
1197
+ id11197
1198
+ id11198
1199
+ id11199
1200
+ id11200
1201
+ id11201
1202
+ id11202
1203
+ id11203
1204
+ id11204
1205
+ id11205
1206
+ id11206
1207
+ id11207
1208
+ id11208
1209
+ id11209
1210
+ id11210
1211
+ id11211
1212
+ id11212
1213
+ id11213
1214
+ id11214
1215
+ id11215
1216
+ id11216
1217
+ id11217
1218
+ id11218
1219
+ id11219
1220
+ id11220
1221
+ id11221
1222
+ id11222
1223
+ id11223
1224
+ id11224
1225
+ id11225
1226
+ id11226
1227
+ id11227
1228
+ id11228
1229
+ id11229
1230
+ id11230
1231
+ id11231
1232
+ id11232
1233
+ id11233
1234
+ id11234
1235
+ id11235
1236
+ id11236
1237
+ id11237
1238
+ id11238
1239
+ id11239
1240
+ id11240
1241
+ id11241
1242
+ id11242
1243
+ id11243
1244
+ id11244
1245
+ id11245
1246
+ id11246
1247
+ id11247
1248
+ id11248
1249
+ id11249
1250
+ id11250
1251
+ id11251
models/ailia-models/code/auto_speech.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import sys
import time
import random

import numpy as np
import librosa

import ailia

# import original modules
sys.path.append('../../util')
from arg_utils import get_base_parser, update_parser  # noqa: E402
from model_utils import check_and_download_models  # noqa: E402

# logger
from logging import getLogger  # noqa: E402

logger = getLogger(__name__)

# ======================
# Parameters
# ======================

# ONNX weight / prototxt pairs fetched from the ailia model repository.
WEIGHT_IDENT_PATH = './proposed_iden.onnx'
MODEL_IDENT_PATH = './proposed_iden.onnx.prototxt'
WEIGHT_CLASSIFIER_PATH = './proposed_classifier.onnx'
MODEL_CLASSIFIER_PATH = './proposed_classifier.onnx.prototxt'
WEIGHT_VERI_PATH = './proposed_veri.onnx'
MODEL_VERI_PATH = './proposed_veri.onnx.prototxt'
REMOTE_PATH = \
    'https://storage.googleapis.com/ailia-models/auto_speech/'

# Default input wav (identification mode).
WAVE_PATH = "wav/id10283/oGZsanLiXsY/00004.wav"

# Audio
SAMPLING_RATE = 16000

# Mel-filterbank
WINDOW_LENGTH = 25  # In milliseconds
WINDOW_STEP = 10  # In milliseconds
N_FFT = 512

# Audio volume normalization
AUDIO_NORM_TARGET_dBFS = -30

# Cosine-similarity decision threshold used in verification mode.
THRESHOLD = 0.26

INT16_MAX = (2 ** 15) - 1

# ======================
# Argument Parser Config
# ======================

parser = get_base_parser(
    'AutoSpeech', WAVE_PATH, None, input_ftype='audio'
)
parser.add_argument(
    '-i1', '--input1', metavar='WAV', default=None,
    help='Specify an wav file to compare with the input2 wav. (verification mode)'
)
parser.add_argument(
    '-i2', '--input2', metavar='WAV', default=None,
    help='Specify an wav file to compare with the input1 wav. (verification mode)'
)
parser.add_argument(
    '-th', '--threshold',
    default=THRESHOLD, type=float,
    help='The similar threshold for verification.'
)
args = update_parser(parser)
72
+
73
+
74
+ # ======================
75
+ # Secondary Functions
76
+ # ======================
77
+
78
def read_wave(path):
    """Load an audio file and return it resampled to SAMPLING_RATE.

    :param path: path of the wav file to load
    :return: 1-D float waveform at SAMPLING_RATE
    """
    # Load at the file's native rate; resample only when it differs.
    wav, source_sr = librosa.load(path, sr=None)
    if source_sr is not None and source_sr != SAMPLING_RATE:
        # Keyword arguments: librosa >= 0.10 removed the positional
        # resample(wav, orig_sr, target_sr) form.
        wav = librosa.resample(wav, orig_sr=source_sr, target_sr=SAMPLING_RATE)

    return wav
86
+
87
+
88
def voxceleb1_ids():
    """Return the VoxCeleb1 speaker IDs listed one per line in
    ``VoxCeleb1_ids.txt`` (index in this list = classifier class index)."""
    with open("VoxCeleb1_ids.txt") as id_file:
        return [line.strip() for line in id_file]
93
+
94
+
95
def normalize_volume(wav, target_dBFS, increase_only=False, decrease_only=False):
    """Scale a waveform so its RMS level equals ``target_dBFS``.

    :param wav: float waveform (full scale assumed to be 1.0)
    :param target_dBFS: desired RMS level in dBFS
    :param increase_only: only apply the gain if it is positive
    :param decrease_only: only apply the gain if it is negative
    :return: the scaled (or unchanged) waveform
    :raises ValueError: if both increase_only and decrease_only are set
    """
    if increase_only and decrease_only:
        raise ValueError("Both increase only and decrease only are set")
    # RMS relative to full scale. (The original int16 full-scale factor
    # cancels out: sqrt(mean((wav*K)**2)) / K == sqrt(mean(wav**2)).)
    rms = np.sqrt(np.mean(np.square(wav)))
    if rms == 0:
        # Silent input: no finite gain can reach the target level;
        # the unguarded log10(0) would otherwise yield a NaN waveform.
        return wav
    wave_dBFS = 20 * np.log10(rms)
    dBFS_change = target_dBFS - wave_dBFS
    if (dBFS_change < 0 and increase_only) or (dBFS_change > 0 and decrease_only):
        return wav
    return wav * (10 ** (dBFS_change / 20))
104
+
105
+
106
def wav_to_spectrogram(wav):
    """Compute the magnitude STFT of a waveform.

    :param wav: 1-D waveform at SAMPLING_RATE
    :return: float32 array of shape (frames, freq_bins)
    """
    # librosa.core was deprecated and later removed from the public API;
    # call the top-level librosa.stft instead.
    frames = np.abs(librosa.stft(
        wav,
        n_fft=N_FFT,
        hop_length=int(SAMPLING_RATE * WINDOW_STEP / 1000),
        win_length=int(SAMPLING_RATE * WINDOW_LENGTH / 1000),
    ))
    # Transpose so time is the leading axis, as generate_sequence expects.
    return frames.astype(np.float32).T
114
+
115
+
116
def generate_sequence(feature, partial_n_frames, shift=None):
    """Slice a (frames, bins) feature array into overlapping windows.

    :param feature: 2-D array, time along axis 0
    :param partial_n_frames: length of each window in frames
    :param shift: hop between window starts; defaults to half a window
    :return: array of shape (num_windows, partial_n_frames, bins)
    """
    if shift is None:
        shift = partial_n_frames // 2
    # Tile short inputs (row-wise doubling) until one full window fits.
    while feature.shape[0] <= partial_n_frames:
        feature = np.repeat(feature, 2, axis=0)
    windows = [
        feature[offset:offset + partial_n_frames]
        for offset in range(0, feature.shape[0] - partial_n_frames + 1, shift)
    ]
    return np.stack(windows, axis=0)
128
+
129
+
130
+ def cosine_similar(a, b, data_is_normalized=False):
131
+ if not data_is_normalized:
132
+ a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
133
+ b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
134
+ return np.dot(a, b.T)
135
+
136
+
137
+ # ======================
138
+ # Main functions
139
+ # ======================
140
+
141
def preprocess(wav):
    """Turn a raw waveform into a standardized stack of spectrogram windows.

    :param wav: 1-D waveform at SAMPLING_RATE
    :return: (num_windows, 300, bins) standardized feature windows
    """
    normalized = normalize_volume(wav, AUDIO_NORM_TARGET_dBFS, increase_only=True)
    spectrogram = wav_to_spectrogram(normalized)
    windows = generate_sequence(spectrogram, partial_n_frames=300)

    # Standardize with the dataset statistics shipped next to the script.
    mean = np.load('mean.npy')
    std = np.load('std.npy')
    return (windows - mean) / std
154
+
155
+
156
def predict(wav, net, net_classifier=None):
    """Embed a waveform with ``net``; optionally classify the embedding.

    :param wav: 1-D waveform at SAMPLING_RATE
    :param net: embedding network
    :param net_classifier: optional classifier network
    :return: (1, d) embedding, or speaker indices sorted by descending
             score when ``net_classifier`` is supplied
    """
    windows = preprocess(wav)

    # Embed every window, then average into one utterance-level vector.
    embeddings = net.predict([windows])[0]
    utterance = np.mean(embeddings, axis=0, keepdims=True)

    if not net_classifier:
        return utterance

    scores = net_classifier.predict([utterance])[0]
    # Class indices, best match first.
    return np.argsort(scores[0])[::-1]
175
+
176
+
177
def eval_identification(net, net_classifier):
    """Identify the speaker of each input wav and log the top-5 IDs.

    :param net: embedding network
    :param net_classifier: classifier network mapping embeddings to IDs
    """
    ids = voxceleb1_ids()

    for input_path in args.input:
        logger.info(f'input: {input_path}')

        wav = read_wave(input_path)

        logger.info('Start inference...')
        if args.benchmark:
            # Benchmark mode: repeat inference and report wall-clock time.
            logger.info('BENCHMARK mode')
            for _ in range(5):
                begin = int(round(time.time() * 1000))
                idx = predict(wav, net, net_classifier)
                finish = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {finish - begin} ms')
        else:
            idx = predict(wav, net, net_classifier)

        logger.info(' Top5: %s' % ', '.join(ids[i] for i in idx[:5]))

    logger.info('Script finished successfully.')
201
+
202
+
203
def eval_verification(net):
    """Compare two wav files and log whether the speakers match.

    Reads --input1/--input2 from the global ``args``; exits the process
    when either path is missing or not a file.

    :param net: embedding network (verification model)
    """
    threshold = args.threshold

    def validate_input(path, name):
        # Abort with a clear message for a missing/invalid required input.
        if path is None:
            logger.error(f'{name} is not specified')
            sys.exit(-1)
        elif not os.path.isfile(path):
            logger.error(f'specified {name} is not file path nor directory path')
            sys.exit(-1)

    validate_input(args.input1, 'input1')
    validate_input(args.input2, 'input2')

    logger.info(f'input1: {args.input1}')
    logger.info(f'input2: {args.input2}')

    # prepare input data
    wav1 = read_wave(args.input1)
    wav2 = read_wave(args.input2)

    # inference
    logger.info('Start inference...')
    if args.benchmark:
        logger.info('BENCHMARK mode')
        for _ in range(5):
            start = int(round(time.time() * 1000))
            output = predict(wav1, net)
            output2 = predict(wav2, net)
            end = int(round(time.time() * 1000))
            logger.info(f'\tailia processing time {end - start} ms')
    else:
        output = predict(wav1, net)
        output2 = predict(wav2, net)

    # cosine_similar returns a (1, 1) matrix; take the scalar explicitly
    # (implicit size-1-array -> float conversion is deprecated in NumPy).
    similarity = float(cosine_similar(output, output2)[0, 0])
    logger.info(' similar: %.8f' % similarity)
    logger.info(' verification: %s (threshold: %.3f)' %
                ('match' if similarity >= threshold else 'unmatch', threshold))

    logger.info('Script finished successfully.')
248
+
249
+
250
def main():
    """Entry point: fetch the required models, then dispatch by mode.

    Verification mode is selected when --input1 or --input2 is given;
    otherwise identification mode runs on args.input.
    """
    verification = bool(args.input1 or args.input2)

    # model files check and download
    if verification:
        check_and_download_models(WEIGHT_VERI_PATH, MODEL_VERI_PATH, REMOTE_PATH)
    else:
        logger.info('Checking identification model...')
        check_and_download_models(WEIGHT_IDENT_PATH, MODEL_IDENT_PATH, REMOTE_PATH)
        logger.info('Checking classification model...')
        check_and_download_models(WEIGHT_CLASSIFIER_PATH, MODEL_CLASSIFIER_PATH, REMOTE_PATH)

    env_id = args.env_id

    if verification:
        net = ailia.Net(MODEL_VERI_PATH, WEIGHT_VERI_PATH, env_id=env_id)
        eval_verification(net)
    else:
        # initialize both the embedder and the classifier
        net = ailia.Net(MODEL_IDENT_PATH, WEIGHT_IDENT_PATH, env_id=env_id)
        net_classifier = ailia.Net(MODEL_CLASSIFIER_PATH, WEIGHT_CLASSIFIER_PATH, env_id=env_id)
        eval_identification(net, net_classifier)


if __name__ == '__main__':
    main()
models/ailia-models/code/mean.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb3a4a40ee2f3a8b306d6abf0928de4137264b22ceff4ca70f761de3a1ddba18
3
+ size 1156
models/ailia-models/code/std.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fe00c0f076c78b087e1ef31b13aa38405f6643b3888a8ceb166dc58b4b51dd0
3
+ size 1156
models/ailia-models/code/wav/id10270/8jEAjG6SegY/00008.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59bb3268a431cc6fe39db2b4d4b93bd1156899c6f454a538af83d6faf8490d71
3
+ size 217646
models/ailia-models/code/wav/id10270/x6uYqmx31kE/00001.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7cd1e61e84e78b3a452756f18e993926f81b0fea9b2d57da26105709d03ec4e
3
+ size 181806
models/ailia-models/code/wav/id10282/zGjY8J48FoE/00004.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3be10dee00f0fe7bec730ba89e3fe302b9e1bf3e356d3949c3e53dfdf11b826f
3
+ size 154926
models/ailia-models/code/wav/id10283/oGZsanLiXsY/00004.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f1f4ba2ccbd76ec81f89519a604dbf0435e2b701ffba8dacb034f5324e4aea3
3
+ size 193326
models/ailia-models/code/wav/id10291/oLJhH76d0kU/00004.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54b40b7584e581ad9e7e1d8c15adb29762a7fec670f7fd75f0fbf8b3414c65f4
3
+ size 183086
models/ailia-models/proposed_classifier.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:334c81f1d939a361affbdf933d5d9c035971cb7f07962c2d6a1fc8444b8cff59
3
+ size 10253474
models/ailia-models/proposed_classifier.onnx.prototxt ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ir_version: 6
2
+ producer_name: "pytorch"
3
+ producer_version: "1.9"
4
+ model_version: 0
5
+ graph {
6
+ name: "torch-jit-export"
7
+ node {
8
+ input: "input"
9
+ input: "classifier.weight"
10
+ input: "classifier.bias"
11
+ output: "output"
12
+ name: "Gemm_0"
13
+ op_type: "Gemm"
14
+ attribute {
15
+ name: "alpha"
16
+ f: 1.0
17
+ type: FLOAT
18
+ }
19
+ attribute {
20
+ name: "beta"
21
+ f: 1.0
22
+ type: FLOAT
23
+ }
24
+ attribute {
25
+ name: "transB"
26
+ i: 1
27
+ type: INT
28
+ }
29
+ }
30
+ initializer {
31
+ dims: 1251
32
+ dims: 2048
33
+ data_type: 1
34
+ name: "classifier.weight"
35
+ }
36
+ initializer {
37
+ dims: 1251
38
+ data_type: 1
39
+ name: "classifier.bias"
40
+ }
41
+ input {
42
+ name: "input"
43
+ type {
44
+ tensor_type {
45
+ elem_type: 1
46
+ shape {
47
+ dim {
48
+ dim_param: "n"
49
+ }
50
+ dim {
51
+ dim_value: 2048
52
+ }
53
+ }
54
+ }
55
+ }
56
+ }
57
+ output {
58
+ name: "output"
59
+ type {
60
+ tensor_type {
61
+ elem_type: 1
62
+ shape {
63
+ dim {
64
+ dim_param: "n"
65
+ }
66
+ dim {
67
+ dim_value: 1251
68
+ }
69
+ }
70
+ }
71
+ }
72
+ }
73
+ }
74
+ opset_import {
75
+ version: 11
76
+ }
models/ailia-models/proposed_iden.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3ca35732d8784a15c8fb6bd435e3c038eb4f62ed8a634b1a70e24df1b940ca9
3
+ size 60384076
models/ailia-models/proposed_iden.onnx.prototxt ADDED
The diff for this file is too large to render. See raw diff
 
models/ailia-models/proposed_veri.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88a24fe7ddee235626f57ab53a89cf8f6b86d4a563b61bf87072c500b3855c74
3
+ size 60384096
models/ailia-models/proposed_veri.onnx.prototxt ADDED
The diff for this file is too large to render. See raw diff
 
models/ailia-models/source.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ https://github.com/axinc-ai/ailia-models/tree/master/audio_processing/auto_speech
2
+
3
+ https://storage.googleapis.com/ailia-models/auto_speech/proposed_iden.onnx
4
+ https://storage.googleapis.com/ailia-models/auto_speech/proposed_iden.onnx.prototxt
5
+
6
+ https://storage.googleapis.com/ailia-models/auto_speech/proposed_classifier.onnx
7
+ https://storage.googleapis.com/ailia-models/auto_speech/proposed_classifier.onnx.prototxt
8
+
9
+ https://storage.googleapis.com/ailia-models/auto_speech/proposed_veri.onnx
10
+ https://storage.googleapis.com/ailia-models/auto_speech/proposed_veri.onnx.prototxt