Fixed incorrect speaker counts in the metadata
Browse files
The old speaker limit appears to have been missing, so the speaker counts that appeared in the metadata were much higher than they should have been.
AMI/ls_eend_ami_step.json
CHANGED
|
@@ -14,6 +14,7 @@
|
|
| 14 |
"input_dim": 345,
|
| 15 |
"key_dim": 64,
|
| 16 |
"max_nspks": 6,
|
|
|
|
| 17 |
"mixed_fp16_exclude_markers": [
|
| 18 |
"model.dec.",
|
| 19 |
"dec_ret",
|
|
@@ -29,7 +30,7 @@
|
|
| 29 |
"enc_ret_",
|
| 30 |
"enc_conv_cache"
|
| 31 |
],
|
| 32 |
-
"n_fft":
|
| 33 |
"n_mels": 23,
|
| 34 |
"num_heads": 4,
|
| 35 |
"real_output_dim": 4,
|
|
|
|
| 14 |
"input_dim": 345,
|
| 15 |
"key_dim": 64,
|
| 16 |
"max_nspks": 6,
|
| 17 |
+
"max_speakers": 4,
|
| 18 |
"mixed_fp16_exclude_markers": [
|
| 19 |
"model.dec.",
|
| 20 |
"dec_ret",
|
|
|
|
| 30 |
"enc_ret_",
|
| 31 |
"enc_conv_cache"
|
| 32 |
],
|
| 33 |
+
"n_fft": 1024,
|
| 34 |
"n_mels": 23,
|
| 35 |
"num_heads": 4,
|
| 36 |
"real_output_dim": 4,
|
CALLHOME/ls_eend_callhome_step.json
CHANGED
|
@@ -14,6 +14,7 @@
|
|
| 14 |
"input_dim": 345,
|
| 15 |
"key_dim": 64,
|
| 16 |
"max_nspks": 9,
|
|
|
|
| 17 |
"mixed_fp16_exclude_markers": [
|
| 18 |
"model.dec.",
|
| 19 |
"dec_ret",
|
|
@@ -29,7 +30,7 @@
|
|
| 29 |
"enc_ret_",
|
| 30 |
"enc_conv_cache"
|
| 31 |
],
|
| 32 |
-
"n_fft":
|
| 33 |
"n_mels": 23,
|
| 34 |
"num_heads": 4,
|
| 35 |
"real_output_dim": 7,
|
|
|
|
| 14 |
"input_dim": 345,
|
| 15 |
"key_dim": 64,
|
| 16 |
"max_nspks": 9,
|
| 17 |
+
"max_speakers": 7,
|
| 18 |
"mixed_fp16_exclude_markers": [
|
| 19 |
"model.dec.",
|
| 20 |
"dec_ret",
|
|
|
|
| 30 |
"enc_ret_",
|
| 31 |
"enc_conv_cache"
|
| 32 |
],
|
| 33 |
+
"n_fft": 1024,
|
| 34 |
"n_mels": 23,
|
| 35 |
"num_heads": 4,
|
| 36 |
"real_output_dim": 7,
|
DIHARD II/ls_eend_dih2_step.json
CHANGED
|
@@ -14,6 +14,7 @@
|
|
| 14 |
"input_dim": 345,
|
| 15 |
"key_dim": 64,
|
| 16 |
"max_nspks": 12,
|
|
|
|
| 17 |
"mixed_fp16_exclude_markers": [
|
| 18 |
"model.dec.",
|
| 19 |
"dec_ret",
|
|
@@ -29,7 +30,7 @@
|
|
| 29 |
"enc_ret_",
|
| 30 |
"enc_conv_cache"
|
| 31 |
],
|
| 32 |
-
"n_fft":
|
| 33 |
"n_mels": 23,
|
| 34 |
"num_heads": 4,
|
| 35 |
"real_output_dim": 10,
|
|
|
|
| 14 |
"input_dim": 345,
|
| 15 |
"key_dim": 64,
|
| 16 |
"max_nspks": 12,
|
| 17 |
+
"max_speakers": 10,
|
| 18 |
"mixed_fp16_exclude_markers": [
|
| 19 |
"model.dec.",
|
| 20 |
"dec_ret",
|
|
|
|
| 30 |
"enc_ret_",
|
| 31 |
"enc_conv_cache"
|
| 32 |
],
|
| 33 |
+
"n_fft": 1024,
|
| 34 |
"n_mels": 23,
|
| 35 |
"num_heads": 4,
|
| 36 |
"real_output_dim": 10,
|
DIHARD III/ls_eend_dih3_step.json
CHANGED
|
@@ -14,6 +14,7 @@
|
|
| 14 |
"input_dim": 345,
|
| 15 |
"key_dim": 64,
|
| 16 |
"max_nspks": 12,
|
|
|
|
| 17 |
"mixed_fp16_exclude_markers": [
|
| 18 |
"model.dec.",
|
| 19 |
"dec_ret",
|
|
@@ -29,7 +30,7 @@
|
|
| 29 |
"enc_ret_",
|
| 30 |
"enc_conv_cache"
|
| 31 |
],
|
| 32 |
-
"n_fft":
|
| 33 |
"n_mels": 23,
|
| 34 |
"num_heads": 4,
|
| 35 |
"real_output_dim": 10,
|
|
|
|
| 14 |
"input_dim": 345,
|
| 15 |
"key_dim": 64,
|
| 16 |
"max_nspks": 12,
|
| 17 |
+
"max_speakers": 10,
|
| 18 |
"mixed_fp16_exclude_markers": [
|
| 19 |
"model.dec.",
|
| 20 |
"dec_ret",
|
|
|
|
| 30 |
"enc_ret_",
|
| 31 |
"enc_conv_cache"
|
| 32 |
],
|
| 33 |
+
"n_fft": 1024,
|
| 34 |
"n_mels": 23,
|
| 35 |
"num_heads": 4,
|
| 36 |
"real_output_dim": 10,
|