File size: 1,237 Bytes
fca8ec9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58f1fb3
fca8ec9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58f1fb3
fca8ec9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
{
  "compute_precision": "float32",
  "context_recp": 7,
  "conv_delay": 9,
  "decoder_layers": 2,
  "encoder_conv_cache_len": 15,
  "encoder_dim": 256,
  "encoder_layers": 4,
  "feat_type": "logmel23_cummn",
  "frame_hz": 10.0,
  "full_output_dim": 9,
  "head_dim": 64,
  "hop_length": 80,
  "input_dim": 345,
  "key_dim": 64,
  "max_nspks": 9,
  "max_speakers": 7,
  "mixed_fp16_exclude_markers": [
    "model.dec.",
    "dec_ret",
    "candidate_dec",
    "attractor",
    "full_logits",
    "decode",
    "convert"
  ],
  "mixed_fp16_include_markers": [
    "model.enc.",
    "model.cnn.",
    "enc_ret_",
    "enc_conv_cache"
  ],
  "n_fft": 1024,
  "n_mels": 23,
  "num_heads": 4,
  "real_output_dim": 7,
  "sample_rate": 8000,
  "state_shapes": {
    "dec_ret_kv": [
      2,
      9,
      4,
      64,
      64
    ],
    "dec_ret_scale": [
      2,
      9,
      4
    ],
    "enc_conv_cache": [
      4,
      1,
      15,
      256
    ],
    "enc_ret_kv": [
      4,
      1,
      4,
      64,
      64
    ],
    "enc_ret_scale": [
      4,
      1,
      4
    ],
    "top_buffer": [
      1,
      19,
      256
    ]
  },
  "subsampling": 10,
  "target_sample_rate": 8000,
  "top_buffer_len": 19,
  "win_length": 200
}