File size: 1,894 Bytes
83c67ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
{
  "model_id": "nvidia/parakeet-rnnt-0.6b",
  "model_type": "parakeet_rnnt",
  "language": "",
  "sample_rate": 16000,
  "max_audio_seconds": 15.0,
  "max_audio_samples": 240000,
  "vocab_size": 1024,
  "blank_id": 1024,
  "checkpoint": {
    "type": "pretrained",
    "model_id": "nvidia/parakeet-rnnt-0.6b"
  },
  "coreml": {
    "compute_precision": "FLOAT32",
    "quantization": "none"
  },
  "components": {
    "mel_encoder": {
      "path": "parakeet_mel_encoder.mlpackage",
      "inputs": {
        "audio_signal": [
          1,
          240000
        ],
        "audio_length": [
          1
        ]
      },
      "outputs": {
        "encoder": [
          1,
          1024,
          188
        ],
        "encoder_length": [
          1
        ]
      }
    },
    "decoder": {
      "path": "parakeet_decoder.mlpackage",
      "inputs": {
        "targets": [
          1,
          1
        ],
        "target_length": [
          1
        ],
        "h_in": [
          2,
          1,
          640
        ],
        "c_in": [
          2,
          1,
          640
        ]
      },
      "outputs": {
        "decoder": [
          1,
          640,
          1
        ],
        "h_out": [
          2,
          1,
          640
        ],
        "c_out": [
          2,
          1,
          640
        ]
      }
    },
    "joint_decision_single_step": {
      "path": "parakeet_joint_decision_single_step.mlpackage",
      "inputs": {
        "encoder_step": [
          1,
          1024,
          1
        ],
        "decoder_step": [
          1,
          640,
          1
        ]
      },
      "outputs": {
        "token_id": [
          1,
          1,
          1
        ],
        "token_prob": [
          1,
          1,
          1
        ],
        "duration": [
          1,
          1,
          1
        ]
      }
    }
  }
}