# Triton model configuration for the audio similarity model.
#
# NOTE: '#' comments run to the end of the line, so every field must sit on
# its own line; otherwise everything after the first comment is ignored.

# Model name. Rename as needed.
# NOTE(review): Triton expects this to match the model's directory name in
# the model repository -- confirm against the deployment layout.
name: "similarity_model"

# Served by the Python backend.
backend: "python"

# Largest batch accepted per request. Because batching is enabled, the dims
# listed below describe a single item and do not include the batch dimension.
max_batch_size: 8

# Input tensors are raw audio bytes.
input [
  {
    name: "AUDIO_BYTES_1"
    # TYPE_STRING is used for variable-length binary data.
    data_type: TYPE_STRING
    dims: [ 1 ]
  },
  {
    name: "AUDIO_BYTES_2"
    # Same encoding as AUDIO_BYTES_1: variable-length binary data.
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

# Output is a single similarity score.
output [
  {
    name: "SIMILARITY"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]