# Triton Inference Server model configuration for a Python-backend model.
# If set, the name must match this model's directory name in the model
# repository.
name: "similarity_model"
backend: "python"
# Maximum batch size the model supports. Because it is non-zero, the batch
# dimension is implicit and is not listed in any tensor's dims.
max_batch_size: 8

# Request inputs: two tensors of raw audio bytes.
# TYPE_STRING is the variable-length bytes type. max_batch_size is non-zero,
# so each tensor's full shape is [batch, 1].
input {
  name: "AUDIO_BYTES_1"
  data_type: TYPE_STRING
  dims: [1]
}
input {
  name: "AUDIO_BYTES_2"
  data_type: TYPE_STRING
  dims: [1]
}

# Response output: one FP32 similarity score per batch item
# (full shape [batch, 1], since max_batch_size is non-zero).
output {
  name: "SIMILARITY"
  data_type: TYPE_FP32
  dims: [1]
}