|
|
""" |
|
|
Example usage of the ONNX NanoCodec decoder |
|
|
""" |
|
|
|
|
|
import numpy as np |
|
|
import onnxruntime as ort |
|
|
|
|
|
def example_basic_inference(
    model_path="nano_codec_decoder.onnx",
    num_frames=10,
    sample_rate=22050,
):
    """Run a basic inference pass through the ONNX NanoCodec decoder.

    Feeds randomly generated codec tokens into the exported decoder and
    prints the resulting audio shape and duration.

    Args:
        model_path: Path to the exported decoder ONNX file.
        num_frames: Number of codec frames to decode.
        sample_rate: Output sample rate in Hz, used only to report the
            duration. Defaults to 22050 — assumes the model emits
            22.05 kHz audio; TODO confirm against the export config.

    Returns:
        The decoded audio array from the model (first output of the
        ONNX session; shape is printed at runtime).
    """
    print("Loading ONNX model...")

    # Prefer CUDA; onnxruntime falls back to CPU automatically when the
    # CUDA provider is unavailable.
    session = ort.InferenceSession(
        model_path,
        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
    )

    print(f"Providers: {session.get_providers()}")

    # Random token ids stand in for real codec output.
    # Shape is (batch=1, codebooks=4, num_frames); token ids are drawn
    # from [0, 500) — presumably the codebook size, verify against model.
    tokens = np.random.randint(0, 500, (1, 4, num_frames), dtype=np.int64)
    tokens_len = np.array([num_frames], dtype=np.int64)

    print(f"\nInput tokens: {tokens.shape}")

    # None => return all model outputs (audio, audio_len).
    outputs = session.run(
        None,
        {"tokens": tokens, "tokens_len": tokens_len}
    )

    audio, audio_len = outputs
    print(f"Output audio: {audio.shape}")
    print(f"Audio duration: {audio.shape[1] / sample_rate:.2f} seconds")

    return audio
|
|
|
|
|
|
|
|
def example_with_decoder_class(
    onnx_model_path="nano_codec_decoder.onnx",
    device="cuda",
    num_frames=5,
):
    """Decode a few frames using the streaming decoder wrapper class.

    Args:
        onnx_model_path: Path to the exported decoder ONNX file.
        device: Execution device passed to the decoder ("cuda" or "cpu").
        num_frames: Number of random frames to decode before resetting.

    Returns:
        None. Results are printed; this is a usage demonstration only.
    """
    # Imported lazily so the basic example works even if the optimized
    # wrapper module is not present.
    from onnx_decoder_optimized import ONNXKaniTTSDecoderOptimized

    print("Initializing decoder...")
    decoder = ONNXKaniTTSDecoderOptimized(
        onnx_model_path=onnx_model_path,
        device=device
    )

    print("\nDecoding frames...")
    for i in range(num_frames):
        # One random token per codebook (4 codebooks, ids in [0, 500)).
        codes = [np.random.randint(0, 500) for _ in range(4)]
        audio = decoder.decode_frame(codes)
        print(f" Frame {i+1}: {audio.shape} samples")

    # Clear any streaming state so the decoder can be reused cleanly.
    decoder.reset_history()
    print("✓ Decoding complete")
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Banner line reused for all section separators below.
    separator = "=" * 60

    print(separator)
    print("ONNX NanoCodec Decoder Examples")
    print(separator)

    # Example 1: raw onnxruntime session.
    print("\n[1/2] Basic inference...")
    example_basic_inference()

    # Example 2: higher-level decoder wrapper.
    print("\n[2/2] Using decoder class...")
    example_with_decoder_class()

    print("\n" + separator)
    print("Examples complete!")
    print(separator)
|
|
|