{ "format_version": 1, "checkpoint": "/home/ubuntu/.cache/huggingface/hub/models--Aratako--Irodori-TTS-500M-v2-VoiceDesign/snapshots/456e55708e7183f5c7faa1448209d54aa8991451/model.safetensors", "opset_version": 18, "model": { "latent_dim": 32, "latent_patch_size": 1, "patched_latent_dim": 32, "use_speaker_condition": false, "use_caption_condition": true }, "tokenizers": { "text_repo": "llm-jp/llm-jp-3-150m", "text_add_bos": true, "caption_repo": "llm-jp/llm-jp-3-150m", "caption_add_bos": true }, "rfdit": { "context_path": "rfdit_context_fp32_30s.onnx", "step_path": "rfdit_step_fp32_30s.onnx", "text_len": 256, "caption_len": 512, "ref_steps": 64, "ref_dim": 32, "max_patched_steps": 750, "has_speaker": false, "has_caption": true, "step_buckets": { "1": { "step_path": "rfdit_step_fp32_1s.onnx", "seconds": 1.0, "latent_steps": 25, "max_patched_steps": 25, "step_migraphx_path": "migraphx/rfdit_step_fp32_1s.mxr" }, "2": { "step_path": "rfdit_step_fp32_2s.onnx", "seconds": 2.0, "latent_steps": 50, "max_patched_steps": 50, "step_migraphx_path": "migraphx/rfdit_step_fp32_2s.mxr" }, "4": { "step_path": "rfdit_step_fp32_4s.onnx", "seconds": 4.0, "latent_steps": 100, "max_patched_steps": 100, "step_migraphx_path": "migraphx/rfdit_step_fp32_4s.mxr" }, "8": { "step_path": "rfdit_step_fp32_8s.onnx", "seconds": 8.0, "latent_steps": 200, "max_patched_steps": 200, "step_migraphx_path": "migraphx/rfdit_step_fp32_8s.mxr" }, "12": { "step_path": "rfdit_step_fp32_12s.onnx", "seconds": 12.0, "latent_steps": 300, "max_patched_steps": 300, "step_migraphx_path": "migraphx/rfdit_step_fp32_12s.mxr" }, "16": { "step_path": "rfdit_step_fp32_16s.onnx", "seconds": 16.0, "latent_steps": 400, "max_patched_steps": 400, "step_migraphx_path": "migraphx/rfdit_step_fp32_16s.mxr" }, "24": { "step_path": "rfdit_step_fp32_24s.onnx", "seconds": 24.0, "latent_steps": 600, "max_patched_steps": 600, "step_migraphx_path": "migraphx/rfdit_step_fp32_24s.mxr" }, "30": { "step_path": "rfdit_step_fp32_30s.onnx", "seconds": 30.0, "latent_steps": 750, "max_patched_steps": 750, "step_migraphx_path": "migraphx/rfdit_step_fp32_30s.mxr" } }, "context_migraphx_path": "migraphx/rfdit_context_fp32_30s.mxr" }, "dacvae": { "sample_rate": 48000, "hop_length": 1920, "buckets": { "1": { "path": "dacvae_decode_1s.onnx", "seconds": 1.0, "latent_steps": 25, "patched_steps": 25, "samples": 48000, "migraphx_path": "migraphx/dacvae_decode_1s.mxr" }, "2": { "path": "dacvae_decode_2s.onnx", "seconds": 2.0, "latent_steps": 50, "patched_steps": 50, "samples": 96000, "migraphx_path": "migraphx/dacvae_decode_2s.mxr" }, "4": { "path": "dacvae_decode_4s.onnx", "seconds": 4.0, "latent_steps": 100, "patched_steps": 100, "samples": 192000, "migraphx_path": "migraphx/dacvae_decode_4s.mxr" }, "8": { "path": "dacvae_decode_8s.onnx", "seconds": 8.0, "latent_steps": 200, "patched_steps": 200, "samples": 384000, "migraphx_path": "migraphx/dacvae_decode_8s.mxr" }, "12": { "path": "dacvae_decode_12s.onnx", "seconds": 12.0, "latent_steps": 300, "patched_steps": 300, "samples": 576000, "migraphx_path": "migraphx/dacvae_decode_12s.mxr" }, "16": { "path": "dacvae_decode_16s.onnx", "seconds": 16.0, "latent_steps": 400, "patched_steps": 400, "samples": 768000, "migraphx_path": "migraphx/dacvae_decode_16s.mxr" }, "24": { "path": "dacvae_decode_24s.onnx", "seconds": 24.0, "latent_steps": 600, "patched_steps": 600, "samples": 1152000, "migraphx_path": "migraphx/dacvae_decode_24s.mxr" }, "30": { "path": "dacvae_decode_30s.onnx", "seconds": 30.0, "latent_steps": 750, "patched_steps": 750, "samples": 1440000, "migraphx_path": "migraphx/dacvae_decode_30s.mxr" } } } }