// Protocol Buffers schema for the `cosyvoice` package: one streaming service
// plus its request and response messages.
syntax = "proto3";

package cosyvoice;

// Go package setting applied to code generated from this file.
option go_package = "protos/";
// -----------------------------------------------------------------------------
// Service
// -----------------------------------------------------------------------------
// CosyVoice exposes a single server-streaming RPC.
service CosyVoice {
  // Inference takes one Request and returns a stream of Response messages.
  rpc Inference(Request) returns (stream Response) {}
}
// -----------------------------------------------------------------------------
// Top-level wrapper
// -----------------------------------------------------------------------------
// Request is the single argument type of the Inference RPC. It wraps the four
// request variants in a oneof, so at most one of them is set on any message.
message Request {
  oneof RequestPayload {
    sftRequest sft_request = 1;
    zeroshotRequest zero_shot_request = 2;
    crosslingualRequest cross_lingual_request = 3;
    instructRequest instruct_request = 4;
  }
}
// -----------------------------------------------------------------------------
// Individual request messages
// -----------------------------------------------------------------------------
// sftRequest is the `sft_request` variant of Request: a speaker id plus text.
message sftRequest {
  string spk_id = 1;    // presumably identifies a predefined speaker - confirm against the server
  string tts_text = 2;  // presumably the text to synthesize - confirm against the server
}
// zeroshotRequest is the `zero_shot_request` variant of Request: target text
// plus a prompt text and a prompt audio, with an optional speed factor.
message zeroshotRequest {
  string tts_text = 1;
  string prompt_text = 2;
  bytes prompt_audio = 3;  // URL (utf-8) or raw bytes
  // Optional; 1 = normal speed.
  // NOTE(review): without the proto3 `optional` keyword an unset value reads
  // back as 0, indistinguishable from an explicit 0 - confirm the server
  // treats 0 as "use the default".
  float speed = 4;
}
// crosslingualRequest is the `cross_lingual_request` variant of Request:
// target text plus a prompt audio, with an optional speed factor.
message crosslingualRequest {
  string tts_text = 1;
  bytes prompt_audio = 2;  // presumably URL (utf-8) or raw bytes, as in the sibling requests - confirm
  // Optional.
  // NOTE(review): without the proto3 `optional` keyword an unset value reads
  // back as 0 - confirm the server treats 0 as "use the default".
  float speed = 3;
}
// instructRequest is the `instruct_request` variant of Request: target text,
// an instruction text and a prompt audio, with an optional speed factor.
message instructRequest {
  string tts_text = 1;
  string instruct_text = 2;
  bytes prompt_audio = 3;  // URL (utf-8) or raw bytes
  // Optional.
  // NOTE(review): without the proto3 `optional` keyword an unset value reads
  // back as 0 - confirm the server treats 0 as "use the default".
  float speed = 4;
}
// -----------------------------------------------------------------------------
// Response is the message type streamed back by the Inference RPC; each
// streamed message carries one `tts_audio` payload.
message Response {
  bytes tts_audio = 1;  // audio payload; encoding and sample format are not specified in this file
}