{
"16": {
"inputs": {
"audio": "fcecb7fd3827851a4fb22c52886610aed0a62e5fe32ea46f3b1291ff3dd236ab.mp3",
"audioUI": ""
},
"class_type": "LoadAudio",
"_meta": {
"title": "输入原音频"
}
},
"31": {
"inputs": {
"repo_id": "Qwen/Qwen3-ASR-1.7B",
"precision": "bf16",
"attention": "flash_attention_2",
"max_new_tokens": 1024,
"forced_aligner": "Qwen/Qwen3-ForcedAligner-0.6B"
},
"class_type": "VoiceBridgeASRLoader",
"_meta": {
"title": "VoiceBridge ASR Loader"
}
},
"33": {
"inputs": {
"language": "auto",
"context": "",
"return_timestamps": true,
"model_key": [
"31",
0
],
"audio": [
"99",
0
]
},
"class_type": "VoiceBridgeASRTranscribe",
"_meta": {
"title": "VoiceBridge ASR Transcribe"
}
},
"36": {
"inputs": {
"text": [
"33",
1
],
"language": [
"33",
2
],
"save_srt": true,
"filename_prefix": "VoiceBridge/subtitle",
"forced_aligns": [
"33",
0
]
},
"class_type": "GenerateSRT",
"_meta": {
"title": "Generate SRT"
}
},
"45": {
"inputs": {
"language": "auto",
"context": "",
"return_timestamps": false,
"model_key": [
"31",
0
],
"audio": [
"46",
0
]
},
"class_type": "VoiceBridgeASRTranscribe",
"_meta": {
"title": "VoiceBridge ASR Transcribe"
}
},
"46": {
"inputs": {
"start_index": 0,
"duration": 10.000000000000002,
"audio": [
"99",
0
]
},
"class_type": "TrimAudioDuration",
"_meta": {
"title": "Trim Audio Duration"
}
},
"61": {
"inputs": {
"text": "1\n00:00:00,000 --> 00:00:04,000\n我相信你在各种内容下面应该都见过这样一句类似的评论\n\n2\n00:00:04,240 --> 00:00:04,960\n那几个月之前\n\n3\n00:00:04,960 --> 00:00:06,879\n其实大家把这个当成一种调侃了\n\n4\n00:00:06,879 --> 00:00:07,679\n是一种梗\n\n5\n00:00:07,679 --> 00:00:07,919\n对吧\n\n6\n00:00:08,160 --> 00:00:09,359\n你看到太离谱的东西\n\n7\n00:00:09,439 --> 00:00:10,320\n你会说\n\n8\n00:00:10,320 --> 00:00:10,800\n“哎呀\n\n9\n00:00:10,800 --> 00:00:12,560\n这是AI或者AI太好用了\n\n10\n00:00:12,560 --> 00:00:12,960\n家人\n\n11\n00:00:13,359 --> 00:00:14,240\n”但有意思的就是\n\n12\n00:00:14,240 --> 00:00:15,359\n只过了短短几个月\n\n13\n00:00:15,519 --> 00:00:19,519\n你会发现现在这句“这是AI嘛”背后的情绪它变化了\n\n14\n00:00:19,839 --> 00:00:21,280\n因为AI它又变强了\n\n",
"anything": [
"36",
0
]
},
"class_type": "easy showAnything",
"_meta": {
"title": "Show Any"
}
},
"96": {
"inputs": {
"model_name": "MelBandRoFormer_comfy/MelBandRoformer_fp32.safetensors"
},
"class_type": "MelBandRoFormerModelLoader",
"_meta": {
"title": "Mel-Band RoFormer Model Loader"
}
},
"99": {
"inputs": {
"model": [
"96",
0
],
"audio": [
"16",
0
]
},
"class_type": "MelBandRoFormerSampler",
"_meta": {
"title": "Mel-Band RoFormer Sampler"
}
},
"105": {
"inputs": {
"api_baseurl": "https://api.deepseek.com",
"api_key": "",
"model": "deepseek-v4-flash",
"role": [
"109",
0
],
"prompt": [
"36",
0
],
"temperature": 0.5,
"seed": 1761935326
},
"class_type": "RH_LLMAPI_NODE",
"_meta": {
"title": "Runninghub LLM API Node"
}
},
"107": {
"inputs": {
"text": "1\n00:00:00,000 --> 00:00:04,000\nI bet you've seen a comment like this under all kinds of content.\n\n2\n00:00:04,240 --> 00:00:04,960\nA few months ago,\n\n3\n00:00:04,960 --> 00:00:06,879\neveryone used it as a joke.\n\n4\n00:00:06,879 --> 00:00:07,679\nIt was a meme.\n\n5\n00:00:07,679 --> 00:00:07,919\nRight?\n\n6\n00:00:08,160 --> 00:00:09,359\nYou see something absurd.\n\n7\n00:00:09,439 --> 00:00:10,320\nYou'd say,\n\n8\n00:00:10,320 --> 00:00:10,800\n\"Ah,\n\n9\n00:00:10,800 --> 00:00:12,560\nmust be AI, or AI is too good.\n\n10\n00:00:12,560 --> 00:00:12,960\nBro.\"\n\n11\n00:00:13,359 --> 00:00:14,240\nBut the interesting thing is,\n\n12\n00:00:14,240 --> 00:00:15,359\njust a few months later,\n\n13\n00:00:15,519 --> 00:00:19,519\nnow the emotion behind \"Is this AI?\" has changed.\n\n14\n00:00:19,839 --> 00:00:21,280\nBecause AI has gotten even stronger.",
"anything": [
"105",
0
]
},
"class_type": "easy showAnything",
"_meta": {
"title": "Show Any"
}
},
"108": {
"inputs": {
"text": "You are a professional subtitle translator. Please translate all the subtitles in this SRT file into natural, spoken [Language]. Keep each timestamp exactly the same. For each subtitle line, use the shortest possible phrasing that still captures the original meaning (it doesn't have to be a direct translation, just convey the correct idea). Output the content directly in SRT format, without any explanation or descriptive text."
},
"class_type": "CR Text",
"_meta": {
"title": "🔤 CR Text"
}
},
"109": {
"inputs": {
"texts": [
"108",
0
],
"find": "[Language]",
"replace": [
"110",
0
]
},
"class_type": "ReplaceText",
"_meta": {
"title": "Replace Text (DEPRECATED)"
}
},
"110": {
"inputs": {
"value": "English"
},
"class_type": "easy string",
"_meta": {
"title": "输入目标语言"
}
},
"176": {
"inputs": {
"tempo_limit": 2,
"mini_gap_ms": 10,
"srt_items": [
"177",
1
],
"audios": [
"268",
0
]
},
"class_type": "VoiceBridgeAudioListMergerBySRT",
"_meta": {
"title": "VoiceBridge Audio List Merger by SRT"
}
},
"177": {
"inputs": {
"srt_string": [
"105",
0
]
},
"class_type": "VoiceBridgeSRTSplitter",
"_meta": {
"title": "VoiceBridge SRT Splitter"
}
},
"179": {
"inputs": {
"text": "1\n00:00:00,000 --> 00:00:03,540\nI bet you've seen a comment like this under all kinds of content.\n\n2\n00:00:03,550 --> 00:00:04,681\nA few months ago,\n\n3\n00:00:04,960 --> 00:00:06,851\neveryone used it as a joke.\n\n4\n00:00:06,861 --> 00:00:07,835\nIt was a meme.\n\n5\n00:00:07,845 --> 00:00:08,400\nRight?\n\n6\n00:00:08,410 --> 00:00:09,683\nYou see something absurd.\n\n7\n00:00:09,693 --> 00:00:10,536\nYou'd say,\n\n8\n00:00:10,546 --> 00:00:10,996\n\"Ah,\n\n9\n00:00:11,006 --> 00:00:13,018\nmust be AI, or AI is too good.\n\n10\n00:00:13,028 --> 00:00:13,837\nBro.\"\n\n11\n00:00:13,847 --> 00:00:14,700\nBut the interesting thing is,\n\n12\n00:00:14,710 --> 00:00:15,946\njust a few months later,\n\n13\n00:00:15,956 --> 00:00:20,245\nnow the emotion behind \"Is this AI?\" has changed.\n\n14\n00:00:20,255 --> 00:00:23,795\nBecause AI has gotten even stronger.\n",
"anything": [
"176",
1
]
},
"class_type": "easy showAnything",
"_meta": {
"title": "Show Any"
}
},
"180": {
"inputs": {
"filename_prefix": "audio/ComfyUI",
"quality": "V0",
"audioUI": "",
"audio": [
"251",
0
]
},
"class_type": "SaveAudioMP3",
"_meta": {
"title": "Save Audio (MP3)"
}
},
"206": {
"inputs": {
"control_instruction": "",
"text": [
"177",
0
],
"cfg_value": 2,
"inference_steps": 10,
"seed": 220751697769047,
"ultimate_clone": true,
"reference_audio_text": [
"264",
0
],
"normalize_text": false,
"denoise_reference": false,
"max_len": 4096,
"retry_badcase": true,
"model": [
"207",
0
],
"reference_audio": [
"261",
0
]
},
"class_type": "RunningHub_VoxCPM_Generate",
"_meta": {
"title": "RunningHub VoxCPM Generate Speech"
}
},
"207": {
"inputs": {
"model_name": "VoxCPM2",
"optimize": false,
"lora_name": "None"
},
"class_type": "RunningHub_VoxCPM_LoadModel",
"_meta": {
"title": "RunningHub VoxCPM Load Model"
}
},
"214": {
"inputs": {
"srt_string": [
"179",
0
],
"filename_prefix": "VoiceBridge/subtitle"
},
"class_type": "SaveSRTFromString",
"_meta": {
"title": "Save SRT From String"
}
},
"251": {
"inputs": {
"audio_1": [
"176",
0
],
"audio_2": [
"99",
1
]
},
"class_type": "MergeAudioMW",
"_meta": {
"title": "Merge Audio"
}
},
"261": {
"inputs": {
"any_01": [
"46",
0
]
},
"class_type": "Any Switch (rgthree)",
"_meta": {
"title": "Any Switch (rgthree)"
}
},
"264": {
"inputs": {
"any_01": [
"45",
1
]
},
"class_type": "Any Switch (rgthree)",
"_meta": {
"title": "Any Switch (rgthree)"
}
},
"268": {
"inputs": {
"start_index": 0.30000000000000004,
"duration": 100.00000000000001,
"audio": [
"206",
0
]
},
"class_type": "TrimAudioDuration",
"_meta": {
"title": "Trim Audio Duration"
}
}
}