Spaces:

build-small-hackathon
/

VoiceGate

Running on Zero

App Files Files Community

VoiceGate / workflows /voicegate_api.json

YanTianlong

Add VoiceGate workflow assets and TODO

5277693 23 days ago

Raw

History Blame Contribute Delete

9.73 kB

	{
	"16": {
	"inputs": {
	"audio": "fcecb7fd3827851a4fb22c52886610aed0a62e5fe32ea46f3b1291ff3dd236ab.mp3",
	"audioUI": ""
	},
	"class_type": "LoadAudio",
	"_meta": {
	"title": "输入原音频"
	}
	},
	"31": {
	"inputs": {
	"repo_id": "Qwen/Qwen3-ASR-1.7B",
	"precision": "bf16",
	"attention": "flash_attention_2",
	"max_new_tokens": 1024,
	"forced_aligner": "Qwen/Qwen3-ForcedAligner-0.6B"
	},
	"class_type": "VoiceBridgeASRLoader",
	"_meta": {
	"title": "VoiceBridge ASR Loader"
	}
	},
	"33": {
	"inputs": {
	"language": "auto",
	"context": "",
	"return_timestamps": true,
	"model_key": [
	"31",
	0
	],
	"audio": [
	"99",
	0
	]
	},
	"class_type": "VoiceBridgeASRTranscribe",
	"_meta": {
	"title": "VoiceBridge ASR Transcribe"
	}
	},
	"36": {
	"inputs": {
	"text": [
	"33",
	1
	],
	"language": [
	"33",
	2
	],
	"save_srt": true,
	"filename_prefix": "VoiceBridge/subtitle",
	"forced_aligns": [
	"33",
	0
	]
	},
	"class_type": "GenerateSRT",
	"_meta": {
	"title": "Generate SRT"
	}
	},
	"45": {
	"inputs": {
	"language": "auto",
	"context": "",
	"return_timestamps": false,
	"model_key": [
	"31",
	0
	],
	"audio": [
	"46",
	0
	]
	},
	"class_type": "VoiceBridgeASRTranscribe",
	"_meta": {
	"title": "VoiceBridge ASR Transcribe"
	}
	},
	"46": {
	"inputs": {
	"start_index": 0,
	"duration": 10.000000000000002,
	"audio": [
	"99",
	0
	]
	},
	"class_type": "TrimAudioDuration",
	"_meta": {
	"title": "Trim Audio Duration"
	}
	},
	"61": {
	"inputs": {
	"text": "1\n00:00:00,000 --> 00:00:04,000\n我相信你在各种内容下面应该都见过这样一句类似的评论\n\n2\n00:00:04,240 --> 00:00:04,960\n那几个月之前\n\n3\n00:00:04,960 --> 00:00:06,879\n其实大家把这个当成一种调侃了\n\n4\n00:00:06,879 --> 00:00:07,679\n是一种梗\n\n5\n00:00:07,679 --> 00:00:07,919\n对吧\n\n6\n00:00:08,160 --> 00:00:09,359\n你看到太离谱的东西\n\n7\n00:00:09,439 --> 00:00:10,320\n你会说\n\n8\n00:00:10,320 --> 00:00:10,800\n“哎呀\n\n9\n00:00:10,800 --> 00:00:12,560\n这是AI或者AI太好用了\n\n10\n00:00:12,560 --> 00:00:12,960\n家人\n\n11\n00:00:13,359 --> 00:00:14,240\n”但有意思的就是\n\n12\n00:00:14,240 --> 00:00:15,359\n只过了短短几个月\n\n13\n00:00:15,519 --> 00:00:19,519\n你会发现现在这句“这是AI嘛”背后的情绪它变化了\n\n14\n00:00:19,839 --> 00:00:21,280\n因为AI它又变强了\n\n",
	"anything": [
	"36",
	0
	]
	},
	"class_type": "easy showAnything",
	"_meta": {
	"title": "Show Any"
	}
	},
	"96": {
	"inputs": {
	"model_name": "MelBandRoFormer_comfy/MelBandRoformer_fp32.safetensors"
	},
	"class_type": "MelBandRoFormerModelLoader",
	"_meta": {
	"title": "Mel-Band RoFormer Model Loader"
	}
	},
	"99": {
	"inputs": {
	"model": [
	"96",
	0
	],
	"audio": [
	"16",
	0
	]
	},
	"class_type": "MelBandRoFormerSampler",
	"_meta": {
	"title": "Mel-Band RoFormer Sampler"
	}
	},
	"105": {
	"inputs": {
	"api_baseurl": "https://api.deepseek.com",
	"api_key": "",
	"model": "deepseek-v4-flash",
	"role": [
	"109",
	0
	],
	"prompt": [
	"36",
	0
	],
	"temperature": 0.5,
	"seed": 1761935326
	},
	"class_type": "RH_LLMAPI_NODE",
	"_meta": {
	"title": "Runninghub LLM API Node"
	}
	},
	"107": {
	"inputs": {
	"text": "1\n00:00:00,000 --> 00:00:04,000\nI bet you've seen a comment like this under all kinds of content.\n\n2\n00:00:04,240 --> 00:00:04,960\nA few months ago,\n\n3\n00:00:04,960 --> 00:00:06,879\neveryone used it as a joke.\n\n4\n00:00:06,879 --> 00:00:07,679\nIt was a meme.\n\n5\n00:00:07,679 --> 00:00:07,919\nRight?\n\n6\n00:00:08,160 --> 00:00:09,359\nYou see something absurd.\n\n7\n00:00:09,439 --> 00:00:10,320\nYou'd say,\n\n8\n00:00:10,320 --> 00:00:10,800\n\"Ah,\n\n9\n00:00:10,800 --> 00:00:12,560\nmust be AI, or AI is too good.\n\n10\n00:00:12,560 --> 00:00:12,960\nBro.\"\n\n11\n00:00:13,359 --> 00:00:14,240\nBut the interesting thing is,\n\n12\n00:00:14,240 --> 00:00:15,359\njust a few months later,\n\n13\n00:00:15,519 --> 00:00:19,519\nnow the emotion behind \"Is this AI?\" has changed.\n\n14\n00:00:19,839 --> 00:00:21,280\nBecause AI has gotten even stronger.",
	"anything": [
	"105",
	0
	]
	},
	"class_type": "easy showAnything",
	"_meta": {
	"title": "Show Any"
	}
	},
	"108": {
	"inputs": {
	"text": "You are a professional subtitle translator. Please translate all the subtitles in this SRT file into natural, spoken [Language]. Keep each timestamp exactly the same. For each subtitle line, use the shortest possible phrasing that still captures the original meaning (it doesn't have to be a direct translation, just convey the correct idea). Output the content directly in SRT format, without any explanation or descriptive text."
	},
	"class_type": "CR Text",
	"_meta": {
	"title": "🔤 CR Text"
	}
	},
	"109": {
	"inputs": {
	"texts": [
	"108",
	0
	],
	"find": "[Language]",
	"replace": [
	"110",
	0
	]
	},
	"class_type": "ReplaceText",
	"_meta": {
	"title": "Replace Text (DEPRECATED)"
	}
	},
	"110": {
	"inputs": {
	"value": "English"
	},
	"class_type": "easy string",
	"_meta": {
	"title": "输入目标语言"
	}
	},
	"176": {
	"inputs": {
	"tempo_limit": 2,
	"mini_gap_ms": 10,
	"srt_items": [
	"177",
	1
	],
	"audios": [
	"268",
	0
	]
	},
	"class_type": "VoiceBridgeAudioListMergerBySRT",
	"_meta": {
	"title": "VoiceBridge Audio List Merger by SRT"
	}
	},
	"177": {
	"inputs": {
	"srt_string": [
	"105",
	0
	]
	},
	"class_type": "VoiceBridgeSRTSplitter",
	"_meta": {
	"title": "VoiceBridge SRT Splitter"
	}
	},
	"179": {
	"inputs": {
	"text": "1\n00:00:00,000 --> 00:00:03,540\nI bet you've seen a comment like this under all kinds of content.\n\n2\n00:00:03,550 --> 00:00:04,681\nA few months ago,\n\n3\n00:00:04,960 --> 00:00:06,851\neveryone used it as a joke.\n\n4\n00:00:06,861 --> 00:00:07,835\nIt was a meme.\n\n5\n00:00:07,845 --> 00:00:08,400\nRight?\n\n6\n00:00:08,410 --> 00:00:09,683\nYou see something absurd.\n\n7\n00:00:09,693 --> 00:00:10,536\nYou'd say,\n\n8\n00:00:10,546 --> 00:00:10,996\n\"Ah,\n\n9\n00:00:11,006 --> 00:00:13,018\nmust be AI, or AI is too good.\n\n10\n00:00:13,028 --> 00:00:13,837\nBro.\"\n\n11\n00:00:13,847 --> 00:00:14,700\nBut the interesting thing is,\n\n12\n00:00:14,710 --> 00:00:15,946\njust a few months later,\n\n13\n00:00:15,956 --> 00:00:20,245\nnow the emotion behind \"Is this AI?\" has changed.\n\n14\n00:00:20,255 --> 00:00:23,795\nBecause AI has gotten even stronger.\n",
	"anything": [
	"176",
	1
	]
	},
	"class_type": "easy showAnything",
	"_meta": {
	"title": "Show Any"
	}
	},
	"180": {
	"inputs": {
	"filename_prefix": "audio/ComfyUI",
	"quality": "V0",
	"audioUI": "",
	"audio": [
	"251",
	0
	]
	},
	"class_type": "SaveAudioMP3",
	"_meta": {
	"title": "Save Audio (MP3)"
	}
	},
	"206": {
	"inputs": {
	"control_instruction": "",
	"text": [
	"177",
	0
	],
	"cfg_value": 2,
	"inference_steps": 10,
	"seed": 220751697769047,
	"ultimate_clone": true,
	"reference_audio_text": [
	"264",
	0
	],
	"normalize_text": false,
	"denoise_reference": false,
	"max_len": 4096,
	"retry_badcase": true,
	"model": [
	"207",
	0
	],
	"reference_audio": [
	"261",
	0
	]
	},
	"class_type": "RunningHub_VoxCPM_Generate",
	"_meta": {
	"title": "RunningHub VoxCPM Generate Speech"
	}
	},
	"207": {
	"inputs": {
	"model_name": "VoxCPM2",
	"optimize": false,
	"lora_name": "None"
	},
	"class_type": "RunningHub_VoxCPM_LoadModel",
	"_meta": {
	"title": "RunningHub VoxCPM Load Model"
	}
	},
	"214": {
	"inputs": {
	"srt_string": [
	"179",
	0
	],
	"filename_prefix": "VoiceBridge/subtitle"
	},
	"class_type": "SaveSRTFromString",
	"_meta": {
	"title": "Save SRT From String"
	}
	},
	"251": {
	"inputs": {
	"audio_1": [
	"176",
	0
	],
	"audio_2": [
	"99",
	1
	]
	},
	"class_type": "MergeAudioMW",
	"_meta": {
	"title": "Merge Audio"
	}
	},
	"261": {
	"inputs": {
	"any_01": [
	"46",
	0
	]
	},
	"class_type": "Any Switch (rgthree)",
	"_meta": {
	"title": "Any Switch (rgthree)"
	}
	},
	"264": {
	"inputs": {
	"any_01": [
	"45",
	1
	]
	},
	"class_type": "Any Switch (rgthree)",
	"_meta": {
	"title": "Any Switch (rgthree)"
	}
	},
	"268": {
	"inputs": {
	"start_index": 0.30000000000000004,
	"duration": 100.00000000000001,
	"audio": [
	"206",
	0
	]
	},
	"class_type": "TrimAudioDuration",
	"_meta": {
	"title": "Trim Audio Duration"
	}
	}
	}