Spaces:
Build error
Build error
# Stdlib imports.
import os
import re
import subprocess
import tempfile

# Third-party imports.
import gradio as gr
import soundfile as sf
from pydub import AudioSegment

# Bug fix: the original `import NeuTTS_Air_q4_GGUF` is not an importable
# module and never brought `NeuTTSAir` into scope, so the constructor call
# below raised NameError at startup (the Space's "Build error").
# NOTE(review): the class ships in the `neuphonic/neutts-air` package —
# confirm this module path against the Space's requirements.txt.
from neuttsair.neutts import NeuTTSAir

# Load NeuTTS-Air (Q4-GGUF) on CPU. (The original comment said Q8, but the
# backbone repo below is the q4 quantization.)
print("🧠 Loading NeuTTS-Air (Q4-GGUF)...")
tts = NeuTTSAir(
    backbone_repo="neuphonic/neutts-air-q4-gguf",
    backbone_device="cpu",
    codec_repo="neuphonic/neucodec",
    codec_device="cpu",
)
def parse_host_script(script):
    """Split a podcast script into (speaker, text) segments.

    Lines prefixed with "HOST 1:" or "HOST 2:" are attributed to that
    speaker; any other non-empty line defaults to HOST 1. Blank lines
    are dropped.

    Returns a list of ("HOST 1" | "HOST 2", text) tuples in script order.
    """
    segments = []
    for raw_line in script.strip().split("\n"):
        text = raw_line.strip()
        if not text:
            continue  # skip blank lines entirely
        speaker = "HOST 1"  # default speaker for untagged lines
        for prefix in ("HOST 1:", "HOST 2:"):
            if text.startswith(prefix):
                speaker = prefix[:-1]  # drop the trailing colon
                text = text[len(prefix):].strip()
                break
        segments.append((speaker, text))
    return segments
def generate_audio_from_script(script, ref1_wav, ref1_txt, ref2_wav, ref2_txt):
    """Synthesize a single WAV for the whole script via voice cloning.

    Each script segment is synthesized with the reference voice of its
    tagged speaker, written to a per-call temp directory, and concatenated
    in order.

    Args:
        script: multi-line text with optional "HOST 1:" / "HOST 2:" tags.
        ref1_wav / ref1_txt: HOST 1 reference audio path and its transcript.
        ref2_wav / ref2_txt: optional HOST 2 reference; may be None.

    Returns:
        Path to the concatenated 24 kHz WAV file.
    """
    segments = parse_host_script(script)

    ref_map = {"HOST 1": (ref1_wav, ref1_txt)}
    # Robustness fix: only map HOST 2 when a second reference was actually
    # supplied; otherwise HOST 2 lines fall back to the HOST 1 voice
    # instead of crashing on encode_reference(None).
    if ref2_wav and ref2_txt:
        ref_map["HOST 2"] = (ref2_wav, ref2_txt)

    # Fix: unique work dir per call so concurrent Gradio requests don't
    # clobber each other's fixed /tmp/seg_*.wav files.
    work_dir = tempfile.mkdtemp(prefix="tts_segments_")

    # Perf: encode each reference voice once, not once per script line —
    # the encoding is invariant across segments.
    codes_cache = {}
    combined = AudioSegment.empty()
    for i, (tag, text) in enumerate(segments):
        ref_wav, ref_text = ref_map.get(tag, ref_map["HOST 1"])
        if ref_wav not in codes_cache:
            codes_cache[ref_wav] = tts.encode_reference(ref_wav)
        wav = tts.infer(text, codes_cache[ref_wav], ref_text)
        seg_path = os.path.join(work_dir, f"seg_{i}.wav")
        sf.write(seg_path, wav, 24000)  # NeuTTS-Air emits 24 kHz audio
        combined += AudioSegment.from_wav(seg_path)

    final_path = os.path.join(work_dir, "script_audio.wav")
    combined.export(final_path, format="wav")
    return final_path
def generate_video_with_lipsync(audio_path, ref_video_path):
    """Placeholder lip-sync stage: currently returns the reference video unchanged.

    Replace the body with a real lip-sync pipeline (e.g. SadTalker or
    Wav2Lip via subprocess.run([...])) that renders `audio_path` onto the
    face in `ref_video_path` and returns the rendered file's path, e.g.:
        stem = os.path.splitext(os.path.basename(audio_path))[0]
        output_video = f"/tmp/output_{stem}.mp4"
    (splitext fixes the original dead local, which produced names like
    "output_foo.wav.mp4").

    Returns:
        Path to the (placeholder) output video.
    """
    # Bug fix: the original returned the undefined name `ref_video`
    # (NameError on every call); the parameter is `ref_video_path`.
    return ref_video_path
def generate_video_podcast(script, ref1_video, ref1_transcript, ref2_video=None, ref2_transcript=None):
    """Full pipeline: script -> cloned-voice audio -> lip-synced video.

    Returns:
        (video_path, audio_path, status_message) matching the three
        Gradio output components.
    """
    # Step 1: synthesize the narration track from the script.
    audio_path = generate_audio_from_script(
        script, ref1_video, ref1_transcript, ref2_video, ref2_transcript
    )
    # Step 2: drive the HOST 1 reference face with that audio.
    video_path = generate_video_with_lipsync(audio_path, ref1_video)
    return video_path, audio_path, "✅ Video podcast generated!"
# ------------------------------------------------------------------ Gradio UI
with gr.Blocks(title="2nd-Host AI - Video Podcast Generator") as demo:
    gr.Markdown("# 🎥 2nd-Host AI — Video Podcast Generator")
    gr.Markdown("Upload reference videos + transcripts. Enter script. Get video with lip-sync.")

    # Reference inputs: one column per host; HOST 2 is optional.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### HOST 1 Reference")
            ref1_video = gr.Video(label="Reference Video (15s, face)", sources=["upload"])
            ref1_txt = gr.Textbox(label="Transcript", placeholder="What they said in video")
        with gr.Column():
            gr.Markdown("### HOST 2 Reference (Optional)")
            ref2_video = gr.Video(label="Reference Video (15s, face)", sources=["upload"])
            ref2_txt = gr.Textbox(label="Transcript", placeholder="What they said in video")

    # Script entry + trigger.
    script = gr.Textbox(label="Script (HOST 1: / HOST 2:)", lines=8)
    btn = gr.Button("Generate Video Podcast")

    # Outputs, in the same order generate_video_podcast returns them.
    video_out = gr.Video(label="Generated Video")
    audio_out = gr.Audio(label="Generated Audio")
    status = gr.Textbox(label="Status")

    btn.click(
        generate_video_podcast,
        inputs=[script, ref1_video, ref1_txt, ref2_video, ref2_txt],
        outputs=[video_out, audio_out, status],
    )

demo.launch()