File size: 3,889 Bytes
45c7975
5a51af4
 
 
 
 
 
c6e4b29
5a51af4
 
88a3e7f
5a51af4
88a3e7f
5a51af4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45c7975
5a51af4
 
 
 
 
 
 
 
 
 
 
45c7975
5a51af4
 
 
 
 
 
 
 
45c7975
5a51af4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45c7975
5a51af4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import os
import re
import tempfile
import subprocess
import gradio as gr
from pydub import AudioSegment
import soundfile as sf
import NeuTTS_Air_q4_GGUF

# Load the NeuTTS-Air text-to-speech model on CPU (Q4-quantized GGUF backbone
# plus the NeuCodec neural audio codec).
# NOTE(review): `NeuTTSAir` is not bound by the imports above —
# `import NeuTTS_Air_q4_GGUF` imports a module, not this class — so this line
# will raise NameError at startup. Confirm the intended import (likely
# something like `from neuttsair.neutts import NeuTTSAir`).
print("🧠 Loading NeuTTS-Air (Q4-GGUF)...")
tts = NeuTTSAir(
    backbone_repo="neuphonic/neutts-air-q4-gguf",  # quantized LLM backbone
    backbone_device="cpu",
    codec_repo="neuphonic/neucodec",               # audio codec for encode/decode
    codec_device="cpu"
)

def parse_host_script(script):
    """Split a dialogue script into (speaker, text) pairs.

    Lines beginning with "HOST 1:" or "HOST 2:" are attributed to that
    speaker; any other non-empty line defaults to "HOST 1". Blank lines
    are dropped. Returns a list of (tag, text) tuples in script order.
    """
    segments = []
    for raw_line in script.strip().split('\n'):
        text = raw_line.strip()
        if not text:
            continue
        # Check known speaker prefixes; fall through to HOST 1 otherwise.
        for tag in ("HOST 1", "HOST 2"):
            prefix = tag + ":"
            if text.startswith(prefix):
                segments.append((tag, text[len(prefix):].strip()))
                break
        else:
            segments.append(("HOST 1", text))
    return segments

def generate_audio_from_script(script, ref1_wav, ref1_txt, ref2_wav, ref2_txt):
    """Synthesize the whole script as one wav, cloning each host's voice.

    Parameters
    ----------
    script   : str -- dialogue with "HOST 1:" / "HOST 2:" line prefixes
    ref1_wav : reference audio for HOST 1 (voice to clone)
    ref1_txt : transcript of HOST 1's reference audio
    ref2_wav : reference audio for HOST 2
    ref2_txt : transcript of HOST 2's reference audio

    Returns
    -------
    str -- path of the concatenated wav containing every segment in order.
    """
    segments = parse_host_script(script)
    ref_map = {
        "HOST 1": (ref1_wav, ref1_txt),
        "HOST 2": (ref2_wav, ref2_txt)
    }

    # Write into a unique directory: the previous fixed "/tmp/seg_{i}.wav"
    # names collided across concurrent requests and were not portable.
    work_dir = tempfile.mkdtemp(prefix="neutts_")

    # Cache reference encodings so each voice is encoded once, not once
    # per segment (encoding is the expensive step and its result is reusable).
    codes_cache = {}
    output_files = []
    for i, (tag, text) in enumerate(segments):
        ref_wav, ref_text = ref_map.get(tag, ref_map["HOST 1"])
        if ref_wav not in codes_cache:
            codes_cache[ref_wav] = tts.encode_reference(ref_wav)
        wav = tts.infer(text, codes_cache[ref_wav], ref_text)
        out_path = os.path.join(work_dir, f"seg_{i}.wav")
        sf.write(out_path, wav, 24000)  # 24 kHz output sample rate
        output_files.append(out_path)

    # Concatenate all segment wavs in script order.
    combined = AudioSegment.empty()
    for f in output_files:
        combined += AudioSegment.from_wav(f)
    final_path = os.path.join(work_dir, "script_audio.wav")
    combined.export(final_path, format="wav")

    return final_path

def generate_video_with_lipsync(audio_path, ref_video_path):
    """Produce a lip-synced video for `audio_path` from `ref_video_path`.

    Placeholder implementation: the actual lip-sync model (e.g. SadTalker,
    Wav2Lip) is not wired up yet, so the reference video path is returned
    unchanged. `output_video` marks where the real result would be written.

    Bug fix: the original returned the undefined name `ref_video` (the
    parameter is `ref_video_path`), raising NameError on every call.
    """
    output_video = f"/tmp/output_{os.path.basename(audio_path)}.mp4"

    # TODO: invoke the lip-sync pipeline here, e.g.
    # subprocess.run([...], check=True)

    return ref_video_path

def generate_video_podcast(script, ref1_video, ref1_transcript, ref2_video=None, ref2_transcript=None):
    """End-to-end pipeline: script -> cloned-voice audio -> lip-synced video.

    Returns a (video_path, audio_path, status_message) triple matching the
    three Gradio output components wired to this handler.
    """
    # Step 1: synthesize the full-script audio with each host's cloned voice.
    audio_path = generate_audio_from_script(
        script, ref1_video, ref1_transcript, ref2_video, ref2_transcript
    )

    # Step 2: drive the lip-sync stage with HOST 1's reference video.
    video_path = generate_video_with_lipsync(audio_path, ref1_video)

    return video_path, audio_path, "✅ Video podcast generated!"

# Gradio UI
with gr.Blocks(title="2nd-Host AI - Video Podcast Generator") as demo:
    gr.Markdown("# 🎥 2nd-Host AI — Video Podcast Generator")
    gr.Markdown("Upload reference videos + transcripts. Enter script. Get video with lip-sync.")
    
    with gr.Row():
        with gr.Column():
            gr.Markdown("### HOST 1 Reference")
            ref1_video = gr.Video(label="Reference Video (15s, face)", sources=["upload"])
            ref1_txt = gr.Textbox(label="Transcript", placeholder="What they said in video")
        
        with gr.Column():
            gr.Markdown("### HOST 2 Reference (Optional)")
            ref2_video = gr.Video(label="Reference Video (15s, face)", sources=["upload"])
            ref2_txt = gr.Textbox(label="Transcript", placeholder="What they said in video")
    
    script = gr.Textbox(label="Script (HOST 1: / HOST 2:)", lines=8)
    btn = gr.Button("Generate Video Podcast")
    
    video_out = gr.Video(label="Generated Video")
    audio_out = gr.Audio(label="Generated Audio")
    status = gr.Textbox(label="Status")
    
    btn.click(
        generate_video_podcast,
        inputs=[script, ref1_video, ref1_txt, ref2_video, ref2_txt],
        outputs=[video_out, audio_out, status]
    )

demo.launch()