# Ai_lip_sync / app.py
# Source: Hugging Face Space by Actual-Innocence (commit c6e4b29, "Update app.py").
# NOTE(review): these lines were scraped web-page residue, not Python; kept as comments.
import os
import re
import tempfile
import subprocess
import gradio as gr
from pydub import AudioSegment
import soundfile as sf
# FIX: the original `import NeuTTS_Air_q4_GGUF` is not an importable module name and
# left `NeuTTSAir` (used below) undefined. The class lives in the neutts-air package.
from neuttsair.neutts import NeuTTSAir

# Load NeuTTS-Air (Q4-GGUF) once at module import so every request reuses it.
# CPU devices keep this runnable on free Spaces hardware.
print("🧠 Loading NeuTTS-Air (Q4-GGUF)...")
tts = NeuTTSAir(
    backbone_repo="neuphonic/neutts-air-q4-gguf",  # quantized GGUF backbone (Q4, not Q8)
    backbone_device="cpu",
    codec_repo="neuphonic/neucodec",               # neural audio codec for reference encoding
    codec_device="cpu",
)
def parse_host_script(script):
    """Split a podcast script into ordered (speaker, text) segments.

    Lines prefixed with "HOST 1:" or "HOST 2:" are attributed to that host
    with the prefix removed; any other non-empty line is attributed to
    HOST 1 verbatim. Blank lines are skipped.
    """
    segments = []
    for raw_line in script.strip().split("\n"):
        text = raw_line.strip()
        if not text:
            continue
        for tag in ("HOST 1", "HOST 2"):
            prefix = tag + ":"
            if text.startswith(prefix):
                segments.append((tag, text[len(prefix):].strip()))
                break
        else:
            # No recognized speaker prefix: default the line to HOST 1.
            segments.append(("HOST 1", text))
    return segments
def generate_audio_from_script(script, ref1_wav, ref1_txt, ref2_wav, ref2_txt):
    """Synthesize the whole script as one WAV via NeuTTS-Air voice cloning.

    Args:
        script: Multi-line text using "HOST 1:" / "HOST 2:" prefixes.
        ref1_wav, ref1_txt: HOST 1 reference audio path and its transcript.
        ref2_wav, ref2_txt: HOST 2 reference audio path and its transcript.

    Returns:
        Path to the concatenated WAV file for the full script.
    """
    segments = parse_host_script(script)
    ref_map = {
        "HOST 1": (ref1_wav, ref1_txt),
        "HOST 2": (ref2_wav, ref2_txt),
    }
    # FIX: unique work dir instead of fixed /tmp names, so concurrent
    # requests don't overwrite each other's segment/output files.
    work_dir = tempfile.mkdtemp(prefix="tts_segments_")
    # FIX: encode each host's reference at most once, not once per segment.
    codes_cache = {}
    output_files = []
    for i, (tag, text) in enumerate(segments):
        host = tag if tag in ref_map else "HOST 1"
        ref_wav, ref_text = ref_map[host]
        if host not in codes_cache:
            codes_cache[host] = tts.encode_reference(ref_wav)
        wav = tts.infer(text, codes_cache[host], ref_text)
        out_path = os.path.join(work_dir, f"seg_{i}.wav")
        sf.write(out_path, wav, 24000)  # NeuTTS-Air outputs 24 kHz audio
        output_files.append(out_path)
    # Concatenate all per-segment WAVs in script order.
    combined = AudioSegment.empty()
    for f in output_files:
        combined += AudioSegment.from_wav(f)
    final_path = os.path.join(work_dir, "script_audio.wav")
    combined.export(final_path, format="wav")
    return final_path
def generate_video_with_lipsync(audio_path, ref_video_path):
    """Produce a lip-synced video for `audio_path` driven by `ref_video_path`.

    Placeholder implementation: the real lip-sync model (e.g. SadTalker,
    Wav2Lip) is not wired in yet, so the reference video path is returned
    unchanged.

    Args:
        audio_path: Path to the generated speech WAV.
        ref_video_path: Path to the reference face video.

    Returns:
        Path to the output video (currently just `ref_video_path`).
    """
    # Intended output location once a real pipeline is plugged in.
    output_video = f"/tmp/output_{os.path.basename(audio_path)}.mp4"
    # TODO: invoke the lip-sync model here, e.g.:
    # subprocess.run([...])
    # FIX: the original returned the undefined name `ref_video`, which raised
    # NameError on every call; the parameter is `ref_video_path`.
    return ref_video_path
def generate_video_podcast(script, ref1_video, ref1_transcript, ref2_video=None, ref2_transcript=None):
    """End-to-end pipeline: script -> cloned speech audio -> lip-synced video.

    Args:
        script: Multi-line script with "HOST 1:" / "HOST 2:" prefixes.
        ref1_video, ref1_transcript: HOST 1 reference video and transcript.
        ref2_video, ref2_transcript: Optional HOST 2 reference; falls back
            to HOST 1's reference when not provided.

    Returns:
        (video_path, audio_path, status_message) tuple for the Gradio outputs.
    """
    # FIX: with the old code, omitting HOST 2 made ref_map hold (None, None),
    # crashing the TTS stage on any "HOST 2:" line. Reuse HOST 1's reference.
    if ref2_video is None:
        ref2_video, ref2_transcript = ref1_video, ref1_transcript
    # NOTE(review): a video file path is passed where the TTS expects reference
    # audio — presumably the audio track should be extracted first; verify.
    audio_path = generate_audio_from_script(script, ref1_video, ref1_transcript, ref2_video, ref2_transcript)
    # Drive the lip-sync stage with HOST 1's face video.
    video_path = generate_video_with_lipsync(audio_path, ref1_video)
    return video_path, audio_path, "✅ Video podcast generated!"
# ---- Gradio interface ----
with gr.Blocks(title="2nd-Host AI - Video Podcast Generator") as demo:
    gr.Markdown("# 🎥 2nd-Host AI — Video Podcast Generator")
    gr.Markdown("Upload reference videos + transcripts. Enter script. Get video with lip-sync.")

    # Side-by-side reference inputs for each host.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### HOST 1 Reference")
            host1_video = gr.Video(label="Reference Video (15s, face)", sources=["upload"])
            host1_transcript = gr.Textbox(label="Transcript", placeholder="What they said in video")
        with gr.Column():
            gr.Markdown("### HOST 2 Reference (Optional)")
            host2_video = gr.Video(label="Reference Video (15s, face)", sources=["upload"])
            host2_transcript = gr.Textbox(label="Transcript", placeholder="What they said in video")

    # Script input and trigger.
    script_input = gr.Textbox(label="Script (HOST 1: / HOST 2:)", lines=8)
    generate_btn = gr.Button("Generate Video Podcast")

    # Pipeline outputs.
    video_output = gr.Video(label="Generated Video")
    audio_output = gr.Audio(label="Generated Audio")
    status_output = gr.Textbox(label="Status")

    generate_btn.click(
        generate_video_podcast,
        inputs=[script_input, host1_video, host1_transcript, host2_video, host2_transcript],
        outputs=[video_output, audio_output, status_output],
    )

demo.launch()