Spaces:
Sleeping
Sleeping
File size: 6,888 Bytes
85c18a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 |
"""
Lip-Sync Animation Module
=========================
Generates animated GIFs with lip-sync based on audio amplitude.
Functions:
- audio_to_rms_chunks: Extract amplitude data from audio
- generate_lipsync_gif: Create lip-sync animation GIF
"""
from PIL import Image
import imageio
from pathlib import Path
import time
from typing import List, Optional
import os
def audio_to_rms_chunks(audio_path: str, chunk_ms: int = 80) -> List[float]:
    """
    Measure the loudness of an audio file in fixed-length slices.

    The audio is cut into consecutive pieces of ``chunk_ms`` milliseconds and
    the RMS (Root Mean Square) amplitude of every non-empty piece is collected.

    This function is best-effort: any failure (pydub not importable, missing
    file, decoding error, ...) is printed and a fixed placeholder sequence is
    returned instead of raising.

    Args:
        audio_path: Path to the audio file (MP3)
        chunk_ms: Duration of each chunk in milliseconds

    Returns:
        List of RMS values, one per chunk; ``[0]`` when the audio yields no
        non-empty chunks; a fixed 8-value placeholder list on any error.
    """
    try:
        # Imported lazily so that a missing pydub lands in the handler below.
        from pydub import AudioSegment
        from pydub.utils import make_chunks

        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        # Decode the file and slice it into chunk_ms-long segments.
        segments = make_chunks(AudioSegment.from_file(audio_path), chunk_ms)

        # Keep the RMS of every segment that actually contains audio.
        loudness = []
        for segment in segments:
            if len(segment) > 0:
                loudness.append(segment.rms)

        return loudness or [0]
    except Exception as err:
        print(f"Error processing audio: {err}")
        # Placeholder values so the caller can still animate something.
        return [100, 200, 150, 300, 250, 100, 200, 150]  # Fallback animation
def audio_to_rms_chunks_simple(audio_path: str, chunk_ms: int = 80) -> List[float]:
    """
    Produce simulated RMS values when real audio analysis is unavailable.

    The audio duration is guessed from the file size, and a deterministic
    wave-shaped sequence of that length is generated. Used when pydub/ffmpeg
    fails.

    Args:
        audio_path: Path to the audio file
        chunk_ms: Duration of each chunk in milliseconds

    Returns:
        List of simulated RMS values (at least 10 entries, each >= 50), or a
        fixed 10-value list if anything goes wrong (e.g. the file is missing).
    """
    import math

    canned = [100, 200, 150, 300, 250, 100, 200, 150, 100, 200]
    try:
        # Rough duration guess: MP3 at 128kbps is about 16KB per second.
        size_bytes = os.path.getsize(audio_path)
        seconds = size_bytes / 16000

        # Never produce fewer than 10 chunks.
        count = max(int(seconds * 1000 / chunk_ms), 10)

        # Two superimposed sine waves around 150, floored at 50, give a
        # natural-looking open/close mouth motion.
        return [
            max(50, 150 + 100 * math.sin(step * 0.5) + 50 * math.sin(step * 1.2))
            for step in range(count)
        ]
    except Exception:
        return canned
def generate_lipsync_gif(
    avatar_name: str,
    audio_path: str,
    avatars_dir: Path,
    output_dir: Path,
    fps: int = 12,
    output_path: Optional[str] = None
) -> str:
    """
    Generate a lip-sync animated GIF from avatar images and audio.

    The animation works by:
    1. Analyzing audio amplitude (RMS) over time
    2. Selecting mouth frame based on amplitude level
    3. Compositing mouth frame onto base avatar image
    4. Combining all frames into an animated GIF

    Args:
        avatar_name: Name of avatar folder (e.g., 'sample')
        audio_path: Path to the audio file to sync with
        avatars_dir: Base directory containing avatar folders
        output_dir: Directory to save the output GIF (created if missing)
        fps: Frames per second for the animation; must be positive
        output_path: Optional custom output path; its parent directory is
            created if missing

    Returns:
        Path to the generated GIF file

    Raises:
        ValueError: If fps is not positive, or no frames could be generated
        FileNotFoundError: If avatar base.png or mouth frames not found
    """
    # Reject frame rates that would otherwise cause a division by zero or a
    # negative chunk length further down.
    if fps <= 0:
        raise ValueError(f"fps must be a positive number, got {fps}")

    # Accept plain strings as well as Path objects for the directories.
    avatars_dir = Path(avatars_dir)
    output_dir = Path(output_dir)

    # Locate avatar folder and files
    avatar_folder = avatars_dir / avatar_name
    base_path = avatar_folder / "base.png"
    mouth_frames_paths = sorted(avatar_folder.glob("mouth_*.png"))

    # Validate avatar files exist
    if not base_path.exists():
        raise FileNotFoundError(f"Base image not found: {base_path}")
    if not mouth_frames_paths:
        raise FileNotFoundError(f"No mouth frames found in: {avatar_folder}")

    # Load base image (the avatar face). The context manager closes the file
    # handle; convert() returns an independent in-memory copy.
    with Image.open(base_path) as base_file:
        base_image = base_file.convert("RGBA")
    size = base_image.size

    # Load all mouth frame images, scaled to the base image size.
    mouth_frames = []
    for path in mouth_frames_paths:
        with Image.open(path) as mouth_file:
            mouth_frames.append(mouth_file.convert("RGBA").resize(size))

    # One audio chunk per animation frame; never let the chunk length reach 0
    # (which would happen for fps > 1000).
    chunk_ms = max(1, int(1000 / fps))

    # Try to extract audio amplitude data. The guard is defensive: the primary
    # extractor is expected to handle its own errors.
    try:
        rms_values = audio_to_rms_chunks(audio_path, chunk_ms=chunk_ms)
    except Exception as e:
        print(f"Primary audio processing failed: {e}")
        print("Using fallback animation method...")
        rms_values = audio_to_rms_chunks_simple(audio_path, chunk_ms=chunk_ms)

    # Handle edge case of empty or invalid (all-silent) audio
    if not rms_values or all(v == 0 for v in rms_values):
        rms_values = audio_to_rms_chunks_simple(audio_path, chunk_ms=chunk_ms)

    # Peak value used to normalize RMS values to the 0-1 range
    max_rms = max(rms_values)
    if max_rms <= 0:
        max_rms = 1

    # Generate animation frames. Each mouth index always yields the same
    # composite, so build it once and reuse it for every matching chunk.
    last_mouth_index = len(mouth_frames) - 1
    composites = {}
    frames = []
    for rms in rms_values:
        # Map mouth openness ratio (0 to 1) to a valid mouth frame index
        ratio = rms / max_rms
        mouth_index = max(0, min(int(ratio * last_mouth_index), last_mouth_index))

        frame_rgb = composites.get(mouth_index)
        if frame_rgb is None:
            # Composite mouth onto base image
            frame = Image.alpha_composite(base_image, mouth_frames[mouth_index])
            # Flatten onto a white background: GIF frames are saved as RGB.
            # alpha_composite of two RGBA images is RGBA, so the last band
            # is the alpha channel.
            frame_rgb = Image.new("RGB", frame.size, (255, 255, 255))
            frame_rgb.paste(frame, mask=frame.split()[-1])
            composites[mouth_index] = frame_rgb
        frames.append(frame_rgb)

    # Ensure output directory exists
    output_dir.mkdir(parents=True, exist_ok=True)

    if output_path is None:
        # Generate a millisecond-timestamped output filename
        timestamp = int(time.time() * 1000)
        output_path = str(output_dir / f"lipsync_{timestamp}.gif")
    else:
        # A custom path may live outside output_dir; make sure it is writable.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    if not frames:
        raise ValueError("No frames generated for animation")

    # Save as animated GIF
    imageio.mimsave(
        output_path,
        frames,
        fps=fps,
        loop=0  # Loop forever
    )
    return output_path