## ENVIRONMENT VARIABLES
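# WAVEFORM_MODAL_VOLUME (name of the Modal Volume read by process_audio)
# MODERATION_MODAL_VOLUME (name of the Modal Volume read by process_video)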
# MODAL_TOKEN_ID
# MODAL_ENVIRONMENT
# MODAL_TOKEN_SECRET
import os
import cv2
import time
import uuid
import modal
import shutil
import logging
import gradio as gr
import numpy as np
import soundfile as sf
# Configure the root logger at DEBUG level at import time.
# NOTE(review): DEBUG is very verbose for a deployed app - confirm this is intended.
logging.basicConfig(level=logging.DEBUG)
# Module-level logger used by the processing functions in this file.
logger = logging.getLogger(__name__)
def _preprocess_audio_to_wav_pcm_mono(input_path: str) -> str:
    """
    Re-encode an audio file as a mono, 16-bit PCM WAV file.

    The sampling rate reported by the reader is reused unchanged for the
    output, so no resampling takes place. Read and write failures are logged
    and then re-raised to the caller.

    Returns the path of the newly written WAV file under /tmp.
    """
    # always_2d=True is requested so that axis 1 can be averaged below
    # regardless of how many channels the input has.
    try:
        samples, sample_rate = sf.read(input_path, always_2d=True)
    except Exception as read_error:
        logger.error(f"Failed to read audio file '{input_path}': {read_error}")
        raise

    try:
        # Collapse every channel into one by taking the per-frame average.
        downmixed = np.mean(samples, axis=1)
        wav_name = uuid.uuid4().hex + ".wav"
        wav_path = "/tmp/" + wav_name
        sf.write(
            file=wav_path,
            data=downmixed,
            samplerate=int(sample_rate),
            subtype="PCM_16",
            format="WAV",
        )
    except Exception as write_error:
        logger.error(f"Failed to write processed WAV file for '{input_path}': {write_error}")
        raise
    else:
        return wav_path
def process_audio(original_audio_path, dubbed_audio_path, email, company_name, tolerance):
    """
    Preprocess two audio files, upload them to a Modal Volume and spawn the
    remote "Waveform-Matching" reception handler.

    Steps:
    1. Convert both inputs to mono 16-bit PCM WAV (sampling rate preserved).
    2. Upload the converted files to the Modal Volume named by the
       WAVEFORM_MODAL_VOLUME environment variable, under /<processing_id>/.
    3. Spawn the remote ``reception_handler`` function without waiting for
       its result.

    Args:
        original_audio_path: Local path of the original audio file.
        dubbed_audio_path: Local path of the dubbed audio file.
        email: Forwarded unchanged to the remote function.
        company_name: Forwarded unchanged to the remote function.
        tolerance: Forwarded to the remote function as ``tolerance_percentage``.

    Returns:
        A human-readable status string: a success message, or one of the
        error messages for the preprocessing, upload or spawn step.

    Raises:
        KeyError: If MODAL_TOKEN_ID, MODAL_TOKEN_SECRET, MODAL_ENVIRONMENT or
            WAVEFORM_MODAL_VOLUME is not set in the environment.
    """
    # Fail fast with KeyError when the Modal configuration is incomplete.
    # The values themselves are not used directly in this function.
    for required_var in ("MODAL_TOKEN_ID", "MODAL_TOKEN_SECRET", "MODAL_ENVIRONMENT"):
        _ = os.environ[required_var]
    modal_volume = os.environ['WAVEFORM_MODAL_VOLUME']

    # NOTE(review): the id has one-second resolution, so two requests arriving
    # in the same second share a remote folder - confirm whether the remote
    # handler can accept a more unique id before changing the format.
    processing_id = str(int(time.time()))
    remote_original = f"/{processing_id}/original_audio.wav"
    remote_dubbed = f"/{processing_id}/dubbed_audio.wav"

    # Every temp file created so far is tracked here so that it is removed
    # even when a later preprocessing or upload step fails.
    processed_paths = []
    try:
        # 1. Preprocess: WAV format, PCM encoding, mono, original sampling rate.
        try:
            for source_path in (original_audio_path, dubbed_audio_path):
                processed_paths.append(_preprocess_audio_to_wav_pcm_mono(source_path))
        except Exception as e:
            logger.error(f"Error preprocessing audio files: {e}")
            return "Error preprocessing audio files."

        processed_original, processed_dubbed = processed_paths

        # 2. Upload both processed files to the Modal Volume.
        try:
            bsodtv_storage = modal.Volume.from_name(modal_volume)
            with bsodtv_storage.batch_upload() as batch:
                batch.put_file(processed_original, remote_original)
                batch.put_file(processed_dubbed, remote_dubbed)
        except Exception as e:
            logger.error(f"Error uploading audio files to Modal Storage: {e}")
            return "Error uploading audio files to Cloud Storage."
    finally:
        # Best-effort cleanup of the temporary processed files.
        for temp_path in processed_paths:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass
            except OSError as e:
                logger.warning(f"Could not remove temporary file '{temp_path}': {e}")

    # 3. Call Modal to trigger processing.
    try:
        waveform_matching_function = modal.Function.from_name("Waveform-Matching", "reception_handler")
        waveform_matching_function.spawn(
            processing_id=processing_id,
            original_file=remote_original,
            dubbed_file=remote_dubbed,
            email=email,
            company_name=company_name,
            tolerance_percentage=tolerance,
        )
    except Exception as e:
        logger.error(f"Error calling Modal reception_handler: {e}")
        return "Error calling Outpost to trigger processing."
    return "Processing started. Results will be emailed to you shortly."
def process_video(video_path, notes, email, company_name) -> str:
    """
    Submit a video for content moderation via Modal.

    Steps:
    1. Validate that ``video_path`` points to an existing local file.
    2. Read the frame width and height of the video with OpenCV.
    3. Upload the video to the Modal Volume named by the
       MODERATION_MODAL_VOLUME environment variable, under /<processing_id>/.
    4. Spawn the "Content-Moderation" ``professional_reception_function``
       without waiting for its result.

    Args:
        video_path: Local path of the video to submit.
        notes: Free-form notes; sent as ``input_text`` ("" when None).
        email: Forwarded unchanged to the remote function.
        company_name: Forwarded unchanged to the remote function.

    Returns:
        A human-readable status string: "Video Request Obtained" on success,
        otherwise a message naming the step that failed. Nothing is
        downloaded by this function.
    """
    # Validate inputs
    if not video_path or not os.path.exists(video_path):
        logger.error("Invalid video path provided to process_video.")
        return "Invalid video path."

    def _get_video_dimensions(path: str):
        """Return (width, height) of the video at *path*, or None if unreadable."""
        cap = None
        try:
            cap = cv2.VideoCapture(path)
            if not cap.isOpened():
                logger.error(f"Could not open video to read dimensions: {path}")
                return None
            return (
                int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            )
        except Exception as e:
            logger.error(f"Failed to read video dimensions: {e}")
            return None
        finally:
            # Release the capture handle on every path, including failures.
            if cap is not None:
                cap.release()

    try:
        # A missing variable raises KeyError, reported by the outer handler.
        modal_volume_name = os.environ['MODERATION_MODAL_VOLUME']

        # Unique processing folder and paths.
        # NOTE(review): one-second resolution - concurrent requests in the
        # same second would share a remote folder.
        processing_id = str(int(time.time()))
        ext = os.path.splitext(video_path)[1]
        remote_input_path = f"/{processing_id}/input_video{ext}"

        # Read the dimensions before uploading so an unreadable video is
        # rejected without spending time on the upload.
        dimensions = _get_video_dimensions(video_path)
        if dimensions is None:
            return "Unable to read video dimensions."
        width, height = dimensions

        # Upload video to Modal Volume
        volume = modal.Volume.from_name(modal_volume_name)
        try:
            with volume.batch_upload() as batch:
                batch.put_file(video_path, remote_input_path)
        except Exception as e:
            logger.error(f"Error uploading video to Modal Storage: {e}")
            return "Error uploading video to Cloud Storage."

        # Spawn the Modal function (asynchronous; the result is not awaited)
        try:
            moderation_function = modal.Function.from_name("Content-Moderation", "professional_reception_function")
            moderation_function.spawn(
                input_text=str(notes) if notes is not None else "",
                video_path=remote_input_path,
                size=(width, height),
                email=email,
                company_name=company_name,
            )
        except Exception as e:
            logger.error(f"Error calling Modal reception_function: {e}")
            return "Error calling Outpost to trigger processing."
        return "Video Request Obtained"
    except Exception as e:
        logger.error(f"Unexpected error in process_video: {e}")
        return "Unexpected error during video processing."
# Custom stylesheet for the professional-looking Gradio interface; it is
# passed to gr.Blocks(css=css) when the UI is built.
# The :root rule defines a dark colour palette plus a --golden-ratio (1.618)
# variable that the rules below multiply into paddings, margins and font sizes.
css = """
:root {
--main-bg-color: #111827;
--primary-color: #3B82F6;
--secondary-color: #60A5FA;
--text-color: #F9FAFB;
--text-secondary: #9CA3AF;
--card-bg: #1F2937;
--border-color: #374151;
--accent-blue: #3B82F6;
--accent-yellow: #FBBF24;
--accent-red: #EF4444;
--accent-green: #22C55E;
--border-radius: 8px;
--golden-ratio: 1.618;
--font-header: 'Barlow', sans-serif;
--font-body: 'Work Sans', sans-serif;
}
body {
font-family: var(--font-body);
background-color: var(--main-bg-color);
color: var(--text-color);
}
.container {
max-width: 100%;
margin: 0 auto;
padding: calc(20px * var(--golden-ratio));
background-color: var(--main-bg-color);
border-radius: calc(var(--border-radius) * var(--golden-ratio));
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
}
.logo-container {
display: flex;
justify-content: center;
margin-bottom: calc(20px * var(--golden-ratio));
padding: 15px;
background-color: var(--card-bg);
border-radius: var(--border-radius);
border: 1px solid var(--border-color);
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.2);
}
.logo {
max-width: 300px;
max-height: 100px;
transition: transform 0.3s ease;
display: block; /* Ensure it's a block element */
margin: 0 auto; /* This will center a block element within its flex container */
}
.logo:hover {
transform: scale(1.05);
}
.header {
text-align: center;
margin-bottom: calc(30px * var(--golden-ratio));
padding: calc(15px * var(--golden-ratio));
background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 100%);
color: white;
border-radius: var(--border-radius);
box-shadow: 0 4px 10px rgba(59, 130, 246, 0.3);
}
.header h1 {
color: white;
font-family: var(--font-header);
font-size: calc(1.5rem * var(--golden-ratio));
margin-bottom: calc(0.5rem * var(--golden-ratio));
text-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
font-weight: 600;
}
.header p {
color: rgba(255, 255, 255, 0.9);
font-size: 1rem;
max-width: calc(600px * var(--golden-ratio));
margin: 0 auto;
}
.input-section, .output-section {
background-color: var(--card-bg);
border: 1px solid var(--border-color);
border-radius: var(--border-radius);
padding: calc(20px * var(--golden-ratio));
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.2);
margin-bottom: 20px;
transition: all 0.3s ease;
}
.input-section:hover, .output-section:hover {
box-shadow: 0 10px 20px rgba(0, 0, 0, 0.3);
border-color: var(--primary-color);
}
.input-section {
flex: var(--golden-ratio);
}
.output-section {
flex: 1;
}
.footer {
text-align: center;
margin-top: calc(30px * var(--golden-ratio));
padding: 15px;
color: var(--text-secondary);
font-size: 0.9rem;
border-top: 1px solid var(--border-color);
}
/* Improve form elements */
.gradio-slider input[type=range] {
accent-color: var(--primary-color);
}
.gradio-textbox input, .gradio-textbox textarea {
background-color: var(--main-bg-color) !important;
border: 1px solid var(--border-color) !important;
border-radius: var(--border-radius) !important;
padding: 10px !important;
color: var(--text-color) !important;
transition: all 0.3s ease !important;
}
.gradio-textbox input:focus, .gradio-textbox textarea:focus {
border-color: var(--primary-color) !important;
box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.3) !important;
}
.gradio-button {
background-color: var(--primary-color) !important;
color: white !important;
border-radius: var(--border-radius) !important;
padding: calc(10px * var(--golden-ratio)) calc(20px * var(--golden-ratio)) !important;
font-weight: 600 !important;
font-family: var(--font-header) !important;
transition: all 0.3s ease !important;
box-shadow: 0 4px 6px rgba(59, 130, 246, 0.3) !important;
border: none !important;
}
.gradio-button:hover {
background-color: var(--secondary-color) !important;
transform: translateY(-2px);
box-shadow: 0 6px 12px rgba(59, 130, 246, 0.4) !important;
}
/* Golden ratio spacing for elements */
.gradio-row {
margin-bottom: calc(16px * var(--golden-ratio)) !important;
}
/* Additional dark theme adjustments */
.gradio-container {
background-color: var(--main-bg-color) !important;
}
.gradio-form {
background-color: var(--card-bg) !important;
border: 1px solid var(--border-color) !important;
}
/* Labels and text styling */
label {
color: var(--text-color) !important;
font-family: var(--font-body) !important;
}
/* Responsive adjustments */
@media (max-width: 768px) {
.container {
padding: 15px;
}
.input-section, .output-section {
padding: 15px;
}
}
"""
# Create a Blocks interface for more customization
with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple")) as demo:
with gr.Column(elem_classes="container"):
# Header section
with gr.Column(elem_classes="header"):
gr.HTML("""
Professional audio synchronization verification for media localization.
Upload original and dubbed .wav files to start the QC process.