## ENVIRONMENT VARIABLES
# WAVEFORM_MODAL_VOLUME    (read by process_audio: name of the Modal Volume for audio uploads)
# MODERATION_MODAL_VOLUME  (read by process_video: name of the Modal Volume for video uploads)
# MODAL_TOKEN_ID           (not read in this file; presumably consumed by the modal client)
# MODAL_ENVIRONMENT        (not read in this file; presumably consumed by the modal client)
# MODAL_TOKEN_SECRET       (not read in this file; presumably consumed by the modal client)

import os
import cv2
import time
import uuid
import modal
import shutil
import logging
import tempfile
import gradio as gr
import numpy as np
import soundfile as sf

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def _remove_temp_file(path) -> None:
    """Delete a temporary file on a best-effort basis; never raises."""
    if not path:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as e:
        # Cleanup is best-effort, but a failure should be visible in the logs.
        logger.warning("Could not remove temporary file '%s': %s", path, e)


def _preprocess_audio_to_wav_pcm_mono(input_path: str) -> str:
    """
    Convert the given audio file to a 16-bit PCM, mono WAV file.

    The original sampling rate is preserved (no resampling). Channels are
    downmixed by averaging.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        Path of a newly written WAV file in the system temp directory. The
        caller is responsible for deleting it.

    Raises:
        Exception: Whatever soundfile raises while reading or writing; the
            error is logged and re-raised unchanged.
    """
    try:
        # always_2d=True yields a 2-D array even for mono input, so the
        # downmix below works for any channel count.
        data, sr = sf.read(input_path, always_2d=True)
    except Exception as e:
        logger.exception("Failed to read audio file '%s': %s", input_path, e)
        raise

    out_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4().hex}.wav")
    try:
        mono = data.mean(axis=1)
        sf.write(out_path, mono, int(sr), subtype="PCM_16", format="WAV")
    except Exception as e:
        logger.exception("Failed to write processed WAV file for '%s': %s", input_path, e)
        # Do not leave a partially written file behind.
        _remove_temp_file(out_path)
        raise
    return out_path


def process_audio(original_audio_path, dubbed_audio_path, email, company_name, tolerance) -> str:
    """
    Submit an original/dubbed audio pair for waveform matching on Modal.

    Steps:
        1. Convert both inputs to 16-bit PCM mono WAV files.
        2. Upload the converted files to the Modal Volume named by the
           WAVEFORM_MODAL_VOLUME environment variable.
        3. Spawn the "reception_handler" function of the "Waveform-Matching"
           Modal app (fire-and-forget; the result is not awaited here).

    Args:
        original_audio_path: Local path of the original audio file.
        dubbed_audio_path: Local path of the dubbed audio file.
        email: Forwarded to the remote function as ``email``.
        company_name: Forwarded to the remote function as ``company_name``.
        tolerance: Forwarded to the remote function as ``tolerance_percentage``.

    Returns:
        A human-readable status string. Failures are reported through the
        returned string rather than raised.
    """
    if not original_audio_path or not dubbed_audio_path:
        logger.error("process_audio called without both audio files.")
        return "Please upload both the original and dubbed audio files."

    modal_volume = os.environ.get('WAVEFORM_MODAL_VOLUME')
    if not modal_volume:
        logger.error("WAVEFORM_MODAL_VOLUME is not set; cannot upload audio files.")
        return "Error uploading audio files to Cloud Storage."

    # NOTE(review): second-resolution timestamps collide when two requests
    # arrive within the same second, and the later upload would overwrite the
    # earlier one. Kept as-is because the remote handler's expectations about
    # this id are not visible from here.
    processing_id = str(int(time.time()))
    remote_original = f"/{processing_id}/original_audio.wav"
    remote_dubbed = f"/{processing_id}/dubbed_audio.wav"

    processed_original = None
    processed_dubbed = None
    try:
        # 1. Preprocess audio files: WAV, PCM, mono, original sampling rate.
        try:
            processed_original = _preprocess_audio_to_wav_pcm_mono(original_audio_path)
            processed_dubbed = _preprocess_audio_to_wav_pcm_mono(dubbed_audio_path)
        except Exception as e:
            logger.error("Error preprocessing audio files: %s", e)
            return "Error preprocessing audio files."

        # 2. Upload the processed files to the Modal Volume.
        try:
            bsodtv_storage = modal.Volume.from_name(modal_volume)
            with bsodtv_storage.batch_upload() as batch:
                batch.put_file(processed_original, remote_original)
                batch.put_file(processed_dubbed, remote_dubbed)
        except Exception as e:
            logger.exception("Error uploading audio files to Modal Storage: %s", e)
            return "Error uploading audio files to Cloud Storage."
    finally:
        # Runs on every exit path, including a failure on the second
        # preprocessing call, so no temporary WAV file is leaked.
        for temp_path in (processed_original, processed_dubbed):
            _remove_temp_file(temp_path)

    # 3. Call Modal to trigger processing.
    try:
        waveform_matching_function = modal.Function.from_name("Waveform-Matching", "reception_handler")
        waveform_matching_function.spawn(
            processing_id=processing_id,
            original_file=remote_original,
            dubbed_file=remote_dubbed,
            email=email,
            company_name=company_name,
            tolerance_percentage=tolerance,
        )
    except Exception as e:
        logger.exception("Error calling Modal reception_handler: %s", e)
        return "Error calling Outpost to trigger processing."

    return "Processing started. Results will be emailed to you shortly."


def _get_video_dimensions(path: str):
    """
    Return ``(width, height)`` of the video at *path*, or ``(0, 0)`` when the
    dimensions cannot be read. Never raises.
    """
    width = 0
    height = 0
    cap = None
    try:
        cap = cv2.VideoCapture(path)
        if cap.isOpened():
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    except Exception as e:
        logger.debug(f"OpenCV not available or failed to read video dimensions: {e}")
    finally:
        if cap is not None:
            cap.release()
    return width, height


def process_video(video_path, notes, email, company_name) -> str:
    """
    Submit a video for content moderation on Modal.

    Steps:
        1. Upload the provided video to the Modal Volume named by the
           MODERATION_MODAL_VOLUME environment variable.
        2. Read the video dimensions (width, height) with OpenCV.
        3. Spawn the "professional_reception_function" function of the
           "Content-Moderation" Modal app (fire-and-forget; the result is not
           awaited or downloaded here).

    Args:
        video_path: Local path of the video to submit.
        notes: Free-text notes; forwarded as ``input_text`` ("" when None).
        email: Forwarded to the remote function as ``email``.
        company_name: Forwarded to the remote function as ``company_name``.

    Returns:
        A human-readable status string. Failures are reported through the
        returned string rather than raised.
    """
    # Validate inputs
    if not video_path or not os.path.exists(video_path):
        logger.error("Invalid video path provided to process_video.")
        return "Invalid video path."

    try:
        # A missing variable raises KeyError, which the outer handler reports.
        modal_volume_name = os.environ['MODERATION_MODAL_VOLUME']

        # Unique processing folder and paths.
        # NOTE(review): second-resolution timestamps can collide between
        # concurrent requests; see the matching note in process_audio.
        processing_id = str(int(time.time()))
        ext = os.path.splitext(video_path)[1]
        remote_input_path = f"/{processing_id}/input_video{ext}"

        # 1. Upload video to Modal Volume
        volume = modal.Volume.from_name(modal_volume_name)
        try:
            with volume.batch_upload() as batch:
                batch.put_file(video_path, remote_input_path)
        except Exception as e:
            logger.exception("Error uploading video to Modal Storage: %s", e)
            return "Error uploading video to Cloud Storage."

        # 2. Obtain video dimensions
        width, height = _get_video_dimensions(video_path)
        if width <= 0 or height <= 0:
            logger.warning(
                "Could not determine dimensions of '%s'; submitting size=(%d, %d).",
                video_path, width, height,
            )

        # 3. Spawn the Modal function (asynchronous)
        try:
            moderation_function = modal.Function.from_name("Content-Moderation", "professional_reception_function")
            moderation_function.spawn(
                input_text=str(notes) if notes is not None else "",
                video_path=remote_input_path,
                size=(int(width), int(height)),
                email=email,
                company_name=company_name,
            )
        except Exception as e:
            logger.exception("Error calling Modal reception_function: %s", e)
            return "Error calling Outpost to trigger processing."

        return "Video Request Obtained"
    except Exception as e:
        logger.exception("Unexpected error in process_video: %s", e)
        return "Unexpected error during video processing."


# Create a professional Gradio interface using the Golden ratio (1.618) for proportions
# Define custom CSS for a professional look
css = """
:root {
    --main-bg-color: #111827;
    --primary-color: #3B82F6;
    --secondary-color: #60A5FA;
    --text-color: #F9FAFB;
    --text-secondary: #9CA3AF;
    --card-bg: #1F2937;
    --border-color: #374151;
    --accent-blue: #3B82F6;
    --accent-yellow: #FBBF24;
    --accent-red: #EF4444;
    --accent-green: #22C55E;
    --border-radius: 8px;
    --golden-ratio: 1.618;
    --font-header: 'Barlow', sans-serif;
    --font-body: 'Work Sans', sans-serif;
}

body {
    font-family: var(--font-body);
    background-color: var(--main-bg-color);
    color: var(--text-color);
}

.container {
    max-width: 100%;
    margin: 0 auto;
    padding: calc(20px * var(--golden-ratio));
    background-color: var(--main-bg-color);
    border-radius: calc(var(--border-radius) * var(--golden-ratio));
    box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
}

.logo-container {
    display: flex;
    justify-content: center;
    margin-bottom: calc(20px * var(--golden-ratio));
    padding: 15px;
    background-color: var(--card-bg);
    border-radius: var(--border-radius);
    border: 1px solid var(--border-color);
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.2);
}

.logo {
    max-width: 300px;
    max-height: 100px;
    transition: transform 0.3s ease;
    display: block; /* Ensure it's a block element */
    margin: 0 auto; /* This will center a block element within its flex container */
}

.logo:hover {
    transform: scale(1.05);
}

.header {
    text-align: center;
    margin-bottom: calc(30px * var(--golden-ratio));
    padding: calc(15px * var(--golden-ratio));
    background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 100%);
    color: white;
    border-radius: var(--border-radius);
    box-shadow: 0 4px 10px rgba(59, 130, 246, 0.3);
}

.header h1 {
    color: white;
    font-family: var(--font-header);
    font-size: calc(1.5rem * var(--golden-ratio));
    margin-bottom: calc(0.5rem * var(--golden-ratio));
    text-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
    font-weight: 600;
}

.header p {
    color: rgba(255, 255, 255, 0.9);
    font-size: 1rem;
    max-width: calc(600px * var(--golden-ratio));
    margin: 0 auto;
}

.input-section, .output-section {
    background-color: var(--card-bg);
    border: 1px solid var(--border-color);
    border-radius: var(--border-radius);
    padding: calc(20px * var(--golden-ratio));
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.2);
    margin-bottom: 20px;
    transition: all 0.3s ease;
}

.input-section:hover, .output-section:hover {
    box-shadow: 0 10px 20px rgba(0, 0, 0, 0.3);
    border-color: var(--primary-color);
}

.input-section {
    flex: var(--golden-ratio);
}

.output-section {
    flex: 1;
}

.footer {
    text-align: center;
    margin-top: calc(30px * var(--golden-ratio));
    padding: 15px;
    color: var(--text-secondary);
    font-size: 0.9rem;
    border-top: 1px solid var(--border-color);
}

/* Improve form elements */
.gradio-slider input[type=range] {
    accent-color: var(--primary-color);
}

.gradio-textbox input, .gradio-textbox textarea {
    background-color: var(--main-bg-color) !important;
    border: 1px solid var(--border-color) !important;
    border-radius: var(--border-radius) !important;
    padding: 10px !important;
    color: var(--text-color) !important;
    transition: all 0.3s ease !important;
}

.gradio-textbox input:focus, .gradio-textbox textarea:focus {
    border-color: var(--primary-color) !important;
    box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.3) !important;
}

.gradio-button {
    background-color: var(--primary-color) !important;
    color: white !important;
    border-radius: var(--border-radius) !important;
    padding: calc(10px * var(--golden-ratio)) calc(20px * var(--golden-ratio)) !important;
    font-weight: 600 !important;
    font-family: var(--font-header) !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 4px 6px rgba(59, 130, 246, 0.3) !important;
    border: none !important;
}

.gradio-button:hover {
    background-color: var(--secondary-color) !important;
    transform: translateY(-2px);
    box-shadow: 0 6px 12px rgba(59, 130, 246, 0.4) !important;
}

/* Golden ratio spacing for elements */
.gradio-row {
    margin-bottom: calc(16px * var(--golden-ratio)) !important;
}

/* Additional dark theme adjustments */
.gradio-container {
    background-color: var(--main-bg-color) !important;
}

.gradio-form {
    background-color: var(--card-bg) !important;
    border: 1px solid var(--border-color) !important;
}

/* Labels and text styling */
label {
    color: var(--text-color) !important;
    font-family: var(--font-body) !important;
}

/* Responsive adjustments */
@media (max-width: 768px) {
    .container {
        padding: 15px;
    }
    .input-section, .output-section {
        padding: 15px;
    }
}
"""

# Create a Blocks interface for more customization
with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple")) as demo:
    with gr.Column(elem_classes="container"):
        # Header section. The <h1>/<p> elements are what the `.header h1` and
        # `.header p` rules in `css` target.
        with gr.Column(elem_classes="header"):
            gr.HTML("""
                <h1>BSOD.tv - Dub QC Demo</h1>
                <p>
                    Professional audio synchronization verification for media localization.<br>
                    Upload original and dubbed .wav files to start the QC process.
                </p>
            """)

        # Main content with specified layout
        with gr.Tabs():
            with gr.Tab("Dub Quality Control"):
                # First Row: Half Original Audio Input, Half Dubbed Audio Input
                with gr.Row(elem_classes="input-section"):
                    with gr.Column(scale=1):
                        original_audio = gr.Audio(type="filepath", label="Original .wav file", sources=['upload'], format="wav")
                    with gr.Column(scale=1):
                        dubbed_audio = gr.Audio(type="filepath", label="Dubbed .wav file", sources=['upload'], format="wav")

                # Second Row: 2/3 Email Input, 1/3 Company Name Input
                with gr.Row(elem_classes="input-section"):
                    with gr.Column(scale=2):
                        _email = gr.Textbox(label="Email")
                    with gr.Column(scale=1):
                        _company_name = gr.Textbox(label="Company Name")

                # Third Row: Tolerance Percentage
                with gr.Row(elem_classes="input-section"):
                    _tolerance = gr.Slider(0, 100, value=5, label="Tolerance Percentage", info="Set the tolerance for audio comparison.")

                # Fourth Row: Processing Status
                with gr.Row(elem_classes="output-section"):
                    output = gr.Text(label="Processing Status")

                with gr.Row():
                    submit_btn = gr.Button("Process Audio", variant="primary")

                with gr.Row():
                    gr.Markdown("### Results")
                    gr.Markdown("Once processing is complete, results will be emailed to the address provided.")

                # Footer
                with gr.Row(elem_classes="footer"):
                    gr.Markdown("© BSOD.tv - Professional Dub Quality Control")

            with gr.Tab("Content Moderation"):
                # First Row: Left Department Notes (Textbox), Right Video input
                with gr.Row(elem_classes="input-section"):
                    with gr.Column(scale=1):
                        cm_notes = gr.Textbox(label="Department Notes", lines=6, placeholder="Enter notes for the moderation team...")
                    with gr.Column(scale=1):
                        cm_video_in = gr.Video(label="Video Input", sources=["upload"], interactive=True)

                # Second Row: Email and Company Name (2/3 and 1/3 columns)
                with gr.Row(elem_classes="input-section"):
                    with gr.Column(scale=2):
                        cm_email = gr.Textbox(label="Email")
                    with gr.Column(scale=1):
                        cm_company_name = gr.Textbox(label="Company Name")

                # Third Row: Status text returned by process_video
                # (a Textbox, not a video player, despite the variable name)
                with gr.Row(elem_classes="output-section"):
                    cm_video_out = gr.Textbox(label="Output")

                # Final Row: Process button
                with gr.Row():
                    cm_process_btn = gr.Button("Process", variant="primary")

    # Wire Dub Quality Control processing
    submit_btn.click(
        fn=process_audio,
        inputs=[original_audio, dubbed_audio, _email, _company_name, _tolerance],
        outputs=output
    )

    # Wire Content Moderation processing
    cm_process_btn.click(
        fn=process_video,
        inputs=[cm_video_in, cm_notes, cm_email, cm_company_name],
        outputs=cm_video_out
    )

if __name__ == "__main__":
    # To run this file locally, install the third-party packages it imports:
    # pip install gradio modal soundfile numpy opencv-python
    demo.launch()