File size: 44,176 Bytes
a8189eb b5e3557 a8189eb b5e3557 a8189eb b5e3557 0992c47 a8189eb a812039 a8189eb 0992c47 f5c2623 a812039 f5c2623 b5e3557 a8189eb b5e3557 a8189eb a812039 f5c2623 a8189eb a812039 0992c47 a8189eb a812039 a8189eb f5c2623 a8189eb 90ea876 a8189eb f5c2623 b5e3557 b313b80 b5e3557 b313b80 b5e3557 b313b80 b5e3557 b313b80 b5e3557 8214b7d b5e3557 1822d3c b5e3557 8214b7d b5e3557 b313b80 0613be4 e459c2b 0613be4 3282c39 0613be4 04a7213 0613be4 749bfee 0613be4 e237e73 0613be4 e459c2b a06400e b313b80 8214b7d b313b80 0613be4 96c1034 0613be4 f5c2623 b313b80 f5c2623 a8189eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 |
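## ENVIRONMENT VARIABLES
# WAVEFORM_MODAL_VOLUME    (Modal Volume name read by process_audio)
# MODERATION_MODAL_VOLUME  (Modal Volume name read by process_video)
# MODAL_TOKEN_ID
# MODAL_ENVIRONMENT
# MODAL_TOKEN_SECRET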
import os
import cv2
import time
import uuid
import modal
import shutil
import logging
import gradio as gr
import numpy as np
import soundfile as sf
# Configure the root logger at DEBUG level, so messages of every severity
# are emitted.
logging.basicConfig(level=logging.DEBUG)
# Module-level logger used by the processing functions in this file.
logger = logging.getLogger(__name__)
def _preprocess_audio_to_wav_pcm_mono(input_path: str) -> str:
    """
    Convert the given audio file to a mono, 16-bit PCM WAV file.

    All channels are averaged into a single channel and the result is written
    to a new, uniquely named file under ``/tmp``. The original sampling rate
    is preserved (no resampling).

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        The path to the temporary processed WAV file. The file is not removed
        here; the caller is expected to delete it when done.

    Raises:
        Exception: Any error raised while reading the input or writing the
            output is logged and re-raised unchanged.
    """
    try:
        # always_2d=True keeps a channel axis even for mono input, so the
        # downmix below can always average over axis 1.
        data, sr = sf.read(input_path, always_2d=True)
    except Exception as e:
        logger.error(f"Failed to read audio file '{input_path}': {e}")
        raise

    out_path = f"/tmp/{uuid.uuid4().hex}.wav"
    try:
        # Downmix to mono by averaging channels (handles mono or multi-channel)
        mono = data.mean(axis=1)
        sf.write(out_path, mono, int(sr), subtype="PCM_16", format="WAV")
    except Exception as e:
        logger.error(f"Failed to write processed WAV file for '{input_path}': {e}")
        # A failed write may already have created the file; remove it so that
        # failed conversions do not accumulate partial files in /tmp.
        if os.path.exists(out_path):
            try:
                os.remove(out_path)
            except OSError as cleanup_error:
                logger.debug(f"Could not remove partial file '{out_path}': {cleanup_error}")
        raise
    return out_path
def process_audio(original_audio_path, dubbed_audio_path, email, company_name, tolerance):
    """
    Submit an original/dubbed audio pair to the "Waveform-Matching" Modal app.

    Steps:
    1. Convert both inputs to mono 16-bit PCM WAV (sampling rate preserved).
    2. Upload the converted files to the Modal Volume named by the
       WAVEFORM_MODAL_VOLUME environment variable, under "/<processing_id>/".
    3. Spawn the app's "reception_handler" function with the uploaded paths
       and the request metadata.

    Args:
        original_audio_path: Local path of the original audio file.
        dubbed_audio_path: Local path of the dubbed audio file.
        email: Forwarded to the remote function as ``email``.
        company_name: Forwarded to the remote function as ``company_name``.
        tolerance: Forwarded to the remote function as ``tolerance_percentage``.

    Returns:
        A human-readable status message (success or the step that failed).

    Raises:
        KeyError: If MODAL_TOKEN_ID, MODAL_TOKEN_SECRET, MODAL_ENVIRONMENT or
            WAVEFORM_MODAL_VOLUME is not set in the environment.
    """
    # Fail fast when the Modal configuration is incomplete. The credential
    # values are not used directly in this function; presumably the Modal
    # client picks them up from the environment -- TODO confirm.
    for required_var in ("MODAL_TOKEN_ID", "MODAL_TOKEN_SECRET", "MODAL_ENVIRONMENT"):
        _ = os.environ[required_var]
    modal_volume = os.environ['WAVEFORM_MODAL_VOLUME']

    # NOTE(review): the id has one-second resolution, so two submissions within
    # the same second would share a remote folder -- confirm whether the remote
    # side tolerates a different id format before changing it.
    processing_id = str(int(time.time()))
    remote_original = f"/{processing_id}/original_audio.wav"
    remote_dubbed = f"/{processing_id}/dubbed_audio.wav"

    # Every temp file that was actually created is tracked here so the
    # `finally` below removes it on all exit paths, including the case where
    # the first conversion succeeds and the second one fails.
    processed_paths = []
    try:
        # 1. Preprocess audio files: WAV format, PCM encoding, mono,
        #    preserve original sampling rate
        try:
            for source_path in (original_audio_path, dubbed_audio_path):
                processed_paths.append(_preprocess_audio_to_wav_pcm_mono(source_path))
        except Exception as e:
            logger.error(f"Error preprocessing audio files: {e}")
            return "Error preprocessing audio files."
        processed_original, processed_dubbed = processed_paths

        # 2. Upload both processed files to the Modal Volume
        try:
            bsodtv_storage = modal.Volume.from_name(modal_volume)
            with bsodtv_storage.batch_upload() as batch:
                batch.put_file(processed_original, remote_original)
                batch.put_file(processed_dubbed, remote_dubbed)
        except Exception as e:
            logger.error(f"Error uploading audio files to Modal Storage: {e}")
            return "Error uploading audio files to Cloud Storage."
    finally:
        # Cleanup temporary processed files (best effort: never fail the
        # request because a temp file could not be removed)
        for temp_path in processed_paths:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass
            except OSError as e:
                logger.warning(f"Could not remove temporary file '{temp_path}': {e}")

    # 3. Call modal to trigger processing
    try:
        waveform_matching_function = modal.Function.from_name("Waveform-Matching", "reception_handler")
        waveform_matching_function.spawn(
            processing_id=processing_id,
            original_file=remote_original,
            dubbed_file=remote_dubbed,
            email=email,
            company_name=company_name,
            tolerance_percentage=tolerance,
        )
    except Exception as e:
        logger.error(f"Error calling Modal reception_handler: {e}")
        return "Error calling Outpost to trigger processing."
    return "Processing started. Results will be emailed to you shortly."
def process_video(video_path, notes, email, company_name) -> str:
    """
    Submit a video to the "Content-Moderation" Modal app.

    Steps:
    1. Validate that ``video_path`` points to an existing file.
    2. Read the video's frame width and height with OpenCV.
    3. Upload the video to the Modal Volume named by the
       MODERATION_MODAL_VOLUME environment variable, as
       "/<processing_id>/input_video<ext>".
    4. Spawn the app's "professional_reception_function" with the notes, the
       uploaded path, the frame size and the request metadata.

    Args:
        video_path: Local path of the video file to submit.
        notes: Free-text notes; ``None`` is sent as an empty string.
        email: Forwarded to the remote function as ``email``.
        company_name: Forwarded to the remote function as ``company_name``.

    Returns:
        A human-readable status message (success or the step that failed).
    """
    # Validate inputs
    if not video_path or not os.path.exists(video_path):
        logger.error("Invalid video path provided to process_video.")
        return "Invalid video path."

    def _get_video_dimensions(path: str):
        """Return ``(width, height)`` of the video, or ``None`` if unreadable."""
        cap = None
        try:
            cap = cv2.VideoCapture(path)
            if not cap.isOpened():
                return None
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            return width, height
        except Exception as e:
            logger.error(f"Failed to read video dimensions: {e}")
            return None
        finally:
            # Release the capture on every path, including failures
            if cap is not None:
                cap.release()

    try:
        modal_volume_name = os.environ['MODERATION_MODAL_VOLUME']

        # Unique processing folder and paths
        # NOTE(review): the id has one-second resolution, so two submissions
        # within the same second would share a remote folder.
        processing_id = str(int(time.time()))
        ext = os.path.splitext(video_path)[1]
        remote_input_path = f"/{processing_id}/input_video{ext}"

        # Obtain the dimensions before uploading, so an unreadable video is
        # rejected with a clear message instead of failing after the upload.
        dimensions = _get_video_dimensions(video_path)
        if dimensions is None:
            logger.error(f"Could not determine dimensions of video '{video_path}'.")
            return "Unable to read video dimensions."
        width, height = dimensions

        # Upload video to Modal Volume
        volume = modal.Volume.from_name(modal_volume_name)
        try:
            with volume.batch_upload() as batch:
                batch.put_file(video_path, remote_input_path)
        except Exception as e:
            logger.error(f"Error uploading video to Modal Storage: {e}")
            return "Error uploading video to Cloud Storage."

        # Trigger the Modal function via spawn; its result is not read here
        try:
            moderation_function = modal.Function.from_name("Content-Moderation", "professional_reception_function")
            moderation_function.spawn(
                input_text=str(notes) if notes is not None else "",
                video_path=remote_input_path,
                size=(width, height),
                email=email,
                company_name=company_name,
            )
        except Exception as e:
            logger.error(f"Error calling Modal reception_function: {e}")
            return "Error calling Outpost to trigger processing."
        return "Video Request Obtained"
    except Exception as e:
        # Covers anything not handled above, e.g. a missing
        # MODERATION_MODAL_VOLUME environment variable.
        logger.error(f"Unexpected error in process_video: {e}")
        return "Unexpected error during video processing."
# Custom CSS for the Gradio interface; it is passed to gr.Blocks(css=css) below.
# It defines a dark colour palette as CSS custom properties and scales many
# spacing/size values by the golden ratio (--golden-ratio: 1.618) via calc().
css = """
:root {
--main-bg-color: #111827;
--primary-color: #3B82F6;
--secondary-color: #60A5FA;
--text-color: #F9FAFB;
--text-secondary: #9CA3AF;
--card-bg: #1F2937;
--border-color: #374151;
--accent-blue: #3B82F6;
--accent-yellow: #FBBF24;
--accent-red: #EF4444;
--accent-green: #22C55E;
--border-radius: 8px;
--golden-ratio: 1.618;
--font-header: 'Barlow', sans-serif;
--font-body: 'Work Sans', sans-serif;
}
body {
font-family: var(--font-body);
background-color: var(--main-bg-color);
color: var(--text-color);
}
.container {
max-width: 100%;
margin: 0 auto;
padding: calc(20px * var(--golden-ratio));
background-color: var(--main-bg-color);
border-radius: calc(var(--border-radius) * var(--golden-ratio));
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
}
.logo-container {
display: flex;
justify-content: center;
margin-bottom: calc(20px * var(--golden-ratio));
padding: 15px;
background-color: var(--card-bg);
border-radius: var(--border-radius);
border: 1px solid var(--border-color);
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.2);
}
.logo {
max-width: 300px;
max-height: 100px;
transition: transform 0.3s ease;
display: block; /* Ensure it's a block element */
margin: 0 auto; /* This will center a block element within its flex container */
}
.logo:hover {
transform: scale(1.05);
}
.header {
text-align: center;
margin-bottom: calc(30px * var(--golden-ratio));
padding: calc(15px * var(--golden-ratio));
background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 100%);
color: white;
border-radius: var(--border-radius);
box-shadow: 0 4px 10px rgba(59, 130, 246, 0.3);
}
.header h1 {
color: white;
font-family: var(--font-header);
font-size: calc(1.5rem * var(--golden-ratio));
margin-bottom: calc(0.5rem * var(--golden-ratio));
text-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
font-weight: 600;
}
.header p {
color: rgba(255, 255, 255, 0.9);
font-size: 1rem;
max-width: calc(600px * var(--golden-ratio));
margin: 0 auto;
}
.input-section, .output-section {
background-color: var(--card-bg);
border: 1px solid var(--border-color);
border-radius: var(--border-radius);
padding: calc(20px * var(--golden-ratio));
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.2);
margin-bottom: 20px;
transition: all 0.3s ease;
}
.input-section:hover, .output-section:hover {
box-shadow: 0 10px 20px rgba(0, 0, 0, 0.3);
border-color: var(--primary-color);
}
.input-section {
flex: var(--golden-ratio);
}
.output-section {
flex: 1;
}
.footer {
text-align: center;
margin-top: calc(30px * var(--golden-ratio));
padding: 15px;
color: var(--text-secondary);
font-size: 0.9rem;
border-top: 1px solid var(--border-color);
}
/* Improve form elements */
.gradio-slider input[type=range] {
accent-color: var(--primary-color);
}
.gradio-textbox input, .gradio-textbox textarea {
background-color: var(--main-bg-color) !important;
border: 1px solid var(--border-color) !important;
border-radius: var(--border-radius) !important;
padding: 10px !important;
color: var(--text-color) !important;
transition: all 0.3s ease !important;
}
.gradio-textbox input:focus, .gradio-textbox textarea:focus {
border-color: var(--primary-color) !important;
box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.3) !important;
}
.gradio-button {
background-color: var(--primary-color) !important;
color: white !important;
border-radius: var(--border-radius) !important;
padding: calc(10px * var(--golden-ratio)) calc(20px * var(--golden-ratio)) !important;
font-weight: 600 !important;
font-family: var(--font-header) !important;
transition: all 0.3s ease !important;
box-shadow: 0 4px 6px rgba(59, 130, 246, 0.3) !important;
border: none !important;
}
.gradio-button:hover {
background-color: var(--secondary-color) !important;
transform: translateY(-2px);
box-shadow: 0 6px 12px rgba(59, 130, 246, 0.4) !important;
}
/* Golden ratio spacing for elements */
.gradio-row {
margin-bottom: calc(16px * var(--golden-ratio)) !important;
}
/* Additional dark theme adjustments */
.gradio-container {
background-color: var(--main-bg-color) !important;
}
.gradio-form {
background-color: var(--card-bg) !important;
border: 1px solid var(--border-color) !important;
}
/* Labels and text styling */
label {
color: var(--text-color) !important;
font-family: var(--font-body) !important;
}
/* Responsive adjustments */
@media (max-width: 768px) {
.container {
padding: 15px;
}
.input-section, .output-section {
padding: 15px;
}
}
"""
# Build the Gradio UI as a Blocks layout: a header followed by two tabs
# ("Dub Quality Control" and "Content Moderation"), each wired to its
# processing function at the end of this block.
# NOTE(review): the source this was taken from had lost its indentation, so
# the nesting below is a reconstruction -- in particular, confirm that the
# footer row belongs inside the first tab and that both Markdown lines under
# "Results" share one row.
with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple")) as demo:
    with gr.Column(elem_classes="container"):
        # Header section
        # NOTE(review): the logo <img> has an empty src attribute -- confirm
        # whether an image URL or data URI is supposed to be here.
        with gr.Column(elem_classes="header"):
            gr.HTML("""
<img src="" class="logo" alt="Logo">
<h1 style="margin-top: 0;">BSOD.tv - Dub QC Demo</h1>
<p style="font-size: 1.1rem; line-height: 1.618;">
Professional audio synchronization verification for media localization.
<br>Upload original and dubbed .wav files to start the QC process.
</p>
""")
        # Main content with specified layout
        with gr.Tabs():
            with gr.Tab("Dub Quality Control"):
                # First Row: Half Original Audio Input, Half Dubbed Audio Input
                # (type="filepath" hands process_audio a path on disk)
                with gr.Row(elem_classes="input-section"):
                    with gr.Column(scale=1):
                        original_audio = gr.Audio(type="filepath", label="Original .wav file", sources=['upload'],format="wav")
                    with gr.Column(scale=1):
                        dubbed_audio = gr.Audio(type="filepath", label="Dubbed .wav file", sources=['upload'],format="wav")
                # Second Row: 2/3 Email Input 1/3 Company Name Input
                with gr.Row(elem_classes="input-section"):
                    with gr.Column(scale=2):
                        _email = gr.Textbox(label="Email")
                    with gr.Column(scale=1):
                        _company_name = gr.Textbox(label="Company Name")
                # Third Row: Tolerance Percentage (0-100, default 5)
                with gr.Row(elem_classes="input-section"):
                    _tolerance = gr.Slider(0, 100, value=5, label="Tolerance Percentage",
                                           info="Set the tolerance for audio comparison.")
                # Fourth Row: Processing Status (shows the string returned by process_audio)
                with gr.Row(elem_classes="output-section"):
                    output = gr.Text(label="Processing Status")
                with gr.Row():
                    submit_btn = gr.Button("Process Audio", variant="primary")
                with gr.Row():
                    gr.Markdown("### Results")
                    gr.Markdown("Once processing is complete, results will be emailed to the address provided.")
                # Footer
                with gr.Row(elem_classes="footer"):
                    gr.Markdown("© BSOD.tv - Professional Dub Quality Control")
            with gr.Tab("Content Moderation"):
                # First Row: Left Department Notes (Textbox), Right Video input
                with gr.Row(elem_classes="input-section"):
                    with gr.Column(scale=1):
                        cm_notes = gr.Textbox(label="Department Notes", lines=6, placeholder="Enter notes for the moderation team...")
                    with gr.Column(scale=1):
                        cm_video_in = gr.Video(label="Video Input", sources=["upload"], interactive=True)
                # Second Row: Email and Company Name (2/3 and 1/3 columns)
                with gr.Row(elem_classes="input-section"):
                    with gr.Column(scale=2):
                        cm_email = gr.Textbox(label="Email")
                    with gr.Column(scale=1):
                        cm_company_name = gr.Textbox(label="Company Name")
                # Third Row: status text returned by process_video (despite the
                # variable name, this component is a Textbox, not a video player)
                with gr.Row(elem_classes="output-section"):
                    cm_video_out = gr.Textbox(label="Output")
                # Final Row: Process button
                with gr.Row():
                    cm_process_btn = gr.Button("Process", variant="primary")
    # Wire Dub Quality Control processing: the input order matches the
    # parameter order of process_audio
    submit_btn.click(
        fn=process_audio,
        inputs=[original_audio, dubbed_audio, _email, _company_name, _tolerance],
        outputs=output
    )
    # Wire Content Moderation processing: the input order matches the
    # parameter order of process_video (video first, then notes)
    cm_process_btn.click(
        fn=process_video,
        inputs=[cm_video_in, cm_notes, cm_email, cm_company_name],
        outputs=cm_video_out
    )
if __name__ == "__main__":
    # To run this file locally, install the third-party packages imported at
    # the top of the file (gradio, modal, numpy, soundfile, and OpenCV for cv2),
    # e.g.: pip install gradio modal numpy soundfile opencv-python
    # The environment variables read by process_audio and process_video must
    # also be set.
    demo.launch()
|