File size: 44,176 Bytes
a8189eb
 
 
 
 
 
 
 
b5e3557
a8189eb
b5e3557
a8189eb
b5e3557
0992c47
a8189eb
a812039
 
a8189eb
0992c47
 
f5c2623
a812039
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5c2623
 
 
 
 
 
b5e3557
a8189eb
 
 
b5e3557
a8189eb
a812039
 
 
 
 
 
 
 
 
f5c2623
a8189eb
 
a812039
 
0992c47
 
a8189eb
a812039
 
 
 
 
 
 
 
a8189eb
f5c2623
a8189eb
 
 
 
90ea876
a8189eb
 
 
 
 
 
 
f5c2623
 
b5e3557
b313b80
b5e3557
 
 
 
 
 
 
b313b80
b5e3557
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b313b80
b5e3557
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b313b80
b5e3557
 
8214b7d
 
b5e3557
 
 
1822d3c
 
b5e3557
 
 
 
 
 
8214b7d
b5e3557
 
 
 
b313b80
0613be4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e459c2b
0613be4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3282c39
0613be4
 
04a7213
 
 
0613be4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
749bfee
0613be4
 
 
e237e73
0613be4
 
 
e459c2b
a06400e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b313b80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8214b7d
b313b80
 
 
 
0613be4
 
 
 
96c1034
0613be4
 
f5c2623
b313b80
 
 
 
 
 
 
f5c2623
 
 
a8189eb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
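## ENVIRONMENT VARIABLES
# WAVEFORM_MODAL_VOLUME    (name of the Modal Volume used by process_audio)
# MODERATION_MODAL_VOLUME  (name of the Modal Volume used by process_video)
# MODAL_TOKEN_ID
# MODAL_ENVIRONMENT
# MODAL_TOKEN_SECRET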


import logging
import os
import shutil
import tempfile
import time
import uuid

import cv2
import gradio as gr
import modal
import numpy as np
import soundfile as sf

# Configure the root logger at DEBUG level and create this module's logger,
# which the processing functions below use for error reporting.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def _preprocess_audio_to_wav_pcm_mono(input_path: str) -> str:
    """
    Convert an audio file to a mono, 16-bit PCM WAV file.

    All channels are averaged into a single channel. The sampling rate
    reported by the reader is reused for the output (no resampling).

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        Path of a newly created temporary WAV file. The caller owns the file
        and is responsible for deleting it.

    Raises:
        Exception: Whatever ``soundfile`` (or the temp-file creation) raises;
            the error is logged and re-raised unchanged.
    """
    try:
        # always_2d=True yields a (frames, channels) array even for mono input,
        # so the channel average below works for every channel count.
        data, sr = sf.read(input_path, always_2d=True)
    except Exception as e:
        logger.error(f"Failed to read audio file '{input_path}': {e}")
        raise

    out_path = None
    try:
        # Downmix to mono by averaging channels
        mono = data.mean(axis=1)

        # Create the output in the platform's temp directory instead of a
        # hard-coded "/tmp"; mkstemp creates the file atomically with a unique name.
        fd, out_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)

        sf.write(out_path, mono, int(sr), subtype="PCM_16", format="WAV")
        return out_path
    except Exception as e:
        logger.error(f"Failed to write processed WAV file for '{input_path}': {e}")
        # Do not leave a partially written file behind.
        if out_path is not None:
            try:
                os.remove(out_path)
            except OSError as cleanup_error:
                logger.debug(f"Could not remove temporary file '{out_path}': {cleanup_error}")
        raise


def process_audio(original_audio_path, dubbed_audio_path, email, company_name, tolerance):
    """
    Submit an original/dubbed audio pair for waveform matching.

    Steps:
      1. Check that the required environment variables are present.
      2. Convert both inputs to mono 16-bit PCM WAV files.
      3. Upload the converted files to the Modal Volume named by
         WAVEFORM_MODAL_VOLUME, under a folder named after the processing id.
      4. Spawn the "reception_handler" function of the "Waveform-Matching"
         Modal app (fire-and-forget; the result is not awaited here).

    Args:
        original_audio_path: Local path of the original audio file.
        dubbed_audio_path: Local path of the dubbed audio file.
        email: Forwarded to the Modal function as ``email``.
        company_name: Forwarded to the Modal function as ``company_name``.
        tolerance: Forwarded to the Modal function as ``tolerance_percentage``.

    Returns:
        A human-readable status string; failures are reported as a message
        rather than raised.
    """
    # 1. Fail fast, with a message instead of a KeyError, on missing configuration.
    required_vars = (
        "MODAL_TOKEN_ID",
        "MODAL_TOKEN_SECRET",
        "MODAL_ENVIRONMENT",
        "WAVEFORM_MODAL_VOLUME",
    )
    missing_vars = [name for name in required_vars if name not in os.environ]
    if missing_vars:
        logger.error(f"Missing required environment variables: {', '.join(missing_vars)}")
        return "Server configuration error: missing environment variables."

    modal_volume = os.environ['WAVEFORM_MODAL_VOLUME']

    # NOTE(review): the id has one-second resolution, so two submissions within
    # the same second would share a folder on the volume — confirm acceptable.
    processing_id = str(int(time.time()))
    remote_original = f"/{processing_id}/original_audio.wav"
    remote_dubbed = f"/{processing_id}/dubbed_audio.wav"

    # Every temp file created so far is tracked here so that it is removed
    # even when the second conversion or the upload fails.
    processed_files = []
    try:
        # 2. Preprocess audio files: WAV format, PCM encoding, mono, original sampling rate
        try:
            for source_path in (original_audio_path, dubbed_audio_path):
                processed_files.append(_preprocess_audio_to_wav_pcm_mono(source_path))
        except Exception as e:
            logger.error(f"Error preprocessing audio files: {e}")
            return "Error preprocessing audio files."

        processed_original, processed_dubbed = processed_files

        # 3. Upload both files in a single batch
        try:
            bsodtv_storage = modal.Volume.from_name(modal_volume)
            with bsodtv_storage.batch_upload() as batch:
                batch.put_file(processed_original, remote_original)
                batch.put_file(processed_dubbed, remote_dubbed)
        except Exception as e:
            logger.error(f"Error uploading audio files to Modal Storage: {e}")
            return "Error uploading audio files to Cloud Storage."
    finally:
        # Cleanup temporary processed files (best effort)
        for temp_path in processed_files:
            try:
                os.remove(temp_path)
            except OSError as e:
                logger.debug(f"Could not remove temporary file '{temp_path}': {e}")

    # 4. Call modal to trigger processing
    try:
        waveform_matching_function = modal.Function.from_name("Waveform-Matching", "reception_handler")
        waveform_matching_function.spawn(
            processing_id=processing_id,
            original_file=remote_original,
            dubbed_file=remote_dubbed,
            email=email,
            company_name=company_name,
            tolerance_percentage=tolerance
        )
    except Exception as e:
        # Log the cause; the original bare except discarded it.
        logger.error(f"Error calling Modal reception_handler: {e}")
        return "Error calling Outpost to trigger processing."
    return "Processing started. Results will be emailed to you shortly."


def process_video(video_path, notes, email, company_name) -> str:
    """
    Submit a video for content moderation via Modal.

    Steps:
      1. Validate that the video path exists.
      2. Read the video dimensions (width, height) with OpenCV.
      3. Upload the video to the Modal Volume named by MODERATION_MODAL_VOLUME.
      4. Spawn the "professional_reception_function" of the
         "Content-Moderation" Modal app (fire-and-forget; the result is not
         awaited or downloaded here).

    Args:
        video_path: Local path of the video to submit.
        notes: Free-text notes, forwarded as ``input_text`` ("" when None).
        email: Forwarded to the Modal function as ``email``.
        company_name: Forwarded to the Modal function as ``company_name``.

    Returns:
        A human-readable status string; failures are reported as a message
        rather than raised.
    """
    # Validate inputs
    if not video_path or not os.path.exists(video_path):
        logger.error("Invalid video path provided to process_video.")
        return "Invalid video path."

    def _get_video_dimensions(path: str):
        """Return (width, height) of the video, or None if it cannot be read."""
        cap = None
        try:
            cap = cv2.VideoCapture(path)
            if not cap.isOpened():
                return None
            return (
                int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            )
        except Exception as e:
            logger.error(f"OpenCV not available or failed to read video dimensions: {e}")
            return None
        finally:
            # Release the capture on every path, including early returns.
            if cap is not None:
                cap.release()

    try:
        # Read the dimensions before uploading: an unreadable video is rejected
        # explicitly instead of failing on unbound locals after the upload.
        dimensions = _get_video_dimensions(video_path)
        if dimensions is None:
            logger.error(f"Could not determine dimensions of video '{video_path}'.")
            return "Unable to read video dimensions."
        width, height = dimensions

        # A missing variable raises KeyError, reported by the outer handler.
        modal_volume_name = os.environ['MODERATION_MODAL_VOLUME']

        # Unique processing folder and paths
        # NOTE(review): the id has one-second resolution, so two submissions
        # within the same second would share a folder — confirm acceptable.
        processing_id = str(int(time.time()))
        ext = os.path.splitext(video_path)[1]
        remote_input_path = f"/{processing_id}/input_video{ext}"

        # Upload video to Modal Volume
        volume = modal.Volume.from_name(modal_volume_name)
        try:
            with volume.batch_upload() as batch:
                batch.put_file(video_path, remote_input_path)
        except Exception as e:
            logger.error(f"Error uploading video to Modal Storage: {e}")
            return "Error uploading video to Cloud Storage."

        # Trigger the moderation job without waiting for its result
        try:
            moderation_function = modal.Function.from_name("Content-Moderation", "professional_reception_function")
            moderation_function.spawn(
                input_text=str(notes) if notes is not None else "",
                video_path=remote_input_path,
                size=(width, height),
                email=email,
                company_name=company_name
            )
        except Exception as e:
            logger.error(f"Error calling Modal reception_function: {e}")
            return "Error calling Outpost to trigger processing."

        return "Video Request Obtained"

    except Exception as e:
        logger.error(f"Unexpected error in process_video: {e}")
        return "Unexpected error during video processing."

# Custom CSS passed to gr.Blocks below: a dark theme whose spacing and sizing
# are scaled by the golden ratio (the --golden-ratio custom property, 1.618).
# NOTE(review): selectors such as .gradio-button / .gradio-textbox presumably
# target class names generated by Gradio — confirm they match the installed version.
css = """
:root {
    --main-bg-color: #111827;
    --primary-color: #3B82F6;
    --secondary-color: #60A5FA;
    --text-color: #F9FAFB;
    --text-secondary: #9CA3AF;
    --card-bg: #1F2937;
    --border-color: #374151;
    --accent-blue: #3B82F6;
    --accent-yellow: #FBBF24;
    --accent-red: #EF4444;
    --accent-green: #22C55E;
    --border-radius: 8px;
    --golden-ratio: 1.618;
    --font-header: 'Barlow', sans-serif;
    --font-body: 'Work Sans', sans-serif;
}

body {
    font-family: var(--font-body);
    background-color: var(--main-bg-color);
    color: var(--text-color);
}

.container {
    max-width: 100%;
    margin: 0 auto;
    padding: calc(20px * var(--golden-ratio));
    background-color: var(--main-bg-color);
    border-radius: calc(var(--border-radius) * var(--golden-ratio));
    box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
}

.logo-container {
    display: flex;
    justify-content: center;
    margin-bottom: calc(20px * var(--golden-ratio));
    padding: 15px;
    background-color: var(--card-bg);
    border-radius: var(--border-radius);
    border: 1px solid var(--border-color);
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.2);
}

.logo {
    max-width: 300px;
    max-height: 100px;
    transition: transform 0.3s ease;
    display: block; /* Ensure it's a block element */
    margin: 0 auto; /* This will center a block element within its flex container */

}

.logo:hover {
    transform: scale(1.05);
}

.header {
    text-align: center;
    margin-bottom: calc(30px * var(--golden-ratio));
    padding: calc(15px * var(--golden-ratio));
    background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 100%);
    color: white;
    border-radius: var(--border-radius);
    box-shadow: 0 4px 10px rgba(59, 130, 246, 0.3);
}

.header h1 {
    color: white;
    font-family: var(--font-header);
    font-size: calc(1.5rem * var(--golden-ratio));
    margin-bottom: calc(0.5rem * var(--golden-ratio));
    text-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
    font-weight: 600;
}

.header p {
    color: rgba(255, 255, 255, 0.9);
    font-size: 1rem;
    max-width: calc(600px * var(--golden-ratio));
    margin: 0 auto;
}

.input-section, .output-section {
    background-color: var(--card-bg);
    border: 1px solid var(--border-color);
    border-radius: var(--border-radius);
    padding: calc(20px * var(--golden-ratio));
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.2);
    margin-bottom: 20px;
    transition: all 0.3s ease;
}

.input-section:hover, .output-section:hover {
    box-shadow: 0 10px 20px rgba(0, 0, 0, 0.3);
    border-color: var(--primary-color);
}

.input-section {
    flex: var(--golden-ratio);
}

.output-section {
    flex: 1;
}

.footer {
    text-align: center;
    margin-top: calc(30px * var(--golden-ratio));
    padding: 15px;
    color: var(--text-secondary);
    font-size: 0.9rem;
    border-top: 1px solid var(--border-color);
}

/* Improve form elements */
.gradio-slider input[type=range] {
    accent-color: var(--primary-color);
}

.gradio-textbox input, .gradio-textbox textarea {
    background-color: var(--main-bg-color) !important;
    border: 1px solid var(--border-color) !important;
    border-radius: var(--border-radius) !important;
    padding: 10px !important;
    color: var(--text-color) !important;
    transition: all 0.3s ease !important;
}

.gradio-textbox input:focus, .gradio-textbox textarea:focus {
    border-color: var(--primary-color) !important;
    box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.3) !important;
}

.gradio-button {
    background-color: var(--primary-color) !important;
    color: white !important;
    border-radius: var(--border-radius) !important;
    padding: calc(10px * var(--golden-ratio)) calc(20px * var(--golden-ratio)) !important;
    font-weight: 600 !important;
    font-family: var(--font-header) !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 4px 6px rgba(59, 130, 246, 0.3) !important;
    border: none !important;
}

.gradio-button:hover {
    background-color: var(--secondary-color) !important;
    transform: translateY(-2px);
    box-shadow: 0 6px 12px rgba(59, 130, 246, 0.4) !important;
}

/* Golden ratio spacing for elements */
.gradio-row {
    margin-bottom: calc(16px * var(--golden-ratio)) !important;
}

/* Additional dark theme adjustments */
.gradio-container {
    background-color: var(--main-bg-color) !important;
}

.gradio-form {
    background-color: var(--card-bg) !important;
    border: 1px solid var(--border-color) !important;
}

/* Labels and text styling */
label {
    color: var(--text-color) !important;
    font-family: var(--font-body) !important;
}

/* Responsive adjustments */
@media (max-width: 768px) {
    .container {
        padding: 15px;
    }

    .input-section, .output-section {
        padding: 15px;
    }
}
"""

# Build the two-tab Gradio UI ("Dub Quality Control" and "Content Moderation")
# with gr.Blocks, applying the custom CSS above on top of the Soft theme, and
# wire each tab's button to its processing function.
with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple")) as demo:
    with gr.Column(elem_classes="container"):
        # Header section: logo, title and a short description
        # NOTE(review): the <img> src below is empty in this view — confirm the
        # intended logo source is present in the real file.
        with gr.Column(elem_classes="header"):
            gr.HTML("""
                <img src="" class="logo" alt="Logo">
                <h1 style="margin-top: 0;">BSOD.tv - Dub QC Demo</h1>
                <p style="font-size: 1.1rem; line-height: 1.618;">
                    Professional audio synchronization verification for media localization.
                    <br>Upload original and dubbed .wav files to start the QC process.
                </p>
            """)

        # Main content: one tab per workflow
        with gr.Tabs():
            with gr.Tab("Dub Quality Control"):
                # First Row: two equal columns — original audio upload and dubbed audio upload.
                # type="filepath" hands process_audio a path on disk rather than raw samples.
                with gr.Row(elem_classes="input-section"):
                    with gr.Column(scale=1):
                        original_audio = gr.Audio(type="filepath", label="Original .wav file", sources=['upload'],format="wav")
                    with gr.Column(scale=1):
                        dubbed_audio = gr.Audio(type="filepath", label="Dubbed .wav file", sources=['upload'],format="wav")

                # Second Row: email (2/3 width) and company name (1/3 width)
                with gr.Row(elem_classes="input-section"):
                    with gr.Column(scale=2):
                        _email = gr.Textbox(label="Email")
                    with gr.Column(scale=1):
                        _company_name = gr.Textbox(label="Company Name")

                # Third Row: tolerance percentage slider (0-100, default 5)
                with gr.Row(elem_classes="input-section"):
                    _tolerance = gr.Slider(0, 100, value=5, label="Tolerance Percentage",
                                           info="Set the tolerance for audio comparison.")

                # Fourth Row: status text returned by process_audio
                with gr.Row(elem_classes="output-section"):
                    output = gr.Text(label="Processing Status")

                # Submit button
                with gr.Row():
                    submit_btn = gr.Button("Process Audio", variant="primary")

                # Static note about how results are delivered
                with gr.Row():
                    gr.Markdown("### Results")
                    gr.Markdown("Once processing is complete, results will be emailed to the address provided.")

                # Footer
                with gr.Row(elem_classes="footer"):
                    gr.Markdown("© BSOD.tv - Professional Dub Quality Control")
            with gr.Tab("Content Moderation"):
                # First Row: department notes (left) and video upload (right)
                with gr.Row(elem_classes="input-section"):
                    with gr.Column(scale=1):
                        cm_notes = gr.Textbox(label="Department Notes", lines=6, placeholder="Enter notes for the moderation team...")
                    with gr.Column(scale=1):
                        cm_video_in = gr.Video(label="Video Input", sources=["upload"], interactive=True)

                # Second Row: email (2/3 width) and company name (1/3 width)
                with gr.Row(elem_classes="input-section"):
                    with gr.Column(scale=2):
                        cm_email = gr.Textbox(label="Email")
                    with gr.Column(scale=1):
                        cm_company_name = gr.Textbox(label="Company Name")

                # Third Row: status text returned by process_video.
                # Despite its name, cm_video_out is a Textbox, not a video player.
                with gr.Row(elem_classes="output-section"):
                    cm_video_out = gr.Textbox(label="Output")

                # Final Row: process button
                with gr.Row():
                    cm_process_btn = gr.Button("Process", variant="primary")

    # Dub QC: pass the five inputs to process_audio and show its status string
    submit_btn.click(
        fn=process_audio,
        inputs=[original_audio, dubbed_audio, _email, _company_name, _tolerance],
        outputs=output
    )

    # Content Moderation: inputs are listed in process_video's parameter order
    # (video_path, notes, email, company_name)
    cm_process_btn.click(
        fn=process_video,
        inputs=[cm_video_in, cm_notes, cm_email, cm_company_name],
        outputs=cm_video_out
    )

if __name__ == "__main__":
    # To run this file locally, install the third-party packages it imports
    # (gradio, modal, cv2, numpy, soundfile), e.g.:
    # pip install gradio modal opencv-python numpy soundfile
    # and set the environment variables that the processing functions read.
    demo.launch()