Merge branch 'pre-processing-audio' into 'main'
Browse files
Add email and company_name parameters to Modal reception function
- app.py +48 -2
- requirements.txt +3 -1
app.py
CHANGED
|
@@ -13,10 +13,39 @@ import modal
|
|
| 13 |
import shutil
|
| 14 |
import logging
|
| 15 |
import gradio as gr
|
|
|
|
|
|
|
| 16 |
|
| 17 |
logging.basicConfig(level=logging.DEBUG)
|
| 18 |
logger = logging.getLogger(__name__)
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
def process_audio(original_audio_path, dubbed_audio_path, email, company_name, tolerance):
|
| 21 |
"""
|
| 22 |
This function processes the audio files, handling the logic for duration check,
|
|
@@ -29,14 +58,31 @@ def process_audio(original_audio_path, dubbed_audio_path, email, company_name, t
|
|
| 29 |
modal_environment = os.environ['MODAL_ENVIRONMENT']
|
| 30 |
modal_volume = os.environ['WAVEFORM_MODAL_VOLUME']
|
| 31 |
processing_id = str(int(time.time()))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
try:
|
| 33 |
bsodtv_storage = modal.Volume.from_name(modal_volume)
|
| 34 |
with bsodtv_storage.batch_upload() as batch:
|
| 35 |
-
batch.put_file(
|
| 36 |
-
batch.put_file(
|
| 37 |
except Exception as e:
|
| 38 |
logger.error(f"Error uploading audio files to Modal Storage: {e}")
|
| 39 |
return "Error uploading audio files to Cloud Storage."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
# 3. Call modal to trigger processing
|
| 41 |
try:
|
| 42 |
waveform_matching_function = modal.Function.from_name("Waveform-Matching", "reception_handler")
|
|
|
|
| 13 |
import shutil
|
| 14 |
import logging
|
| 15 |
import gradio as gr
|
| 16 |
+
import numpy as np
|
| 17 |
+
import soundfile as sf
|
| 18 |
|
| 19 |
logging.basicConfig(level=logging.DEBUG)
|
| 20 |
logger = logging.getLogger(__name__)
|
| 21 |
|
| 22 |
+
|
| 23 |
+
def _preprocess_audio_to_wav_pcm_mono(input_path: str) -> str:
    """
    Convert the given audio file to a WAV file with 16-bit PCM encoding and a mono channel.

    The original sampling rate is preserved (no resampling). Multi-channel
    input is downmixed by averaging its channels.

    Args:
        input_path: Path of the audio file to read.

    Returns:
        Path to a newly created temporary WAV file. The caller is responsible
        for deleting it once it is no longer needed.

    Raises:
        Exception: Any error raised while reading the input or writing the
            output is logged and re-raised unchanged.
    """
    # Function-scope imports keep this helper self-contained.
    import os
    import tempfile

    try:
        # Read audio with original sampling rate preserved. always_2d=True is
        # requested so that the axis=1 downmix below also works for mono input.
        data, sr = sf.read(input_path, always_2d=True)
    except Exception as e:
        logger.error(f"Failed to read audio file '{input_path}': {e}")
        raise

    out_path = None
    try:
        # Downmix to mono by averaging channels (handles mono or multi-channel)
        mono = data.mean(axis=1)

        # Let the OS pick a unique file in the platform's temp directory instead
        # of hard-coding "/tmp". Only the path is needed, so the descriptor
        # returned by mkstemp is closed right away.
        fd, out_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)

        # Write as 16-bit PCM WAV to the temp path
        sf.write(out_path, mono, int(sr), subtype="PCM_16", format="WAV")
        return out_path
    except Exception as e:
        logger.error(f"Failed to write processed WAV file for '{input_path}': {e}")
        # Do not leave a partially written temp file behind on failure.
        if out_path is not None:
            try:
                os.remove(out_path)
            except OSError:
                pass
        raise
|
| 47 |
+
|
| 48 |
+
|
| 49 |
def process_audio(original_audio_path, dubbed_audio_path, email, company_name, tolerance):
|
| 50 |
"""
|
| 51 |
This function processes the audio files, handling the logic for duration check,
|
|
|
|
| 58 |
modal_environment = os.environ['MODAL_ENVIRONMENT']
|
| 59 |
modal_volume = os.environ['WAVEFORM_MODAL_VOLUME']
|
| 60 |
processing_id = str(int(time.time()))
|
| 61 |
+
|
| 62 |
+
# Preprocess audio files: WAV format, PCM encoding, mono, preserve original sampling rate
|
| 63 |
+
try:
|
| 64 |
+
processed_original = _preprocess_audio_to_wav_pcm_mono(original_audio_path)
|
| 65 |
+
processed_dubbed = _preprocess_audio_to_wav_pcm_mono(dubbed_audio_path)
|
| 66 |
+
except Exception as e:
|
| 67 |
+
logger.error(f"Error preprocessing audio files: {e}")
|
| 68 |
+
return "Error preprocessing audio files."
|
| 69 |
+
|
| 70 |
try:
|
| 71 |
bsodtv_storage = modal.Volume.from_name(modal_volume)
|
| 72 |
with bsodtv_storage.batch_upload() as batch:
|
| 73 |
+
batch.put_file(processed_original, f"/{processing_id}/original_audio.wav")
|
| 74 |
+
batch.put_file(processed_dubbed, f"/{processing_id}/dubbed_audio.wav")
|
| 75 |
except Exception as e:
|
| 76 |
logger.error(f"Error uploading audio files to Modal Storage: {e}")
|
| 77 |
return "Error uploading audio files to Cloud Storage."
|
| 78 |
+
finally:
|
| 79 |
+
# Cleanup temporary processed files
|
| 80 |
+
for p in [processed_original, processed_dubbed]:
|
| 81 |
+
try:
|
| 82 |
+
if p and os.path.exists(p):
|
| 83 |
+
os.remove(p)
|
| 84 |
+
except Exception:
|
| 85 |
+
pass
|
| 86 |
# 3. Call modal to trigger processing
|
| 87 |
try:
|
| 88 |
waveform_matching_function = modal.Function.from_name("Waveform-Matching", "reception_handler")
|
requirements.txt
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
| 1 |
modal
|
| 2 |
gradio
|
| 3 |
-
|
|
|
|
|
|
| 1 |
+
numpy
|
| 2 |
modal
|
| 3 |
gradio
|
| 4 |
+
soundfile
|
| 5 |
+
opencv-python-headless
|