Spaces:

bsod-tv
/

Localization-Quality-Control

Sleeping

App Files

denizaybey committed on Oct 2, 2025

Commit

bef288f

2 Parent(s): 14da500 a812039

Merge branch 'pre-processing-audio' into 'main'

Browse files

Add email and company_name parameters to Modal reception function

Files changed (2) hide show

app.py +48 -2
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -13,10 +13,39 @@ import modal
 import shutil
 import logging
 import gradio as gr
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
 def process_audio(original_audio_path, dubbed_audio_path, email, company_name, tolerance):
     """
     This function processes the audio files, handling the logic for duration check,
@@ -29,14 +58,31 @@ def process_audio(original_audio_path, dubbed_audio_path, email, company_name, t
     modal_environment = os.environ['MODAL_ENVIRONMENT']
     modal_volume = os.environ['WAVEFORM_MODAL_VOLUME']
     processing_id = str(int(time.time()))
     try:
         bsodtv_storage = modal.Volume.from_name(modal_volume)
         with bsodtv_storage.batch_upload() as batch:
-            batch.put_file(original_audio_path, "/{}/original_audio.wav".format(processing_id))
-            batch.put_file(dubbed_audio_path, "/{}/dubbed_audio.wav".format(processing_id))
     except Exception as e:
         logger.error(f"Error uploading audio files to Modal Storage: {e}")
         return "Error uploading audio files to Cloud Storage."
     # 3. Call modal to trigger processing
     try:
         waveform_matching_function = modal.Function.from_name("Waveform-Matching", "reception_handler")

 import shutil
 import logging
 import gradio as gr
+import numpy as np
+import soundfile as sf
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
+def _preprocess_audio_to_wav_pcm_mono(input_path: str) -> str:
+    """
+    Convert the given audio file to a WAV file with PCM encoding and mono channel.
+    The original sampling rate is preserved (no resampling).
+    Returns the path to a temporary processed WAV file.
+    """
+    try:
+        # Read audio with original sampling rate preserved
+        data, sr = sf.read(input_path, always_2d=True)
+    except Exception as e:
+        logger.error(f"Failed to read audio file '{input_path}': {e}")
+        raise
+    try:
+        # Downmix to mono by averaging channels (handles mono or multi-channel)
+        mono = data.mean(axis=1)
+        # Write as 16-bit PCM WAV to a temp path
+        out_path = f"/tmp/{uuid.uuid4().hex}.wav"
+        sf.write(out_path, mono, int(sr), subtype="PCM_16", format="WAV")
+        return out_path
+    except Exception as e:
+        logger.error(f"Failed to write processed WAV file for '{input_path}': {e}")
+        raise
 def process_audio(original_audio_path, dubbed_audio_path, email, company_name, tolerance):
     """
     This function processes the audio files, handling the logic for duration check,
     modal_environment = os.environ['MODAL_ENVIRONMENT']
     modal_volume = os.environ['WAVEFORM_MODAL_VOLUME']
     processing_id = str(int(time.time()))
+    # Preprocess audio files: WAV format, PCM encoding, mono, preserve original sampling rate
+    try:
+        processed_original = _preprocess_audio_to_wav_pcm_mono(original_audio_path)
+        processed_dubbed = _preprocess_audio_to_wav_pcm_mono(dubbed_audio_path)
+    except Exception as e:
+        logger.error(f"Error preprocessing audio files: {e}")
+        return "Error preprocessing audio files."
     try:
         bsodtv_storage = modal.Volume.from_name(modal_volume)
         with bsodtv_storage.batch_upload() as batch:
+            batch.put_file(processed_original, f"/{processing_id}/original_audio.wav")
+            batch.put_file(processed_dubbed, f"/{processing_id}/dubbed_audio.wav")
     except Exception as e:
         logger.error(f"Error uploading audio files to Modal Storage: {e}")
         return "Error uploading audio files to Cloud Storage."
+    finally:
+        # Cleanup temporary processed files
+        for p in [processed_original, processed_dubbed]:
+            try:
+                if p and os.path.exists(p):
+                    os.remove(p)
+            except Exception:
+                pass
     # 3. Call modal to trigger processing
     try:
         waveform_matching_function = modal.Function.from_name("Waveform-Matching", "reception_handler")

requirements.txt CHANGED Viewed

@@ -1,3 +1,5 @@
 modal
 gradio
-opencv-python-headless

+numpy
 modal
 gradio
+soundfile
+opencv-python-headless