Spaces:

norhan12
/

nono

Sleeping

App Files Files Community

norhan12 committed on Jun 11, 2025

Commit

69c67cd

verified ·

1 Parent(s): b655de4

Update process_interview.py

Browse files

Files changed (1) hide show

process_interview.py +73 -7

process_interview.py CHANGED Viewed

@@ -127,39 +127,105 @@ speaker_model, nlp, tokenizer, llm_model = load_models()
 # Audio processing functions
 def preprocess_audio(audio_path: str, output_path: str) -> str:
     """Preprocess audio to improve quality before transcription.
     The input is converted to mono at 16000 Hz and normalized, exported to a
     temporary WAV under OUTPUT_DIR, reloaded with librosa, noise-reduced, and
     written to ``output_path``.
     Args:
         audio_path (str): Path to the input audio file.
         output_path (str): Path to save the preprocessed audio.
     Returns:
         str: ``output_path``, once the noise-reduced audio has been written.
     Raises:
         Exception: Any preprocessing error; it is logged and re-raised.
     """
     # Bound before the try block so cleanup can run even when loading the
     # input fails before the temporary path has been assigned.
     temp_wav = None
     try:
         audio = AudioSegment.from_file(audio_path)
         temp_wav = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
         audio = audio.set_channels(1).set_frame_rate(16000)
         audio = audio.normalize()
         audio.export(temp_wav, format="wav")
         y, sr = librosa.load(temp_wav, sr=16000)
         reduced_noise = nr.reduce_noise(y=y, sr=sr)
         # NOTE(review): assumes `sf` is the soundfile module, whose write()
         # takes (file, data, samplerate). The data array used to be passed
         # first, which made this call fail on every invocation.
         sf.write(output_path, reduced_noise, sr)
         return output_path
     except Exception as e:
         logger.error(f"Audio preprocessing failed: {str(e)}")
         raise
     finally:
         # Remove the intermediate WAV on both the success and failure paths.
         if temp_wav and os.path.exists(temp_wav):
             os.remove(temp_wav)
 def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
     """Convert an audio file to a preprocessed mono WAV at 16000 Hz.
     The input is first run through ``preprocess_audio`` into a temporary file
     in ``output_dir``; that file is re-read, forced to mono and 16000 Hz, and
     exported under a fresh UUID-based name.
     Args:
         audio_path (str): Path to the input audio file.
         output_dir (str): Directory for the temporary and final WAV files.
     Returns:
         str: Path to the exported WAV file.
     Raises:
         Exception: Any conversion error; it is logged and re-raised.
     """
     # Bound before the try block so the cleanup in ``finally`` never hits an
     # unbound name if building the temporary path itself fails.
     temp_path = None
     try:
         temp_path = os.path.join(output_dir, f"temp_{uuid.uuid4()}.wav")
         preprocessed_path = preprocess_audio(audio_path, temp_path)
         audio = AudioSegment.from_file(preprocessed_path)
         if audio.channels > 1:
             audio = audio.set_channels(1)
         audio = audio.set_frame_rate(16000)
         wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
         audio.export(wav_file, format="wav")
         return wav_file
     except Exception as e:
         logger.error(f"Audio conversion failed: {str(e)}")
         raise
     finally:
         # The temporary preprocessed file is not needed after export, and
         # must not be left behind when conversion fails.
         if temp_path and os.path.exists(temp_path):
             os.remove(temp_path)

 # Audio processing functions
 def preprocess_audio(audio_path: str, output_path: str) -> str:
     """Preprocess audio to improve quality before transcription.
     The input is converted to mono at 16000 Hz and normalized with pydub,
     exported to a temporary WAV under OUTPUT_DIR, reloaded with librosa and
     noise-reduced. If noise reduction (or writing its result) fails, the
     normalized audio is exported to ``output_path`` instead.
     Args:
         audio_path (str): Path to the input audio file.
         output_path (str): Path to save the preprocessed audio.
     Returns:
         str: ``output_path``, once the preprocessed audio has been written.
     Raises:
         ValueError: If the input file is missing or empty, or if no samples
             are loaded from the intermediate WAV.
         Exception: For other preprocessing errors (logged and re-raised).
     """
     temp_wav = None
     try:
         # Validate input file
         if not os.path.exists(audio_path):
             logger.error(f"Input audio file {audio_path} does not exist")
             raise ValueError(f"Audio file {audio_path} does not exist")
         if os.path.getsize(audio_path) == 0:
             logger.error(f"Input audio file {audio_path} is empty")
             raise ValueError(f"Audio file {audio_path} is empty")
         # Load and preprocess audio with pydub
         audio = AudioSegment.from_file(audio_path)
         logger.info(f"Input audio: {audio_path}, duration: {len(audio)/1000:.2f}s, channels: {audio.channels}")
         temp_wav = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
         audio = audio.set_channels(1).set_frame_rate(16000)
         audio = audio.normalize()
         audio.export(temp_wav, format="wav")
         # Load audio with librosa for noise reduction
         y, sr = librosa.load(temp_wav, sr=16000)
         if len(y) == 0:
             logger.error(f"Loaded audio {temp_wav} is empty")
             raise ValueError("Empty audio after loading")
         logger.info(f"Audio shape: {y.shape}, Sample rate: {sr}")
         # Attempt noise reduction; this step is best-effort, so any failure
         # falls back to the normalized (but not denoised) audio.
         try:
             reduced_noise = nr.reduce_noise(y=y, sr=sr)
             # Ensure reduced_noise is 1D for mono audio
             if reduced_noise.ndim > 1:
                 logger.warning(f"Reduced noise has unexpected shape {reduced_noise.shape}, flattening to 1D")
                 reduced_noise = reduced_noise.flatten()
             logger.info(f"Reduced noise shape: {reduced_noise.shape}")
             # NOTE(review): assumes `sf` is the soundfile module, whose
             # write() takes (file, data, samplerate). The data array used to
             # be passed first, so this call always raised and the fallback
             # below ran every time, silently skipping noise reduction.
             sf.write(output_path, reduced_noise, sr)
         except Exception as e:
             logger.warning(f"Noise reduction failed for {audio_path}: {str(e)}. Using normalized audio.")
             audio.export(output_path, format="wav")  # Fallback to normalized audio
         return output_path
     except Exception as e:
         logger.error(f"Audio preprocessing failed for {audio_path}: {str(e)}")
         raise
     finally:
         # Remove the intermediate WAV on both the success and failure paths.
         if temp_wav and os.path.exists(temp_wav):
             os.remove(temp_wav)
 def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
     """Produce a preprocessed WAV copy of an audio file.
     The input is run through ``preprocess_audio`` into a temporary file in
     ``output_dir``, re-read, forced to mono at 16000 Hz, and exported under a
     fresh UUID-based name. The temporary file is removed afterwards.
     Args:
         audio_path (str): Location of the audio file to convert.
         output_dir (str): Directory receiving the temporary and final WAV files.
     Returns:
         str: Path of the exported WAV file.
     Raises:
         ValueError: If ``audio_path`` does not exist or is a zero-byte file.
         Exception: Any other conversion failure; logged, then re-raised.
     """
     scratch_path = None
     try:
         # Reject missing or zero-byte inputs before doing any work
         if not os.path.exists(audio_path):
             logger.error(f"Input audio file {audio_path} does not exist")
             raise ValueError(f"Audio file {audio_path} does not exist")
         elif not os.path.getsize(audio_path):
             logger.error(f"Input audio file {audio_path} is empty")
             raise ValueError(f"Audio file {audio_path} is empty")
         # Preprocess into a uniquely named scratch file
         scratch_name = f"temp_{uuid.uuid4()}.wav"
         scratch_path = os.path.join(output_dir, scratch_name)
         preprocessed_path = preprocess_audio(audio_path, scratch_path)
         # Re-read the preprocessed audio and enforce mono / 16000 Hz
         audio = AudioSegment.from_file(preprocessed_path)
         if audio.channels > 1:
             logger.info(f"Converting {preprocessed_path} from {audio.channels} channels to mono")
             audio = audio.set_channels(1)
         audio = audio.set_frame_rate(16000)
         # Export under a new UUID name, then discard the scratch file
         wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
         audio.export(wav_file, format="wav")
         os.remove(scratch_path)
         logger.info(f"Successfully converted {audio_path} to {wav_file}")
         return wav_file
     except Exception as e:
         logger.error(f"Audio conversion failed for {audio_path}: {str(e)}")
         # Best-effort removal of the scratch file before propagating
         if scratch_path and os.path.exists(scratch_path):
             os.remove(scratch_path)
         raise