norhan12 committed on
Commit
69c67cd
·
verified ·
1 Parent(s): b655de4

Update process_interview.py

Browse files
Files changed (1) hide show
  1. process_interview.py +73 -7
process_interview.py CHANGED
@@ -127,39 +127,105 @@ speaker_model, nlp, tokenizer, llm_model = load_models()
127
 
128
  # Audio processing functions
129
  def preprocess_audio(audio_path: str, output_path: str) -> str:
130
- """Preprocess audio to improve quality before transcription."""
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  try:
 
 
 
 
 
 
 
 
 
132
  audio = AudioSegment.from_file(audio_path)
 
133
  temp_wav = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
134
  audio = audio.set_channels(1).set_frame_rate(16000)
135
  audio = audio.normalize()
136
  audio.export(temp_wav, format="wav")
 
 
137
  y, sr = librosa.load(temp_wav, sr=16000)
138
- reduced_noise = nr.reduce_noise(y=y, sr=sr)
139
- sf.write(reduced_noise, output_path, sr)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  os.remove(temp_wav)
141
  return output_path
142
  except Exception as e:
143
- logger.error(f"Audio preprocessing failed: {str(e)}")
144
- if os.path.exists(temp_wav):
145
  os.remove(temp_wav)
146
  raise
147
 
148
  def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  try:
 
 
 
 
 
 
 
 
150
  temp_path = os.path.join(output_dir, f"temp_{uuid.uuid4()}.wav")
151
  preprocessed_path = preprocess_audio(audio_path, temp_path)
152
  audio = AudioSegment.from_file(preprocessed_path)
153
  if audio.channels > 1:
 
154
  audio = audio.set_channels(1)
155
  audio = audio.set_frame_rate(16000)
156
  wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
157
  audio.export(wav_file, format="wav")
158
  os.remove(temp_path)
 
159
  return wav_file
160
  except Exception as e:
161
- logger.error(f"Audio conversion failed: {str(e)}")
162
- if os.path.exists(temp_path):
163
  os.remove(temp_path)
164
  raise
165
 
 
127
 
128
  # Audio processing functions
129
  def preprocess_audio(audio_path: str, output_path: str) -> str:
130
+ """Preprocess audio to improve quality before transcription.
131
+
132
+ Args:
133
+ audio_path (str): Path to the input audio file.
134
+ output_path (str): Path to save the preprocessed audio.
135
+
136
+ Returns:
137
+ str: Path to the preprocessed audio file.
138
+
139
+ Raises:
140
+ ValueError: If the input audio file is invalid or empty.
141
+ Exception: For other preprocessing errors.
142
+ """
143
+ temp_wav = None
144
  try:
145
+ # Validate input file
146
+ if not os.path.exists(audio_path):
147
+ logger.error(f"Input audio file {audio_path} does not exist")
148
+ raise ValueError(f"Audio file {audio_path} does not exist")
149
+ if os.path.getsize(audio_path) == 0:
150
+ logger.error(f"Input audio file {audio_path} is empty")
151
+ raise ValueError(f"Audio file {audio_path} is empty")
152
+
153
+ # Load and preprocess audio with pydub
154
  audio = AudioSegment.from_file(audio_path)
155
+ logger.info(f"Input audio: {audio_path}, duration: {len(audio)/1000:.2f}s, channels: {audio.channels}")
156
  temp_wav = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
157
  audio = audio.set_channels(1).set_frame_rate(16000)
158
  audio = audio.normalize()
159
  audio.export(temp_wav, format="wav")
160
+
161
+ # Load audio with librosa for noise reduction
162
  y, sr = librosa.load(temp_wav, sr=16000)
163
+ if len(y) == 0:
164
+ logger.error(f"Loaded audio {temp_wav} is empty")
165
+ raise ValueError("Empty audio after loading")
166
+
167
+ logger.info(f"Audio shape: {y.shape}, Sample rate: {sr}")
168
+
169
+ # Attempt noise reduction
170
+ try:
171
+ reduced_noise = nr.reduce_noise(y=y, sr=sr)
172
+ # Ensure reduced_noise is 1D for mono audio
173
+ if reduced_noise.ndim > 1:
174
+ logger.warning(f"Reduced noise has unexpected shape {reduced_noise.shape}, flattening to 1D")
175
+ reduced_noise = reduced_noise.flatten()
176
+ logger.info(f"Reduced noise shape: {reduced_noise.shape}")
177
+ sf.write(reduced_noise, output_path, sr)
178
+ except Exception as e:
179
+ logger.warning(f"Noise reduction failed for {audio_path}: {str(e)}. Using normalized audio.")
180
+ audio.export(output_path, format="wav") # Fallback to normalized audio
181
+
182
  os.remove(temp_wav)
183
  return output_path
184
  except Exception as e:
185
+ logger.error(f"Audio preprocessing failed for {audio_path}: {str(e)}")
186
+ if temp_wav and os.path.exists(temp_wav):
187
  os.remove(temp_wav)
188
  raise
189
 
190
  def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
191
+ """Convert audio file to WAV format with preprocessing.
192
+
193
+ Args:
194
+ audio_path (str): Path to the input audio file.
195
+ output_dir (str): Directory to store the output WAV file.
196
+
197
+ Returns:
198
+ str: Path to the converted WAV file.
199
+
200
+ Raises:
201
+ ValueError: If the input audio file is invalid.
202
+ Exception: For other conversion errors.
203
+ """
204
+ temp_path = None
205
  try:
206
+ # Validate input file
207
+ if not os.path.exists(audio_path):
208
+ logger.error(f"Input audio file {audio_path} does not exist")
209
+ raise ValueError(f"Audio file {audio_path} does not exist")
210
+ if os.path.getsize(audio_path) == 0:
211
+ logger.error(f"Input audio file {audio_path} is empty")
212
+ raise ValueError(f"Audio file {audio_path} is empty")
213
+
214
  temp_path = os.path.join(output_dir, f"temp_{uuid.uuid4()}.wav")
215
  preprocessed_path = preprocess_audio(audio_path, temp_path)
216
  audio = AudioSegment.from_file(preprocessed_path)
217
  if audio.channels > 1:
218
+ logger.info(f"Converting {preprocessed_path} from {audio.channels} channels to mono")
219
  audio = audio.set_channels(1)
220
  audio = audio.set_frame_rate(16000)
221
  wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
222
  audio.export(wav_file, format="wav")
223
  os.remove(temp_path)
224
+ logger.info(f"Successfully converted {audio_path} to {wav_file}")
225
  return wav_file
226
  except Exception as e:
227
+ logger.error(f"Audio conversion failed for {audio_path}: {str(e)}")
228
+ if temp_path and os.path.exists(temp_path):
229
  os.remove(temp_path)
230
  raise
231