Update process_interview.py
Browse files- process_interview.py +73 -7
process_interview.py
CHANGED
|
@@ -127,39 +127,105 @@ speaker_model, nlp, tokenizer, llm_model = load_models()
|
|
| 127 |
|
| 128 |
# Audio processing functions
|
| 129 |
def preprocess_audio(audio_path: str, output_path: str) -> str:
|
| 130 |
-
"""Preprocess audio to improve quality before transcription.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
audio = AudioSegment.from_file(audio_path)
|
|
|
|
| 133 |
temp_wav = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
|
| 134 |
audio = audio.set_channels(1).set_frame_rate(16000)
|
| 135 |
audio = audio.normalize()
|
| 136 |
audio.export(temp_wav, format="wav")
|
|
|
|
|
|
|
| 137 |
y, sr = librosa.load(temp_wav, sr=16000)
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
os.remove(temp_wav)
|
| 141 |
return output_path
|
| 142 |
except Exception as e:
|
| 143 |
-
logger.error(f"Audio preprocessing failed: {str(e)}")
|
| 144 |
-
if os.path.exists(temp_wav):
|
| 145 |
os.remove(temp_wav)
|
| 146 |
raise
|
| 147 |
|
| 148 |
def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
temp_path = os.path.join(output_dir, f"temp_{uuid.uuid4()}.wav")
|
| 151 |
preprocessed_path = preprocess_audio(audio_path, temp_path)
|
| 152 |
audio = AudioSegment.from_file(preprocessed_path)
|
| 153 |
if audio.channels > 1:
|
|
|
|
| 154 |
audio = audio.set_channels(1)
|
| 155 |
audio = audio.set_frame_rate(16000)
|
| 156 |
wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
|
| 157 |
audio.export(wav_file, format="wav")
|
| 158 |
os.remove(temp_path)
|
|
|
|
| 159 |
return wav_file
|
| 160 |
except Exception as e:
|
| 161 |
-
logger.error(f"Audio conversion failed: {str(e)}")
|
| 162 |
-
if os.path.exists(temp_path):
|
| 163 |
os.remove(temp_path)
|
| 164 |
raise
|
| 165 |
|
|
|
|
| 127 |
|
| 128 |
# Audio processing functions
|
| 129 |
def preprocess_audio(audio_path: str, output_path: str) -> str:
    """Preprocess audio to improve quality before transcription.

    The input is loaded with pydub, converted to mono at 16 kHz, normalized,
    and written to a temporary WAV under ``OUTPUT_DIR``. That WAV is reloaded
    with librosa and passed through ``nr.reduce_noise``; the result is written
    to ``output_path``. If noise reduction (or writing its result) fails, the
    normalized audio is exported to ``output_path`` instead.

    Args:
        audio_path (str): Path to the input audio file.
        output_path (str): Path to save the preprocessed audio.

    Returns:
        str: Path to the preprocessed audio file (``output_path``).

    Raises:
        ValueError: If the input file does not exist, is empty, or yields no
            samples after loading.
        Exception: Any other preprocessing error is logged and re-raised.
    """
    # Validate before entering the try block so that a bad input is logged
    # once here rather than a second time by the generic handler below.
    if not os.path.exists(audio_path):
        logger.error(f"Input audio file {audio_path} does not exist")
        raise ValueError(f"Audio file {audio_path} does not exist")
    if os.path.getsize(audio_path) == 0:
        logger.error(f"Input audio file {audio_path} is empty")
        raise ValueError(f"Audio file {audio_path} is empty")

    temp_wav = None
    try:
        # Load and preprocess audio with pydub
        audio = AudioSegment.from_file(audio_path)
        logger.info(f"Input audio: {audio_path}, duration: {len(audio)/1000:.2f}s, channels: {audio.channels}")
        temp_wav = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
        audio = audio.set_channels(1).set_frame_rate(16000)
        audio = audio.normalize()
        audio.export(temp_wav, format="wav")

        # Load audio with librosa for noise reduction
        y, sr = librosa.load(temp_wav, sr=16000)
        if len(y) == 0:
            logger.error(f"Loaded audio {temp_wav} is empty")
            raise ValueError("Empty audio after loading")

        logger.info(f"Audio shape: {y.shape}, Sample rate: {sr}")

        # Noise reduction is best-effort: any failure falls back to the
        # normalized audio rather than aborting the whole pipeline.
        try:
            reduced_noise = nr.reduce_noise(y=y, sr=sr)
            # Ensure reduced_noise is 1D for mono audio
            if reduced_noise.ndim > 1:
                logger.warning(f"Reduced noise has unexpected shape {reduced_noise.shape}, flattening to 1D")
                reduced_noise = reduced_noise.flatten()
            logger.info(f"Reduced noise shape: {reduced_noise.shape}")
            # NOTE(review): assumes `sf` is the `soundfile` module, whose
            # signature is write(file, data, samplerate). The previous call
            # passed the array first, so it always raised and the fallback
            # below was silently used on every run.
            sf.write(output_path, reduced_noise, sr)
        except Exception as e:
            logger.warning(f"Noise reduction failed for {audio_path}: {str(e)}. Using normalized audio.")
            audio.export(output_path, format="wav")  # Fallback to normalized audio

        return output_path
    except Exception as e:
        logger.error(f"Audio preprocessing failed for {audio_path}: {str(e)}")
        raise
    finally:
        # Single cleanup point for the intermediate WAV on success and failure.
        if temp_wav and os.path.exists(temp_wav):
            os.remove(temp_wav)
|
| 189 |
|
| 190 |
def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
    """Produce a preprocessed mono 16 kHz WAV copy of an audio file.

    The input is first run through ``preprocess_audio`` into a temporary WAV
    inside ``output_dir``. That intermediate file is then reloaded, forced to
    a single channel at 16 kHz, exported under a fresh UUID-based name, and
    the intermediate file is deleted.

    Args:
        audio_path (str): Path to the input audio file.
        output_dir (str): Directory that receives both the temporary file and
            the final WAV file.

    Returns:
        str: Path to the newly written WAV file.

    Raises:
        ValueError: If the input file does not exist or is empty.
        Exception: Any other conversion error is logged and re-raised.
    """
    scratch_wav = None
    try:
        # Reject unusable inputs up front; the size check is only reached
        # when the file actually exists.
        is_missing = not os.path.exists(audio_path)
        if is_missing or os.path.getsize(audio_path) == 0:
            problem = "does not exist" if is_missing else "is empty"
            logger.error(f"Input audio file {audio_path} {problem}")
            raise ValueError(f"Audio file {audio_path} {problem}")

        scratch_wav = os.path.join(output_dir, f"temp_{uuid.uuid4()}.wav")
        cleaned_path = preprocess_audio(audio_path, scratch_wav)

        segment = AudioSegment.from_file(cleaned_path)
        channel_count = segment.channels
        if channel_count > 1:
            logger.info(f"Converting {cleaned_path} from {channel_count} channels to mono")
            segment = segment.set_channels(1)
        segment = segment.set_frame_rate(16000)

        destination = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
        segment.export(destination, format="wav")

        # The intermediate file is no longer needed once the final WAV exists.
        os.remove(scratch_wav)
        logger.info(f"Successfully converted {audio_path} to {destination}")
        return destination
    except Exception as exc:
        logger.error(f"Audio conversion failed for {audio_path}: {exc!s}")
        # Do not leave the intermediate file behind when conversion fails.
        if scratch_wav is not None and os.path.exists(scratch_wav):
            os.remove(scratch_wav)
        raise
|
| 231 |
|