arabic-transcriber / preprocessing.py
hana92's picture
feat: use jiwer for evaluation
5bdc953
raw
history blame contribute delete
412 Bytes
# Function to preprocess audio: convert to 16kHz mono WAV
# def preprocess_audio(input_path):
#     audio = AudioSegment.from_file(input_path)
#     audio = audio.set_frame_rate(16000).set_channels(1)
#     # Export to temp WAV file for Whisper input
#     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
#         audio.export(tmp_wav.name, format="wav")
#         return tmp_wav.name