# Transcribe all .wav clips in a directory with faster-whisper and write a
# pipe-delimited metadata CSV ("<file_id>|<transcription>" per line).
| import os |
| from faster_whisper import WhisperModel |
|
|
| |
| |
# --- Configuration ---------------------------------------------------------
LANG = "de"                    # transcription language (ISO 639-1 code)
WHISPER_MODEL = "large-v3"     # faster-whisper checkpoint name

# Load the model once, up front. float16 on CUDA roughly halves GPU memory
# versus float32 with negligible accuracy loss.
# NOTE(review): assumes a CUDA-capable GPU is available — confirm, or fall
# back to device="cpu", compute_type="int8" on machines without one.
model = WhisperModel(WHISPER_MODEL, device="cuda", compute_type="float16")
| |
| |
|
|
| |
# --- Paths -----------------------------------------------------------------
audio_dir = "./raw_good"       # input directory of .wav clips
output_csv = "./metadata.csv"  # output metadata file (pipe-delimited)

# Collect the .wav files and sort by name for a deterministic processing
# order (and therefore deterministic CSV row order across runs).
audio_files = sorted(f for f in os.listdir(audio_dir) if f.endswith(".wav"))

# Write one "<file_id>|<transcription>" line per clip.
# encoding="utf-8" is required: German transcriptions contain non-ASCII
# characters (umlauts, ß) and the platform default encoding may not.
with open(output_csv, "w", encoding="utf-8") as f:
    for audio_file in audio_files:
        audio_path = os.path.join(audio_dir, audio_file)

        # Beam search (beam_size=5) trades speed for accuracy; the returned
        # info object (detected language, duration, ...) is not needed here.
        segments, _info = model.transcribe(audio_path, language=LANG, beam_size=5)

        # Join segment texts with single spaces; str.join avoids the
        # quadratic cost of repeated `+=` string concatenation.
        transcription = " ".join(seg.text.strip() for seg in segments)

        # File id is the bare filename without its .wav extension.
        file_id = os.path.splitext(audio_file)[0]

        # NOTE(review): a literal "|" inside a transcription would corrupt
        # the pipe-delimited format — confirm downstream tolerance or
        # sanitize before writing.
        f.write(f"{file_id}|{transcription}\n")
        print(f"{file_id}|{transcription}")  # progress/log mirror of the CSV row

print(f"Transcriptions complete! Metadata saved to {output_csv}")
|
|