systemofapwne
/

piper-de-glados

Model card Files Files and versions

piper-de-glados / 2_transcribe.py

systemofapwne's picture

Initial commit

072e103 about 1 year ago

history blame contribute delete

1.63 kB

	#!/usr/bin/env python3

	# Inspired by https://blog.networkchuck.com/posts/how-to-clone-a-voice/
	# Enhanced by https://github.com/SYSTRAN/faster-whisper

	import os
	from faster_whisper import WhisperModel

	# Language and Whisper model used for the transcription.
	# A large model running on a GPU is strongly recommended.
	LANG = "de"
	WHISPER_MODEL = "large-v3"

	# Run on GPU
	model = WhisperModel(WHISPER_MODEL, device="cuda", compute_type="float16")
	# or run on CPU with INT8 (will take ages)
	#model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8")

	# Directory containing the audio clips, and the metadata file to generate
	audio_dir = "./raw_good"
	output_csv = "./metadata.csv"

	# Collect all .wav files of the directory in alphabetical order
	audio_files = sorted(name for name in os.listdir(audio_dir) if name.endswith(".wav"))

	# Write one "<file id>|<transcription>" line per audio file.
	# The encoding is pinned to UTF-8 so that non-ASCII characters (e.g. German
	# umlauts) are written the same way on every platform instead of depending
	# on the locale's default encoding.
	with open(output_csv, "w", encoding="utf-8") as f:
	    for audio_file in audio_files:
	        # Full path to the audio file
	        audio_path = os.path.join(audio_dir, audio_file)

	        # Only the segments are used below; info is kept for inspection
	        segments, info = model.transcribe(audio_path, language=LANG, beam_size=5)

	        # Join the whitespace-stripped text of all segments with single spaces
	        transcription = " ".join(seg.text.strip() for seg in segments).strip()

	        # File name without its extension serves as the entry id
	        file_id = os.path.splitext(audio_file)[0]

	        # Use a plain "|" as delimiter: "\|" is an invalid escape sequence
	        # and would put a literal backslash into every metadata line.
	        line = f"{file_id}|{transcription}"
	        f.write(line + "\n")
	        print(line)

	print(f"Transcriptions complete! Metadata saved to {output_csv}")