whisper_medium_ct2 / my_lyric_transcript.py

Upload my_lyric_transcript.py

2193982 verified about 1 year ago

5.61 kB

	#!pip install ctranslate2==4.4.0
	#!pip install faster_whisper==1.1.1
	#!pip install -q praat-textgrids
	#!pip install -q huggingface_hub &> /dev/null

	import os
	import textgrids
	import csv

	#############################################################################def

	def whisper_transcribe(model,sound_file,out_dir):
	    """Transcribe one audio file and save its transcript and word alignment.

	    Calls ``model.transcribe`` with VAD filtering and word timestamps, then
	    writes two files into ``out_dir``, both named after the base name of
	    ``sound_file``:

	    * ``<name>.lab``      - the text of every returned segment, concatenated.
	    * ``<name>.TextGrid`` - a ``words`` tier with one interval per word.

	    Args:
	        model: Object with a faster-whisper style ``transcribe`` method that
	            returns ``(segments, info)``.
	        sound_file: Path of the audio file to transcribe.
	        out_dir: Directory that receives the output files; created if missing.

	    Returns:
	        None. Failures are printed instead of raised so that a batch run can
	        continue with the next file.
	    """
	    import tempfile

	    try:
	        segments, _ = model.transcribe(sound_file,vad_filter=True,word_timestamps=True)
	        data = list(segments)
	        if not data:
	            print(f'no segments returned for {sound_file}')
	            return

	        # Use every segment, not just the first one, so long recordings are
	        # not truncated after the first VAD chunk.
	        transcription = ''.join(segment.text for segment in data)
	        words = [word for segment in data for word in (segment.words or [])]

	        file_name = os.path.basename(sound_file)  # eg: 'demo.mp3'
	        name = os.path.splitext(file_name)[0]     # eg: 'demo'

	        os.makedirs(out_dir, exist_ok=True)

	        # save .lab
	        with open(f'{out_dir}/{name}.lab', 'w', encoding='utf-8') as f:
	            f.write(transcription)

	        if not words:
	            print(f'no word timestamps for {sound_file}; TextGrid skipped')
	            return

	        # save .textgrid
	        # A private temporary file replaces the fixed 'words.csv' so that
	        # concurrent calls cannot overwrite each other's word list and no
	        # scratch file is left in the working directory.
	        fd, csv_path = tempfile.mkstemp(suffix='.csv')
	        try:
	            # Default encoding on purpose: tier_from_csv only receives a path
	            # and presumably opens it with the default encoding - confirm.
	            with os.fdopen(fd, 'w') as csv_file:
	                csv_file.writelines(
	                    f'{one.word.strip()};{one.start};{one.end}\n' for one in words
	                )

	            new_txtgrid = textgrids.TextGrid()
	            new_txtgrid.tier_from_csv('words', csv_path)
	            new_txtgrid.xmax = words[-1].end
	            new_txtgrid.write(f'{out_dir}/{name}.TextGrid')
	        finally:
	            os.remove(csv_path)

	    except Exception as e:
	        # Best effort by design, but report the failure instead of hiding it.
	        print(f'error transcribing {sound_file}: {e}')


	#############################################################################def

	def _whisper_transcribe(model,sound_file,out_dir):
	"""
	"""
	#import os
	#import textgrids
	#import csv


	try:
	segments, _ = model.transcribe(sound_file,vad_filter=True,word_timestamps=True)
	data = list(segments)
	transcription = data[0].text
	#print(transcription)

	file_name = os.path.basename(sound_file) # eg: 'demo.txt'
	name = os.path.splitext(file_name)[0] # eg: 'demo'

	# save .lab
	f = open(f'{out_dir}/{name}.lab','w')
	f.write(transcription)
	f.close()

	except:
	pass


	#############################################################################def
	def whisper_folder(model,in_dir,out_dir):
	    """Transcribe every ``.mp3`` and ``.wav`` file directly inside ``in_dir``.

	    Each matching file is passed to ``whisper_transcribe``, which writes its
	    outputs into ``out_dir``. ``out_dir`` is created if it does not exist.
	    A tqdm progress bar tracks the files.

	    Example::

	        from faster_whisper import WhisperModel
	        model = WhisperModel('/content/whisper_convert', device="cuda", compute_type="int8_float16")
	        whisper_folder(model, 'audio', 'labels')

	    Args:
	        model: Transcription model forwarded to ``whisper_transcribe``.
	        in_dir: Directory searched (non-recursively) for audio files.
	        out_dir: Directory that receives the generated files.
	    """
	    import os
	    import glob
	    from tqdm import tqdm

	    os.makedirs(out_dir, exist_ok=True)
	    # The '*' wildcard is required: '/.mp3' only matches a file that is
	    # literally named '.mp3'.
	    files = glob.glob(f'{in_dir}/*.mp3') + glob.glob(f'{in_dir}/*.wav')

	    for sound_file in tqdm(files):
	        try:
	            whisper_transcribe(model,sound_file,out_dir)
	        except Exception as e:
	            # Report the file that failed and continue with the rest.
	            print(f'error {sound_file}: {e}')

	#############################################################################def
	import os
	import textgrids
	from faster_whisper import WhisperModel
	from concurrent.futures import ThreadPoolExecutor
	from tqdm import tqdm

	def load_models_on_gpus(model_path, num_gpus, compute_type="float16"):
	    """Load one faster-whisper model per GPU and return them as a list.

	    Args:
	        model_path: Model size or path forwarded to ``WhisperModel``.
	        num_gpus: Number of GPUs to use; device indices ``0 .. num_gpus - 1``.
	        compute_type: Compute type forwarded to ``WhisperModel``.

	    Returns:
	        A list of ``(gpu_id, model)`` tuples, one per GPU index.
	    """
	    models = []
	    for gpu_id in range(num_gpus):
	        # Select the GPU through device_index. Rewriting CUDA_VISIBLE_DEVICES
	        # between loads does not retarget later models once the CUDA runtime
	        # has been initialised, and it mutates process-wide state.
	        model = WhisperModel(
	            model_path,
	            device="cuda",
	            device_index=gpu_id,
	            compute_type=compute_type,
	        )
	        models.append((gpu_id, model))
	    return models

	def process_file(model, sound_file, out_dir, gpu_id):
	    """Transcribe one file with the model that was loaded for a given GPU.

	    Writes ``<name>.lab`` (text of every segment, concatenated) and
	    ``<name>.TextGrid`` (a ``words`` tier with one interval per word) into
	    ``out_dir``, where ``<name>`` is the base name of ``sound_file``.

	    Args:
	        model: Object with a faster-whisper style ``transcribe`` method.
	        sound_file: Path of the audio file to transcribe.
	        out_dir: Directory that receives the output files; created if missing.
	        gpu_id: Identifier of the GPU the model belongs to; used here only in
	            the error message.

	    Returns:
	        None. Failures are printed instead of raised.
	    """
	    import tempfile

	    os.makedirs(out_dir, exist_ok=True)
	    try:
	        # CUDA_VISIBLE_DEVICES is deliberately not touched here: it is
	        # process-global, so setting it from worker threads races between
	        # threads and does not move an already loaded model.
	        segments, _ = model.transcribe(sound_file, vad_filter=True, word_timestamps=True)
	        data = list(segments)
	        if not data:
	            print(f'no segments returned for {sound_file}')
	            return

	        # Use every segment so text and words cover the same span as xmax.
	        transcription = ''.join(segment.text for segment in data)
	        words = [word for segment in data for word in (segment.words or [])]

	        file_name = os.path.basename(sound_file)
	        name = os.path.splitext(file_name)[0]

	        with open(f'{out_dir}/{name}.lab', 'w', encoding='utf-8') as f:
	            f.write(transcription)

	        if not words:
	            print(f'no word timestamps for {sound_file}; TextGrid skipped')
	            return

	        # Each call gets its own temporary CSV. A shared 'words.csv' would be
	        # overwritten by other worker threads between the write and the read.
	        fd, csv_path = tempfile.mkstemp(suffix='.csv')
	        try:
	            # Default encoding on purpose: tier_from_csv only receives a path
	            # and presumably opens it with the default encoding - confirm.
	            with os.fdopen(fd, 'w') as csv_file:
	                csv_file.writelines(
	                    f'{one.word.strip()};{one.start};{one.end}\n' for one in words
	                )

	            new_txtgrid = textgrids.TextGrid()
	            new_txtgrid.tier_from_csv('words', csv_path)
	            new_txtgrid.xmax = data[-1].end
	            new_txtgrid.write(f'{out_dir}/{name}.TextGrid')
	        finally:
	            os.remove(csv_path)

	    except Exception as e:
	        print(f"Lỗi khi xử lý {sound_file} trên GPU {gpu_id}: {e}")

	def whisper_transcribe_parallel(models, sound_files, out_dir):
	    """Transcribe many files in parallel, spreading them over the given models.

	    Files are assigned to the ``(gpu_id, model)`` pairs in ``models`` in
	    round-robin order and submitted to a thread pool with one worker per
	    model. Each job runs ``process_file``.

	    Args:
	        models: Sequence of ``(gpu_id, model)`` tuples.
	        sound_files: Iterable of audio file paths.
	        out_dir: Output directory forwarded to ``process_file``.
	    """
	    model_count = len(models)
	    with ThreadPoolExecutor(max_workers=model_count) as pool:
	        pending = []
	        for index, audio_path in enumerate(sound_files):
	            device_id, assigned_model = models[index % model_count]
	            job = pool.submit(process_file, assigned_model, audio_path, out_dir, device_id)
	            pending.append(job)

	        # Show a progress bar with tqdm while waiting on each job in
	        # submission order.
	        for job in tqdm(pending, desc="Đang xử lý"):
	            job.result()  # wait for the result

	#############################################################################def

	def folder_upload(repo_id,local_folder,hub_folder,repo_type,hub_token):
	    """Upload a local folder to a Hugging Face Hub repository.

	    The repository is created first if it does not exist yet, then the
	    contents of ``local_folder`` are uploaded under ``hub_folder``.

	    Args:
	        repo_id: Target repository id.
	        local_folder: Path of the local folder to upload.
	        hub_folder: Destination path inside the repository.
	        repo_type: "model", "dataset" or "space".
	        hub_token: Access token used for both the create and the upload call.
	    """
	    # Import only what is used. The previous unused import of the deprecated
	    # `Repository` class added a needless failure point.
	    from huggingface_hub import create_repo
	    from huggingface_hub import upload_folder

	    # create new repo if not existing
	    create_repo(repo_id, repo_type=repo_type, exist_ok=True, token=hub_token)

	    upload_folder(
	        repo_id=repo_id,
	        folder_path=local_folder,
	        path_in_repo=hub_folder,
	        repo_type=repo_type,
	        token=hub_token,
	    )
	    print(f'Uploaded {local_folder} to {repo_id}')