# whisper_medium_ct2 / my_lyric_transcript.py
# Uploaded by chunping-vi -- commit 2193982 (verified): "Upload my_lyric_transcript.py"
#!pip install ctranslate2==4.4.0
#!pip install faster_whisper==1.1.1
#!pip install -q praat-textgrids
#!pip install -q huggingface_hub &> /dev/null
import os
import textgrids
import csv
#############################################################################def
def whisper_transcribe(model, sound_file, out_dir):
    """Transcribe one audio file into a word-level TextGrid and a .lab transcript.

    Writes ``<out_dir>/<name>.TextGrid`` (a single ``words`` tier built from the
    word timestamps) and ``<out_dir>/<name>.lab`` (the transcript text), where
    ``<name>`` is the base name of ``sound_file`` without its extension.

    NOTE(review): only the FIRST segment returned by ``model.transcribe`` is
    used for both outputs; any later segments are ignored. This matches the
    previous behaviour -- confirm it is intended for multi-segment audio.

    Args:
        model: Object exposing ``transcribe(path, vad_filter=..., word_timestamps=...)``
            that returns ``(segments, info)``; the usage example in
            ``whisper_folder`` passes a faster-whisper ``WhisperModel``.
        sound_file: Path of the audio file to transcribe.
        out_dir: Directory that receives the output files. It is not created here.

    Returns:
        None. This is a best-effort call: failures are reported on stdout and
        nothing is raised for ordinary errors.
    """
    import tempfile

    try:
        segments, _ = model.transcribe(sound_file, vad_filter=True, word_timestamps=True)
        data = list(segments)
        if not data:
            print(f'error {sound_file}: no speech segments detected')
            return
        first_segment = data[0]
        transcription = first_segment.text
        words = first_segment.words
        if not words:
            print(f'error {sound_file}: first segment has no word timestamps')
            return

        # The CSV is only a hand-off format for TextGrid.tier_from_csv, so keep it
        # in a private temporary directory instead of clobbering ./words.csv.
        with tempfile.TemporaryDirectory() as tmp_dir:
            csv_path = os.path.join(tmp_dir, 'words.csv')
            # Default encoding on purpose: tier_from_csv re-opens this file itself
            # (presumably with the same default -- verify against praat-textgrids).
            with open(csv_path, 'w') as csv_file:
                csv_file.writelines(
                    f'{word.word.strip()};{word.start};{word.end}\n' for word in words
                )
            new_txtgrid = textgrids.TextGrid()
            new_txtgrid.tier_from_csv('words', csv_path)

        # The grid ends where the last word of the first segment ends.
        new_txtgrid.xmax = words[-1].end

        file_name = os.path.basename(sound_file)  # eg: 'demo.wav'
        name = os.path.splitext(file_name)[0]     # eg: 'demo'
        # save .TextGrid
        new_txtgrid.write(f'{out_dir}/{name}.TextGrid')
        # save .lab (explicit UTF-8 so non-ASCII lyrics do not depend on the locale)
        with open(f'{out_dir}/{name}.lab', 'w', encoding='utf-8') as lab_file:
            lab_file.write(transcription)
    except Exception as exc:
        # Best effort: report and move on. KeyboardInterrupt/SystemExit are
        # deliberately not caught so a batch run can still be interrupted.
        print(f'error {sound_file}: {exc}')
#############################################################################def
def _whisper_transcribe(model,sound_file,out_dir):
"""
"""
#import os
#import textgrids
#import csv
try:
segments, _ = model.transcribe(sound_file,vad_filter=True,word_timestamps=True)
data = list(segments)
transcription = data[0].text
#print(transcription)
file_name = os.path.basename(sound_file) # eg: 'demo.txt'
name = os.path.splitext(file_name)[0] # eg: 'demo'
# save .lab
f = open(f'{out_dir}/{name}.lab','w')
f.write(transcription)
f.close()
except:
pass
#############################################################################def
def whisper_folder(model, in_dir, out_dir):
    """Transcribe every ``.mp3`` and ``.wav`` file directly inside ``in_dir``.

    Each file is passed to ``whisper_transcribe``, which writes its outputs into
    ``out_dir``. ``out_dir`` is created if it does not exist. Sub-directories of
    ``in_dir`` are not searched.

    Example of building ``model``::

        from faster_whisper import WhisperModel
        model = WhisperModel('/content/whisper_convert', device="cuda", compute_type="int8_float16")

    Args:
        model: Transcription model forwarded unchanged to ``whisper_transcribe``.
        in_dir: Directory containing the audio files.
        out_dir: Directory that receives the generated files.

    Returns:
        None. A failure on one file is printed and the loop continues.
    """
    import os
    import glob
    from tqdm import tqdm

    os.makedirs(out_dir, exist_ok=True)
    files = glob.glob(f'{in_dir}/*.mp3') + glob.glob(f'{in_dir}/*.wav')
    for sound_file in tqdm(files):
        try:
            whisper_transcribe(model, sound_file, out_dir)
        except Exception as exc:
            # Report the file that failed (the old message referenced an
            # undefined name) and keep going with the remaining files.
            print(f'error {sound_file}: {exc}')
#############################################################################def
import os
import textgrids
from faster_whisper import WhisperModel
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
def load_models_on_gpus(model_path, num_gpus, compute_type="float16"):
    """Load one faster-whisper model per GPU and return them with their GPU ids.

    Args:
        model_path: Model path or identifier passed to ``WhisperModel``.
        num_gpus: Number of GPUs to use; models are created for ids
            ``0 .. num_gpus - 1``.
        compute_type: Compute type passed to ``WhisperModel``.

    Returns:
        A list of ``(gpu_id, model)`` tuples, one per GPU, in ascending id order.
    """
    # Select the device with ``device_index``. Rewriting CUDA_VISIBLE_DEVICES
    # between loads does not work: the variable is only honoured when CUDA is
    # first initialised in the process, so every later model would end up on the
    # same GPU as the first one, and the process environment would be left
    # modified for everything else.
    return [
        (
            gpu_id,
            WhisperModel(
                model_path,
                device="cuda",
                device_index=gpu_id,
                compute_type=compute_type,
            ),
        )
        for gpu_id in range(num_gpus)
    ]
def process_file(model, sound_file, out_dir, gpu_id):
    """Transcribe one file with the given model; intended to run in a worker thread.

    Writes ``<out_dir>/<name>.TextGrid`` (a ``words`` tier from the word
    timestamps of the first segment) and ``<out_dir>/<name>.lab`` (the text of
    the first segment). ``out_dir`` is created if needed.

    NOTE(review): words and transcript come from the FIRST segment only, while
    the grid's ``xmax`` is the end of the LAST segment. This matches the
    previous behaviour -- confirm it is intended.

    Args:
        model: Object exposing ``transcribe(path, vad_filter=..., word_timestamps=...)``
            that returns ``(segments, info)``.
        sound_file: Path of the audio file to transcribe.
        out_dir: Directory that receives the output files.
        gpu_id: Identifier of the GPU the model was loaded on. It is used only
            in the error message; the device is fixed when the model is created.

    Returns:
        None. Errors are caught and printed so one bad file does not abort a batch.
    """
    import tempfile

    os.makedirs(out_dir, exist_ok=True)
    try:
        # CUDA_VISIBLE_DEVICES is intentionally not touched here: changing it
        # after the model is loaded has no effect on that model, and writing a
        # process-wide variable from several threads is racy.
        segments, _ = model.transcribe(sound_file, vad_filter=True, word_timestamps=True)
        data = list(segments)
        if not data:
            raise ValueError("no speech segments detected")
        first_segment = data[0]
        transcription = first_segment.text

        # Each call gets its own scratch directory. A shared ./words.csv would be
        # overwritten by other worker threads between writing and reading it.
        with tempfile.TemporaryDirectory() as tmp_dir:
            csv_path = os.path.join(tmp_dir, 'words.csv')
            # Default encoding on purpose: tier_from_csv re-opens this file itself
            # (presumably with the same default -- verify against praat-textgrids).
            with open(csv_path, "w") as csv_file:
                csv_file.writelines(
                    f'{word.word.strip()};{word.start};{word.end}\n'
                    for word in first_segment.words
                )
            new_txtgrid = textgrids.TextGrid()
            new_txtgrid.tier_from_csv('words', csv_path)

        new_txtgrid.xmax = data[-1].end

        file_name = os.path.basename(sound_file)
        name = os.path.splitext(file_name)[0]
        new_txtgrid.write(f'{out_dir}/{name}.TextGrid')
        # Explicit UTF-8 so non-ASCII lyrics do not depend on the locale.
        with open(f'{out_dir}/{name}.lab', 'w', encoding='utf-8') as f:
            f.write(transcription)
    except Exception as e:
        print(f"Lỗi khi xử lý {sound_file} trên GPU {gpu_id}: {e}")
def whisper_transcribe_parallel(models, sound_files, out_dir):
    """Transcribe many files concurrently with a thread pool, one worker per model.

    Files are handed to the ``(gpu_id, model)`` pairs in ``models`` in
    round-robin order of their position in ``sound_files``; each job runs
    ``process_file``. A progress bar advances as the jobs are awaited in
    submission order.

    Args:
        models: Sequence of ``(gpu_id, model)`` tuples, e.g. the result of
            ``load_models_on_gpus``.
        sound_files: Iterable of audio file paths.
        out_dir: Directory forwarded to ``process_file`` for the outputs.

    Returns:
        None.
    """
    model_count = len(models)
    with ThreadPoolExecutor(max_workers=model_count) as executor:
        pending = []
        for position, audio_path in enumerate(sound_files):
            device_id, device_model = models[position % model_count]
            job = executor.submit(process_file, device_model, audio_path, out_dir, device_id)
            pending.append(job)

        # Show a progress bar while waiting for each job to finish.
        for job in tqdm(pending, desc="Đang xử lý"):
            job.result()
#############################################################################def
def folder_upload(repo_id, local_folder, hub_folder, repo_type, hub_token):
    """Upload a local folder to a Hugging Face Hub repository, creating the repo if needed.

    Args:
        repo_id: Target repository id on the Hub.
        local_folder: Path of the local folder to upload.
        hub_folder: Destination path inside the repository.
        repo_type: One of "model", "dataset", "space".
        hub_token: Access token used for both repo creation and upload.

    Returns:
        None. A confirmation line is printed after the upload call returns.
    """
    # Import only what is used. The old import of ``Repository`` was unused, and
    # that class is deprecated in huggingface_hub, so importing it can fail on
    # newer releases.
    from huggingface_hub import create_repo, upload_folder

    # create new repo if not existing
    create_repo(repo_id, repo_type=repo_type, exist_ok=True, token=hub_token)
    upload_folder(
        repo_id=repo_id,
        folder_path=local_folder,
        path_in_repo=hub_folder,
        repo_type=repo_type,
        token=hub_token,
    )
    print(f'Uploaded {local_folder} to {repo_id}')