# lyric_pho / pho.py
# naminh93's picture
# Upload 4 files
# 8de85da verified
# conda install -c conda-forge numpy=1.24.4 scipy librosa soundfile numba llvmlite -y
# conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia -y
# conda install pytorch torchvision torchaudio cpuonly -c pytorch -y
# pip install --no-cache-dir transformers accelerate openai-whisper
# pip install soundfile
# pip install librosa
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import torch
import soundfile as sf
import librosa
# ===== LOAD MODEL =====
# The audio is resampled to this rate on load and the same rate is reported to
# the processor; a single constant keeps the two values in sync.
SAMPLE_RATE = 16000
MODEL_ID = "naminh93/lyric_pho"

# NOTE(review): language/task are passed to the processor only; presumably the
# checkpoint's own generation config selects Vietnamese transcription during
# generate() -- confirm against the model repository.
processor = WhisperProcessor.from_pretrained(MODEL_ID, language="vi", task="transcribe")
model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID)

# ===== READ AUDIO FILE =====
audio_path = "1.wav"  # change this to the name of your own file
audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE)  # force-resample to 16 kHz

# ===== PROCESS =====
# NOTE(review): Whisper feature extraction presumably works on a fixed-length
# window, so a long recording may be truncated -- confirm before using this
# script on full-length songs.
inputs = processor(audio, sampling_rate=SAMPLE_RATE, return_tensors="pt")
with torch.no_grad():  # inference only: skip gradient tracking
    predicted_ids = model.generate(inputs["input_features"])

# ===== GET RESULT =====
# batch_decode returns one string per batch item; a single clip was submitted,
# so the first (only) entry is the transcription.
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
print("\n===== KẾT QUẢ NHẬN DẠNG GIỌNG NÓI =====")
print(transcription)