"""Audio transcription helpers built on faster-whisper's batched pipeline."""
import os
from faster_whisper import WhisperModel, BatchedInferencePipeline
import re
def clean_text(text: str) -> str:
    """Normalize whitespace in *text*.

    Collapses every run of whitespace (spaces, tabs, newlines) into a
    single space and strips leading/trailing whitespace — Whisper segment
    text typically carries a leading space.

    :param text: raw transcript text
    :return: whitespace-normalized text
    """
    return re.sub(r'\s+', ' ', text).strip()
def preprocess_transcript(segments: list):
    """Convert raw transcription segments into plain dicts.

    Each returned dict contains:
    - start: segment start time
    - end: segment end time
    - text: the segment text, run through clean_text()
    """
    return [
        {
            'start': seg.start,
            'end': seg.end,
            'text': clean_text(seg.text),
        }
        for seg in segments
    ]
def transcript_audio(
        input_audio: str = "audio.mp3",
        model_size: str = "base",
        device: str = "cpu",
        compute_type: str = "int8",  # alternatives: float16, float32
        beam_size: int = 5,
        vad_filter: bool = False,
        batch_size: int = 16):
    """Transcribe an audio file using faster-whisper's batched pipeline.

    :param input_audio: path to the audio file to transcribe
    :param model_size: Whisper model size (e.g. "base", "small")
    :param device: inference device ("cpu" or "cuda")
    :param compute_type: quantization / precision used for inference
    :param beam_size: beam-search width for decoding
    :param vad_filter: enable voice-activity-detection filtering
    :param batch_size: number of audio chunks decoded per batch
    :return: list of {'start', 'end', 'text'} dicts (see preprocess_transcript)
    :raises FileNotFoundError: if *input_audio* does not exist
    """
    if not os.path.exists(input_audio):
        # Include the offending path so callers can diagnose the failure.
        raise FileNotFoundError(f"file not found: {input_audio}")
    # Initialize the model.
    model = WhisperModel(model_size, device=device, compute_type=compute_type)
    # Collect transcription keyword arguments in one place.
    transcript_kwargs = {"beam_size": beam_size, "batch_size": batch_size}
    if vad_filter:
        transcript_kwargs["vad_filter"] = vad_filter
    # Run the batched transcription; the returned segments are lazy until
    # materialized with list().
    batched_model = BatchedInferencePipeline(model=model)
    segments, info = batched_model.transcribe(input_audio, **transcript_kwargs)
    return preprocess_transcript(list(segments))
def save_transcript(segments: list, output):
    """Write each segment's text to *output*, one line per segment.

    :param segments: list of dicts, each holding a 'text' key
    :param output: destination file path
    :return: None
    """
    lines = (segment['text'] + '\n' for segment in segments)
    with open(output, 'w', encoding='utf-8') as fh:
        fh.writelines(lines)