File size: 1,954 Bytes
d78fb8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
from faster_whisper import WhisperModel, BatchedInferencePipeline
import re


def clean_text(text: str) -> str:
    """Normalize whitespace in a piece of transcript text.

    Every run of whitespace (spaces, tabs, newlines, ...) is collapsed into a
    single space, and leading/trailing whitespace is removed so the result
    can be written out as a clean line.

    Args:
        text: Raw text to normalize.

    Returns:
        The text with internal whitespace collapsed and both ends stripped.
    """
    # Collapse first, then strip: after the substitution the only possible
    # padding left at either end is a single space.
    return re.sub(r'\s+', ' ', text).strip()

def preprocess_transcript(segments: list):
    """Convert transcription segments into a list of plain dictionaries.

    Each input segment must expose ``start``, ``end`` and ``text``
    attributes. The output contains one dict per segment, in the same
    order, with the keys:

    - ``start``: the segment's ``start`` attribute
    - ``end``: the segment's ``end`` attribute
    - ``text``: the segment's ``text`` after passing through ``clean_text``
    """
    return [
        {
            'start': item.start,
            'end': item.end,
            'text': clean_text(item.text),
        }
        for item in segments
    ]

def transcript_audio(
        input_audio: str = "audio.mp3",
        model_size: str = "base",
        device: str = "cpu",
        compute_type: str = "int8",  # alternatives: float16, float32
        beam_size: int = 5,
        vad_filter: bool = False,
        batch_size: int = 16):
    """Transcribe an audio file with faster-whisper's batched pipeline.

    Args:
        input_audio: Path of the audio file to transcribe.
        model_size: Model size/identifier passed to ``WhisperModel``.
        device: Device passed to ``WhisperModel``.
        compute_type: Compute type passed to ``WhisperModel``.
        beam_size: Beam size forwarded to ``transcribe``.
        vad_filter: Forwarded to ``transcribe`` only when truthy (see the
            note in the body).
        batch_size: Batch size forwarded to ``transcribe``. Defaults to 16,
            the value that was previously hard-coded.

    Returns:
        The result of ``preprocess_transcript`` applied to the transcribed
        segments.

    Raises:
        FileNotFoundError: If ``input_audio`` does not exist.
    """
    # Fail early, before the model is constructed, and name the missing path
    # so the caller can tell which input was wrong.
    if not os.path.exists(input_audio):
        raise FileNotFoundError(f"file not found: {input_audio}")

    # Initialize the model.
    model = WhisperModel(model_size, device=device, compute_type=compute_type)

    # Assemble the transcription options.
    transcript_kwargs = {"beam_size": beam_size, "batch_size": batch_size}

    # The flag is only forwarded when enabled; otherwise the key is omitted
    # and the pipeline's own default applies.
    # NOTE(review): recent faster-whisper releases appear to default the
    # batched pipeline to vad_filter=True and to reject vad_filter=False for
    # long audio without clip timestamps -- confirm before forwarding False
    # explicitly.
    if vad_filter:
        transcript_kwargs["vad_filter"] = vad_filter

    # Run the batched transcription.
    batched_model = BatchedInferencePipeline(model=model)
    segments, _info = batched_model.transcribe(input_audio, **transcript_kwargs)

    # Materialize the returned segments into a list before post-processing.
    return preprocess_transcript(list(segments))

def save_transcript(segments: list, output):
    """Write the transcript text to a file, one segment per line.

    :param segments: iterable of mappings; the ``'text'`` value of each item
        is written followed by a newline.
    :param output: destination handed to ``open``; it is opened in write mode
        with UTF-8 encoding, so existing content is replaced.
    :return: None
    """
    with open(output, 'w', encoding='utf-8') as out_file:
        # The generator is built inside the ``with`` so the file is opened
        # before the segments are touched.
        out_file.writelines(item['text'] + '\n' for item in segments)