Spaces:
Sleeping
Sleeping
| import os | |
| from faster_whisper import WhisperModel, BatchedInferencePipeline | |
| import re | |
def clean_text(text: str) -> str:
    """Normalize whitespace in *text*.

    Collapses every run of whitespace (spaces, tabs, newlines) into a single
    space and strips leading/trailing whitespace. The strip matters because
    faster-whisper segment text usually starts with a leading space, which the
    original collapse-only version preserved.

    :param text: raw segment text
    :return: whitespace-normalized text
    """
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
def preprocess_transcript(segments: list):
    """Preprocess a transcript, returning a list of dicts with:

    - start: time at which the segment begins
    - end: time at which the segment ends
    - text: the transcribed text, whitespace-cleaned
    """
    return [
        {
            'start': seg.start,
            'end': seg.end,
            'text': clean_text(seg.text),
        }
        for seg in segments
    ]
def transcript_audio(
        input_audio: str = "audio.mp3",
        model_size: str = "base",
        device: str = "cpu",
        compute_type: str = "int8",  # alternatives: float16, float32
        beam_size: int = 5,
        vad_filter: bool = False):
    """Transcribe an audio file with faster-whisper's batched pipeline.

    :param input_audio: path to the audio file to transcribe
    :param model_size: Whisper model size (e.g. "base", "small", "large-v3")
    :param device: inference device ("cpu" or "cuda")
    :param compute_type: quantization / precision for inference
    :param beam_size: beam-search width for decoding
    :param vad_filter: enable voice-activity-detection filtering
    :raises FileNotFoundError: if *input_audio* does not exist
    :return: list of processed segment dicts (see preprocess_transcript)
    """
    if not os.path.exists(input_audio):
        # Include the offending path so the caller can see what was missing.
        raise FileNotFoundError(f"file not found: {input_audio}")
    # Initialize the model and wrap it in the batched inference pipeline.
    model = WhisperModel(model_size, device=device, compute_type=compute_type)
    batched_model = BatchedInferencePipeline(model=model)
    # vad_filter is forwarded directly: passing False is equivalent to the
    # library default, so the conditional kwargs dict was unnecessary.
    segments, _info = batched_model.transcribe(
        input_audio,
        beam_size=beam_size,
        vad_filter=vad_filter,
        batch_size=16,
    )
    # transcribe() returns a lazy generator; materialize before processing.
    return preprocess_transcript(list(segments))
def save_transcript(segments: list, output):
    """Save a transcript to *output*, one segment's text per line (UTF-8).

    :param segments: list of segment dicts, each with a 'text' key
    :param output: destination file path
    :return: None
    """
    lines = (segment['text'] + '\n' for segment in segments)
    with open(output, 'w', encoding='utf-8') as f:
        f.writelines(lines)