# Spaces:
# Sleeping
# Sleeping
import logging

import librosa
import numpy as np
from faster_whisper import WhisperModel

logger = logging.getLogger(__name__)
class FileTranscriber:
    """Transcribe audio files to text with a lazily loaded Whisper model.

    The model is not created in the constructor; it is built on the first
    call to :meth:`load_model` (which :meth:`transcribe_file` calls itself)
    and then reused for every later transcription.
    """

    # Sample rate (Hz) that audio is resampled to before transcription.
    SAMPLE_RATE = 16000

    def __init__(self, model_size="base", device="cpu", compute_type="int8"):
        """Store the model configuration without loading the model.

        Args:
            model_size: Model identifier passed to ``WhisperModel``.
            device: Device name passed to ``WhisperModel``.
            compute_type: Compute type passed to ``WhisperModel``.
        """
        self.model_size = model_size
        self.device = device
        self.compute_type = compute_type
        # Populated by load_model(); None means "not loaded yet".
        self.model = None

    def load_model(self):
        """Create the Whisper model if it has not been created yet.

        Calling this again after a successful load is a no-op.

        Raises:
            Exception: Whatever ``WhisperModel`` raises; the error is logged
                with its traceback and then re-raised unchanged.
        """
        if self.model is not None:
            return

        logger.info("Loading Whisper model: %s", self.model_size)
        try:
            self.model = WhisperModel(
                self.model_size,
                device=self.device,
                compute_type=self.compute_type,
            )
        except Exception as e:
            logger.exception("Failed to load model: %s", e)
            raise
        logger.info("Model loaded successfully")

    def transcribe_file(self, file_path, *, beam_size=5, vad_filter=True):
        """Transcribe the audio file at ``file_path`` and return its text.

        Args:
            file_path: Path of the audio file, as accepted by ``librosa.load``.
            beam_size: Beam size forwarded to the model's ``transcribe``.
            vad_filter: Forwarded to the model's ``transcribe``.

        Returns:
            The stripped, non-empty segment texts joined by single spaces.
            If loading the audio or transcribing fails, the failure is logged
            and the string ``"Error: <message>"`` is returned instead of
            raising, so callers must check for that prefix.

        Raises:
            Exception: Only from :meth:`load_model`, which runs outside the
                error-to-string handling below.
        """
        self.load_model()
        try:
            audio_data, _ = librosa.load(file_path, sr=self.SAMPLE_RATE)
            audio_data = audio_data.astype(np.float32)

            segments, _ = self.model.transcribe(
                audio_data,
                beam_size=beam_size,
                vad_filter=vad_filter,
            )
            # Iterate the segments inside the try block so that any error
            # raised while consuming them is reported like the others.
            pieces = (segment.text.strip() for segment in segments)
            return " ".join(piece for piece in pieces if piece)
        except Exception as e:
            logger.exception("Error transcribing file %s: %s", file_path, e)
            return f"Error: {e}"