# SussurroXRest/speech_models/speech_model_manager.py
# Author: LucaR84 — commit "code refactor" (182c2c2)
from abc import ABC, abstractmethod
from typing import Dict, Any, List
import numpy as np
class SpeechModelManager(ABC):
    """Abstract base class for speech transcription model backends.

    Concrete subclasses implement model loading plus file-based and
    streaming transcription; shared audio-chunking and segment-merging
    helpers live here.
    """
    def __init__(self, model_name: str, device: str):
        """
        Args:
            model_name: Identifier of the underlying model.
            device: Compute device string (e.g. "cpu", "cuda") — stored
                as-is for subclasses to use; not validated here.
        """
        self.model_name = model_name
        self.device = device
        # Subclasses are expected to set this to True once load_model() succeeds.
        self.is_loaded = False
    @abstractmethod
    def load_model(self) -> None:
        """Load the model into memory."""
        pass
    @abstractmethod
    def transcribe(self, audio_file_path: str, **kwargs) -> Dict[str, Any]:
        """Transcribe an audio file and return a result dictionary."""
        pass
    @abstractmethod
    async def transcribe_stream(self, audio_data: bytes, **kwargs) -> Dict[str, Any]:
        """Transcribe streaming audio data and return a result dictionary."""
        pass
    def _chunk_audio(self, audio: np.ndarray, sample_rate: int, chunk_duration: float) -> List[np.ndarray]:
        """Split ``audio`` into consecutive chunks of ``chunk_duration`` seconds.

        The final chunk may be shorter when the audio length is not an
        exact multiple of the chunk size. Chunks are views into ``audio``
        (numpy basic slicing), so no samples are copied.
        """
        chunk_size = int(sample_rate * chunk_duration)
        return [audio[i:i + chunk_size] for i in range(0, len(audio), chunk_size)]
    def _merge_segments(self, segments: List[List[Dict]], chunk_duration: float) -> List[Dict]:
        """Flatten per-chunk segment lists into one list with absolute timestamps.

        Each chunk's segments have their "start"/"end" shifted by the
        cumulative chunk offset so they refer to positions in the original
        (unchunked) audio.

        Fix: each segment dict is copied before shifting instead of being
        mutated in place, so callers holding references to the input (or
        calling this twice on the same data) see stable values.
        """
        merged_segments: List[Dict] = []
        for chunk_index, chunk_segments in enumerate(segments):
            # Chunks are fixed-length, so the absolute offset of chunk i
            # is simply i * chunk_duration.
            time_offset = chunk_index * chunk_duration
            for segment in chunk_segments:
                shifted = dict(segment)
                shifted["start"] = segment["start"] + time_offset
                shifted["end"] = segment["end"] + time_offset
                merged_segments.append(shifted)
        return merged_segments