| import io |
| import wave |
| import os |
|
|
| from fish_audio_sdk import Session, ASRRequest |
|
|
| from .base import STTOperation |
|
|
class FishSTT(STTOperation):
    """Speech-to-text operation that transcribes audio through the Fish Audio SDK.

    A ``Session`` is created in :meth:`start` from the ``FISH_API_KEY``
    environment variable and released again in :meth:`unload`.
    """

    def __init__(self):
        # "fish" is forwarded to the base class; presumably the operation's
        # identifier -- confirm against STTOperation.
        super().__init__("fish")
        # Created lazily in start(); None means "not started".
        self.session = None

    async def start(self):
        """Start the base operation and open a Fish Audio session."""
        await super().start()
        self.session = Session(os.getenv("FISH_API_KEY"))

    async def unload(self):
        """Close the base operation and release the session, if one exists.

        Calling this before :meth:`start`, or more than once, is a no-op for
        the session part instead of failing on ``None``.
        """
        # NOTE(review): this calls the base class's close(), not unload();
        # confirm that is the intended base-class hook.
        await super().close()
        if self.session is not None:
            # NOTE(review): assumes Session.close() is awaitable -- confirm
            # against the installed fish_audio_sdk version.
            await self.session.close()
            self.session = None

    async def configure(self, config_d):
        """Configure and validate operation-specific configuration.

        This operation has no configurable fields, so ``config_d`` is ignored.
        """
        return

    async def get_configuration(self):
        """Return the values of configurable fields (none for this operation)."""
        return {}

    async def _generate(self, prompt: str = None, audio_bytes: bytes = None, sr: int = None, sw: int = None, ch: int = None, **kwargs):
        """Transcribe raw PCM audio and yield the result.

        Args:
            prompt: Accepted for interface compatibility; not used here.
            audio_bytes: Raw audio frames to transcribe.
            sr: Sample rate written to the WAV header.
            sw: Sample width in bytes written to the WAV header.
            ch: Number of channels written to the WAV header.
            **kwargs: Ignored.

        Yields:
            A single dict ``{"transcription": <text>}``.

        Although ``audio_bytes``, ``sr``, ``sw`` and ``ch`` default to ``None``,
        all four must be supplied; the ``wave`` writer rejects ``None``.
        """
        # Local import: keeps this block self-contained without touching the
        # file-level import section.
        import asyncio

        # Wrap the raw frames in an in-memory WAV container. The header is
        # finalized when the wave writer closes, so read the buffer afterwards.
        wav_buffer = io.BytesIO()
        with wave.open(wav_buffer, 'wb') as wav_file:
            wav_file.setframerate(sr)
            wav_file.setsampwidth(sw)
            wav_file.setnchannels(ch)
            wav_file.writeframes(audio_bytes)

        request = ASRRequest(audio=wav_buffer.getvalue(), language="en", ignore_timestamps=False)

        # session.asr(...) is a synchronous network call; run it in a worker
        # thread so the event loop is not blocked while waiting for the reply.
        response = await asyncio.to_thread(self.session.asr, request)

        yield {"transcription": response.text}