Spaces:
Runtime error
Runtime error
| import whisper | |
| import requests | |
| import ffmpeg | |
| import numpy as np | |
| from typing import List, Dict, Any | |
def process_audio_from_url(audio_url: str, timeout: float = 30.0) -> List[Dict[str, Any]]:
    """Download an audio file, decode it with ffmpeg and transcribe it with Whisper.

    Args:
        audio_url: URL of the audio file to fetch.
        timeout: Seconds handed to ``requests.get`` as its timeout, so a
            stalled server cannot block the call forever.

    Returns:
        One dict per transcription segment, each holding ``"file_name"``
        (the last path component of ``audio_url``) and ``"text"``.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
        RuntimeError: If ffmpeg cannot decode the downloaded bytes.
    """
    # Local import: keeps the block self-contained without touching the
    # file-level import list.
    from urllib.parse import urlparse

    # Take the name from the URL *path* only, so a query string or fragment
    # ("clip.mp3?token=...") does not leak into the reported file name.
    # Computed once instead of once per segment.
    file_name = urlparse(audio_url).path.split("/")[-1]

    # `.content` applies any Content-Encoding (gzip/deflate) before handing
    # the bytes over, unlike `response.raw.read()`; the `with` block releases
    # the connection even when the status check raises.
    with requests.get(audio_url, timeout=timeout) as response:
        response.raise_for_status()
        payload = response.content

    # Decode to mono, 16 kHz, 32-bit float PCM by piping through ffmpeg.
    try:
        out, _ = (
            ffmpeg
            .input('pipe:0')
            .output('pipe:1', format='f32le', acodec='pcm_f32le', ac=1, ar='16k')
            .run(input=payload, capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        # errors="replace": ffmpeg's stderr is not guaranteed to be valid
        # UTF-8, and a decode failure here would mask the real error.
        detail = e.stderr.decode(errors="replace")
        raise RuntimeError(f"Failed to load audio: {detail}") from e

    # Reinterpret the raw PCM bytes as a flat float32 sample array.
    audio = np.frombuffer(out, np.float32).flatten()

    # NOTE(review): the model is loaded on every call; consider caching it at
    # module level if this function is called repeatedly.
    model = whisper.load_model("base")
    result = model.transcribe(audio)

    return [
        {"file_name": file_name, "text": segment["text"]}
        for segment in result["segments"]
    ]
def process_audio_data(audio: np.ndarray, file_name: str) -> List[Dict[str, Any]]:
    """Transcribe an in-memory audio array with the Whisper "base" model.

    Args:
        audio: Audio samples passed straight to ``model.transcribe``.
            Presumably mono float32 at 16 kHz, as ``process_audio_from_url``
            produces -- confirm against callers.
        file_name: Label copied into every returned entry.

    Returns:
        One dict per transcription segment, each holding ``"file_name"`` and
        the segment's ``"text"``.
    """
    # Load the model and run the transcription in one step.
    transcription = whisper.load_model("base").transcribe(audio)

    # Tag each segment's text with the caller-supplied file name.
    return [
        {"file_name": file_name, "text": piece["text"]}
        for piece in transcription["segments"]
    ]