Spaces:
Running
Running
File size: 2,821 Bytes
import whisper
import os
import torch
import warnings
import gc # Garbage Collector for memory management
import tempfile
from pydub import AudioSegment # 🟢 NEW: Add pydub
# Suppress the "FP16 is not supported on CPU; using FP32 instead" warning
# so CPU-only runs do not repeat it in the logs. Only warnings whose
# message matches this text are ignored; all others still surface.
warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
class AgentInput:
    """Speech-to-text input agent built on a Whisper model.

    The agent loads a Whisper checkpoint once at construction time and
    exposes :meth:`transcribe`, which normalizes an audio file to WAV
    (via pydub) and returns the recognized text.

    Attributes:
        model_name: Name of the Whisper checkpoint requested at start-up.
        model: The loaded Whisper model object.
    """

    def __init__(self, device="cpu"):
        """Load the Whisper model onto *device*.

        Args:
            device: Device string forwarded to ``whisper.load_model``.

        Raises:
            Exception: Whatever ``whisper.load_model`` raises if the
                emergency reload also fails.
        """
        print(f"👂 Agent 1 (Input) Online: Preparing Whisper on {device}...")

        # USE 'tiny' FOR CLOUD TO PREVENT EXIT CODE 137 (OOM)
        # Use 'base' only if you are running on a machine with 16GB+ RAM
        self.model_name = "tiny"

        try:
            # Load the model and immediately collect garbage to free RAM
            self.model = whisper.load_model(self.model_name, device=device)
            gc.collect()
            print(f"✅ Whisper '{self.model_name}' model loaded. RAM optimized.")
        except Exception as e:
            print(f"⚠️ Load failed: {e}. Attempting emergency load...")
            # Emergency fallback to the smallest checkpoint; a failure here
            # is allowed to propagate because the agent cannot work at all.
            self.model = whisper.load_model("tiny", device=device)

    def _sanitize_audio(self, audio_path):
        """Re-encode *audio_path* as a standard WAV in a unique temp file.

        Args:
            audio_path: Path to the incoming audio file (any format pydub
                can open).

        Returns:
            Path to the freshly written WAV file, or *audio_path* itself
            when the conversion fails for any reason (best-effort).
        """
        temp_path = None
        try:
            # Try to load it regardless of format
            audio = AudioSegment.from_file(audio_path)

            # A unique name per call prevents two requests that share the
            # same upload basename from clobbering each other's temp file.
            fd, temp_path = tempfile.mkstemp(prefix="clean_audio_", suffix=".wav")
            os.close(fd)  # pydub reopens the file by path

            audio.export(temp_path, format="wav")
            return temp_path
        except Exception as e:
            print(f"⚠️ Audio Sanitization Warning: {e}")
            # Do not leave a half-written temp file behind.
            if temp_path is not None and os.path.exists(temp_path):
                try:
                    os.remove(temp_path)
                except OSError:
                    pass
            return audio_path  # Fallback to original if pydub fails

    def transcribe(self, audio_path, language=None):
        """Transcribe an audio file to text.

        Args:
            audio_path: Path to the audio file to transcribe.
            language: Optional language hint forwarded to Whisper;
                ``None`` is passed through unchanged.

        Returns:
            A one-element list holding a dict with ``"text"`` and
            ``"speaker"`` keys. ``speaker`` is ``"Speaker 1"`` on success,
            ``"SYSTEM"`` when no model or no path is available, and
            ``"ERROR"`` when transcription raises (``text`` is then empty).
        """
        if not getattr(self, "model", None) or not audio_path:
            return [{"text": "", "speaker": "SYSTEM"}]

        clean_path = audio_path
        try:
            # Clean the audio first, and transcribe the cleaned copy.
            clean_path = self._sanitize_audio(audio_path)

            # Ensure fp16=False for CPU to save on conversion overhead
            result = self.model.transcribe(clean_path, language=language, fp16=False)

            # Keep only the text and release the raw result to keep memory low
            transcription_text = result["text"].strip()
            del result
            gc.collect()

            return [{"text": transcription_text, "speaker": "Speaker 1"}]
        except Exception as e:
            print(f"❌ Transcription Error: {e}")
            return [{"text": "", "speaker": "ERROR"}]
        finally:
            # Remove the temp WAV on success and on failure; never delete
            # the caller's original file.
            if clean_path != audio_path and os.path.exists(clean_path):
                try:
                    os.remove(clean_path)
                except OSError:
                    pass