Spaces:

toecm
/

PureVersation

Running

File size: 2,821 Bytes

778f672
 
 
 
19f313f
da32ee3
 
778f672
a64f00f
 
 
778f672
a64f00f
19f313f
a64f00f
19f313f
 
 
a64f00f
778f672
19f313f
a64f00f
19f313f
 
a64f00f
19f313f
 
a64f00f
778f672
da32ee3
 
 
 
 
 
 
 
 
 
 
 
 
a64f00f
19f313f
 
a64f00f
 
da32ee3
 
 
19f313f
a64f00f
19f313f
 
 
 
 
da32ee3
 
 
 
 
19f313f
a64f00f

import whisper
import os
import torch
import warnings
import gc  # Garbage Collector for memory management
import tempfile
from pydub import AudioSegment # 🟢 NEW: Add pydub

# Suppress the FP16-on-CPU warning so CPU-only runs do not spam the logs.
# NOTE: `message` is treated by the `warnings` module as a regular expression
# matched against the start of the warning text, so it must stay in sync with
# the wording of the warning being silenced.
warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")

class AgentInput:
    """Speech-to-text agent that wraps a Whisper model.

    The agent loads a Whisper model once at construction time and exposes
    :meth:`transcribe`, which normalizes the incoming audio file to WAV
    (best effort, via pydub) before running the model on it.
    """

    def __init__(self, device="cpu", model_name="tiny"):
        """Load the Whisper model.

        Args:
            device: Device string forwarded to ``whisper.load_model``.
            model_name: Name of the Whisper model to load. Defaults to
                ``"tiny"``, which keeps memory usage low enough to avoid
                being OOM-killed (exit code 137) on small cloud instances.
                Use ``"base"`` only on a machine with 16GB+ RAM.

        Raises:
            Exception: Whatever ``whisper.load_model`` raises if the
                emergency ``"tiny"`` load also fails.
        """
        print(f"👂 Agent 1 (Input) Online: Preparing Whisper on {device}...")

        self.model_name = model_name

        try:
            # Load the model and immediately collect garbage to free RAM.
            self.model = whisper.load_model(self.model_name, device=device)
            gc.collect()
            print(f"✅ Whisper '{self.model_name}' model loaded. RAM optimized.")
        except Exception as e:
            print(f"⚠️ Load failed: {e}. Attempting emergency load...")
            # Emergency fallback: retry once with the smallest model. A
            # failure here is not caught and propagates to the caller.
            self.model_name = "tiny"
            self.model = whisper.load_model("tiny", device=device)

    def _sanitize_audio(self, audio_path):
        """Re-encode ``audio_path`` as a standard WAV in a unique temp file.

        This is best effort: if the audio cannot be decoded or exported, a
        warning is printed and the original path is returned unchanged.

        Args:
            audio_path: Path of the audio file to normalize.

        Returns:
            Path of the new temp WAV on success (the caller is responsible
            for deleting it), otherwise ``audio_path`` itself.
        """
        temp_path = None
        try:
            # Try to load it regardless of format.
            audio = AudioSegment.from_file(audio_path)
            # mkstemp gives a unique name, so concurrent requests that share
            # the same upload basename cannot overwrite each other's file.
            fd, temp_path = tempfile.mkstemp(prefix="clean_audio_", suffix=".wav")
            os.close(fd)
            audio.export(temp_path, format="wav")
            return temp_path
        except Exception as e:
            print(f"⚠️ Audio Sanitization Warning: {e}")
            # Do not leave a partially written temp file behind.
            if temp_path and os.path.exists(temp_path):
                try:
                    os.remove(temp_path)
                except OSError:
                    pass
            return audio_path  # Fallback to original if pydub fails

    def transcribe(self, audio_path, language=None):
        """Transcribe an audio file into a single-speaker transcript.

        Args:
            audio_path: Path of the audio file to transcribe.
            language: Optional language hint forwarded to the model;
                ``None`` leaves language selection to the model.

        Returns:
            A one-element list holding a dict with ``"text"`` and
            ``"speaker"`` keys. ``"speaker"`` is ``"Speaker 1"`` on success,
            ``"SYSTEM"`` when there is no model or no audio path, and
            ``"ERROR"`` when transcription fails (``"text"`` is then empty).
        """
        if not self.model or not audio_path:
            return [{"text": "", "speaker": "SYSTEM"}]

        clean_path = audio_path
        try:
            # Clean the audio first, then transcribe the CLEANED file.
            clean_path = self._sanitize_audio(audio_path)

            # Ensure fp16=False for CPU to save on conversion overhead.
            result = self.model.transcribe(clean_path, language=language, fp16=False)

            # Keep only the text and drop the raw result to keep memory low.
            transcription_text = result["text"].strip()
            del result
            gc.collect()

            return [{"text": transcription_text, "speaker": "Speaker 1"}]
        except Exception as e:
            print(f"❌ Transcription Error: {e}")
            return [{"text": "", "speaker": "ERROR"}]
        finally:
            # Remove the temp WAV on both the success and the error path.
            if clean_path != audio_path and os.path.exists(clean_path):
                try:
                    os.remove(clean_path)
                except OSError as cleanup_error:
                    print(f"⚠️ Temp file cleanup failed: {cleanup_error}")