Spaces:

Hameed13
/

Huggingface_News_Podcast

Build error

App Files Files Community

Hameed13 committed on May 18, 2025

Commit

ced0e79

verified ·

1 Parent(s): 6ee3346

Update main.py

Browse files

Files changed (1) hide show

main.py +340 -262

main.py CHANGED Viewed

@@ -6,331 +6,409 @@ import logging
 import traceback
 import requests
 import subprocess
-from datetime import datetime
 from pathlib import Path
-from fastapi import FastAPI, HTTPException, Request
 from fastapi.responses import FileResponse, JSONResponse
 from pydantic import BaseModel
 import uvicorn
-import torch
-import torchaudio
-from transformers import AutoModelForCausalLM, AutoTokenizer
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
-    format='[%(asctime)s] %(levelname)s - %(message)s',
-    datefmt='%Y-%m-%d %H:%M:%S'
 )
-# Print startup time
-logging.info(f"===== Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
-# Initialize the app
-app = FastAPI()
-# Set environment variable to disable PortAudio requirement
-os.environ["OUTETTS_NO_PORTAUDIO"] = "1"
-# Define the paths for required model files
-tokenizer_path = "saheedniyi/YarnGPT2"
-wav_tokenizer_config_path = "wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
-wav_tokenizer_model_path = "wavtokenizer_large_speech_320_24k.ckpt"
-# Patch torch.load to always use weights_only=False
-# This is necessary for compatibility with PyTorch 2.6+
-original_torch_load = torch.load
-def patched_torch_load(*args, **kwargs):
-    if 'weights_only' not in kwargs:
-        kwargs['weights_only'] = False
-    return original_torch_load(*args, **kwargs)
-# Replace the original function with our patched version
-torch.load = patched_torch_load
-# Function to download files with proper error handling
-def download_file(url, destination):
-    logging.info(f"Downloading file from {url} to {destination}")
-    try:
-        # Try to download using requests
-        response = requests.get(url, stream=True)
-        response.raise_for_status()
-        with open(destination, 'wb') as f:
-            for chunk in response.iter_content(chunk_size=8192):
-                f.write(chunk)
-        logging.info(f"Successfully downloaded file to {destination}")
-        return True
-    except Exception as e:
-        logging.error(f"Failed to download file using requests: {str(e)}")
-        # Fallback to wget
         try:
-            logging.info("Trying alternate download method with wget...")
-            result = subprocess.run(['wget', url, '-O', destination],
-                                  check=True,
-                                  capture_output=True,
-                                  text=True)
-            logging.info(f"wget download successful")
-            return True
-        except subprocess.CalledProcessError as e:
-            logging.error(f"wget download failed: {e.stderr}")
-            # Final fallback to curl
-            try:
-                logging.info("Trying final download method with curl...")
-                result = subprocess.run(['curl', '-L', url, '--output', destination],
-                                      check=True,
-                                      capture_output=True,
-                                      text=True)
-                logging.info(f"curl download successful")
-                return True
-            except subprocess.CalledProcessError as e:
-                logging.error(f"curl download failed: {e.stderr}")
-                return False
-# Download required model files if they don't exist
-def download_required_files():
-    # URLs for model files
-    wav_tokenizer_config_url = "https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
-    wav_tokenizer_model_url = "https://huggingface.co/novateur/WavTokenizer-small-speech-320token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt"
-    # Download config file if it doesn't exist
-    if not os.path.exists(wav_tokenizer_config_path):
-        success = download_file(wav_tokenizer_config_url, wav_tokenizer_config_path)
-        if not success:
-            raise RuntimeError(f"Failed to download config file from {wav_tokenizer_config_url}")
-    # Download model file if it doesn't exist
-    if not os.path.exists(wav_tokenizer_model_path):
-        success = download_file(wav_tokenizer_model_url, wav_tokenizer_model_path)
         if not success:
-            # Try alternate source for the model file (from Google Drive)
             try:
-                logging.info("Installing gdown package...")
-                subprocess.check_call([sys.executable, "-m", "pip", "install", "gdown"])
-                import gdown
-                logging.info("gdown installed successfully")
-                gdrive_url = "https://drive.google.com/uc?id=1-ASeEkrn4HY49yZWHTASgfGFNXdVnLTt"
-                logging.info(f"Trying alternate source for model file: {gdrive_url}")
-                gdown.download(gdrive_url, wav_tokenizer_model_path, quiet=False)
-                if os.path.exists(wav_tokenizer_model_path):
-                    logging.info(f"Successfully downloaded model file using gdown")
-                else:
-                    raise RuntimeError("File not found after gdown download")
             except Exception as e:
-                logging.error(f"Failed to download model file using gdown: {str(e)}")
-                raise RuntimeError(f"Failed to download model file from any source")
-# Function to verify if required files exist
-def verify_required_files():
-    if not os.path.exists(wav_tokenizer_config_path):
-        raise FileNotFoundError(f"Config file not found at {wav_tokenizer_config_path}")
-    if not os.path.exists(wav_tokenizer_model_path):
-        raise FileNotFoundError(f"Model file not found at {wav_tokenizer_model_path}")
-    logging.info("All required files verified")
-# Define TextToSpeech class based on the working Colab code
-class TextToSpeech:
-    def __init__(self):
-        logging.info("Initializing TextToSpeech class...")
-        try:
-            # Import the AudioTokenizerV2 class from yarngpt
-            from yarngpt.audiotokenizer import AudioTokenizerV2
-            logging.info("Successfully imported AudioTokenizerV2 class")
-        except ImportError as e:
-            logging.error(f"Failed to import AudioTokenizerV2 class: {str(e)}")
-            sys.exit(1)
-        # Download required files
-        download_required_files()
-        # Verify files exist
-        verify_required_files()
-        # Detect device
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        logging.info(f"Using device: {self.device}")
-        # Initialize audio tokenizer
         try:
-            self.audio_tokenizer = AudioTokenizerV2(
-                tokenizer_path,
-                wav_tokenizer_model_path,
-                wav_tokenizer_config_path
             )
-            logging.info("Audio tokenizer initialized successfully")
-        except Exception as e:
-            logging.error(f"Failed to initialize audio tokenizer: {str(e)}")
-            raise
-        # Load model
-        try:
-            self.model = AutoModelForCausalLM.from_pretrained(
                 tokenizer_path,
                 torch_dtype="auto"
-            ).to(self.audio_tokenizer.device)
-            logging.info("Model loaded successfully")
-        except Exception as e:
-            logging.error(f"Failed to load model: {str(e)}")
-            raise
-    def tts(self, text, output_file, accent="nigerian", speed=1.0):
-        """
-        Generate Nigerian-accented speech from text
-        Args:
-            text: Text to convert to speech
-            output_file: Path to save the audio file
-            accent: Accent to use (maps to a specific speaker)
-            speed: Speed multiplier (not currently implemented)
-        Returns:
-            Path to generated audio file
-        """
-        logging.info(f"Generating speech for text: '{text[:50]}...'")
-        # Map accent to speaker name
-        speaker_mapping = {
-            "nigerian": "tayo",
-            "yoruba": "idera",
-            "igbo": "chidi",
-            "hausa": "aminu",
-            "default": "tayo"
-        }
-        speaker = speaker_mapping.get(accent.lower(), speaker_mapping["default"])
-        logging.info(f"Using speaker: {speaker}")
-        try:
-            # Create prompt
-            prompt = self.audio_tokenizer.create_prompt(text, lang="english", speaker_name=speaker)
-            input_ids = self.audio_tokenizer.tokenize_prompt(prompt)
-            # Generate output
-            output = self.model.generate(
-                input_ids=input_ids,
-                temperature=0.1,
-                repetition_penalty=1.1,
-                max_length=4000,
-            )
-            # Convert to audio
-            codes = self.audio_tokenizer.get_codes(output)
-            audio = self.audio_tokenizer.get_audio(codes)
-            # Save audio file
-            torchaudio.save(output_file, audio, sample_rate=24000)
-            logging.info(f"Audio saved to {output_file}")
-            return output_file
-        except Exception as e:
-            logging.error(f"Error in TTS generation: {str(e)}")
-            traceback.print_exc()
-            raise
-# Try to initialize TTS engine, but allow app to start even if it fails
-tts_engine = None
-try:
-    logging.info("Starting TTS engine initialization...")
-    tts_engine = TextToSpeech()
-    logging.info("TTS engine initialized successfully")
-except Exception as e:
-    logging.error(f"Failed to initialize TTS engine: {str(e)}")
-    print(traceback.format_exc())
-# Create output directory if it doesn't exist
-output_dir = Path("./output")
-output_dir.mkdir(exist_ok=True)
-# Model for the TTS request
-class TTSRequest(BaseModel):
-    text: str
-    accent: str = "nigerian"  # Default accent
-    speed: float = 1.0  # Default speed
 # Health check endpoint
 @app.get("/")
-def health_check():
     return {
-        "status": "ok",
-        "tts_engine_loaded": tts_engine is not None,
-        "device": tts_engine.device if tts_engine else "not available",
-        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     }
-# Text-to-speech endpoint
 @app.post("/tts")
-async def text_to_speech(request: TTSRequest):
     if tts_engine is None:
-        logging.error("TTS engine not initialized")
-        raise HTTPException(status_code=500, detail="TTS engine not initialized")
     try:
-        # Generate a unique filename
-        filename = f"{uuid.uuid4()}.wav"
-        output_path = output_dir / filename
-        # Log the request
-        logging.info(f"Processing TTS request: text='{request.text[:50]}...', accent={request.accent}")
-        # Generate speech
-        tts_engine.tts(
-            text=request.text,
-            output_file=str(output_path),
-            accent=request.accent,
-            speed=request.speed
-        )
-        # Check if file was created
-        if not output_path.exists():
-            logging.error(f"Output file was not created: {output_path}")
-            raise HTTPException(status_code=500, detail="Failed to generate audio file")
-        # Return the audio file
-        logging.info(f"Successfully generated audio: {output_path}")
-        return FileResponse(
-            path=output_path,
-            media_type="audio/wav",
-            filename=filename
-        )
     except Exception as e:
-        logging.error(f"Error in text_to_speech: {str(e)}")
-        print(traceback.format_exc())
-        raise HTTPException(status_code=500, detail=f"Failed to generate speech: {str(e)}")
-# Cleanup old files (run this periodically)
-@app.on_event("startup")
-async def cleanup_old_files():
     try:
-        # Delete files older than 1 hour
-        current_time = time.time()
-        for file_path in output_dir.glob("*.wav"):
-            if current_time - file_path.stat().st_mtime > 3600:  # 1 hour
-                file_path.unlink()
-                logging.info(f"Deleted old file: {file_path}")
     except Exception as e:
-        logging.error(f"Error during cleanup: {str(e)}")
-# Custom exception handler
 @app.exception_handler(Exception)
 async def global_exception_handler(request: Request, exc: Exception):
-    logging.error(f"Unhandled exception: {str(exc)}")
-    print(traceback.format_exc())
     return JSONResponse(
         status_code=500,
-        content={"detail": f"Internal server error: {str(exc)}"}
     )
-# Start server if running as a script
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)

 import traceback
 import requests
 import subprocess
 from pathlib import Path
+from datetime import datetime, timedelta
+from typing import Optional
+from fastapi import FastAPI, HTTPException, Request, BackgroundTasks
 from fastapi.responses import FileResponse, JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import uvicorn
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
+    format="%(asctime)s | %(levelname)s | %(message)s",
+    handlers=[logging.StreamHandler()]
 )
+logger = logging.getLogger(__name__)
+# Create start-up log entry
+logger.info(f"===== Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
+# Create output directory for audio files
+os.makedirs("audio_files", exist_ok=True)
+# Initialize FastAPI app
+app = FastAPI(title="Nigerian Text-to-Speech API")
+# Add CORS middleware to allow cross-origin requests (for Streamlit/cURL).
+# Credentials are disabled on purpose: a wildcard origin must not be combined with
+# credentialed requests, and the endpoints in this file use neither cookies nor auth headers.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allows all origins
+    allow_credentials=False,
+    allow_methods=["*"],  # Allows all methods
+    allow_headers=["*"],  # Allows all headers
+)
+# Input validation models
+class TTSRequest(BaseModel):
+    """Request body accepted by the /tts endpoint."""
+    text: str  # Text to synthesize
+    accent: str = "nigerian"  # For backward compatibility; used to pick a voice when `voice` is absent
+    voice: Optional[str] = None  # New parameter (will override accent if provided); an explicit null is allowed
+    language: str = "english"  # Default language
+class TTSResponse(BaseModel):
+    """Mirrors the keys of the dict returned by text_to_speech (not wired as a response_model in the visible code)."""
+    audio_url: str
+    audio_base64: Optional[str] = None  # Base64-encoded audio (optional)
+    text: str
+    voice: str
+    language: str
+# Define available voices and mapping
+# Speaker names grouped by gender; text_to_speech concatenates both lists to validate the chosen voice.
+AVAILABLE_VOICES = {
+    "female": ["zainab", "idera", "regina", "chinenye", "joke", "remi"],
+    "male": ["jude", "tayo", "umar", "osagie", "onye", "emma"]
+}
+# Voice used for each accent; text_to_speech consults it only when the request carries no explicit voice.
+ACCENT_TO_VOICE = {
+    "nigerian": "tayo",
+    "yoruba": "idera",
+    "igbo": "emma",
+    "hausa": "umar"
+}
+# Languages accepted by /tts; the request value is lower-cased before it is compared against this list.
+AVAILABLE_LANGUAGES = ["english", "yoruba", "igbo", "hausa"]
+# Initialize global variables for model components
+# All three stay None until load_tts_engine() assigns them; the endpoints read `tts_engine is None` as "not ready".
+model = None
+audio_tokenizer = None
+tts_engine = None
+def _has_content(path):
+    """Return True when path is an existing, non-empty regular file."""
+    return os.path.isfile(path) and os.path.getsize(path) > 0
+def _remove_if_present(path):
+    """Delete path; a file that is already gone is not an error."""
+    try:
+        os.remove(path)
+    except FileNotFoundError:
+        pass
+def _fetch_with_requests(url, dest):
+    """Stream url into dest with requests, raising on HTTP or network errors."""
+    with requests.get(url, stream=True, timeout=30) as response:
+        response.raise_for_status()
+        with open(dest, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+def _fetch_with_wget(url, dest):
+    """Download url into dest with the wget executable."""
+    subprocess.run(["wget", url, "-O", dest], check=True)
+def _fetch_with_curl(url, dest):
+    """Download url into dest with the curl executable."""
+    # --fail: without it curl exits 0 on an HTTP error and saves the error page as the file
+    subprocess.run(["curl", "-L", "--fail", url, "-o", dest], check=True)
+def _fetch_with_gdown(gdrive_id, dest):
+    """Install gdown if necessary, then download the Google Drive file gdrive_id into dest."""
+    subprocess.run([sys.executable, "-m", "pip", "install", "gdown", "--quiet"], check=True)
+    import gdown
+    gdown.download(id=gdrive_id, output=dest, quiet=False)
+def download_required_files():
+    """
+    Download model files from multiple sources with fallback mechanisms.
+    Each missing file is tried with requests, wget, curl and (when a Google Drive ID
+    is listed) gdown, in that order. Data is written to "<output_path>.part" and only
+    renamed to the final name once a non-empty file has arrived, so an interrupted
+    download can never be mistaken for a finished one on the next start.
+    Raises:
+        FileNotFoundError: a required file could not be obtained by any method.
+    """
+    files_to_download = [
+        {
+            "url": "https://huggingface.co/novateur/WavTokenizer-small-speech-320token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt",
+            "output_path": "wavtokenizer_large_speech_320_24k.ckpt",
+            "gdrive_id": "1-6uQcVGonAdmAiazJ8YEQBHoGzbKXrsW"  # Backup Google Drive ID
+        },
+        {
+            "url": "https://huggingface.co/saheedniyi/YarnGPT2/resolve/main/config.json",
+            "output_path": "saheedniyi_YarnGPT2/config.json",
+            "gdrive_id": None
+        },
+        {
+            "url": "https://huggingface.co/saheedniyi/YarnGPT2/resolve/main/tokenizer_config.json",
+            "output_path": "saheedniyi_YarnGPT2/tokenizer_config.json",
+            "gdrive_id": None
+        },
+        {
+            "url": "https://huggingface.co/saheedniyi/YarnGPT2/resolve/main/pytorch_model.bin",
+            "output_path": "saheedniyi_YarnGPT2/pytorch_model.bin",
+            "gdrive_id": "1-3KU78OGUyPxtjYPSITx6N3vj46aOeFu"  # Backup Google Drive ID
+        },
+        {
+            "url": "https://huggingface.co/saheedniyi/YarnGPT2/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml",
+            "output_path": "wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml",
+            "gdrive_id": None
+        }
+    ]
+    # Prepare directory for model files
+    os.makedirs("saheedniyi_YarnGPT2", exist_ok=True)
+    for file_info in files_to_download:
+        output_path = file_info["output_path"]
+        url = file_info["url"]
+        # Skip if file already exists
+        if _has_content(output_path):
+            logger.info(f"File already exists: {output_path}")
+            continue
+        logger.info(f"Downloading file: {output_path}")
+        # (label, "trying" message, "failed" message, fetcher, source) in fallback order
+        attempts = [
+            ("requests", "Trying direct download with requests", "Failed to download with requests", _fetch_with_requests, url),
+            ("wget", "Trying download with wget", "Failed to download with wget", _fetch_with_wget, url),
+            ("curl", "Trying download with curl", "Failed to download with curl", _fetch_with_curl, url),
+        ]
+        if file_info["gdrive_id"]:
+            attempts.append(
+                ("gdown", "Trying download from Google Drive", "Failed to install or use gdown", _fetch_with_gdown, file_info["gdrive_id"])
+            )
+        partial_path = output_path + ".part"
+        success = False
+        for via, trying, failed, fetch, source in attempts:
+            logger.info(f"{trying}: {source}")
+            try:
+                fetch(source, partial_path)
+                if _has_content(partial_path):
+                    # Publish under the final name only once the data is complete
+                    os.replace(partial_path, output_path)
+                    logger.info(f"Successfully downloaded via {via}: {output_path}")
+                    success = True
+                    break
+            except Exception as e:
+                # Any failure simply moves on to the next download method
+                logger.error(f"{failed}: {str(e)}")
+            # Drop whatever the failed attempt left behind before trying the next method
+            _remove_if_present(partial_path)
+        if not success:
+            logger.error(f"All download methods failed for: {output_path}")
+            raise FileNotFoundError(f"Failed to download required file: {output_path}")
+    # Verify all files were downloaded
+    for file_info in files_to_download:
+        if not _has_content(file_info["output_path"]):
+            raise FileNotFoundError(f"Required file missing or empty: {file_info['output_path']}")
+    logger.info("All required files downloaded successfully!")
+def _torch_needs_weights_only_patch(version):
+    """Return True when a torch version string denotes release 2.6 or newer."""
+    # Drop any local build suffix ("2.6.0+cu124") before reading major.minor
+    release = version.split("+", 1)[0].split(".")
+    try:
+        major, minor = int(release[0]), int(release[1])
+    except (IndexError, ValueError):
+        return False
+    return (major, minor) >= (2, 6)
+def load_tts_engine():
+    """
+    Load the TTS engine and models with explicit PyTorch version handling.
+    Assigns the module-level ``model``, ``audio_tokenizer`` and ``tts_engine``.
+    Returns:
+        True when the engine was built, False when an import or load step failed
+        (the failure is logged instead of being raised).
+    """
+    global model, audio_tokenizer, tts_engine
+    try:
+        # Only import these modules when needed to avoid startup errors
+        import torch
+        import torchaudio
+        # Apply monkey patch for PyTorch 2.6+ compatibility. The version is compared
+        # numerically so 2.7, 2.10, ... are covered too, and the marker attribute keeps
+        # a second call from wrapping the already-patched function again.
+        # NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects;
+        # acceptable only while checkpoints come from the fixed sources downloaded above.
+        already_patched = getattr(torch.load, "_weights_only_patched", False)
+        if _torch_needs_weights_only_patch(getattr(torch, "__version__", "")) and not already_patched:
+            logger.info(f"Detected PyTorch {torch.__version__}, applying load function patch for weights_only")
+            original_torch_load = torch.load
+            def patched_torch_load(*args, **kwargs):
+                # Add weights_only=False if not explicitly specified
+                kwargs.setdefault('weights_only', False)
+                return original_torch_load(*args, **kwargs)
+            patched_torch_load._weights_only_patched = True
+            torch.load = patched_torch_load
+        # Now import other dependencies
+        from transformers import AutoModelForCausalLM
         try:
+            # Try to import the WavTokenizer for yarngpt
+            from yarngpt.audiotokenizer import AudioTokenizerV2
+            # Model configuration
+            tokenizer_path = "saheedniyi_YarnGPT2"
+            wav_tokenizer_config_path = "wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
+            wav_tokenizer_model_path = "wavtokenizer_large_speech_320_24k.ckpt"
+            logger.info("Loading YarnGPT model and tokenizer...")
+            audio_tokenizer = AudioTokenizerV2(
+                tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path
             )
+            model = AutoModelForCausalLM.from_pretrained(
                 tokenizer_path,
                 torch_dtype="auto"
+            ).to(audio_tokenizer.device)
+            logger.info("YarnGPT model loaded successfully!")
+            class TextToSpeech:
+                """Thin wrapper pairing the loaded model with its audio tokenizer."""
+                def __init__(self):
+                    self.audio_tokenizer = audio_tokenizer
+                    self.model = model
+                def generate_speech(self, text, language="english", speaker_name="tayo"):
+                    """Return the audio produced for text in the given language and speaker voice."""
+                    # Create prompt and generate audio
+                    prompt = self.audio_tokenizer.create_prompt(text, lang=language, speaker_name=speaker_name)
+                    input_ids = self.audio_tokenizer.tokenize_prompt(prompt)
+                    output = self.model.generate(
+                        input_ids=input_ids,
+                        temperature=0.1,
+                        repetition_penalty=1.1,
+                        max_length=4000,
+                    )
+                    codes = self.audio_tokenizer.get_codes(output)
+                    audio = self.audio_tokenizer.get_audio(codes)
+                    return audio
+            # Initialize TTS engine
+            tts_engine = TextToSpeech()
+            logger.info("TTS engine initialized successfully!")
+            return True
+        except ImportError:
+            logger.error("Failed to import yarngpt modules. Make sure the yarngpt package is installed.")
+            return False
+    except Exception as e:
+        logger.error(f"Error initializing TTS engine: {str(e)}")
+        logger.error(traceback.format_exc())
+        return False
 # Health check endpoint
 @app.get("/")
+async def root():
+    """Report whether the TTS engine is loaded and list the supported languages, voices and accents."""
+    engine_ready = tts_engine is not None
     return {
+        "status": "ok" if engine_ready else "model_loading_failed",
+        "message": "Nigerian TTS API is running",
+        "available_languages": AVAILABLE_LANGUAGES,
+        "available_voices": AVAILABLE_VOICES,
+        "accent_mapping": ACCENT_TO_VOICE
     }
+# TTS endpoint
 @app.post("/tts")
+async def text_to_speech(request: TTSRequest, background_tasks: BackgroundTasks):
+    """Convert text to Nigerian-accented speech.
+    The voice is taken from ``request.voice`` when given, otherwise derived from
+    ``request.accent``. Voice, accent and language are all matched case-insensitively.
+    Returns:
+        dict with ``audio_url``, ``audio_base64``, ``text``, ``voice`` and ``language``.
+    Raises:
+        HTTPException: 503 when the engine is not loaded, 400 for an unknown
+            language or voice, 500 when generation or saving fails.
+    """
+    # Check if TTS engine is loaded
     if tts_engine is None:
+        raise HTTPException(status_code=503, detail="TTS engine is not initialized. Please try again later.")
+    # Determine voice based on accent or explicitly provided voice
+    voice = request.voice
+    if voice is None:
+        accent = request.accent.lower() if request.accent else "nigerian"
+        voice = ACCENT_TO_VOICE.get(accent, "tayo")  # Default to tayo if accent not recognized
+    else:
+        # Normalize like accent and language so "Tayo" is not rejected
+        voice = voice.lower()
+    # Validate language
+    language = request.language.lower()
+    if language not in AVAILABLE_LANGUAGES:
+        raise HTTPException(status_code=400, detail=f"Language must be one of {AVAILABLE_LANGUAGES}")
+    # Validate voice - combine all available voices
+    all_voices = AVAILABLE_VOICES["female"] + AVAILABLE_VOICES["male"]
+    if voice not in all_voices:
+        raise HTTPException(status_code=400, detail=f"Voice must be one of {all_voices}")
+    # Generate unique filename
+    audio_id = str(uuid.uuid4())
+    output_path = f"audio_files/{audio_id}.wav"
     try:
+        # Generate audio using the TTS engine
+        audio = tts_engine.generate_speech(request.text, language=language, speaker_name=voice)
+        # Import torchaudio here to avoid startup issues
+        import torchaudio
+        # Save audio file
+        torchaudio.save(output_path, audio, sample_rate=24000)
+        # Generate base64 representation for direct embedding
+        import base64
+        with open(output_path, "rb") as audio_file:
+            audio_bytes = audio_file.read()
+            audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
+        # Add task to clean up old files (runs after the response has been sent)
+        background_tasks.add_task(cleanup_old_files)
+        return {
+            "audio_url": f"/audio/{audio_id}.wav",
+            "audio_base64": audio_base64,
+            "text": request.text,
+            "voice": voice,
+            "language": language
+        }
     except Exception as e:
+        logger.error(f"Error generating audio: {str(e)}")
+        logger.error(traceback.format_exc())
+        raise HTTPException(status_code=500, detail=f"Error generating audio: {str(e)}")
+# Serve audio files
+@app.get("/audio/{filename}")
+async def get_audio(filename: str):
+    """Serve a generated WAV file from the audio_files directory.
+    Raises:
+        HTTPException: 404 when filename is not a bare ``*.wav`` name or no such file exists.
+    """
+    # Accept only a plain *.wav file name so the lookup can never leave audio_files
+    if os.path.basename(filename) != filename or not filename.endswith(".wav"):
+        raise HTTPException(status_code=404, detail="Audio file not found")
+    file_path = f"audio_files/{filename}"
+    # isfile rather than exists: a directory must be reported as missing, not handed to FileResponse
+    if not os.path.isfile(file_path):
+        raise HTTPException(status_code=404, detail="Audio file not found")
+    return FileResponse(file_path, media_type="audio/wav")
+# Cleanup function to remove old files
+def cleanup_old_files():
+    """Delete audio files older than 6 hours to manage disk space.
+    Best effort: problems are logged and never raised. Each file is handled on its
+    own so one that vanished or cannot be deleted does not stop the rest of the sweep.
+    """
     try:
+        now = datetime.now()
+        audio_dir = "audio_files"
+        if not os.path.exists(audio_dir):
+            return
+        for filename in os.listdir(audio_dir):
+            if not filename.endswith(".wav"):
+                continue
+            file_path = os.path.join(audio_dir, filename)
+            try:
+                file_mod_time = datetime.fromtimestamp(os.path.getmtime(file_path))
+                # Delete files older than 6 hours
+                if now - file_mod_time > timedelta(hours=6):
+                    os.remove(file_path)
+                    logger.info(f"Deleted old audio file: {filename}")
+            except FileNotFoundError:
+                # A cleanup task scheduled by another request removed it first
+                continue
+            except OSError as e:
+                logger.error(f"Error cleaning up old files: {e}")
     except Exception as e:
+        logger.error(f"Error cleaning up old files: {e}")
+# Custom exception handler for better error responses
 @app.exception_handler(Exception)
 async def global_exception_handler(request: Request, exc: Exception):
+    """Log an otherwise unhandled exception with its traceback and answer with a JSON 500."""
+    error_text = str(exc)
+    logger.error(f"Unhandled exception: {error_text}")
+    logger.error(traceback.format_exc())
+    body = {"detail": f"An unexpected error occurred: {error_text}"}
     return JSONResponse(
         status_code=500,
+        content=body
     )
+# Initialize on startup
+@app.on_event("startup")
+async def startup_event():
+    """Fetch the model files, install yarngpt, then build the TTS engine; failures are logged, never raised."""
+    # Step 1: model files - nothing else can work without them, so stop here on failure
+    try:
+        download_required_files()
+    except Exception as download_error:
+        logger.error(f"Failed to download required files: {str(download_error)}")
+        logger.error(traceback.format_exc())
+        return
+    # Step 2: the yarngpt package, then the engine itself
+    pip_command = [sys.executable, "-m", "pip", "install", "git+https://github.com/saheedniyi02/yarngpt.git", "--quiet"]
+    try:
+        subprocess.run(pip_command, check=True)
+        logger.info("Successfully installed yarngpt package")
+        engine_loaded = load_tts_engine()
+        if not engine_loaded:
+            logger.error("Failed to initialize TTS engine")
+    except Exception as init_error:
+        logger.error(f"Failed to initialize app: {str(init_error)}")
+        logger.error(traceback.format_exc())
+# Main entry point: when run as a script, serve the app with uvicorn on all interfaces, port 7860
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)