Spaces:

Hameed13
/

Huggingface_News_Podcast

Build error

App Files Files Community

Hameed13 committed on May 20, 2025

Commit

1f406e0

verified ·

1 Parent(s): cf42333

Update main.py

Browse files

Files changed (1) hide show

main.py +94 -139

main.py CHANGED Viewed

@@ -1,159 +1,114 @@
-from fastapi import FastAPI, HTTPException, BackgroundTasks
-from fastapi.responses import FileResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-import os
-import uuid
 import torch
-import torchaudio
-import base64
-from transformers import AutoModelForCausalLM
-from yarngpt.audiotokenizer import AudioTokenizerV2
-import uvicorn
-from datetime import datetime, timedelta
-app = FastAPI(title="Nigerian TTS API")
-# Add CORS middleware to allow requests from any origin
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # Allows all origins
     allow_credentials=True,
-    allow_methods=["*"],  # Allows all methods
-    allow_headers=["*"],  # Allows all headers
 )
-# Model configuration paths
-tokenizer_path = "saheedniyi/YarnGPT2"
-wav_tokenizer_config_path = "./wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
-wav_tokenizer_model_path = "./wavtokenizer_large_speech_320_24k.ckpt"
-# Initialize model (only once when the API starts)
-print("Loading YarnGPT model and tokenizer...")
-audio_tokenizer = AudioTokenizerV2(
-    tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path
-)
-model = AutoModelForCausalLM.from_pretrained(tokenizer_path, torch_dtype="auto").to(audio_tokenizer.device)
-print("Model loaded successfully!")
-# Available voices and languages
-AVAILABLE_VOICES = {
-    "female": ["zainab", "idera", "regina", "chinenye", "joke", "remi"],
-    "male": ["jude", "tayo", "umar", "osagie", "onye", "emma"]
-}
-AVAILABLE_LANGUAGES = ["english", "yoruba", "igbo", "hausa"]
-# Input validation model
 class TTSRequest(BaseModel):
     text: str
-    language: str = "english"
-    voice: str = "idera"
-# Output model with base64-encoded audio
-class TTSResponse(BaseModel):
-    audio_base64: str  # Base64-encoded audio data
-    audio_url: str     # Keep for backward compatibility
-    text: str
-    voice: str
-    language: str
 @app.get("/")
-async def root():
-    """API health check and info"""
-    return {
-        "status": "ok",
-        "message": "Nigerian TTS API is running",
-        "available_languages": AVAILABLE_LANGUAGES,
-        "available_voices": AVAILABLE_VOICES
-    }
-@app.post("/tts", response_model=TTSResponse)
-async def text_to_speech(request: TTSRequest, background_tasks: BackgroundTasks):
-    """Convert text to Nigerian-accented speech"""
-    # Validate inputs
-    if request.language not in AVAILABLE_LANGUAGES:
-        raise HTTPException(status_code=400, detail=f"Language must be one of {AVAILABLE_LANGUAGES}")
-    all_voices = AVAILABLE_VOICES["female"] + AVAILABLE_VOICES["male"]
-    if request.voice not in all_voices:
-        raise HTTPException(status_code=400, detail=f"Voice must be one of {all_voices}")
-    # Generate unique filename
-    audio_id = str(uuid.uuid4())
-    output_path = f"audio_files/{audio_id}.wav"
-    os.makedirs("audio_files", exist_ok=True)
     try:
-        # Create prompt and generate audio
-        prompt = audio_tokenizer.create_prompt(request.text, lang=request.language, speaker_name=request.voice)
-        input_ids = audio_tokenizer.tokenize_prompt(prompt)
-        output = model.generate(
-            input_ids=input_ids,
-            temperature=0.1,
-            repetition_penalty=1.1,
-            max_length=4000,
         )
-        codes = audio_tokenizer.get_codes(output)
-        audio = audio_tokenizer.get_audio(codes)
-        # Save audio file
-        torchaudio.save(output_path, audio, sample_rate=24000)
-        # Read the file and encode as base64
-        with open(output_path, "rb") as audio_file:
-            audio_bytes = audio_file.read()
-            audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
-        # Clean up old files after a while
-        background_tasks.add_task(cleanup_old_files)
-        return TTSResponse(
-            audio_base64=audio_base64,
-            audio_url=f"/audio/{audio_id}.wav",  # Keep for compatibility
-            text=request.text,
-            voice=request.voice,
-            language=request.language
-        )
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error generating audio: {str(e)}")
-# File serving endpoint (keep for backward compatibility)
-@app.get("/audio/{filename}")
-async def get_audio(filename: str):
-    file_path = f"audio_files/{filename}"
-    if not os.path.exists(file_path):
-        raise HTTPException(status_code=404, detail="Audio file not found")
-    return FileResponse(file_path, media_type="audio/wav")
-# Cleanup function to remove old files
-def cleanup_old_files():
-    """Delete audio files older than 6 hours to manage disk space"""
-    try:
-        now = datetime.now()
-        audio_dir = "audio_files"
-        if not os.path.exists(audio_dir):
-            return
-        for filename in os.listdir(audio_dir):
-            if not filename.endswith(".wav"):
-                continue
-            file_path = os.path.join(audio_dir, filename)
-            file_mod_time = datetime.fromtimestamp(os.path.getmtime(file_path))
-            # Delete files older than 6 hours
-            if now - file_mod_time > timedelta(hours=6):
-                os.remove(file_path)
-                print(f"Deleted old audio file: {filename}")
-    except Exception as e:
-        print(f"Error cleaning up old files: {e}")
-# For Hugging Face Spaces, we'll use the default port 7860
 if __name__ == "__main__":
-    print("Starting Nigerian TTS API server...")
     uvicorn.run(app, host="0.0.0.0", port=7860)

+import os
+from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
+import sys
+import time
+import numpy as np
 import torch
+import logging
+from pathlib import Path
+# Setup logging
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+# Set absolute paths for model files
+MODEL_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
+MODEL_CHECKPOINT = os.path.join(MODEL_DIR, "wavtokenizer_large_speech_320_24k.ckpt")
+MODEL_CONFIG = os.path.join(MODEL_DIR, "wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml")
+# Check that model files exist
+if not os.path.exists(MODEL_CHECKPOINT):
+    logger.error(f"Model checkpoint not found: {MODEL_CHECKPOINT}")
+    raise FileNotFoundError(f"Model checkpoint not found: {MODEL_CHECKPOINT}")
+if not os.path.exists(MODEL_CONFIG):
+    logger.error(f"Model config not found: {MODEL_CONFIG}")
+    raise FileNotFoundError(f"Model config not found: {MODEL_CONFIG}")
+logger.info(f"Loading YarnGPT model from {MODEL_CHECKPOINT} and {MODEL_CONFIG}")
+# Import TTS modules only after verifying files exist
+try:
+    from yarngpt.generate import generate_audio, save_audio
+    logger.info("Successfully imported yarngpt modules")
+except ImportError as e:
+    logger.error(f"Failed to import YarnGPT modules: {e}")
+    raise
+# Create FastAPI app
+app = FastAPI(title="YarnGPT TTS API")
+# Configure CORS
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],  # Allow all origins
     allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
 class TTSRequest(BaseModel):
     text: str
+    temperature: float = 0.2
+    top_p: float = 0.7
+    top_k: int = 50
+    speed: float = 1.0
+    seed: int = 42
 @app.get("/")
+def read_root():
+    return {"message": "YarnGPT TTS API is running. Send POST requests to /tts endpoint."}
+@app.post("/tts")
+async def text_to_speech(request: TTSRequest):
     try:
+        logger.info(f"Processing TTS request: {request.text[:50]}...")
+        # Set random seed if provided
+        if request.seed is not None:
+            torch.manual_seed(request.seed)
+            np.random.seed(request.seed)
+        # Generate audio
+        start_time = time.time()
+        audio = generate_audio(
+            request.text,
+            checkpoint_path=MODEL_CHECKPOINT,
+            config_path=MODEL_CONFIG,
+            temperature=request.temperature,
+            top_p=request.top_p,
+            top_k=request.top_k,
+            speed=request.speed
         )
+        # Convert audio to base64
+        import base64
+        import io
+        audio_io = io.BytesIO()
+        save_audio(audio_io, audio, sample_rate=24000)
+        audio_io.seek(0)
+        audio_base64 = base64.b64encode(audio_io.read()).decode('utf-8')
+        generation_time = time.time() - start_time
+        logger.info(f"Generated audio in {generation_time:.2f} seconds")
+        return {
+            "audio": audio_base64,
+            "generation_time": generation_time
+        }
     except Exception as e:
+        logger.error(f"Error generating speech: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Error generating speech: {str(e)}")
+@app.get("/health")
+def health_check():
+    return {"status": "ok", "models_loaded": True}
+# For local testing
 if __name__ == "__main__":
+    import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)