Spaces:

Hameed13
/

Huggingface_News_Podcast

Build error

App Files Files Community

Hameed13 committed on May 19, 2025

Commit

809bcb3

verified ·

1 Parent(s): 6b625c9

Update main.py

Browse files

Files changed (1) hide show

main.py +92 -132

main.py CHANGED Viewed

@@ -1,47 +1,40 @@
 from fastapi import FastAPI, HTTPException, BackgroundTasks
-from fastapi.responses import FileResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import os
-import sys
-import time
 import torch
 import torchaudio
 import base64
 from transformers import AutoModelForCausalLM
-from huggingface_hub import hf_hub_download
-import logging
 from datetime import datetime, timedelta
-import uuid
-from typing import Optional
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger(__name__)
-# Initialize FastAPI app
-app = FastAPI(
-    title="Nigerian TTS API",
-    description="API for Nigerian Text-to-Speech using YarnGPT",
-    version="1.0.0"
-)
-# Add CORS middleware
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
     allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
 )
-# Constants
-MODEL_ID = "saheedniyi/YarnGPT2"
-AUDIO_DIR = "audio_files"
-os.makedirs(AUDIO_DIR, exist_ok=True)
 # Available voices and languages
 AVAILABLE_VOICES = {
@@ -50,150 +43,117 @@ AVAILABLE_VOICES = {
 }
 AVAILABLE_LANGUAGES = ["english", "yoruba", "igbo", "hausa"]
-# Model initialization
-def initialize_model():
-    try:
-        logger.info("Loading YarnGPT model and tokenizer...")
-        # Download necessary files from HuggingFace Hub
-        wav_tokenizer_config = hf_hub_download(
-            repo_id=MODEL_ID,
-            filename="wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
-        )
-        wav_tokenizer_model = hf_hub_download(
-            repo_id=MODEL_ID,
-            filename="wavtokenizer_large_speech_320_24k.ckpt"
-        )
-        # Import AudioTokenizer here to ensure files are downloaded first
-        from yarngpt.audiotokenizer import AudioTokenizerV2
-        audio_tokenizer = AudioTokenizerV2(
-            MODEL_ID,
-            wav_tokenizer_model,
-            wav_tokenizer_config
-        )
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_ID,
-            torch_dtype="auto"
-        ).to(audio_tokenizer.device)
-        logger.info("Model loaded successfully!")
-        return audio_tokenizer, model
-    except Exception as e:
-        logger.error(f"Error initializing model: {str(e)}")
-        raise
-# Initialize model at startup
-audio_tokenizer, model = initialize_model()
-# Pydantic models
 class TTSRequest(BaseModel):
     text: str
     language: str = "english"
     voice: str = "idera"
 class TTSResponse(BaseModel):
-    audio_base64: str
     text: str
     voice: str
     language: str
-# Cleanup function
-def cleanup_old_files(max_age_hours: int = 6):
-    """Delete audio files older than specified hours"""
-    try:
-        now = datetime.now()
-        for filename in os.listdir(AUDIO_DIR):
-            if not filename.endswith(".wav"):
-                continue
-            file_path = os.path.join(AUDIO_DIR, filename)
-            file_mod_time = datetime.fromtimestamp(os.path.getmtime(file_path))
-            if now - file_mod_time > timedelta(hours=max_age_hours):
-                os.remove(file_path)
-                logger.info(f"Deleted old audio file: {filename}")
-    except Exception as e:
-        logger.error(f"Error cleaning up files: {str(e)}")
-# API endpoints
 @app.get("/")
 async def root():
-    """Health check endpoint"""
     return {
-        "status": "healthy",
-        "model": MODEL_ID,
         "available_languages": AVAILABLE_LANGUAGES,
         "available_voices": AVAILABLE_VOICES
     }
 @app.post("/tts", response_model=TTSResponse)
 async def text_to_speech(request: TTSRequest, background_tasks: BackgroundTasks):
-    """Generate Nigerian-accented speech from text"""
     # Validate inputs
     if request.language not in AVAILABLE_LANGUAGES:
-        raise HTTPException(
-            status_code=400,
-            detail=f"Language must be one of {AVAILABLE_LANGUAGES}"
-        )
     all_voices = AVAILABLE_VOICES["female"] + AVAILABLE_VOICES["male"]
     if request.voice not in all_voices:
-        raise HTTPException(
-            status_code=400,
-            detail=f"Voice must be one of {all_voices}"
-        )
     try:
-        # Generate unique filename
-        audio_id = str(uuid.uuid4())
-        output_path = os.path.join(AUDIO_DIR, f"{audio_id}.wav")
-        # Generate audio
-        prompt = audio_tokenizer.create_prompt(
-            request.text,
-            lang=request.language,
-            speaker_name=request.voice
-        )
         input_ids = audio_tokenizer.tokenize_prompt(prompt)
-        with torch.no_grad():
-            output = model.generate(
-                input_ids=input_ids,
-                temperature=0.1,
-                repetition_penalty=1.1,
-                max_length=4000,
-            )
         codes = audio_tokenizer.get_codes(output)
         audio = audio_tokenizer.get_audio(codes)
         # Save audio file
         torchaudio.save(output_path, audio, sample_rate=24000)
-        # Read and encode as base64
         with open(output_path, "rb") as audio_file:
             audio_bytes = audio_file.read()
             audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
-        # Schedule cleanup
         background_tasks.add_task(cleanup_old_files)
         return TTSResponse(
             audio_base64=audio_base64,
             text=request.text,
             voice=request.voice,
             language=request.language
         )
     except Exception as e:
-        logger.error(f"Error generating audio: {str(e)}")
-        raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
-    import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)

 from fastapi import FastAPI, HTTPException, BackgroundTasks
+from fastapi.responses import FileResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import os
+import uuid
 import torch
 import torchaudio
 import base64
 from transformers import AutoModelForCausalLM
+from yarngpt.audiotokenizer import AudioTokenizerV2
+import uvicorn
 from datetime import datetime, timedelta
+app = FastAPI(title="Nigerian TTS API")
+# Add CORS middleware to allow requests from any origin
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],  # Allows all origins
     allow_credentials=True,
+    allow_methods=["*"],  # Allows all methods
+    allow_headers=["*"],  # Allows all headers
 )
+# Model configuration paths
+# tokenizer_path is used both for the audio tokenizer and for from_pretrained below.
+tokenizer_path = "saheedniyi/YarnGPT2"
+# NOTE(review): the two WavTokenizer paths are relative to the process working
+# directory, and nothing visible in this file downloads them — confirm the
+# files are present there before the app is started.
+wav_tokenizer_config_path = "./wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
+wav_tokenizer_model_path = "./wavtokenizer_large_speech_320_24k.ckpt"
+# Initialize model (only once when the API starts)
+# This runs at module import time, so importing this file loads the model.
+print("Loading YarnGPT model and tokenizer...")
+audio_tokenizer = AudioTokenizerV2(
+    tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path
+)
+# The model is moved to the device reported by the audio tokenizer.
+model = AutoModelForCausalLM.from_pretrained(tokenizer_path, torch_dtype="auto").to(audio_tokenizer.device)
+print("Model loaded successfully!")
 # Available voices and languages
 AVAILABLE_VOICES = {
 }
 AVAILABLE_LANGUAGES = ["english", "yoruba", "igbo", "hausa"]
+# Input validation model: the request body accepted by POST /tts
 class TTSRequest(BaseModel):
+    # Text to synthesize
     text: str
+    # Checked against AVAILABLE_LANGUAGES in text_to_speech
     language: str = "english"
+    # Checked against the female and male lists of AVAILABLE_VOICES in text_to_speech
     voice: str = "idera"
+# Output model with base64-encoded audio
 class TTSResponse(BaseModel):
+    audio_base64: str  # Base64-encoded bytes of the generated WAV file
+    audio_url: str     # Path of the form "/audio/<id>.wav"; kept for backward compatibility
+    # The remaining fields echo the values sent in the request
     text: str
     voice: str
     language: str
 @app.get("/")
 async def root():
+    """API health check and info"""
     return {
+        "status": "ok",
+        "message": "Nigerian TTS API is running",
         "available_languages": AVAILABLE_LANGUAGES,
         "available_voices": AVAILABLE_VOICES
     }
 @app.post("/tts", response_model=TTSResponse)
 async def text_to_speech(request: TTSRequest, background_tasks: BackgroundTasks):
+    """Convert text to Nigerian-accented speech"""
     # Validate inputs
     if request.language not in AVAILABLE_LANGUAGES:
+        raise HTTPException(status_code=400, detail=f"Language must be one of {AVAILABLE_LANGUAGES}")
     all_voices = AVAILABLE_VOICES["female"] + AVAILABLE_VOICES["male"]
     if request.voice not in all_voices:
+        raise HTTPException(status_code=400, detail=f"Voice must be one of {all_voices}")
+    # Generate unique filename
+    audio_id = str(uuid.uuid4())
+    output_path = f"audio_files/{audio_id}.wav"
+    os.makedirs("audio_files", exist_ok=True)
     try:
+        # Create prompt and generate audio
+        prompt = audio_tokenizer.create_prompt(request.text, lang=request.language, speaker_name=request.voice)
         input_ids = audio_tokenizer.tokenize_prompt(prompt)
+        output = model.generate(
+            input_ids=input_ids,
+            temperature=0.1,
+            repetition_penalty=1.1,
+            max_length=4000,
+        )
         codes = audio_tokenizer.get_codes(output)
         audio = audio_tokenizer.get_audio(codes)
         # Save audio file
         torchaudio.save(output_path, audio, sample_rate=24000)
+        # Read the file and encode as base64
         with open(output_path, "rb") as audio_file:
             audio_bytes = audio_file.read()
             audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
+        # Clean up old files after a while
         background_tasks.add_task(cleanup_old_files)
         return TTSResponse(
             audio_base64=audio_base64,
+            audio_url=f"/audio/{audio_id}.wav",  # Keep for compatibility
             text=request.text,
             voice=request.voice,
             language=request.language
         )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error generating audio: {str(e)}")
+# File serving endpoint (keep for backward compatibility)
+@app.get("/audio/{filename}")
+async def get_audio(filename: str):
+    file_path = f"audio_files/{filename}"
+    if not os.path.exists(file_path):
+        raise HTTPException(status_code=404, detail="Audio file not found")
+    return FileResponse(file_path, media_type="audio/wav")
+# Cleanup function to remove old files
+def cleanup_old_files():
+    """Delete audio files older than 6 hours to manage disk space"""
+    try:
+        now = datetime.now()
+        audio_dir = "audio_files"
+        if not os.path.exists(audio_dir):
+            return
+        for filename in os.listdir(audio_dir):
+            if not filename.endswith(".wav"):
+                continue
+            file_path = os.path.join(audio_dir, filename)
+            file_mod_time = datetime.fromtimestamp(os.path.getmtime(file_path))
+            # Delete files older than 6 hours
+            if now - file_mod_time > timedelta(hours=6):
+                os.remove(file_path)
+                print(f"Deleted old audio file: {filename}")
     except Exception as e:
+        print(f"Error cleaning up old files: {e}")
+# For Hugging Face Spaces, we'll use the default port 7860
+# Host 0.0.0.0 makes the server listen on all network interfaces.
 if __name__ == "__main__":
+    print("Starting Nigerian TTS API server...")
     uvicorn.run(app, host="0.0.0.0", port=7860)