# Hugging Face Space: akpande2/Aurator_test (status: Paused) - file size: 3,898 bytes

from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
import tempfile
import os
import uuid
from pathlib import Path
import logging

# Import your existing pipeline
from kid_coach_pipeline import EnhancedPublicSpeakingCoach

# Setup logging: INFO and above go to the root handler; this module logs
# through its own named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Aurator - AI Speech Coach")

# Enable CORS for every origin, method and header.
# Credentials stay disabled: a wildcard origin must not be combined with
# allow_credentials=True (the FastAPI CORS documentation requires explicit
# origins in that case), and no endpoint in this file reads cookies or
# authentication headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Ensure the static asset tree (static/ and static/audio/) exists
# before it is mounted; paths are relative to the working directory.
static_dir = Path("static")
audio_dir = static_dir.joinpath("audio")
static_dir.mkdir(exist_ok=True)
audio_dir.mkdir(exist_ok=True)

# Expose everything under static/ at the /static URL prefix.
app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")

# ============= WEB UI =============
@app.get("/", response_class=HTMLResponse)
async def home():
    """Serve the web UI.

    Returns:
        The text of ``index.html``, looked up relative to the current
        working directory and decoded as UTF-8.

    Raises:
        HTTPException: 500 when ``index.html`` cannot be read.
    """
    try:
        # Decode explicitly: the platform default encoding is not
        # guaranteed to be UTF-8 and would garble non-ASCII markup.
        return Path("index.html").read_text(encoding="utf-8")
    except OSError as e:
        logger.error("Could not read index.html: %s", e)
        raise HTTPException(status_code=500, detail="Web UI is unavailable") from e

# ============= API ENDPOINTS =============

def _convert_webm_to_wav(source_path: str, target_path: str) -> bool:
    """Transcode *source_path* into a 16 kHz mono WAV at *target_path* using ffmpeg.

    Returns:
        True when ffmpeg exited successfully, False when the ffmpeg
        executable is missing or the conversion failed (the reason is logged).
    """
    import subprocess

    command = [
        'ffmpeg', '-i', source_path,
        '-ar', '16000', '-ac', '1', '-f', 'wav',
        target_path,
    ]
    try:
        subprocess.run(command, check=True, capture_output=True)
    except FileNotFoundError:
        # Raised when the ffmpeg binary itself is not installed / not on PATH.
        logger.error("FFmpeg conversion failed: ffmpeg executable not found")
        return False
    except subprocess.CalledProcessError as e:
        # stderr is raw bytes; never let an undecodable byte mask the real error.
        logger.error("FFmpeg conversion failed: %s", e.stderr.decode(errors="replace"))
        return False
    return True


@app.post("/analyze")
async def EnhancedPublicSpeakingCoach(
    audio_file: UploadFile = File(...),
    avatar_gender: str = Form("male")
):
    """
    Analyze speech from uploaded audio file.

    The upload is written to a temporary file, transcoded to WAV when it is
    a ``.webm`` recording, and handed to the analysis pipeline. Temporary
    files are removed on every exit path.

    Args:
        audio_file: The uploaded recording. It is treated as WebM when its
            filename ends in ``.webm`` (case-insensitive), otherwise as WAV.
        avatar_gender: Passed unchanged to the pipeline as its second
            argument; defaults to ``"male"``.

    Returns:
        A ``JSONResponse`` wrapping the pipeline's return value.
        NOTE(review): assumes the pipeline is callable as
        ``pipeline(path, gender)`` and returns JSON-serialisable data - confirm
        against ``kid_coach_pipeline``.

    Raises:
        HTTPException: 400 when the upload is empty; 500 (with the error
            text as detail) for any other failure.
    """
    # This handler's own name rebinds the module-level
    # ``EnhancedPublicSpeakingCoach`` import, so calling that name from here
    # would invoke the handler again instead of the pipeline. Import the
    # pipeline under a private alias to reach the real implementation.
    from kid_coach_pipeline import EnhancedPublicSpeakingCoach as run_pipeline

    temp_paths = []  # every temp file we may have created, for cleanup
    try:
        logger.info("Received audio file: %s, Gender: %s", audio_file.filename, avatar_gender)

        # Save uploaded file
        temp_id = str(uuid.uuid4())
        # Support both .webm (from browser) and .wav; the filename may be absent.
        upload_name = audio_file.filename or ""
        file_ext = ".webm" if upload_name.lower().endswith(".webm") else ".wav"
        temp_dir = tempfile.gettempdir()
        temp_input_path = os.path.join(temp_dir, f"recording_{temp_id}{file_ext}")

        content = await audio_file.read()
        if not content:
            raise HTTPException(status_code=400, detail="Uploaded audio file is empty")

        temp_paths.append(temp_input_path)
        with open(temp_input_path, "wb") as f:
            f.write(content)
        logger.info("Saved to: %s, Size: %d bytes", temp_input_path, len(content))

        # Convert webm to wav if needed; on failure fall back to the original file.
        analysis_path = temp_input_path
        if file_ext == ".webm":
            temp_wav_path = os.path.join(temp_dir, f"recording_{temp_id}.wav")
            # Registered before converting so a partial output is cleaned up too.
            temp_paths.append(temp_wav_path)
            if _convert_webm_to_wav(temp_input_path, temp_wav_path):
                analysis_path = temp_wav_path
                logger.info("Converted to WAV: %s", temp_wav_path)

        # Run the analysis pipeline
        logger.info("Starting analysis pipeline...")
        results = run_pipeline(analysis_path, avatar_gender)
        logger.info("Analysis complete")

        return JSONResponse(content=results)

    except HTTPException:
        # Deliberate client-facing errors (e.g. the 400 above) pass through untouched.
        raise
    except Exception as e:
        logger.error("Analysis error: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Clean up temp files whether the analysis succeeded or failed.
        for path in temp_paths:
            try:
                os.unlink(path)
            except FileNotFoundError:
                pass  # never created, e.g. ffmpeg failed before writing output
            except OSError as cleanup_error:
                logger.warning("Could not remove temp file %s: %s", path, cleanup_error)

@app.get("/audio/{filename}")
async def get_audio(filename: str):
    """Serve generated audio files.

    Args:
        filename: Name of a file located directly inside ``audio_dir``.

    Returns:
        A ``FileResponse`` for the requested file.

    Raises:
        HTTPException: 404 when the name contains a path component, or when
            no regular file with that name exists in ``audio_dir``.
    """
    # Accept only a bare file name: anything carrying a directory part
    # (e.g. backslash-separated segments on Windows) could escape audio_dir.
    is_bare_name = Path(filename).name == filename
    file_path = audio_dir / filename
    # is_file() rather than exists(): a directory such as ".." exists but
    # cannot be served as a file.
    if is_bare_name and file_path.is_file():
        return FileResponse(file_path)
    raise HTTPException(status_code=404, detail="Audio file not found")

@app.get("/health")
async def health():
    """Liveness probe.

    Returns a fixed payload naming the service; nothing else is checked.
    """
    payload = dict(status="healthy", service="Aurator Speech Coach")
    return payload

if __name__ == "__main__":
    # Direct execution: serve the app on all interfaces, port 7860.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=7860)