from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
import logging
import os
import subprocess
import tempfile
import uuid
from pathlib import Path

# Import the existing analysis pipeline
from kid_coach_pipeline import EnhancedPublicSpeakingCoach

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Aurator - AI Speech Coach")

# Enable CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Create directories
static_dir = Path("static")
static_dir.mkdir(exist_ok=True)
audio_dir = static_dir / "audio"
audio_dir.mkdir(exist_ok=True)

app.mount("/static", StaticFiles(directory="static"), name="static")


# ============= WEB UI =============

@app.get("/", response_class=HTMLResponse)
async def home():
    """Serve the single-page UI from index.html."""
    with open("index.html", "r", encoding="utf-8") as f:
        return f.read()


# ============= API ENDPOINTS =============

@app.post("/analyze")
async def analyze_speech(
    audio_file: UploadFile = File(...),
    avatar_gender: str = Form("male")
):
    """
    Analyze speech from uploaded audio file.

    Accepts browser-recorded .webm (converted to 16 kHz mono WAV via ffmpeg)
    or .wav uploads, runs the analysis pipeline, and returns its output as
    JSON.  Raises HTTP 500 with the underlying error message on failure.

    BUG FIX: this endpoint was previously named `EnhancedPublicSpeakingCoach`,
    which rebound the module-level name and shadowed the imported pipeline
    class — the call below then invoked the endpoint itself (returning a bare
    coroutine) instead of running the pipeline.  Renaming the endpoint
    restores the import.
    """
    # Track temp paths so the `finally` block can clean up on every path,
    # including failures (the old code only cleaned up on success).
    temp_input_path = None
    temp_wav_path = None
    try:
        logger.info(f"Received audio file: {audio_file.filename}, Gender: {avatar_gender}")

        # Save uploaded file under a unique name in the platform temp dir
        # (portable, unlike the previous hard-coded /tmp).
        temp_id = str(uuid.uuid4())
        # Support both .webm (from browser) and .wav
        file_ext = ".webm" if audio_file.filename.endswith(".webm") else ".wav"
        temp_input_path = os.path.join(
            tempfile.gettempdir(), f"recording_{temp_id}{file_ext}"
        )

        with open(temp_input_path, "wb") as f:
            content = await audio_file.read()
            f.write(content)

        logger.info(f"Saved to: {temp_input_path}, Size: {len(content)} bytes")

        # Convert webm to wav if needed
        if file_ext == ".webm":
            temp_wav_path = os.path.join(
                tempfile.gettempdir(), f"recording_{temp_id}.wav"
            )
            try:
                # -y: never block on an overwrite prompt (capture_output would
                # otherwise deadlock waiting for stdin).
                subprocess.run([
                    'ffmpeg', '-y', '-i', temp_input_path,
                    '-ar', '16000', '-ac', '1', '-f', 'wav',
                    temp_wav_path
                ], check=True, capture_output=True)
                analysis_path = temp_wav_path
                logger.info(f"Converted to WAV: {temp_wav_path}")
            except subprocess.CalledProcessError as e:
                logger.error(f"FFmpeg conversion failed: {e.stderr.decode()}")
                # Best effort: fall back to the original file
                analysis_path = temp_input_path
        else:
            analysis_path = temp_input_path

        # Run the analysis pipeline (now correctly the imported class/callable
        # from kid_coach_pipeline — see docstring).
        logger.info("Starting analysis pipeline...")
        results = EnhancedPublicSpeakingCoach(analysis_path, avatar_gender)
        logger.info("Analysis complete")

        return JSONResponse(content=results)

    except Exception as e:
        logger.error(f"Analysis error: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Remove temp files even when analysis raised.
        for path in (temp_input_path, temp_wav_path):
            if path and os.path.exists(path):
                os.unlink(path)


# BUG FIX: the route was the garbled literal "/audio/(unknown)", which never
# matched and made the `filename` path parameter unreachable.
@app.get("/audio/{filename}")
async def get_audio(filename: str):
    """Serve generated audio files from the static audio directory."""
    file_path = (audio_dir / filename).resolve()
    # Reject anything that resolves outside audio_dir (path traversal guard
    # for the untrusted `filename` input).
    if audio_dir.resolve() not in file_path.parents:
        raise HTTPException(status_code=404, detail="Audio file not found")
    if file_path.exists():
        return FileResponse(file_path)
    raise HTTPException(status_code=404, detail="Audio file not found")


@app.get("/health")
async def health():
    """Health check"""
    return {"status": "healthy", "service": "Aurator Speech Coach"}


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)