# Aurator_test / main.py
# akpande2's picture
# Update main.py
# 717e842 verified
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
import tempfile
import os
import uuid
from pathlib import Path
import logging
# Import your existing pipeline
from kid_coach_pipeline import EnhancedPublicSpeakingCoach
# Logging: configure the root logger at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The ASGI application object; routes below attach to it via decorators.
app = FastAPI(title="Aurator - AI Speech Coach")

# CORS: accept every origin, method and header, with credentials enabled.
# NOTE(review): FastAPI's CORS documentation says wildcard values should not
# be combined with allow_credentials=True — confirm whether credentials are
# actually needed, or list the allowed origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)

# Working directories (relative to the current working directory):
#   static/        - exposed under the /static URL prefix
#   static/audio/  - audio files served by the /audio/{filename} route
static_dir = Path("static")
audio_dir = static_dir / "audio"
static_dir.mkdir(exist_ok=True)
audio_dir.mkdir(exist_ok=True)

app.mount("/static", StaticFiles(directory="static"), name="static")
# ============= WEB UI =============
@app.get("/", response_class=HTMLResponse)
async def home():
    """
    Serve the web UI.

    Reads ``index.html`` from the current working directory on every request
    and returns its text, which FastAPI sends as an HTML response.

    Returns:
        The contents of ``index.html`` as a string.

    Raises:
        FileNotFoundError: If ``index.html`` is not present in the working
            directory.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # default encoding, which differs between hosts and can corrupt or
    # reject non-ASCII characters in the page.
    with open("index.html", "r", encoding="utf-8") as f:
        return f.read()
# ============= API ENDPOINTS =============
@app.post("/analyze")
async def EnhancedPublicSpeakingCoach(
    audio_file: UploadFile = File(...),
    avatar_gender: str = Form("male")
):
    """
    Analyze speech from an uploaded audio file.

    The upload is written to a uniquely named file in the system temp
    directory. Uploads whose filename ends in ``.webm`` are converted with
    ffmpeg to 16 kHz mono WAV; if ffmpeg exits with an error the original
    file is analysed instead. Any other upload is stored with a ``.wav``
    extension and analysed as-is. Temporary files are removed whether the
    request succeeds or fails.

    Args:
        audio_file: The recording, sent as multipart form data.
        avatar_gender: Form field forwarded unchanged to the pipeline.

    Returns:
        A JSONResponse wrapping whatever the analysis pipeline returns.

    Raises:
        HTTPException: 400 if the upload is empty; 500 (with the error text
            as detail) if saving, converting or analysing fails.
    """
    # This handler's own name rebinds the module-level
    # ``EnhancedPublicSpeakingCoach`` imported from kid_coach_pipeline, so a
    # bare call to that name would invoke this coroutine function again
    # instead of the pipeline. Re-import the real object under a private
    # alias. (Renaming the handler would be cleaner, but would change the
    # module's public names.)
    # NOTE(review): assumes the imported callable accepts
    # (audio_path, avatar_gender) and returns JSON-serializable data —
    # confirm against kid_coach_pipeline.
    from kid_coach_pipeline import EnhancedPublicSpeakingCoach as _run_pipeline
    import subprocess

    # Support both .webm (from browser) and .wav. The filename may be
    # missing on some uploads, so treat None as an empty name.
    upload_name = audio_file.filename or ""
    file_ext = ".webm" if upload_name.lower().endswith(".webm") else ".wav"

    temp_id = str(uuid.uuid4())
    temp_dir = Path(tempfile.gettempdir())
    temp_input_path = str(temp_dir / f"recording_{temp_id}{file_ext}")
    # Identical to temp_input_path when the upload is already .wav.
    temp_wav_path = str(temp_dir / f"recording_{temp_id}.wav")

    try:
        logger.info(f"Received audio file: {audio_file.filename}, Gender: {avatar_gender}")

        content = await audio_file.read()
        if not content:
            raise HTTPException(status_code=400, detail="Uploaded audio file is empty")

        # Save uploaded file
        with open(temp_input_path, "wb") as f:
            f.write(content)
        logger.info(f"Saved to: {temp_input_path}, Size: {len(content)} bytes")

        # Convert webm to wav if needed
        analysis_path = temp_input_path
        if file_ext == ".webm":
            # NOTE(review): subprocess.run is synchronous, so this coroutine
            # does not yield to the event loop while ffmpeg runs.
            try:
                subprocess.run([
                    'ffmpeg', '-i', temp_input_path,
                    '-ar', '16000', '-ac', '1', '-f', 'wav',
                    temp_wav_path
                ], check=True, capture_output=True)
                analysis_path = temp_wav_path
                logger.info(f"Converted to WAV: {temp_wav_path}")
            except subprocess.CalledProcessError as e:
                # errors="replace" keeps the log call itself from raising on
                # non-UTF-8 ffmpeg output.
                stderr_text = (e.stderr or b"").decode(errors="replace")
                logger.error(f"FFmpeg conversion failed: {stderr_text}")
                # Best effort: fall back to the original upload.

        # Run the analysis pipeline
        logger.info("Starting analysis pipeline...")
        results = _run_pipeline(analysis_path, avatar_gender)
        logger.info("Analysis complete")

        return JSONResponse(content=results)
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Analysis error: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Clean up temp files on every exit path so failed requests do not
        # leave recordings behind. The set collapses the two paths when they
        # are the same file.
        for leftover in {temp_input_path, temp_wav_path}:
            try:
                if os.path.exists(leftover):
                    os.unlink(leftover)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temp file {leftover}: {cleanup_error}")
@app.get("/audio/{filename}")
async def get_audio(filename: str):
    """
    Serve generated audio files.

    Args:
        filename: Name of a file expected to live inside ``audio_dir``.

    Returns:
        A FileResponse for the requested file.

    Raises:
        HTTPException: 404 if the name does not refer to a regular file
            located inside ``audio_dir``.
    """
    audio_root = audio_dir.resolve()
    file_path = (audio_root / filename).resolve()

    # Two guards beyond a plain existence check:
    #  - the resolved path must stay inside the audio directory, so a name
    #    such as ".." cannot reach files elsewhere on disk;
    #  - it must be a regular file, because FileResponse cannot send a
    #    directory.
    if audio_root in file_path.parents and file_path.is_file():
        return FileResponse(file_path)
    raise HTTPException(status_code=404, detail="Audio file not found")
@app.get("/health")
async def health():
    """Report service liveness as a small fixed JSON object."""
    payload = {
        "status": "healthy",
        "service": "Aurator Speech Coach",
    }
    return payload
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces,
    # port 7860. uvicorn is imported here so merely importing this module
    # does not require it.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)