"""
MagicPath AI Vocal Effects Server - DiffVox LLM integrated version
==================================================================

FastAPI application that accepts an uploaded vocal recording plus an optional
text prompt, asks ``AIEffector`` for effect parameters and, depending on the
endpoint, applies them with ``EffectChain`` and returns the processed audio.
"""

import base64
import logging
import os
import tempfile
import uuid
from datetime import datetime
from pathlib import Path

from fastapi import BackgroundTasks, FastAPI, File, Form, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

print(f"\n===== Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====\n")

# Internal modules. Kept after the startup banner, matching the original
# statement order, so the banner is printed before these imports run.
from models.ai_effector import AIEffector
from audio_processing.effect_chain import EffectChain

# ============================================
# Configuration
# ============================================

# Trained model location - repo_id and subfolder are kept separate!
MODEL_REPO_ID = os.environ.get("DIFFVOX_MODEL_REPO", "heybaeheef/KU_SW_Academy")
MODEL_SUBFOLDER = os.environ.get("DIFFVOX_MODEL_SUBFOLDER", "checkpoints")
BASE_MODEL_NAME = os.environ.get("BASE_MODEL_NAME", "Qwen/Qwen3-8B")
AUDIO_FEATURE_DIM = int(os.environ.get("AUDIO_FEATURE_DIM", "64"))
USE_HUGGINGFACE = os.environ.get("USE_HUGGINGFACE", "true").lower() == "true"

# Directory for temporary upload / output files
TEMP_DIR = Path(tempfile.gettempdir()) / "magicpath"
TEMP_DIR.mkdir(parents=True, exist_ok=True)

# Upper bound on the client-derived part of a temp file name, so that the
# "<uuid>_input_<name>" path stays well below common file-name length limits.
_MAX_UPLOAD_NAME_LEN = 100

# ============================================
# FastAPI app initialisation
# ============================================

app = FastAPI(
    title="MagicPath AI Vocal Effects",
    description="AI-powered vocal effect processing server (DiffVox LLM 통합)",
    version="2.0.0",
)

# CORS setup
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive; left unchanged because existing clients may rely on it - confirm
# whether an explicit origin list is possible.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global object initialisation
print("=" * 60)
print("MagicPath AI Vocal Effects Server v2.0")
print("=" * 60)
print(f"Model Repo: {MODEL_REPO_ID}")
print(f"Model Subfolder: {MODEL_SUBFOLDER}")
print(f"Base Model: {BASE_MODEL_NAME}")
print(f"Audio Feature Dim: {AUDIO_FEATURE_DIM}")
print(f"Use Hugging Face: {USE_HUGGINGFACE}")
print("=" * 60)

ai_effector = AIEffector(
    model_repo_id=MODEL_REPO_ID,
    model_subfolder=MODEL_SUBFOLDER,
    base_model_name=BASE_MODEL_NAME,
    audio_feature_dim=AUDIO_FEATURE_DIM,
    use_huggingface=USE_HUGGINGFACE,
)
effect_chain = EffectChain()


# ============================================
# Internal helpers
# ============================================

def _safe_filename(filename, default="audio"):
    """Return a bare, bounded-length file name derived from a client-supplied one.

    The upload's file name is untrusted: it may be missing, or contain
    directory components such as ``../``. Only the final path component is
    kept (both ``/`` and ``\\`` are treated as separators), NUL bytes are
    dropped, and the result is truncated from the left so the extension
    survives.

    Args:
        filename: The name sent by the client; may be ``None`` or empty.
        default: Name to use when nothing usable remains.

    Returns:
        A non-empty name containing no path separators.
    """
    if not filename:
        return default
    name = filename.replace("\\", "/").rsplit("/", 1)[-1].replace("\x00", "")
    if name in ("", ".", ".."):
        return default
    return name[-_MAX_UPLOAD_NAME_LEN:]


async def _save_upload(audio, destination):
    """Write the full content of the uploaded file ``audio`` to ``destination``."""
    content = await audio.read()
    with open(destination, "wb") as f:
        f.write(content)


def _remove_quietly(path):
    """Delete ``path`` if it exists; never raise.

    Cleanup runs in ``finally`` blocks and error handlers, where a secondary
    failure must not replace the error that is already being reported.
    """
    if path is None:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as exc:
        logger.warning("Could not remove temp file %s: %s", path, exc)


# ============================================
# API endpoints
# ============================================

@app.get("/")
async def root():
    """Return server information and the list of endpoints."""
    return {
        "status": "running",
        "message": "MagicPath AI Vocal Effects Server v2.0 (DiffVox LLM)",
        "ai_model_loaded": ai_effector.is_loaded(),
        "model_repo": MODEL_REPO_ID,
        "model_subfolder": MODEL_SUBFOLDER,
        "endpoints": {
            "POST /process": "오디오 파일 처리 후 반환",
            "POST /predict": "파라미터만 예측 (JSON)",
            "POST /process_with_params": "오디오 처리 + 파라미터 반환",
            "GET /health": "서버 상태 확인",
        },
    }


@app.get("/health")
async def health_check():
    """Report server and model status."""
    return {
        "status": "healthy",
        "ai_model_loaded": ai_effector.is_loaded(),
        "supported_effects": effect_chain.get_available_effects(),
        "model_repo": MODEL_REPO_ID,
        "base_model": BASE_MODEL_NAME,
    }


@app.post("/predict")
async def predict_parameters(
    audio: UploadFile = File(..., description="Dry 보컬 오디오 파일"),
    prompt: str = Form("", description="텍스트 명령 (예: 'warm', 'bright')"),
):
    """Predict effect parameters with the AI model (no audio is rendered).

    The upload is written to a temporary file, passed to
    ``ai_effector.predict`` together with the prompt, and the temporary file
    is removed whether or not prediction succeeds.

    Raises:
        HTTPException: 500 with the error text if any step fails.
    """
    input_path = TEMP_DIR / f"{uuid.uuid4()}_{_safe_filename(audio.filename)}"
    try:
        await _save_upload(audio, input_path)
        parameters = ai_effector.predict(
            audio_path=str(input_path),
            text_prompt=prompt,
        )
        return JSONResponse(content={
            "status": "success",
            "prompt": prompt,
            "ai_model_used": ai_effector.is_loaded(),
            "parameters": parameters,
        })
    except Exception as e:
        logger.exception("Predict error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Previously the upload was only deleted on success and leaked on error.
        _remove_quietly(input_path)


@app.post("/process")
async def process_audio(
    audio: UploadFile = File(..., description="Dry 보컬 오디오 파일"),
    prompt: str = Form("", description="텍스트 명령 (예: 'warm', 'bright')"),
):
    """Process the audio with AI-predicted parameters and return a WAV file.

    Raises:
        HTTPException: 500 with the error text if any step fails.
    """
    file_id = str(uuid.uuid4())
    upload_name = _safe_filename(audio.filename)
    input_path = TEMP_DIR / f"{file_id}_input_{upload_name}"
    output_path = TEMP_DIR / f"{file_id}_output.wav"

    try:
        await _save_upload(audio, input_path)
        parameters = ai_effector.predict(
            audio_path=str(input_path),
            text_prompt=prompt,
        )
        effect_chain.process(
            input_path=str(input_path),
            output_path=str(output_path),
            parameters=parameters,
        )
    except Exception as e:
        logger.exception("Process error: %s", e)
        _remove_quietly(output_path)
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        _remove_quietly(input_path)

    # The rendered file must outlive this function because the response
    # streams it, so deletion is attached to the response as a background
    # task that runs after the body has been sent. Previously the output
    # file was never removed.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_quietly, output_path)
    return FileResponse(
        path=str(output_path),
        media_type="audio/wav",
        filename=f"processed_{upload_name.rsplit('.', 1)[0]}.wav",
        background=cleanup,
    )


@app.post("/process_with_params")
async def process_audio_with_params(
    audio: UploadFile = File(..., description="Dry 보컬 오디오 파일"),
    prompt: str = Form("", description="텍스트 명령"),
):
    """Process the audio and return it (base64 WAV) with the parameters used.

    Raises:
        HTTPException: 500 with the error text if any step fails.
    """
    file_id = str(uuid.uuid4())
    input_path = TEMP_DIR / f"{file_id}_input_{_safe_filename(audio.filename)}"
    output_path = TEMP_DIR / f"{file_id}_output.wav"

    try:
        await _save_upload(audio, input_path)
        parameters = ai_effector.predict(
            audio_path=str(input_path),
            text_prompt=prompt,
        )
        effect_chain.process(
            input_path=str(input_path),
            output_path=str(output_path),
            parameters=parameters,
        )
        with open(output_path, "rb") as f:
            audio_base64 = base64.b64encode(f.read()).decode("utf-8")
        return JSONResponse(content={
            "status": "success",
            "prompt": prompt,
            "ai_model_used": ai_effector.is_loaded(),
            "parameters": parameters,
            "audio_base64": audio_base64,
            "audio_format": "wav",
        })
    except Exception as e:
        logger.exception("Process with params error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # The audio is already embedded in the JSON body, so both temp files
        # can be removed on every path.
        _remove_quietly(input_path)
        _remove_quietly(output_path)


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)