# KU_SW_Academy / main.py
# (Hugging Face upload metadata removed: "heybaeheef — Upload 9 files",
#  commit 3cc9d6f, 8.48 kB — this residue was not valid Python.)
"""
MagicPath AI Vocal Effects Server - DiffVox LLM ํ†ตํ•ฉ ๋ฒ„์ „
=========================================================
Dry ๋ณด์ปฌ ํŒŒ์ผ์„ ๋ฐ›์•„์„œ ํ•™์Šต๋œ AI๊ฐ€ ์ดํŽ™ํ„ฐ ํŒŒ๋ผ๋ฏธํ„ฐ๋ฅผ ์˜ˆ์ธกํ•˜๊ณ ,
์‹ค์ œ๋กœ ์ดํŽ™ํŠธ๋ฅผ ์ ์šฉํ•œ ์˜ค๋””์˜ค๋ฅผ ๋ฐ˜ํ™˜ํ•˜๋Š” ์„œ๋ฒ„
"""
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
import tempfile
import os
import uuid
from pathlib import Path
# ๋‚ด๋ถ€ ๋ชจ๋“ˆ
from models.ai_effector import AIEffector
from audio_processing.effect_chain import EffectChain
# ============================================
# ์„ค์ •
# ============================================
# ํ•™์Šต๋œ ๋ชจ๋ธ ๊ฒฝ๋กœ (Hugging Face ๋ ˆํฌ ๋˜๋Š” ๋กœ์ปฌ ๊ฒฝ๋กœ)
MODEL_PATH = os.environ.get("DIFFVOX_MODEL_PATH", "heybaeheef/KU_SW_Academy")
BASE_MODEL_NAME = os.environ.get("BASE_MODEL_NAME", "Qwen/Qwen3-8B")
AUDIO_FEATURE_DIM = int(os.environ.get("AUDIO_FEATURE_DIM", "64"))
USE_HUGGINGFACE = os.environ.get("USE_HUGGINGFACE", "true").lower() == "true"
# ============================================
# FastAPI ์•ฑ ์ดˆ๊ธฐํ™”
# ============================================
app = FastAPI(
title="MagicPath AI Vocal Effects",
description="AI-powered vocal effect processing server (DiffVox LLM ํ†ตํ•ฉ)",
version="2.0.0"
)
# CORS ์„ค์ •
app.add_middleware(
CORSMiddleware,
allow_origins=["*"], # ๋ฐฐํฌ ์‹œ ํŠน์ • ๋„๋ฉ”์ธ์œผ๋กœ ์ œํ•œ ๊ถŒ์žฅ
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# ์ „์—ญ ๊ฐ์ฒด ์ดˆ๊ธฐํ™”
print("=" * 60)
print("MagicPath AI Vocal Effects Server v2.0")
print("=" * 60)
print(f"Model Path: {MODEL_PATH}")
print(f"Base Model: {BASE_MODEL_NAME}")
print(f"Audio Feature Dim: {AUDIO_FEATURE_DIM}")
print(f"Use Hugging Face: {USE_HUGGINGFACE}")
print("=" * 60)
ai_effector = AIEffector(
model_path=MODEL_PATH,
base_model_name=BASE_MODEL_NAME,
audio_feature_dim=AUDIO_FEATURE_DIM,
use_huggingface=USE_HUGGINGFACE
)
effect_chain = EffectChain()
# ์ž„์‹œ ํŒŒ์ผ ์ €์žฅ ๊ฒฝ๋กœ
TEMP_DIR = Path(tempfile.gettempdir()) / "magicpath"
TEMP_DIR.mkdir(exist_ok=True)
# ============================================
# API endpoints
# ============================================
@app.get("/")
async def root():
"""์„œ๋ฒ„ ์ •๋ณด"""
return {
"status": "running",
"message": "MagicPath AI Vocal Effects Server v2.0 (DiffVox LLM)",
"ai_model_loaded": ai_effector.is_loaded(),
"endpoints": {
"POST /process": "์˜ค๋””์˜ค ํŒŒ์ผ ์ฒ˜๋ฆฌ ํ›„ ๋ฐ˜ํ™˜",
"POST /predict": "ํŒŒ๋ผ๋ฏธํ„ฐ๋งŒ ์˜ˆ์ธก (JSON)",
"GET /health": "์„œ๋ฒ„ ์ƒํƒœ ํ™•์ธ"
}
}
@app.get("/health")
async def health_check():
"""์„œ๋ฒ„ ๋ฐ ๋ชจ๋ธ ์ƒํƒœ ํ™•์ธ"""
return {
"status": "healthy",
"ai_model_loaded": ai_effector.is_loaded(),
"supported_effects": effect_chain.get_available_effects(),
"model_path": MODEL_PATH,
"base_model": BASE_MODEL_NAME
}
@app.post("/predict")
async def predict_parameters(
audio: UploadFile = File(..., description="Dry ๋ณด์ปฌ ์˜ค๋””์˜ค ํŒŒ์ผ"),
prompt: str = Form("", description="ํ…์ŠคํŠธ ๋ช…๋ น (์˜ˆ: 'warm', 'bright')")
):
"""
AI ๋ชจ๋ธ๋กœ ์ดํŽ™ํ„ฐ ํŒŒ๋ผ๋ฏธํ„ฐ ์˜ˆ์ธก (์˜ค๋””์˜ค ์ฒ˜๋ฆฌ ์—†์ด)
- audio: wav, mp3 ๋“ฑ ์˜ค๋””์˜ค ํŒŒ์ผ
- prompt: ์›ํ•˜๋Š” ์‚ฌ์šด๋“œ ์„ค๋ช…
Returns: ์˜ˆ์ธก๋œ ์ดํŽ™ํ„ฐ ํŒŒ๋ผ๋ฏธํ„ฐ JSON
"""
try:
# ์ž„์‹œ ํŒŒ์ผ๋กœ ์ €์žฅ
input_path = TEMP_DIR / f"{uuid.uuid4()}_{audio.filename}"
with open(input_path, "wb") as f:
content = await audio.read()
f.write(content)
# AI ๋ชจ๋ธ๋กœ ํŒŒ๋ผ๋ฏธํ„ฐ ์˜ˆ์ธก
parameters = ai_effector.predict(
audio_path=str(input_path),
text_prompt=prompt
)
# ์ž„์‹œ ํŒŒ์ผ ์‚ญ์ œ
os.remove(input_path)
return JSONResponse(content={
"status": "success",
"prompt": prompt,
"ai_model_used": ai_effector.is_loaded(),
"parameters": parameters
})
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.post("/process")
async def process_audio(
audio: UploadFile = File(..., description="Dry ๋ณด์ปฌ ์˜ค๋””์˜ค ํŒŒ์ผ"),
prompt: str = Form("", description="ํ…์ŠคํŠธ ๋ช…๋ น (์˜ˆ: 'warm', 'bright')")
):
"""
AI๊ฐ€ ์˜ˆ์ธกํ•œ ํŒŒ๋ผ๋ฏธํ„ฐ๋กœ ์‹ค์ œ ์˜ค๋””์˜ค ์ฒ˜๋ฆฌ
- audio: wav, mp3 ๋“ฑ ์˜ค๋””์˜ค ํŒŒ์ผ
- prompt: ์›ํ•˜๋Š” ์‚ฌ์šด๋“œ ์„ค๋ช…
Returns: ์ฒ˜๋ฆฌ๋œ ์˜ค๋””์˜ค ํŒŒ์ผ (wav)
"""
input_path = None
output_path = None
try:
# ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ ์ƒ์„ฑ
file_id = str(uuid.uuid4())
input_path = TEMP_DIR / f"{file_id}_input_{audio.filename}"
output_path = TEMP_DIR / f"{file_id}_output.wav"
# ์ž…๋ ฅ ํŒŒ์ผ ์ €์žฅ
with open(input_path, "wb") as f:
content = await audio.read()
f.write(content)
print(f"[Process] ์ž…๋ ฅ ํŒŒ์ผ: {input_path}")
print(f"[Process] ํ”„๋กฌํ”„ํŠธ: {prompt}")
# Step 1: AI ๋ชจ๋ธ๋กœ ํŒŒ๋ผ๋ฏธํ„ฐ ์˜ˆ์ธก
parameters = ai_effector.predict(
audio_path=str(input_path),
text_prompt=prompt
)
print(f"[Process] ์˜ˆ์ธก๋œ ํŒŒ๋ผ๋ฏธํ„ฐ: {len(parameters)}๊ฐœ")
# Step 2: ์ดํŽ™ํ„ฐ ์ฒด์ธ์œผ๋กœ ์˜ค๋””์˜ค ์ฒ˜๋ฆฌ
effect_chain.process(
input_path=str(input_path),
output_path=str(output_path),
parameters=parameters
)
# ์ž…๋ ฅ ํŒŒ์ผ ์‚ญ์ œ
os.remove(input_path)
# ์ฒ˜๋ฆฌ๋œ ์˜ค๋””์˜ค ๋ฐ˜ํ™˜
return FileResponse(
path=str(output_path),
media_type="audio/wav",
filename=f"processed_{audio.filename.rsplit('.', 1)[0]}.wav",
background=None
)
except Exception as e:
# ์—๋Ÿฌ ์‹œ ์ž„์‹œ ํŒŒ์ผ ์ •๋ฆฌ
if input_path and input_path.exists():
os.remove(input_path)
if output_path and output_path.exists():
os.remove(output_path)
print(f"[Process] โŒ ์—๋Ÿฌ: {e}")
import traceback
traceback.print_exc()
raise HTTPException(status_code=500, detail=str(e))
@app.post("/process_with_params")
async def process_audio_with_params(
audio: UploadFile = File(..., description="Dry ๋ณด์ปฌ ์˜ค๋””์˜ค ํŒŒ์ผ"),
prompt: str = Form("", description="ํ…์ŠคํŠธ ๋ช…๋ น")
):
"""
์˜ค๋””์˜ค ์ฒ˜๋ฆฌ + ์‚ฌ์šฉ๋œ ํŒŒ๋ผ๋ฏธํ„ฐ๋„ ํ•จ๊ป˜ ๋ฐ˜ํ™˜
Returns: JSON (์ฒ˜๋ฆฌ๋œ ์˜ค๋””์˜ค URL + ํŒŒ๋ผ๋ฏธํ„ฐ)
"""
input_path = None
output_path = None
try:
file_id = str(uuid.uuid4())
input_path = TEMP_DIR / f"{file_id}_input_{audio.filename}"
output_path = TEMP_DIR / f"{file_id}_output.wav"
with open(input_path, "wb") as f:
content = await audio.read()
f.write(content)
# AI ํŒŒ๋ผ๋ฏธํ„ฐ ์˜ˆ์ธก
parameters = ai_effector.predict(
audio_path=str(input_path),
text_prompt=prompt
)
# ์˜ค๋””์˜ค ์ฒ˜๋ฆฌ
effect_chain.process(
input_path=str(input_path),
output_path=str(output_path),
parameters=parameters
)
os.remove(input_path)
# Base64 ์ธ์ฝ”๋”ฉ์œผ๋กœ ์˜ค๋””์˜ค ๋ฐ˜ํ™˜ (๋˜๋Š” URL)
import base64
with open(output_path, "rb") as f:
audio_base64 = base64.b64encode(f.read()).decode('utf-8')
os.remove(output_path)
return JSONResponse(content={
"status": "success",
"prompt": prompt,
"ai_model_used": ai_effector.is_loaded(),
"parameters": parameters,
"audio_base64": audio_base64,
"audio_format": "wav"
})
except Exception as e:
if input_path and input_path.exists():
os.remove(input_path)
if output_path and output_path.exists():
os.remove(output_path)
raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)