Spaces:

heybaeheef
/

KU_SW_Academy

Running on A10G

App Files Files Community

heybaeheef committed 4 days ago

Commit

8212fa0

verified ·

1 Parent(s): 7d6c6f0

Upload 9 files

Browse files

Files changed (1) hide show

main.py +19 -59

main.py CHANGED Viewed

@@ -1,17 +1,16 @@
 """
 MagicPath AI Vocal Effects Server - DiffVox LLM 통합 버전
 =========================================================
-Dry 보컬 파일을 받아서 학습된 AI가 이펙터 파라미터를 예측하고,
-실제로 이펙트를 적용한 오디오를 반환하는 서버
 """
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse, JSONResponse
 import tempfile
 import os
 import uuid
-MODEL_PATH = os.environ.get("DIFFVOX_MODEL_PATH", "heybaeheef/KU_SW_Academy/checkpoints")
 # 내부 모듈
 from models.ai_effector import AIEffector
@@ -21,12 +20,16 @@ from audio_processing.effect_chain import EffectChain
 # 설정
 # ============================================
-# 학습된 모델 경로 (Hugging Face 레포 또는 로컬 경로)
-MODEL_PATH = os.environ.get("DIFFVOX_MODEL_PATH", "heybaeheef/KU_SW_Academy")
 BASE_MODEL_NAME = os.environ.get("BASE_MODEL_NAME", "Qwen/Qwen3-8B")
 AUDIO_FEATURE_DIM = int(os.environ.get("AUDIO_FEATURE_DIM", "64"))
 USE_HUGGINGFACE = os.environ.get("USE_HUGGINGFACE", "true").lower() == "true"
 # ============================================
 # FastAPI 앱 초기화
 # ============================================
@@ -40,7 +43,7 @@ app = FastAPI(
 # CORS 설정
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # 배포 시 특정 도메인으로 제한 권장
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
@@ -64,10 +67,6 @@ ai_effector = AIEffector(
 )
 effect_chain = EffectChain()
-# 임시 파일 저장 경로
-TEMP_DIR = Path(tempfile.gettempdir()) / "magicpath"
-TEMP_DIR.mkdir(exist_ok=True)
 # ============================================
 # API 엔드포인트
@@ -80,9 +79,11 @@ async def root():
         "status": "running",
         "message": "MagicPath AI Vocal Effects Server v2.0 (DiffVox LLM)",
         "ai_model_loaded": ai_effector.is_loaded(),
         "endpoints": {
             "POST /process": "오디오 파일 처리 후 반환",
             "POST /predict": "파라미터만 예측 (JSON)",
             "GET /health": "서버 상태 확인"
         }
     }
@@ -105,28 +106,18 @@ async def predict_parameters(
     audio: UploadFile = File(..., description="Dry 보컬 오디오 파일"),
     prompt: str = Form("", description="텍스트 명령 (예: 'warm', 'bright')")
 ):
-    """
-    AI 모델로 이펙터 파라미터 예측 (오디오 처리 없이)
-    - audio: wav, mp3 등 오디오 파일
-    - prompt: 원하는 사운드 설명
-    Returns: 예측된 이펙터 파라미터 JSON
-    """
     try:
-        # 임시 파일로 저장
         input_path = TEMP_DIR / f"{uuid.uuid4()}_{audio.filename}"
         with open(input_path, "wb") as f:
             content = await audio.read()
             f.write(content)
-        # AI 모델로 파라미터 예측
         parameters = ai_effector.predict(
             audio_path=str(input_path),
             text_prompt=prompt
         )
-        # 임시 파일 삭제
         os.remove(input_path)
         return JSONResponse(content={
@@ -145,50 +136,32 @@ async def process_audio(
     audio: UploadFile = File(..., description="Dry 보컬 오디오 파일"),
     prompt: str = Form("", description="텍스트 명령 (예: 'warm', 'bright')")
 ):
-    """
-    AI가 예측한 파라미터로 실제 오디오 처리
-    - audio: wav, mp3 등 오디오 파일
-    - prompt: 원하는 사운드 설명
-    Returns: 처리된 오디오 파일 (wav)
-    """
     input_path = None
     output_path = None
     try:
-        # 임시 파일 경로 생성
         file_id = str(uuid.uuid4())
         input_path = TEMP_DIR / f"{file_id}_input_{audio.filename}"
         output_path = TEMP_DIR / f"{file_id}_output.wav"
-        # 입력 파일 저장
         with open(input_path, "wb") as f:
             content = await audio.read()
             f.write(content)
-        print(f"[Process] 입력 파일: {input_path}")
-        print(f"[Process] 프롬프트: {prompt}")
-        # Step 1: AI 모델로 파라미터 예측
         parameters = ai_effector.predict(
             audio_path=str(input_path),
             text_prompt=prompt
         )
-        print(f"[Process] 예측된 파라미터: {len(parameters)}개")
-        # Step 2: 이펙터 체인으로 오디오 처리
         effect_chain.process(
             input_path=str(input_path),
             output_path=str(output_path),
             parameters=parameters
         )
-        # 입력 파일 삭제
         os.remove(input_path)
-        # 처리된 오디오 반환
         return FileResponse(
             path=str(output_path),
             media_type="audio/wav",
@@ -197,15 +170,10 @@ async def process_audio(
         )
     except Exception as e:
-        # 에러 시 임시 파일 정리
-        if input_path and input_path.exists():
             os.remove(input_path)
-        if output_path and output_path.exists():
             os.remove(output_path)
-        print(f"[Process] ❌ 에러: {e}")
-        import traceback
-        traceback.print_exc()
         raise HTTPException(status_code=500, detail=str(e))
@@ -214,11 +182,7 @@ async def process_audio_with_params(
     audio: UploadFile = File(..., description="Dry 보컬 오디오 파일"),
     prompt: str = Form("", description="텍스트 명령")
 ):
-    """
-    오디오 처리 + 사용된 파라미터도 함께 반환
-    Returns: JSON (처리된 오디오 URL + 파라미터)
-    """
     input_path = None
     output_path = None
@@ -231,13 +195,11 @@ async def process_audio_with_params(
             content = await audio.read()
             f.write(content)
-        # AI 파라미터 예측
         parameters = ai_effector.predict(
             audio_path=str(input_path),
             text_prompt=prompt
         )
-        # 오디오 처리
         effect_chain.process(
             input_path=str(input_path),
             output_path=str(output_path),
@@ -246,8 +208,6 @@ async def process_audio_with_params(
         os.remove(input_path)
-        # Base64 인코딩으로 오디오 반환 (또는 URL)
-        import base64
         with open(output_path, "rb") as f:
             audio_base64 = base64.b64encode(f.read()).decode('utf-8')
@@ -263,13 +223,13 @@ async def process_audio_with_params(
         })
     except Exception as e:
-        if input_path and input_path.exists():
             os.remove(input_path)
-        if output_path and output_path.exists():
             os.remove(output_path)
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)

 """
 MagicPath AI Vocal Effects Server - DiffVox LLM 통합 버전
 =========================================================
 """
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse, JSONResponse
+from pathlib import Path
 import tempfile
 import os
 import uuid
+import base64
 # 내부 모듈
 from models.ai_effector import AIEffector
 # 설정
 # ============================================
+# 학습된 모델 경로 - checkpoints 폴더 포함!
+MODEL_PATH = os.environ.get("DIFFVOX_MODEL_PATH", "heybaeheef/KU_SW_Academy/checkpoints")
 BASE_MODEL_NAME = os.environ.get("BASE_MODEL_NAME", "Qwen/Qwen3-8B")
 AUDIO_FEATURE_DIM = int(os.environ.get("AUDIO_FEATURE_DIM", "64"))
 USE_HUGGINGFACE = os.environ.get("USE_HUGGINGFACE", "true").lower() == "true"
+# 임시 파일 저장 경로 - 먼저 정의
+TEMP_DIR = Path(tempfile.gettempdir()) / "magicpath"
+TEMP_DIR.mkdir(exist_ok=True)
 # ============================================
 # FastAPI 앱 초기화
 # ============================================
 # CORS 설정
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 effect_chain = EffectChain()
 # ============================================
 # API 엔드포인트
         "status": "running",
         "message": "MagicPath AI Vocal Effects Server v2.0 (DiffVox LLM)",
         "ai_model_loaded": ai_effector.is_loaded(),
+        "model_path": MODEL_PATH,
         "endpoints": {
             "POST /process": "오디오 파일 처리 후 반환",
             "POST /predict": "파라미터만 예측 (JSON)",
+            "POST /process_with_params": "오디오 처리 + 파라미터 반환",
             "GET /health": "서버 상태 확인"
         }
     }
     audio: UploadFile = File(..., description="Dry 보컬 오디오 파일"),
     prompt: str = Form("", description="텍스트 명령 (예: 'warm', 'bright')")
 ):
+    """AI 모델로 이펙터 파라미터 예측"""
     try:
         input_path = TEMP_DIR / f"{uuid.uuid4()}_{audio.filename}"
         with open(input_path, "wb") as f:
             content = await audio.read()
             f.write(content)
         parameters = ai_effector.predict(
             audio_path=str(input_path),
             text_prompt=prompt
         )
         os.remove(input_path)
         return JSONResponse(content={
     audio: UploadFile = File(..., description="Dry 보컬 오디오 파일"),
     prompt: str = Form("", description="텍스트 명령 (예: 'warm', 'bright')")
 ):
+    """AI가 예측한 파라미터로 실제 오디오 처리"""
     input_path = None
     output_path = None
     try:
         file_id = str(uuid.uuid4())
         input_path = TEMP_DIR / f"{file_id}_input_{audio.filename}"
         output_path = TEMP_DIR / f"{file_id}_output.wav"
         with open(input_path, "wb") as f:
             content = await audio.read()
             f.write(content)
         parameters = ai_effector.predict(
             audio_path=str(input_path),
             text_prompt=prompt
         )
         effect_chain.process(
             input_path=str(input_path),
             output_path=str(output_path),
             parameters=parameters
         )
         os.remove(input_path)
         return FileResponse(
             path=str(output_path),
             media_type="audio/wav",
         )
     except Exception as e:
+        if input_path and Path(input_path).exists():
             os.remove(input_path)
+        if output_path and Path(output_path).exists():
             os.remove(output_path)
         raise HTTPException(status_code=500, detail=str(e))
     audio: UploadFile = File(..., description="Dry 보컬 오디오 파일"),
     prompt: str = Form("", description="텍스트 명령")
 ):
+    """오디오 처리 + 사용된 파라미터도 함께 반환"""
     input_path = None
     output_path = None
             content = await audio.read()
             f.write(content)
         parameters = ai_effector.predict(
             audio_path=str(input_path),
             text_prompt=prompt
         )
         effect_chain.process(
             input_path=str(input_path),
             output_path=str(output_path),
         os.remove(input_path)
         with open(output_path, "rb") as f:
             audio_base64 = base64.b64encode(f.read()).decode('utf-8')
         })
     except Exception as e:
+        if input_path and Path(input_path).exists():
             os.remove(input_path)
+        if output_path and Path(output_path).exists():
             os.remove(output_path)
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)