Spaces:

heybaeheef
/

KU_SW_Academy

Paused

App Files Files Community

heybaeheef committed on Dec 10, 2025

Commit

5c6cdde

1 Parent(s): 8212fa0

Fix model loading with subfolder parameter

Browse files

Files changed (6) hide show

audio_processing/__init__.py +4 -0
audio_processing/effect_chain.py +131 -225
main.py +22 -7
models/__init__.py +4 -0
models/ai_effector.py +254 -328
requirements.txt +3 -0

audio_processing/__init__.py CHANGED Viewed

	@@ -0,0 +1,4 @@

+# audio_processing package
+from .effect_chain import EffectChain
+__all__ = ["EffectChain"]

audio_processing/effect_chain.py CHANGED Viewed

@@ -1,255 +1,161 @@
 """
-Audio Effect Chain
-==================
-실제 오디오에 이펙트를 적용하는 처리 체인
-pedalboard 라이브러리 사용 (Spotify에서 만든 오디오 플러그인 라이브러리)
-- 고품질 VST 수준의 이펙트
-- Python에서 쉽게 사용 가능
-- 실시간 처리도 가능
 """
 import numpy as np
-from pathlib import Path
-from typing import Dict, Any, List
 import soundfile as sf
-# pedalboard - 오디오 이펙트 라이브러리
 from pedalboard import (
-    Pedalboard,
-    Compressor,
     Gain,
-    LowShelfFilter,
     HighShelfFilter,
     PeakFilter,
     Delay,
     Reverb,
     Distortion,
-    Limiter,
-    HighpassFilter,
-    LowpassFilter
 )
-from pedalboard.io import AudioFile
 class EffectChain:
-    """오디오 이펙트 처리 체인"""
-    AVAILABLE_EFFECTS = [
-        "eq_lowshelf",
-        "eq_highshelf",
-        "eq_peak1",
-        "eq_peak2",
-        "compressor",
-        "distortion",
-        "delay",
-        "reverb",
-        "limiter"
-    ]
-    def __init__(self):
-        """이펙트 체인 초기화"""
-        pass
     def get_available_effects(self) -> List[str]:
-        """사용 가능한 이펙트 목록 반환"""
-        return self.AVAILABLE_EFFECTS.copy()
-    def process(
-        self,
-        input_path: str,
-        output_path: str,
-        parameters: Dict[str, float]
-    ) -> None:
-        """
-        오디오 파일에 이펙트 체인 적용
-        Args:
-            input_path: 입력 오디오 파일 경로
-            output_path: 출력 오디오 파일 경로
-            parameters: 이펙터 파라미터 딕셔너리
-        """
-        # 오디오 파일 읽기
-        audio, sample_rate = sf.read(input_path)
-        # 모노면 스테레오로 변환 (일부 이펙트가 스테레오 필요)
-        if len(audio.shape) == 1:
-            audio = np.column_stack([audio, audio])
-        # float32로 변환
-        audio = audio.astype(np.float32)
-        # 이펙트 체인 구성
-        board = self._build_pedalboard(parameters, sample_rate)
-        # 이펙트 적용
-        processed = board(audio, sample_rate)
-        # Wet/Dry 믹스 적용
-        wet_mix = parameters.get("final_wet_mix", 0.5)
-        final_audio = (1 - wet_mix) * audio + wet_mix * processed
-        # 클리핑 방지
-        final_audio = np.clip(final_audio, -1.0, 1.0)
-        # 출력 파일 저장
-        sf.write(output_path, final_audio, sample_rate)
-        print(f"[EffectChain] 처리 완료: {output_path}")
-    def _build_pedalboard(
-        self,
-        params: Dict[str, float],
-        sample_rate: int
-    ) -> Pedalboard:
-        """
-        파라미터로부터 pedalboard 이펙트 체인 구성
-        """
         effects = []
-        # === EQ Section ===
-        # Low Shelf EQ
-        if params.get("eq_lowshelf_gain", 0) != 0:
-            effects.append(
-                LowShelfFilter(
-                    cutoff_frequency_hz=params.get("eq_lowshelf_freq", 200),
-                    gain_db=params.get("eq_lowshelf_gain", 0),
-                    q=0.707
-                )
-            )
-        # High Shelf EQ
-        if params.get("eq_highshelf_gain", 0) != 0:
-            effects.append(
-                HighShelfFilter(
-                    cutoff_frequency_hz=params.get("eq_highshelf_freq", 8000),
-                    gain_db=params.get("eq_highshelf_gain", 0),
-                    q=0.707
-                )
-            )
-        # Peak EQ 1
-        if params.get("eq_peak1_gain", 0) != 0:
-            effects.append(
-                PeakFilter(
-                    cutoff_frequency_hz=params.get("eq_peak1_freq", 1000),
-                    gain_db=params.get("eq_peak1_gain", 0),
-                    q=params.get("eq_peak1_q", 1.0)
-                )
-            )
-        # Peak EQ 2
-        if params.get("eq_peak2_gain", 0) != 0:
-            effects.append(
-                PeakFilter(
-                    cutoff_frequency_hz=params.get("eq_peak2_freq", 3000),
-                    gain_db=params.get("eq_peak2_gain", 0),
-                    q=params.get("eq_peak2_q", 1.0)
-                )
-            )
-        # === Dynamics Section ===
-        # Compressor
-        threshold = params.get("compressor_threshold", -24)
-        ratio = params.get("compressor_ratio", 4.0)
-        if ratio > 1.0:
-            effects.append(
-                Compressor(
-                    threshold_db=threshold,
-                    ratio=ratio,
-                    attack_ms=params.get("compressor_attack", 5),
-                    release_ms=params.get("compressor_release", 50)
-                )
-            )
-            # Makeup Gain
-            makeup = params.get("compressor_makeup", 0)
-            if makeup != 0:
-                effects.append(Gain(gain_db=makeup))
-        # === Distortion Section ===
-        distortion_amount = params.get("distortion_amount", 0)
-        if distortion_amount > 0:
-            # pedalboard의 Distortion은 0-100 범위
-            effects.append(
-                Distortion(drive_db=distortion_amount * 40)  # 0-1 -> 0-40dB
-            )
-            # Distortion 후 톤 조절 (Tone = LPF)
-            tone = params.get("distortion_tone", 0.5)
-            lpf_freq = 2000 + tone * 10000  # 2kHz ~ 12kHz
-            effects.append(
-                LowpassFilter(cutoff_frequency_hz=lpf_freq)
-            )
-        # === Time-based Effects Section ===
         # Delay
-        delay_mix = params.get("delay_mix", 0)
-        if delay_mix > 0:
-            delay_time_ms = params.get("delay_time", 250)
-            effects.append(
-                Delay(
-                    delay_seconds=delay_time_ms / 1000,
-                    feedback=params.get("delay_feedback", 0.3),
-                    mix=delay_mix
-                )
-            )
-        # Reverb
-        reverb_wet = params.get("reverb_wet_dry", 0)
-        if reverb_wet > 0:
-            effects.append(
-                Reverb(
-                    room_size=params.get("reverb_room_size", 0.5),
-                    damping=params.get("reverb_damping", 0.5),
-                    wet_level=reverb_wet,
-                    dry_level=1 - reverb_wet,
-                    width=1.0
-                )
-            )
-        # === Output Section ===
-        # Limiter (클리핑 방지)
-        effects.append(
-            Limiter(
-                threshold_db=-1.0,
-                release_ms=100
-            )
-        )
         return Pedalboard(effects)
-    def process_realtime(
-        self,
-        audio_chunk: np.ndarray,
-        sample_rate: int,
         parameters: Dict[str, float]
-    ) -> np.ndarray:
-        """
-        실시간 오디오 청크 처리 (스트리밍용)
-        Args:
-            audio_chunk: 오디오 데이터 배열
-            sample_rate: 샘플레이트
-            parameters: 이펙터 파라미터
-        Returns:
-            처리된 오디오 청크
-        """
-        if len(audio_chunk.shape) == 1:
-            audio_chunk = np.column_stack([audio_chunk, audio_chunk])
-        audio_chunk = audio_chunk.astype(np.float32)
-        board = self._build_pedalboard(parameters, sample_rate)
-        processed = board(audio_chunk, sample_rate)
-        wet_mix = parameters.get("final_wet_mix", 0.5)
-        final = (1 - wet_mix) * audio_chunk + wet_mix * processed
-        return np.clip(final, -1.0, 1.0)

 """
+Effect Chain - Pedalboard 기반 오디오 이펙트 처리
+=================================================
 """
 import numpy as np
 import soundfile as sf
+from typing import Dict, List, Optional
 from pedalboard import (
+    Pedalboard,
+    Compressor,
     Gain,
     HighShelfFilter,
+    LowShelfFilter,
     PeakFilter,
     Delay,
     Reverb,
     Distortion,
+    Limiter
 )
 class EffectChain:
     """Pedalboard-based effect chain that renders an audio file with effects.

     A flat ``{name: value}`` parameter dict is turned into a ``Pedalboard``
     (compressor -> peak EQs -> shelves -> distortion -> delay -> limiter) and
     the processed signal is blended with the dry input before being written.
     """

     def __init__(self, sample_rate: int = 44100):
         """Store the nominal sample rate and the advertised effect names.

         Args:
             sample_rate: Kept on the instance only; ``process`` uses the
                 sample rate read from the input file instead.
         """
         self.sample_rate = sample_rate
         # NOTE(review): "reverb" is advertised here, but _build_pedalboard
         # never adds a Reverb stage - confirm whether that is intended.
         self.available_effects = [
             "eq_peak1", "eq_peak2",
             "eq_lowshelf", "eq_highshelf",
             "distortion", "delay", "compressor",
             "reverb", "limiter"
         ]

     @staticmethod
     def _clamp(value: float, low: float, high: float) -> float:
         """Limit ``value`` to the closed range [low, high]."""
         return max(low, min(high, value))

     @staticmethod
     def _number(params: Dict[str, float], key: str, default: float) -> float:
         """Read ``params[key]`` as a float.

         Falls back to ``default`` when the key is missing or its value is
         ``None``, not convertible to float, or NaN, so one malformed entry
         does not abort the whole render.
         """
         try:
             value = float(params.get(key, default))
         except (TypeError, ValueError):
             return default
         # NaN is the only float that differs from itself.
         return default if value != value else value

     def get_available_effects(self) -> List[str]:
         """Return the names of the supported effects.

         A copy is returned so callers cannot mutate the instance's list.
         """
         return list(self.available_effects)

     def _build_pedalboard(self, params: Dict[str, float]) -> "Pedalboard":
         """Build the Pedalboard described by ``params``.

         Args:
             params: Flat parameter dict. Missing or unusable values fall back
                 to the defaults written next to each lookup below.

         Returns:
             A Pedalboard whose first stage is a fixed compressor and whose
             last stage is a limiter; optional stages sit in between.
         """
         clamp = self._clamp
         number = self._number
         effects = []

         # Compressor (always applied, fixed settings)
         effects.append(Compressor(
             threshold_db=-18.0,
             ratio=2.0,
             attack_ms=10.0,
             release_ms=100.0
         ))

         # Peak EQ 1 and 2: added only when the gain is audibly non-zero
         for band, default_freq in (("eq_peak1", 1000.0), ("eq_peak2", 4000.0)):
             freq = number(params, f"{band}.params.freq", default_freq)
             gain = number(params, f"{band}.params.gain", 0.0)
             q = number(params, f"{band}.params.q", 1.0)
             if abs(gain) > 0.1:
                 effects.append(PeakFilter(
                     cutoff_frequency_hz=clamp(freq, 20, 20000),
                     gain_db=clamp(gain, -12, 12),
                     q=clamp(q, 0.1, 10)
                 ))

         # Low shelf
         freq_low = number(params, "eq_lowshelf.params.freq", 200.0)
         gain_low = number(params, "eq_lowshelf.params.gain", 0.0)
         if abs(gain_low) > 0.1:
             effects.append(LowShelfFilter(
                 cutoff_frequency_hz=clamp(freq_low, 20, 2000),
                 gain_db=clamp(gain_low, -12, 12),
                 q=0.707
             ))

         # High shelf
         freq_high = number(params, "eq_highshelf.params.freq", 8000.0)
         gain_high = number(params, "eq_highshelf.params.gain", 0.0)
         if abs(gain_high) > 0.1:
             effects.append(HighShelfFilter(
                 cutoff_frequency_hz=clamp(freq_high, 1000, 20000),
                 gain_db=clamp(gain_high, -12, 12),
                 q=0.707
             ))

         # Distortion: drive is amount * 100 dB clamped to 0-20, so any amount
         # of 0.2 or more yields the 20 dB maximum.
         dist_amount = number(params, "distortion_amount", 0.0)
         if dist_amount > 0.01:
             effects.append(Distortion(
                 drive_db=clamp(dist_amount * 100, 0, 20)
             ))

         # Delay: the default mix (0.2) is above the threshold, so a delay
         # stage is added even when no "delay.*" key is supplied.
         delay_time = number(params, "delay.delay_time", 0.02)
         delay_feedback = number(params, "delay.feedback", 0.3)
         delay_mix = number(params, "delay.mix", 0.2)
         if delay_mix > 0.01:
             effects.append(Delay(
                 delay_seconds=clamp(delay_time, 0.01, 1.0),
                 feedback=clamp(delay_feedback, 0.0, 0.9),
                 mix=clamp(delay_mix, 0.0, 1.0)
             ))

         # Limiter (always last)
         effects.append(Limiter(threshold_db=-1.0))
         return Pedalboard(effects)

     def process(
         self,
         input_path: str,
         output_path: str,
         parameters: Dict[str, float]
     ) -> bool:
         """Apply the effect chain to an audio file and write the result.

         Args:
             input_path: Path of the audio file to read.
             output_path: Path the processed audio is written to.
             parameters: Effect parameters; ``final_wet_mix`` (default 0.5,
                 clamped to 0-1) sets the processed/dry blend.

         Returns:
             True when the file was processed and written.

         Raises:
             Exception: Any error from reading, processing or writing is
                 printed and then re-raised unchanged.
         """
         try:
             # Load audio
             audio, sr = sf.read(input_path)
             # A 1-D (mono) array becomes a single-column 2-D array
             if audio.ndim == 1:
                 audio = audio.reshape(-1, 1)
             audio = audio.astype(np.float32)

             board = self._build_pedalboard(parameters)
             processed = board(audio, sr)

             # Wet/dry mix
             wet_mix = self._clamp(
                 self._number(parameters, "final_wet_mix", 0.5), 0.0, 1.0
             )
             # Trim both signals to the shorter length before blending
             min_len = min(len(audio), len(processed))
             output = audio[:min_len] * (1 - wet_mix) + processed[:min_len] * wet_mix
             # Hard-clip to the valid sample range
             output = np.clip(output, -1.0, 1.0)

             sf.write(output_path, output, sr)
             print(f"[EffectChain] ✅ 처리 완료: {output_path}")
             return True
         except Exception as e:
             print(f"[EffectChain] ❌ 처리 실패: {e}")
             # Bare raise re-raises the active exception as-is
             raise

main.py CHANGED Viewed

@@ -11,6 +11,14 @@ import tempfile
 import os
 import uuid
 import base64
 # 내부 모듈
 from models.ai_effector import AIEffector
@@ -20,13 +28,14 @@ from audio_processing.effect_chain import EffectChain
 # 설정
 # ============================================
-# 학습된 모델 경로 - checkpoints 폴더 포함!
-MODEL_PATH = os.environ.get("DIFFVOX_MODEL_PATH", "heybaeheef/KU_SW_Academy/checkpoints")
 BASE_MODEL_NAME = os.environ.get("BASE_MODEL_NAME", "Qwen/Qwen3-8B")
 AUDIO_FEATURE_DIM = int(os.environ.get("AUDIO_FEATURE_DIM", "64"))
 USE_HUGGINGFACE = os.environ.get("USE_HUGGINGFACE", "true").lower() == "true"
-# 임시 파일 저장 경로 - 먼저 정의
 TEMP_DIR = Path(tempfile.gettempdir()) / "magicpath"
 TEMP_DIR.mkdir(exist_ok=True)
@@ -53,14 +62,16 @@ app.add_middleware(
 print("=" * 60)
 print("MagicPath AI Vocal Effects Server v2.0")
 print("=" * 60)
-print(f"Model Path: {MODEL_PATH}")
 print(f"Base Model: {BASE_MODEL_NAME}")
 print(f"Audio Feature Dim: {AUDIO_FEATURE_DIM}")
 print(f"Use Hugging Face: {USE_HUGGINGFACE}")
 print("=" * 60)
 ai_effector = AIEffector(
-    model_path=MODEL_PATH,
     base_model_name=BASE_MODEL_NAME,
     audio_feature_dim=AUDIO_FEATURE_DIM,
     use_huggingface=USE_HUGGINGFACE
@@ -79,7 +90,8 @@ async def root():
         "status": "running",
         "message": "MagicPath AI Vocal Effects Server v2.0 (DiffVox LLM)",
         "ai_model_loaded": ai_effector.is_loaded(),
-        "model_path": MODEL_PATH,
         "endpoints": {
             "POST /process": "오디오 파일 처리 후 반환",
             "POST /predict": "파라미터만 예측 (JSON)",
@@ -96,7 +108,7 @@ async def health_check():
         "status": "healthy",
         "ai_model_loaded": ai_effector.is_loaded(),
         "supported_effects": effect_chain.get_available_effects(),
-        "model_path": MODEL_PATH,
         "base_model": BASE_MODEL_NAME
     }
@@ -128,6 +140,7 @@ async def predict_parameters(
         })
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
@@ -170,6 +183,7 @@ async def process_audio(
         )
     except Exception as e:
         if input_path and Path(input_path).exists():
             os.remove(input_path)
         if output_path and Path(output_path).exists():
@@ -223,6 +237,7 @@ async def process_audio_with_params(
         })
     except Exception as e:
         if input_path and Path(input_path).exists():
             os.remove(input_path)
         if output_path and Path(output_path).exists():

 import os
 import uuid
 import base64
+import logging
+from datetime import datetime
+# 로깅 설정
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+print(f"\n===== Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====\n")
 # 내부 모듈
 from models.ai_effector import AIEffector
 # 설정
 # ============================================
+# 학습된 모델 경로 - repo_id와 subfolder 분리!
+MODEL_REPO_ID = os.environ.get("DIFFVOX_MODEL_REPO", "heybaeheef/KU_SW_Academy")
+MODEL_SUBFOLDER = os.environ.get("DIFFVOX_MODEL_SUBFOLDER", "checkpoints")
 BASE_MODEL_NAME = os.environ.get("BASE_MODEL_NAME", "Qwen/Qwen3-8B")
 AUDIO_FEATURE_DIM = int(os.environ.get("AUDIO_FEATURE_DIM", "64"))
 USE_HUGGINGFACE = os.environ.get("USE_HUGGINGFACE", "true").lower() == "true"
+# 임시 파일 저장 경로
 TEMP_DIR = Path(tempfile.gettempdir()) / "magicpath"
 TEMP_DIR.mkdir(exist_ok=True)
 print("=" * 60)
 print("MagicPath AI Vocal Effects Server v2.0")
 print("=" * 60)
+print(f"Model Repo: {MODEL_REPO_ID}")
+print(f"Model Subfolder: {MODEL_SUBFOLDER}")
 print(f"Base Model: {BASE_MODEL_NAME}")
 print(f"Audio Feature Dim: {AUDIO_FEATURE_DIM}")
 print(f"Use Hugging Face: {USE_HUGGINGFACE}")
 print("=" * 60)
 ai_effector = AIEffector(
+    model_repo_id=MODEL_REPO_ID,
+    model_subfolder=MODEL_SUBFOLDER,
     base_model_name=BASE_MODEL_NAME,
     audio_feature_dim=AUDIO_FEATURE_DIM,
     use_huggingface=USE_HUGGINGFACE
         "status": "running",
         "message": "MagicPath AI Vocal Effects Server v2.0 (DiffVox LLM)",
         "ai_model_loaded": ai_effector.is_loaded(),
+        "model_repo": MODEL_REPO_ID,
+        "model_subfolder": MODEL_SUBFOLDER,
         "endpoints": {
             "POST /process": "오디오 파일 처리 후 반환",
             "POST /predict": "파라미터만 예측 (JSON)",
         "status": "healthy",
         "ai_model_loaded": ai_effector.is_loaded(),
         "supported_effects": effect_chain.get_available_effects(),
+        "model_repo": MODEL_REPO_ID,
         "base_model": BASE_MODEL_NAME
     }
         })
     except Exception as e:
+        logger.error(f"Predict error: {e}")
         raise HTTPException(status_code=500, detail=str(e))
         )
     except Exception as e:
+        logger.error(f"Process error: {e}")
         if input_path and Path(input_path).exists():
             os.remove(input_path)
         if output_path and Path(output_path).exists():
         })
     except Exception as e:
+        logger.error(f"Process with params error: {e}")
         if input_path and Path(input_path).exists():
             os.remove(input_path)
         if output_path and Path(output_path).exists():

models/__init__.py CHANGED Viewed

	@@ -0,0 +1,4 @@

+# models package
+from .ai_effector import AIEffector
+__all__ = ["AIEffector"]

models/ai_effector.py CHANGED Viewed

@@ -1,269 +1,169 @@
 """
-AI Effector Model - DiffVox LLM 통합 버전
-==========================================
-CLAP 인코더 + 학습된 LLM을 사용하여 오디오에서 이펙터 파라미터를 예측
-DiffVox LLM 파라미터 → MagicPath 웹 파라미터 자동 변환
 """
 import json
 import re
-import os
-from pathlib import Path
-from typing import Dict, Any, Optional
 import torch
-# AI 모델 관련 import (설치 필요)
-try:
-    from transformers import AutoModelForCausalLM, AutoTokenizer
-    from peft import PeftModel
-    TRANSFORMERS_AVAILABLE = True
-except ImportError:
-    TRANSFORMERS_AVAILABLE = False
-    print("[AIEffector] transformers/peft 미설치 - 프리셋 모드로 동작")
-# CLAP 인코더 (별도 파일)
-try:
-    from models.audio_encoder import AudioEncoder
-    AUDIO_ENCODER_AVAILABLE = True
-except ImportError:
-    AUDIO_ENCODER_AVAILABLE = False
-    print("[AIEffector] AudioEncoder 미설치 - 프리셋 모드로 동작")
-class ParameterMapper:
-    """DiffVox LLM 파라미터 ↔ MagicPath 웹 파라미터 변환"""
-    # DiffVox LLM → MagicPath 웹 매핑
-    DIFFVOX_TO_WEB = {
-        # EQ Low Shelf
-        "eq_lowshelf.params.gain": "eq_lowshelf_gain",
-        "eq_lowshelf.params.parametrizations.freq.original": "eq_lowshelf_freq",
-        # EQ High Shelf
-        "eq_highshelf.params.gain": "eq_highshelf_gain",
-        "eq_highshelf.params.parametrizations.freq.original": "eq_highshelf_freq",
-        # EQ Peak 1
-        "eq_peak1.params.gain": "eq_peak1_gain",
-        "eq_peak1.params.parametrizations.freq.original": "eq_peak1_freq",
-        "eq_peak1.params.parametrizations.Q.original": "eq_peak1_q",
-        # EQ Peak 2
-        "eq_peak2.params.gain": "eq_peak2_gain",
-        "eq_peak2.params.parametrizations.freq.original": "eq_peak2_freq",
-        "eq_peak2.params.parametrizations.Q.original": "eq_peak2_q",
-        # Delay
-        "delay.delay_time": "delay_time",
-        "delay.feedback": "delay_feedback",
-        "delay.mix": "delay_mix",
-        # Distortion
-        "distortion_amount": "distortion_amount",
-        # Master
-        "final_wet_mix": "final_wet_mix",
-    }
-    # 역방향 매핑
-    WEB_TO_DIFFVOX = {v: k for k, v in DIFFVOX_TO_WEB.items()}
-    # 값 변환 규칙 (정규화된 값 → 실제 값)
-    VALUE_TRANSFORMS = {
-        # EQ gain: -1~1 → -12~12 dB
-        "eq_lowshelf_gain": lambda x: x * 12,
-        "eq_highshelf_gain": lambda x: x * 12,
-        "eq_peak1_gain": lambda x: x * 12,
-        "eq_peak2_gain": lambda x: x * 12,
-        # EQ freq: 정규화된 값 → Hz (로그 스케일 역변환 필요할 수 있음)
-        "eq_lowshelf_freq": lambda x: 20 * (20000/20) ** ((x + 1) / 2),  # -1~1 → 20~20000
-        "eq_highshelf_freq": lambda x: 20 * (20000/20) ** ((x + 1) / 2),
-        "eq_peak1_freq": lambda x: 20 * (20000/20) ** ((x + 1) / 2),
-        "eq_peak2_freq": lambda x: 20 * (20000/20) ** ((x + 1) / 2),
-        # Q: -1~1 → 0.1~10
-        "eq_peak1_q": lambda x: 0.1 * (10/0.1) ** ((x + 1) / 2),
-        "eq_peak2_q": lambda x: 0.1 * (10/0.1) ** ((x + 1) / 2),
-        # Delay time: -1~1 → 0~1000 ms
-        "delay_time": lambda x: (x + 1) / 2 * 1000,
-        # Delay feedback: -1~1 → 0~1
-        "delay_feedback": lambda x: (x + 1) / 2,
-        # Delay mix: -1~1 → 0~1
-        "delay_mix": lambda x: (x + 1) / 2,
-        # Distortion: -1~1 → 0~1
-        "distortion_amount": lambda x: (x + 1) / 2,
-        # Wet mix: -1~1 → 0~1
-        "final_wet_mix": lambda x: (x + 1) / 2,
     }
-    @classmethod
-    def diffvox_to_web(cls, diffvox_params: Dict[str, float]) -> Dict[str, float]:
-        """DiffVox LLM 출력 → MagicPath 웹 파라미터"""
-        web_params = {}
-        for diffvox_key, value in diffvox_params.items():
-            # 키 변환
-            if diffvox_key in cls.DIFFVOX_TO_WEB:
-                web_key = cls.DIFFVOX_TO_WEB[diffvox_key]
-            else:
-                # 매핑에 없으면 스킵
-                continue
-            # 값 변환
-            if web_key in cls.VALUE_TRANSFORMS:
-                try:
-                    web_params[web_key] = cls.VALUE_TRANSFORMS[web_key](value)
-                except:
-                    web_params[web_key] = value
-            else:
-                web_params[web_key] = value
-        return web_params
-class ParameterParser:
-    """LLM 출력에서 파라미터 JSON 추출"""
-    @staticmethod
-    def parse(llm_output: str) -> Optional[Dict]:
-        """LLM 출력에서 파라미터 딕셔너리 추출"""
-        # 방법 1: JSON 블록 찾기
-        json_patterns = [
-            r'\{[^{}]*\}',
-            r'\{(?:[^{}]|\{[^{}]*\})*\}',
-        ]
-        for pattern in json_patterns:
-            matches = re.findall(pattern, llm_output, re.DOTALL)
-            for match in matches:
-                try:
-                    params = json.loads(match)
-                    if isinstance(params, dict) and len(params) > 0:
-                        return params
-                except json.JSONDecodeError:
-                    continue
-        # 방법 2: key: value 패턴 파싱
-        param_pattern = r'"([^"]+)":\s*([-\d.]+)'
-        matches = re.findall(param_pattern, llm_output)
-        if matches:
-            params = {}
-            for key, value in matches:
-                try:
-                    params[key] = float(value)
-                except ValueError:
-                    params[key] = value
-            if params:
-                return params
-        return None
 class AIEffector:
-    """AI 기반 이펙터 파라미터 예측 모델 - DiffVox LLM 통합"""
-    # 기본 파라미터
-    DEFAULT_PARAMS = {
-        "eq_lowshelf_gain": 0.0,
-        "eq_lowshelf_freq": 200,
-        "eq_highshelf_gain": 0.0,
-        "eq_highshelf_freq": 8000,
-        "eq_peak1_gain": 0.0,
-        "eq_peak1_freq": 1000,
-        "eq_peak1_q": 1.0,
-        "eq_peak2_gain": 0.0,
-        "eq_peak2_freq": 3000,
-        "eq_peak2_q": 1.0,
-        "compressor_threshold": -24,
-        "compressor_ratio": 4.0,
-        "compressor_attack": 5,
-        "compressor_release": 50,
-        "compressor_makeup": 0.0,
-        "distortion_amount": 0.0,
-        "distortion_tone": 0.5,
-        "delay_time": 250,
-        "delay_feedback": 0.3,
-        "delay_mix": 0.0,
-        "reverb_room_size": 0.5,
-        "reverb_damping": 0.5,
-        "reverb_wet_dry": 0.0,
-        "final_wet_mix": 0.5
-    }
-    # 프리셋 (fallback용)
-    PRESETS = {
-        "warm": {
-            "eq_lowshelf_gain": 5.5,
-            "eq_lowshelf_freq": 200,
-            "eq_highshelf_gain": -1.5,
-            "eq_highshelf_freq": 8000,
-            "eq_peak1_gain": 2.0,
-            "eq_peak1_freq": 400,
-            "eq_peak1_q": 1.0,
-            "compressor_threshold": -18,
-            "compressor_ratio": 3.0,
-            "distortion_amount": 0.05,
-            "reverb_room_size": 0.4,
-            "reverb_wet_dry": 0.15,
-            "final_wet_mix": 0.5
-        },
-        "bright": {
-            "eq_lowshelf_gain": -2.0,
-            "eq_lowshelf_freq": 150,
-            "eq_highshelf_gain": 4.0,
-            "eq_highshelf_freq": 6000,
-            "eq_peak1_gain": 1.0,
-            "eq_peak1_freq": 3000,
-            "compressor_threshold": -20,
-            "compressor_ratio": 6.0,
-            "reverb_room_size": 0.3,
-            "reverb_wet_dry": 0.1,
-            "final_wet_mix": 0.5
-        },
-    }
     def __init__(
-        self,
-        model_path: Optional[str] = None,
         base_model_name: str = "Qwen/Qwen3-8B",
         audio_feature_dim: int = 64,
         use_huggingface: bool = True
     ):
-        """
-        AI 모델 초기화
-        Args:
-            model_path: 학습된 LoRA 모델 경로 (로컬 또는 Hugging Face 레포)
-            base_model_name: 베이스 LLM 모델 이름
-            audio_feature_dim: 오디오 특징 차원 (CLAP 출력)
-            use_huggingface: True면 model_path를 Hugging Face 레포로 간주
-        """
         self.model = None
         self.tokenizer = None
-        self.audio_encoder = None
-        self.model_loaded = False
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.base_model_name = base_model_name
-        self.audio_feature_dim = audio_feature_dim
-        self.use_huggingface = use_huggingface
-        if model_path:
-            self._load_model(model_path)
-    def _load_model(self, model_path: str):
-        """학습된 LoRA 모델 로드 (로컬 또는 Hugging Face)"""
-        if not TRANSFORMERS_AVAILABLE:
-            print("[AIEffector] transformers/peft 미설치")
-            return
-        # 로컬 경로인지 Hugging Face 레포인지 확인
-        is_local = os.path.exists(model_path)
-        if not is_local and not self.use_huggingface:
-            print(f"[AIEffector] 로컬 모델 경로 없음: {model_path}")
-            return
         try:
-            if self.use_huggingface and not is_local:
-                print(f"[AIEffector] Hugging Face에서 모델 로딩: {model_path}")
-            else:
-                print(f"[AIEffector] 로컬 모델 로딩: {model_path}")
             # 토크나이저 로드
             self.tokenizer = AutoTokenizer.from_pretrained(
@@ -276,129 +176,155 @@ class AIEffector:
             # 베이스 모델 로드
             base_model = AutoModelForCausalLM.from_pretrained(
                 self.base_model_name,
-                torch_dtype=torch.bfloat16,
-                device_map="auto",
                 trust_remote_code=True,
             )
-            # LoRA 어댑터 적용 (Hugging Face 레포 또는 로컬 경로)
-            self.model = PeftModel.from_pretrained(
-                base_model,
-                model_path,  # Hugging Face 레포 이름 또는 로컬 경로
-                is_trainable=False
-            )
-            self.model.eval()
-            # 오디오 인코더 로드
-            if AUDIO_ENCODER_AVAILABLE:
-                self.audio_encoder = AudioEncoder(
-                    output_dim=self.audio_feature_dim,
-                    reduction_method="pool"
                 )
-                print("[AIEffector] AudioEncoder 로드 완료")
-            self.model_loaded = True
-            print("[AIEffector] ✅ 모델 로드 완료")
         except Exception as e:
             print(f"[AIEffector] ❌ 모델 로드 실패: {e}")
-            import traceback
-            traceback.print_exc()
-            self.model_loaded = False
     def is_loaded(self) -> bool:
-        """AI 모델 로드 상태 확인"""
-        return self.model_loaded
-    def predict(self, audio_path: str, text_prompt: str) -> Dict[str, float]:
-        """
-        오디오와 텍스트로부터 이펙터 파라미터 예측
-        Args:
-            audio_path: 입력 오디오 파일 경로
-            text_prompt: 사용자 텍스트 명령
-        Returns:
-            MagicPath 웹 형식의 이펙터 파라미터 딕셔너리
-        """
-        if self.model_loaded and self.audio_encoder:
-            return self._predict_with_model(audio_path, text_prompt)
-        else:
-            return self._predict_with_preset(text_prompt)
-    def _predict_with_model(self, audio_path: str, text_prompt: str) -> Dict[str, float]:
-        """학습된 DiffVox LLM으로 추론"""
         try:
-            # 1. 오디오 특징 추출
             audio_features = self.audio_encoder.get_audio_features(audio_path)
-            if not audio_features:
-                print("[AIEffector] 오디오 특징 추출 실패, 프리셋 사용")
-                return self._predict_with_preset(text_prompt)
-            # 2. 프롬프트 구성 (train_model.py와 동일한 형식)
-            audio_state_str = json.dumps(audio_features)
-            prompt = f"""Task: Convert text to audio parameters.
-Audio: {audio_state_str}
-Text: {text_prompt}
-Parameters:"""
-            # 3. LLM 추론
             inputs = self.tokenizer(
                 prompt,
                 return_tensors="pt",
                 truncation=True,
-                max_length=1500
             ).to(self.device)
             with torch.no_grad():
                 outputs = self.model.generate(
                     **inputs,
-                    max_new_tokens=500,
-                    temperature=0.1,
                     do_sample=False,
-                    pad_token_id=self.tokenizer.eos_token_id,
                 )
-            generated_text = self.tokenizer.decode(
-                outputs[0][inputs['input_ids'].shape[1]:],
-                skip_special_tokens=True
-            ).strip()
-            print(f"[AIEffector] LLM 출력: {generated_text[:200]}...")
-            # 4. 파라미터 파싱
-            diffvox_params = ParameterParser.parse(generated_text)
-            if not diffvox_params:
-                print("[AIEffector] 파라미터 파싱 실패, 프리셋 사용")
-                return self._predict_with_preset(text_prompt)
-            # 5. DiffVox → Web 파라미터 변환
-            web_params = ParameterMapper.diffvox_to_web(diffvox_params)
-            # 6. 기본값과 병합
-            result = self.DEFAULT_PARAMS.copy()
-            result.update(web_params)
-            print(f"[AIEffector] ✅ AI 파라미터 생성 완료: {len(web_params)}개 파라미터")
-            return result
         except Exception as e:
-            print(f"[AIEffector] 추론 에러: {e}")
-            import traceback
-            traceback.print_exc()
-            return self._predict_with_preset(text_prompt)
-    def _predict_with_preset(self, text_prompt: str) -> Dict[str, float]:
-        """프리셋 기반 파라미터 반환 (fallback)"""
-        prompt_lower = text_prompt.lower()
-        for preset_name, preset_params in self.PRESETS.items():
-            if preset_name in prompt_lower:
-                print(f"[AIEffector] 프리셋 매칭: '{preset_name}'")
-                result = self.DEFAULT_PARAMS.copy()
-                result.update(preset_params)
-                return result
-        print("[AIEffector] 프리셋 매칭 실패, 기본값 반환")
-        return self.DEFAULT_PARAMS.copy()

 """
+AI Effector - DiffVox LLM 기반 이펙트 파라미터 예측
+===================================================
 """
+import os
 import json
 import re
 import torch
+import numpy as np
+from typing import Dict, List, Optional, Any
+from pathlib import Path
+import warnings
+warnings.filterwarnings("ignore")
+# Default parameters (used when the model fails to load).
+# _apply_preset and _parse_output both start from a copy of this dict, and
+# _parse_output only accepts keys that are listed here.
+# NOTE(review): freq values look like Hz and gain values like dB - confirm
+# against the effect chain that consumes them.
+DEFAULT_PARAMETERS = {
+    "eq_peak1.params.freq": 1000.0,
+    "eq_peak1.params.gain": 0.0,
+    "eq_peak1.params.q": 1.0,
+    "eq_peak2.params.freq": 4000.0,
+    "eq_peak2.params.gain": 0.0,
+    "eq_peak2.params.q": 1.0,
+    "eq_lowshelf.params.freq": 200.0,
+    "eq_lowshelf.params.gain": 0.0,
+    "eq_lowshelf.params.q": 0.707,
+    "eq_highshelf.params.freq": 8000.0,
+    "eq_highshelf.params.gain": 0.0,
+    "eq_highshelf.params.q": 0.707,
+    "distortion_amount": 0.0,
+    "delay.delay_time": 0.02,
+    "delay.feedback": 0.3,
+    "delay.mix": 0.2,
+    "final_wet_mix": 0.5
+}
+# Style presets (work without the AI model).
+# Each key is a keyword that _apply_preset looks for as a substring of the
+# lower-cased prompt; the value holds the overrides applied on top of
+# DEFAULT_PARAMETERS. Every override key also exists in DEFAULT_PARAMETERS.
+STYLE_PRESETS = {
+    "warm": {
+        "eq_lowshelf.params.gain": 3.0,
+        "eq_highshelf.params.gain": -1.0,
+        "distortion_amount": 0.05,
+    },
+    "bright": {
+        "eq_highshelf.params.gain": 4.0,
+        "eq_peak2.params.gain": 2.0,
+        "eq_lowshelf.params.gain": -1.0,
+    },
+    "vintage": {
+        "eq_lowshelf.params.gain": 2.0,
+        "eq_highshelf.params.gain": -2.0,
+        "distortion_amount": 0.1,
+        "delay.mix": 0.15,
+    },
+    "modern": {
+        "eq_peak1.params.gain": 2.0,
+        "eq_peak2.params.gain": 3.0,
+        "eq_highshelf.params.gain": 2.0,
+    },
+    "spacious": {
+        "delay.delay_time": 0.05,
+        "delay.feedback": 0.4,
+        "delay.mix": 0.35,
+    },
+    "dry": {
+        "final_wet_mix": 0.2,
+        "delay.mix": 0.0,
+    },
+    "saturated": {
+        "distortion_amount": 0.15,
+        "eq_lowshelf.params.gain": 1.0,
     }
+}
+class AudioEncoder:
+    """Lightweight audio feature extractor (simplified stand-in for CLAP)."""
+    def __init__(self, output_dim: int = 64):
+        """Store the feature-vector length; audio is loaded at 44100 Hz."""
+        self.output_dim = output_dim
+        self.sr = 44100
+    def get_audio_features(self, audio_path: str) -> List[float]:
+        """Return ``output_dim`` plain floats describing the start of a file.
+        At most the first 5 seconds of ``audio_path`` are analysed. Before
+        padding the vector holds, in order: 20 MFCC means, the mean spectral
+        centroid, bandwidth and roll-off (each divided by 10000), the mean
+        RMS energy, the mean zero-crossing rate and 12 chroma means. The
+        result is zero-padded or truncated to ``output_dim`` entries.
+        Any failure (librosa missing, unreadable file, ...) is printed and a
+        vector of zeros is returned instead of raising.
+        """
+        try:
+            import librosa
+            y, sr = librosa.load(audio_path, sr=self.sr, duration=5.0)
+            features = []
+            # MFCC means (20 values).
+            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
+            features.extend(np.mean(mfcc, axis=1).tolist())
+            # Spectral descriptors, each scaled down by 10000.
+            spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
+            spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
+            spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
+            features.extend([spectral_centroid / 10000, spectral_bandwidth / 10000, spectral_rolloff / 10000])
+            # RMS energy.
+            features.append(np.mean(librosa.feature.rms(y=y)))
+            # Zero crossing rate.
+            features.append(np.mean(librosa.feature.zero_crossing_rate(y)))
+            # Chroma means (12 values).
+            chroma = librosa.feature.chroma_stft(y=y, sr=sr)
+            features.extend(np.mean(chroma, axis=1).tolist())
+            # np.mean() yields NumPy scalars; cast everything so the list really
+            # is List[float] and stays JSON-serialisable for callers.
+            features = [float(value) for value in features]
+            # Pad or truncate to output_dim.
+            if len(features) < self.output_dim:
+                features.extend([0.0] * (self.output_dim - len(features)))
+            else:
+                features = features[:self.output_dim]
+            return features
+        except Exception as e:
+            # Deliberate best-effort: the caller still gets a usable vector.
+            print(f"[AudioEncoder] 특징 추출 실패: {e}")
+            return [0.0] * self.output_dim
 class AIEffector:
+    """AI 기반 이펙터 파라미터 예측"""
     def __init__(
+        self,
+        model_repo_id: str = "heybaeheef/KU_SW_Academy",
+        model_subfolder: str = "checkpoints",
         base_model_name: str = "Qwen/Qwen3-8B",
         audio_feature_dim: int = 64,
         use_huggingface: bool = True
     ):
+        """Store the configuration and immediately try to load the model.
+        Args:
+            model_repo_id: Repo id of the LoRA adapter, or the base directory
+                of a local copy when ``use_huggingface`` is False.
+            model_subfolder: Folder inside ``model_repo_id`` that holds the
+                adapter files.
+            base_model_name: Name of the base causal LM the adapter is
+                applied to.
+            audio_feature_dim: Length of the vector produced by AudioEncoder.
+            use_huggingface: True loads the adapter by repo id plus
+                subfolder; False joins both into a local path.
+        Loading is attempted right here via ``_load_model``, which catches
+        its own errors and leaves ``self.model`` as None on failure.
+        """
+        self.model_repo_id = model_repo_id
+        self.model_subfolder = model_subfolder
+        self.base_model_name = base_model_name
+        self.audio_feature_dim = audio_feature_dim
+        self.use_huggingface = use_huggingface
         self.model = None
         self.tokenizer = None
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        # Audio encoder.
+        self.audio_encoder = AudioEncoder(output_dim=audio_feature_dim)
+        # Try to load the model (may take a while; failure is non-fatal).
+        self._load_model()
+    def _load_model(self):
+        """모델 로드"""
         try:
+            from transformers import AutoModelForCausalLM, AutoTokenizer
+            from peft import PeftModel
+            print(f"[AIEffector] 모델 로딩 시작...")
+            print(f"  - Base Model: {self.base_model_name}")
+            print(f"  - Adapter Repo: {self.model_repo_id}")
+            print(f"  - Adapter Subfolder: {self.model_subfolder}")
             # 토크나이저 로드
             self.tokenizer = AutoTokenizer.from_pretrained(
             # 베이스 모델 로드
             base_model = AutoModelForCausalLM.from_pretrained(
                 self.base_model_name,
+                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                device_map="auto" if torch.cuda.is_available() else None,
                 trust_remote_code=True,
+                low_cpu_mem_usage=True
             )
+            # LoRA 어댑터 로드 (subfolder 파라미터 사용!)
+            if self.use_huggingface:
+                print(f"[AIEffector] Hugging Face에서 LoRA 어댑터 로딩...")
+                self.model = PeftModel.from_pretrained(
+                    base_model,
+                    self.model_repo_id,
+                    subfolder=self.model_subfolder,  # 핵심 수정!
+                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                )
+            else:
+                # 로컬 경로 사용
+                local_path = os.path.join(self.model_repo_id, self.model_subfolder)
+                print(f"[AIEffector] 로컬에서 LoRA 어댑터 로딩: {local_path}")
+                self.model = PeftModel.from_pretrained(
+                    base_model,
+                    local_path,
+                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                 )
+            self.model.eval()
+            print(f"[AIEffector] ✅ 모델 로드 성공!")
         except Exception as e:
             print(f"[AIEffector] ❌ 모델 로드 실패: {e}")
+            print(f"[AIEffector] 폴백 모드로 전환 (프리셋 기반)")
+            self.model = None
+            self.tokenizer = None
     def is_loaded(self) -> bool:
+        """Return True when a model object is present (``self.model`` is not None)."""
+        return self.model is not None
+    def _apply_preset(self, prompt: str) -> Dict[str, float]:
+        """Build parameters from style keywords found in the prompt.
+        Starts from a copy of DEFAULT_PARAMETERS and applies, in the order
+        STYLE_PRESETS defines them, the overrides of every preset whose name
+        occurs as a substring of the lower-cased prompt. Later presets win
+        when several touch the same key.
+        """
+        lowered = prompt.lower()
+        merged = dict(DEFAULT_PARAMETERS)
+        matching = (
+            overrides
+            for keyword, overrides in STYLE_PRESETS.items()
+            if keyword in lowered
+        )
+        for overrides in matching:
+            merged.update(overrides)
+        return merged
+    def _format_prompt(self, text_prompt: str, audio_features: List[float]) -> str:
+        """Render the LLM input prompt.
+        The prompt embeds the user's text, the first eight audio features
+        formatted with three decimals, and a JSON template listing every
+        parameter key the model is expected to fill in.
+        """
+        # Only a short preview of the feature vector goes into the prompt.
+        feature_preview = ", ".join(f"{value:.3f}" for value in audio_features[:8])
+        return f"""You are an audio effect parameter predictor.
+Input:
+- Text description: {text_prompt}
+- Audio features (first 8): [{feature_preview}]
+Output the effect parameters as JSON:
+```json
+{{
+    "eq_peak1.params.freq": <float>,
+    "eq_peak1.params.gain": <float>,
+    "eq_peak1.params.q": <float>,
+    "eq_peak2.params.freq": <float>,
+    "eq_peak2.params.gain": <float>,
+    "eq_peak2.params.q": <float>,
+    "eq_lowshelf.params.freq": <float>,
+    "eq_lowshelf.params.gain": <float>,
+    "eq_lowshelf.params.q": <float>,
+    "eq_highshelf.params.freq": <float>,
+    "eq_highshelf.params.gain": <float>,
+    "eq_highshelf.params.q": <float>,
+    "distortion_amount": <float>,
+    "delay.delay_time": <float>,
+    "delay.feedback": <float>,
+    "delay.mix": <float>,
+    "final_wet_mix": <float>
+}}
+```
+JSON output:"""
+    def _parse_output(self, output_text: str) -> Dict[str, float]:
+        """Extract effect parameters from raw LLM text.
+        Every flat ``{...}`` span in ``output_text`` is tried in order and
+        the first one that is valid JSON is used. Trying more than the first
+        span matters when the text still contains the prompt's ``<float>``
+        template, which is not valid JSON and would otherwise hide the real
+        answer behind it.
+        Only keys present in DEFAULT_PARAMETERS are accepted, and only when
+        the value is a finite number (booleans, NaN and infinities are
+        ignored). Everything else keeps its default.
+        Returns:
+            A new dict with the same keys as DEFAULT_PARAMETERS. Never raises;
+            unparsable input yields a plain copy of the defaults.
+        """
+        import math
+        result = DEFAULT_PARAMETERS.copy()
+        if not isinstance(output_text, str):
+            print(f"[AIEffector] 출력 파싱 실패: {type(output_text).__name__}")
+            return result
+        last_error = None
+        for candidate in re.finditer(r'\{[^{}]*\}', output_text):
+            try:
+                params = json.loads(candidate.group())
+            except ValueError as e:
+                # Not JSON (e.g. the prompt template); try the next span.
+                last_error = e
+                continue
+            for key, value in params.items():
+                # bool is a subclass of int, so exclude it explicitly.
+                if key not in result or isinstance(value, bool):
+                    continue
+                if not isinstance(value, (int, float)):
+                    continue
+                try:
+                    number = float(value)
+                except OverflowError:
+                    # Integer too large to represent as a float.
+                    continue
+                if math.isfinite(number):
+                    result[key] = number
+            return result
+        if last_error is not None:
+            print(f"[AIEffector] 출력 파싱 실패: {last_error}")
+        return result
+    def predict(self, audio_path: str, text_prompt: str = "") -> Dict[str, float]:
+        """Predict effect parameters for an audio file and a text request.
+        Args:
+            audio_path: Path of the audio file to analyse.
+            text_prompt: Free-text description of the desired sound.
+        Returns:
+            A dict with the same keys as DEFAULT_PARAMETERS. When no model is
+            loaded, or anything fails during inference, the keyword presets
+            from ``_apply_preset`` are used instead; this method does not
+            raise for inference errors.
+        """
+        # No model available: use the presets.
+        if not self.is_loaded():
+            print(f"[AIEffector] 프리셋 모드 사용 (prompt: {text_prompt})")
+            return self._apply_preset(text_prompt)
         try:
+            # Extract audio features.
             audio_features = self.audio_encoder.get_audio_features(audio_path)
+            # Build the prompt.
+            prompt = self._format_prompt(text_prompt, audio_features)
+            # Tokenize.
             inputs = self.tokenizer(
                 prompt,
                 return_tensors="pt",
                 truncation=True,
+                max_length=1024
             ).to(self.device)
+            # Some tokenizers define no pad token; fall back to EOS so
+            # generate() always receives a valid id.
+            pad_token_id = self.tokenizer.pad_token_id
+            if pad_token_id is None:
+                pad_token_id = self.tokenizer.eos_token_id
+            # Generate greedily; temperature is omitted because it has no
+            # effect when do_sample is False.
             with torch.no_grad():
                 outputs = self.model.generate(
                     **inputs,
+                    max_new_tokens=256,
                     do_sample=False,
+                    pad_token_id=pad_token_id
                 )
+            # Decode only the newly generated tokens. The returned sequence
+            # starts with the prompt, whose JSON template would otherwise be
+            # the first "{...}" the parser sees.
+            prompt_length = inputs["input_ids"].shape[1]
+            output_text = self.tokenizer.decode(
+                outputs[0][prompt_length:],
+                skip_special_tokens=True
+            )
+            # Parse.
+            params = self._parse_output(output_text)
+            print(f"[AIEffector] ✅ AI 예측 완료")
+            return params
         except Exception as e:
+            # Top-level boundary: any inference failure degrades to presets.
+            print(f"[AIEffector] 예측 실패: {e}, 프리셋으로 폴백")
+            return self._apply_preset(text_prompt)

requirements.txt CHANGED Viewed

@@ -18,3 +18,6 @@ transformers>=4.36.0
 peft>=0.7.0
 huggingface_hub>=0.20.0
 accelerate>=0.25.0

 peft>=0.7.0
 huggingface_hub>=0.20.0
 accelerate>=0.25.0
+# 추가 의존성
+scipy>=1.10.0