heybaeheef committed
Commit 56b428e · 1 Parent(s): 41363a4

Remove server code, keep LoRA adapter only
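Since only the LoRA adapter (under `checkpoints/`) remains in the repository after this commit, here is a minimal loading sketch. It assumes the base model, repo id, and subfolder that the removed server code pointed at; none of this is shipped by the commit itself.

```python
# Minimal sketch of loading what remains in this repo after the commit:
# the LoRA adapter under "checkpoints", applied to the Qwen/Qwen3-8B base
# model, mirroring the values in the removed models/ai_effector.py.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-8B",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B", trust_remote_code=True)

model = PeftModel.from_pretrained(
    base,
    "heybaeheef/KU_SW_Academy",
    subfolder="checkpoints",  # the adapter weights this commit keeps
)
model.eval()
```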
audio_processing/__init__.py DELETED
@@ -1,4 +0,0 @@
- # audio_processing package
- from .effect_chain import EffectChain
-
- __all__ = ["EffectChain"]
 
audio_processing/effect_chain.py DELETED
@@ -1,161 +0,0 @@
- """
- Effect Chain - Pedalboard-based audio effect processing
- =================================================
- """
-
- import numpy as np
- import soundfile as sf
- from typing import Dict, List, Optional
- from pedalboard import (
-     Pedalboard,
-     Compressor,
-     Gain,
-     HighShelfFilter,
-     LowShelfFilter,
-     PeakFilter,
-     Delay,
-     Reverb,
-     Distortion,
-     Limiter
- )
-
-
- class EffectChain:
-     """Pedalboard-based effect chain"""
-
-     def __init__(self, sample_rate: int = 44100):
-         self.sample_rate = sample_rate
-
-         self.available_effects = [
-             "eq_peak1", "eq_peak2",
-             "eq_lowshelf", "eq_highshelf",
-             "distortion", "delay", "compressor",
-             "reverb", "limiter"
-         ]
-
-     def get_available_effects(self) -> List[str]:
-         """List of available effects"""
-         return self.available_effects
-
-     def _build_pedalboard(self, params: Dict[str, float]) -> Pedalboard:
-         """Build a Pedalboard from the parameters"""
-
-         effects = []
-
-         # Compressor (always applied)
-         effects.append(Compressor(
-             threshold_db=-18.0,
-             ratio=2.0,
-             attack_ms=10.0,
-             release_ms=100.0
-         ))
-
-         # EQ Peak 1
-         freq1 = params.get("eq_peak1.params.freq", 1000.0)
-         gain1 = params.get("eq_peak1.params.gain", 0.0)
-         q1 = params.get("eq_peak1.params.q", 1.0)
-         if abs(gain1) > 0.1:
-             effects.append(PeakFilter(
-                 cutoff_frequency_hz=max(20, min(20000, freq1)),
-                 gain_db=max(-12, min(12, gain1)),
-                 q=max(0.1, min(10, q1))
-             ))
-
-         # EQ Peak 2
-         freq2 = params.get("eq_peak2.params.freq", 4000.0)
-         gain2 = params.get("eq_peak2.params.gain", 0.0)
-         q2 = params.get("eq_peak2.params.q", 1.0)
-         if abs(gain2) > 0.1:
-             effects.append(PeakFilter(
-                 cutoff_frequency_hz=max(20, min(20000, freq2)),
-                 gain_db=max(-12, min(12, gain2)),
-                 q=max(0.1, min(10, q2))
-             ))
-
-         # Low Shelf
-         freq_low = params.get("eq_lowshelf.params.freq", 200.0)
-         gain_low = params.get("eq_lowshelf.params.gain", 0.0)
-         if abs(gain_low) > 0.1:
-             effects.append(LowShelfFilter(
-                 cutoff_frequency_hz=max(20, min(2000, freq_low)),
-                 gain_db=max(-12, min(12, gain_low)),
-                 q=0.707
-             ))
-
-         # High Shelf
-         freq_high = params.get("eq_highshelf.params.freq", 8000.0)
-         gain_high = params.get("eq_highshelf.params.gain", 0.0)
-         if abs(gain_high) > 0.1:
-             effects.append(HighShelfFilter(
-                 cutoff_frequency_hz=max(1000, min(20000, freq_high)),
-                 gain_db=max(-12, min(12, gain_high)),
-                 q=0.707
-             ))
-
-         # Distortion
-         dist_amount = params.get("distortion_amount", 0.0)
-         if dist_amount > 0.01:
-             effects.append(Distortion(
-                 drive_db=max(0, min(20, dist_amount * 100))
-             ))
-
-         # Delay
-         delay_time = params.get("delay.delay_time", 0.02)
-         delay_feedback = params.get("delay.feedback", 0.3)
-         delay_mix = params.get("delay.mix", 0.2)
-         if delay_mix > 0.01:
-             effects.append(Delay(
-                 delay_seconds=max(0.01, min(1.0, delay_time)),
-                 feedback=max(0.0, min(0.9, delay_feedback)),
-                 mix=max(0.0, min(1.0, delay_mix))
-             ))
-
-         # Limiter (always last)
-         effects.append(Limiter(threshold_db=-1.0))
-
-         return Pedalboard(effects)
-
-     def process(
-         self,
-         input_path: str,
-         output_path: str,
-         parameters: Dict[str, float]
-     ) -> bool:
-         """Process an audio file"""
-         try:
-             # Load audio
-             audio, sr = sf.read(input_path)
-
-             # Handle mono/stereo
-             if len(audio.shape) == 1:
-                 audio = audio.reshape(-1, 1)
-
-             # Convert to float32
-             audio = audio.astype(np.float32)
-
-             # Build the Pedalboard
-             board = self._build_pedalboard(parameters)
-
-             # Process
-             processed = board(audio, sr)
-
-             # Wet/dry mix
-             wet_mix = parameters.get("final_wet_mix", 0.5)
-             wet_mix = max(0.0, min(1.0, wet_mix))
-
-             # Match lengths
-             min_len = min(len(audio), len(processed))
-             output = audio[:min_len] * (1 - wet_mix) + processed[:min_len] * wet_mix
-
-             # Prevent clipping
-             output = np.clip(output, -1.0, 1.0)
-
-             # Save
-             sf.write(output_path, output, sr)
-
-             print(f"[EffectChain] ✅ Processing complete: {output_path}")
-             return True
-
-         except Exception as e:
-             print(f"[EffectChain] ❌ Processing failed: {e}")
-             raise e
 
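For reference, a hypothetical usage sketch of the EffectChain removed above; the file names and parameter values are illustrative, and the keys follow the `<effect>.params.<name>` convention that `_build_pedalboard` expects.

```python
# Hypothetical sketch: drive the removed EffectChain directly.
# "dry.wav"/"wet.wav" and the parameter values are illustrative.
from audio_processing.effect_chain import EffectChain

chain = EffectChain(sample_rate=44100)
print(chain.get_available_effects())  # ["eq_peak1", ..., "limiter"]

params = {
    "eq_lowshelf.params.gain": 3.0,    # boost lows for a warmer tone
    "eq_highshelf.params.gain": -1.0,  # tame the highs slightly
    "delay.mix": 0.15,                 # subtle delay
    "final_wet_mix": 0.6,              # 60% processed / 40% dry blend
}
chain.process(input_path="dry.wav", output_path="wet.wav", parameters=params)
```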
main.py DELETED
@@ -1,250 +0,0 @@
- """
- MagicPath AI Vocal Effects Server - DiffVox LLM integrated version
- =========================================================
- """
-
- from fastapi import FastAPI, UploadFile, File, Form, HTTPException
- from fastapi.middleware.cors import CORSMiddleware
- from fastapi.responses import FileResponse, JSONResponse
- from pathlib import Path
- import tempfile
- import os
- import uuid
- import base64
- import logging
- from datetime import datetime
-
- # Logging setup
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
- print(f"\n===== Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====\n")
-
- # Internal modules
- from models.ai_effector import AIEffector
- from audio_processing.effect_chain import EffectChain
-
- # ============================================
- # Configuration
- # ============================================
-
- # Trained model path - repo_id and subfolder kept separate!
- MODEL_REPO_ID = os.environ.get("DIFFVOX_MODEL_REPO", "heybaeheef/KU_SW_Academy")
- MODEL_SUBFOLDER = os.environ.get("DIFFVOX_MODEL_SUBFOLDER", "checkpoints")
- BASE_MODEL_NAME = os.environ.get("BASE_MODEL_NAME", "Qwen/Qwen3-8B")
- AUDIO_FEATURE_DIM = int(os.environ.get("AUDIO_FEATURE_DIM", "64"))
- USE_HUGGINGFACE = os.environ.get("USE_HUGGINGFACE", "true").lower() == "true"
-
- # Temporary file directory
- TEMP_DIR = Path(tempfile.gettempdir()) / "magicpath"
- TEMP_DIR.mkdir(exist_ok=True)
-
- # ============================================
- # FastAPI app initialization
- # ============================================
-
- app = FastAPI(
-     title="MagicPath AI Vocal Effects",
-     description="AI-powered vocal effect processing server (DiffVox LLM integration)",
-     version="2.0.0"
- )
-
- # CORS settings
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
-
- # Initialize global objects
- print("=" * 60)
- print("MagicPath AI Vocal Effects Server v2.0")
- print("=" * 60)
- print(f"Model Repo: {MODEL_REPO_ID}")
- print(f"Model Subfolder: {MODEL_SUBFOLDER}")
- print(f"Base Model: {BASE_MODEL_NAME}")
- print(f"Audio Feature Dim: {AUDIO_FEATURE_DIM}")
- print(f"Use Hugging Face: {USE_HUGGINGFACE}")
- print("=" * 60)
-
- ai_effector = AIEffector(
-     model_repo_id=MODEL_REPO_ID,
-     model_subfolder=MODEL_SUBFOLDER,
-     base_model_name=BASE_MODEL_NAME,
-     audio_feature_dim=AUDIO_FEATURE_DIM,
-     use_huggingface=USE_HUGGINGFACE
- )
- effect_chain = EffectChain()
-
-
- # ============================================
- # API endpoints
- # ============================================
-
- @app.get("/")
- async def root():
-     """Server info"""
-     return {
-         "status": "running",
-         "message": "MagicPath AI Vocal Effects Server v2.0 (DiffVox LLM)",
-         "ai_model_loaded": ai_effector.is_loaded(),
-         "model_repo": MODEL_REPO_ID,
-         "model_subfolder": MODEL_SUBFOLDER,
-         "endpoints": {
-             "POST /process": "Process an audio file and return it",
-             "POST /predict": "Predict parameters only (JSON)",
-             "POST /process_with_params": "Process audio and return the parameters",
-             "GET /health": "Check server status"
-         }
-     }
-
-
- @app.get("/health")
- async def health_check():
-     """Check server and model status"""
-     return {
-         "status": "healthy",
-         "ai_model_loaded": ai_effector.is_loaded(),
-         "supported_effects": effect_chain.get_available_effects(),
-         "model_repo": MODEL_REPO_ID,
-         "base_model": BASE_MODEL_NAME
-     }
-
-
- @app.post("/predict")
- async def predict_parameters(
-     audio: UploadFile = File(..., description="Dry vocal audio file"),
-     prompt: str = Form("", description="Text command (e.g. 'warm', 'bright')")
- ):
-     """Predict effect parameters with the AI model"""
-     try:
-         input_path = TEMP_DIR / f"{uuid.uuid4()}_{audio.filename}"
-         with open(input_path, "wb") as f:
-             content = await audio.read()
-             f.write(content)
-
-         parameters = ai_effector.predict(
-             audio_path=str(input_path),
-             text_prompt=prompt
-         )
-
-         os.remove(input_path)
-
-         return JSONResponse(content={
-             "status": "success",
-             "prompt": prompt,
-             "ai_model_used": ai_effector.is_loaded(),
-             "parameters": parameters
-         })
-
-     except Exception as e:
-         logger.error(f"Predict error: {e}")
-         raise HTTPException(status_code=500, detail=str(e))
-
-
- @app.post("/process")
- async def process_audio(
-     audio: UploadFile = File(..., description="Dry vocal audio file"),
-     prompt: str = Form("", description="Text command (e.g. 'warm', 'bright')")
- ):
-     """Process the audio with the AI-predicted parameters"""
-     input_path = None
-     output_path = None
-
-     try:
-         file_id = str(uuid.uuid4())
-         input_path = TEMP_DIR / f"{file_id}_input_{audio.filename}"
-         output_path = TEMP_DIR / f"{file_id}_output.wav"
-
-         with open(input_path, "wb") as f:
-             content = await audio.read()
-             f.write(content)
-
-         parameters = ai_effector.predict(
-             audio_path=str(input_path),
-             text_prompt=prompt
-         )
-
-         effect_chain.process(
-             input_path=str(input_path),
-             output_path=str(output_path),
-             parameters=parameters
-         )
-
-         os.remove(input_path)
-
-         return FileResponse(
-             path=str(output_path),
-             media_type="audio/wav",
-             filename=f"processed_{audio.filename.rsplit('.', 1)[0]}.wav",
-             background=None
-         )
-
-     except Exception as e:
-         logger.error(f"Process error: {e}")
-         if input_path and Path(input_path).exists():
-             os.remove(input_path)
-         if output_path and Path(output_path).exists():
-             os.remove(output_path)
-         raise HTTPException(status_code=500, detail=str(e))
-
-
- @app.post("/process_with_params")
- async def process_audio_with_params(
-     audio: UploadFile = File(..., description="Dry vocal audio file"),
-     prompt: str = Form("", description="Text command")
- ):
-     """Process the audio and also return the parameters used"""
-     input_path = None
-     output_path = None
-
-     try:
-         file_id = str(uuid.uuid4())
-         input_path = TEMP_DIR / f"{file_id}_input_{audio.filename}"
-         output_path = TEMP_DIR / f"{file_id}_output.wav"
-
-         with open(input_path, "wb") as f:
-             content = await audio.read()
-             f.write(content)
-
-         parameters = ai_effector.predict(
-             audio_path=str(input_path),
-             text_prompt=prompt
-         )
-
-         effect_chain.process(
-             input_path=str(input_path),
-             output_path=str(output_path),
-             parameters=parameters
-         )
-
-         os.remove(input_path)
-
-         with open(output_path, "rb") as f:
-             audio_base64 = base64.b64encode(f.read()).decode('utf-8')
-
-         os.remove(output_path)
-
-         return JSONResponse(content={
-             "status": "success",
-             "prompt": prompt,
-             "ai_model_used": ai_effector.is_loaded(),
-             "parameters": parameters,
-             "audio_base64": audio_base64,
-             "audio_format": "wav"
-         })
-
-     except Exception as e:
-         logger.error(f"Process with params error: {e}")
-         if input_path and Path(input_path).exists():
-             os.remove(input_path)
-         if output_path and Path(output_path).exists():
-             os.remove(output_path)
-         raise HTTPException(status_code=500, detail=str(e))
-
-
- if __name__ == "__main__":
-     import uvicorn
-     uvicorn.run(app, host="0.0.0.0", port=7860)
 
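A sketch of how a client might have called the removed server; the port matches the `uvicorn.run` call above, while `vocal.wav` is an illustrative file name.

```python
# Hypothetical client for the removed /process_with_params endpoint,
# assuming the server is running locally on port 7860.
import base64
import requests

with open("vocal.wav", "rb") as f:  # illustrative input file
    resp = requests.post(
        "http://localhost:7860/process_with_params",
        files={"audio": ("vocal.wav", f, "audio/wav")},
        data={"prompt": "warm"},
    )
resp.raise_for_status()
payload = resp.json()

print(payload["parameters"])  # the predicted effect parameters

# The processed audio comes back base64-encoded; write it to disk.
with open("processed.wav", "wb") as out:
    out.write(base64.b64decode(payload["audio_base64"]))
```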
models/__init__.py DELETED
@@ -1,4 +0,0 @@
- # models package
- from .ai_effector import AIEffector
-
- __all__ = ["AIEffector"]
 
models/ai_effector.py DELETED
@@ -1,330 +0,0 @@
- """
- AI Effector - DiffVox LLM-based effect parameter prediction
- ===================================================
- """
-
- import os
- import json
- import re
- import torch
- import numpy as np
- from typing import Dict, List, Optional, Any
- from pathlib import Path
- import warnings
-
- warnings.filterwarnings("ignore")
-
- # Default parameters (used when the model fails to load)
- DEFAULT_PARAMETERS = {
-     "eq_peak1.params.freq": 1000.0,
-     "eq_peak1.params.gain": 0.0,
-     "eq_peak1.params.q": 1.0,
-     "eq_peak2.params.freq": 4000.0,
-     "eq_peak2.params.gain": 0.0,
-     "eq_peak2.params.q": 1.0,
-     "eq_lowshelf.params.freq": 200.0,
-     "eq_lowshelf.params.gain": 0.0,
-     "eq_lowshelf.params.q": 0.707,
-     "eq_highshelf.params.freq": 8000.0,
-     "eq_highshelf.params.gain": 0.0,
-     "eq_highshelf.params.q": 0.707,
-     "distortion_amount": 0.0,
-     "delay.delay_time": 0.02,
-     "delay.feedback": 0.3,
-     "delay.mix": 0.2,
-     "final_wet_mix": 0.5
- }
-
- # Style presets (work without the AI model)
- STYLE_PRESETS = {
-     "warm": {
-         "eq_lowshelf.params.gain": 3.0,
-         "eq_highshelf.params.gain": -1.0,
-         "distortion_amount": 0.05,
-     },
-     "bright": {
-         "eq_highshelf.params.gain": 4.0,
-         "eq_peak2.params.gain": 2.0,
-         "eq_lowshelf.params.gain": -1.0,
-     },
-     "vintage": {
-         "eq_lowshelf.params.gain": 2.0,
-         "eq_highshelf.params.gain": -2.0,
-         "distortion_amount": 0.1,
-         "delay.mix": 0.15,
-     },
-     "modern": {
-         "eq_peak1.params.gain": 2.0,
-         "eq_peak2.params.gain": 3.0,
-         "eq_highshelf.params.gain": 2.0,
-     },
-     "spacious": {
-         "delay.delay_time": 0.05,
-         "delay.feedback": 0.4,
-         "delay.mix": 0.35,
-     },
-     "dry": {
-         "final_wet_mix": 0.2,
-         "delay.mix": 0.0,
-     },
-     "saturated": {
-         "distortion_amount": 0.15,
-         "eq_lowshelf.params.gain": 1.0,
-     }
- }
-
-
- class AudioEncoder:
-     """Simplified audio encoder (stand-in for CLAP)"""
-
-     def __init__(self, output_dim: int = 64):
-         self.output_dim = output_dim
-         self.sr = 44100
-
-     def get_audio_features(self, audio_path: str) -> List[float]:
-         """Extract features from audio (simplified version)"""
-         try:
-             import librosa
-
-             y, sr = librosa.load(audio_path, sr=self.sr, duration=5.0)
-
-             # Basic feature extraction
-             features = []
-
-             # MFCC (20 coefficients)
-             mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
-             features.extend(np.mean(mfcc, axis=1).tolist())
-
-             # Spectral features
-             spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
-             spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
-             spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
-
-             features.extend([spectral_centroid / 10000, spectral_bandwidth / 10000, spectral_rolloff / 10000])
-
-             # RMS energy
-             rms = np.mean(librosa.feature.rms(y=y))
-             features.append(float(rms))
-
-             # Zero crossing rate
-             zcr = np.mean(librosa.feature.zero_crossing_rate(y))
-             features.append(float(zcr))
-
-             # Chroma (12 bins)
-             chroma = librosa.feature.chroma_stft(y=y, sr=sr)
-             features.extend(np.mean(chroma, axis=1).tolist())
-
-             # Pad or truncate to output_dim
-             if len(features) < self.output_dim:
-                 features.extend([0.0] * (self.output_dim - len(features)))
-             else:
-                 features = features[:self.output_dim]
-
-             return features
-
-         except Exception as e:
-             print(f"[AudioEncoder] Feature extraction failed: {e}")
-             return [0.0] * self.output_dim
-
-
- class AIEffector:
-     """AI-based effect parameter prediction"""
-
-     def __init__(
-         self,
-         model_repo_id: str = "heybaeheef/KU_SW_Academy",
-         model_subfolder: str = "checkpoints",
-         base_model_name: str = "Qwen/Qwen3-8B",
-         audio_feature_dim: int = 64,
-         use_huggingface: bool = True
-     ):
-         self.model_repo_id = model_repo_id
-         self.model_subfolder = model_subfolder
-         self.base_model_name = base_model_name
-         self.audio_feature_dim = audio_feature_dim
-         self.use_huggingface = use_huggingface
-
-         self.model = None
-         self.tokenizer = None
-         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-         # Audio encoder
-         self.audio_encoder = AudioEncoder(output_dim=audio_feature_dim)
-
-         # Try to load the model
-         self._load_model()
-
-     def _load_model(self):
-         """Load the model"""
-         try:
-             from transformers import AutoModelForCausalLM, AutoTokenizer
-             from peft import PeftModel
-
-             print(f"[AIEffector] Starting model load...")
-             print(f"  - Base Model: {self.base_model_name}")
-             print(f"  - Adapter Repo: {self.model_repo_id}")
-             print(f"  - Adapter Subfolder: {self.model_subfolder}")
-
-             # Load the tokenizer
-             self.tokenizer = AutoTokenizer.from_pretrained(
-                 self.base_model_name,
-                 trust_remote_code=True
-             )
-             if self.tokenizer.pad_token is None:
-                 self.tokenizer.pad_token = self.tokenizer.eos_token
-
-             # Load the base model
-             base_model = AutoModelForCausalLM.from_pretrained(
-                 self.base_model_name,
-                 torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                 device_map="auto" if torch.cuda.is_available() else None,
-                 trust_remote_code=True,
-                 low_cpu_mem_usage=True
-             )
-
-             # Load the LoRA adapter (using the subfolder parameter!)
-             if self.use_huggingface:
-                 print(f"[AIEffector] Loading LoRA adapter from Hugging Face...")
-                 self.model = PeftModel.from_pretrained(
-                     base_model,
-                     self.model_repo_id,
-                     subfolder=self.model_subfolder,  # the key fix!
-                     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                 )
-             else:
-                 # Use a local path
-                 local_path = os.path.join(self.model_repo_id, self.model_subfolder)
-                 print(f"[AIEffector] Loading LoRA adapter locally: {local_path}")
-                 self.model = PeftModel.from_pretrained(
-                     base_model,
-                     local_path,
-                     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                 )
-
-             self.model.eval()
-             print(f"[AIEffector] ✅ Model loaded successfully!")
-
-         except Exception as e:
-             print(f"[AIEffector] ❌ Model load failed: {e}")
-             print(f"[AIEffector] Switching to fallback mode (preset-based)")
-             self.model = None
-             self.tokenizer = None
-
-     def is_loaded(self) -> bool:
-         """Whether the model is loaded"""
-         return self.model is not None
-
-     def _apply_preset(self, prompt: str) -> Dict[str, float]:
-         """Match style presets against the prompt"""
-         params = DEFAULT_PARAMETERS.copy()
-         prompt_lower = prompt.lower()
-
-         for style_name, style_params in STYLE_PRESETS.items():
-             if style_name in prompt_lower:
-                 params.update(style_params)
-
-         return params
-
-     def _format_prompt(self, text_prompt: str, audio_features: List[float]) -> str:
-         """Format the LLM input prompt"""
-         # Summarize the audio features compactly
-         audio_summary = ", ".join([f"{v:.3f}" for v in audio_features[:8]])
-
-         prompt = f"""You are an audio effect parameter predictor.
-
- Input:
- - Text description: {text_prompt}
- - Audio features (first 8): [{audio_summary}]
-
- Output the effect parameters as JSON:
- ```json
- {{
-     "eq_peak1.params.freq": <float>,
-     "eq_peak1.params.gain": <float>,
-     "eq_peak1.params.q": <float>,
-     "eq_peak2.params.freq": <float>,
-     "eq_peak2.params.gain": <float>,
-     "eq_peak2.params.q": <float>,
-     "eq_lowshelf.params.freq": <float>,
-     "eq_lowshelf.params.gain": <float>,
-     "eq_lowshelf.params.q": <float>,
-     "eq_highshelf.params.freq": <float>,
-     "eq_highshelf.params.gain": <float>,
-     "eq_highshelf.params.q": <float>,
-     "distortion_amount": <float>,
-     "delay.delay_time": <float>,
-     "delay.feedback": <float>,
-     "delay.mix": <float>,
-     "final_wet_mix": <float>
- }}
- ```
-
- JSON output:"""
-
-         return prompt
-
-     def _parse_output(self, output_text: str) -> Dict[str, float]:
-         """Extract parameters from the LLM output"""
-         try:
-             # Find a JSON block
-             json_match = re.search(r'\{[^{}]*\}', output_text, re.DOTALL)
-             if json_match:
-                 params = json.loads(json_match.group())
-
-                 # Validate and merge with defaults
-                 result = DEFAULT_PARAMETERS.copy()
-                 for key, value in params.items():
-                     if key in result and isinstance(value, (int, float)):
-                         result[key] = float(value)
-
-                 return result
-         except Exception as e:
-             print(f"[AIEffector] Output parsing failed: {e}")
-
-         return DEFAULT_PARAMETERS.copy()
-
-     def predict(self, audio_path: str, text_prompt: str = "") -> Dict[str, float]:
-         """Predict parameters"""
-
-         # Fall back to presets when no model is loaded
-         if not self.is_loaded():
-             print(f"[AIEffector] Using preset mode (prompt: {text_prompt})")
-             return self._apply_preset(text_prompt)
-
-         try:
-             # Extract audio features
-             audio_features = self.audio_encoder.get_audio_features(audio_path)
-
-             # Build the prompt
-             prompt = self._format_prompt(text_prompt, audio_features)
-
-             # Tokenize
-             inputs = self.tokenizer(
-                 prompt,
-                 return_tensors="pt",
-                 truncation=True,
-                 max_length=1024
-             ).to(self.device)
-
-             # Generate
-             with torch.no_grad():
-                 outputs = self.model.generate(
-                     **inputs,
-                     max_new_tokens=256,
-                     do_sample=False,
-                     temperature=0.1,
-                     pad_token_id=self.tokenizer.pad_token_id
-                 )
-
-             # Decode
-             output_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-             # Parse
-             params = self._parse_output(output_text)
-
-             print(f"[AIEffector] ✅ AI prediction complete")
-             return params
-
-         except Exception as e:
-             print(f"[AIEffector] Prediction failed: {e}, falling back to presets")
-             return self._apply_preset(text_prompt)
 
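The preset fallback above is simple enough to trace by hand; a hypothetical snippet reproducing what `_apply_preset` does with the module's tables (the prompt string is illustrative):

```python
# Hypothetical trace of AIEffector._apply_preset, reusing the removed
# module's DEFAULT_PARAMETERS and STYLE_PRESETS tables.
from models.ai_effector import DEFAULT_PARAMETERS, STYLE_PRESETS

prompt = "make it warm and spacious"  # illustrative prompt
params = DEFAULT_PARAMETERS.copy()
for style_name, style_params in STYLE_PRESETS.items():
    if style_name in prompt.lower():  # substring match against the prompt
        params.update(style_params)

print(params["eq_lowshelf.params.gain"])  # 3.0, from the "warm" preset
print(params["delay.mix"])                # 0.35, from the "spacious" preset
```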
requirements.txt DELETED
@@ -1,23 +0,0 @@
- # MagicPath Server - DiffVox LLM integrated version
- # ==========================================
-
- # Web server
- fastapi>=0.104.0
- uvicorn>=0.24.0
- python-multipart>=0.0.6
-
- # Audio processing
- soundfile>=0.12.0
- pedalboard>=0.8.0
- librosa>=0.10.0
- numpy>=1.24.0
-
- # AI model
- torch>=2.2.0
- transformers>=4.36.0
- peft>=0.7.0
- huggingface_hub>=0.20.0
- accelerate>=0.25.0
-
- # Additional dependencies
- scipy>=1.10.0