heybaeheef committed on
Commit
17a7958
·
verified ·
1 Parent(s): 53b5dcc

Upload ai_effector.py

Browse files
Files changed (1) hide show
  1. models/ai_effector.py +503 -0
models/ai_effector.py ADDED
@@ -0,0 +1,503 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
AI Effector - DiffVox LLM-based effect parameter prediction
===========================================================
V9: Adjusted the compressor threshold range (0 to -5 dB)
"""
6
+
7
+ import os
8
+ import json
9
+ import re
10
+ import math
11
+ import torch
12
+ import numpy as np
13
+ from typing import Dict, List, Optional, Any, Tuple
14
+ from pathlib import Path
15
+ from datetime import datetime
16
+ import warnings
17
+
18
+ warnings.filterwarnings("ignore")
19
+
20
+
21
def sigmoid(x: float) -> float:
    """Return the logistic function ``1 / (1 + e**-x)`` of *x*.

    If ``math.exp(-x)`` overflows, the result saturates instead of raising:
    ``0.0`` for negative *x*, ``1.0`` otherwise.
    """
    try:
        exp_neg = math.exp(-x)
    except OverflowError:
        # exp(-x) only overflows for very negative x, where the logistic
        # function is indistinguishable from 0.
        if x < 0:
            return 0.0
        return 1.0
    return 1 / (1 + exp_neg)
26
+
27
+
28
def minmax_transform(raw: float, min_val: float, max_val: float) -> float:
    """Squash *raw* with ``sigmoid`` and rescale it into ``[min_val, max_val]``.

    A raw value of 0 lands on the midpoint of the interval; large negative
    and positive raw values approach ``min_val`` and ``max_val`` respectively.
    """
    span = max_val - min_val
    return sigmoid(raw) * span + min_val
30
+
31
+
32
+ PARAM_TRANSFORMS = {
33
+ "eq_peak1.params.freq": {"type": "minmax", "min": 33.0, "max": 17500.0},
34
+ "eq_peak1.params.Q": {"type": "minmax", "min": 0.2, "max": 20.0},
35
+ "eq_peak1.params.gain": {"type": "none"},
36
+ "eq_peak2.params.freq": {"type": "minmax", "min": 33.0, "max": 17500.0},
37
+ "eq_peak2.params.Q": {"type": "minmax", "min": 0.2, "max": 20.0},
38
+ "eq_peak2.params.gain": {"type": "none"},
39
+ "eq_lowshelf.params.freq": {"type": "minmax", "min": 30.0, "max": 200.0},
40
+ "eq_lowshelf.params.gain": {"type": "none"},
41
+ "eq_highshelf.params.freq": {"type": "minmax", "min": 2500.0, "max": 16000.0},
42
+ "eq_highshelf.params.gain": {"type": "none"},
43
+ "delay.delay_time": {"type": "none"},
44
+ "delay.feedback": {"type": "sigmoid"},
45
+ "delay.mix": {"type": "sigmoid"},
46
+ "distortion_amount": {"type": "sigmoid_scale", "scale": 0.1},
47
+ "final_wet_mix": {"type": "sigmoid"},
48
+ }
49
+
50
# Baseline effect parameters. They seed the result of
# AIEffector._parse_output and every preset-fallback path in
# AIEffector.predict, and their keys define the set of parameter names that
# parsed model output is allowed to fill in. Key order matters: the fuzzy key
# match in _parse_output stops at the first default key that fits.
DEFAULT_PARAMETERS = {
    "eq_peak1.params.freq": 1000.0,
    "eq_peak1.params.gain": 0.0,
    "eq_peak1.params.Q": 1.0,
    "eq_peak2.params.freq": 4000.0,
    "eq_peak2.params.gain": 0.0,
    "eq_peak2.params.Q": 1.0,
    "eq_lowshelf.params.freq": 115.0,
    "eq_lowshelf.params.gain": 0.0,
    "eq_highshelf.params.freq": 8000.0,
    "eq_highshelf.params.gain": 0.0,
    # V9: compressor threshold defaults to -3 dB
    "compressor.threshold": -3.0,
    "compressor.ratio": 2.0,
    "distortion_amount": 0.0,
    "delay.delay_time": 0.02,
    "delay.feedback": 0.15,
    "delay.mix": 0.1,
    "reverb.room_size": 0.3,
    "reverb.damping": 0.5,
    "reverb.wet_level": 0.0,
    "reverb.dry_level": 1.0,
    "final_wet_mix": 0.5
}
74
+
75
# Allowed (min, max) interval for every effect parameter.
# NOTE: AIEffector.predict currently clamps only the keys starting with
# "eq_" against this table; "final_wet_mix" is clamped there with the same
# 0.3 / 0.7 bounds written inline.
# V9: compressor threshold range is 0 to -5 dB
PARAM_RANGES = {
    "eq_peak1.params.freq": (33.0, 17500.0),
    "eq_peak1.params.gain": (-12.0, 12.0),
    "eq_peak1.params.Q": (0.2, 20.0),
    "eq_peak2.params.freq": (33.0, 17500.0),
    "eq_peak2.params.gain": (-12.0, 12.0),
    "eq_peak2.params.Q": (0.2, 20.0),
    "eq_lowshelf.params.freq": (30.0, 200.0),
    "eq_lowshelf.params.gain": (-12.0, 12.0),
    "eq_highshelf.params.freq": (2500.0, 16000.0),
    "eq_highshelf.params.gain": (-12.0, 12.0),
    # V9: 0 to -5 dB (light compression)
    "compressor.threshold": (-5.0, 0.0),
    "compressor.ratio": (1.5, 4.0),
    "distortion_amount": (0.0, 0.05),
    "delay.delay_time": (0.01, 0.3),
    "delay.feedback": (0.0, 0.25),
    "delay.mix": (0.0, 0.2),
    "reverb.room_size": (0.0, 0.6),
    "reverb.damping": (0.0, 1.0),
    "reverb.wet_level": (0.0, 0.3),
    "reverb.dry_level": (0.7, 1.0),
    "final_wet_mix": (0.3, 0.7),
}
100
+
101
# Free-form descriptive words -> the style vocabulary used downstream
# ("warm", "soft", "bright", "spacious", "dark", "saturated").
# AIEffector._preprocess_text substitutes these into the lower-cased prompt,
# and AIEffector._apply_preset then looks for STYLE_PRESETS names in the
# result.
#
# Only words OUTSIDE the target vocabulary belong here. "soft" used to be
# mapped to "warm", which rewrote the "soft" produced by other entries
# (e.g. "calm" -> "warm soft" -> "warm warm") and made the "soft" style
# preset unreachable, so that entry has been removed.
SYNONYM_MAP = {
    # Relaxed / gentle
    "calm": "warm soft",
    "relaxed": "warm soft",
    "chill": "warm soft",
    "smooth": "warm",
    "mellow": "warm soft",
    # Open / bright
    "breezy": "bright spacious",
    "airy": "bright spacious",
    "light": "bright",
    "crisp": "bright",
    "clean": "bright",
    # Dreamy / ambient
    "dreamy": "warm spacious",
    "ethereal": "bright spacious",
    "atmospheric": "spacious",
    "ambient": "spacious warm",
    # Driven / aggressive
    "aggressive": "saturated bright",
    "powerful": "saturated",
    "punchy": "saturated bright",
    "hard": "saturated",
    "gritty": "saturated dark",
    "harsh": "bright saturated",
    # Tonal balance
    "muddy": "dark",
    "thin": "bright",
    "thick": "warm dark",
    "full": "warm",
    # Space effects
    "reverb": "spacious",
    "echo": "spacious",
    "wet": "spacious",
}
113
+
114
# Fixed compressor / delay / reverb settings per style word.
# AIEffector._apply_preset merges every preset whose name occurs in the
# (pre-processed) prompt, iterating in this dict's order, so when several
# styles match, entries further down override the ones above them. "warm" is
# the fallback when nothing matches.
# V9: compressor thresholds stay within the 0 to -5 dB range
STYLE_PRESETS = {
    "warm": {
        "compressor.threshold": -3.0,
        "compressor.ratio": 2.0,
        "delay.delay_time": 0.02,
        "delay.feedback": 0.12,
        "delay.mix": 0.08,
        "reverb.room_size": 0.25,
        "reverb.wet_level": 0.1,
        "reverb.dry_level": 0.9,
    },
    "bright": {
        "compressor.threshold": -2.0,
        "compressor.ratio": 2.0,
        "delay.delay_time": 0.02,
        "delay.feedback": 0.1,
        "delay.mix": 0.06,
        "reverb.room_size": 0.2,
        "reverb.wet_level": 0.08,
        "reverb.dry_level": 0.92,
    },
    "spacious": {
        "compressor.threshold": -4.0,
        "compressor.ratio": 1.8,
        "delay.delay_time": 0.06,
        "delay.feedback": 0.2,
        "delay.mix": 0.15,
        "reverb.room_size": 0.45,
        "reverb.wet_level": 0.2,
        "reverb.dry_level": 0.8,
    },
    "dark": {
        "compressor.threshold": -4.0,
        "compressor.ratio": 2.0,
        "delay.delay_time": 0.03,
        "delay.feedback": 0.15,
        "delay.mix": 0.1,
        "reverb.room_size": 0.35,
        "reverb.wet_level": 0.15,
        "reverb.dry_level": 0.85,
    },
    "saturated": {
        "compressor.threshold": -2.0,
        "compressor.ratio": 3.0,
        "delay.delay_time": 0.02,
        "delay.feedback": 0.08,
        "delay.mix": 0.05,
        "reverb.room_size": 0.15,
        "reverb.wet_level": 0.06,
        "reverb.dry_level": 0.94,
    },
    "soft": {
        "compressor.threshold": -5.0,
        "compressor.ratio": 1.5,
        "delay.delay_time": 0.025,
        "delay.feedback": 0.15,
        "delay.mix": 0.1,
        "reverb.room_size": 0.3,
        "reverb.wet_level": 0.12,
        "reverb.dry_level": 0.88,
    },
}
177
+
178
+
179
class CLAPAudioEncoder:
    """Turn an audio file into a fixed-length feature vector using CLAP.

    The CLAP model and processor are loaded from ``model_name`` on
    construction. Loading failures are reported on stdout and leave the
    encoder in an "unloaded" state (see ``is_loaded``), in which
    ``get_audio_features`` returns an all-zero vector instead of raising.
    """

    def __init__(self, output_dim: int = 64, model_name: str = "laion/larger_clap_music"):
        """Store the configuration and try to load the CLAP model.

        Args:
            output_dim: Length of the feature vector returned by
                ``get_audio_features``.
            model_name: Hugging Face identifier of the CLAP checkpoint.
        """
        self.output_dim = output_dim
        self.model_name = model_name
        self.target_sr = 48000  # sample rate audio is resampled to before encoding
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = None
        self.processor = None
        self._load_model()

    def _load_model(self):
        """Load the CLAP processor and model; never raises.

        The attributes are assigned only after every step succeeded, so a
        failure half-way through (for example while moving the model to the
        device) cannot leave ``is_loaded()`` reporting a broken model.
        """
        try:
            # Imported lazily so the module can be imported without transformers.
            from transformers import ClapModel, ClapProcessor
            print(f"[CLAPEncoder] CLAP λͺ¨λΈ λ‘œλ”© 쀑: {self.model_name}")
            processor = ClapProcessor.from_pretrained(self.model_name)
            model = ClapModel.from_pretrained(self.model_name)
            model = model.to(self.device)
            model.eval()
        except Exception as e:
            print(f"[CLAPEncoder] ❌ λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
            self.model = None
            self.processor = None
            return
        self.processor = processor
        self.model = model
        print(f"[CLAPEncoder] βœ… CLAP λͺ¨λΈ λ‘œλ“œ μ™„λ£Œ")

    def get_audio_features(self, audio_path: str) -> List[float]:
        """Return ``output_dim`` CLAP features for the audio file.

        Args:
            audio_path: Path of the audio file to encode.

        Returns:
            A list of ``output_dim`` floats. An all-zero list is returned when
            the model is not loaded or when loading/encoding the file fails;
            the failure is printed rather than raised.
        """
        if self.model is None or self.processor is None:
            return [0.0] * self.output_dim
        try:
            import librosa
            audio, sr = librosa.load(audio_path, sr=self.target_sr, mono=True)
            inputs = self.processor(audios=audio, sampling_rate=self.target_sr, return_tensors="pt", padding=True).to(self.device)
            with torch.no_grad():
                outputs = self.model.get_audio_features(**inputs)
            # First (only) item of the batch.
            features_512 = outputs[0].cpu().numpy()
            return self._reduce_dimension(features_512).tolist()
        except Exception as e:
            print(f"[CLAPEncoder] νŠΉμ§• μΆ”μΆœ μ‹€νŒ¨: {e}")
            return [0.0] * self.output_dim

    def _reduce_dimension(self, features: np.ndarray) -> np.ndarray:
        """Average-pool a 1-D vector down to ``output_dim`` values.

        The input is cut into ``output_dim`` contiguous chunks whose sizes
        differ by at most one (the larger chunks come first) and each chunk is
        replaced by its mean. An input that already has ``output_dim``
        elements is returned unchanged.

        If the input is shorter than ``output_dim`` the trailing chunks are
        empty; they yield ``0.0`` instead of the NaN that averaging an empty
        slice would produce.
        """
        current_dim = len(features)
        if current_dim == self.output_dim:
            return features
        chunks = np.array_split(features, self.output_dim)
        return np.array([chunk.mean() if chunk.size else 0.0 for chunk in chunks])

    def is_loaded(self) -> bool:
        """Return True when the CLAP model was loaded successfully."""
        return self.model is not None
232
+
233
+
234
class AIEffector:
    """Predict audio-effect parameters from an audio file and a text prompt.

    Pipeline implemented by ``predict``:

    1. CLAP features are extracted from the audio file.
    2. A LoRA-adapted causal LLM is prompted with the features and the
       (synonym-normalised) text and is expected to answer with a JSON object
       of raw parameter values.
    3. Raw values are mapped to actual values via ``PARAM_TRANSFORMS`` and
       merged over ``DEFAULT_PARAMETERS``.
    4. EQ values are clamped to ``PARAM_RANGES`` and the style preset(s)
       matching the prompt overwrite their compressor / delay / reverb keys.

    Whenever the model is unavailable or any step fails, ``predict`` falls
    back to ``DEFAULT_PARAMETERS`` plus the matching style preset.
    """

    def __init__(self, model_repo_id: str = "heybaeheef/KU_SW_Academy", model_subfolder: str = "checkpoints", base_model_name: str = "Qwen/Qwen3-8B", audio_feature_dim: int = 64, use_huggingface: bool = True):
        """Create the CLAP encoder and try to load the LLM with its adapter.

        Args:
            model_repo_id: Hub repository of the LoRA adapter, or a local
                directory when ``use_huggingface`` is False.
            model_subfolder: Sub-folder inside ``model_repo_id`` that holds
                the adapter.
            base_model_name: Identifier of the base causal language model.
            audio_feature_dim: Length of the CLAP feature vector placed in
                the prompt.
            use_huggingface: Load the adapter from the Hub (True) or from
                ``os.path.join(model_repo_id, model_subfolder)`` (False).
        """
        self.model_repo_id = model_repo_id
        self.model_subfolder = model_subfolder
        self.base_model_name = base_model_name
        self.audio_feature_dim = audio_feature_dim
        self.use_huggingface = use_huggingface
        self.model = None
        self.tokenizer = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"[AIEffector V9] CLAP 인코더 μ΄ˆκΈ°ν™”...")
        self.audio_encoder = CLAPAudioEncoder(output_dim=audio_feature_dim)
        self.request_count = 0
        self._load_model()

    def _load_model(self):
        """Load tokenizer, base model and LoRA adapter; never raises.

        On CUDA the base model is loaded 4-bit quantised (NF4, fp16 compute);
        otherwise it is loaded in float32. On any failure the traceback is
        printed and ``model`` / ``tokenizer`` are left as ``None``.
        """
        try:
            from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
            from peft import PeftModel
            print(f"[AIEffector] 베이슀 λͺ¨λΈ λ‘œλ”©: {self.base_model_name}")
            use_cuda = torch.cuda.is_available()
            # NOTE(review): trust_remote_code=True executes Python code shipped
            # with the model repository - only use with trusted repositories.
            if use_cuda:
                bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True)
                base_model = AutoModelForCausalLM.from_pretrained(self.base_model_name, quantization_config=bnb_config, device_map="auto", trust_remote_code=True)
            else:
                base_model = AutoModelForCausalLM.from_pretrained(self.base_model_name, torch_dtype=torch.float32, device_map="auto", trust_remote_code=True)
            tokenizer = AutoTokenizer.from_pretrained(self.base_model_name, trust_remote_code=True)
            if tokenizer.pad_token is None:
                # generate() needs a pad token; reuse EOS when none is defined.
                tokenizer.pad_token = tokenizer.eos_token
            print(f"[AIEffector] LoRA μ–΄λŒ‘ν„° λ‘œλ”©...")
            adapter_dtype = torch.float16 if use_cuda else torch.float32
            if self.use_huggingface:
                model = PeftModel.from_pretrained(base_model, self.model_repo_id, subfolder=self.model_subfolder, torch_dtype=adapter_dtype)
            else:
                local_path = os.path.join(self.model_repo_id, self.model_subfolder)
                model = PeftModel.from_pretrained(base_model, local_path, torch_dtype=adapter_dtype)
            model.eval()
            self.tokenizer = tokenizer
            self.model = model
            print(f"[AIEffector] βœ… λͺ¨λΈ λ‘œλ“œ 성곡!")
        except Exception as e:
            print(f"[AIEffector] ❌ λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
            import traceback
            traceback.print_exc()
            self.model = None
            self.tokenizer = None

    def is_loaded(self) -> bool:
        """Return True when the LLM (with adapter) was loaded successfully."""
        return self.model is not None

    def _preprocess_text(self, text: str) -> str:
        """Lower-case *text* and replace ``SYNONYM_MAP`` words with style words.

        All synonyms are substituted in a single pass, so text produced by one
        replacement is never rewritten by another entry (previously "calm"
        became "warm soft" and then "warm warm"). A synonym only matches when
        it is not glued to other ASCII letters, which stops hits inside longer
        English words ("sweet" contains "wet", "something" contains "thin")
        while still matching next to digits, punctuation or non-Latin text.
        """
        lowered = text.lower()
        if not SYNONYM_MAP:
            return lowered
        # Longest first so an alternative can never shadow a longer one.
        words = sorted(SYNONYM_MAP, key=len, reverse=True)
        alternatives = "|".join(re.escape(word) for word in words)
        pattern = re.compile(r"(?<![a-z])(?:" + alternatives + r")(?![a-z])")
        return pattern.sub(lambda match: SYNONYM_MAP[match.group(0)], lowered)

    def _apply_preset(self, prompt: str) -> Dict[str, float]:
        """Return the merged ``STYLE_PRESETS`` whose names occur in *prompt*.

        Presets are merged in ``STYLE_PRESETS`` order, so later styles win on
        shared keys. When no style name occurs, the "warm" preset is used.
        """
        params = {}
        prompt_lower = prompt.lower()
        matched = []
        for style_name, style_params in STYLE_PRESETS.items():
            if style_name in prompt_lower:
                params.update(style_params)
                matched.append(style_name)
        if matched:
            print(f" [Preset] λ§€μΉ­: {matched}")
        else:
            params.update(STYLE_PRESETS["warm"])
            print(f" [Preset] κΈ°λ³Έκ°’ 적용: warm")
        return params

    def _format_prompt(self, text_prompt: str, audio_features: List[float]) -> str:
        """Build the LLM prompt from the text and the JSON-encoded features."""
        audio_state_str = json.dumps(audio_features)
        return (
            "Task: Convert text to audio parameters.\n"
            f"Audio: {audio_state_str}\n"
            f"Text: {text_prompt}\n"
            "Parameters:"
        )

    def _preprocess_json(self, json_str: str) -> str:
        """Repair common non-JSON artefacts in model output before parsing.

        Removes underscores between digits (``1_000``), drops trailing commas
        before ``}`` / ``]`` and replaces ``NaN`` / ``Infinity`` /
        ``-Infinity`` with ``0`` / ``999999`` / ``-999999``.
        """
        json_str = re.sub(r'(\d)_(\d)', r'\1\2', json_str)
        json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
        json_str = re.sub(r'\bNaN\b', '0', json_str)
        # Handle the signed form first; once the bare word has been replaced
        # there is no "-Infinity" left to match.
        json_str = re.sub(r'-Infinity\b', '-999999', json_str)
        json_str = re.sub(r'\bInfinity\b', '999999', json_str)
        return json_str

    def _normalize_key(self, key: str) -> str:
        """Collapse ``.parametrizations.<name>.original`` to ``.<name>``."""
        return re.sub(r'\.parametrizations\.(\w+)\.original', r'.\1', key)

    def _extract_json_object(self, text: str) -> Optional[str]:
        """Return the first balanced ``{...}`` substring of *text*, or None.

        Braces inside double-quoted JSON strings (including after escaped
        quotes) are ignored so they cannot end the object early.
        """
        start = text.find('{')
        if start == -1:
            return None
        depth = 0
        in_string = False
        escaped = False
        for i in range(start, len(text)):
            char = text[i]
            if in_string:
                if escaped:
                    escaped = False
                elif char == '\\':
                    escaped = True
                elif char == '"':
                    in_string = False
                continue
            if char == '"':
                in_string = True
            elif char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    return text[start:i+1]
        return None

    def _convert_raw_to_actual(self, params: Dict[str, float]) -> Dict[str, float]:
        """Map raw model values to actual values according to ``PARAM_TRANSFORMS``.

        Returns a copy of *params*; keys without a transform entry are left
        untouched. Every value passed in is treated as a RAW model output, so
        callers must not pass values that are already in the actual domain.
        """
        result = params.copy()
        for key, transform in PARAM_TRANSFORMS.items():
            if key not in result:
                continue
            raw = result[key]
            transform_type = transform["type"]
            if transform_type == "none":
                actual = raw
            elif transform_type == "minmax":
                actual = minmax_transform(raw, transform["min"], transform["max"])
                print(f" [MinMax] {key}: {raw:.4f} β†’ {actual:.2f}")
            elif transform_type == "sigmoid":
                actual = sigmoid(raw)
                print(f" [Sigmoid] {key}: {raw:.4f} β†’ {actual:.4f}")
            elif transform_type == "sigmoid_scale":
                actual = sigmoid(raw) * transform["scale"]
                print(f" [Sigmoid*{transform['scale']}] {key}: {raw:.4f} β†’ {actual:.4f}")
            else:
                actual = raw
            result[key] = actual
        return result

    def _match_default_key(self, norm_key: str) -> Optional[str]:
        """Return the ``DEFAULT_PARAMETERS`` key that *norm_key* refers to.

        An exact match wins. Otherwise, for dotted keys with at least three
        parts, the first default key sharing the same first and last part is
        used. Returns None when nothing fits.
        """
        if norm_key in DEFAULT_PARAMETERS:
            return norm_key
        norm_parts = norm_key.split('.')
        if len(norm_parts) < 3:
            return None
        for default_key in DEFAULT_PARAMETERS:
            default_parts = default_key.split('.')
            if len(default_parts) >= 3 and norm_parts[0] == default_parts[0] and norm_parts[-1] == default_parts[-1]:
                return default_key
        return None

    def _parse_model_params(self, output_text: str) -> Optional[Dict[str, float]]:
        """Extract the parameters the model actually produced.

        Returns:
            A dict holding only the recognised keys found in the model's JSON
            answer (still raw values), or None when no JSON object could be
            extracted or parsed. Non-numeric and non-finite values are
            skipped.
        """
        print(f" [Parse] Raw output 길이: {len(output_text)} 문자")
        try:
            # Drop the model's reasoning block, then prefer a fenced code block.
            text = re.sub(r'<think>.*?</think>', '', output_text, flags=re.DOTALL)
            code_match = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
            if code_match:
                text = code_match.group(1)
            json_str = self._extract_json_object(text)
            if json_str:
                print(f" [Parse] JSON 발견 (길이: {len(json_str)})")
                json_str = self._preprocess_json(json_str)
                raw_params = json.loads(json_str)
                parsed = {}
                parsed_count = 0
                for key, value in raw_params.items():
                    try:
                        float_val = float(value)
                    except (ValueError, TypeError):
                        continue
                    if not math.isfinite(float_val):
                        continue
                    target_key = self._match_default_key(self._normalize_key(key))
                    if target_key is not None:
                        parsed[target_key] = float_val
                        parsed_count += 1
                print(f" [Parse] βœ… {parsed_count}개 νŒŒλΌλ―Έν„° 맀핑됨")
                return parsed
        except json.JSONDecodeError as e:
            print(f" [Parse] ❌ JSON μ—λŸ¬: {e}")
        except Exception as e:
            print(f" [Parse] ❌ μ˜ˆμ™Έ: {e}")
        print(f" [Parse] ⚠️ κΈ°λ³Έκ°’ 폴백")
        return None

    def _parse_output(self, output_text: str) -> Dict[str, float]:
        """Return ``DEFAULT_PARAMETERS`` overlaid with the parsed model values.

        NOTE: the result mixes actual-domain defaults with raw model values,
        so it must not be passed to ``_convert_raw_to_actual`` as a whole;
        ``predict`` converts only the output of ``_parse_model_params``.
        """
        result = DEFAULT_PARAMETERS.copy()
        parsed = self._parse_model_params(output_text)
        if parsed:
            result.update(parsed)
        return result

    def _preset_fallback(self, processed_prompt: str) -> Dict[str, float]:
        """Return defaults + matching style preset in effect-chain format."""
        params = DEFAULT_PARAMETERS.copy()
        params.update(self._apply_preset(processed_prompt))
        self._log_parameters(params)
        return self._convert_to_effect_chain_format(params)

    def predict(self, audio_path: str, text_prompt: str = "") -> Dict[str, float]:
        """Predict effect parameters for *audio_path* guided by *text_prompt*.

        Args:
            audio_path: Path of the audio file to analyse.
            text_prompt: Free-form description of the desired sound.

        Returns:
            A dict of effect parameters in effect-chain format (see
            ``_convert_to_effect_chain_format``). If the model is not loaded,
            feature extraction yields nothing usable, or inference raises,
            the result is ``DEFAULT_PARAMETERS`` plus the matching preset.
        """
        self.request_count += 1
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"\n{'='*60}")
        print(f"[AIEffector V9] 🎡 μš”μ²­ #{self.request_count} - {timestamp}")
        print(f"{'='*60}")
        print(f" πŸ“‚ μ˜€λ””μ˜€: {Path(audio_path).name}")
        print(f" πŸ’¬ 원본: '{text_prompt}'")
        processed_prompt = self._preprocess_text(text_prompt)
        print(f" πŸ€– λͺ¨λΈ: {'AI' if self.is_loaded() else '프리셋'}")

        if not self.is_loaded():
            print(f"\n ⚠️ AI λͺ¨λΈ λ―Έλ‘œλ“œ")
            return self._preset_fallback(processed_prompt)

        try:
            print(f"\n πŸ“Š [Step 1] CLAP νŠΉμ§• μΆ”μΆœ...")
            audio_features = self.audio_encoder.get_audio_features(audio_path)
            # The encoder signals failure with an all-zero vector.
            if not audio_features or all(f == 0 for f in audio_features):
                print(f" ⚠️ μ‹€νŒ¨, 프리셋 폴백")
                return self._preset_fallback(processed_prompt)
            print(f" βœ… {len(audio_features)}차원")

            print(f"\n πŸ”€ [Step 2] ν”„λ‘¬ν”„νŠΈ 생성...")
            prompt = self._format_prompt(processed_prompt, audio_features)

            print(f"\n πŸ”’ [Step 3] 토큰화...")
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=False).to(self.device)
            prompt_length = inputs['input_ids'].shape[1]
            print(f" 토큰 수: {prompt_length}")

            print(f"\n 🧠 [Step 4] LLM μΆ”λ‘ ...")
            import time
            start = time.time()
            with torch.no_grad():
                # Greedy decoding; sampling options such as temperature only
                # apply when do_sample=True, so none are passed.
                outputs = self.model.generate(**inputs, max_new_tokens=500, do_sample=False, pad_token_id=self.tokenizer.pad_token_id, eos_token_id=self.tokenizer.eos_token_id)
            print(f" μΆ”λ‘  μ‹œκ°„: {time.time()-start:.2f}초")

            print(f"\n πŸ“ [Step 5] λ””μ½”λ”©...")
            # Keep only the newly generated tokens (strip the echoed prompt).
            gen_tokens = outputs[0][prompt_length:]
            output_text = self.tokenizer.decode(gen_tokens, skip_special_tokens=True).strip()
            print(f" 좜λ ₯ (처음 500자):\n{output_text[:500]}")

            print(f"\n πŸ”§ [Step 6] νŒŒμ‹±...")
            model_params = self._parse_model_params(output_text) or {}

            print(f"\n πŸ”„ [Step 7] Raw β†’ Actual λ³€ν™˜...")
            # Only values that came from the model are raw; the defaults are
            # already actual values and must not go through the transforms.
            actual_params = DEFAULT_PARAMETERS.copy()
            actual_params.update(self._convert_raw_to_actual(model_params))

            print(f"\n πŸ“ [Step 8] κ°’ ν΄λž¨ν•‘ (EQ만)...")
            for key, (min_val, max_val) in PARAM_RANGES.items():
                if not key.startswith('eq_') or key not in actual_params:
                    continue
                original = actual_params[key]
                clamped = max(min_val, min(max_val, original))
                if abs(clamped - original) > 0.001:
                    print(f" [Clamp] {key}: {original:.4f} β†’ {clamped:.4f}")
                actual_params[key] = clamped

            print(f"\n πŸŽ›οΈ [Step 9] 프리셋 적용 (Compressor/Reverb/Delay)...")
            preset = self._apply_preset(processed_prompt)
            for key, value in preset.items():
                actual_params[key] = value
                print(f" {key}: {value}")

            wet_min, wet_max = PARAM_RANGES["final_wet_mix"]
            actual_params["final_wet_mix"] = max(wet_min, min(wet_max, actual_params.get("final_wet_mix", 0.5)))
            print(f" final_wet_mix: {actual_params['final_wet_mix']:.2f}")

            self._log_parameters(actual_params)
            print(f"\n βœ… μ™„λ£Œ!")
            print(f"{'='*60}\n")
            return self._convert_to_effect_chain_format(actual_params)

        except Exception as e:
            print(f"\n ❌ μ‹€νŒ¨: {e}")
            import traceback
            traceback.print_exc()
            return self._preset_fallback(processed_prompt)

    def _convert_to_effect_chain_format(self, params: Dict[str, float]) -> Dict[str, float]:
        """Return a copy of *params* with every ``.Q`` in a key lower-cased to ``.q``."""
        return {key.replace('.Q', '.q'): value for key, value in params.items()}

    def _log_parameters(self, params: Dict[str, float]):
        """Print a human-readable summary of *params* to stdout."""
        print(f"\n πŸ“‹ μ΅œμ’… νŒŒλΌλ―Έν„°:")
        print(f" [EQ Peak 1] freq={params.get('eq_peak1.params.freq',0):.0f}Hz, gain={params.get('eq_peak1.params.gain',0):.2f}dB, Q={params.get('eq_peak1.params.Q',0):.2f}")
        print(f" [EQ Peak 2] freq={params.get('eq_peak2.params.freq',0):.0f}Hz, gain={params.get('eq_peak2.params.gain',0):.2f}dB, Q={params.get('eq_peak2.params.Q',0):.2f}")
        print(f" [Low Shelf] freq={params.get('eq_lowshelf.params.freq',0):.0f}Hz, gain={params.get('eq_lowshelf.params.gain',0):.2f}dB")
        print(f" [High Shelf] freq={params.get('eq_highshelf.params.freq',0):.0f}Hz, gain={params.get('eq_highshelf.params.gain',0):.2f}dB")
        print(f" [Compressor] threshold={params.get('compressor.threshold',-3):.1f}dB, ratio={params.get('compressor.ratio',2):.1f}")
        print(f" [Distortion] {params.get('distortion_amount',0):.4f}")
        print(f" [Delay] time={params.get('delay.delay_time',0):.3f}s, fb={params.get('delay.feedback',0):.2f}, mix={params.get('delay.mix',0):.2f}")
        print(f" [Reverb] room={params.get('reverb.room_size',0):.2f}, damp={params.get('reverb.damping',0):.2f}, wet={params.get('reverb.wet_level',0):.2f}, dry={params.get('reverb.dry_level',1):.2f}")
        print(f" [Wet Mix] {params.get('final_wet_mix',0):.2f}")