heybaeheef commited on
Commit
6dd0b5f
·
verified ·
1 Parent(s): 5740613

Delete models/ai_effector.py

Browse files
Files changed (1) hide show
  1. models/ai_effector.py +0 -738
models/ai_effector.py DELETED
@@ -1,738 +0,0 @@
1
- """
2
- AI Effector - DiffVox LLM ๊ธฐ๋ฐ˜ ์ดํŽ™ํŠธ ํŒŒ๋ผ๋ฏธํ„ฐ ์˜ˆ์ธก
3
- ===================================================
4
- V5: ๋ชจ๋“  parametrization ๋ณ€ํ™˜ ์ ์šฉ
5
- - freq: MinMax(min, max) ๋ณ€ํ™˜
6
- - Q: MinMax(min, max) ๋ณ€ํ™˜
7
- - delay.feedback, delay.mix: sigmoid
8
- - distortion_amount: sigmoid * 0.1
9
- - final_wet_mix: sigmoid
10
- """
11
-
12
- import os
13
- import json
14
- import re
15
- import math
16
- import torch
17
- import numpy as np
18
- from typing import Dict, List, Optional, Any, Tuple
19
- from pathlib import Path
20
- from datetime import datetime
21
- import warnings
22
-
23
- warnings.filterwarnings("ignore")
24
-
25
-
26
def sigmoid(x: float) -> float:
    """Numerically stable logistic function: 1 / (1 + exp(-x)).

    Saturates to 0.0 / 1.0 for large-magnitude inputs. Unlike the naive
    form wrapped in try/except OverflowError, this evaluates the branch
    whose exp() argument is non-positive, so math.exp can only underflow
    (to 0.0) and never overflow — no exception path needed.
    """
    if x >= 0:
        return 1.0 / (1.0 + math.exp(-x))
    # x < 0: exp(x) is in (0, 1]; algebraically identical rearrangement.
    e = math.exp(x)
    return e / (1.0 + e)
32
-
33
-
34
def minmax_transform(raw: float, min_val: float, max_val: float) -> float:
    """Squash an unbounded raw value into [min_val, max_val] via the logistic curve."""
    span = max_val - min_val
    return min_val + sigmoid(raw) * span
37
-
38
-
39
# =====================================================
# Parameter ranges taken from fx.py (the exact values!)
# =====================================================
# Maps each learned (raw) parameter to the parametrization used during
# training, so raw LLM outputs can be converted back to actual values.
PARAM_TRANSFORMS = {
    # Peak EQ 1 & 2
    "eq_peak1.params.freq": {"type": "minmax", "min": 33.0, "max": 17500.0},
    "eq_peak1.params.Q": {"type": "minmax", "min": 0.2, "max": 20.0},
    "eq_peak1.params.gain": {"type": "none"},  # gain is not transformed

    "eq_peak2.params.freq": {"type": "minmax", "min": 33.0, "max": 17500.0},
    "eq_peak2.params.Q": {"type": "minmax", "min": 0.2, "max": 20.0},
    "eq_peak2.params.gain": {"type": "none"},

    # LowShelf
    "eq_lowshelf.params.freq": {"type": "minmax", "min": 30.0, "max": 200.0},
    "eq_lowshelf.params.gain": {"type": "none"},

    # HighShelf
    "eq_highshelf.params.freq": {"type": "minmax", "min": 2500.0, "max": 16000.0},
    "eq_highshelf.params.gain": {"type": "none"},

    # Delay
    "delay.delay_time": {"type": "none"},  # not learned; always 0.02
    "delay.feedback": {"type": "sigmoid"},
    "delay.mix": {"type": "sigmoid"},

    # Distortion
    "distortion_amount": {"type": "sigmoid_scale", "scale": 0.1},

    # Wet Mix
    "final_wet_mix": {"type": "sigmoid"},
}
71
-
72
# Default parameters (ACTUAL values, i.e. after the raw→actual transforms).
# Used as the fallback whenever the model is unavailable or parsing fails.
DEFAULT_PARAMETERS = {
    "eq_peak1.params.freq": 1000.0,
    "eq_peak1.params.gain": 0.0,
    "eq_peak1.params.Q": 1.0,
    "eq_peak2.params.freq": 4000.0,
    "eq_peak2.params.gain": 0.0,
    "eq_peak2.params.Q": 1.0,
    "eq_lowshelf.params.freq": 115.0,
    "eq_lowshelf.params.gain": 0.0,
    "eq_highshelf.params.freq": 8000.0,
    "eq_highshelf.params.gain": 0.0,
    "distortion_amount": 0.0,
    "delay.delay_time": 0.02,
    "delay.feedback": 0.3,
    "delay.mix": 0.2,
    # Compressor (default revised: -24 → -5)
    "compressor.threshold_db": -5.0,
    "compressor.ratio": 2.0,
    # Reverb (inferred by the LLM)
    "reverb.room_size": 0.3,
    "reverb.damping": 0.5,
    "reverb.wet_level": 0.2,
    "reverb.dry_level": 0.8,
    # Wet Mix
    "final_wet_mix": 0.5
}
99
-
100
- # ํŒŒ๋ผ๋ฏธํ„ฐ ๋ฒ”์œ„ ์ œํ•œ (๋ณ€ํ™˜ ํ›„)
101
- PARAM_RANGES = {
102
- "eq_peak1.params.freq": (33.0, 17500.0),
103
- "eq_peak1.params.gain": (-12.0, 12.0),
104
- "eq_peak1.params.Q": (0.2, 20.0),
105
- "eq_peak2.params.freq": (33.0, 17500.0),
106
- "eq_peak2.params.gain": (-12.0, 12.0),
107
- "eq_peak2.params.Q": (0.2, 20.0),
108
- "eq_lowshelf.params.freq": (30.0, 200.0),
109
- "eq_lowshelf.params.gain": (-12.0, 12.0),
110
- "eq_highshelf.params.freq": (2500.0, 16000.0),
111
- "eq_highshelf.params.gain": (-12.0, 12.0),
112
- "distortion_amount": (0.0, 0.1),
113
- "delay.delay_time": (0.01, 1.0),
114
- "delay.feedback": (0.0, 0.95),
115
- "delay.mix": (0.0, 1.0),
116
- # Compressor
117
- "compressor.threshold_db": (-40.0, 0.0),
118
- "compressor.ratio": (1.0, 20.0),
119
- # Reverb (0~1 ๋ฒ”์œ„)
120
- "reverb.room_size": (0.0, 1.0),
121
- "reverb.damping": (0.0, 1.0),
122
- "reverb.wet_level": (0.0, 1.0),
123
- "reverb.dry_level": (0.0, 1.0),
124
- # Wet Mix
125
- "final_wet_mix": (0.0, 1.0),
126
- }
127
-
128
- # ๋™์˜์–ด ๋งคํ•‘
129
- SYNONYM_MAP = {
130
- "calm": "warm soft",
131
- "relaxed": "warm soft",
132
- "chill": "warm soft",
133
- "smooth": "warm",
134
- "mellow": "warm soft",
135
- "breezy": "bright spacious",
136
- "airy": "bright spacious",
137
- "light": "bright",
138
- "crisp": "bright",
139
- "clean": "bright",
140
- "dreamy": "warm spacious ambient",
141
- "ethereal": "bright spacious ambient",
142
- "atmospheric": "spacious ambient",
143
- "aggressive": "saturated bright",
144
- "powerful": "saturated",
145
- "punchy": "saturated bright",
146
- "hard": "saturated",
147
- "gritty": "saturated dark",
148
- "soft": "warm",
149
- "harsh": "bright saturated",
150
- "muddy": "dark",
151
- "thin": "bright",
152
- "thick": "warm dark",
153
- "full": "warm",
154
- # Reverb ๊ด€๋ จ - ๋ณ€ํ™˜ํ•˜์ง€ ์•Š๊ณ  ๊ทธ๋Œ€๋กœ ์œ ์ง€
155
- # "reverb"๋Š” spacious๋กœ ๋ณ€ํ™˜ํ•˜์ง€ ์•Š์Œ (LLM์ด ์ง์ ‘ ์ถ”๋ก ํ•˜๋„๋ก)
156
- "echo": "spacious",
157
- "wet": "spacious",
158
- "holy": "spacious cathedral",
159
- "church": "cathedral",
160
- "room": "hall",
161
- }
162
-
163
# Style presets (supplement parameters the LLM does not learn:
# delay.delay_time and the reverb group).
STYLE_PRESETS = {
    "warm": {},
    "bright": {},
    "spacious": {
        "delay.delay_time": 0.05,
        "reverb.room_size": 0.6,
        "reverb.wet_level": 0.4,
        "reverb.dry_level": 0.6,
    },
    "dark": {},
    "saturated": {},
    "soft": {},
    # Reverb-related presets
    "hall": {
        "reverb.room_size": 0.8,
        "reverb.damping": 0.4,
        "reverb.wet_level": 0.5,
        "reverb.dry_level": 0.5,
    },
    "cathedral": {
        "reverb.room_size": 0.95,
        "reverb.damping": 0.3,
        "reverb.wet_level": 0.6,
        "reverb.dry_level": 0.4,
    },
    "ambient": {
        "reverb.room_size": 0.7,
        "reverb.damping": 0.5,
        "reverb.wet_level": 0.45,
        "reverb.dry_level": 0.55,
    },
    "dry": {
        "reverb.room_size": 0.1,
        "reverb.wet_level": 0.05,
        "reverb.dry_level": 0.95,
    },
}
201
-
202
-
203
class CLAPAudioEncoder:
    """CLAP-based audio encoder.

    Loads a pretrained CLAP model and exposes fixed-length audio feature
    vectors (``output_dim`` floats). All failures degrade to a zero vector
    rather than raising, so callers can always treat the result as valid.
    """

    def __init__(self, output_dim: int = 64, model_name: str = "laion/larger_clap_music"):
        # Target dimensionality of the returned feature vector.
        self.output_dim = output_dim
        self.model_name = model_name
        # CLAP expects 48 kHz audio; librosa resamples to this rate.
        self.target_sr = 48000
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.model = None
        self.processor = None
        self._load_model()

    def _load_model(self):
        """Load the CLAP model/processor; on any failure leave self.model as None."""
        try:
            from transformers import ClapModel, ClapProcessor

            print(f"[CLAPEncoder] CLAP ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘: {self.model_name}")

            self.processor = ClapProcessor.from_pretrained(self.model_name)
            self.model = ClapModel.from_pretrained(self.model_name)
            self.model = self.model.to(self.device)
            self.model.eval()

            print(f"[CLAPEncoder] โœ… CLAP ๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ")

        except Exception as e:
            # Deliberate best-effort: encoder stays usable via is_loaded()/zero vectors.
            print(f"[CLAPEncoder] โŒ ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")

    def get_audio_features(self, audio_path: str) -> List[float]:
        """Return an ``output_dim``-long feature list for the audio file.

        Falls back to an all-zero vector if the model is not loaded or any
        step of loading/encoding fails.
        """
        if self.model is None:
            return [0.0] * self.output_dim

        try:
            import librosa

            # Mono load + resample to the CLAP sampling rate.
            audio, sr = librosa.load(audio_path, sr=self.target_sr, mono=True)

            inputs = self.processor(
                audios=audio,
                sampling_rate=self.target_sr,
                return_tensors="pt",
                padding=True
            ).to(self.device)

            with torch.no_grad():
                outputs = self.model.get_audio_features(**inputs)

            # outputs[0]: embedding for the single input clip (CLAP emits 512-d).
            features_512 = outputs[0].cpu().numpy()
            features_64 = self._reduce_dimension(features_512)

            return features_64.tolist()

        except Exception as e:
            print(f"[CLAPEncoder] ํŠน์ง• ์ถ”์ถœ ์‹คํŒจ: {e}")
            return [0.0] * self.output_dim

    def _reduce_dimension(self, features: np.ndarray) -> np.ndarray:
        """Mean-pool a 1-D feature vector down to ``output_dim`` entries.

        The input is split into ``output_dim`` contiguous chunks (the first
        ``remainder`` chunks get one extra element) and each chunk is averaged.
        NOTE(review): assumes len(features) >= output_dim; a shorter input
        would produce empty chunks and NaN means — confirm upstream dim.
        """
        current_dim = len(features)
        if current_dim == self.output_dim:
            return features

        pool_size = current_dim // self.output_dim
        remainder = current_dim % self.output_dim

        pooled = []
        idx = 0
        for i in range(self.output_dim):
            # Distribute the remainder over the leading chunks.
            size = pool_size + (1 if i < remainder else 0)
            pooled.append(np.mean(features[idx:idx+size]))
            idx += size

        return np.array(pooled)

    def is_loaded(self) -> bool:
        # True once _load_model succeeded.
        return self.model is not None
279
-
280
-
281
class AIEffector:
    """AI-based effect parameter prediction (V5).

    Pipeline: CLAP audio features + text prompt → LoRA-tuned LLM → JSON of
    raw parameter values → parametrization transforms (see PARAM_TRANSFORMS)
    → clamping → preset supplementation. Any failure along the way degrades
    to DEFAULT_PARAMETERS plus keyword presets, so predict() always returns
    a usable parameter dict.
    """

    def __init__(
        self,
        model_repo_id: str = "heybaeheef/KU_SW_Academy",
        model_subfolder: str = "checkpoints",
        base_model_name: str = "Qwen/Qwen3-8B",
        audio_feature_dim: int = 64,
        use_huggingface: bool = True
    ):
        # Where the LoRA adapter lives: a HF Hub repo (use_huggingface=True)
        # or a local directory (repo_id is then treated as a path).
        self.model_repo_id = model_repo_id
        self.model_subfolder = model_subfolder
        self.base_model_name = base_model_name
        self.audio_feature_dim = audio_feature_dim
        self.use_huggingface = use_huggingface

        self.model = None
        self.tokenizer = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        print(f"[AIEffector V5] CLAP ์ธ์ฝ”๋” ์ดˆ๊ธฐํ™”...")
        self.audio_encoder = CLAPAudioEncoder(output_dim=audio_feature_dim)

        # Counts predict() calls for logging only.
        self.request_count = 0
        self._load_model()

    def _load_model(self):
        """Load the base LLM (4-bit NF4 on CUDA, fp32 on CPU) and attach the LoRA adapter.

        On any failure both self.model and self.tokenizer are reset to None,
        which flips predict() into preset-only mode.
        """
        try:
            from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
            from peft import PeftModel

            print(f"[AIEffector] ๋ฒ ์ด์Šค ๋ชจ๋ธ ๋กœ๋”ฉ: {self.base_model_name}")

            if torch.cuda.is_available():
                # 4-bit quantization to fit the 8B base model on GPU.
                bnb_config = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_quant_type="nf4",
                    bnb_4bit_compute_dtype=torch.float16,
                    bnb_4bit_use_double_quant=True
                )
                base_model = AutoModelForCausalLM.from_pretrained(
                    self.base_model_name,
                    quantization_config=bnb_config,
                    device_map="auto",
                    trust_remote_code=True
                )
            else:
                base_model = AutoModelForCausalLM.from_pretrained(
                    self.base_model_name,
                    torch_dtype=torch.float32,
                    device_map="auto",
                    trust_remote_code=True
                )

            self.tokenizer = AutoTokenizer.from_pretrained(
                self.base_model_name,
                trust_remote_code=True
            )

            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            print(f"[AIEffector] LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ...")

            if self.use_huggingface:
                self.model = PeftModel.from_pretrained(
                    base_model,
                    self.model_repo_id,
                    subfolder=self.model_subfolder,
                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                )
            else:
                # Local checkout: repo_id is interpreted as a filesystem path.
                local_path = os.path.join(self.model_repo_id, self.model_subfolder)
                self.model = PeftModel.from_pretrained(
                    base_model,
                    local_path,
                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                )

            self.model.eval()
            print(f"[AIEffector] โœ… ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต!")

        except Exception as e:
            print(f"[AIEffector] โŒ ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
            import traceback
            traceback.print_exc()
            self.model = None
            self.tokenizer = None

    def is_loaded(self) -> bool:
        # True once _load_model attached the LoRA model.
        return self.model is not None

    def _preprocess_text(self, text: str) -> str:
        """Lowercase the prompt and substitute SYNONYM_MAP entries.

        NOTE(review): substitutions are applied sequentially on the running
        string, so one replacement's output can be re-matched by a later
        entry (e.g. "calm" → "warm soft", then "soft" → "warm") — confirm
        this chaining is intended.
        """
        text_lower = text.lower()
        for synonym, replacement in SYNONYM_MAP.items():
            if synonym in text_lower:
                text_lower = text_lower.replace(synonym, replacement)
                print(f" [Synonym] '{synonym}' โ†’ '{replacement}'")
        return text_lower

    def _apply_preset(self, prompt: str) -> Dict[str, float]:
        """Merge the STYLE_PRESETS of every style keyword found in the prompt.

        Later-matching presets overwrite earlier ones on overlapping keys
        (dict iteration order of STYLE_PRESETS decides precedence).
        """
        params = {}
        prompt_lower = prompt.lower()

        matched = []
        for style_name, style_params in STYLE_PRESETS.items():
            if style_name in prompt_lower:
                params.update(style_params)
                matched.append(style_name)

        if matched:
            print(f" [Preset] ๋งค์นญ: {matched}")

        return params

    def _format_prompt(self, text_prompt: str, audio_features: List[float]) -> str:
        """Build the LLM prompt from the audio feature vector and the (preprocessed) text."""
        audio_state_str = json.dumps(audio_features)

        # Inline guidance so the LLM infers reverb values it was not trained on.
        return f"""Task: Convert text to audio parameters.

For reverb parameters, infer appropriate values based on the text description:
- "reverb", "spacious", "wet", "hall", "cathedral" โ†’ room_size: 0.5-0.9, wet_level: 0.3-0.6
- "dry", "tight", "close" โ†’ room_size: 0.1-0.3, wet_level: 0.1-0.2
- "ambient", "atmospheric" โ†’ room_size: 0.6-0.8, damping: 0.3-0.5

Audio: {audio_state_str}
Text: {text_prompt}
Parameters (include reverb.room_size, reverb.damping, reverb.wet_level, reverb.dry_level):"""

    def _preprocess_json(self, json_str: str) -> str:
        """Scrub common LLM artifacts out of a JSON candidate before json.loads.

        NOTE(review): the letter-stripping rules below are intentionally
        aggressive and also rewrite digits inside quoted keys (e.g.
        "eq_peak1.params.freq" → "eq_peak1..freq"); parsing still succeeds
        only because _parse_output's partial key matching tolerates it, and
        scientific-notation exponents ("1e-5") would be truncated — confirm.
        """
        # Strip digit-group underscores (e.g. 1_000 → 1000).
        json_str = re.sub(r'(\d)_(\d)', r'\1\2', json_str)

        # Remove garbage identifiers glued after a number:
        # e.g. 0.302 S59910583495616915 → 0.302
        # e.g. -1.70eyJ59999999999999 → -1.70
        json_str = re.sub(r'(-?\d+\.?\d*)\s*[a-zA-Z][a-zA-Z0-9]*', r'\1', json_str)

        # Even more aggressive: drop any letter directly following a digit.
        # e.g. 0.302S599 → 0.302
        json_str = re.sub(r'(\d)([A-Za-z])', r'\1', json_str)

        # Remove trailing commas before } or ].
        json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)

        # Replace non-JSON numeric literals NaN / Infinity.
        json_str = re.sub(r'\bNaN\b', '0', json_str)
        json_str = re.sub(r'\bInfinity\b', '999999', json_str)
        json_str = re.sub(r'-Infinity\b', '-999999', json_str)

        return json_str

    def _normalize_key(self, key: str) -> str:
        """Normalize torch-parametrization key names to canonical form.

        eq_peak1.params.parametrizations.freq.original → eq_peak1.params.freq
        eq_peak1.params.parametrizations.Q.original → eq_peak1.params.Q
        """
        key = re.sub(r'\.parametrizations\.(\w+)\.original', r'.\1', key)
        return key

    def _extract_json_object(self, text: str) -> Optional[str]:
        """Return the first balanced {...} object in text, or None.

        Brace-depth scan; does not account for braces inside string values.
        """
        start = text.find('{')
        if start == -1:
            return None

        depth = 0
        for i, char in enumerate(text[start:], start):
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    return text[start:i+1]
        return None

    def _convert_raw_to_actual(self, params: Dict[str, float]) -> Dict[str, float]:
        """Core step: convert raw (training-space) values to actual values.

        Applies the parametrization from fx.py per key:
        - minmax:        sigmoid(raw) * (max - min) + min
        - sigmoid:       sigmoid(raw)
        - sigmoid_scale: sigmoid(raw) * scale
        Keys without an entry in PARAM_TRANSFORMS pass through unchanged.
        """
        result = params.copy()

        for key, transform in PARAM_TRANSFORMS.items():
            if key not in result:
                continue

            raw = result[key]
            transform_type = transform["type"]

            if transform_type == "none":
                # No transform (gain etc.)
                actual = raw

            elif transform_type == "minmax":
                # MinMax transform
                min_val = transform["min"]
                max_val = transform["max"]
                actual = minmax_transform(raw, min_val, max_val)
                print(f" [MinMax] {key}: {raw:.4f} โ†’ {actual:.2f} (range: {min_val}-{max_val})")

            elif transform_type == "sigmoid":
                # Plain sigmoid transform
                actual = sigmoid(raw)
                print(f" [Sigmoid] {key}: {raw:.4f} โ†’ {actual:.4f}")

            elif transform_type == "sigmoid_scale":
                # sigmoid * scale transform
                scale = transform["scale"]
                actual = sigmoid(raw) * scale
                print(f" [Sigmoid*{scale}] {key}: {raw:.4f} โ†’ {actual:.4f}")

            else:
                # Unknown type: pass through untouched.
                actual = raw

            result[key] = actual

        return result

    def _clamp_values(self, params: Dict[str, float]) -> Dict[str, float]:
        """Clamp every known parameter into its PARAM_RANGES interval (logs real changes)."""
        result = params.copy()

        for key, (min_val, max_val) in PARAM_RANGES.items():
            if key in result:
                original = result[key]
                clamped = max(min_val, min(max_val, original))
                if abs(clamped - original) > 0.001:
                    print(f" [Clamp] {key}: {original:.4f} โ†’ {clamped:.4f}")
                result[key] = clamped

        return result

    def _parse_output(self, output_text: str) -> Tuple[Dict[str, float], bool]:
        """Parse the LLM output text into a parameter dict.

        Returns: (parameter dict, parse-success flag). On any failure the
        dict is a copy of DEFAULT_PARAMETERS and the flag is False — the
        flag tells predict() whether raw→actual conversion is still needed.
        """

        print(f" [Parse] Raw output ๊ธธ์ด: {len(output_text)} ๋ฌธ์ž")

        json_str = None

        try:
            text = output_text

            # Drop <think> reasoning blocks (Qwen-style output).
            text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)

            # Prefer fenced code-block content when present.
            code_match = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
            if code_match:
                text = code_match.group(1)

            # First balanced JSON object in the remaining text.
            json_str = self._extract_json_object(text)

            if json_str:
                print(f" [Parse] JSON ๋ฐœ๊ฒฌ (๊ธธ์ด: {len(json_str)})")

                # Aggressive artifact scrubbing before parsing.
                json_str = self._preprocess_json(json_str)

                raw_params = json.loads(json_str)

                # Merge parsed values over the defaults.
                result = DEFAULT_PARAMETERS.copy()
                parsed_count = 0

                for key, value in raw_params.items():
                    try:
                        # Canonicalize parametrization-style key names.
                        norm_key = self._normalize_key(key)
                        float_val = float(value)

                        # Exact match against known parameter names.
                        if norm_key in DEFAULT_PARAMETERS:
                            result[norm_key] = float_val
                            parsed_count += 1
                        else:
                            # Fuzzy match: same first and last dotted segment
                            # (tolerates middle segments mangled by _preprocess_json).
                            for default_key in DEFAULT_PARAMETERS.keys():
                                norm_parts = norm_key.split('.')
                                default_parts = default_key.split('.')

                                if len(norm_parts) >= 3 and len(default_parts) >= 3:
                                    if norm_parts[0] == default_parts[0] and norm_parts[-1] == default_parts[-1]:
                                        result[default_key] = float_val
                                        parsed_count += 1
                                        break

                    except (ValueError, TypeError) as e:
                        print(f" [Parse] ๋ณ€ํ™˜ ์‹คํŒจ: {key}={value}")

                print(f" [Parse] โœ… {parsed_count}๊ฐœ ํŒŒ๋ผ๋ฏธํ„ฐ ๋งคํ•‘๋จ")
                return result, True  # success

        except json.JSONDecodeError as e:
            print(f" [Parse] โŒ JSON ์—๋Ÿฌ: {e}")
        except Exception as e:
            print(f" [Parse] โŒ ์˜ˆ์™ธ: {e}")

        print(f" [Parse] โš ๏ธ ๊ธฐ๋ณธ๊ฐ’ ํด๋ฐฑ")
        return DEFAULT_PARAMETERS.copy(), False  # failure

    def predict(self, audio_path: str, text_prompt: str = "") -> Dict[str, float]:
        """Predict effect parameters for an audio file + text prompt.

        Always returns a dict in effect_chain format ('.Q' keys renamed to
        '.q'); falls back to DEFAULT_PARAMETERS + keyword presets when the
        model is unavailable or any stage fails.
        """

        self.request_count += 1
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        print(f"\n{'='*60}")
        print(f"[AIEffector V5] ๐ŸŽต ์š”์ฒญ #{self.request_count} - {timestamp}")
        print(f"{'='*60}")
        print(f" ๐Ÿ“‚ ์˜ค๋””์˜ค: {Path(audio_path).name}")
        print(f" ๐Ÿ’ฌ ์›๋ณธ: '{text_prompt}'")

        # Synonym substitution on the prompt.
        processed_prompt = self._preprocess_text(text_prompt)
        if processed_prompt != text_prompt.lower():
            print(f" ๐Ÿ’ฌ ๋ณ€ํ™˜: '{processed_prompt}'")

        print(f" ๐Ÿค– ๋ชจ๋ธ: {'AI' if self.is_loaded() else 'ํ”„๋ฆฌ์…‹'}")

        # No model → preset-only path.
        if not self.is_loaded():
            print(f"\n โš ๏ธ AI ๋ชจ๋ธ ๋ฏธ๋กœ๋“œ")
            params = DEFAULT_PARAMETERS.copy()
            params.update(self._apply_preset(processed_prompt))
            self._log_parameters(params)
            return self._convert_to_effect_chain_format(params)

        try:
            # 1. CLAP feature extraction
            print(f"\n ๐Ÿ“Š [Step 1] CLAP ํŠน์ง• ์ถ”์ถœ...")
            audio_features = self.audio_encoder.get_audio_features(audio_path)

            # All-zero vector is the encoder's failure signal → preset fallback.
            if not audio_features or all(f == 0 for f in audio_features):
                print(f" โš ๏ธ ์‹คํŒจ, ํ”„๋ฆฌ์…‹ ํด๋ฐฑ")
                params = DEFAULT_PARAMETERS.copy()
                params.update(self._apply_preset(processed_prompt))
                self._log_parameters(params)
                return self._convert_to_effect_chain_format(params)

            print(f" โœ… {len(audio_features)}์ฐจ์›")

            # 2. Prompt construction
            print(f"\n ๐Ÿ”ค [Step 2] ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ...")
            prompt = self._format_prompt(processed_prompt, audio_features)

            # 3. Tokenization (no length limit)
            print(f"\n ๐Ÿ”ข [Step 3] ํ† ํฐํ™”...")
            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                truncation=False,
            ).to(self.device)
            print(f" ํ† ํฐ ์ˆ˜: {inputs['input_ids'].shape[1]}")

            # 4. LLM generation
            print(f"\n ๐Ÿง  [Step 4] LLM ์ถ”๋ก ...")
            import time
            start = time.time()

            with torch.no_grad():
                # NOTE(review): temperature is ignored when do_sample=False
                # (greedy decoding) — transformers may warn; confirm intent.
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=500,
                    do_sample=False,
                    temperature=0.1,
                    pad_token_id=self.tokenizer.pad_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                )

            print(f" ์ถ”๋ก  ์‹œ๊ฐ„: {time.time()-start:.2f}์ดˆ")

            # 5. Decode only the newly generated tokens.
            print(f"\n ๐Ÿ“ [Step 5] ๋””์ฝ”๋”ฉ...")
            gen_tokens = outputs[0][inputs['input_ids'].shape[1]:]
            output_text = self.tokenizer.decode(gen_tokens, skip_special_tokens=True).strip()
            print(f" ์ถœ๋ ฅ (์ฒ˜์Œ 500์ž):\n{output_text[:500]}")

            # 6. Parse the JSON parameter object.
            print(f"\n ๐Ÿ”ง [Step 6] ํŒŒ์‹ฑ...")
            raw_params, parse_success = self._parse_output(output_text)

            # 7. Raw → actual conversion (only when parsing succeeded —
            #    the fallback defaults are already actual values).
            if parse_success:
                print(f"\n ๐Ÿ”„ [Step 7] Raw โ†’ Actual ๋ณ€ํ™˜...")
                actual_params = self._convert_raw_to_actual(raw_params)
            else:
                print(f"\n ๐Ÿ”„ [Step 7] ๋ณ€ํ™˜ ๊ฑด๋„ˆ๋œ€ (๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ)")
                actual_params = raw_params  # defaults are already actual values

            # 8. Clamp into valid ranges.
            print(f"\n ๐Ÿ“ [Step 8] ๊ฐ’ ํด๋žจํ•‘...")
            clamped_params = self._clamp_values(actual_params)

            # 9. Preset supplementation for parameters the LLM does not learn
            #    (delay.delay_time, reverb group).
            print(f"\n ๐ŸŽ›๏ธ [Step 9] ํ”„๋ฆฌ์…‹ ๋ณด์™„...")
            preset = self._apply_preset(processed_prompt)

            # delay.delay_time: preset always wins (never learned).
            if 'delay.delay_time' in preset:
                clamped_params['delay.delay_time'] = preset['delay.delay_time']
                print(f" delay.delay_time: {preset['delay.delay_time']} (ํ”„๋ฆฌ์…‹)")

            # Reverb: preset wins only when it differs from the default
            # (i.e. the preset actually expresses an opinion).
            reverb_keys = ['reverb.room_size', 'reverb.damping', 'reverb.wet_level', 'reverb.dry_level']
            for key in reverb_keys:
                if key in preset:
                    if preset[key] != DEFAULT_PARAMETERS.get(key):
                        clamped_params[key] = preset[key]
                        print(f" {key}: {preset[key]} (ํ”„๋ฆฌ์…‹)")

            # 10. Final logging
            self._log_parameters(clamped_params)

            print(f"\n โœ… ์™„๋ฃŒ!")
            print(f"{'='*60}\n")

            return self._convert_to_effect_chain_format(clamped_params)

        except Exception as e:
            # Catch-all boundary: never propagate; fall back to defaults + presets.
            print(f"\n โŒ ์‹คํŒจ: {e}")
            import traceback
            traceback.print_exc()
            params = DEFAULT_PARAMETERS.copy()
            params.update(self._apply_preset(processed_prompt))
            self._log_parameters(params)
            return self._convert_to_effect_chain_format(params)

    def _convert_to_effect_chain_format(self, params: Dict[str, float]) -> Dict[str, float]:
        """Rename keys to effect_chain.py convention ('.Q' → '.q')."""
        result = {}
        for key, value in params.items():
            new_key = key.replace('.Q', '.q')
            result[new_key] = value
        return result

    def _log_parameters(self, params: Dict[str, float]):
        """Pretty-print the final parameter set, grouped by effect."""
        print(f"\n ๐Ÿ“‹ ์ตœ์ข… ํŒŒ๋ผ๋ฏธํ„ฐ:")
        print(f" [EQ Peak 1] freq={params.get('eq_peak1.params.freq',0):.0f}Hz, gain={params.get('eq_peak1.params.gain',0):.2f}dB, Q={params.get('eq_peak1.params.Q',0):.2f}")
        print(f" [EQ Peak 2] freq={params.get('eq_peak2.params.freq',0):.0f}Hz, gain={params.get('eq_peak2.params.gain',0):.2f}dB, Q={params.get('eq_peak2.params.Q',0):.2f}")
        print(f" [Low Shelf] freq={params.get('eq_lowshelf.params.freq',0):.0f}Hz, gain={params.get('eq_lowshelf.params.gain',0):.2f}dB")
        print(f" [High Shelf] freq={params.get('eq_highshelf.params.freq',0):.0f}Hz, gain={params.get('eq_highshelf.params.gain',0):.2f}dB")
        print(f" [Compressor] threshold={params.get('compressor.threshold_db',0):.1f}dB, ratio={params.get('compressor.ratio',0):.1f}")
        print(f" [Distortion] {params.get('distortion_amount',0):.4f}")
        print(f" [Delay] time={params.get('delay.delay_time',0):.3f}s, fb={params.get('delay.feedback',0):.2f}, mix={params.get('delay.mix',0):.2f}")
        print(f" [Reverb] room={params.get('reverb.room_size',0):.2f}, damp={params.get('reverb.damping',0):.2f}, wet={params.get('reverb.wet_level',0):.2f}, dry={params.get('reverb.dry_level',0):.2f}")
        print(f" [Wet Mix] {params.get('final_wet_mix',0):.2f}")