heybaeheef committed on
Commit
23c5882
·
verified ·
1 Parent(s): 465c46b

Delete models/ai_effector.py

Browse files
Files changed (1) hide show
  1. models/ai_effector.py +0 -692
models/ai_effector.py DELETED
@@ -1,692 +0,0 @@
1
- """
2
- AI Effector - DiffVox LLM ๊ธฐ๋ฐ˜ ์ดํŽ™ํŠธ ํŒŒ๋ผ๋ฏธํ„ฐ ์˜ˆ์ธก
3
- ===================================================
4
- V6: Compressor/Reverb ํŒŒ๋ผ๋ฏธํ„ฐ ์ถ”๊ฐ€
5
- - freq: MinMax(min, max) ๋ณ€ํ™˜
6
- - Q: MinMax(min, max) ๋ณ€ํ™˜
7
- - delay.feedback, delay.mix: sigmoid
8
- - distortion_amount: sigmoid * 0.1
9
- - final_wet_mix: sigmoid
10
- - Compressor/Reverb: ํ”„๋ฆฌ์…‹ ๊ธฐ๋ฐ˜ (ํ•™์Šต๋˜์ง€ ์•Š์Œ)
11
- """
12
-
13
- import os
14
- import json
15
- import re
16
- import math
17
- import torch
18
- import numpy as np
19
- from typing import Dict, List, Optional, Any, Tuple
20
- from pathlib import Path
21
- from datetime import datetime
22
- import warnings
23
-
24
- warnings.filterwarnings("ignore")
25
-
26
-
27
def sigmoid(x: float) -> float:
    """Logistic function 1 / (1 + e^(-x)), saturating instead of overflowing."""
    try:
        value = 1 / (1 + math.exp(-x))
    except OverflowError:
        # math.exp(-x) only overflows for hugely negative x; saturate there.
        value = 0.0 if x < 0 else 1.0
    return value
33
-
34
-
35
def minmax_transform(raw: float, min_val: float, max_val: float) -> float:
    """Squash an unbounded raw value into [min_val, max_val] via the logistic curve."""
    span = max_val - min_val
    return sigmoid(raw) * span + min_val
38
-
39
-
40
# =====================================================
# Parameter ranges taken from fx.py (exact values!)
# Each entry names the transform AIEffector._convert_raw_to_actual applies to
# the raw LLM output: "none" (pass-through), "minmax" (sigmoid rescaled into
# [min, max]), "sigmoid", or "sigmoid_scale" (sigmoid multiplied by "scale").
# =====================================================
PARAM_TRANSFORMS = {
    # Peak EQ 1 & 2
    "eq_peak1.params.freq": {"type": "minmax", "min": 33.0, "max": 17500.0},
    "eq_peak1.params.Q": {"type": "minmax", "min": 0.2, "max": 20.0},
    "eq_peak1.params.gain": {"type": "none"},

    "eq_peak2.params.freq": {"type": "minmax", "min": 33.0, "max": 17500.0},
    "eq_peak2.params.Q": {"type": "minmax", "min": 0.2, "max": 20.0},
    "eq_peak2.params.gain": {"type": "none"},

    # LowShelf
    "eq_lowshelf.params.freq": {"type": "minmax", "min": 30.0, "max": 200.0},
    "eq_lowshelf.params.gain": {"type": "none"},

    # HighShelf
    "eq_highshelf.params.freq": {"type": "minmax", "min": 2500.0, "max": 16000.0},
    "eq_highshelf.params.gain": {"type": "none"},

    # Delay
    "delay.delay_time": {"type": "none"},
    "delay.feedback": {"type": "sigmoid"},
    "delay.mix": {"type": "sigmoid"},

    # Distortion
    "distortion_amount": {"type": "sigmoid_scale", "scale": 0.1},

    # Wet Mix
    "final_wet_mix": {"type": "sigmoid"},
}
72
-
73
# =====================================================
# Default parameters (V6: Compressor/Reverb added)
# Used as the base dict in AIEffector._parse_output and as the whole-pipeline
# fallback whenever the model or parsing fails.
# =====================================================
DEFAULT_PARAMETERS = {
    # EQ
    "eq_peak1.params.freq": 1000.0,
    "eq_peak1.params.gain": 0.0,
    "eq_peak1.params.Q": 1.0,
    "eq_peak2.params.freq": 4000.0,
    "eq_peak2.params.gain": 0.0,
    "eq_peak2.params.Q": 1.0,
    "eq_lowshelf.params.freq": 115.0,
    "eq_lowshelf.params.gain": 0.0,
    "eq_highshelf.params.freq": 8000.0,
    "eq_highshelf.params.gain": 0.0,

    # Compressor (not learned - preset-driven)
    "compressor.threshold": -18.0,
    "compressor.ratio": 2.0,

    # Distortion
    "distortion_amount": 0.0,

    # Delay
    "delay.delay_time": 0.02,
    "delay.feedback": 0.3,
    "delay.mix": 0.2,

    # Reverb (not learned - preset-driven)
    "reverb.room_size": 0.3,
    "reverb.damping": 0.5,
    "reverb.wet_level": 0.0,
    "reverb.dry_level": 1.0,

    # Master
    "final_wet_mix": 0.5
}
110
-
111
- # ํŒŒ๋ผ๋ฏธํ„ฐ ๋ฒ”์œ„ ์ œํ•œ
112
- PARAM_RANGES = {
113
- "eq_peak1.params.freq": (33.0, 17500.0),
114
- "eq_peak1.params.gain": (-12.0, 12.0),
115
- "eq_peak1.params.Q": (0.2, 20.0),
116
- "eq_peak2.params.freq": (33.0, 17500.0),
117
- "eq_peak2.params.gain": (-12.0, 12.0),
118
- "eq_peak2.params.Q": (0.2, 20.0),
119
- "eq_lowshelf.params.freq": (30.0, 200.0),
120
- "eq_lowshelf.params.gain": (-12.0, 12.0),
121
- "eq_highshelf.params.freq": (2500.0, 16000.0),
122
- "eq_highshelf.params.gain": (-12.0, 12.0),
123
- "compressor.threshold": (-40.0, 0.0),
124
- "compressor.ratio": (1.0, 20.0),
125
- "distortion_amount": (0.0, 0.1),
126
- "delay.delay_time": (0.01, 1.0),
127
- "delay.feedback": (0.0, 0.95),
128
- "delay.mix": (0.0, 1.0),
129
- "reverb.room_size": (0.0, 1.0),
130
- "reverb.damping": (0.0, 1.0),
131
- "reverb.wet_level": (0.0, 1.0),
132
- "reverb.dry_level": (0.0, 1.0),
133
- "final_wet_mix": (0.0, 1.0),
134
- }
135
-
136
- # ๋™์˜์–ด ๋งคํ•‘
137
- SYNONYM_MAP = {
138
- "calm": "warm soft",
139
- "relaxed": "warm soft",
140
- "chill": "warm soft",
141
- "smooth": "warm",
142
- "mellow": "warm soft",
143
- "breezy": "bright spacious",
144
- "airy": "bright spacious",
145
- "light": "bright",
146
- "crisp": "bright",
147
- "clean": "bright",
148
- "dreamy": "warm spacious",
149
- "ethereal": "bright spacious",
150
- "atmospheric": "spacious",
151
- "ambient": "spacious warm",
152
- "aggressive": "saturated bright",
153
- "powerful": "saturated",
154
- "punchy": "saturated bright",
155
- "hard": "saturated",
156
- "gritty": "saturated dark",
157
- "soft": "warm",
158
- "harsh": "bright saturated",
159
- "muddy": "dark",
160
- "thin": "bright",
161
- "thick": "warm dark",
162
- "full": "warm",
163
- "reverb": "spacious",
164
- "echo": "spacious",
165
- "wet": "spacious",
166
- }
167
-
168
# =====================================================
# Style presets (V6: includes Compressor/Reverb)
# Applied by AIEffector._apply_preset for every style word found in the
# prompt; "warm" is the fallback when nothing matches. These supply the
# Compressor/Reverb (and one Delay) values the LLM does not predict.
# =====================================================
STYLE_PRESETS = {
    "warm": {
        "compressor.threshold": -15.0,
        "compressor.ratio": 3.0,
        "reverb.room_size": 0.2,
        "reverb.wet_level": 0.1,
        "reverb.dry_level": 0.9,
    },
    "bright": {
        "compressor.threshold": -12.0,
        "compressor.ratio": 2.5,
        "reverb.room_size": 0.15,
        "reverb.wet_level": 0.08,
        "reverb.dry_level": 0.92,
    },
    "spacious": {
        "delay.delay_time": 0.05,
        "compressor.threshold": -18.0,
        "compressor.ratio": 2.0,
        "reverb.room_size": 0.6,
        "reverb.wet_level": 0.35,
        "reverb.dry_level": 0.65,
    },
    "dark": {
        "compressor.threshold": -20.0,
        "compressor.ratio": 2.5,
        "reverb.room_size": 0.4,
        "reverb.wet_level": 0.2,
        "reverb.dry_level": 0.8,
    },
    "saturated": {
        "compressor.threshold": -10.0,
        "compressor.ratio": 4.0,
        "reverb.room_size": 0.1,
        "reverb.wet_level": 0.05,
        "reverb.dry_level": 0.95,
    },
    "soft": {
        "compressor.threshold": -22.0,
        "compressor.ratio": 1.5,
        "reverb.room_size": 0.3,
        "reverb.wet_level": 0.15,
        "reverb.dry_level": 0.85,
    },
}
216
-
217
-
218
class CLAPAudioEncoder:
    """Turns an audio file into a fixed-size feature vector with a pretrained
    CLAP model, mean-pooling the embedding down to ``output_dim`` values.

    If the model cannot be loaded, the encoder degrades gracefully and
    returns all-zero feature vectors.
    """

    def __init__(self, output_dim: int = 64, model_name: str = "laion/larger_clap_music"):
        self.output_dim = output_dim
        self.model_name = model_name
        self.target_sr = 48000  # sample rate the audio is resampled to before CLAP
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.model = None
        self.processor = None
        self._load_model()

    def _load_model(self):
        """Load the CLAP model/processor; on failure ``model`` stays None."""
        try:
            from transformers import ClapModel, ClapProcessor

            print(f"[CLAPEncoder] CLAP ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘: {self.model_name}")

            self.processor = ClapProcessor.from_pretrained(self.model_name)
            self.model = ClapModel.from_pretrained(self.model_name)
            self.model = self.model.to(self.device)
            self.model.eval()

            print(f"[CLAPEncoder] โœ… CLAP ๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ")

        except Exception as e:
            print(f"[CLAPEncoder] โŒ ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")

    def get_audio_features(self, audio_path: str) -> List[float]:
        """Return ``output_dim`` pooled CLAP features for the file at
        ``audio_path``; a zero vector when the model is missing or extraction
        fails."""
        if self.model is None:
            return [0.0] * self.output_dim

        try:
            import librosa

            waveform, _rate = librosa.load(audio_path, sr=self.target_sr, mono=True)

            batch = self.processor(
                audios=waveform,
                sampling_rate=self.target_sr,
                return_tensors="pt",
                padding=True
            ).to(self.device)

            with torch.no_grad():
                embedding = self.model.get_audio_features(**batch)

            full_vector = embedding[0].cpu().numpy()
            reduced = self._reduce_dimension(full_vector)
            return reduced.tolist()

        except Exception as e:
            print(f"[CLAPEncoder] ํŠน์ง• ์ถ”์ถœ ์‹คํŒจ: {e}")
            return [0.0] * self.output_dim

    def _reduce_dimension(self, features: np.ndarray) -> np.ndarray:
        """Mean-pool ``features`` into ``output_dim`` nearly-equal chunks
        (the first ``len(features) % output_dim`` chunks get one extra item)."""
        total = len(features)
        if total == self.output_dim:
            return features

        base, extra = divmod(total, self.output_dim)

        # Build chunk boundaries, then average each [lo, hi) slice.
        bounds = [0]
        for chunk in range(self.output_dim):
            bounds.append(bounds[-1] + base + (1 if chunk < extra else 0))

        pooled = [np.mean(features[lo:hi]) for lo, hi in zip(bounds, bounds[1:])]
        return np.array(pooled)

    def is_loaded(self) -> bool:
        """True once the CLAP model has loaded successfully."""
        return self.model is not None
294
-
295
-
296
class AIEffector:
    """LLM-driven prediction of audio effect-chain parameters (V6).

    Pipeline: CLAP audio features + text prompt -> LoRA-tuned causal LM ->
    JSON of raw parameter values -> transform (PARAM_TRANSFORMS) -> clamp
    (PARAM_RANGES) -> preset overlay (STYLE_PRESETS) for the non-learned
    Compressor/Reverb parameters. Any failure falls back to
    DEFAULT_PARAMETERS + presets.
    """

    def __init__(
        self,
        model_repo_id: str = "heybaeheef/KU_SW_Academy",
        model_subfolder: str = "checkpoints",
        base_model_name: str = "Qwen/Qwen3-8B",
        audio_feature_dim: int = 64,
        use_huggingface: bool = True
    ):
        # When use_huggingface is False, model_repo_id is joined with
        # model_subfolder and treated as a local filesystem path.
        self.model_repo_id = model_repo_id
        self.model_subfolder = model_subfolder
        self.base_model_name = base_model_name
        self.audio_feature_dim = audio_feature_dim
        self.use_huggingface = use_huggingface

        self.model = None      # PeftModel once loaded; None keeps preset fallback active
        self.tokenizer = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        print(f"[AIEffector V6] CLAP ์ธ์ฝ”๋” ์ดˆ๊ธฐํ™”...")
        self.audio_encoder = CLAPAudioEncoder(output_dim=audio_feature_dim)

        self.request_count = 0
        self._load_model()

    def _load_model(self):
        """Load the base causal LM (4-bit NF4 on CUDA, fp32 on CPU) plus the
        LoRA adapter. On any failure, reset ``model``/``tokenizer`` to None so
        ``predict`` uses the preset-only path."""
        try:
            from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
            from peft import PeftModel

            print(f"[AIEffector] ๋ฒ ์ด์Šค ๋ชจ๋ธ ๋กœ๋”ฉ: {self.base_model_name}")

            if torch.cuda.is_available():
                # 4-bit NF4 quantization keeps the base model within GPU memory.
                bnb_config = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_quant_type="nf4",
                    bnb_4bit_compute_dtype=torch.float16,
                    bnb_4bit_use_double_quant=True
                )
                base_model = AutoModelForCausalLM.from_pretrained(
                    self.base_model_name,
                    quantization_config=bnb_config,
                    device_map="auto",
                    trust_remote_code=True
                )
            else:
                base_model = AutoModelForCausalLM.from_pretrained(
                    self.base_model_name,
                    torch_dtype=torch.float32,
                    device_map="auto",
                    trust_remote_code=True
                )

            self.tokenizer = AutoTokenizer.from_pretrained(
                self.base_model_name,
                trust_remote_code=True
            )

            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            print(f"[AIEffector] LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ...")

            if self.use_huggingface:
                self.model = PeftModel.from_pretrained(
                    base_model,
                    self.model_repo_id,
                    subfolder=self.model_subfolder,
                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                )
            else:
                local_path = os.path.join(self.model_repo_id, self.model_subfolder)
                self.model = PeftModel.from_pretrained(
                    base_model,
                    local_path,
                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                )

            self.model.eval()
            print(f"[AIEffector] โœ… ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต!")

        except Exception as e:
            print(f"[AIEffector] โŒ ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
            import traceback
            traceback.print_exc()
            self.model = None
            self.tokenizer = None

    def is_loaded(self) -> bool:
        """True when the LoRA-adapted LLM loaded successfully."""
        return self.model is not None

    def _preprocess_text(self, text: str) -> str:
        """Lower-case ``text`` and expand SYNONYM_MAP words into core style terms."""
        text_lower = text.lower()
        for synonym, replacement in SYNONYM_MAP.items():
            if synonym in text_lower:
                text_lower = text_lower.replace(synonym, replacement)
                print(f" [Synonym] '{synonym}' โ†’ '{replacement}'")
        return text_lower

    def _apply_preset(self, prompt: str) -> Dict[str, float]:
        """Apply presets — collect STYLE_PRESETS values for every style word in
        ``prompt`` (later matches overwrite earlier ones); fall back to the
        "warm" preset when nothing matches."""
        params = {}
        prompt_lower = prompt.lower()

        matched = []
        for style_name, style_params in STYLE_PRESETS.items():
            if style_name in prompt_lower:
                params.update(style_params)
                matched.append(style_name)

        if matched:
            print(f" [Preset] ๋งค์นญ: {matched}")
        else:
            # Default preset (used when no style word matched)
            params.update(STYLE_PRESETS["warm"])
            print(f" [Preset] ๊ธฐ๋ณธ๊ฐ’ ์ ์šฉ: warm")

        return params

    def _format_prompt(self, text_prompt: str, audio_features: List[float]) -> str:
        """Build the LLM prompt; the model continues after "Parameters:"."""
        audio_state_str = json.dumps(audio_features)
        return f"""Task: Convert text to audio parameters.
Audio: {audio_state_str}
Text: {text_prompt}
Parameters:"""

    def _preprocess_json(self, json_str: str) -> str:
        """Repair common LLM JSON glitches before json.loads: digit-group
        underscores (12_000 -> 12000), trailing commas, and NaN/Infinity
        literals replaced with finite numbers."""
        json_str = re.sub(r'(\d)_(\d)', r'\1\2', json_str)
        json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
        json_str = re.sub(r'\bNaN\b', '0', json_str)
        json_str = re.sub(r'\bInfinity\b', '999999', json_str)
        json_str = re.sub(r'-Infinity\b', '-999999', json_str)
        return json_str

    def _normalize_key(self, key: str) -> str:
        """Collapse parametrized key paths: x.parametrizations.p.original -> x.p."""
        key = re.sub(r'\.parametrizations\.(\w+)\.original', r'.\1', key)
        return key

    def _extract_json_object(self, text: str) -> Optional[str]:
        """Return the first brace-balanced {...} substring of ``text``, or None."""
        start = text.find('{')
        if start == -1:
            return None

        depth = 0
        for i, char in enumerate(text[start:], start):
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    return text[start:i+1]
        return None

    def _convert_raw_to_actual(self, params: Dict[str, float]) -> Dict[str, float]:
        """Convert raw LLM values to actual values using PARAM_TRANSFORMS."""
        result = params.copy()

        for key, transform in PARAM_TRANSFORMS.items():
            if key not in result:
                continue

            raw = result[key]
            transform_type = transform["type"]

            if transform_type == "none":
                actual = raw

            elif transform_type == "minmax":
                min_val = transform["min"]
                max_val = transform["max"]
                actual = minmax_transform(raw, min_val, max_val)
                print(f" [MinMax] {key}: {raw:.4f} โ†’ {actual:.2f} (range: {min_val}-{max_val})")

            elif transform_type == "sigmoid":
                actual = sigmoid(raw)
                print(f" [Sigmoid] {key}: {raw:.4f} โ†’ {actual:.4f}")

            elif transform_type == "sigmoid_scale":
                scale = transform["scale"]
                actual = sigmoid(raw) * scale
                print(f" [Sigmoid*{scale}] {key}: {raw:.4f} โ†’ {actual:.4f}")

            else:
                # Unknown transform type: pass the raw value through unchanged.
                actual = raw

            result[key] = actual

        return result

    def _clamp_values(self, params: Dict[str, float]) -> Dict[str, float]:
        """Clamp every known parameter into its PARAM_RANGES interval."""
        result = params.copy()

        for key, (min_val, max_val) in PARAM_RANGES.items():
            if key in result:
                original = result[key]
                clamped = max(min_val, min(max_val, original))
                if abs(clamped - original) > 0.001:
                    print(f" [Clamp] {key}: {original:.4f} โ†’ {clamped:.4f}")
                result[key] = clamped

        return result

    def _parse_output(self, output_text: str) -> Dict[str, float]:
        """Parse LLM output into a complete parameter dict.

        Strips <think> blocks and code fences, extracts the first balanced
        JSON object, repairs it, then maps its keys onto DEFAULT_PARAMETERS —
        exact key match first, then a fuzzy match on the first and last dotted
        path segments. Returns a copy of DEFAULT_PARAMETERS on any failure.
        """

        print(f" [Parse] Raw output ๊ธธ์ด: {len(output_text)} ๋ฌธ์ž")

        json_str = None

        try:
            text = output_text

            # Drop chain-of-thought blocks emitted by the model.
            text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)

            # Prefer the contents of a ```json fenced block when present.
            code_match = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
            if code_match:
                text = code_match.group(1)

            json_str = self._extract_json_object(text)

            if json_str:
                print(f" [Parse] JSON ๋ฐœ๊ฒฌ (๊ธธ์ด: {len(json_str)})")

                json_str = self._preprocess_json(json_str)

                raw_params = json.loads(json_str)

                result = DEFAULT_PARAMETERS.copy()
                parsed_count = 0

                for key, value in raw_params.items():
                    try:
                        norm_key = self._normalize_key(key)
                        float_val = float(value)

                        if norm_key in DEFAULT_PARAMETERS:
                            result[norm_key] = float_val
                            parsed_count += 1
                        else:
                            # Fuzzy match: same effect block (first segment)
                            # and same parameter name (last segment).
                            for default_key in DEFAULT_PARAMETERS.keys():
                                norm_parts = norm_key.split('.')
                                default_parts = default_key.split('.')

                                if len(norm_parts) >= 3 and len(default_parts) >= 3:
                                    if norm_parts[0] == default_parts[0] and norm_parts[-1] == default_parts[-1]:
                                        result[default_key] = float_val
                                        parsed_count += 1
                                        break

                    except (ValueError, TypeError) as e:
                        print(f" [Parse] ๋ณ€ํ™˜ ์‹คํŒจ: {key}={value}")

                print(f" [Parse] โœ… {parsed_count}๊ฐœ ํŒŒ๋ผ๋ฏธํ„ฐ ๋งคํ•‘๋จ")
                return result

        except json.JSONDecodeError as e:
            print(f" [Parse] โŒ JSON ์—๋Ÿฌ: {e}")
        except Exception as e:
            print(f" [Parse] โŒ ์˜ˆ์™ธ: {e}")

        print(f" [Parse] โš ๏ธ ๊ธฐ๋ณธ๊ฐ’ ํด๋ฐฑ")
        return DEFAULT_PARAMETERS.copy()

    def predict(self, audio_path: str, text_prompt: str = "") -> Dict[str, float]:
        """Predict effect parameters for ``audio_path`` guided by ``text_prompt``.

        Returns a dict in effect_chain.py format ('.Q' keys renamed to '.q').
        Falls back to DEFAULT_PARAMETERS + STYLE_PRESETS when the model is not
        loaded, CLAP features are unavailable, or any pipeline step raises.
        """

        self.request_count += 1
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        print(f"\n{'='*60}")
        print(f"[AIEffector V6] ๐ŸŽต ์š”์ฒญ #{self.request_count} - {timestamp}")
        print(f"{'='*60}")
        print(f" ๐Ÿ“‚ ์˜ค๋””์˜ค: {Path(audio_path).name}")
        print(f" ๐Ÿ’ฌ ์›๋ณธ: '{text_prompt}'")

        # Synonym conversion
        processed_prompt = self._preprocess_text(text_prompt)
        if processed_prompt != text_prompt.lower():
            print(f" ๐Ÿ’ฌ ๋ณ€ํ™˜: '{processed_prompt}'")

        print(f" ๐Ÿค– ๋ชจ๋ธ: {'AI' if self.is_loaded() else 'ํ”„๋ฆฌ์…‹'}")

        # No model loaded -> preset-only path
        if not self.is_loaded():
            print(f"\n โš ๏ธ AI ๋ชจ๋ธ ๋ฏธ๋กœ๋“œ")
            params = DEFAULT_PARAMETERS.copy()
            params.update(self._apply_preset(processed_prompt))
            self._log_parameters(params)
            return self._convert_to_effect_chain_format(params)

        try:
            # 1. Extract CLAP features
            print(f"\n ๐Ÿ“Š [Step 1] CLAP ํŠน์ง• ์ถ”์ถœ...")
            audio_features = self.audio_encoder.get_audio_features(audio_path)

            # All-zero features signal extraction failure -> preset fallback.
            if not audio_features or all(f == 0 for f in audio_features):
                print(f" โš ๏ธ ์‹คํŒจ, ํ”„๋ฆฌ์…‹ ํด๋ฐฑ")
                params = DEFAULT_PARAMETERS.copy()
                params.update(self._apply_preset(processed_prompt))
                self._log_parameters(params)
                return self._convert_to_effect_chain_format(params)

            print(f" โœ… {len(audio_features)}์ฐจ์›")

            # 2. Build the prompt
            print(f"\n ๐Ÿ”ค [Step 2] ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ...")
            prompt = self._format_prompt(processed_prompt, audio_features)

            # 3. Tokenize
            print(f"\n ๐Ÿ”ข [Step 3] ํ† ํฐํ™”...")
            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                truncation=False,
            ).to(self.device)
            print(f" ํ† ํฐ ์ˆ˜: {inputs['input_ids'].shape[1]}")

            # 4. LLM generation (greedy decoding; temperature is inert with
            #    do_sample=False)
            print(f"\n ๐Ÿง  [Step 4] LLM ์ถ”๋ก ...")
            import time
            start = time.time()

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=500,
                    do_sample=False,
                    temperature=0.1,
                    pad_token_id=self.tokenizer.pad_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                )

            print(f" ์ถ”๋ก  ์‹œ๊ฐ„: {time.time()-start:.2f}์ดˆ")

            # 5. Decode only the newly generated tokens (skip the prompt)
            print(f"\n ๐Ÿ“ [Step 5] ๋””์ฝ”๋”ฉ...")
            gen_tokens = outputs[0][inputs['input_ids'].shape[1]:]
            output_text = self.tokenizer.decode(gen_tokens, skip_special_tokens=True).strip()
            print(f" ์ถœ๋ ฅ (์ฒ˜์Œ 500์ž):\n{output_text[:500]}")

            # 6. Parse the generated JSON
            print(f"\n ๐Ÿ”ง [Step 6] ํŒŒ์‹ฑ...")
            raw_params = self._parse_output(output_text)

            # 7. Raw -> actual transform
            print(f"\n ๐Ÿ”„ [Step 7] Raw โ†’ Actual ๋ณ€ํ™˜...")
            actual_params = self._convert_raw_to_actual(raw_params)

            # 8. Clamp values into legal ranges
            print(f"\n ๐Ÿ“ [Step 8] ๊ฐ’ ํด๋žจํ•‘...")
            clamped_params = self._clamp_values(actual_params)

            # 9. Preset overlay (Compressor/Reverb - parameters not learned by
            #    the LLM; presets always win for these keys)
            print(f"\n ๐ŸŽ›๏ธ [Step 9] ํ”„๋ฆฌ์…‹ ๋ณด์™„ (Compressor/Reverb)...")
            preset = self._apply_preset(processed_prompt)
            for key in preset:
                clamped_params[key] = preset[key]
                print(f" {key}: {preset[key]}")

            # 10. Logging
            self._log_parameters(clamped_params)

            print(f"\n โœ… ์™„๋ฃŒ!")
            print(f"{'='*60}\n")

            return self._convert_to_effect_chain_format(clamped_params)

        except Exception as e:
            print(f"\n โŒ ์‹คํŒจ: {e}")
            import traceback
            traceback.print_exc()
            params = DEFAULT_PARAMETERS.copy()
            params.update(self._apply_preset(processed_prompt))
            self._log_parameters(params)
            return self._convert_to_effect_chain_format(params)

    def _convert_to_effect_chain_format(self, params: Dict[str, float]) -> Dict[str, float]:
        """Convert to effect_chain.py key format (rename '.Q' to '.q')."""
        result = {}
        for key, value in params.items():
            new_key = key.replace('.Q', '.q')
            result[new_key] = value
        return result

    def _log_parameters(self, params: Dict[str, float]):
        """Print a human-readable summary of the final parameter set."""
        print(f"\n ๐Ÿ“‹ ์ตœ์ข… ํŒŒ๋ผ๋ฏธํ„ฐ:")
        print(f" [EQ Peak 1] freq={params.get('eq_peak1.params.freq',0):.0f}Hz, gain={params.get('eq_peak1.params.gain',0):.2f}dB, Q={params.get('eq_peak1.params.Q',0):.2f}")
        print(f" [EQ Peak 2] freq={params.get('eq_peak2.params.freq',0):.0f}Hz, gain={params.get('eq_peak2.params.gain',0):.2f}dB, Q={params.get('eq_peak2.params.Q',0):.2f}")
        print(f" [Low Shelf] freq={params.get('eq_lowshelf.params.freq',0):.0f}Hz, gain={params.get('eq_lowshelf.params.gain',0):.2f}dB")
        print(f" [High Shelf] freq={params.get('eq_highshelf.params.freq',0):.0f}Hz, gain={params.get('eq_highshelf.params.gain',0):.2f}dB")
        print(f" [Compressor] threshold={params.get('compressor.threshold',-18):.1f}dB, ratio={params.get('compressor.ratio',2):.1f}")
        print(f" [Distortion] {params.get('distortion_amount',0):.4f}")
        print(f" [Delay] time={params.get('delay.delay_time',0):.3f}s, fb={params.get('delay.feedback',0):.2f}, mix={params.get('delay.mix',0):.2f}")
        print(f" [Reverb] room={params.get('reverb.room_size',0):.2f}, damp={params.get('reverb.damping',0):.2f}, wet={params.get('reverb.wet_level',0):.2f}, dry={params.get('reverb.dry_level',1):.2f}")
        print(f" [Wet Mix] {params.get('final_wet_mix',0):.2f}")