heybaeheef committed on
Commit
7a4005e
·
verified ·
1 Parent(s): a6c03bb

Delete models/ai_effector.py

Browse files
Files changed (1) hide show
  1. models/ai_effector.py +0 -539
models/ai_effector.py DELETED
@@ -1,539 +0,0 @@
1
- """
2
- AI Effector - DiffVox LLM-based effect parameter prediction
3
- ===================================================
4
- V2: uses the same CLAP encoder and prompt format as training
5
- """
6
-
7
- import os
8
- import json
9
- import re
10
- import torch
11
- import numpy as np
12
- from typing import Dict, List, Optional, Any
13
- from pathlib import Path
14
- from datetime import datetime
15
- import warnings
16
-
17
- warnings.filterwarnings("ignore")
18
-
19
# Default effect parameters. They are the starting point for every result:
# preset matching and LLM-output parsing both copy this dict and override
# individual keys, and it is returned unchanged when parsing fails.
DEFAULT_PARAMETERS: Dict[str, float] = {
    "eq_peak1.params.freq": 1000.0,
    "eq_peak1.params.gain": 0.0,
    "eq_peak1.params.Q": 1.0,  # upper-case Q (matches the training data)
    "eq_peak2.params.freq": 4000.0,
    "eq_peak2.params.gain": 0.0,
    "eq_peak2.params.Q": 1.0,
    "eq_lowshelf.params.freq": 200.0,
    "eq_lowshelf.params.gain": 0.0,
    "eq_highshelf.params.freq": 8000.0,
    "eq_highshelf.params.gain": 0.0,
    "distortion_amount": 0.0,
    "delay.delay_time": 0.02,
    "delay.feedback": 0.3,
    "delay.mix": 0.2,
    "final_wet_mix": 0.5,
}
37
-
38
# Style presets used when the AI model is unavailable. Each preset is a
# partial override of DEFAULT_PARAMETERS, selected when its name occurs in
# the (lower-cased) text prompt.
STYLE_PRESETS: Dict[str, Dict[str, float]] = {
    "warm": {
        "eq_lowshelf.params.gain": 3.0,
        "eq_highshelf.params.gain": -1.0,
        "distortion_amount": 0.05,
    },
    "bright": {
        "eq_highshelf.params.gain": 4.0,
        "eq_peak2.params.gain": 2.0,
        "eq_lowshelf.params.gain": -1.0,
    },
    "vintage": {
        "eq_lowshelf.params.gain": 2.0,
        "eq_highshelf.params.gain": -2.0,
        "distortion_amount": 0.1,
        "delay.mix": 0.15,
    },
    "modern": {
        "eq_peak1.params.gain": 2.0,
        "eq_peak2.params.gain": 3.0,
        "eq_highshelf.params.gain": 2.0,
    },
    "spacious": {
        "delay.delay_time": 0.05,
        "delay.feedback": 0.4,
        "delay.mix": 0.35,
    },
    "dry": {
        "final_wet_mix": 0.2,
        "delay.mix": 0.0,
    },
    "saturated": {
        "distortion_amount": 0.15,
        "eq_lowshelf.params.gain": 1.0,
    },
}
75
-
76
-
77
class CLAPAudioEncoder:
    """CLAP-based audio encoder that pools the CLAP embedding down to a small vector.

    Loads a CLAP checkpoint (default ``laion/larger_clap_music``) through
    ``transformers`` and reduces the embedding returned by
    ``ClapModel.get_audio_features`` to ``output_dim`` values by average
    pooling. According to the original comments this mirrors the encoder used
    at training time (512 -> 64 pooling).

    If the model cannot be loaded the instance stays usable:
    ``get_audio_features`` then returns an all-zero vector.

    NOTE(review): the non-ASCII text inside the log messages is reproduced
    exactly as found in the source; it looks like mis-decoded UTF-8 and should
    be checked against the original file.
    """

    def __init__(self, output_dim: int = 64, model_name: str = "laion/larger_clap_music"):
        """Store the configuration and try to load the CLAP model immediately.

        Args:
            output_dim: Length of the pooled feature vector.
            model_name: Hugging Face model id of the CLAP checkpoint.
        """
        self.output_dim = output_dim
        self.model_name = model_name
        self.target_sr = 48000  # audio is resampled to 48 kHz before it is fed to CLAP
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.model = None
        self.processor = None
        self._load_model()

    def _load_model(self):
        """Load the CLAP processor and model; on failure leave ``self.model`` as None."""
        try:
            # Imported lazily so that the module can be imported without transformers.
            from transformers import ClapModel, ClapProcessor

            print(f"[CLAPEncoder] CLAP ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘: {self.model_name}")

            self.processor = ClapProcessor.from_pretrained(self.model_name)
            self.model = ClapModel.from_pretrained(self.model_name)
            self.model = self.model.to(self.device)
            self.model.eval()

            print(f"[CLAPEncoder] โœ… CLAP ๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ (512โ†’{self.output_dim} pooling)")

        except ImportError:
            print("[CLAPEncoder] โŒ transformers ๋ฏธ์„ค์น˜")
            print(" pip install transformers")
        except Exception as e:
            # Best-effort loading: report the problem and fall back to zero features.
            print(f"[CLAPEncoder] โŒ ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
            import traceback
            traceback.print_exc()

    def get_audio_features(self, audio_path: str) -> List[float]:
        """Extract the pooled CLAP feature vector for one audio file.

        Args:
            audio_path: Path of the audio file to analyse.

        Returns:
            A list of ``output_dim`` floats. An all-zero list is returned when
            the model is not loaded or when any step of the extraction fails.
        """
        if self.model is None:
            print("[CLAPEncoder] ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์Œ, ๋นˆ ํŠน์ง• ๋ฐ˜ํ™˜")
            return [0.0] * self.output_dim

        try:
            import librosa

            # 1. Load the audio as mono, resampled to the CLAP sampling rate.
            audio, sr = librosa.load(audio_path, sr=self.target_sr, mono=True)

            # 2. Prepare the CLAP inputs.
            inputs = self.processor(
                audios=audio,
                sampling_rate=self.target_sr,
                return_tensors="pt",
                padding=True
            ).to(self.device)

            # 3. Extract the embedding.
            with torch.no_grad():
                outputs = self.model.get_audio_features(**inputs)

            # First (only) item of the batch; the original comment describes
            # the model output as a [1, 512] tensor.
            features_512 = outputs[0].cpu().numpy()

            # 4. Average-pool down to output_dim values.
            features_64 = self._reduce_dimension(features_512)

            return features_64.tolist()

        except Exception as e:
            # Any failure (missing file, decode error, ...) degrades to zeros so
            # the caller can fall back to presets.
            print(f"[CLAPEncoder] ํŠน์ง• ์ถ”์ถœ ์‹คํŒจ: {e}")
            import traceback
            traceback.print_exc()
            return [0.0] * self.output_dim

    def _reduce_dimension(self, features: np.ndarray) -> np.ndarray:
        """Average-pool ``features`` into ``output_dim`` contiguous buckets.

        The input is flattened first, so a ``(1, N)`` embedding is pooled the
        same way as an ``(N,)`` one. Buckets are as equal as possible; when N
        is not a multiple of ``output_dim`` the leading buckets get one extra
        element. If N is smaller than ``output_dim`` the trailing buckets are
        empty and are filled with 0.0 instead of NaN.

        Args:
            features: Embedding to reduce.

        Returns:
            1-D array of length ``output_dim`` (the flattened input itself when
            it already has that length).
        """
        flat = np.asarray(features).reshape(-1)

        if flat.size == self.output_dim:
            return flat

        # np.array_split produces exactly the partition described above.
        buckets = np.array_split(flat, self.output_dim)
        return np.array([bucket.mean() if bucket.size else 0.0 for bucket in buckets])

    def is_loaded(self) -> bool:
        """Return True when the CLAP model was loaded successfully."""
        return self.model is not None
177
-
178
-
179
class AIEffector:
    """Predict audio-effect parameters from an audio file and a text prompt.

    An LLM (base model plus LoRA adapter) receives a prompt containing the
    pooled CLAP features of the audio and the user's text, and is expected to
    answer with a JSON object of effect parameters. Whenever the model is not
    available, feature extraction fails, or inference raises, the class falls
    back to keyword-based presets (``STYLE_PRESETS`` on top of
    ``DEFAULT_PARAMETERS``).

    NOTE(review): the non-ASCII text inside the log messages is reproduced
    exactly as found in the source; it looks like mis-decoded UTF-8 and should
    be checked against the original file.
    """

    def __init__(
        self,
        model_repo_id: str = "heybaeheef/KU_SW_Academy",
        model_subfolder: str = "checkpoints",
        base_model_name: str = "Qwen/Qwen3-8B",
        audio_feature_dim: int = 64,
        use_huggingface: bool = True
    ):
        """Create the audio encoder and try to load the LLM.

        Args:
            model_repo_id: Hub repository id of the LoRA adapter, or the local
                base directory when ``use_huggingface`` is False.
            model_subfolder: Sub-folder inside the repository/directory that
                holds the adapter.
            base_model_name: Model id of the base causal LM.
            audio_feature_dim: Length of the pooled audio feature vector.
            use_huggingface: Load the adapter from the Hub (True) or from the
                local path ``model_repo_id/model_subfolder`` (False).
        """
        self.model_repo_id = model_repo_id
        self.model_subfolder = model_subfolder
        self.base_model_name = base_model_name
        self.audio_feature_dim = audio_feature_dim
        self.use_huggingface = use_huggingface

        self.model = None
        self.tokenizer = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # The CLAP encoder is used so inference sees the same kind of audio
        # features as training (per the original author's note).
        print(f"[AIEffector] CLAP ์˜ค๋””์˜ค ์ธ์ฝ”๋” ์ดˆ๊ธฐํ™” ์ค‘...")
        self.audio_encoder = CLAPAudioEncoder(output_dim=audio_feature_dim)

        # Number of predict() calls served so far (only used for logging).
        self.request_count = 0

        # Try to load the LLM; failure switches the instance to preset mode.
        self._load_model()

    def _load_model(self):
        """Load base model, tokenizer and LoRA adapter.

        Any exception is caught: it is printed with its traceback and
        ``self.model`` / ``self.tokenizer`` are reset to None so that
        ``predict`` uses presets.
        """
        try:
            from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
            from peft import PeftModel

            print(f"[AIEffector] ๋ฒ ์ด์Šค ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘: {self.base_model_name}")

            # SECURITY NOTE: trust_remote_code=True executes Python code shipped
            # with the model repository; only use it with trusted repositories.
            if torch.cuda.is_available():
                # 4-bit quantisation settings (GPU only).
                bnb_config = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_quant_type="nf4",
                    bnb_4bit_compute_dtype=torch.float16,
                    bnb_4bit_use_double_quant=True
                )
                base_model = AutoModelForCausalLM.from_pretrained(
                    self.base_model_name,
                    quantization_config=bnb_config,
                    device_map="auto",
                    trust_remote_code=True
                )
            else:
                base_model = AutoModelForCausalLM.from_pretrained(
                    self.base_model_name,
                    torch_dtype=torch.float32,
                    device_map="auto",
                    trust_remote_code=True
                )

            self.tokenizer = AutoTokenizer.from_pretrained(
                self.base_model_name,
                trust_remote_code=True
            )

            # generate() needs a pad token; reuse EOS when none is defined.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            print(f"[AIEffector] LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ ์ค‘...")

            adapter_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
            if self.use_huggingface:
                print(f"[AIEffector] HuggingFace์—์„œ LoRA ๋กœ๋”ฉ: {self.model_repo_id}/{self.model_subfolder}")
                self.model = PeftModel.from_pretrained(
                    base_model,
                    self.model_repo_id,
                    subfolder=self.model_subfolder,
                    torch_dtype=adapter_dtype,
                )
            else:
                local_path = os.path.join(self.model_repo_id, self.model_subfolder)
                print(f"[AIEffector] ๋กœ์ปฌ์—์„œ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ: {local_path}")
                self.model = PeftModel.from_pretrained(
                    base_model,
                    local_path,
                    torch_dtype=adapter_dtype,
                )

            self.model.eval()
            print(f"[AIEffector] โœ… ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต!")

        except Exception as e:
            # Deliberate best-effort: report and switch to preset mode.
            print(f"[AIEffector] โŒ ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
            import traceback
            traceback.print_exc()
            print(f"[AIEffector] ํด๋ฐฑ ๋ชจ๋“œ๋กœ ์ „ํ™˜ (ํ”„๋ฆฌ์…‹ ๊ธฐ๋ฐ˜)")
            self.model = None
            self.tokenizer = None

    def is_loaded(self) -> bool:
        """Return True when the LLM was loaded successfully."""
        return self.model is not None

    def _apply_preset(self, prompt: str) -> Dict[str, float]:
        """Build parameters from every preset whose name occurs in ``prompt``.

        Matching is a case-insensitive substring test. Presets are applied in
        ``STYLE_PRESETS`` order on top of a copy of ``DEFAULT_PARAMETERS``, so
        later presets win on shared keys.
        """
        params = DEFAULT_PARAMETERS.copy()
        prompt_lower = prompt.lower()

        matched_presets = []
        for style_name, style_params in STYLE_PRESETS.items():
            if style_name in prompt_lower:
                params.update(style_params)
                matched_presets.append(style_name)

        if matched_presets:
            print(f" [Preset] ๋งค์นญ๋œ ํ”„๋ฆฌ์…‹: {matched_presets}")

        return params

    def _format_prompt(self, text_prompt: str, audio_features: List[float]) -> str:
        """Build the LLM prompt from the text request and the audio features.

        The original author notes this must match the training prompt format
        (train_model.py) exactly, so the wording must not be changed.

        NOTE(review): the template is rebuilt assuming the continuation lines
        of the original triple-quoted string had no leading indentation --
        confirm against the training script.
        """
        audio_state_str = json.dumps(audio_features)

        return (
            "Task: Convert text to audio parameters.\n"
            f"Audio: {audio_state_str}\n"
            f"Text: {text_prompt}\n"
            "Parameters:"
        )

    def _parse_output(self, output_text: str) -> Dict[str, float]:
        """Extract effect parameters from raw LLM output.

        Steps: drop ``<think>...</think>`` sections, prefer the content of a
        fenced code block when present, cut out the first JSON object, clean
        it up, parse it, and copy every known key whose value converts to a
        finite float onto a copy of ``DEFAULT_PARAMETERS``.

        Returns:
            The merged parameters, or a plain copy of ``DEFAULT_PARAMETERS``
            when no JSON object is found or parsing fails.
        """
        print(f" [Parse] Raw output ๊ธธ์ด: {len(output_text)} ๋ฌธ์ž")

        json_str = None
        try:
            # 1. Remove <think>...</think> sections (Qwen3 thinking mode).
            text = re.sub(r'<think>.*?</think>', '', output_text, flags=re.DOTALL)

            # 2. Prefer the content of a markdown code block.
            code_block_match = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
            if code_block_match:
                text = code_block_match.group(1)
                print(f" [Parse] ์ฝ”๋“œ๋ธ”๋ก์—์„œ JSON ์ถ”์ถœ")

            # 3. Locate the first JSON object (nested braces supported).
            json_str = self._extract_json_object(text)

            if json_str:
                print(f" [Parse] ์ถ”์ถœ๋œ JSON (์ฒ˜์Œ 200์ž):\n{json_str[:200]}...")

                # 4. Clean-up, then 5. parse.
                json_str = self._preprocess_json(json_str)
                params = json.loads(json_str)

                # 6. Validate and map onto the known parameter set.
                result = DEFAULT_PARAMETERS.copy()
                applied = 0
                for key, value in params.items():
                    normalized_key = self._normalize_key(key)
                    if normalized_key not in result:
                        continue
                    try:
                        number = float(value)
                    except (ValueError, TypeError, OverflowError):
                        continue  # keep the default for unusable values
                    if not np.isfinite(number):
                        continue  # NaN/inf must never reach the effect chain
                    result[normalized_key] = number
                    applied += 1

                # Report how many parameters were actually taken over.
                print(f" [Parse] โœ… ํŒŒ์‹ฑ ์„ฑ๊ณต! {applied}๊ฐœ ํŒŒ๋ผ๋ฏธํ„ฐ ์ถ”์ถœ")
                return result
            else:
                print(f" [Parse] โŒ JSON ๊ฐ์ฒด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Œ")

        except json.JSONDecodeError as e:
            print(f" [Parse] โŒ JSON ํŒŒ์‹ฑ ์—๋Ÿฌ: {e}")
            if json_str:
                print(f" [Parse] ๋ฌธ์ œ ์œ„์น˜ ๊ทผ์ฒ˜: ...{json_str[max(0, e.pos-20):e.pos+20]}...")
        except Exception as e:
            print(f" [Parse] โŒ ์˜ˆ์™ธ ๋ฐœ์ƒ: {e}")

        print(f" [Parse] โš ๏ธ ๊ธฐ๋ณธ๊ฐ’์œผ๋กœ ํด๋ฐฑ")
        return DEFAULT_PARAMETERS.copy()

    def _normalize_key(self, key: str) -> str:
        """Map a trailing lower-case ``.q`` to ``.Q`` (the spelling used in DEFAULT_PARAMETERS)."""
        if key.endswith('.q'):
            return key[:-2] + '.Q'
        return key

    def _extract_json_object(self, text: str) -> Optional[str]:
        """Return the first balanced ``{...}`` object in ``text``, or None.

        Nested braces are supported. Braces that appear inside double-quoted
        JSON strings (including strings with escaped quotes) are ignored, so
        a value such as ``"a } b"`` does not end the object early.
        """
        start = text.find('{')
        if start == -1:
            return None

        depth = 0
        in_string = False
        escaped = False
        for i in range(start, len(text)):
            char = text[i]
            if in_string:
                if escaped:
                    escaped = False      # this character was escaped; skip it
                elif char == '\\':
                    escaped = True
                elif char == '"':
                    in_string = False
                continue
            if char == '"':
                in_string = True
            elif char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    return text[start:i + 1]

        # Unbalanced braces (e.g. output truncated by max_new_tokens).
        return None

    def _preprocess_json(self, json_str: str) -> str:
        """Repair common defects of LLM-written JSON before parsing.

        Removes trailing commas before ``}`` / ``]`` and replaces the
        non-standard tokens ``NaN`` -> ``0`` and ``Infinity`` -> ``999999``.
        ``-Infinity`` becomes ``-999999`` through the same substitution
        because the minus sign is left in place.
        """
        json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
        json_str = re.sub(r'\bNaN\b', '0', json_str)
        json_str = re.sub(r'\bInfinity\b', '999999', json_str)
        return json_str

    def _preset_fallback(self, text_prompt: str) -> Dict[str, float]:
        """Preset path shared by all fallbacks: match presets, log, convert keys."""
        params = self._apply_preset(text_prompt)
        self._log_parameters(params)
        return self._convert_to_effect_chain_format(params)

    def predict(self, audio_path: str, text_prompt: str = "") -> Dict[str, float]:
        """Predict effect parameters for one request.

        Args:
            audio_path: Path of the audio file to analyse.
            text_prompt: Free-text description of the desired sound.

        Returns:
            Parameter dict in the effect-chain key format (lower-case ``.q``).
            Presets are used when the model is not loaded, when the audio
            features are empty or all zero, or when any inference step raises.
        """
        self.request_count += 1
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        print(f"\n{'='*60}")
        print(f"[AIEffector] ๐ŸŽต ์š”์ฒญ #{self.request_count} - {timestamp}")
        print(f"{'='*60}")
        print(f" ๐Ÿ“‚ ์˜ค๋””์˜ค ํŒŒ์ผ: {Path(audio_path).name}")
        print(f" ๐Ÿ’ฌ ํ…์ŠคํŠธ ํ”„๋กฌํ”„ํŠธ: '{text_prompt}'")
        print(f" ๐Ÿค– ๋ชจ๋ธ ์ƒํƒœ: {'AI ๋ชจ๋“œ' if self.is_loaded() else 'ํ”„๋ฆฌ์…‹ ๋ชจ๋“œ'}")
        print(f" ๐ŸŽง ์ธ์ฝ”๋”: CLAP (ํ•™์Šต๊ณผ ๋™์ผ)")

        # Without a model, use presets.
        if not self.is_loaded():
            print(f"\n โš ๏ธ AI ๋ชจ๋ธ ๋ฏธ๋กœ๋“œ - ํ”„๋ฆฌ์…‹ ๋ชจ๋“œ ์‚ฌ์šฉ")
            return self._preset_fallback(text_prompt)

        try:
            # 1. CLAP audio features.
            print(f"\n ๐Ÿ“Š [Step 1] CLAP ์˜ค๋””์˜ค ํŠน์ง• ์ถ”์ถœ ์ค‘...")
            audio_features = self.audio_encoder.get_audio_features(audio_path)

            # The encoder signals failure with an all-zero vector.
            if not audio_features or all(f == 0 for f in audio_features):
                print(f" โš ๏ธ ํŠน์ง• ์ถ”์ถœ ์‹คํŒจ, ํ”„๋ฆฌ์…‹์œผ๋กœ ํด๋ฐฑ")
                return self._preset_fallback(text_prompt)

            print(f" โœ… {len(audio_features)}์ฐจ์› ํŠน์ง• ์ถ”์ถœ ์™„๋ฃŒ")
            print(f" - ํŠน์ง• ๋ฒกํ„ฐ (์ฒ˜์Œ 8๊ฐœ): {[round(v, 3) for v in audio_features[:8]]}")

            # 2. Prompt in the training format.
            print(f"\n ๐Ÿ”ค [Step 2] LLM ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ ์ค‘ (ํ•™์Šต ํ˜•์‹)...")
            prompt = self._format_prompt(text_prompt, audio_features)
            print(f" - ํ”„๋กฌํ”„ํŠธ ๊ธธ์ด: {len(prompt)} ๋ฌธ์ž")

            # 3. Tokenise.
            print(f"\n ๐Ÿ”ข [Step 3] ํ† ํฐํ™” ์ค‘...")
            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                max_length=1500  # same as during training (per the original comment)
            ).to(self.device)
            print(f" - ์ž…๋ ฅ ํ† ํฐ ์ˆ˜: {inputs['input_ids'].shape[1]}")

            # 4. Generate.
            print(f"\n ๐Ÿง  [Step 4] LLM ์ถ”๋ก  ์ค‘...")
            import time
            start_time = time.time()

            with torch.no_grad():
                # Greedy decoding: sampling knobs such as temperature are not
                # used when do_sample=False, so none is passed.
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=500,
                    do_sample=False,
                    pad_token_id=self.tokenizer.pad_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                )

            inference_time = time.time() - start_time
            print(f" - ์ถ”๋ก  ์‹œ๊ฐ„: {inference_time:.2f}์ดˆ")

            # 5. Decode only the newly generated tokens (skip the prompt).
            print(f"\n ๐Ÿ“ [Step 5] ์ถœ๋ ฅ ๋””์ฝ”๋”ฉ ์ค‘...")
            generated_tokens = outputs[0][inputs['input_ids'].shape[1]:]
            output_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

            print(f" - LLM ์ถœ๋ ฅ (์ฒ˜์Œ 300์ž):\n{output_text[:300]}")

            # 6. Parse.
            print(f"\n ๐Ÿ”ง [Step 6] ํŒŒ๋ผ๋ฏธํ„ฐ ํŒŒ์‹ฑ ์ค‘...")
            params = self._parse_output(output_text)

            # 7. Log the result.
            self._log_parameters(params)

            print(f"\n โœ… AI ์˜ˆ์ธก ์™„๋ฃŒ!")
            print(f"{'='*60}\n")

            # Convert to the effect-chain key format.
            return self._convert_to_effect_chain_format(params)

        except Exception as e:
            # Top-level boundary: never let a request fail, fall back to presets.
            print(f"\n โŒ ์˜ˆ์ธก ์‹คํŒจ: {e}")
            import traceback
            traceback.print_exc()
            print(f" โš ๏ธ ํ”„๋ฆฌ์…‹์œผ๋กœ ํด๋ฐฑ...")
            return self._preset_fallback(text_prompt)

    def _convert_to_effect_chain_format(self, params: Dict[str, float]) -> Dict[str, float]:
        """Return a copy of ``params`` with every ``.Q`` in a key replaced by ``.q``.

        Per the original comment, effect_chain.py expects the lower-case
        spelling while the training data uses upper-case ``Q``.
        """
        return {key.replace('.Q', '.q'): value for key, value in params.items()}

    def _log_parameters(self, params: Dict[str, float]):
        """Print the parameters grouped by effect; missing keys are shown as 0."""
        print(f"\n ๐Ÿ“‹ ์˜ˆ์ธก๋œ ํŒŒ๋ผ๋ฏธํ„ฐ:")
        print(f" [EQ Peak 1]")
        print(f" - Freq: {params.get('eq_peak1.params.freq', 0):.1f} Hz")
        print(f" - Gain: {params.get('eq_peak1.params.gain', 0):.2f} dB")
        # Q may be stored under either spelling depending on the conversion stage.
        print(f" - Q: {params.get('eq_peak1.params.Q', params.get('eq_peak1.params.q', 0)):.2f}")

        print(f" [EQ Peak 2]")
        print(f" - Freq: {params.get('eq_peak2.params.freq', 0):.1f} Hz")
        print(f" - Gain: {params.get('eq_peak2.params.gain', 0):.2f} dB")
        print(f" - Q: {params.get('eq_peak2.params.Q', params.get('eq_peak2.params.q', 0)):.2f}")

        print(f" [Low Shelf]")
        print(f" - Freq: {params.get('eq_lowshelf.params.freq', 0):.1f} Hz")
        print(f" - Gain: {params.get('eq_lowshelf.params.gain', 0):.2f} dB")

        print(f" [High Shelf]")
        print(f" - Freq: {params.get('eq_highshelf.params.freq', 0):.1f} Hz")
        print(f" - Gain: {params.get('eq_highshelf.params.gain', 0):.2f} dB")

        print(f" [Effects]")
        print(f" - Distortion: {params.get('distortion_amount', 0):.3f}")
        print(f" - Delay Time: {params.get('delay.delay_time', 0):.3f}s")
        print(f" - Delay Feedback: {params.get('delay.feedback', 0):.2f}")
        print(f" - Delay Mix: {params.get('delay.mix', 0):.2f}")
        print(f" - Final Wet Mix: {params.get('final_wet_mix', 0):.2f}")