heybaeheef committed on
Commit
70de690
·
1 Parent(s): 5c6cdde

Add detailed logging

Browse files
Files changed (1) hide show
  1. models/ai_effector.py +179 -27
models/ai_effector.py CHANGED
@@ -1,6 +1,7 @@
1
  """
2
  AI Effector - DiffVox LLM-based effect parameter prediction
3
  ===================================================
 
4
  """
5
 
6
  import os
@@ -10,6 +11,7 @@ import torch
10
  import numpy as np
11
  from typing import Dict, List, Optional, Any
12
  from pathlib import Path
 
13
  import warnings
14
 
15
  warnings.filterwarnings("ignore")
@@ -81,19 +83,25 @@ class AudioEncoder:
81
  self.output_dim = output_dim
82
  self.sr = 44100
83
 
84
- def get_audio_features(self, audio_path: str) -> List[float]:
85
- """์˜ค๋””์˜ค์—์„œ ํŠน์ง• ์ถ”์ถœ (๊ฐ„์†Œํ™” ๋ฒ„์ „)"""
86
  try:
87
  import librosa
88
 
89
  y, sr = librosa.load(audio_path, sr=self.sr, duration=5.0)
90
 
 
 
 
91
  # ๊ธฐ๋ณธ ํŠน์ง• ์ถ”์ถœ
92
  features = []
 
93
 
94
  # MFCC (20๊ฐœ)
95
  mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
96
- features.extend(np.mean(mfcc, axis=1).tolist())
 
 
97
 
98
  # Spectral features
99
  spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
@@ -101,18 +109,54 @@ class AudioEncoder:
101
  spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
102
 
103
  features.extend([spectral_centroid / 10000, spectral_bandwidth / 10000, spectral_rolloff / 10000])
 
 
 
104
 
105
  # RMS energy
106
  rms = np.mean(librosa.feature.rms(y=y))
107
  features.append(float(rms))
 
108
 
109
  # Zero crossing rate
110
  zcr = np.mean(librosa.feature.zero_crossing_rate(y))
111
  features.append(float(zcr))
 
112
 
113
  # Chroma (12๊ฐœ)
114
  chroma = librosa.feature.chroma_stft(y=y, sr=sr)
115
- features.extend(np.mean(chroma, axis=1).tolist())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  # Pad or truncate to output_dim
118
  if len(features) < self.output_dim:
@@ -120,11 +164,21 @@ class AudioEncoder:
120
  else:
121
  features = features[:self.output_dim]
122
 
123
- return features
 
 
 
 
 
124
 
125
  except Exception as e:
126
- print(f"[AudioEncoder] ํŠน์ง• ์ถ”์ถœ ์‹คํŒจ: {e}")
127
- return [0.0] * self.output_dim
 
 
 
 
 
128
 
129
 
130
  class AIEffector:
@@ -151,13 +205,16 @@ class AIEffector:
151
  # ์˜ค๋””์˜ค ์ธ์ฝ”๋”
152
  self.audio_encoder = AudioEncoder(output_dim=audio_feature_dim)
153
 
 
 
 
154
  # ๋ชจ๋ธ ๋กœ๋“œ ์‹œ๋„
155
  self._load_model()
156
 
157
  def _load_model(self):
158
  """๋ชจ๋ธ ๋กœ๋“œ"""
159
  try:
160
- from transformers import AutoModelForCausalLM, AutoTokenizer
161
  from peft import PeftModel
162
 
163
  print(f"[AIEffector] ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
@@ -173,9 +230,24 @@ class AIEffector:
173
  if self.tokenizer.pad_token is None:
174
  self.tokenizer.pad_token = self.tokenizer.eos_token
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  # ๋ฒ ์ด์Šค ๋ชจ๋ธ ๋กœ๋“œ
177
  base_model = AutoModelForCausalLM.from_pretrained(
178
  self.base_model_name,
 
179
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
180
  device_map="auto" if torch.cuda.is_available() else None,
181
  trust_remote_code=True,
@@ -188,11 +260,10 @@ class AIEffector:
188
  self.model = PeftModel.from_pretrained(
189
  base_model,
190
  self.model_repo_id,
191
- subfolder=self.model_subfolder, # ํ•ต์‹ฌ ์ˆ˜์ •!
192
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
193
  )
194
  else:
195
- # ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ
196
  local_path = os.path.join(self.model_repo_id, self.model_subfolder)
197
  print(f"[AIEffector] ๋กœ์ปฌ์—์„œ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ: {local_path}")
198
  self.model = PeftModel.from_pretrained(
@@ -219,15 +290,19 @@ class AIEffector:
219
  params = DEFAULT_PARAMETERS.copy()
220
  prompt_lower = prompt.lower()
221
 
 
222
  for style_name, style_params in STYLE_PRESETS.items():
223
  if style_name in prompt_lower:
224
  params.update(style_params)
 
 
 
 
225
 
226
  return params
227
 
228
  def _format_prompt(self, text_prompt: str, audio_features: List[float]) -> str:
229
  """LLM ์ž…๋ ฅ ํ”„๋กฌํ”„ํŠธ ํฌ๋งทํŒ…"""
230
- # ์˜ค๋””์˜ค ํŠน์ง•์„ ๊ฐ„๊ฒฐํ•˜๊ฒŒ ํ‘œํ˜„
231
  audio_summary = ", ".join([f"{v:.3f}" for v in audio_features[:8]])
232
 
233
  prompt = f"""You are an audio effect parameter predictor.
@@ -266,12 +341,10 @@ JSON output:"""
266
  def _parse_output(self, output_text: str) -> Dict[str, float]:
267
  """LLM ์ถœ๋ ฅ์—์„œ ํŒŒ๋ผ๋ฏธํ„ฐ ์ถ”์ถœ"""
268
  try:
269
- # JSON ๋ธ”๋ก ์ฐพ๊ธฐ
270
  json_match = re.search(r'\{[^{}]*\}', output_text, re.DOTALL)
271
  if json_match:
272
  params = json.loads(json_match.group())
273
 
274
- # ์œ ํšจ์„ฑ ๊ฒ€์‚ฌ ๋ฐ ๊ธฐ๋ณธ๊ฐ’ ๋ณ‘ํ•ฉ
275
  result = DEFAULT_PARAMETERS.copy()
276
  for key, value in params.items():
277
  if key in result and isinstance(value, (int, float)):
@@ -279,34 +352,66 @@ JSON output:"""
279
 
280
  return result
281
  except Exception as e:
282
- print(f"[AIEffector] ์ถœ๋ ฅ ํŒŒ์‹ฑ ์‹คํŒจ: {e}")
283
 
284
  return DEFAULT_PARAMETERS.copy()
285
 
286
  def predict(self, audio_path: str, text_prompt: str = "") -> Dict[str, float]:
287
- """ํŒŒ๋ผ๋ฏธํ„ฐ ์˜ˆ์ธก"""
 
 
 
 
 
 
 
 
 
 
288
 
289
  # ๋ชจ๋ธ์ด ์—†์œผ๋ฉด ํ”„๋ฆฌ์…‹ ์‚ฌ์šฉ
290
  if not self.is_loaded():
291
- print(f"[AIEffector] ํ”„๋ฆฌ์…‹ ๋ชจ๋“œ ์‚ฌ์šฉ (prompt: {text_prompt})")
292
- return self._apply_preset(text_prompt)
 
 
293
 
294
  try:
295
- # ์˜ค๋””์˜ค ํŠน์ง• ์ถ”์ถœ
296
- audio_features = self.audio_encoder.get_audio_features(audio_path)
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
- # ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ
 
299
  prompt = self._format_prompt(text_prompt, audio_features)
 
300
 
301
- # ํ† ํฐํ™”
 
302
  inputs = self.tokenizer(
303
  prompt,
304
  return_tensors="pt",
305
  truncation=True,
306
  max_length=1024
307
  ).to(self.device)
 
 
 
 
 
 
308
 
309
- # ์ƒ์„ฑ
310
  with torch.no_grad():
311
  outputs = self.model.generate(
312
  **inputs,
@@ -316,15 +421,62 @@ JSON output:"""
316
  pad_token_id=self.tokenizer.pad_token_id
317
  )
318
 
319
- # ๋””์ฝ”๋”ฉ
 
 
 
 
 
320
  output_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
321
 
322
- # ํŒŒ์‹ฑ
 
 
 
 
 
 
323
  params = self._parse_output(output_text)
324
 
325
- print(f"[AIEffector] โœ… AI ์˜ˆ์ธก ์™„๋ฃŒ")
 
 
 
 
 
326
  return params
327
 
328
  except Exception as e:
329
- print(f"[AIEffector] ์˜ˆ์ธก ์‹คํŒจ: {e}, ํ”„๋ฆฌ์…‹์œผ๋กœ ํด๋ฐฑ")
330
- return self._apply_preset(text_prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  AI Effector - DiffVox LLM-based effect parameter prediction
3
  ===================================================
4
+ ์ƒ์„ธ ๋กœ๊ทธ ๋ฒ„์ „
5
  """
6
 
7
  import os
 
11
  import numpy as np
12
  from typing import Dict, List, Optional, Any
13
  from pathlib import Path
14
+ from datetime import datetime
15
  import warnings
16
 
17
  warnings.filterwarnings("ignore")
 
83
  self.output_dim = output_dim
84
  self.sr = 44100
85
 
86
+ def get_audio_features(self, audio_path: str) -> Dict:
87
+ """์˜ค๋””์˜ค์—์„œ ํŠน์ง• ์ถ”์ถœ (์ƒ์„ธ ์ •๋ณด ํฌํ•จ)"""
88
  try:
89
  import librosa
90
 
91
  y, sr = librosa.load(audio_path, sr=self.sr, duration=5.0)
92
 
93
+ # ๊ธฐ๋ณธ ์˜ค๋””์˜ค ์ •๋ณด
94
+ duration = len(y) / sr
95
+
96
  # ๊ธฐ๋ณธ ํŠน์ง• ์ถ”์ถœ
97
  features = []
98
+ feature_details = {}
99
 
100
  # MFCC (20๊ฐœ)
101
  mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
102
+ mfcc_mean = np.mean(mfcc, axis=1).tolist()
103
+ features.extend(mfcc_mean)
104
+ feature_details["mfcc_mean"] = [round(v, 4) for v in mfcc_mean[:5]] # ์ฒ˜์Œ 5๊ฐœ๋งŒ
105
 
106
  # Spectral features
107
  spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
 
109
  spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
110
 
111
  features.extend([spectral_centroid / 10000, spectral_bandwidth / 10000, spectral_rolloff / 10000])
112
+ feature_details["spectral_centroid"] = round(spectral_centroid, 2)
113
+ feature_details["spectral_bandwidth"] = round(spectral_bandwidth, 2)
114
+ feature_details["spectral_rolloff"] = round(spectral_rolloff, 2)
115
 
116
  # RMS energy
117
  rms = np.mean(librosa.feature.rms(y=y))
118
  features.append(float(rms))
119
+ feature_details["rms_energy"] = round(float(rms), 4)
120
 
121
  # Zero crossing rate
122
  zcr = np.mean(librosa.feature.zero_crossing_rate(y))
123
  features.append(float(zcr))
124
+ feature_details["zero_crossing_rate"] = round(float(zcr), 4)
125
 
126
  # Chroma (12๊ฐœ)
127
  chroma = librosa.feature.chroma_stft(y=y, sr=sr)
128
+ chroma_mean = np.mean(chroma, axis=1).tolist()
129
+ features.extend(chroma_mean)
130
+ feature_details["chroma_mean"] = [round(v, 4) for v in chroma_mean[:5]] # ์ฒ˜์Œ 5๊ฐœ๋งŒ
131
+
132
+ # ํ”ผ์น˜ ์ถ”์ •
133
+ pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
134
+ pitch_values = []
135
+ for t in range(pitches.shape[1]):
136
+ index = magnitudes[:, t].argmax()
137
+ pitch = pitches[index, t]
138
+ if pitch > 0:
139
+ pitch_values.append(pitch)
140
+ median_pitch = np.median(pitch_values) if pitch_values else 0
141
+ feature_details["estimated_pitch_hz"] = round(float(median_pitch), 2)
142
+
143
+ # ์Œ์ƒ‰ ๋ถ„์„
144
+ if spectral_centroid > 3000:
145
+ brightness = "bright"
146
+ elif spectral_centroid > 1500:
147
+ brightness = "neutral"
148
+ else:
149
+ brightness = "dark"
150
+ feature_details["brightness"] = brightness
151
+
152
+ # ์—๋„ˆ์ง€ ๋ถ„์„
153
+ if rms > 0.1:
154
+ intensity = "powerful"
155
+ elif rms > 0.03:
156
+ intensity = "moderate"
157
+ else:
158
+ intensity = "soft"
159
+ feature_details["intensity"] = intensity
160
 
161
  # Pad or truncate to output_dim
162
  if len(features) < self.output_dim:
 
164
  else:
165
  features = features[:self.output_dim]
166
 
167
+ return {
168
+ "features": features,
169
+ "details": feature_details,
170
+ "duration_sec": round(duration, 2),
171
+ "sample_rate": sr
172
+ }
173
 
174
  except Exception as e:
175
+ print(f"[AudioEncoder] โŒ ํŠน์ง• ์ถ”์ถœ ์‹คํŒจ: {e}")
176
+ return {
177
+ "features": [0.0] * self.output_dim,
178
+ "details": {"error": str(e)},
179
+ "duration_sec": 0,
180
+ "sample_rate": self.sr
181
+ }
182
 
183
 
184
  class AIEffector:
 
205
  # ์˜ค๋””์˜ค ์ธ์ฝ”๋”
206
  self.audio_encoder = AudioEncoder(output_dim=audio_feature_dim)
207
 
208
+ # ์š”์ฒญ ์นด๏ฟฝ๏ฟฝํ„ฐ
209
+ self.request_count = 0
210
+
211
  # ๋ชจ๋ธ ๋กœ๋“œ ์‹œ๋„
212
  self._load_model()
213
 
214
  def _load_model(self):
215
  """๋ชจ๋ธ ๋กœ๋“œ"""
216
  try:
217
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
218
  from peft import PeftModel
219
 
220
  print(f"[AIEffector] ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
 
230
  if self.tokenizer.pad_token is None:
231
  self.tokenizer.pad_token = self.tokenizer.eos_token
232
 
233
+ # 4bit ์–‘์žํ™” ์„ค์ • (๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ)
234
+ quantization_config = None
235
+ if torch.cuda.is_available():
236
+ try:
237
+ quantization_config = BitsAndBytesConfig(
238
+ load_in_4bit=True,
239
+ bnb_4bit_compute_dtype=torch.float16,
240
+ bnb_4bit_use_double_quant=True,
241
+ bnb_4bit_quant_type="nf4"
242
+ )
243
+ print(f" - 4bit ์–‘์žํ™” ํ™œ์„ฑํ™”")
244
+ except Exception as e:
245
+ print(f" - 4bit ์–‘์žํ™” ์‹คํŒจ, ๊ธฐ๋ณธ ๋กœ๋“œ: {e}")
246
+
247
  # ๋ฒ ์ด์Šค ๋ชจ๋ธ ๋กœ๋“œ
248
  base_model = AutoModelForCausalLM.from_pretrained(
249
  self.base_model_name,
250
+ quantization_config=quantization_config,
251
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
252
  device_map="auto" if torch.cuda.is_available() else None,
253
  trust_remote_code=True,
 
260
  self.model = PeftModel.from_pretrained(
261
  base_model,
262
  self.model_repo_id,
263
+ subfolder=self.model_subfolder,
264
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
265
  )
266
  else:
 
267
  local_path = os.path.join(self.model_repo_id, self.model_subfolder)
268
  print(f"[AIEffector] ๋กœ์ปฌ์—์„œ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ: {local_path}")
269
  self.model = PeftModel.from_pretrained(
 
290
  params = DEFAULT_PARAMETERS.copy()
291
  prompt_lower = prompt.lower()
292
 
293
+ matched_presets = []
294
  for style_name, style_params in STYLE_PRESETS.items():
295
  if style_name in prompt_lower:
296
  params.update(style_params)
297
+ matched_presets.append(style_name)
298
+
299
+ if matched_presets:
300
+ print(f" [Preset] ๋งค์นญ๋œ ํ”„๋ฆฌ์…‹: {matched_presets}")
301
 
302
  return params
303
 
304
  def _format_prompt(self, text_prompt: str, audio_features: List[float]) -> str:
305
  """LLM ์ž…๋ ฅ ํ”„๋กฌํ”„ํŠธ ํฌ๋งทํŒ…"""
 
306
  audio_summary = ", ".join([f"{v:.3f}" for v in audio_features[:8]])
307
 
308
  prompt = f"""You are an audio effect parameter predictor.
 
341
  def _parse_output(self, output_text: str) -> Dict[str, float]:
342
  """LLM ์ถœ๋ ฅ์—์„œ ํŒŒ๋ผ๋ฏธํ„ฐ ์ถ”์ถœ"""
343
  try:
 
344
  json_match = re.search(r'\{[^{}]*\}', output_text, re.DOTALL)
345
  if json_match:
346
  params = json.loads(json_match.group())
347
 
 
348
  result = DEFAULT_PARAMETERS.copy()
349
  for key, value in params.items():
350
  if key in result and isinstance(value, (int, float)):
 
352
 
353
  return result
354
  except Exception as e:
355
+ print(f" [Parse] โŒ ์ถœ๋ ฅ ํŒŒ์‹ฑ ์‹คํŒจ: {e}")
356
 
357
  return DEFAULT_PARAMETERS.copy()
358
 
359
  def predict(self, audio_path: str, text_prompt: str = "") -> Dict[str, float]:
360
+ """ํŒŒ๋ผ๋ฏธํ„ฐ ์˜ˆ์ธก (์ƒ์„ธ ๋กœ๊ทธ ํฌํ•จ)"""
361
+
362
+ self.request_count += 1
363
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
364
+
365
+ print(f"\n{'='*60}")
366
+ print(f"[AIEffector] ๐ŸŽต ์š”์ฒญ #{self.request_count} - {timestamp}")
367
+ print(f"{'='*60}")
368
+ print(f" ๐Ÿ“‚ ์˜ค๋””์˜ค ํŒŒ์ผ: {Path(audio_path).name}")
369
+ print(f" ๐Ÿ’ฌ ํ…์ŠคํŠธ ํ”„๋กฌํ”„ํŠธ: '{text_prompt}'")
370
+ print(f" ๐Ÿค– ๋ชจ๋ธ ์ƒํƒœ: {'AI ๋ชจ๋“œ' if self.is_loaded() else 'ํ”„๋ฆฌ์…‹ ๋ชจ๋“œ'}")
371
 
372
  # ๋ชจ๋ธ์ด ์—†์œผ๋ฉด ํ”„๋ฆฌ์…‹ ์‚ฌ์šฉ
373
  if not self.is_loaded():
374
+ print(f"\n โš ๏ธ AI ๋ชจ๋ธ ๏ฟฝ๏ฟฝ๏ฟฝ๋กœ๋“œ - ํ”„๋ฆฌ์…‹ ๋ชจ๋“œ ์‚ฌ์šฉ")
375
+ params = self._apply_preset(text_prompt)
376
+ self._log_parameters(params)
377
+ return params
378
 
379
  try:
380
+ # 1. ์˜ค๋””์˜ค ํŠน์ง• ์ถ”์ถœ
381
+ print(f"\n ๐Ÿ“Š [Step 1] ์˜ค๋””์˜ค ํŠน์ง• ์ถ”์ถœ ์ค‘...")
382
+ audio_result = self.audio_encoder.get_audio_features(audio_path)
383
+ audio_features = audio_result["features"]
384
+ audio_details = audio_result["details"]
385
+
386
+ print(f" - ์˜ค๋””์˜ค ๊ธธ์ด: {audio_result['duration_sec']}์ดˆ")
387
+ print(f" - ์ƒ˜ํ”Œ๋ ˆ์ดํŠธ: {audio_result['sample_rate']}Hz")
388
+ print(f" - ์ถ”์ • ํ”ผ์น˜: {audio_details.get('estimated_pitch_hz', 'N/A')}Hz")
389
+ print(f" - ๋ฐ๊ธฐ: {audio_details.get('brightness', 'N/A')}")
390
+ print(f" - ๊ฐ•๋„: {audio_details.get('intensity', 'N/A')}")
391
+ print(f" - Spectral Centroid: {audio_details.get('spectral_centroid', 'N/A')}")
392
+ print(f" - RMS Energy: {audio_details.get('rms_energy', 'N/A')}")
393
+ print(f" - ํŠน์ง• ๋ฒกํ„ฐ (์ฒ˜์Œ 8๊ฐœ): {[round(v, 3) for v in audio_features[:8]]}")
394
 
395
+ # 2. LLM ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ
396
+ print(f"\n ๐Ÿ”ค [Step 2] LLM ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ ์ค‘...")
397
  prompt = self._format_prompt(text_prompt, audio_features)
398
+ print(f" - ํ”„๋กฌํ”„ํŠธ ๊ธธ์ด: {len(prompt)} ๋ฌธ์ž")
399
 
400
+ # 3. ํ† ํฐํ™”
401
+ print(f"\n ๐Ÿ”ข [Step 3] ํ† ํฐํ™” ์ค‘...")
402
  inputs = self.tokenizer(
403
  prompt,
404
  return_tensors="pt",
405
  truncation=True,
406
  max_length=1024
407
  ).to(self.device)
408
+ print(f" - ์ž…๋ ฅ ํ† ํฐ ์ˆ˜: {inputs['input_ids'].shape[1]}")
409
+
410
+ # 4. LLM ์ƒ์„ฑ
411
+ print(f"\n ๐Ÿง  [Step 4] LLM ์ถ”๋ก  ์ค‘...")
412
+ import time
413
+ start_time = time.time()
414
 
 
415
  with torch.no_grad():
416
  outputs = self.model.generate(
417
  **inputs,
 
421
  pad_token_id=self.tokenizer.pad_token_id
422
  )
423
 
424
+ inference_time = time.time() - start_time
425
+ print(f" - ์ถ”๋ก  ์‹œ๊ฐ„: {inference_time:.2f}์ดˆ")
426
+ print(f" - ์ถœ๋ ฅ ํ† ํฐ ์ˆ˜: {outputs.shape[1]}")
427
+
428
+ # 5. ๋””์ฝ”๋”ฉ
429
+ print(f"\n ๐Ÿ“ [Step 5] ์ถœ๋ ฅ ๋””์ฝ”๋”ฉ ์ค‘...")
430
  output_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
431
 
432
+ # JSON ๋ถ€๋ถ„๋งŒ ์ถ”์ถœํ•ด์„œ ๋กœ๊ทธ
433
+ json_match = re.search(r'\{[^{}]*\}', output_text, re.DOTALL)
434
+ if json_match:
435
+ print(f" - LLM ์ถœ๋ ฅ JSON:\n{json_match.group()}")
436
+
437
+ # 6. ํŒŒ์‹ฑ
438
+ print(f"\n ๐Ÿ”ง [Step 6] ํŒŒ๋ผ๋ฏธํ„ฐ ํŒŒ์‹ฑ ์ค‘...")
439
  params = self._parse_output(output_text)
440
 
441
+ # 7. ๊ฒฐ๊ณผ ๋กœ๊น…
442
+ self._log_parameters(params)
443
+
444
+ print(f"\n โœ… AI ์˜ˆ์ธก ์™„๋ฃŒ!")
445
+ print(f"{'='*60}\n")
446
+
447
  return params
448
 
449
  except Exception as e:
450
+ print(f"\n โŒ ์˜ˆ์ธก ์‹คํŒจ: {e}")
451
+ print(f" โš ๏ธ ํ”„๋ฆฌ์…‹์œผ๋กœ ํด๋ฐฑ...")
452
+ params = self._apply_preset(text_prompt)
453
+ self._log_parameters(params)
454
+ return params
455
+
456
def _log_parameters(self, params: Dict[str, float]):
    """Print the predicted effect parameters, grouped by processing stage.

    Args:
        params: Mapping from parameter key (for example
            ``"eq_peak1.params.freq"``) to its value. A key that is
            absent is reported as 0.

    Logging is best-effort: a value that cannot be rendered with its
    numeric format spec (``None``, a string, ...) is printed with
    ``repr`` instead of raising, so a malformed entry cannot abort the
    caller that is only trying to log its result.
    """

    def render(key: str, spec: str) -> str:
        # format(value, spec) is what an f-string "{value:spec}" does, so
        # numeric values come out exactly as before.
        value = params.get(key, 0)
        try:
            return format(value, spec)
        except (TypeError, ValueError):
            return repr(value)

    # (section title, ((label, parameter key, format spec, suffix), ...))
    layout = (
        ("[EQ Peak 1]", (
            ("Freq", "eq_peak1.params.freq", ".1f", " Hz"),
            ("Gain", "eq_peak1.params.gain", ".2f", " dB"),
            ("Q", "eq_peak1.params.q", ".2f", ""),
        )),
        ("[EQ Peak 2]", (
            ("Freq", "eq_peak2.params.freq", ".1f", " Hz"),
            ("Gain", "eq_peak2.params.gain", ".2f", " dB"),
            ("Q", "eq_peak2.params.q", ".2f", ""),
        )),
        ("[Low Shelf]", (
            ("Freq", "eq_lowshelf.params.freq", ".1f", " Hz"),
            ("Gain", "eq_lowshelf.params.gain", ".2f", " dB"),
        )),
        ("[High Shelf]", (
            ("Freq", "eq_highshelf.params.freq", ".1f", " Hz"),
            ("Gain", "eq_highshelf.params.gain", ".2f", " dB"),
        )),
        ("[Effects]", (
            ("Distortion", "distortion_amount", ".3f", ""),
            ("Delay Time", "delay.delay_time", ".3f", "s"),
            ("Delay Feedback", "delay.feedback", ".2f", ""),
            ("Delay Mix", "delay.mix", ".2f", ""),
            ("Final Wet Mix", "final_wet_mix", ".2f", ""),
        )),
    )

    # Header line is reproduced exactly as it appears in the file.
    print("\n ๐Ÿ“‹ ์˜ˆ์ธก๋œ ํŒŒ๋ผ๋ฏธํ„ฐ:")
    for title, rows in layout:
        print(f" {title}")
        for label, key, spec, suffix in rows:
            print(f" - {label}: {render(key, spec)}{suffix}")