heybaeheef committed on
Commit
3bfa04b
·
1 Parent(s): 70de690

Fix: Add DiffVox parameter conversion (sigmoid/minmax)

Browse files
Files changed (1) hide show
  1. audio_processing/effect_chain.py +181 -46
audio_processing/effect_chain.py CHANGED
@@ -1,10 +1,12 @@
1
  """
2
- Effect Chain - Pedalboard ๊ธฐ๋ฐ˜ ์˜ค๋””์˜ค ์ดํŽ™ํŠธ ์ฒ˜๋ฆฌ
3
- =================================================
 
4
  """
5
 
6
  import numpy as np
7
  import soundfile as sf
 
8
  from typing import Dict, List, Optional
9
  from pedalboard import (
10
  Pedalboard,
@@ -20,17 +22,134 @@ from pedalboard import (
20
  )
21
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  class EffectChain:
24
- """Pedalboard ๊ธฐ๋ฐ˜ ์ดํŽ™ํŠธ ์ฒด์ธ"""
25
 
26
  def __init__(self, sample_rate: int = 44100):
27
  self.sample_rate = sample_rate
 
28
 
29
  self.available_effects = [
30
  "eq_peak1", "eq_peak2",
31
  "eq_lowshelf", "eq_highshelf",
32
- "distortion", "delay", "compressor",
33
- "reverb", "limiter"
34
  ]
35
 
36
  def get_available_effects(self) -> List[str]:
@@ -38,11 +157,11 @@ class EffectChain:
38
  return self.available_effects
39
 
40
  def _build_pedalboard(self, params: Dict[str, float]) -> Pedalboard:
41
- """ํŒŒ๋ผ๋ฏธํ„ฐ๋กœ Pedalboard ๊ตฌ์„ฑ"""
42
 
43
  effects = []
44
 
45
- # Compressor (ํ•ญ์ƒ ์ ์šฉ)
46
  effects.append(Compressor(
47
  threshold_db=-18.0,
48
  ratio=2.0,
@@ -50,67 +169,72 @@ class EffectChain:
50
  release_ms=100.0
51
  ))
52
 
53
- # EQ Peak 1
54
- freq1 = params.get("eq_peak1.params.freq", 1000.0)
55
- gain1 = params.get("eq_peak1.params.gain", 0.0)
56
- q1 = params.get("eq_peak1.params.q", 1.0)
57
  if abs(gain1) > 0.1:
 
 
 
58
  effects.append(PeakFilter(
59
  cutoff_frequency_hz=max(20, min(20000, freq1)),
60
- gain_db=max(-12, min(12, gain1)),
61
  q=max(0.1, min(10, q1))
62
  ))
63
 
64
- # EQ Peak 2
65
- freq2 = params.get("eq_peak2.params.freq", 4000.0)
66
- gain2 = params.get("eq_peak2.params.gain", 0.0)
67
- q2 = params.get("eq_peak2.params.q", 1.0)
68
  if abs(gain2) > 0.1:
 
 
 
69
  effects.append(PeakFilter(
70
  cutoff_frequency_hz=max(20, min(20000, freq2)),
71
- gain_db=max(-12, min(12, gain2)),
72
  q=max(0.1, min(10, q2))
73
  ))
74
 
75
- # Low Shelf
76
- freq_low = params.get("eq_lowshelf.params.freq", 200.0)
77
- gain_low = params.get("eq_lowshelf.params.gain", 0.0)
78
  if abs(gain_low) > 0.1:
 
 
79
  effects.append(LowShelfFilter(
80
  cutoff_frequency_hz=max(20, min(2000, freq_low)),
81
- gain_db=max(-12, min(12, gain_low)),
82
  q=0.707
83
  ))
84
 
85
- # High Shelf
86
- freq_high = params.get("eq_highshelf.params.freq", 8000.0)
87
- gain_high = params.get("eq_highshelf.params.gain", 0.0)
88
  if abs(gain_high) > 0.1:
 
 
89
  effects.append(HighShelfFilter(
90
  cutoff_frequency_hz=max(1000, min(20000, freq_high)),
91
- gain_db=max(-12, min(12, gain_high)),
92
  q=0.707
93
  ))
94
 
95
- # Distortion
96
- dist_amount = params.get("distortion_amount", 0.0)
97
- if dist_amount > 0.01:
98
- effects.append(Distortion(
99
- drive_db=max(0, min(20, dist_amount * 100))
100
- ))
101
 
102
- # Delay
103
- delay_time = params.get("delay.delay_time", 0.02)
104
- delay_feedback = params.get("delay.feedback", 0.3)
105
- delay_mix = params.get("delay.mix", 0.2)
106
  if delay_mix > 0.01:
 
 
 
107
  effects.append(Delay(
108
  delay_seconds=max(0.01, min(1.0, delay_time)),
109
  feedback=max(0.0, min(0.9, delay_feedback)),
110
  mix=max(0.0, min(1.0, delay_mix))
111
  ))
112
 
113
- # Limiter (ํ•ญ์ƒ ๋งˆ์ง€๋ง‰์—)
114
  effects.append(Limiter(threshold_db=-1.0))
115
 
116
  return Pedalboard(effects)
@@ -123,7 +247,7 @@ class EffectChain:
123
  ) -> bool:
124
  """์˜ค๋””์˜ค ํŒŒ์ผ ์ฒ˜๋ฆฌ"""
125
  try:
126
- # ์˜ค๋””์˜ค ๋กœ๋“œ
127
  audio, sr = sf.read(input_path)
128
 
129
  # ๋ชจ๋…ธ/์Šคํ…Œ๋ ˆ์˜ค ์ฒ˜๋ฆฌ
@@ -133,15 +257,24 @@ class EffectChain:
133
  # float32๋กœ ๋ณ€ํ™˜
134
  audio = audio.astype(np.float32)
135
 
136
- # Pedalboard ๊ตฌ์„ฑ
137
- board = self._build_pedalboard(parameters)
 
 
 
 
 
 
 
 
 
138
 
139
- # ์ฒ˜๋ฆฌ
140
  processed = board(audio, sr)
141
 
142
- # Wet/Dry ๋ฏน์Šค
143
- wet_mix = parameters.get("final_wet_mix", 0.5)
144
- wet_mix = max(0.0, min(1.0, wet_mix))
145
 
146
  # ๊ธธ์ด ๋งž์ถ”๊ธฐ
147
  min_len = min(len(audio), len(processed))
@@ -150,12 +283,14 @@ class EffectChain:
150
  # ํด๋ฆฌํ•‘ ๋ฐฉ์ง€
151
  output = np.clip(output, -1.0, 1.0)
152
 
153
- # ์ €์žฅ
154
  sf.write(output_path, output, sr)
155
 
156
- print(f"[EffectChain] โœ… ์ฒ˜๋ฆฌ ์™„๋ฃŒ: {output_path}")
157
  return True
158
 
159
  except Exception as e:
160
- print(f"[EffectChain] โŒ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
 
 
161
  raise e
 
1
  """
2
+ Effect Chain - DiffVox ํŒŒ๋ผ๋ฏธํ„ฐ ํ˜ธํ™˜ ๋ฒ„์ „
3
+ ==========================================
4
+ LLM์ด ์ถœ๋ ฅํ•˜๋Š” DiffVox ํ˜•์‹ ํŒŒ๋ผ๋ฏธํ„ฐ๋ฅผ Pedalboard ์ดํŽ™ํŠธ๋กœ ๋ณ€ํ™˜
5
  """
6
 
7
  import numpy as np
8
  import soundfile as sf
9
+ import torch
10
  from typing import Dict, List, Optional
11
  from pedalboard import (
12
  Pedalboard,
 
22
  )
23
 
24
 
25
class ParameterConverter:
    """Convert raw DiffVox-style parameters into values Pedalboard can use.

    Raw parameters arrive as a flat ``{key: number}`` mapping. Some entries
    are unconstrained values that must be squashed with a sigmoid (optionally
    rescaled to a ``[min, max]`` range); others (gains) are used directly.

    Fallback values for missing keys are expressed in *final* units and are
    therefore returned as-is, never passed through the sigmoid.
    """

    def __init__(self, sr: int = 44100):
        """Store the sample rate; ``sr / 2`` is the upper frequency bound."""
        self.sr = sr

    def sigmoid(self, x: float) -> float:
        """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*.

        Computed as ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` so that
        large negative inputs do not overflow ``exp``.
        """
        return np.exp(-np.logaddexp(0.0, -x))

    def minmax(self, x: float, min_val: float, max_val: float) -> float:
        """Map *x* through the sigmoid onto the range ``[min_val, max_val]``."""
        return self.sigmoid(x) * (max_val - min_val) + min_val

    def convert_freq(self, original_value: float, min_freq: float = 20.0) -> float:
        """Map a raw frequency value onto ``[min_freq, sr / 2]`` (Nyquist)."""
        max_freq = self.sr / 2.0
        return self.minmax(original_value, min_freq, max_freq)

    def convert_q(self, original_value: float) -> float:
        """Map a raw Q value onto ``[0.1, 10.0]``."""
        return self.minmax(original_value, 0.1, 10.0)

    def convert_params(self, raw_params: Dict[str, float]) -> Dict[str, float]:
        """Translate a raw parameter mapping into ready-to-use values.

        Example raw entries:
            - ``eq_peak1.params.gain: -0.35`` (a dB value, used directly)
            - ``eq_peak1.params.parametrizations.freq.original: -2.57``
              (needs conversion)
            - ``eq_peak1.params.parametrizations.Q.original: -4.13``
              (needs conversion)

        Returns:
            A dict with the keys ``eq_peak{1,2}_{freq,gain,q}``,
            ``eq_{lowshelf,highshelf}_{freq,gain}``, ``distortion``,
            ``delay_time``, ``delay_feedback``, ``delay_mix`` and
            ``final_wet_mix``. Keys missing from *raw_params* yield their
            fallback value unchanged.
        """
        converted = {}

        # EQ Peak 1
        converted["eq_peak1_freq"] = self._get_freq(raw_params, "eq_peak1")
        converted["eq_peak1_gain"] = self._get_gain(raw_params, "eq_peak1")
        converted["eq_peak1_q"] = self._get_q(raw_params, "eq_peak1")

        # EQ Peak 2 (4 kHz fallback so it does not coincide with peak 1)
        converted["eq_peak2_freq"] = self._get_freq(raw_params, "eq_peak2", default=4000.0)
        converted["eq_peak2_gain"] = self._get_gain(raw_params, "eq_peak2")
        converted["eq_peak2_q"] = self._get_q(raw_params, "eq_peak2")

        # Low shelf
        converted["eq_lowshelf_freq"] = self._get_freq(raw_params, "eq_lowshelf", default=200.0)
        converted["eq_lowshelf_gain"] = self._get_gain(raw_params, "eq_lowshelf")

        # High shelf
        converted["eq_highshelf_freq"] = self._get_freq(raw_params, "eq_highshelf", default=8000.0)
        converted["eq_highshelf_gain"] = self._get_gain(raw_params, "eq_highshelf")

        # Distortion: sigmoid(x) * 0.1 when provided; otherwise 0.0 (off).
        # Feeding the 0.0 fallback through the sigmoid would give 0.05.
        if "distortion_amount" in raw_params:
            converted["distortion"] = self.sigmoid(raw_params["distortion_amount"]) * 0.1
        else:
            converted["distortion"] = 0.0

        # Delay: the time is passed through as given; feedback and mix are
        # squashed to (0, 1) only when actually supplied.
        converted["delay_time"] = raw_params.get("delay.delay_time", 0.02)
        converted["delay_feedback"] = self._sigmoid_or_default(raw_params, "delay.feedback", 0.3)
        converted["delay_mix"] = self._sigmoid_or_default(raw_params, "delay.mix", 0.2)

        # Final wet/dry mix
        converted["final_wet_mix"] = self._sigmoid_or_default(raw_params, "final_wet_mix", 0.5)

        return converted

    def _sigmoid_or_default(self, params: Dict, key: str, default: float) -> float:
        """Return ``sigmoid(params[key])`` if *key* is present, else *default*.

        *default* is already a final value, so it is not transformed.
        """
        if key in params:
            return self.sigmoid(params[key])
        return default

    def _get_freq(self, params: Dict, prefix: str, default: float = 1000.0) -> float:
        """Look up and convert the frequency for the effect named *prefix*.

        The ``parametrizations.freq.original`` key is always converted. The
        plain ``freq`` key is returned unchanged when it already lies within
        ``[20, sr / 2]`` (it may be a converted value) and converted
        otherwise. *default* is returned when neither key exists.
        """
        key_param = f"{prefix}.params.parametrizations.freq.original"
        key_direct = f"{prefix}.params.freq"

        if key_param in params:
            return self.convert_freq(params[key_param])
        if key_direct in params:
            val = params[key_direct]
            if 20 <= val <= self.sr / 2:
                return val
            return self.convert_freq(val)
        return default

    def _get_gain(self, params: Dict, prefix: str) -> float:
        """Return the gain in dB for *prefix*, clamped to ``[-12, +12]``.

        Gains need no conversion; a missing key yields ``0.0``.
        """
        gain = params.get(f"{prefix}.params.gain", 0.0)
        return max(-12.0, min(12.0, gain))

    def _get_q(self, params: Dict, prefix: str, default: float = 1.0) -> float:
        """Look up and convert the Q value for the effect named *prefix*.

        The ``parametrizations.Q.original`` key is always converted. The
        plain ``q`` key, then the plain ``Q`` key, is returned unchanged when
        it lies within ``[0.1, 10]`` and converted otherwise. *default* is
        returned when no key exists.
        """
        key_param = f"{prefix}.params.parametrizations.Q.original"
        if key_param in params:
            return self.convert_q(params[key_param])

        # Lower-case spelling takes precedence over upper-case.
        for key_direct in (f"{prefix}.params.q", f"{prefix}.params.Q"):
            if key_direct in params:
                val = params[key_direct]
                if 0.1 <= val <= 10:
                    return val
                return self.convert_q(val)
        return default
140
+
141
+
142
  class EffectChain:
143
+ """DiffVox ํ˜ธํ™˜ ์ดํŽ™ํŠธ ์ฒด์ธ"""
144
 
145
  def __init__(self, sample_rate: int = 44100):
146
  self.sample_rate = sample_rate
147
+ self.converter = ParameterConverter(sr=sample_rate)
148
 
149
  self.available_effects = [
150
  "eq_peak1", "eq_peak2",
151
  "eq_lowshelf", "eq_highshelf",
152
+ "distortion", "delay", "compressor"
 
153
  ]
154
 
155
  def get_available_effects(self) -> List[str]:
 
157
  return self.available_effects
158
 
159
  def _build_pedalboard(self, params: Dict[str, float]) -> Pedalboard:
160
+ """๋ณ€ํ™˜๋œ ํŒŒ๋ผ๋ฏธํ„ฐ๋กœ Pedalboard ๊ตฌ์„ฑ"""
161
 
162
  effects = []
163
 
164
+ # 1. Compressor (ํ•ญ์ƒ ์ ์šฉ)
165
  effects.append(Compressor(
166
  threshold_db=-18.0,
167
  ratio=2.0,
 
169
  release_ms=100.0
170
  ))
171
 
172
+ # 2. EQ Peak 1
173
+ gain1 = params["eq_peak1_gain"]
 
 
174
  if abs(gain1) > 0.1:
175
+ freq1 = params["eq_peak1_freq"]
176
+ q1 = params["eq_peak1_q"]
177
+ print(f" [EQ Peak 1] freq={freq1:.1f}Hz, gain={gain1:.2f}dB, Q={q1:.2f}")
178
  effects.append(PeakFilter(
179
  cutoff_frequency_hz=max(20, min(20000, freq1)),
180
+ gain_db=gain1,
181
  q=max(0.1, min(10, q1))
182
  ))
183
 
184
+ # 3. EQ Peak 2
185
+ gain2 = params["eq_peak2_gain"]
 
 
186
  if abs(gain2) > 0.1:
187
+ freq2 = params["eq_peak2_freq"]
188
+ q2 = params["eq_peak2_q"]
189
+ print(f" [EQ Peak 2] freq={freq2:.1f}Hz, gain={gain2:.2f}dB, Q={q2:.2f}")
190
  effects.append(PeakFilter(
191
  cutoff_frequency_hz=max(20, min(20000, freq2)),
192
+ gain_db=gain2,
193
  q=max(0.1, min(10, q2))
194
  ))
195
 
196
+ # 4. Low Shelf
197
+ gain_low = params["eq_lowshelf_gain"]
 
198
  if abs(gain_low) > 0.1:
199
+ freq_low = params["eq_lowshelf_freq"]
200
+ print(f" [Low Shelf] freq={freq_low:.1f}Hz, gain={gain_low:.2f}dB")
201
  effects.append(LowShelfFilter(
202
  cutoff_frequency_hz=max(20, min(2000, freq_low)),
203
+ gain_db=gain_low,
204
  q=0.707
205
  ))
206
 
207
+ # 5. High Shelf
208
+ gain_high = params["eq_highshelf_gain"]
 
209
  if abs(gain_high) > 0.1:
210
+ freq_high = params["eq_highshelf_freq"]
211
+ print(f" [High Shelf] freq={freq_high:.1f}Hz, gain={gain_high:.2f}dB")
212
  effects.append(HighShelfFilter(
213
  cutoff_frequency_hz=max(1000, min(20000, freq_high)),
214
+ gain_db=gain_high,
215
  q=0.707
216
  ))
217
 
218
+ # 6. Distortion
219
+ dist = params["distortion"]
220
+ if dist > 0.005:
221
+ drive_db = dist * 200 # 0.1 โ†’ 20dB
222
+ print(f" [Distortion] drive={drive_db:.1f}dB")
223
+ effects.append(Distortion(drive_db=min(20, drive_db)))
224
 
225
+ # 7. Delay
226
+ delay_mix = params["delay_mix"]
 
 
227
  if delay_mix > 0.01:
228
+ delay_time = params["delay_time"]
229
+ delay_feedback = params["delay_feedback"]
230
+ print(f" [Delay] time={delay_time:.3f}s, feedback={delay_feedback:.2f}, mix={delay_mix:.2f}")
231
  effects.append(Delay(
232
  delay_seconds=max(0.01, min(1.0, delay_time)),
233
  feedback=max(0.0, min(0.9, delay_feedback)),
234
  mix=max(0.0, min(1.0, delay_mix))
235
  ))
236
 
237
+ # 8. Limiter (ํ•ญ์ƒ ๋งˆ์ง€๋ง‰)
238
  effects.append(Limiter(threshold_db=-1.0))
239
 
240
  return Pedalboard(effects)
 
247
  ) -> bool:
248
  """์˜ค๋””์˜ค ํŒŒ์ผ ์ฒ˜๋ฆฌ"""
249
  try:
250
+ # 1. ์˜ค๋””์˜ค ๋กœ๋“œ
251
  audio, sr = sf.read(input_path)
252
 
253
  # ๋ชจ๋…ธ/์Šคํ…Œ๋ ˆ์˜ค ์ฒ˜๋ฆฌ
 
257
  # float32๋กœ ๋ณ€ํ™˜
258
  audio = audio.astype(np.float32)
259
 
260
+ # 2. ํŒŒ๋ผ๋ฏธํ„ฐ ๋ณ€ํ™˜
261
+ print(f"\n [EffectChain] ํŒŒ๋ผ๋ฏธํ„ฐ ๋ณ€ํ™˜ ์ค‘...")
262
+ converted_params = self.converter.convert_params(parameters)
263
+
264
+ print(f" [EffectChain] ๋ณ€ํ™˜๋œ ํŒŒ๋ผ๋ฏธํ„ฐ:")
265
+ for key, value in converted_params.items():
266
+ print(f" {key}: {value:.4f}")
267
+
268
+ # 3. Pedalboard ๊ตฌ์„ฑ
269
+ print(f"\n [EffectChain] ์ดํŽ™ํŠธ ์ฒด์ธ ๊ตฌ์„ฑ ์ค‘...")
270
+ board = self._build_pedalboard(converted_params)
271
 
272
+ # 4. ์ดํŽ™ํŠธ ์ ์šฉ
273
  processed = board(audio, sr)
274
 
275
+ # 5. Wet/Dry ๋ฏน์Šค
276
+ wet_mix = converted_params["final_wet_mix"]
277
+ print(f" [Mix] wet={wet_mix:.2f}, dry={1-wet_mix:.2f}")
278
 
279
  # ๊ธธ์ด ๋งž์ถ”๊ธฐ
280
  min_len = min(len(audio), len(processed))
 
283
  # ํด๋ฆฌํ•‘ ๋ฐฉ์ง€
284
  output = np.clip(output, -1.0, 1.0)
285
 
286
+ # 6. ์ €์žฅ
287
  sf.write(output_path, output, sr)
288
 
289
+ print(f"\n [EffectChain] โœ… ์ฒ˜๋ฆฌ ์™„๋ฃŒ: {output_path}")
290
  return True
291
 
292
  except Exception as e:
293
+ print(f" [EffectChain] โŒ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
294
+ import traceback
295
+ traceback.print_exc()
296
  raise e