naohiro701 committed on
Commit
86ef1c7
·
verified ·
1 Parent(s): c389cfc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -92
app.py CHANGED
@@ -1,110 +1,190 @@
1
  import numpy as np
2
  import matplotlib.pyplot as plt
3
- from matplotlib.animation import FuncAnimation
4
  from pydub import AudioSegment
5
- from scipy.fftpack import fft
6
  import streamlit as st
7
  import tempfile
8
  import os
9
  import subprocess
10
 
11
- # Define frequency ranges for musical notes based on 440Hz
12
- base_frequency = 440
13
- note_names = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"]
14
- colors = plt.cm.hsv(np.linspace(0, 1, len(note_names)))
15
 
16
- # Map frequency to color based on 440Hz intervals
17
  def frequency_to_color(freq):
18
- if freq < base_frequency / 2:
19
- return "gray" # Below audible range
20
-
21
- interval_index = int(np.log2(freq / base_frequency) * 12) % len(note_names)
22
- return colors[interval_index]
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # Streamlit App
25
  def main():
26
- st.title("MP3 Fourier Transform Visualizer: ドレミドリブン")
27
 
28
  uploaded_file = st.file_uploader("音声ファイルをアップロード (MP3)", type=["mp3"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- if uploaded_file is not None:
31
- # Convert MP3 to WAV for easier processing
32
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
33
- temp_mp3.write(uploaded_file.read())
34
- audio = AudioSegment.from_file(temp_mp3.name)
35
-
36
- samples = np.array(audio.get_array_of_samples())
37
- sample_rate = audio.frame_rate
38
-
39
- # Normalize samples
40
- if audio.channels == 2:
41
- samples = samples.reshape((-1, 2))
42
- samples = samples.mean(axis=1) # Convert to mono
43
-
44
- # Define FFT parameters
45
- chunk_size = 2048 # Number of samples per frame
46
- overlap = 1024 # Overlap between frames
47
- step_size = chunk_size - overlap
48
-
49
- # Calculate the FFT for each chunk
50
- freqs = np.fft.rfftfreq(chunk_size, d=1/sample_rate)
51
- n_chunks = (len(samples) - chunk_size) // step_size + 1
52
- fft_frames = []
53
-
54
- for i in range(n_chunks):
55
- chunk = samples[i * step_size:i * step_size + chunk_size]
56
- windowed = chunk * np.hanning(len(chunk))
57
- spectrum = np.abs(fft(windowed)[:len(freqs)])
58
- fft_frames.append(spectrum)
59
-
60
- fft_frames = np.array(fft_frames)
61
-
62
- # Create animation with artistic elements
63
- fig, ax = plt.subplots(facecolor="black")
64
- line, = ax.plot(freqs, fft_frames[0], lw=2)
65
- ax.set_xlim(0, np.max(freqs))
66
- ax.set_ylim(0, np.max(fft_frames))
67
- ax.set_xlabel("Frequency (Hz)", color="white")
68
- ax.set_ylabel("Amplitude", color="white")
69
- ax.set_title("音の周波数スペクトル", color="white")
70
- ax.tick_params(colors="white")
71
- fig.patch.set_facecolor("black")
72
-
73
- # Highlight 440Hz intervals with vertical lines
74
- for i in range(1, int(np.max(freqs) / base_frequency) + 1):
75
- ax.axvline(i * base_frequency, color="white", linestyle="--", alpha=0.5)
76
-
77
- def update(frame):
78
- line.set_ydata(fft_frames[frame])
79
- line.set_color(frequency_to_color(freqs[int(frame % len(freqs))]))
80
- ax.set_facecolor(plt.cm.viridis(frame / len(fft_frames))) # Dynamic background color
81
- return line,
82
-
83
- ani = FuncAnimation(fig, update, frames=len(fft_frames), blit=True)
84
-
85
- # Save animation to a temporary file
86
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
87
- ani.save(temp_video.name, fps=30, extra_args=['-vcodec', 'libx264'])
88
- video_path = temp_video.name
89
-
90
- # Merge audio and video using ffmpeg
91
- output_path = "output_art_video.mp4"
92
- audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
93
- audio.export(audio_path, format="wav")
94
-
95
- ffmpeg_command = [
96
- "ffmpeg", "-y", "-i", video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", output_path
97
- ]
98
- subprocess.run(ffmpeg_command)
99
-
100
- st.success("動画を生成しました!以下のリンクからダウンロードできます。")
101
- with open(output_path, "rb") as file:
102
- st.download_button(label="動画をダウンロード", data=file, file_name="output_art_video.mp4", mime="video/mp4")
103
-
104
- # Cleanup temporary files
105
- os.remove(temp_mp3.name)
106
- os.remove(video_path)
107
- os.remove(audio_path)
108
 
109
  if __name__ == "__main__":
110
  main()
 
1
  import numpy as np
2
  import matplotlib.pyplot as plt
3
+ from matplotlib.animation import FuncAnimation, FFMpegWriter
4
  from pydub import AudioSegment
5
+ from scipy.fft import rfft, rfftfreq
6
  import streamlit as st
7
  import tempfile
8
  import os
9
  import subprocess
10
 
11
+ # ----- Settings -----
+ # Module-level constants for the pitch-to-colour mapping.
12
+ BASE_FREQUENCY = 440 # reference pitch in Hz: the note A ("la")
13
+ # Twelve pitch-class names starting from A; not referenced by the code visible in this view.
+ NOTE_NAMES = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"]
 
14
 
 
15
  def frequency_to_color(freq):
16
+ """
17
+ 周波数 freq をノートに変換し、そのノートの色を返すサンプル関数。
18
+ カラーマップは hsv を使用し、12 音を 0 ~ 1 に均等に割り振るイメージ。
19
+ """
20
+ # 周波数が低すぎる場合はグレーなどに
21
+ if freq < BASE_FREQUENCY / 2:
22
+ return (0.5, 0.5, 0.5) # gray
23
+
24
+ semitone_index = int(round(12 * np.log2(freq / BASE_FREQUENCY)))
25
+ note_idx = semitone_index % 12
26
+
27
+ # HSV 空間で note_idx / 12 を色相に対応させる (S=1, V=1)
28
+ color_hsv = (note_idx / 12, 1.0, 1.0)
29
+ # matplotlib で hsv -> rgb 変換
30
+ import colorsys
31
+ return colorsys.hsv_to_rgb(*color_hsv)
32
 
 
33
  def main():
34
+ st.title("周波数特性アート動画生成")
35
 
36
  uploaded_file = st.file_uploader("音声ファイルをアップロード (MP3)", type=["mp3"])
37
+ if uploaded_file is None:
38
+ st.info("MP3ファイルをアップロードしてください。")
39
+ return
40
+
41
+ # ----- Step1: MP3 -> AudioSegment 変換 -----
42
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
43
+ temp_mp3.write(uploaded_file.read())
44
+ audio = AudioSegment.from_file(temp_mp3.name)
45
+
46
+ # ----- Step2: numpy配列化 -----
47
+ samples = np.array(audio.get_array_of_samples(), dtype=float)
48
+ sample_rate = audio.frame_rate
49
+
50
+ # ステレオ→モノラル
51
+ if audio.channels == 2:
52
+ samples = samples.reshape((-1, 2)).mean(axis=1)
53
+
54
+ # 正規化(-1~1)
55
+ samples /= np.iinfo(audio.array_type).max
56
+
57
+ st.write(f"サンプリングレート: {sample_rate} Hz")
58
+ st.write(f"サンプル数: {len(samples)}")
59
+
60
+ # ----- FFTパラメータ -----
61
+ chunk_size = 2048
62
+ overlap = 1024
63
+ step_size = chunk_size - overlap
64
+ freqs = rfftfreq(chunk_size, d=1/sample_rate)
65
+
66
+ # チャンク数
67
+ n_chunks = (len(samples) - chunk_size) // step_size + 1
68
+ st.write(f"フレーム数: {n_chunks}")
69
+
70
+ # ----- 各チャンクのFFTを計算し、ピーク周波数とスペクトル総量を保存 -----
71
+ peak_freqs = []
72
+ total_amps = [] # 全周波数成分の合計(ざっくり振幅)
73
+
74
+ window = np.hanning(chunk_size)
75
+
76
+ for i in range(n_chunks):
77
+ start = i * step_size
78
+ end = start + chunk_size
79
+ chunk = samples[start:end] * window
80
+ spectrum = np.abs(rfft(chunk))
81
+
82
+ peak_index = np.argmax(spectrum)
83
+ peak_freq = freqs[peak_index]
84
+ peak_freqs.append(peak_freq)
85
+
86
+ total_amp = np.sum(spectrum)
87
+ total_amps.append(total_amp)
88
+
89
+ peak_freqs = np.array(peak_freqs)
90
+ total_amps = np.array(total_amps)
91
+
92
+ # ----- Step3: Matplotlib アニメーション作成 -----
93
+ fig, ax = plt.subplots(figsize=(6, 6), facecolor="black")
94
+ ax.set_facecolor("black")
95
+ ax.set_xlim(-1, 1)
96
+ ax.set_ylim(-1, 1)
97
+ ax.set_aspect("equal")
98
+ ax.axis("off")
99
+
100
+ scatter_plot = ax.scatter([], [], s=10, c=[], alpha=0.8)
101
+
102
+ # 描画用の座標(ランダムに散らした点を固定しておき、フレームごとに色とサイズを変えるなど)
103
+ # あるいはスパイラルを生成しておき、それを変化させる
104
+ num_points = 200
105
+ angles = np.linspace(0, 4 * np.pi, num_points)
106
+ radii = np.linspace(0.05, 0.5, num_points)
107
+ x_base = radii * np.cos(angles)
108
+ y_base = radii * np.sin(angles)
109
+
110
+ def init():
111
+ scatter_plot.set_offsets([])
112
+ scatter_plot.set_array(np.array([]))
113
+ return (scatter_plot,)
114
+
115
+ def update(frame):
116
+ # フレームに応じてピーク周波数 -> 色
117
+ p_freq = peak_freqs[frame]
118
+ c = frequency_to_color(p_freq)
119
+
120
+ # 総エネルギー -> 大きさのスケーリング
121
+ amp_scale = total_amps[frame]
122
+ # 過剰に大きくならないように対数スケールをかける
123
+ amp_scale = np.log10(amp_scale + 1)
124
+
125
+ # スパイラル座標をフレームごとにちょ���とずつ変形したり回転したりしてみる
126
+ theta_shift = 0.1 * frame
127
+ x = x_base * (1 + 0.05 * np.sin(theta_shift))
128
+ y = y_base * (1 + 0.05 * np.cos(theta_shift))
129
+
130
+ # 回転
131
+ rot = 0.05 * frame
132
+ cos_r = np.cos(rot)
133
+ sin_r = np.sin(rot)
134
+ x_rot = x * cos_r - y * sin_r
135
+ y_rot = x * sin_r + y * cos_r
136
+
137
+ # 散布図に設定
138
+ coords = np.column_stack((x_rot, y_rot))
139
+ scatter_plot.set_offsets(coords)
140
+
141
+ # サイズと色を更新 (全点同じ色、サイズも統一)
142
+ sizes = (30 + 200 * amp_scale) * np.ones(num_points)
143
+ scatter_plot.set_sizes(sizes)
144
+ colors = np.array([c for _ in range(num_points)])
145
+ scatter_plot.set_facecolor(colors)
146
+
147
+ return (scatter_plot,)
148
+
149
+ ani = FuncAnimation(
150
+ fig, update, frames=n_chunks, init_func=init, blit=True, interval=10
151
+ )
152
+
153
+ # ----- Step4: Matplotlib アニメーションを一時ファイルに保存 (.mp4) -----
154
+ video_temp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
155
+ video_path = video_temp.name
156
+ video_temp.close()
157
+
158
+ writer = FFMpegWriter(fps=30, codec="libx264")
159
+ ani.save(video_path, writer=writer, dpi=150)
160
+ plt.close(fig) # Figure を閉じる
161
+
162
+ # ----- Step5: 音声と合成する -----
163
+ # まずは AudioSegment -> WAV 化(ffmpeg が aac にエンコードするので可)
164
+ audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
165
+ audio.export(audio_path, format="wav")
166
+
167
+ output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_output.mp4").name
168
+
169
+ ffmpeg_command = [
170
+ "ffmpeg", "-y",
171
+ "-i", video_path,
172
+ "-i", audio_path,
173
+ "-c:v", "copy",
174
+ "-c:a", "aac",
175
+ output_path
176
+ ]
177
+ subprocess.run(ffmpeg_command)
178
+
179
+ # ----- Step6: Streamlit に動画を表示 -----
180
+ st.video(output_path)
181
+
182
+ # ----- Cleanup -----
183
+ os.remove(video_path)
184
+ os.remove(audio_path)
185
+ os.remove(output_path)
186
+ os.remove(temp_mp3.name)
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
  # Script entry point: call main() only when this file is executed as the main module.
  if __name__ == "__main__":
190
  main()