Spaces:

naohiro701
/

Fourier_Transform_Visualizer

Sleeping

App Files Files Community

naohiro701 committed on Jan 16, 2025

Commit

f69cb22

verified ·

1 Parent(s): 90ea247

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -81

app.py CHANGED Viewed

@@ -10,136 +10,139 @@ from pydub import AudioSegment
 from scipy.fft import rfft, rfftfreq
 from scipy.signal import get_window
 def main():
-    st.title("フーリエ・サイケデリック・アート 2.0")
     uploaded_file = st.file_uploader("アートに変換する音声ファイルをアップロード (MP3)", type=["mp3"])
     if uploaded_file is not None:
-        st.write("**アップロード完了**。映像を生成中です…")
         # --- Step 1: MP3 -> WAV 変換 & numpy配列化 ---
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
             temp_mp3.write(uploaded_file.read())
             audio = AudioSegment.from_file(temp_mp3.name)
-        # numpy配列に変換（モノラル & 正規化）
         samples = np.array(audio.get_array_of_samples(), dtype=float)
         sample_rate = audio.frame_rate
-        if audio.channels == 2:  # ステレオ → モノラル
             samples = samples.reshape((-1, 2)).mean(axis=1)
         max_val = np.max(np.abs(samples))
         if max_val != 0:
             samples /= max_val
         # --- パラメータ設定 ---
-        chunk_size = 2048
-        overlap = 1024
         step_size = chunk_size - overlap
-        # ウィンドウ（ハニング窓）
         window = get_window("hann", chunk_size)
-        # 何個のチャンクを処理するか
         n_chunks = (len(samples) - chunk_size) // step_size + 1
         if n_chunks < 1:
             st.error("音声が短すぎて処理できません。もう少し長い音源をアップロードしてください。")
             return
-        # 実際に処理する音声の長さ（秒）
         total_seconds = n_chunks * (step_size / sample_rate)
-        # 音声が長過ぎる場合はトリミングして同期
         if len(audio) > int(total_seconds * 1000):
             audio = audio[: int(total_seconds * 1000)]
-        # FFT用の周波数軸
-        freqs = rfftfreq(chunk_size, d=1/sample_rate)
-        # 全チャンク分の FFT をまとめて計算
-        fft_frames = []
         for i in range(n_chunks):
             start = i * step_size
             end = start + chunk_size
             chunk = samples[start:end] * window
-            spec = np.abs(rfft(chunk))
-            fft_frames.append(spec)
-        fft_frames = np.array(fft_frames)
-        max_fft = np.max(fft_frames)
-        # --- Step 2: Matplotlib アニメーション ---
-        # ポーラ座標に変換（周波数→角度, 振幅→半径）
-        fig = plt.figure(figsize=(6, 6))
-        ax = fig.add_subplot(111, projection="polar")
         ax.set_facecolor("black")
-        fig.patch.set_facecolor("black")
-        plt.axis("off")  # 軸を消してアート感を高める
-        # 初期フレーム
-        theta = freqs / np.max(freqs) * 2 * np.pi
-        r_init = fft_frames[0]
-        # 散布図 (Polar Scatter)
-        sc = ax.scatter(
-            theta,
-            r_init,
-            c=r_init,
-            cmap="plasma",       # ここを "hsv", "twilight", "rainbow" などに変えてもOK
-            alpha=0.8,
-            s=(np.sqrt(r_init) + 1) * 60,  # 振幅に応じてサイズ変化
-            edgecolors="none"
-        )
-        ax.set_ylim(0, max_fft)
-        # カラースケールの固定
-        sc.set_clim(0, max_fft)
         def update(frame):
-            # FFT振幅
-            new_r = fft_frames[frame]
-            # スワール回転角
-            swirl = 2 * np.pi * (frame / 100.0)
-            # 周波数→角度 + スワール
-            new_theta = (theta + swirl) % (2 * np.pi)
-            # 散布図を更新
-            sc.set_offsets(np.column_stack((new_theta, new_r)))
-            # カラーをちょっと派手に変化させたい場合
-            color_data = (new_r + swirl * 10) % max_fft
-            sc.set_array(color_data)
-            # マーカーサイズもフレームごとに変化
-            sc.set_sizes((np.sqrt(new_r) + 1) * 60)
-            return (sc,)
-        # フレームレートを計算 (映像の全長が音声と揃うように)
-        # 1フレーム = step_size サンプル分 → その長さ: step_size / sample_rate (秒)
-        # 全フレーム数 = n_chunks
-        # 全体秒数 = n_chunks * (step_size / sample_rate)
-        # fps = n_chunks / 全体秒数 = sample_rate / step_size
-        fps = sample_rate / step_size
         ani = FuncAnimation(
             fig,
             update,
             frames=n_chunks,
-            interval=1000 / fps,  # ミリ秒間隔
             blit=True
         )
-        # --- Step 3: 一時的に動画ファイル(mp4)として保存 ---
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
             ani.save(temp_video.name, fps=fps, extra_args=["-vcodec", "libx264"])
             video_path = temp_video.name
-        # --- Step 4: 切り詰め済みの音声を WAV にエクスポート ---
         audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
         audio.export(audio_path, format="wav")
-        # --- Step 5: FFmpeg で動画と音声を合成 ---
         output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_output.mp4").name
         ffmpeg_command = [
             "ffmpeg", "-y",
@@ -147,16 +150,15 @@ def main():
             "-i", audio_path,
             "-c:v", "copy",
             "-c:a", "aac",
-            "-strict", "experimental",  # 必要に応じて
             output_path
         ]
         subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        # --- Step 6: Streamlit で再生 ---
-        st.write("**生成完了！** 以下の動画をお楽しみください。")
         st.video(output_path)
-        # 後処理
         os.remove(temp_mp3.name)
         os.remove(video_path)
         os.remove(audio_path)

 from scipy.fft import rfft, rfftfreq
 from scipy.signal import get_window
# Names of the 12 pitch classes, ordered so that C=0, C#=1, ..., B=11.
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F",
              "F#", "G", "G#", "A", "A#", "B"]


def freq_to_note_index(freq, base_freq=440.0):
    """Map a frequency to its pitch-class index (0-11, C=0 ... B=11).

    The frequency is converted to a whole number of semitones away from
    ``base_freq`` (``n = round(12 * log2(freq / base_freq))``).
    ``base_freq`` is treated as the note A, i.e. index 9, so the result is
    ``(9 + n) % 12``.  Examples with the default 440 Hz reference:
    440 Hz -> 9 ("A"), ~261.63 Hz -> 0 ("C").

    Args:
        freq: Frequency in Hz (Python or NumPy scalar).
        base_freq: Reference frequency in Hz that is mapped to "A".

    Returns:
        A plain Python ``int`` in ``range(12)``, or ``None`` when ``freq``
        has no pitch (zero, negative, NaN or infinite).

    Raises:
        ValueError: If ``base_freq`` is not a positive, finite number.
    """
    # NaN fails every comparison, so test finiteness explicitly.
    if not np.isfinite(freq) or freq <= 0:
        return None
    if not np.isfinite(base_freq) or base_freq <= 0:
        raise ValueError("base_freq must be a positive, finite frequency in Hz")

    ratio = float(freq) / float(base_freq)
    # Extreme ratios can underflow to 0 or overflow to inf; log2 of those
    # is not a usable semitone count.
    if not 0.0 < ratio < float("inf"):
        return None

    # Force a built-in int: the value is used as an array index by callers,
    # and round() on a NumPy scalar is not guaranteed to return one.
    semitone = int(round(12.0 * float(np.log2(ratio))))

    # The reference pitch is "A", which sits at index 9 in NOTE_NAMES.
    return (9 + semitone) % 12
 def _make_temp_path(suffix, registry):
     """Create an empty, closed temp file; record its path in ``registry``."""
     with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
         path = handle.name
     registry.append(path)
     return path


 def _load_mono_samples(audio):
     """Return ``(samples, sample_rate)`` as a mono float array scaled to [-1, 1]."""
     samples = np.array(audio.get_array_of_samples(), dtype=float)
     channels = audio.channels
     if channels > 1:
         # Same down-mix the 2-channel code used (one row per frame,
         # averaged across channels), extended to any channel count.
         samples = samples.reshape((-1, channels)).mean(axis=1)
     peak = np.max(np.abs(samples)) if samples.size else 0.0
     if peak != 0:
         samples /= peak
     return samples, audio.frame_rate


 def _compute_note_energies(samples, sample_rate, chunk_size, step_size, n_chunks):
     """Return an ``(n_chunks, 12)`` array of summed FFT magnitudes per pitch class."""
     window = get_window("hann", chunk_size)
     freqs = rfftfreq(chunk_size, d=1.0 / sample_rate)

     # The bin -> pitch-class mapping depends only on the frequency axis, so
     # it is computed once instead of once per bin per frame.
     note_of_bin = [freq_to_note_index(f, base_freq=440.0) for f in freqs]
     has_note = np.array([n is not None for n in note_of_bin], dtype=bool)
     note_ids = np.array([n for n in note_of_bin if n is not None], dtype=int)

     energies = np.zeros((n_chunks, 12), dtype=float)
     for i in range(n_chunks):
         start = i * step_size
         chunk = samples[start:start + chunk_size] * window
         spectrum = np.abs(rfft(chunk))  # magnitudes from 0 Hz up to Nyquist
         # Sum the magnitudes of all bins that share a pitch class.
         energies[i] = np.bincount(note_ids, weights=spectrum[has_note], minlength=12)
     return energies


 def _render_bar_animation(note_energies, fps, video_path):
     """Render one bar-chart frame per row of ``note_energies`` into ``video_path``."""
     max_energy = float(np.max(note_energies))
     # Silent input would give a zero-height axis; fall back to a unit range.
     y_top = max_energy * 1.1 if max_energy > 0 else 1.0

     fig, ax = plt.subplots(figsize=(6, 4))
     try:
         fig.patch.set_facecolor("black")
         ax.set_facecolor("black")
         ax.set_ylim(0, y_top)
         ax.set_xticks(range(12))
         ax.set_xticklabels(NOTE_NAMES, color="white")
         ax.tick_params(axis='y', colors='white')
         for side in ("bottom", "left", "top", "right"):
             ax.spines[side].set_color("white")

         # One colour per bar. pyplot.get_cmap is used because
         # matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
         cmap = plt.get_cmap("rainbow", 12)
         bar_colors = [cmap(i) for i in range(12)]
         bars = ax.bar(range(12), note_energies[0], color=bar_colors)

         def update(frame):
             for bar, energy in zip(bars, note_energies[frame]):
                 bar.set_height(energy)
             return bars

         ani = FuncAnimation(
             fig,
             update,
             frames=len(note_energies),
             interval=1000 / fps,  # milliseconds per frame
             blit=True,
         )
         ani.save(video_path, fps=fps, extra_args=["-vcodec", "libx264"])
     finally:
         # Release the figure so repeated runs do not accumulate open figures.
         plt.close(fig)


 def main():
     """Streamlit entry point: turn an uploaded MP3 into a 12-note bar-chart video.

     Steps: decode the upload, reduce it to a normalized mono signal, run a
     windowed FFT over overlapping chunks, sum the magnitudes per pitch
     class, render the per-frame sums as an animated bar chart, mux the
     (trimmed) audio back in with FFmpeg and display the result.  All
     temporary files are removed before returning, including on errors.
     """
     st.title("ドレミ分解アニメーション")
     uploaded_file = st.file_uploader("アートに変換する音声ファイルをアップロード (MP3)", type=["mp3"])
     if uploaded_file is None:
         return
     st.write("アップロード完了。動画を生成します...")

     temp_paths = []
     try:
         # --- Step 1: write the upload to disk and decode it ---
         # The file is written and closed before decoding so the decoder
         # never sees a partially flushed file.
         mp3_path = _make_temp_path(".mp3", temp_paths)
         with open(mp3_path, "wb") as mp3_file:
             mp3_file.write(uploaded_file.read())
         audio = AudioSegment.from_file(mp3_path)
         samples, sample_rate = _load_mono_samples(audio)

         # --- Parameters ---
         chunk_size = 2048    # FFT size
         overlap = 1024       # samples shared by consecutive chunks
         step_size = chunk_size - overlap

         n_chunks = (len(samples) - chunk_size) // step_size + 1
         if n_chunks < 1:
             st.error("音声が短すぎて処理できません。もう少し長い音源をアップロードしてください。")
             return

         # Trim the audio to the span covered by the video frames so both
         # streams have the same length.
         total_seconds = n_chunks * (step_size / sample_rate)
         trim_ms = int(total_seconds * 1000)
         if len(audio) > trim_ms:
             audio = audio[:trim_ms]

         # --- Step 2: per-frame energy of each of the 12 pitch classes ---
         note_energies = _compute_note_energies(
             samples, sample_rate, chunk_size, step_size, n_chunks
         )

         # --- Steps 3-4: render the animation to a silent mp4 ---
         # Each frame advances step_size samples, i.e. step_size / sample_rate s.
         fps = sample_rate / step_size
         video_path = _make_temp_path(".mp4", temp_paths)
         _render_bar_animation(note_energies, fps, video_path)

         # --- Step 5: export the trimmed audio as WAV ---
         audio_path = _make_temp_path(".wav", temp_paths)
         audio.export(audio_path, format="wav")

         # --- Step 6: mux video and audio with FFmpeg ---
         output_path = _make_temp_path("_output.mp4", temp_paths)
         ffmpeg_command = [
             "ffmpeg", "-y",
             "-i", video_path,   # video stream, copied as-is by "-c:v copy"
             "-i", audio_path,
             "-c:v", "copy",
             "-c:a", "aac",
             output_path
         ]
         result = subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         if result.returncode != 0:
             st.error("動画と音声の合成に失敗しました (FFmpeg エラー)。")
             st.code(result.stderr.decode("utf-8", errors="replace"))
             return

         # --- Step 7: show the result ---
         # Hand Streamlit the bytes rather than the path so the temp file
         # can be deleted right away.
         with open(output_path, "rb") as output_file:
             video_bytes = output_file.read()
         st.write("**生成した動画はこちら**")
         st.video(video_bytes)
     finally:
         # Best-effort cleanup: a file that is already gone is not an error.
         for path in temp_paths:
             try:
                 os.remove(path)
             except OSError:
                 pass