Spaces:

naohiro701
/

Fourier_Transform_Visualizer

Sleeping

App Files Files Community

naohiro701 committed on Jan 15, 2025

Commit

86ef1c7

verified ·

1 Parent(s): c389cfc

Update app.py

Browse files

Files changed (1) hide show

app.py +172 -92

app.py CHANGED Viewed

@@ -1,110 +1,190 @@
 import numpy as np
 import matplotlib.pyplot as plt
-from matplotlib.animation import FuncAnimation
 from pydub import AudioSegment
-from scipy.fftpack import fft
 import streamlit as st
 import tempfile
 import os
 import subprocess
-# Define frequency ranges for musical notes based on 440Hz
-base_frequency = 440
-note_names = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"]
-colors = plt.cm.hsv(np.linspace(0, 1, len(note_names)))
-# Map frequency to color based on 440Hz intervals
 def frequency_to_color(freq):
-    if freq < base_frequency / 2:
-        return "gray"  # Below audible range
-    interval_index = int(np.log2(freq / base_frequency) * 12) % len(note_names)
-    return colors[interval_index]
-# Streamlit App
 def main():
-    st.title("MP3 Fourier Transform Visualizer: ドレミドリブン")
     uploaded_file = st.file_uploader("音声ファイルをアップロード (MP3)", type=["mp3"])
-    if uploaded_file is not None:
-        # Convert MP3 to WAV for easier processing
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
-            temp_mp3.write(uploaded_file.read())
-            audio = AudioSegment.from_file(temp_mp3.name)
-        samples = np.array(audio.get_array_of_samples())
-        sample_rate = audio.frame_rate
-        # Normalize samples
-        if audio.channels == 2:
-            samples = samples.reshape((-1, 2))
-            samples = samples.mean(axis=1)  # Convert to mono
-        # Define FFT parameters
-        chunk_size = 2048  # Number of samples per frame
-        overlap = 1024     # Overlap between frames
-        step_size = chunk_size - overlap
-        # Calculate the FFT for each chunk
-        freqs = np.fft.rfftfreq(chunk_size, d=1/sample_rate)
-        n_chunks = (len(samples) - chunk_size) // step_size + 1
-        fft_frames = []
-        for i in range(n_chunks):
-            chunk = samples[i * step_size:i * step_size + chunk_size]
-            windowed = chunk * np.hanning(len(chunk))
-            spectrum = np.abs(fft(windowed)[:len(freqs)])
-            fft_frames.append(spectrum)
-        fft_frames = np.array(fft_frames)
-        # Create animation with artistic elements
-        fig, ax = plt.subplots(facecolor="black")
-        line, = ax.plot(freqs, fft_frames[0], lw=2)
-        ax.set_xlim(0, np.max(freqs))
-        ax.set_ylim(0, np.max(fft_frames))
-        ax.set_xlabel("Frequency (Hz)", color="white")
-        ax.set_ylabel("Amplitude", color="white")
-        ax.set_title("音の周波数スペクトル", color="white")
-        ax.tick_params(colors="white")
-        fig.patch.set_facecolor("black")
-        # Highlight 440Hz intervals with vertical lines
-        for i in range(1, int(np.max(freqs) / base_frequency) + 1):
-            ax.axvline(i * base_frequency, color="white", linestyle="--", alpha=0.5)
-        def update(frame):
-            line.set_ydata(fft_frames[frame])
-            line.set_color(frequency_to_color(freqs[int(frame % len(freqs))]))
-            ax.set_facecolor(plt.cm.viridis(frame / len(fft_frames)))  # Dynamic background color
-            return line,
-        ani = FuncAnimation(fig, update, frames=len(fft_frames), blit=True)
-        # Save animation to a temporary file
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
-            ani.save(temp_video.name, fps=30, extra_args=['-vcodec', 'libx264'])
-            video_path = temp_video.name
-        # Merge audio and video using ffmpeg
-        output_path = "output_art_video.mp4"
-        audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
-        audio.export(audio_path, format="wav")
-        ffmpeg_command = [
-            "ffmpeg", "-y", "-i", video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", output_path
-        ]
-        subprocess.run(ffmpeg_command)
-        st.success("動画を生成しました！以下のリンクからダウンロードできます。")
-        with open(output_path, "rb") as file:
-            st.download_button(label="動画をダウンロード", data=file, file_name="output_art_video.mp4", mime="video/mp4")
-        # Cleanup temporary files
-        os.remove(temp_mp3.name)
-        os.remove(video_path)
-        os.remove(audio_path)
 if __name__ == "__main__":
     main()

 import numpy as np
 import matplotlib.pyplot as plt
+from matplotlib.animation import FuncAnimation, FFMpegWriter
 from pydub import AudioSegment
+from scipy.fft import rfft, rfftfreq
 import streamlit as st
 import tempfile
 import os
 import subprocess
+# ----- 設定 -----
+BASE_FREQUENCY = 440  # ラ基準
+NOTE_NAMES = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"]
 def frequency_to_color(freq):
+    """
+    周波数 freq をノートに変換し、そのノートの色を返すサンプル関数。
+    カラーマップは hsv を使用し、12 音を 0 ~ 1 に均等に割り振るイメージ。
+    """
+    # 周波数が低すぎる場合はグレーなどに
+    if freq < BASE_FREQUENCY / 2:
+        return (0.5, 0.5, 0.5)  # gray
+    semitone_index = int(round(12 * np.log2(freq / BASE_FREQUENCY)))
+    note_idx = semitone_index % 12
+    # HSV 空間で note_idx / 12 を色相に対応させる (S=1, V=1)
+    color_hsv = (note_idx / 12, 1.0, 1.0)
+    # matplotlib で hsv -> rgb 変換
+    import colorsys
+    return colorsys.hsv_to_rgb(*color_hsv)
 def main():
+    st.title("周波数特性アート動画生成")
     uploaded_file = st.file_uploader("音声ファイルをアップロード (MP3)", type=["mp3"])
+    if uploaded_file is None:
+        st.info("MP3ファイルをアップロードしてください。")
+        return
+    # ----- Step1: MP3 -> AudioSegment 変換 -----
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
+        temp_mp3.write(uploaded_file.read())
+        audio = AudioSegment.from_file(temp_mp3.name)
+    # ----- Step2: numpy配列化 -----
+    samples = np.array(audio.get_array_of_samples(), dtype=float)
+    sample_rate = audio.frame_rate
+    # ステレオ→モノラル
+    if audio.channels == 2:
+        samples = samples.reshape((-1, 2)).mean(axis=1)
+    # 正規化（-1～1）
+    samples /= np.iinfo(audio.array_type).max
+    st.write(f"サンプリングレート: {sample_rate} Hz")
+    st.write(f"サンプル数: {len(samples)}")
+    # ----- FFTパラメータ -----
+    chunk_size = 2048
+    overlap = 1024
+    step_size = chunk_size - overlap
+    freqs = rfftfreq(chunk_size, d=1/sample_rate)
+    # チャンク数
+    n_chunks = (len(samples) - chunk_size) // step_size + 1
+    st.write(f"フレーム数: {n_chunks}")
+    # ----- 各チャンクのFFTを計算し、ピーク周波数とスペクトル総量を保存 -----
+    peak_freqs = []
+    total_amps = []  # 全周波数成分の合計（ざっくり振幅）
+    window = np.hanning(chunk_size)
+    for i in range(n_chunks):
+        start = i * step_size
+        end = start + chunk_size
+        chunk = samples[start:end] * window
+        spectrum = np.abs(rfft(chunk))
+        peak_index = np.argmax(spectrum)
+        peak_freq = freqs[peak_index]
+        peak_freqs.append(peak_freq)
+        total_amp = np.sum(spectrum)
+        total_amps.append(total_amp)
+    peak_freqs = np.array(peak_freqs)
+    total_amps = np.array(total_amps)
+    # ----- Step3: Matplotlib アニメーション作成 -----
+    fig, ax = plt.subplots(figsize=(6, 6), facecolor="black")
+    ax.set_facecolor("black")
+    ax.set_xlim(-1, 1)
+    ax.set_ylim(-1, 1)
+    ax.set_aspect("equal")
+    ax.axis("off")
+    scatter_plot = ax.scatter([], [], s=10, c=[], alpha=0.8)
+    # 描画用の座標（ランダムに散らした点を固定しておき、フレームごとに色とサイズを変えるなど）
+    # あるいはスパイラルを生成しておき、それを変化させる
+    num_points = 200
+    angles = np.linspace(0, 4 * np.pi, num_points)
+    radii = np.linspace(0.05, 0.5, num_points)
+    x_base = radii * np.cos(angles)
+    y_base = radii * np.sin(angles)
+    def init():
+        scatter_plot.set_offsets([])
+        scatter_plot.set_array(np.array([]))
+        return (scatter_plot,)
+    def update(frame):
+        # フレームに応じてピーク周波数 -> 色
+        p_freq = peak_freqs[frame]
+        c = frequency_to_color(p_freq)
+        # 総エネルギー -> 大きさのスケーリング
+        amp_scale = total_amps[frame]
+        # 過剰に大きくならないように対数スケールをかける
+        amp_scale = np.log10(amp_scale + 1)
+        # スパイラル座標をフレームごとにちょっとずつ変形したり回転したりしてみる
+        theta_shift = 0.1 * frame
+        x = x_base * (1 + 0.05 * np.sin(theta_shift))
+        y = y_base * (1 + 0.05 * np.cos(theta_shift))
+        # 回転
+        rot = 0.05 * frame
+        cos_r = np.cos(rot)
+        sin_r = np.sin(rot)
+        x_rot = x * cos_r - y * sin_r
+        y_rot = x * sin_r + y * cos_r
+        # 散布図に設定
+        coords = np.column_stack((x_rot, y_rot))
+        scatter_plot.set_offsets(coords)
+        # サイズと色を更新 (全点同じ色、サイズも統一)
+        sizes = (30 + 200 * amp_scale) * np.ones(num_points)
+        scatter_plot.set_sizes(sizes)
+        colors = np.array([c for _ in range(num_points)])
+        scatter_plot.set_facecolor(colors)
+        return (scatter_plot,)
+    ani = FuncAnimation(
+        fig, update, frames=n_chunks, init_func=init, blit=True, interval=10
+    )
+    # ----- Step4: Matplotlib アニメーションを一時ファイルに保存 (.mp4) -----
+    video_temp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+    video_path = video_temp.name
+    video_temp.close()
+    writer = FFMpegWriter(fps=30, codec="libx264")
+    ani.save(video_path, writer=writer, dpi=150)
+    plt.close(fig)  # Figure を閉じる
+    # ----- Step5: 音声と合成する -----
+    # まずは AudioSegment -> WAV 化（ffmpeg が aac にエンコードするので可）
+    audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
+    audio.export(audio_path, format="wav")
+    output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_output.mp4").name
+    ffmpeg_command = [
+        "ffmpeg", "-y",
+        "-i", video_path,
+        "-i", audio_path,
+        "-c:v", "copy",
+        "-c:a", "aac",
+        output_path
+    ]
+    subprocess.run(ffmpeg_command)
+    # ----- Step6: Streamlit に動画を表示 -----
+    st.video(output_path)
+    # ----- Cleanup -----
+    os.remove(video_path)
+    os.remove(audio_path)
+    os.remove(output_path)
+    os.remove(temp_mp3.name)
 if __name__ == "__main__":
     main()