Update app.py
Browse files
app.py
CHANGED
|
@@ -10,41 +10,44 @@ from pydub import AudioSegment
|
|
| 10 |
from scipy.fft import rfft, rfftfreq
|
| 11 |
from scipy.signal import get_window
|
| 12 |
|
| 13 |
-
# 12
|
| 14 |
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F",
|
| 15 |
"F#", "G", "G#", "A", "A#", "B"]
|
| 16 |
|
| 17 |
def freq_to_note_index(freq, base_freq=440.0):
|
| 18 |
"""
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
"""
|
| 22 |
if freq <= 0:
|
| 23 |
return None
|
| 24 |
-
# A4
|
| 25 |
-
# A4 からの半音数: n = 12*log2(freq/440.0)
|
| 26 |
semitone = round(12 * np.log2(freq / base_freq))
|
| 27 |
-
#
|
| 28 |
-
#
|
| 29 |
-
#
|
| 30 |
-
# C4 -> semitone=-9 -> index=0 (NOTE_NAMES[0] == "C") など
|
| 31 |
note_index = (9 + semitone) % 12
|
| 32 |
return note_index
|
| 33 |
|
| 34 |
def main():
|
| 35 |
-
st.title("
|
| 36 |
|
| 37 |
-
uploaded_file = st.file_uploader("
|
| 38 |
|
| 39 |
if uploaded_file is not None:
|
| 40 |
-
st.write("
|
| 41 |
|
| 42 |
-
# --- Step 1: MP3
|
| 43 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
|
| 44 |
temp_mp3.write(uploaded_file.read())
|
| 45 |
audio = AudioSegment.from_file(temp_mp3.name)
|
| 46 |
|
| 47 |
-
#
|
| 48 |
samples = np.array(audio.get_array_of_samples(), dtype=float)
|
| 49 |
sample_rate = audio.frame_rate
|
| 50 |
if audio.channels == 2:
|
|
@@ -53,40 +56,39 @@ def main():
|
|
| 53 |
if max_val != 0:
|
| 54 |
samples /= max_val
|
| 55 |
|
| 56 |
-
# ---
|
| 57 |
-
chunk_size = 2048
|
| 58 |
-
overlap = 1024
|
| 59 |
step_size = chunk_size - overlap
|
| 60 |
window = get_window("hann", chunk_size)
|
| 61 |
|
| 62 |
-
#
|
| 63 |
n_chunks = (len(samples) - chunk_size) // step_size + 1
|
| 64 |
if n_chunks < 1:
|
| 65 |
-
st.error("
|
| 66 |
return
|
| 67 |
|
| 68 |
-
#
|
| 69 |
total_seconds = n_chunks * (step_size / sample_rate)
|
| 70 |
-
|
|
|
|
| 71 |
if len(audio) > int(total_seconds * 1000):
|
| 72 |
audio = audio[: int(total_seconds * 1000)]
|
| 73 |
|
| 74 |
-
# FFT
|
| 75 |
freqs = rfftfreq(chunk_size, d=1.0 / sample_rate)
|
| 76 |
|
| 77 |
-
# --- Step 2:
|
| 78 |
note_energies_list = []
|
| 79 |
for i in range(n_chunks):
|
| 80 |
start = i * step_size
|
| 81 |
end = start + chunk_size
|
| 82 |
chunk = samples[start:end] * window
|
| 83 |
|
| 84 |
-
spectrum = np.abs(rfft(chunk)) #
|
| 85 |
|
| 86 |
-
# 12 音階分のエネルギーを格納するための配列
|
| 87 |
energies = np.zeros(12, dtype=float)
|
| 88 |
-
|
| 89 |
-
# 各周波数ビンに対してどのノートに属するか判定し、振幅を加算
|
| 90 |
for bin_idx, amp in enumerate(spectrum):
|
| 91 |
freq = freqs[bin_idx]
|
| 92 |
note_idx = freq_to_note_index(freq, base_freq=440.0)
|
|
@@ -98,23 +100,20 @@ def main():
|
|
| 98 |
note_energies_list = np.array(note_energies_list)
|
| 99 |
max_energy = np.max(note_energies_list)
|
| 100 |
|
| 101 |
-
# --- Step 3: Matplotlib
|
| 102 |
fig, ax = plt.subplots(figsize=(6, 4))
|
| 103 |
-
fig.patch.set_facecolor("black")
|
| 104 |
ax.set_facecolor("black")
|
| 105 |
-
ax.set_ylim(0, max_energy * 1.1) #
|
| 106 |
ax.set_xticks(range(12))
|
| 107 |
ax.set_xticklabels(NOTE_NAMES, color="white")
|
| 108 |
ax.tick_params(axis='y', colors='white')
|
| 109 |
-
ax.spines
|
| 110 |
-
|
| 111 |
-
ax.spines['top'].set_color('white')
|
| 112 |
-
ax.spines['right'].set_color('white')
|
| 113 |
|
| 114 |
-
#
|
| 115 |
cmap = plt.cm.get_cmap('rainbow', 12)
|
| 116 |
bar_colors = [cmap(i) for i in range(12)]
|
| 117 |
-
|
| 118 |
bars = ax.bar(range(12), note_energies_list[0], color=bar_colors)
|
| 119 |
|
| 120 |
def update(frame):
|
|
@@ -123,26 +122,26 @@ def main():
|
|
| 123 |
b.set_height(e)
|
| 124 |
return bars
|
| 125 |
|
| 126 |
-
# FPS
|
| 127 |
-
fps = sample_rate / step_size
|
| 128 |
ani = FuncAnimation(
|
| 129 |
fig,
|
| 130 |
update,
|
| 131 |
frames=n_chunks,
|
| 132 |
-
interval=1000 / fps, #
|
| 133 |
blit=True
|
| 134 |
)
|
| 135 |
|
| 136 |
-
# --- Step 4: Matplotlib
|
| 137 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
|
| 138 |
ani.save(temp_video.name, fps=fps, extra_args=["-vcodec", "libx264"])
|
| 139 |
video_path = temp_video.name
|
| 140 |
|
| 141 |
-
# --- Step 5:
|
| 142 |
audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
|
| 143 |
audio.export(audio_path, format="wav")
|
| 144 |
|
| 145 |
-
# --- Step 6: FFmpeg
|
| 146 |
output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_output.mp4").name
|
| 147 |
ffmpeg_command = [
|
| 148 |
"ffmpeg", "-y",
|
|
@@ -154,11 +153,11 @@ def main():
|
|
| 154 |
]
|
| 155 |
subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
| 156 |
|
| 157 |
-
# --- Step 7: Streamlit
|
| 158 |
-
st.write("
|
| 159 |
st.video(output_path)
|
| 160 |
|
| 161 |
-
#
|
| 162 |
os.remove(temp_mp3.name)
|
| 163 |
os.remove(video_path)
|
| 164 |
os.remove(audio_path)
|
|
|
|
| 10 |
from scipy.fft import rfft, rfftfreq
|
| 11 |
from scipy.signal import get_window
|
| 12 |
|
| 13 |
+
# Names for the 12 musical notes
|
| 14 |
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F",
|
| 15 |
"F#", "G", "G#", "A", "A#", "B"]
|
| 16 |
|
| 17 |
def freq_to_note_index(freq, base_freq=440.0):
    """Map a frequency in Hz to a pitch-class index in the range 0-11.

    Index 0 corresponds to "C" and index 9 to "A" (the tuning reference,
    ``base_freq`` = 440 Hz by default).

    Returns:
        int in [0, 11], or None when ``freq`` is not a positive frequency.

    Examples:
        freq_to_note_index(440)    -> 9  (which corresponds to "A")
        freq_to_note_index(261.63) -> 0  (approximately "C")
    """
    if freq <= 0:
        return None
    # Distance from the reference pitch A4, rounded to the nearest semitone:
    # n = 12 * log2(freq / base_freq)
    semitones_from_a4 = round(12 * np.log2(freq / base_freq))
    # "A" sits at slot 9 of the C-based note table, so offset by 9 and wrap
    # into the 12-entry pitch-class range.
    return (9 + semitones_from_a4) % 12
|
| 36 |
|
| 37 |
def main():
|
| 38 |
+
st.title("Doremi Frequency Decomposition Animation")
|
| 39 |
|
| 40 |
+
uploaded_file = st.file_uploader("Upload an MP3 file to analyze:", type=["mp3"])
|
| 41 |
|
| 42 |
if uploaded_file is not None:
|
| 43 |
+
st.write("File uploaded. Generating video...")
|
| 44 |
|
| 45 |
+
# --- Step 1: Convert MP3 to WAV & load into a NumPy array ---
|
| 46 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
|
| 47 |
temp_mp3.write(uploaded_file.read())
|
| 48 |
audio = AudioSegment.from_file(temp_mp3.name)
|
| 49 |
|
| 50 |
+
# Convert stereo to mono and normalize to -1..+1
|
| 51 |
samples = np.array(audio.get_array_of_samples(), dtype=float)
|
| 52 |
sample_rate = audio.frame_rate
|
| 53 |
if audio.channels == 2:
|
|
|
|
| 56 |
if max_val != 0:
|
| 57 |
samples /= max_val
|
| 58 |
|
| 59 |
+
# --- Parameters for the FFT ---
|
| 60 |
+
chunk_size = 2048 # FFT size
|
| 61 |
+
overlap = 1024 # Overlap
|
| 62 |
step_size = chunk_size - overlap
|
| 63 |
window = get_window("hann", chunk_size)
|
| 64 |
|
| 65 |
+
# Calculate how many chunks we can process
|
| 66 |
n_chunks = (len(samples) - chunk_size) // step_size + 1
|
| 67 |
if n_chunks < 1:
|
| 68 |
+
st.error("Audio is too short to process. Please upload a longer file.")
|
| 69 |
return
|
| 70 |
|
| 71 |
+
# Determine how long the resulting animation will be (in seconds)
|
| 72 |
total_seconds = n_chunks * (step_size / sample_rate)
|
| 73 |
+
|
| 74 |
+
# If the audio is longer than total_seconds, trim it to match
|
| 75 |
if len(audio) > int(total_seconds * 1000):
|
| 76 |
audio = audio[: int(total_seconds * 1000)]
|
| 77 |
|
| 78 |
+
# Frequency axis for the FFT
|
| 79 |
freqs = rfftfreq(chunk_size, d=1.0 / sample_rate)
|
| 80 |
|
| 81 |
+
# --- Step 2: Compute the energies in the 12 notes for each chunk ---
|
| 82 |
note_energies_list = []
|
| 83 |
for i in range(n_chunks):
|
| 84 |
start = i * step_size
|
| 85 |
end = start + chunk_size
|
| 86 |
chunk = samples[start:end] * window
|
| 87 |
|
| 88 |
+
spectrum = np.abs(rfft(chunk)) # Magnitude spectrum (0..Nyquist)
|
| 89 |
|
|
|
|
| 90 |
energies = np.zeros(12, dtype=float)
|
| 91 |
+
# For each frequency bin, find which note it belongs to, then accumulate amplitude
|
|
|
|
| 92 |
for bin_idx, amp in enumerate(spectrum):
|
| 93 |
freq = freqs[bin_idx]
|
| 94 |
note_idx = freq_to_note_index(freq, base_freq=440.0)
|
|
|
|
| 100 |
note_energies_list = np.array(note_energies_list)
|
| 101 |
max_energy = np.max(note_energies_list)
|
| 102 |
|
| 103 |
+
# --- Step 3: Create a bar-chart animation with Matplotlib ---
|
| 104 |
fig, ax = plt.subplots(figsize=(6, 4))
|
| 105 |
+
fig.patch.set_facecolor("black")
|
| 106 |
ax.set_facecolor("black")
|
| 107 |
+
ax.set_ylim(0, max_energy * 1.1) # Y-limit to accommodate peaks
|
| 108 |
ax.set_xticks(range(12))
|
| 109 |
ax.set_xticklabels(NOTE_NAMES, color="white")
|
| 110 |
ax.tick_params(axis='y', colors='white')
|
| 111 |
+
for spine in ax.spines.values():
|
| 112 |
+
spine.set_color('white')
|
|
|
|
|
|
|
| 113 |
|
| 114 |
+
# Use a color map for the bars
|
| 115 |
cmap = plt.cm.get_cmap('rainbow', 12)
|
| 116 |
bar_colors = [cmap(i) for i in range(12)]
|
|
|
|
| 117 |
bars = ax.bar(range(12), note_energies_list[0], color=bar_colors)
|
| 118 |
|
| 119 |
def update(frame):
|
|
|
|
| 122 |
b.set_height(e)
|
| 123 |
return bars
|
| 124 |
|
| 125 |
+
# Calculate FPS so video length matches audio length
|
| 126 |
+
fps = sample_rate / step_size
|
| 127 |
ani = FuncAnimation(
|
| 128 |
fig,
|
| 129 |
update,
|
| 130 |
frames=n_chunks,
|
| 131 |
+
interval=1000 / fps, # interval in milliseconds
|
| 132 |
blit=True
|
| 133 |
)
|
| 134 |
|
| 135 |
+
# --- Step 4: Save the Matplotlib animation as MP4 ---
|
| 136 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
|
| 137 |
ani.save(temp_video.name, fps=fps, extra_args=["-vcodec", "libx264"])
|
| 138 |
video_path = temp_video.name
|
| 139 |
|
| 140 |
+
# --- Step 5: Export the trimmed audio to WAV ---
|
| 141 |
audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
|
| 142 |
audio.export(audio_path, format="wav")
|
| 143 |
|
| 144 |
+
# --- Step 6: Merge video and audio with FFmpeg ---
|
| 145 |
output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_output.mp4").name
|
| 146 |
ffmpeg_command = [
|
| 147 |
"ffmpeg", "-y",
|
|
|
|
| 153 |
]
|
| 154 |
subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
| 155 |
|
| 156 |
+
# --- Step 7: Display the final video in Streamlit ---
|
| 157 |
+
st.write("**Here is your Doremi decomposition video:**")
|
| 158 |
st.video(output_path)
|
| 159 |
|
| 160 |
+
# Cleanup temp files
|
| 161 |
os.remove(temp_mp3.name)
|
| 162 |
os.remove(video_path)
|
| 163 |
os.remove(audio_path)
|