naohiro701 committed on
Commit
c389cfc
·
verified ·
1 Parent(s): d2533af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -83
app.py CHANGED
@@ -1,115 +1,110 @@
1
  import numpy as np
2
  import matplotlib.pyplot as plt
 
 
 
3
  import streamlit as st
4
  import tempfile
5
  import os
6
- from pydub import AudioSegment
7
- from scipy.signal import get_window
8
- from scipy.fft import rfft, rfftfreq
9
- import plotly.express as px
10
 
11
  # Define frequency ranges for musical notes based on 440Hz
12
base_frequency = 440
note_names = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"]


def frequency_to_note(freq):
    """Return the name of the equal-tempered note closest to ``freq``.

    The pitch is measured in semitones relative to A4 (``base_frequency``)
    and rendered in scientific pitch notation, e.g. ``"A4"`` or ``"C#5"``.

    Args:
        freq: Frequency in Hz.

    Returns:
        The note name with its octave number, or ``None`` when ``freq`` is
        not a positive finite number.
    """
    if not np.isfinite(freq) or freq <= 0:
        return None
    # Signed semitone distance from A4 (0 == A4, 3 == C5, -9 == C4).
    semitone_index = int(round(12 * np.log2(freq / base_frequency)))
    note_name = note_names[semitone_index % 12]
    # Octave numbers roll over at C, not at A. C sits 9 semitones below the
    # A of the same octave, so shift by 9 before the floor division.
    octave = 4 + (semitone_index + 9) // 12
    return f"{note_name}{octave}"
24
 
 
25
def main():
    """Streamlit entry point: show a spectrogram and per-frame peak pitch.

    The user uploads an MP3. It is decoded with pydub, mixed down to mono,
    and normalised. A short-time FFT is then computed with the FFT size,
    overlap, and window chosen in the sidebar. Two plotly charts are
    rendered: a dB spectrogram and a line plot of each frame's peak
    frequency annotated with its nearest note name.
    """
    st.title("スペクトログラム&ピーク周波数可視化")

    uploaded_file = st.file_uploader("音声ファイルをアップロード (MP3)", type=["mp3"])
    if uploaded_file is None:
        return

    # pydub/ffmpeg need a real file path. Close the file before decoding so
    # the bytes are flushed, and always remove it afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
        temp_mp3.write(uploaded_file.read())
    try:
        audio = AudioSegment.from_file(temp_mp3.name)
    finally:
        os.remove(temp_mp3.name)

    samples = np.array(audio.get_array_of_samples(), dtype=float)
    sample_rate = audio.frame_rate

    # Samples are interleaved per channel; average them down to mono.
    if audio.channels > 1:
        samples = samples.reshape((-1, audio.channels)).mean(axis=1)

    # Normalize to [-1, 1]
    samples /= np.iinfo(audio.array_type).max

    st.write(f"サンプリングレート: {sample_rate} Hz")
    st.write(f"サンプル数: {len(samples)}")

    # Parameters
    chunk_size = st.sidebar.slider(
        "FFTサイズ (Chunk Size)", min_value=1024, max_value=8192, value=2048, step=1024
    )
    # The default must not exceed max_value, otherwise Streamlit raises when
    # the smallest FFT size (1024) is selected.
    overlap = st.sidebar.slider(
        "オーバーラップ (Overlap)",
        min_value=0,
        max_value=chunk_size - 1,
        value=min(1024, chunk_size // 2),
        step=256,
    )

    window_type = st.sidebar.selectbox(
        "ウィンドウ関数の種類", ["hann", "hamming", "blackman", "rect"]
    )
    if window_type == "rect":
        window = np.ones(chunk_size)
    else:
        window = get_window(window_type, chunk_size)

    if len(samples) < chunk_size:
        # Not even one full frame: the frame count below would be <= 0.
        st.warning("音声が短すぎます。FFTサイズより長い音声をアップロードしてください。")
        return

    step_size = chunk_size - overlap
    freqs = rfftfreq(chunk_size, d=1 / sample_rate)
    n_chunks = (len(samples) - chunk_size) // step_size + 1

    # Spectrogram magnitudes, shape (n_chunks, len(freqs)).
    spec_data = np.zeros((n_chunks, len(freqs)))
    peak_freqs = []
    for i in range(n_chunks):
        start = i * step_size
        spec = np.abs(rfft(samples[start:start + chunk_size] * window))
        spec_data[i, :] = spec
        # Frequency of the strongest bin in this frame.
        peak_freqs.append(freqs[np.argmax(spec)])

    # Convert each frame's peak frequency to a note name.
    peak_notes = [frequency_to_note(f) for f in peak_freqs]

    # imshow draws rows along y and columns along x, so transpose to put
    # time on x and frequency on y, matching the axis labels.
    fig = px.imshow(
        20 * np.log10(spec_data.T + 1e-8),  # dB scale; epsilon avoids log(0)
        origin='lower',
        aspect='auto',
        labels=dict(x="Time (frames)", y="Frequency Bin", color="Magnitude (dB)"),
        color_continuous_scale='viridis',
    )
    st.plotly_chart(fig, use_container_width=True)

    # Line plot of the peak frequency per frame.
    time_axis = np.arange(n_chunks)
    fig_peak = px.line(
        x=time_axis, y=peak_freqs,
        title="フレームごとのピーク周波数",
        labels={"x": "Frame index", "y": "Peak Frequency (Hz)"},
    )

    # Note-name labels; frames without a valid pitch get no label.
    note_text = [pn if pn is not None else "" for pn in peak_notes]
    fig_peak.add_scatter(
        x=time_axis, y=peak_freqs, mode="text", text=note_text, textposition="top center"
    )

    st.plotly_chart(fig_peak, use_container_width=True)


if __name__ == "__main__":
    main()
 
1
  import numpy as np
2
  import matplotlib.pyplot as plt
3
+ from matplotlib.animation import FuncAnimation
4
+ from pydub import AudioSegment
5
+ from scipy.fftpack import fft
6
  import streamlit as st
7
  import tempfile
8
  import os
9
+ import subprocess
 
 
 
10
 
11
  # Define frequency ranges for musical notes based on 440Hz
12
base_frequency = 440
note_names = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"]
# One RGBA colour per pitch class. The hsv colormap is cyclic (0 and 1 are
# both red), so the endpoint is excluded to keep the first and last note
# from sharing a colour.
colors = plt.cm.hsv(np.linspace(0, 1, len(note_names), endpoint=False))
15
 
16
+ # Map frequency to color based on 440Hz intervals
17
def frequency_to_color(freq):
    """Map a frequency to the colour of its nearest pitch class.

    Args:
        freq: Frequency in Hz.

    Returns:
        ``"gray"`` when ``freq`` is not finite or lies below half of
        ``base_frequency``; otherwise the entry of ``colors`` for the
        nearest semitone, counted from A.
    """
    # Frequencies more than one octave below the reference pitch are drawn
    # in a neutral colour (they are still audible, just not colour-coded).
    if not np.isfinite(freq) or freq < base_frequency / 2:
        return "gray"

    # Round to the nearest semitone. Truncating would push slightly flat
    # pitches onto the wrong neighbour (e.g. 415.3 Hz G# would become A).
    interval_index = int(round(12 * np.log2(freq / base_frequency))) % len(note_names)
    return colors[interval_index]
 
 
23
 
24
# Streamlit App
def _new_temp_path(suffix):
    """Create an empty, closed temporary file and return its path."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
        return handle.name


def main():
    """Streamlit entry point: render an MP3's spectrum as a downloadable video.

    The uploaded MP3 is decoded with pydub and mixed down to mono. A
    Hann-windowed short-time FFT is animated with matplotlib, one video
    frame per FFT frame. The animation is then muxed with the original audio
    by ffmpeg and offered through a download button. All intermediate files
    and the figure are released even when a step fails.
    """
    st.title("MP3 Fourier Transform Visualizer: ドレミドリブン")

    uploaded_file = st.file_uploader("音声ファイルをアップロード (MP3)", type=["mp3"])
    if uploaded_file is None:
        return

    temp_paths = []
    fig = None
    try:
        # pydub/ffmpeg need a real file path. Write and close the file
        # before decoding so the bytes are flushed.
        mp3_path = _new_temp_path(".mp3")
        temp_paths.append(mp3_path)
        with open(mp3_path, "wb") as mp3_file:
            mp3_file.write(uploaded_file.read())
        audio = AudioSegment.from_file(mp3_path)

        samples = np.array(audio.get_array_of_samples(), dtype=float)
        sample_rate = audio.frame_rate

        # Samples are interleaved per channel; average them down to mono.
        if audio.channels > 1:
            samples = samples.reshape((-1, audio.channels)).mean(axis=1)

        # Define FFT parameters
        chunk_size = 2048  # Number of samples per frame
        overlap = 1024  # Overlap between frames
        step_size = chunk_size - overlap

        if len(samples) < chunk_size:
            # Not even one full frame, so there is nothing to animate.
            st.error("音声が短すぎるため解析できません。")
            return

        # Calculate the FFT magnitude for each chunk
        freqs = np.fft.rfftfreq(chunk_size, d=1 / sample_rate)
        n_chunks = (len(samples) - chunk_size) // step_size + 1
        window = np.hanning(chunk_size)  # same for every frame; build once
        fft_frames = np.array([
            np.abs(np.fft.rfft(samples[i * step_size:i * step_size + chunk_size] * window))
            for i in range(n_chunks)
        ])
        # Dominant frequency of each frame; drives the line colour.
        peak_freqs = freqs[np.argmax(fft_frames, axis=1)]

        # Create animation with artistic elements
        fig, ax = plt.subplots(facecolor="black")
        line, = ax.plot(freqs, fft_frames[0], lw=2)
        ax.set_xlim(0, freqs[-1])
        # Fall back to 1.0 for all-silent audio to avoid a zero-height axis.
        ax.set_ylim(0, fft_frames.max() or 1.0)
        ax.set_xlabel("Frequency (Hz)", color="white")
        ax.set_ylabel("Amplitude", color="white")
        ax.set_title("音の周波数スペクトル", color="white")
        ax.tick_params(colors="white")
        fig.patch.set_facecolor("black")

        # Highlight 440Hz intervals with vertical lines
        for i in range(1, int(freqs[-1] / base_frequency) + 1):
            ax.axvline(i * base_frequency, color="white", linestyle="--", alpha=0.5)

        def update(frame):
            line.set_ydata(fft_frames[frame])
            line.set_color(frequency_to_color(peak_freqs[frame]))
            # Background colour sweeps through viridis as the track advances.
            ax.set_facecolor(plt.cm.viridis(frame / len(fft_frames)))
            return line,

        # blit is off because update() also changes the axes background,
        # which blitting would not redraw.
        ani = FuncAnimation(fig, update, frames=len(fft_frames), blit=False)

        # One video frame per FFT frame: the frame rate must equal the hop
        # rate, or the picture drifts out of sync with the audio.
        fps = sample_rate / step_size
        video_path = _new_temp_path(".mp4")
        temp_paths.append(video_path)
        ani.save(video_path, fps=fps, extra_args=['-vcodec', 'libx264'])

        # Merge audio and video using ffmpeg
        audio_path = _new_temp_path(".wav")
        temp_paths.append(audio_path)
        audio.export(audio_path, format="wav")

        # Per-run output file so concurrent sessions do not overwrite each other.
        output_path = _new_temp_path(".mp4")
        temp_paths.append(output_path)
        ffmpeg_command = [
            "ffmpeg", "-y", "-i", video_path, "-i", audio_path,
            "-c:v", "copy", "-c:a", "aac", "-shortest", output_path,
        ]
        try:
            subprocess.run(ffmpeg_command, check=True, capture_output=True)
        except (subprocess.CalledProcessError, FileNotFoundError) as err:
            st.error(f"動画の生成に失敗しました: {err}")
            return

        # Read the bytes now; the file itself is deleted in the finally block.
        with open(output_path, "rb") as file:
            video_bytes = file.read()

        st.success("動画を生成しました!以下のリンクからダウンロードできます。")
        st.download_button(
            label="動画をダウンロード",
            data=video_bytes,
            file_name="output_art_video.mp4",
            mime="video/mp4",
        )
    finally:
        # Cleanup: free the figure and remove every temporary file.
        if fig is not None:
            plt.close(fig)
        for path in temp_paths:
            try:
                os.remove(path)
            except OSError:
                pass  # already gone or never created; nothing to clean up


if __name__ == "__main__":
    main()