naohiro701 committed on
Commit
1fd1f2a
·
verified ·
1 Parent(s): f69cb22

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -45
app.py CHANGED
@@ -10,41 +10,44 @@ from pydub import AudioSegment
10
  from scipy.fft import rfft, rfftfreq
11
  from scipy.signal import get_window
12
 
13
- # 12 音階の名称
14
  NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F",
15
  "F#", "G", "G#", "A", "A#", "B"]
16
 
17
  def freq_to_note_index(freq, base_freq=440.0):
18
  """
19
- 周波数 freq A=440Hz を中心として、どの音名に該当するかを 0~11 のインデックスで返す。
20
- freq <= 0 の場合は None を返す。
 
 
 
 
 
21
  """
22
  if freq <= 0:
23
  return None
24
- # A4(=440Hz) 基準で、半音インデックスを計算
25
- # A4 からの半音数: n = 12*log2(freq/440.0)
26
  semitone = round(12 * np.log2(freq / base_freq))
27
- # 12 で割って 0~11 の音階にする(C=0, C#=1, B=11 を想定するときは、調整が必要)
28
- # ここでは A=9 として扱います。
29
- # 例: A4 -> semitone=0 -> index=9 (NOTE_NAMES[9] == "A")
30
- # C4 -> semitone=-9 -> index=0 (NOTE_NAMES[0] == "C") など
31
  note_index = (9 + semitone) % 12
32
  return note_index
33
 
34
  def main():
35
- st.title("ドレミ分解アニメーション")
36
 
37
- uploaded_file = st.file_uploader("アートに変換する音声ファイルをアップロード (MP3)", type=["mp3"])
38
 
39
  if uploaded_file is not None:
40
- st.write("アップロード完了。動画を生成します...")
41
 
42
- # --- Step 1: MP3 -> WAV 変換 & numpy配列化 ---
43
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
44
  temp_mp3.write(uploaded_file.read())
45
  audio = AudioSegment.from_file(temp_mp3.name)
46
 
47
- # モノラルにして、振幅を -1~1 に正規化
48
  samples = np.array(audio.get_array_of_samples(), dtype=float)
49
  sample_rate = audio.frame_rate
50
  if audio.channels == 2:
@@ -53,40 +56,39 @@ def main():
53
  if max_val != 0:
54
  samples /= max_val
55
 
56
- # --- パラメータ設定 ---
57
- chunk_size = 2048 # FFT サイズ
58
- overlap = 1024 # オーバーラップ
59
  step_size = chunk_size - overlap
60
  window = get_window("hann", chunk_size)
61
 
62
- # 処理チャンク数の算出
63
  n_chunks = (len(samples) - chunk_size) // step_size + 1
64
  if n_chunks < 1:
65
- st.error("音声が短すぎて処理できません。もう少し長い音源をアップロードしてください。")
66
  return
67
 
68
- # 動画・音声の長さの同期用
69
  total_seconds = n_chunks * (step_size / sample_rate)
70
- # 音声が長い場合は切り詰め
 
71
  if len(audio) > int(total_seconds * 1000):
72
  audio = audio[: int(total_seconds * 1000)]
73
 
74
- # FFT 用周波数軸
75
  freqs = rfftfreq(chunk_size, d=1.0 / sample_rate)
76
 
77
- # --- Step 2: 各フレームごとに 12 音階のエネルギー量を集計 ---
78
  note_energies_list = []
79
  for i in range(n_chunks):
80
  start = i * step_size
81
  end = start + chunk_size
82
  chunk = samples[start:end] * window
83
 
84
- spectrum = np.abs(rfft(chunk)) # 0 ~ Nyquist 領域
85
 
86
- # 12 音階分のエネルギーを格納するための配列
87
  energies = np.zeros(12, dtype=float)
88
-
89
- # 各周波数ビンに対してどのノートに属するか判定し、振幅を加算
90
  for bin_idx, amp in enumerate(spectrum):
91
  freq = freqs[bin_idx]
92
  note_idx = freq_to_note_index(freq, base_freq=440.0)
@@ -98,23 +100,20 @@ def main():
98
  note_energies_list = np.array(note_energies_list)
99
  max_energy = np.max(note_energies_list)
100
 
101
- # --- Step 3: Matplotlib アニメーションで棒グラフ表示 ---
102
  fig, ax = plt.subplots(figsize=(6, 4))
103
- fig.patch.set_facecolor("black") # 背景を黒く
104
  ax.set_facecolor("black")
105
- ax.set_ylim(0, max_energy * 1.1) # 余裕を持たせた Y 軸上限
106
  ax.set_xticks(range(12))
107
  ax.set_xticklabels(NOTE_NAMES, color="white")
108
  ax.tick_params(axis='y', colors='white')
109
- ax.spines['bottom'].set_color('white')
110
- ax.spines['left'].set_color('white')
111
- ax.spines['top'].set_color('white')
112
- ax.spines['right'].set_color('white')
113
 
114
- # カラーマップを使って棒の色を一つずつ変える
115
  cmap = plt.cm.get_cmap('rainbow', 12)
116
  bar_colors = [cmap(i) for i in range(12)]
117
-
118
  bars = ax.bar(range(12), note_energies_list[0], color=bar_colors)
119
 
120
  def update(frame):
@@ -123,26 +122,26 @@ def main():
123
  b.set_height(e)
124
  return bars
125
 
126
- # FPS の計算(アニメーションの秒数を音声に合わせる)
127
- fps = sample_rate / step_size # 1フレーム当たり step_size / sample_rate 秒
128
  ani = FuncAnimation(
129
  fig,
130
  update,
131
  frames=n_chunks,
132
- interval=1000 / fps, # ミリ秒
133
  blit=True
134
  )
135
 
136
- # --- Step 4: Matplotlib アニメーションを一時的に mp4 に保存 ---
137
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
138
  ani.save(temp_video.name, fps=fps, extra_args=["-vcodec", "libx264"])
139
  video_path = temp_video.name
140
 
141
- # --- Step 5: トリミング済み音声を WAV として出力 ---
142
  audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
143
  audio.export(audio_path, format="wav")
144
 
145
- # --- Step 6: FFmpeg で動画と音声を合成 ---
146
  output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_output.mp4").name
147
  ffmpeg_command = [
148
  "ffmpeg", "-y",
@@ -154,11 +153,11 @@ def main():
154
  ]
155
  subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
156
 
157
- # --- Step 7: Streamlit で動画を再生 ---
158
- st.write("**生成した動画はこちら**")
159
  st.video(output_path)
160
 
161
- # 後片付け
162
  os.remove(temp_mp3.name)
163
  os.remove(video_path)
164
  os.remove(audio_path)
 
10
  from scipy.fft import rfft, rfftfreq
11
  from scipy.signal import get_window
12
 
13
+ # Names for the 12 musical notes
14
  NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F",
15
  "F#", "G", "G#", "A", "A#", "B"]
16
 
17
def freq_to_note_index(freq, base_freq=440.0):
    """Map a frequency in Hz to a pitch-class index 0-11 (C=0 ... B=11).

    The reference pitch is A4 = *base_freq* (440 Hz by default), which maps
    to index 9 ("A" in NOTE_NAMES).

    Returns None for non-positive frequencies, since they have no pitch.

    Examples:
        freq_to_note_index(440)    -> 9   ("A")
        freq_to_note_index(261.63) -> 0   (approximately middle C)
    """
    # Non-positive frequencies are unpitched — bail out early.
    if freq <= 0:
        return None
    # Distance from A4 in semitones: 12 semitones per octave on a log2 scale.
    semitones_from_a4 = round(12 * np.log2(freq / base_freq))
    # A4 sits at slot 9 of the C-based 12-note cycle, so shift by 9 and wrap.
    return (semitones_from_a4 + 9) % 12
36
 
37
  def main():
38
+ st.title("Doremi Frequency Decomposition Animation")
39
 
40
+ uploaded_file = st.file_uploader("Upload an MP3 file to analyze:", type=["mp3"])
41
 
42
  if uploaded_file is not None:
43
+ st.write("File uploaded. Generating video...")
44
 
45
+ # --- Step 1: Convert MP3 to WAV & load into a NumPy array ---
46
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
47
  temp_mp3.write(uploaded_file.read())
48
  audio = AudioSegment.from_file(temp_mp3.name)
49
 
50
+ # Convert stereo to mono and normalize to -1..+1
51
  samples = np.array(audio.get_array_of_samples(), dtype=float)
52
  sample_rate = audio.frame_rate
53
  if audio.channels == 2:
 
56
  if max_val != 0:
57
  samples /= max_val
58
 
59
+ # --- Parameters for the FFT ---
60
+ chunk_size = 2048 # FFT size
61
+ overlap = 1024 # Overlap
62
  step_size = chunk_size - overlap
63
  window = get_window("hann", chunk_size)
64
 
65
+ # Calculate how many chunks we can process
66
  n_chunks = (len(samples) - chunk_size) // step_size + 1
67
  if n_chunks < 1:
68
+ st.error("Audio is too short to process. Please upload a longer file.")
69
  return
70
 
71
+ # Determine how long the resulting animation will be (in seconds)
72
  total_seconds = n_chunks * (step_size / sample_rate)
73
+
74
+ # If the audio is longer than total_seconds, trim it to match
75
  if len(audio) > int(total_seconds * 1000):
76
  audio = audio[: int(total_seconds * 1000)]
77
 
78
+ # Frequency axis for the FFT
79
  freqs = rfftfreq(chunk_size, d=1.0 / sample_rate)
80
 
81
+ # --- Step 2: Compute the energies in the 12 notes for each chunk ---
82
  note_energies_list = []
83
  for i in range(n_chunks):
84
  start = i * step_size
85
  end = start + chunk_size
86
  chunk = samples[start:end] * window
87
 
88
+ spectrum = np.abs(rfft(chunk)) # Magnitude spectrum (0..Nyquist)
89
 
 
90
  energies = np.zeros(12, dtype=float)
91
+ # For each frequency bin, find which note it belongs to, then accumulate amplitude
 
92
  for bin_idx, amp in enumerate(spectrum):
93
  freq = freqs[bin_idx]
94
  note_idx = freq_to_note_index(freq, base_freq=440.0)
 
100
  note_energies_list = np.array(note_energies_list)
101
  max_energy = np.max(note_energies_list)
102
 
103
+ # --- Step 3: Create a bar-chart animation with Matplotlib ---
104
  fig, ax = plt.subplots(figsize=(6, 4))
105
+ fig.patch.set_facecolor("black")
106
  ax.set_facecolor("black")
107
+ ax.set_ylim(0, max_energy * 1.1) # Y-limit to accommodate peaks
108
  ax.set_xticks(range(12))
109
  ax.set_xticklabels(NOTE_NAMES, color="white")
110
  ax.tick_params(axis='y', colors='white')
111
+ for spine in ax.spines.values():
112
+ spine.set_color('white')
 
 
113
 
114
+ # Use a color map for the bars
115
  cmap = plt.cm.get_cmap('rainbow', 12)
116
  bar_colors = [cmap(i) for i in range(12)]
 
117
  bars = ax.bar(range(12), note_energies_list[0], color=bar_colors)
118
 
119
  def update(frame):
 
122
  b.set_height(e)
123
  return bars
124
 
125
+ # Calculate FPS so video length matches audio length
126
+ fps = sample_rate / step_size
127
  ani = FuncAnimation(
128
  fig,
129
  update,
130
  frames=n_chunks,
131
+ interval=1000 / fps, # interval in milliseconds
132
  blit=True
133
  )
134
 
135
+ # --- Step 4: Save the Matplotlib animation as MP4 ---
136
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
137
  ani.save(temp_video.name, fps=fps, extra_args=["-vcodec", "libx264"])
138
  video_path = temp_video.name
139
 
140
+ # --- Step 5: Export the trimmed audio to WAV ---
141
  audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
142
  audio.export(audio_path, format="wav")
143
 
144
+ # --- Step 6: Merge video and audio with FFmpeg ---
145
  output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_output.mp4").name
146
  ffmpeg_command = [
147
  "ffmpeg", "-y",
 
153
  ]
154
  subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
155
 
156
+ # --- Step 7: Display the final video in Streamlit ---
157
+ st.write("**Here is your Doremi decomposition video:**")
158
  st.video(output_path)
159
 
160
+ # Cleanup temp files
161
  os.remove(temp_mp3.name)
162
  os.remove(video_path)
163
  os.remove(audio_path)