Update app.py
Browse files
app.py
CHANGED
|
@@ -10,41 +10,44 @@ from pydub import AudioSegment
|
|
| 10 |
from scipy.fft import rfft, rfftfreq
|
| 11 |
from scipy.signal import get_window
|
| 12 |
|
| 13 |
-
# 12
|
| 14 |
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F",
|
| 15 |
"F#", "G", "G#", "A", "A#", "B"]
|
| 16 |
|
| 17 |
def freq_to_note_index(freq, base_freq=440.0):
|
| 18 |
"""
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
"""
|
| 22 |
if freq <= 0:
|
| 23 |
return None
|
| 24 |
-
# A4
|
| 25 |
-
# A4 からの半音数: n = 12*log2(freq/440.0)
|
| 26 |
semitone = round(12 * np.log2(freq / base_freq))
|
| 27 |
-
#
|
| 28 |
-
#
|
| 29 |
-
#
|
| 30 |
-
# C4 -> semitone=-9 -> index=0 (NOTE_NAMES[0] == "C") など
|
| 31 |
note_index = (9 + semitone) % 12
|
| 32 |
return note_index
|
| 33 |
|
| 34 |
def main():
|
| 35 |
-
st.title("
|
| 36 |
|
| 37 |
-
uploaded_file = st.file_uploader("
|
| 38 |
|
| 39 |
if uploaded_file is not None:
|
| 40 |
-
st.write("
|
| 41 |
|
| 42 |
-
# --- Step 1: MP3
|
| 43 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
|
| 44 |
temp_mp3.write(uploaded_file.read())
|
| 45 |
audio = AudioSegment.from_file(temp_mp3.name)
|
| 46 |
|
| 47 |
-
#
|
| 48 |
samples = np.array(audio.get_array_of_samples(), dtype=float)
|
| 49 |
sample_rate = audio.frame_rate
|
| 50 |
if audio.channels == 2:
|
|
@@ -53,40 +56,39 @@ def main():
|
|
| 53 |
if max_val != 0:
|
| 54 |
samples /= max_val
|
| 55 |
|
| 56 |
-
# ---
|
| 57 |
-
chunk_size = 2048
|
| 58 |
-
overlap = 1024
|
| 59 |
step_size = chunk_size - overlap
|
| 60 |
window = get_window("hann", chunk_size)
|
| 61 |
|
| 62 |
-
#
|
| 63 |
n_chunks = (len(samples) - chunk_size) // step_size + 1
|
| 64 |
if n_chunks < 1:
|
| 65 |
-
st.error("
|
| 66 |
return
|
| 67 |
|
| 68 |
-
#
|
| 69 |
total_seconds = n_chunks * (step_size / sample_rate)
|
| 70 |
-
|
|
|
|
| 71 |
if len(audio) > int(total_seconds * 1000):
|
| 72 |
audio = audio[: int(total_seconds * 1000)]
|
| 73 |
|
| 74 |
-
# FFT
|
| 75 |
freqs = rfftfreq(chunk_size, d=1.0 / sample_rate)
|
| 76 |
|
| 77 |
-
# --- Step 2:
|
| 78 |
note_energies_list = []
|
| 79 |
for i in range(n_chunks):
|
| 80 |
start = i * step_size
|
| 81 |
end = start + chunk_size
|
| 82 |
chunk = samples[start:end] * window
|
| 83 |
|
| 84 |
-
spectrum = np.abs(rfft(chunk)) #
|
| 85 |
|
| 86 |
-
# 12 音階分のエネルギーを格納するための配列
|
| 87 |
energies = np.zeros(12, dtype=float)
|
| 88 |
-
|
| 89 |
-
# 各周波数ビンに対してどのノートに属するか判定し、振幅を加算
|
| 90 |
for bin_idx, amp in enumerate(spectrum):
|
| 91 |
freq = freqs[bin_idx]
|
| 92 |
note_idx = freq_to_note_index(freq, base_freq=440.0)
|
|
@@ -98,23 +100,20 @@ def main():
|
|
| 98 |
note_energies_list = np.array(note_energies_list)
|
| 99 |
max_energy = np.max(note_energies_list)
|
| 100 |
|
| 101 |
-
# --- Step 3: Matplotlib
|
| 102 |
fig, ax = plt.subplots(figsize=(6, 4))
|
| 103 |
-
fig.patch.set_facecolor("black")
|
| 104 |
ax.set_facecolor("black")
|
| 105 |
-
ax.set_ylim(0, max_energy * 1.1) #
|
| 106 |
ax.set_xticks(range(12))
|
| 107 |
ax.set_xticklabels(NOTE_NAMES, color="white")
|
| 108 |
ax.tick_params(axis='y', colors='white')
|
| 109 |
-
ax.spines
|
| 110 |
-
|
| 111 |
-
ax.spines['top'].set_color('white')
|
| 112 |
-
ax.spines['right'].set_color('white')
|
| 113 |
|
| 114 |
-
#
|
| 115 |
cmap = plt.cm.get_cmap('rainbow', 12)
|
| 116 |
bar_colors = [cmap(i) for i in range(12)]
|
| 117 |
-
|
| 118 |
bars = ax.bar(range(12), note_energies_list[0], color=bar_colors)
|
| 119 |
|
| 120 |
def update(frame):
|
|
@@ -123,26 +122,26 @@ def main():
|
|
| 123 |
b.set_height(e)
|
| 124 |
return bars
|
| 125 |
|
| 126 |
-
# FPS
|
| 127 |
-
fps = sample_rate / step_size
|
| 128 |
ani = FuncAnimation(
|
| 129 |
fig,
|
| 130 |
update,
|
| 131 |
frames=n_chunks,
|
| 132 |
-
interval=1000 / fps, #
|
| 133 |
blit=True
|
| 134 |
)
|
| 135 |
|
| 136 |
-
# --- Step 4: Matplotlib
|
| 137 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
|
| 138 |
ani.save(temp_video.name, fps=fps, extra_args=["-vcodec", "libx264"])
|
| 139 |
video_path = temp_video.name
|
| 140 |
|
| 141 |
-
# --- Step 5:
|
| 142 |
audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
|
| 143 |
audio.export(audio_path, format="wav")
|
| 144 |
|
| 145 |
-
# --- Step 6: FFmpeg
|
| 146 |
output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_output.mp4").name
|
| 147 |
ffmpeg_command = [
|
| 148 |
"ffmpeg", "-y",
|
|
@@ -154,11 +153,11 @@ def main():
|
|
| 154 |
]
|
| 155 |
subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
| 156 |
|
| 157 |
-
# --- Step 7: Streamlit
|
| 158 |
-
st.write("
|
| 159 |
st.video(output_path)
|
| 160 |
|
| 161 |
-
#
|
| 162 |
os.remove(temp_mp3.name)
|
| 163 |
os.remove(video_path)
|
| 164 |
os.remove(audio_path)
|
|
|
|
| 10 |
from scipy.fft import rfft, rfftfreq
|
| 11 |
from scipy.signal import get_window
|
| 12 |
|
| 13 |
+
# Names for the 12 musical notes
|
| 14 |
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F",
|
| 15 |
"F#", "G", "G#", "A", "A#", "B"]
|
| 16 |
|
| 17 |
def freq_to_note_index(freq, base_freq=440.0):
    """Map a frequency in Hz to a pitch-class index in the range 0-11.

    Index 0 corresponds to "C" and index 9 to "A" (the tuning reference,
    ``base_freq`` = 440 Hz by default).

    Returns:
        int in [0, 11], or None when ``freq`` is not a positive frequency.

    Examples:
        freq_to_note_index(440)    -> 9  (which corresponds to "A")
        freq_to_note_index(261.63) -> 0  (approximately "C")
    """
    if freq <= 0:
        return None
    # Distance from the reference pitch A4, rounded to the nearest semitone:
    # n = 12 * log2(freq / base_freq)
    semitones_from_a4 = round(12 * np.log2(freq / base_freq))
    # "A" sits at slot 9 of the C-based note table, so offset by 9 and wrap
    # into the 12-entry pitch-class range.
    return (9 + semitones_from_a4) % 12
|
| 36 |
|
| 37 |
def main():
|
| 38 |
+
st.title("Doremi Frequency Decomposition Animation")
|
| 39 |
|
| 40 |
+
uploaded_file = st.file_uploader("Upload an MP3 file to analyze:", type=["mp3"])
|
| 41 |
|
| 42 |
if uploaded_file is not None:
|
| 43 |
+
st.write("File uploaded. Generating video...")
|
| 44 |
|
| 45 |
+
# --- Step 1: Convert MP3 to WAV & load into a NumPy array ---
|
| 46 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
|
| 47 |
temp_mp3.write(uploaded_file.read())
|
| 48 |
audio = AudioSegment.from_file(temp_mp3.name)
|
| 49 |
|
| 50 |
+
# Convert stereo to mono and normalize to -1..+1
|
| 51 |
samples = np.array(audio.get_array_of_samples(), dtype=float)
|
| 52 |
sample_rate = audio.frame_rate
|
| 53 |
if audio.channels == 2:
|
|
|
|
| 56 |
if max_val != 0:
|
| 57 |
samples /= max_val
|
| 58 |
|
| 59 |
+
# --- Parameters for the FFT ---
|
| 60 |
+
chunk_size = 2048 # FFT size
|
| 61 |
+
overlap = 1024 # Overlap
|
| 62 |
step_size = chunk_size - overlap
|
| 63 |
window = get_window("hann", chunk_size)
|
| 64 |
|
| 65 |
+
# Calculate how many chunks we can process
|
| 66 |
n_chunks = (len(samples) - chunk_size) // step_size + 1
|
| 67 |
if n_chunks < 1:
|
| 68 |
+
st.error("Audio is too short to process. Please upload a longer file.")
|
| 69 |
return
|
| 70 |
|
| 71 |
+
# Determine how long the resulting animation will be (in seconds)
|
| 72 |
total_seconds = n_chunks * (step_size / sample_rate)
|
| 73 |
+
|
| 74 |
+
# If the audio is longer than total_seconds, trim it to match
|
| 75 |
if len(audio) > int(total_seconds * 1000):
|
| 76 |
audio = audio[: int(total_seconds * 1000)]
|
| 77 |
|
| 78 |
+
# Frequency axis for the FFT
|
| 79 |
freqs = rfftfreq(chunk_size, d=1.0 / sample_rate)
|
| 80 |
|
| 81 |
+
# --- Step 2: Compute the energies in the 12 notes for each chunk ---
|
| 82 |
note_energies_list = []
|
| 83 |
for i in range(n_chunks):
|
| 84 |
start = i * step_size
|
| 85 |
end = start + chunk_size
|
| 86 |
chunk = samples[start:end] * window
|
| 87 |
|
| 88 |
+
spectrum = np.abs(rfft(chunk)) # Magnitude spectrum (0..Nyquist)
|
| 89 |
|
|
|
|
| 90 |
energies = np.zeros(12, dtype=float)
|
| 91 |
+
# For each frequency bin, find which note it belongs to, then accumulate amplitude
|
|
|
|
| 92 |
for bin_idx, amp in enumerate(spectrum):
|
| 93 |
freq = freqs[bin_idx]
|
| 94 |
note_idx = freq_to_note_index(freq, base_freq=440.0)
|
|
|
|
| 100 |
note_energies_list = np.array(note_energies_list)
|
| 101 |
max_energy = np.max(note_energies_list)
|
| 102 |
|
| 103 |
+
# --- Step 3: Create a bar-chart animation with Matplotlib ---
|
| 104 |
fig, ax = plt.subplots(figsize=(6, 4))
|
| 105 |
+
fig.patch.set_facecolor("black")
|
| 106 |
ax.set_facecolor("black")
|
| 107 |
+
ax.set_ylim(0, max_energy * 1.1) # Y-limit to accommodate peaks
|
| 108 |
ax.set_xticks(range(12))
|
| 109 |
ax.set_xticklabels(NOTE_NAMES, color="white")
|
| 110 |
ax.tick_params(axis='y', colors='white')
|
| 111 |
+
for spine in ax.spines.values():
|
| 112 |
+
spine.set_color('white')
|
|
|
|
|
|
|
| 113 |
|
| 114 |
+
# Use a color map for the bars
|
| 115 |
cmap = plt.cm.get_cmap('rainbow', 12)
|
| 116 |
bar_colors = [cmap(i) for i in range(12)]
|
|
|
|
| 117 |
bars = ax.bar(range(12), note_energies_list[0], color=bar_colors)
|
| 118 |
|
| 119 |
def update(frame):
|
|
|
|
| 122 |
b.set_height(e)
|
| 123 |
return bars
|
| 124 |
|
| 125 |
+
# Calculate FPS so video length matches audio length
|
| 126 |
+
fps = sample_rate / step_size
|
| 127 |
ani = FuncAnimation(
|
| 128 |
fig,
|
| 129 |
update,
|
| 130 |
frames=n_chunks,
|
| 131 |
+
interval=1000 / fps, # interval in milliseconds
|
| 132 |
blit=True
|
| 133 |
)
|
| 134 |
|
| 135 |
+
# --- Step 4: Save the Matplotlib animation as MP4 ---
|
| 136 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
|
| 137 |
ani.save(temp_video.name, fps=fps, extra_args=["-vcodec", "libx264"])
|
| 138 |
video_path = temp_video.name
|
| 139 |
|
| 140 |
+
# --- Step 5: Export the trimmed audio to WAV ---
|
| 141 |
audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
|
| 142 |
audio.export(audio_path, format="wav")
|
| 143 |
|
| 144 |
+
# --- Step 6: Merge video and audio with FFmpeg ---
|
| 145 |
output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_output.mp4").name
|
| 146 |
ffmpeg_command = [
|
| 147 |
"ffmpeg", "-y",
|
|
|
|
| 153 |
]
|
| 154 |
subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
| 155 |
|
| 156 |
+
# --- Step 7: Display the final video in Streamlit ---
|
| 157 |
+
st.write("**Here is your Doremi decomposition video:**")
|
| 158 |
st.video(output_path)
|
| 159 |
|
| 160 |
+
# Cleanup temp files
|
| 161 |
os.remove(temp_mp3.name)
|
| 162 |
os.remove(video_path)
|
| 163 |
os.remove(audio_path)
|