# Streamlit app: decompose an uploaded MP3 into the 12 pitch classes
# (do-re-mi) per FFT frame and render the result as an animated bar chart
# with the original audio merged back in via FFmpeg.
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import tempfile
import os
import subprocess
from pydub import AudioSegment
from scipy.fft import rfft, rfftfreq
from scipy.signal import get_window
# Names of the 12 pitch classes in semitone order starting at C.
# Index 9 is "A": freq_to_note_index maps A4 = 440 Hz to this slot.
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F",
              "F#", "G", "G#", "A", "A#", "B"]
def freq_to_note_index(freq, base_freq=440.0):
    """Map a frequency in Hz to a pitch-class index 0-11 (C..B).

    The reference pitch is A4 (``base_freq``, 440 Hz by default), which
    occupies index 9 of NOTE_NAMES; octaves collapse onto the same index.
    Returns None for non-positive frequencies.

    Examples:
        freq_to_note_index(440)    -> 9  ("A")
        freq_to_note_index(261.63) -> 0  (approximately "C")
    """
    if freq <= 0:
        return None
    # Signed distance from the reference pitch, rounded to whole semitones.
    semitones_from_a4 = round(12 * np.log2(freq / base_freq))
    # A4 sits at slot 9, so shift by 9 before wrapping into the 12-note cycle.
    return (semitones_from_a4 + 9) % 12
def main():
    """Streamlit entry point.

    Workflow: upload an MP3, window it into overlapping FFT frames, sum each
    frame's magnitude spectrum into the 12 pitch classes, animate the result
    as a bar chart, and merge the animation with the audio using FFmpeg.
    """
    st.title("Doremi Frequency Decomposition Animation")
    uploaded_file = st.file_uploader("Upload an MP3 file to analyze:", type=["mp3"])
    if uploaded_file is None:
        return
    st.write("File uploaded. Generating video...")

    # --- Step 1: Convert MP3 to WAV & load into a NumPy array ---
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
        temp_mp3.write(uploaded_file.read())

    # Track every temp file we create so the `finally` below always cleans up,
    # even if decoding, rendering, or FFmpeg fails part-way through.
    video_path = audio_path = output_path = None
    try:
        audio = AudioSegment.from_file(temp_mp3.name)

        # Convert stereo to mono and normalize to -1..+1
        samples = np.array(audio.get_array_of_samples(), dtype=float)
        sample_rate = audio.frame_rate
        if audio.channels == 2:
            samples = samples.reshape((-1, 2)).mean(axis=1)
        max_val = np.max(np.abs(samples))
        if max_val != 0:
            samples /= max_val

        # --- Parameters for the FFT ---
        chunk_size = 2048  # FFT size (samples per frame)
        overlap = 1024     # Samples shared between consecutive frames
        step_size = chunk_size - overlap
        window = get_window("hann", chunk_size)

        # Calculate how many full chunks we can process
        n_chunks = (len(samples) - chunk_size) // step_size + 1
        if n_chunks < 1:
            st.error("Audio is too short to process. Please upload a longer file.")
            return

        # Determine how long the resulting animation will be (in seconds) and
        # trim the audio to match so video and sound stay in sync.
        total_seconds = n_chunks * (step_size / sample_rate)
        if len(audio) > int(total_seconds * 1000):  # pydub slicing is in ms
            audio = audio[: int(total_seconds * 1000)]

        # Frequency axis for the FFT (0..Nyquist)
        freqs = rfftfreq(chunk_size, d=1.0 / sample_rate)

        # Precompute the bin -> pitch-class mapping ONCE: it depends only on
        # `freqs`, so there is no need to re-derive it for every chunk.
        # -1 marks bins with no note (the DC bin, freq <= 0).
        bin_notes = np.full(freqs.shape, -1, dtype=int)
        positive = freqs > 0
        semitones = np.round(12.0 * np.log2(freqs[positive] / 440.0)).astype(int)
        bin_notes[positive] = (9 + semitones) % 12  # same shift as freq_to_note_index
        valid = bin_notes >= 0

        # --- Step 2: Compute the energies in the 12 notes for each chunk ---
        note_energies_list = []
        for i in range(n_chunks):
            start = i * step_size
            chunk = samples[start:start + chunk_size] * window
            spectrum = np.abs(rfft(chunk))  # Magnitude spectrum (0..Nyquist)
            # One C-level pass sums every bin's magnitude into its note bucket
            # (replaces the former per-bin Python loop).
            energies = np.bincount(bin_notes[valid],
                                   weights=spectrum[valid],
                                   minlength=12)
            note_energies_list.append(energies)
        note_energies_list = np.array(note_energies_list)
        max_energy = np.max(note_energies_list)

        # --- Step 3: Create a bar-chart animation with Matplotlib ---
        fig, ax = plt.subplots(figsize=(6, 4))
        fig.patch.set_facecolor("black")
        ax.set_facecolor("black")
        ax.set_ylim(0, max_energy * 1.1)  # Y-limit to accommodate peaks
        ax.set_xticks(range(12))
        ax.set_xticklabels(NOTE_NAMES, color="white")
        ax.tick_params(axis='y', colors='white')
        for spine in ax.spines.values():
            spine.set_color('white')

        # Use a color map for the bars. plt.get_cmap replaces plt.cm.get_cmap,
        # which was removed in matplotlib 3.9.
        cmap = plt.get_cmap('rainbow', 12)
        bar_colors = [cmap(i) for i in range(12)]
        bars = ax.bar(range(12), note_energies_list[0], color=bar_colors)

        def update(frame):
            # Only bar heights change between frames (blitting-friendly).
            energies = note_energies_list[frame]
            for b, e in zip(bars, energies):
                b.set_height(e)
            return bars

        # Calculate FPS so video length matches audio length
        fps = sample_rate / step_size
        ani = FuncAnimation(
            fig,
            update,
            frames=n_chunks,
            interval=1000 / fps,  # interval in milliseconds
            blit=True
        )

        # --- Step 4: Save the Matplotlib animation as MP4 ---
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
            video_path = temp_video.name
        ani.save(video_path, fps=fps, extra_args=["-vcodec", "libx264"])

        # --- Step 5: Export the trimmed audio to WAV ---
        audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
        audio.export(audio_path, format="wav")

        # --- Step 6: Merge video and audio with FFmpeg ---
        output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_output.mp4").name
        ffmpeg_command = [
            "ffmpeg", "-y",
            "-i", video_path,
            "-i", audio_path,
            "-c:v", "copy",
            "-c:a", "aac",
            output_path
        ]
        result = subprocess.run(ffmpeg_command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        # A non-zero exit code previously went unnoticed and produced a
        # broken/empty video; surface it to the user instead.
        if result.returncode != 0:
            st.error("FFmpeg failed to merge audio and video:\n"
                     + result.stderr.decode(errors="replace"))
            return

        # --- Step 7: Display the final video in Streamlit ---
        st.write("**Here is your Doremi decomposition video:**")
        st.video(output_path)
    finally:
        # Cleanup temp files (best-effort; also runs on errors above)
        for path in (temp_mp3.name, video_path, audio_path, output_path):
            if path is not None and os.path.exists(path):
                try:
                    os.remove(path)
                except OSError:
                    pass
# Run the app when executed directly (e.g. `streamlit run <this file>`).
if __name__ == "__main__":
    main()