File size: 6,196 Bytes
e531916
647b493
 
e531916
647b493
 
c389cfc
86ef1c7
e531916
 
 
f8b7e64
1fd1f2a
f69cb22
 
 
 
 
1fd1f2a
 
 
 
 
 
 
f69cb22
 
 
1fd1f2a
f69cb22
1fd1f2a
 
 
f69cb22
 
 
647b493
1fd1f2a
e531916
1fd1f2a
e531916
 
1fd1f2a
e531916
1fd1f2a
e531916
 
 
 
1fd1f2a
e531916
 
f69cb22
90ea247
 
 
 
e531916
1fd1f2a
 
 
e531916
 
90ea247
1fd1f2a
e531916
90ea247
1fd1f2a
90ea247
f69cb22
1fd1f2a
90ea247
1fd1f2a
 
90ea247
 
f69cb22
1fd1f2a
f69cb22
90ea247
1fd1f2a
f69cb22
e531916
 
 
 
90ea247
1fd1f2a
f69cb22
 
1fd1f2a
f69cb22
 
 
 
 
 
 
 
 
 
 
1fd1f2a
f69cb22
1fd1f2a
90ea247
1fd1f2a
f69cb22
 
 
1fd1f2a
 
e531916
1fd1f2a
f69cb22
 
 
e531916
 
f69cb22
 
 
 
90ea247
1fd1f2a
 
90ea247
 
 
 
1fd1f2a
90ea247
 
e531916
1fd1f2a
e531916
90ea247
e531916
f69cb22
1fd1f2a
e531916
 
 
1fd1f2a
e531916
 
 
 
 
 
 
 
 
 
 
1fd1f2a
 
e531916
 
1fd1f2a
e531916
 
 
 
647b493
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import tempfile
import os
import subprocess

from pydub import AudioSegment
from scipy.fft import rfft, rfftfreq
from scipy.signal import get_window

# The 12 pitch classes of the chromatic scale, starting at C.
# A note index (0-11) is a position in this list, e.g. index 9 is "A".
NOTE_NAMES = "C C# D D# E F F# G G# A A# B".split()

def freq_to_note_index(freq, base_freq=440.0):
    """
    Map a frequency to the index (0-11) of its nearest pitch class.

    The index follows the C-based ordering C, C#, D, ..., B, so index 9 is
    "A". The octave is discarded: 220 Hz, 440 Hz and 880 Hz all map to 9.

    Args:
        freq: Frequency in Hz.
        base_freq: Frequency of the reference A (defaults to A4 = 440 Hz).
            Must be a positive number.

    Returns:
        A built-in ``int`` in ``range(12)``, or ``None`` when ``freq`` has no
        pitch (zero, negative, NaN or infinite).

    Examples:
        - freq_to_note_index(440) -> 9  (which corresponds to "A")
        - freq_to_note_index(261.63) -> 0 (approximately C)
    """
    # NaN and +/-inf would make round() raise, so treat them like the other
    # "no pitch" inputs (DC bin, negative values) and report None.
    if not np.isfinite(freq) or freq <= 0:
        return None
    # Signed distance from the reference A in semitones, rounded to the
    # nearest one. int() guarantees a plain Python int regardless of what
    # round() returns for NumPy scalars on the installed NumPy version.
    semitone = int(round(12 * np.log2(freq / base_freq)))
    # The reference A sits at index 9; Python's % always yields 0..11 here,
    # even for negative semitone offsets.
    return (9 + semitone) % 12

def _load_mono_samples(audio):
    """Return the audio's samples as a mono float array scaled to -1..+1."""
    samples = np.array(audio.get_array_of_samples(), dtype=float)
    # pydub interleaves the channels, so fold every frame down to its mean.
    # This covers any channel count, not just stereo.
    if audio.channels > 1:
        samples = samples.reshape((-1, audio.channels)).mean(axis=1)
    # np.max raises on an empty array, so guard against zero-length audio.
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak != 0:
        samples /= peak
    return samples


def _note_index_per_bin(freqs):
    """Return the note index (0-11) of every FFT bin, or -1 for bins without a note."""
    bin_notes = np.full(len(freqs), -1, dtype=int)
    for bin_idx, freq in enumerate(freqs):
        note_idx = freq_to_note_index(freq, base_freq=440.0)
        if note_idx is not None:
            bin_notes[bin_idx] = int(note_idx)
    return bin_notes


def _compute_note_energies(samples, sample_rate, chunk_size, step_size, n_chunks):
    """Return an (n_chunks, 12) array of summed spectral magnitude per note."""
    window = get_window("hann", chunk_size)
    freqs = rfftfreq(chunk_size, d=1.0 / sample_rate)

    # The bin -> note mapping depends only on the FFT size and sample rate,
    # so compute it once instead of once per bin per chunk.
    bin_notes = _note_index_per_bin(freqs)
    has_note = bin_notes >= 0
    note_of_bin = bin_notes[has_note]

    note_energies = np.zeros((n_chunks, 12), dtype=float)
    for i in range(n_chunks):
        start = i * step_size
        chunk = samples[start:start + chunk_size] * window
        spectrum = np.abs(rfft(chunk))  # Magnitude spectrum (0..Nyquist)
        # Accumulate each bin's magnitude into the note it belongs to.
        note_energies[i] = np.bincount(
            note_of_bin, weights=spectrum[has_note], minlength=12
        )
    return note_energies


def _render_animation(note_energies, fps, video_path):
    """Save a bar-chart animation (one frame per chunk) as an MP4 at video_path."""
    max_energy = float(np.max(note_energies))

    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        fig.patch.set_facecolor("black")
        ax.set_facecolor("black")
        # Leave headroom above the peak; fall back to a unit axis for
        # all-silent audio, where max_energy is 0.
        ax.set_ylim(0, max_energy * 1.1 if max_energy > 0 else 1.0)
        ax.set_xticks(range(12))
        ax.set_xticklabels(NOTE_NAMES, color="white")
        ax.tick_params(axis='y', colors='white')
        for spine in ax.spines.values():
            spine.set_color('white')

        # plt.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap
        # accepts the same (name, lut) arguments.
        cmap = plt.get_cmap('rainbow', 12)
        bar_colors = [cmap(i) for i in range(12)]
        bars = ax.bar(range(12), note_energies[0], color=bar_colors)

        def update(frame):
            for bar, energy in zip(bars, note_energies[frame]):
                bar.set_height(energy)
            return bars

        ani = FuncAnimation(
            fig,
            update,
            frames=len(note_energies),
            interval=1000 / fps,  # interval in milliseconds
            blit=True
        )
        ani.save(video_path, fps=fps, extra_args=["-vcodec", "libx264"])
    finally:
        # Streamlit reruns this script in one long-lived process, so figures
        # that are never closed would accumulate.
        plt.close(fig)


def main():
    """
    Streamlit entry point: turn an uploaded MP3 into a note-energy video.

    The audio is split into overlapping Hann-windowed chunks. Each chunk's
    FFT magnitudes are summed into the 12 pitch classes and drawn as one
    bar-chart frame, and the frames are muxed with the audio via FFmpeg.
    All intermediate files live in a temporary directory that is removed on
    every exit path, including early returns and exceptions.
    """
    st.title("Doremi Frequency Decomposition Animation")

    uploaded_file = st.file_uploader("Upload an MP3 file to analyze:", type=["mp3"])
    if uploaded_file is None:
        return

    st.write("File uploaded. Generating video...")

    with tempfile.TemporaryDirectory() as workdir:
        # --- Step 1: Write the upload to disk & load it into a NumPy array ---
        mp3_path = os.path.join(workdir, "input.mp3")
        with open(mp3_path, "wb") as mp3_file:
            mp3_file.write(uploaded_file.getvalue())
        # Decode only after the file is closed, so the decoder sees all bytes.
        audio = AudioSegment.from_file(mp3_path)

        samples = _load_mono_samples(audio)
        sample_rate = audio.frame_rate

        # --- Parameters for the FFT ---
        chunk_size = 2048       # FFT size
        overlap = 1024          # Overlap
        step_size = chunk_size - overlap

        # Calculate how many chunks we can process
        n_chunks = (len(samples) - chunk_size) // step_size + 1
        if n_chunks < 1:
            st.error("Audio is too short to process. Please upload a longer file.")
            return

        # Determine how long the resulting animation will be (in seconds)
        total_seconds = n_chunks * (step_size / sample_rate)

        # If the audio is longer than total_seconds, trim it to match
        total_ms = int(total_seconds * 1000)
        if len(audio) > total_ms:
            audio = audio[:total_ms]

        # --- Step 2: Compute the energies in the 12 notes for each chunk ---
        note_energies = _compute_note_energies(
            samples, sample_rate, chunk_size, step_size, n_chunks
        )

        # --- Steps 3-4: Render the bar-chart animation and save it as MP4 ---
        # One frame per chunk, so this FPS makes video length match the audio.
        fps = sample_rate / step_size
        video_path = os.path.join(workdir, "animation.mp4")
        _render_animation(note_energies, fps, video_path)

        # --- Step 5: Export the trimmed audio to WAV ---
        audio_path = os.path.join(workdir, "audio.wav")
        audio.export(audio_path, format="wav")

        # --- Step 6: Merge video and audio with FFmpeg ---
        output_path = os.path.join(workdir, "output.mp4")
        ffmpeg_command = [
            "ffmpeg", "-y",
            "-i", video_path,
            "-i", audio_path,
            "-c:v", "copy",
            "-c:a", "aac",
            output_path
        ]
        result = subprocess.run(
            ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        if result.returncode != 0:
            # Report the failure instead of showing a missing or broken video.
            st.error("FFmpeg failed to merge the video and audio.")
            st.code(result.stderr.decode(errors="replace")[-2000:])
            return

        # Read the result into memory so it outlives the temporary directory.
        with open(output_path, "rb") as output_file:
            video_bytes = output_file.read()

    # --- Step 7: Display the final video in Streamlit ---
    st.write("**Here is your Doremi decomposition video:**")
    st.video(video_bytes)

# Build the app only when this file is executed as a script, not when it is
# imported as a module (e.g. to reuse freq_to_note_index).
if __name__ == "__main__":
    main()