import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import tempfile
import os
import subprocess
from pydub import AudioSegment
from scipy.fft import rfft, rfftfreq
from scipy.signal import get_window

# Names for the 12 musical notes
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F",
              "F#", "G", "G#", "A", "A#", "B"]

def freq_to_note_index(freq, base_freq=440.0):
    """
    Map a frequency value (freq) to an index (0-11) corresponding to a
    musical note, using A4 = 440 Hz as the reference.
    Returns None if freq <= 0.

    For example:
    - freq_to_note_index(440) -> 9 (which corresponds to "A")
    - freq_to_note_index(261.63) -> 0 (approximately C)
    """
    if freq <= 0:
        return None
    # Number of semitones relative to A4 = 440 Hz
    semitone = round(12 * np.log2(freq / base_freq))
    # Shift so that semitone = 0 (A4) maps to index 9 within the 12-note
    # array: index = (9 + semitone) mod 12
    note_index = (9 + semitone) % 12
    return note_index
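
# Worked example: middle C is 261.63 Hz, so 12 * log2(261.63 / 440) ≈ -9.00
# semitones from A4; round() gives -9 and (9 - 9) % 12 = 0 -> NOTE_NAMES[0] == "C".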

def main():
    st.title("Doremi Frequency Decomposition Animation")
    uploaded_file = st.file_uploader("Upload an MP3 file to analyze:", type=["mp3"])
    if uploaded_file is not None:
        st.write("File uploaded. Generating video...")

        # --- Step 1: Convert MP3 to WAV & load into a NumPy array ---
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
            temp_mp3.write(uploaded_file.read())
        # Load after the "with" block so the bytes are flushed to disk
        # before pydub/ffmpeg reads the file
        audio = AudioSegment.from_file(temp_mp3.name)

        # Convert stereo to mono and normalize to -1..+1
        samples = np.array(audio.get_array_of_samples(), dtype=float)
        sample_rate = audio.frame_rate
        if audio.channels == 2:
            samples = samples.reshape((-1, 2)).mean(axis=1)
        max_val = np.max(np.abs(samples))
        if max_val != 0:
            samples /= max_val

        # --- Parameters for the FFT ---
        chunk_size = 2048  # FFT size
        overlap = 1024     # Overlap between consecutive chunks
        step_size = chunk_size - overlap
        window = get_window("hann", chunk_size)
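        # At a typical 44.1 kHz sample rate these values mean each chunk
        # spans 2048 / 44100 ≈ 46 ms and consecutive chunks advance by
        # 1024 / 44100 ≈ 23 ms, i.e. roughly 43 analysis frames per second.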

        # Calculate how many full chunks we can process
        n_chunks = (len(samples) - chunk_size) // step_size + 1
        if n_chunks < 1:
            st.error("Audio is too short to process. Please upload a longer file.")
            return

        # Duration of the resulting animation (in seconds)
        total_seconds = n_chunks * (step_size / sample_rate)
        # If the audio is longer than the animation, trim it to match
        # (pydub indexes audio in milliseconds)
        if len(audio) > int(total_seconds * 1000):
            audio = audio[: int(total_seconds * 1000)]

        # Frequency axis for the FFT
        freqs = rfftfreq(chunk_size, d=1.0 / sample_rate)
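        # rfftfreq returns chunk_size // 2 + 1 bins, from 0 Hz up to the
        # Nyquist frequency (sample_rate / 2), spaced sample_rate / chunk_size
        # apart (about 21.5 Hz per bin at 44.1 kHz).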

        # --- Step 2: Compute the energies in the 12 notes for each chunk ---
        note_energies_list = []
        for i in range(n_chunks):
            start = i * step_size
            end = start + chunk_size
            chunk = samples[start:end] * window
            spectrum = np.abs(rfft(chunk))  # Magnitude spectrum (0..Nyquist)
            energies = np.zeros(12, dtype=float)
            # For each frequency bin, find which note it belongs to,
            # then accumulate its amplitude
            for bin_idx, amp in enumerate(spectrum):
                freq = freqs[bin_idx]
                note_idx = freq_to_note_index(freq, base_freq=440.0)
                if note_idx is not None:
                    energies[note_idx] += amp
            note_energies_list.append(energies)
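
        # Note: the per-bin Python loop above runs n_chunks * (chunk_size // 2 + 1)
        # times. A vectorized sketch (same result, reusing the freqs array above)
        # would precompute each bin's note index once and use np.bincount:
        #   note_map = np.array([freq_to_note_index(f) if f > 0 else -1
        #                        for f in freqs])
        #   valid = note_map >= 0
        #   energies = np.bincount(note_map[valid], weights=spectrum[valid],
        #                          minlength=12)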
        note_energies_list = np.array(note_energies_list)
        max_energy = np.max(note_energies_list)

        # --- Step 3: Create a bar-chart animation with Matplotlib ---
        fig, ax = plt.subplots(figsize=(6, 4))
        fig.patch.set_facecolor("black")
        ax.set_facecolor("black")
        ax.set_ylim(0, max_energy * 1.1)  # Headroom above the largest peak
        ax.set_xticks(range(12))
        ax.set_xticklabels(NOTE_NAMES, color="white")
        ax.tick_params(axis="y", colors="white")
        for spine in ax.spines.values():
            spine.set_color("white")
        # Color the bars with a rainbow colormap, sampled at 12 points
        # (plt.cm.get_cmap was removed in Matplotlib 3.9)
        bar_colors = plt.cm.rainbow(np.linspace(0, 1, 12))
        bars = ax.bar(range(12), note_energies_list[0], color=bar_colors)
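        # ax.bar returns a BarContainer of Rectangle artists; update() below
        # mutates their heights in place instead of redrawing the whole axes.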

        def update(frame):
            energies = note_energies_list[frame]
            for b, e in zip(bars, energies):
                b.set_height(e)
            return bars  # Returned artists are the ones redrawn when blit=True

        # Calculate FPS so video length matches audio length
        fps = sample_rate / step_size
        ani = FuncAnimation(
            fig,
            update,
            frames=n_chunks,
            interval=1000 / fps,  # Interval between frames in milliseconds
            blit=True,
        )
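        # Sanity check on timing: n_chunks frames at fps = sample_rate / step_size
        # play for n_chunks * step_size / sample_rate seconds, which is exactly
        # the total_seconds the audio was trimmed to above, keeping video and
        # audio in sync after merging.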

        # --- Step 4: Save the Matplotlib animation as MP4 ---
        # Create the temp file and close it first; the FFmpeg writer then
        # reopens the path itself
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
            video_path = temp_video.name
        ani.save(video_path, fps=fps, extra_args=["-vcodec", "libx264"])

        # --- Step 5: Export the trimmed audio to WAV ---
        audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
        audio.export(audio_path, format="wav")

        # --- Step 6: Merge video and audio with FFmpeg ---
        output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_output.mp4").name
        ffmpeg_command = [
            "ffmpeg", "-y",
            "-i", video_path,
            "-i", audio_path,
            "-c:v", "copy",
            "-c:a", "aac",
            output_path,
        ]
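        # "-y" overwrites the output if it exists, "-c:v copy" passes the
        # H.264 video stream through without re-encoding, and "-c:a aac"
        # encodes the WAV audio to AAC for MP4 compatibility.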
        result = subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if result.returncode != 0:
            st.error("FFmpeg failed to merge audio and video.")
            return

        # --- Step 7: Display the final video in Streamlit ---
        st.write("**Here is your Doremi decomposition video:**")
        st.video(output_path)
        # Clean up temp files
        os.remove(temp_mp3.name)
        os.remove(video_path)
        os.remove(audio_path)
        os.remove(output_path)

if __name__ == "__main__":
    main()