Update app.py
Browse files
app.py
CHANGED
|
@@ -10,136 +10,139 @@ from pydub import AudioSegment
|
|
| 10 |
from scipy.fft import rfft, rfftfreq
|
| 11 |
from scipy.signal import get_window
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
def main():
|
| 14 |
-
st.title("
|
| 15 |
|
| 16 |
uploaded_file = st.file_uploader("アートに変換する音声ファイルをアップロード (MP3)", type=["mp3"])
|
| 17 |
|
| 18 |
if uploaded_file is not None:
|
| 19 |
-
st.write("
|
| 20 |
|
| 21 |
# --- Step 1: MP3 -> WAV 変換 & numpy配列化 ---
|
| 22 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
|
| 23 |
temp_mp3.write(uploaded_file.read())
|
| 24 |
audio = AudioSegment.from_file(temp_mp3.name)
|
| 25 |
|
| 26 |
-
#
|
| 27 |
samples = np.array(audio.get_array_of_samples(), dtype=float)
|
| 28 |
sample_rate = audio.frame_rate
|
| 29 |
-
|
| 30 |
-
if audio.channels == 2: # ステレオ → モノラル
|
| 31 |
samples = samples.reshape((-1, 2)).mean(axis=1)
|
| 32 |
max_val = np.max(np.abs(samples))
|
| 33 |
if max_val != 0:
|
| 34 |
samples /= max_val
|
| 35 |
|
| 36 |
# --- パラメータ設定 ---
|
| 37 |
-
chunk_size = 2048
|
| 38 |
-
overlap = 1024
|
| 39 |
step_size = chunk_size - overlap
|
| 40 |
-
|
| 41 |
-
# ウィンドウ(ハニング窓)
|
| 42 |
window = get_window("hann", chunk_size)
|
| 43 |
|
| 44 |
-
#
|
| 45 |
n_chunks = (len(samples) - chunk_size) // step_size + 1
|
| 46 |
if n_chunks < 1:
|
| 47 |
st.error("音声が短すぎて処理できません。もう少し長い音源をアップロードしてください。")
|
| 48 |
return
|
| 49 |
-
|
| 50 |
-
#
|
| 51 |
total_seconds = n_chunks * (step_size / sample_rate)
|
| 52 |
-
|
| 53 |
-
# 音声が長過ぎる場合はトリミングして同期
|
| 54 |
if len(audio) > int(total_seconds * 1000):
|
| 55 |
audio = audio[: int(total_seconds * 1000)]
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
-
#
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
# 全チャンク分の FFT をまとめて計算
|
| 61 |
-
fft_frames = []
|
| 62 |
for i in range(n_chunks):
|
| 63 |
start = i * step_size
|
| 64 |
end = start + chunk_size
|
| 65 |
chunk = samples[start:end] * window
|
| 66 |
-
spec = np.abs(rfft(chunk))
|
| 67 |
-
fft_frames.append(spec)
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
ax.set_facecolor("black")
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
sc = ax.scatter(
|
| 86 |
-
theta,
|
| 87 |
-
r_init,
|
| 88 |
-
c=r_init,
|
| 89 |
-
cmap="plasma", # ここを "hsv", "twilight", "rainbow" などに変えてもOK
|
| 90 |
-
alpha=0.8,
|
| 91 |
-
s=(np.sqrt(r_init) + 1) * 60, # 振幅に応じてサイズ変化
|
| 92 |
-
edgecolors="none"
|
| 93 |
-
)
|
| 94 |
-
ax.set_ylim(0, max_fft)
|
| 95 |
|
| 96 |
-
#
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
def update(frame):
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
# 周波数→角度 + スワール
|
| 105 |
-
new_theta = (theta + swirl) % (2 * np.pi)
|
| 106 |
-
|
| 107 |
-
# 散布図を更新
|
| 108 |
-
sc.set_offsets(np.column_stack((new_theta, new_r)))
|
| 109 |
-
# カラーをちょっと派手に変化させたい場合
|
| 110 |
-
color_data = (new_r + swirl * 10) % max_fft
|
| 111 |
-
sc.set_array(color_data)
|
| 112 |
-
|
| 113 |
-
# マーカーサイズもフレームごとに変化
|
| 114 |
-
sc.set_sizes((np.sqrt(new_r) + 1) * 60)
|
| 115 |
-
|
| 116 |
-
return (sc,)
|
| 117 |
-
|
| 118 |
-
# フレームレートを計算 (映像の全長が音声と揃うように)
|
| 119 |
-
# 1フレーム = step_size サンプル分 → その長さ: step_size / sample_rate (秒)
|
| 120 |
-
# 全フレーム数 = n_chunks
|
| 121 |
-
# 全体秒数 = n_chunks * (step_size / sample_rate)
|
| 122 |
-
# fps = n_chunks / 全体秒数 = sample_rate / step_size
|
| 123 |
-
fps = sample_rate / step_size
|
| 124 |
|
|
|
|
|
|
|
| 125 |
ani = FuncAnimation(
|
| 126 |
fig,
|
| 127 |
update,
|
| 128 |
frames=n_chunks,
|
| 129 |
-
interval=1000 / fps, #
|
| 130 |
blit=True
|
| 131 |
)
|
| 132 |
|
| 133 |
-
# --- Step
|
| 134 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
|
| 135 |
ani.save(temp_video.name, fps=fps, extra_args=["-vcodec", "libx264"])
|
| 136 |
video_path = temp_video.name
|
| 137 |
-
|
| 138 |
-
# --- Step
|
| 139 |
audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
|
| 140 |
audio.export(audio_path, format="wav")
|
| 141 |
|
| 142 |
-
# --- Step
|
| 143 |
output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_output.mp4").name
|
| 144 |
ffmpeg_command = [
|
| 145 |
"ffmpeg", "-y",
|
|
@@ -147,16 +150,15 @@ def main():
|
|
| 147 |
"-i", audio_path,
|
| 148 |
"-c:v", "copy",
|
| 149 |
"-c:a", "aac",
|
| 150 |
-
"-strict", "experimental", # 必要に応じて
|
| 151 |
output_path
|
| 152 |
]
|
| 153 |
subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
| 154 |
|
| 155 |
-
# --- Step
|
| 156 |
-
st.write("
|
| 157 |
st.video(output_path)
|
| 158 |
|
| 159 |
-
#
|
| 160 |
os.remove(temp_mp3.name)
|
| 161 |
os.remove(video_path)
|
| 162 |
os.remove(audio_path)
|
|
|
|
| 10 |
from scipy.fft import rfft, rfftfreq
|
| 11 |
from scipy.signal import get_window
|
| 12 |
|
| 13 |
+
# The 12 pitch-class names of the chromatic scale, starting at C.
NOTE_NAMES = "C C# D D# E F F# G G# A A# B".split()
|
| 16 |
+
|
| 17 |
+
def freq_to_note_index(freq, base_freq=440.0):
    """Map a frequency in Hz to a pitch-class index 0-11 (C=0 ... B=11).

    The mapping is anchored at A4 = ``base_freq`` (440 Hz by default),
    which lands on index 9 (``NOTE_NAMES[9] == "A"``); e.g. C4 sits 9
    semitones below A4 and maps to index 0.  Returns ``None`` for
    non-positive frequencies (such as the DC bin of an FFT).
    """
    if freq <= 0:
        return None
    # Semitone offset from A4, rounded to the nearest whole semitone:
    # n = 12 * log2(freq / base_freq)
    semitones_from_a4 = round(12 * np.log2(freq / base_freq))
    # Shift so C is 0 (A lies 9 semitones above C) and wrap into one octave.
    return (9 + semitones_from_a4) % 12
|
| 33 |
+
|
| 34 |
def main():
    """Streamlit app: decompose an uploaded MP3 into the energy of the
    12 chromatic pitch classes per frame and render the result as a
    bar-chart animation muxed with the (trimmed) audio."""
    st.title("ドレミ分解アニメーション")

    uploaded_file = st.file_uploader("アートに変換する音声ファイルをアップロード (MP3)", type=["mp3"])

    if uploaded_file is not None:
        st.write("アップロード完了。動画を生成します...")

        # --- Step 1: MP3 -> numpy array via pydub ---
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_mp3:
            temp_mp3.write(uploaded_file.read())
            audio = AudioSegment.from_file(temp_mp3.name)

        # Down-mix stereo to mono and normalize amplitude to [-1, 1].
        samples = np.array(audio.get_array_of_samples(), dtype=float)
        sample_rate = audio.frame_rate
        if audio.channels == 2:
            samples = samples.reshape((-1, 2)).mean(axis=1)
        max_val = np.max(np.abs(samples))
        if max_val != 0:
            samples /= max_val

        # --- Analysis parameters ---
        chunk_size = 2048  # FFT size
        overlap = 1024     # overlap between consecutive chunks
        step_size = chunk_size - overlap
        window = get_window("hann", chunk_size)

        # Number of full chunks that fit in the signal.
        n_chunks = (len(samples) - chunk_size) // step_size + 1
        if n_chunks < 1:
            st.error("音声が短すぎて処理できません。もう少し長い音源をアップロードしてください。")
            return

        # Keep video and audio in sync: trim the audio if it is longer
        # than the animation will be.
        total_seconds = n_chunks * (step_size / sample_rate)
        if len(audio) > int(total_seconds * 1000):
            audio = audio[: int(total_seconds * 1000)]

        # FFT frequency axis (0 .. Nyquist).
        freqs = rfftfreq(chunk_size, d=1.0 / sample_rate)

        # The pitch class of each FFT bin never changes between chunks,
        # so compute it ONCE here instead of calling freq_to_note_index
        # for every bin of every chunk (was the hot inner loop).
        bin_notes = np.full(len(freqs), -1, dtype=int)
        for bin_idx, freq in enumerate(freqs):
            note_idx = freq_to_note_index(freq, base_freq=440.0)
            if note_idx is not None:
                bin_notes[bin_idx] = note_idx
        valid_bins = bin_notes >= 0

        # --- Step 2: aggregate per-frame energy of the 12 pitch classes ---
        note_energies_list = []
        for i in range(n_chunks):
            start = i * step_size
            chunk = samples[start:start + chunk_size] * window
            spectrum = np.abs(rfft(chunk))
            # Sum bin amplitudes into their pitch classes at C speed.
            energies = np.bincount(
                bin_notes[valid_bins],
                weights=spectrum[valid_bins],
                minlength=12,
            )
            note_energies_list.append(energies)

        note_energies_list = np.array(note_energies_list)
        max_energy = np.max(note_energies_list)
        if max_energy <= 0:
            # Completely silent input: avoid a degenerate y-axis (ylim 0..0).
            max_energy = 1.0

        # --- Step 3: animated bar chart on a black background ---
        fig, ax = plt.subplots(figsize=(6, 4))
        fig.patch.set_facecolor("black")
        ax.set_facecolor("black")
        ax.set_ylim(0, max_energy * 1.1)  # headroom above the tallest bar
        ax.set_xticks(range(12))
        ax.set_xticklabels(NOTE_NAMES, color="white")
        ax.tick_params(axis='y', colors='white')
        for side in ('bottom', 'left', 'top', 'right'):
            ax.spines[side].set_color('white')

        # One distinct color per bar.  Sampling the colormap callable at
        # i/11 avoids the deprecated plt.cm.get_cmap(name, lut) API
        # (removed in Matplotlib >= 3.9) while producing the same colors.
        bar_colors = [plt.cm.rainbow(i / 11) for i in range(12)]
        bars = ax.bar(range(12), note_energies_list[0], color=bar_colors)

        def update(frame):
            # Set each bar to the current frame's pitch-class energy.
            for bar, energy in zip(bars, note_energies_list[frame]):
                bar.set_height(energy)
            return bars

        # fps so the animation length matches the audio:
        # one frame covers step_size / sample_rate seconds,
        # hence fps = n_chunks / total_seconds = sample_rate / step_size.
        fps = sample_rate / step_size
        ani = FuncAnimation(
            fig,
            update,
            frames=n_chunks,
            interval=1000 / fps,  # milliseconds per frame
            blit=True
        )

        # --- Step 4: save the animation to a temporary mp4 ---
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
            ani.save(temp_video.name, fps=fps, extra_args=["-vcodec", "libx264"])
            video_path = temp_video.name

        # --- Step 5: export the (trimmed) audio as WAV ---
        audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
        audio.export(audio_path, format="wav")

        # --- Step 6: mux video and audio with FFmpeg ---
        output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_output.mp4").name
        ffmpeg_command = [
            "ffmpeg", "-y",
            "-i", video_path,
            "-i", audio_path,
            "-c:v", "copy",
            "-c:a", "aac",
            output_path
        ]
        result = subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if result.returncode != 0:
            # Surface the failure instead of silently showing a broken video.
            st.error("動画と音声の合成(FFmpeg)に失敗しました。")
            return

        # --- Step 7: play the result in Streamlit ---
        st.write("**生成した動画はこちら**")
        st.video(output_path)

        # Clean up intermediates (output_path is still needed by st.video).
        os.remove(temp_mp3.name)
        os.remove(video_path)
        os.remove(audio_path)
|