# NOTE: the three lines below were stray web-page/commit residue pasted above
# the imports ("Jihuai's picture" / commit message / hash); commented out so
# the module parses.
# Jihuai's picture
# have to create an orphan branch to bypass large file history: cleanup .ipynb and create LFS
# d572f56
from collections import defaultdict
import glob
import json
import math
import os
import shutil
from itertools import chain
from pprint import pprint
from types import SimpleNamespace
import numpy as np
import pandas as pd
from omegaconf import OmegaConf
from tqdm.contrib.concurrent import process_map
from tqdm import tqdm as tdqm, tqdm
import torchaudio as ta
import librosa
# MoisesDB instrument taxonomy: coarse stem name -> list of fine-grained
# track-type labels (human-readable here; normalized to snake_case and
# cleaned via clean_track_inst further below).
taxonomy = {
    "vocals": [
        "lead male singer",
        "lead female singer",
        "human choir",
        "background vocals",
        "other (vocoder, beatboxing etc)",
    ],
    "bass": [
        "bass guitar",
        "bass synthesizer (moog etc)",
        "contrabass/double bass (bass of instrings)",
        "tuba (bass of brass)",
        "bassoon (bass of woodwind)",
    ],
    "drums": [
        "snare drum",
        "toms",
        "kick drum",
        "cymbals",
        "overheads",
        "full acoustic drumkit",
        "drum machine",
        "hi-hat"
    ],
    "other": [
        "fx/processed sound, scratches, gun shots, explosions etc",
        "click track",
    ],
    "guitar": [
        "clean electric guitar",
        "distorted electric guitar",
        "lap steel guitar or slide guitar",
        "acoustic guitar",
    ],
    "other plucked": ["banjo, mandolin, ukulele, harp etc"],
    "percussion": [
        "a-tonal percussion (claps, shakers, congas, cowbell etc)",
        "pitched percussion (mallets, glockenspiel, ...)",
    ],
    "piano": [
        "grand piano",
        "electric piano (rhodes, wurlitzer, piano sound alike)",
    ],
    "other keys": [
        "organ, electric organ",
        "synth pad",
        "synth lead",
        "other sounds (hapischord, melotron etc)",
    ],
    "bowed strings": [
        "violin (solo)",
        "viola (solo)",
        "cello (solo)",
        "violin section",
        "viola section",
        "cello section",
        "string section",
        "other strings",
    ],
    "wind": [
        "brass (trumpet, trombone, french horn, brass etc)",
        "flutes (piccolo, bamboo flute, panpipes, flutes etc)",
        "reeds (saxophone, clarinets, oboe, english horn, bagpipe)",
        "other wind",
    ],
}
def clean_npy_other_vox(data_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npyq"):
    """One-off migration: rename saved "other" stem arrays to "other_vocals".

    Collects every .npy under data_root whose path contains "other" but is
    neither a "vdbo_"-prefixed file nor an already-renamed "other_" file,
    then moves each file to the same path with "other" -> "other_vocals".

    NOTE(review): str.replace substitutes *every* "other" occurrence in the
    full path, so a directory name containing "other" would be rewritten
    too — verify the data_root layout before rerunning.
    """
    npys = glob.glob(os.path.join(data_root, "**/*.npy"), recursive=True)
    npys = [npy for npy in npys if "other" in npy]
    npys = [npy for npy in npys if "vdbo_" not in npy]
    npys = [npy for npy in npys if "other_" not in npy]
    # Sanity check: all remaining files must share one basename stem.
    stems = set([
        os.path.basename(npy).split(".")[0] for npy in npys
    ])
    assert len(stems) == 1
    for npy in tqdm(npys):
        shutil.move(npy, npy.replace("other", "other_vocals"))
def clean_track_inst(inst):
    """Normalize a raw track-type label into a canonical instrument key.

    Maps a few special labels onto fixed names, strips any parenthesized
    qualifier, removes commas/dashes, turns "/" into "_", and drops a
    single trailing underscore.

    Parameters
    ----------
    inst : str
        Raw track type (callers pass it space->underscore normalized).

    Returns
    -------
    str
        Cleaned instrument name.
    """
    if "vocoder" in inst:
        inst = "other_vocals"
    if "fx" in inst:
        inst = "fx"
    if "contrabass_double_bass" in inst:
        inst = "double_bass"
    if "banjo" in inst:
        # The whole "banjo, mandolin, ukulele, harp etc" family collapses
        # into a single coarse label.
        return "other_plucked"
    if "(" in inst:
        inst = inst.split("(")[0]
    for s in [",", "-"]:
        if s in inst:
            inst = inst.replace(s, "")
    for s in ["/"]:
        if s in inst:
            inst = inst.replace(s, "_")
    # BUGFIX: the original `inst[-1] == "_"` raised IndexError when the
    # cleaning steps left an empty string; endswith() is safe on "".
    if inst.endswith("_"):
        inst = inst[:-1]
    return inst
# Normalize the taxonomy in place: snake_case every key and run each fine
# label through clean_track_inst so it matches on-disk stem file names.
taxonomy = {
    k.replace(" ", "_"): [clean_track_inst(i.replace(" ", "_")) for i in v] for k, v in taxonomy.items()
}
# Reverse lookup: fine (cleaned) instrument name -> coarse stem name.
# A fine label listed under several coarse stems keeps the *last* one seen.
fine_to_coarse = {}
for k, v in taxonomy.items():
    for vv in v:
        fine_to_coarse[vv] = k
# pprint(fine_to_coarse)
def save_taxonomy():
    """Dump the normalized taxonomy to JSON files in the working directory.

    Writes taxonomy.json (coarse -> fine mapping), taxonomy_coarse.json
    (list of coarse stem names) and taxonomy_fine.json (flat list of fine
    labels, duplicates included).
    """
    with open("taxonomy.json", "w") as f:
        json.dump(taxonomy, f, indent=4)
    taxonomy_coarse = list(taxonomy.keys())
    with open("taxonomy_coarse.json", "w") as f:
        json.dump(taxonomy_coarse, f, indent=4)
    taxonomy_fine = list(chain(*taxonomy.values()))
    # (removed: a defaultdict duplicate counter built here was never used)
    with open("taxonomy_fine.json", "w") as f:
        json.dump(taxonomy_fine, f, indent=4)
# Module-level label vocabularies used when building the stems.csv indicator
# table: every coarse stem name, and every distinct cleaned fine label.
possible_coarse = list(taxonomy.keys())
possible_fine = list(set(chain(*taxonomy.values())))
def trim_and_mix(audios, length_=None):
    """Truncate all clips to a common length and sum them into one mix.

    Args:
        audios: list of arrays shaped (..., samples).
        length_: optional hard cap on the common length.

    Returns:
        (mix, length): element-wise sum of the trimmed clips, and the
        number of samples they were trimmed to.
    """
    common = min(a.shape[-1] for a in audios)
    if length_ is not None:
        common = min(common, length_)
    trimmed = [a[..., :common] for a in audios]
    mix = np.sum(np.stack(trimmed, axis=0), axis=0)
    return mix, common
def retrim_npys(saved_npy, new_length):
    """Reload each saved .npy and truncate it in place to new_length samples."""
    print("retrimming")
    for path in saved_npy:
        arr = np.load(path)
        np.save(path, arr[..., :new_length])
def convert_one(inout):
    """Convert one MoisesDB song folder from per-track WAVs to .npy stems.

    Reads <input_path>/data.json, loads every track, and writes under
    <output_root>/<song_id>/: one float32 array per instrument, one mix per
    stem name, a full "mixture.npy", and "vdbo_others.npy" (sum of every
    stem except vocals/drums/bass). All arrays are truncated to the length
    of the shortest track seen so far; files already written are re-trimmed
    whenever a shorter track shows up later.

    Args:
        inout: namespace with .input_path (song folder) and .output_root.

    Raises:
        ValueError: when the same instrument appears in two different stems.
    """
    input_path = inout.input_path
    output_root = inout.output_root
    song_id = os.path.basename(input_path)
    output_root = os.path.join(output_root, song_id)
    os.makedirs(output_root, exist_ok=True)
    metadata = OmegaConf.load(os.path.join(input_path, "data.json"))
    stems = metadata.stems
    min_length = None
    saved_npy = []      # every .npy written so far (candidates for re-trim)
    all_tracks = []     # per-stem mixes, summed into mixture.npy
    other_tracks = []   # per-stem mixes outside vocals/drums/bass
    outfile = None
    added_tracks = set()
    duplicated_tracks = set()
    track_to_stem = defaultdict(list)
    added_stems = set()
    duplicated_stems = set()
    stem_name_to_stems = defaultdict(list)
    # Group stems sharing a name so their tracks get mixed together.
    for stem in stems:
        stem_name = stem.stemName
        stem_name_to_stems[stem_name].append(stem)
    for stem_name in tqdm(stem_name_to_stems):
        stem_tracks = []
        for stem in stem_name_to_stems[stem_name]:
            stem_name = stem.stemName
            if stem_name in added_stems:
                print(f"Duplicate stem {stem_name} in {song_id}")
                duplicated_stems.add(stem_name)
            added_stems.add(stem_name)
            for track in stem.tracks:
                track_inst = track.trackType
                track_inst = clean_track_inst(track_inst)
                if track_inst in added_tracks:
                    if stem_name in track_to_stem[track_inst]:
                        # Same instrument repeated within the same stem:
                        # the first occurrence already covers it.
                        continue
                    print(f"Duplicate track {track_inst} in {song_id}")
                    print(f"Stems: {track_to_stem[track_inst]}")
                    duplicated_tracks.add(track_inst)
                    raise ValueError
                else:
                    added_tracks.add(track_inst)
                    track_to_stem[track_inst].append(stem_name)
                track_id = track.id
                audio, fs = ta.load(os.path.join(input_path, stem_name, f"{track_id}.wav"))
                if fs != 44100:
                    print(f"fs is {fs} for {track_id}")
                    # BUGFIX: was opened with "w", which truncated fs.txt on
                    # every offending track and kept only the last record.
                    with open(os.path.join(output_root, "fs.txt"), "a") as f:
                        f.write(f"{song_id}\t{track_id}\t{fs}\n")
                if min_length is None:
                    min_length = audio.shape[-1]
                else:
                    if audio.shape[-1] < min_length:
                        # A shorter track arrived: shrink the target length
                        # and re-trim everything written so far.
                        min_length = audio.shape[-1]
                        if len(saved_npy) > 0:
                            retrim_npys(saved_npy, min_length)
                audio = audio[..., :min_length]
                audio = audio.numpy()
                audio = audio.astype(np.float32)
                if audio.shape[0] == 1:
                    print("mono")
                if audio.shape[0] > 2:
                    print("multi channel")
                assert outfile is None
                outfile = os.path.join(output_root, f"{track_inst}.npy")
                np.save(outfile, audio)
                saved_npy.append(outfile)
                outfile = None
                stem_tracks.append(audio)
                audio = None
        stem_track, min_length = trim_and_mix(stem_tracks)
        assert outfile is None
        outfile = os.path.join(output_root, f"{stem_name}.npy")
        np.save(outfile, stem_track)
        saved_npy.append(outfile)
        outfile = None
        all_tracks.append(stem_track)
        if stem_name not in ["vocals", "drums", "bass"]:
            other_tracks.append(stem_track)
    assert outfile is None
    all_track, min_length_ = trim_and_mix(all_tracks, min_length)
    outfile = os.path.join(output_root, f"mixture.npy")
    np.save(outfile, all_track)
    if min_length_ != min_length:
        retrim_npys(saved_npy, min_length_)
        min_length = min_length_
    saved_npy.append(outfile)
    outfile = None
    # NOTE(review): assumes at least one non-VDB stem exists per song;
    # trim_and_mix([]) would raise ValueError on min() of empty list.
    other_track, min_length_ = trim_and_mix(other_tracks, min_length)
    np.save(os.path.join(output_root, f"vdbo_others.npy"), other_track)
    if min_length_ != min_length:
        retrim_npys(saved_npy, min_length_)
        min_length = min_length_
def convert_to_npy(
    data_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/canonical",
    output_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npy2",
):
    """Run convert_one over every song directory under data_root in parallel.

    When output_root is None, an "npy" folder is created next to data_root.
    """
    if output_root is None:
        output_root = os.path.join(os.path.dirname(data_root), "npy")
    song_dirs = [
        os.path.join(data_root, name)
        for name in os.listdir(data_root)
        if os.path.isdir(os.path.join(data_root, name))
    ]
    jobs = [SimpleNamespace(input_path=d, output_root=output_root) for d in song_dirs]
    process_map(convert_one, jobs)
def make_others_one(input_path, dry_run=False):
    """Rebuild vdbo_others.npy for one song as the sum of all non-VDB stems.

    Args:
        input_path: song directory containing per-stem .npy files.
        dry_run: when True, compute and log but skip writing the output.
    """
    other_stems = [k for k in taxonomy.keys() if k not in ["vocals", "bass", "drums"]]
    candidates = glob.glob(os.path.join(input_path, "**/*.npy"), recursive=True)
    # Drop derived files (dBFS curves, query chunks) and the mixture itself.
    candidates = [
        p
        for p in candidates
        if ".dbfs" not in p and ".query" not in p and "mixture" not in p
    ]
    candidates = [
        p for p in candidates if os.path.basename(p).split(".")[0] in other_stems
    ]
    print(f"Using stems: {[os.path.basename(npy).split('.')[0] for npy in candidates]}")
    if len(candidates) == 0:
        # No non-VDB stems at all: emit silence matching the mixture shape.
        audio = np.zeros_like(np.load(os.path.join(input_path, "mixture.npy")))
    else:
        audio = np.sum(np.stack([np.load(p) for p in candidates], axis=0), axis=0)
    assert audio.shape[0] == 2
    output = os.path.join(input_path, "vdbo_others.npy")
    if dry_run:
        return
    np.save(output, audio)
def check_vdbo_one(f):
    """Return the SNR (dB) of the stored mixture against the sum of the
    vocals/drums/bass/vdbo_others stems found in song folder ``f``.

    Missing stem files are simply skipped.
    """
    stem_paths = [
        os.path.join(f, name + ".npy")
        for name in ["vocals", "drums", "bass", "vdbo_others"]
    ]
    loaded = [np.load(p) for p in stem_paths if os.path.exists(p)]
    s = np.sum(np.stack(loaded, axis=0), axis=0)
    m = np.load(os.path.join(f, "mixture.npy"))
    snr = 10 * np.log10(np.mean(np.square(m)) / np.mean(np.square(s - m)))
    print(snr)
    return snr
def check_vdbo(data_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npy2"):
    """Compute mixture-vs-stem-sum SNR for every song and save the results."""
    song_dirs = [
        os.path.join(data_root, name)
        for name in os.listdir(data_root)
        if os.path.isdir(os.path.join(data_root, name))
    ]
    snrs = process_map(check_vdbo_one, song_dirs)
    np.save("/storage/home/hcoda1/1/kwatchar3/data/vdbo.npy", np.array(snrs))
def make_others(data_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npy2"):
    """Rebuild vdbo_others.npy for every song directory, in parallel."""
    song_dirs = [
        os.path.join(data_root, name)
        for name in os.listdir(data_root)
        if os.path.isdir(os.path.join(data_root, name))
    ]
    process_map(make_others_one, song_dirs)
def extract_metadata_one(input_path):
    """Flatten one song's data.json into a list of per-track metadata dicts.

    Each record carries the song-level fields (id, title, artist, genre)
    plus the stem and track identifiers and the per-track bleed flag.
    """
    song_id = os.path.basename(input_path)
    metadata = OmegaConf.load(os.path.join(input_path, "data.json"))
    records = []
    for stem in metadata.stems:
        for track in stem.tracks:
            records.append(
                {
                    "song_id": song_id,
                    "song": metadata.song,
                    "artist": metadata.artist,
                    "genre": metadata.genre,
                    "stem_name": stem.stemName,
                    "stem_id": stem.id,
                    "track_inst": track.trackType,
                    "track_id": track.id,
                    "has_bleed": track.has_bleed,
                }
            )
    return records
def consolidate_metadata(
    data_root="/home/kwatchar3/Documents/data/moisesdb/canonical",
):
    """Extract metadata from every song folder and write metadata.csv."""
    song_dirs = [
        os.path.join(data_root, name)
        for name in os.listdir(data_root)
        if os.path.isdir(os.path.join(data_root, name))
    ]
    per_song = process_map(extract_metadata_one, song_dirs)
    df = pd.DataFrame.from_records(list(chain(*per_song)))
    df.to_csv(os.path.join(os.path.dirname(data_root), "metadata.csv"), index=False)
def clean_canonical(data_root="/home/kwatchar3/Documents/data/moisesdb/canonical"):
    """Delete every .npy file under the canonical (WAV) tree."""
    for path in tqdm(glob.glob(os.path.join(data_root, "**/*.npy"), recursive=True)):
        os.remove(path)
def remove_dbfs(data_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npy"):
    """Delete every derived *.dbfs.npy file under data_root."""
    for path in tqdm(glob.glob(os.path.join(data_root, "**/*.dbfs.npy"), recursive=True)):
        os.remove(path)
def make_split(
    metadata_path="/home/kwatchar3/Documents/data/moisesdb/metadata.csv",
    n_splits=5,
    seed=42,
):
    """Create genre-stratified cross-validation splits of the songs.

    Genres with at most n_splits songs are lumped into "other" so that
    stratification is feasible. Writes splits.csv (song_id, split 1..n_splits)
    next to metadata_path.
    """
    df = pd.read_csv(metadata_path)
    # print(df.columns)
    df = df[["song_id", "genre"]].drop_duplicates()
    genre_counts = df["genre"].value_counts()
    rare_merged = {g: g if c > n_splits else "other" for g, c in genre_counts.items()}
    df["genre"] = df["genre"].map(rare_merged)
    n_per_split = len(df) // n_splits
    np.random.seed(seed)
    from sklearn.model_selection import train_test_split

    splits = []
    remaining = df.copy()
    for i in range(n_splits - 1):
        remaining, held_out = train_test_split(
            remaining,
            test_size=n_per_split,
            random_state=seed,
            stratify=remaining["genre"],
            shuffle=True,
        )
        part = held_out[["song_id"]].copy().sort_values(by="song_id")
        part["split"] = i + 1
        splits.append(part)
    # Everything left over becomes the final split.
    part = remaining[["song_id"]].copy().sort_values(by="song_id")
    part["split"] = n_splits
    splits.append(part)
    pd.concat(splits).to_csv(
        os.path.join(os.path.dirname(metadata_path), "splits.csv"), index=False
    )
def consolidate_stems(data_root="/home/kwatchar3/Documents/data/moisesdb/npy"):
    """Build stems.csv: one row per song with 0/1 presence indicators.

    Columns are the cleaned fine instrument labels (from track_inst) and the
    coarse stem names (from stem_name); each is 1 when present in the song.
    (Removed: leftover debug `pprint(dfg)` / `print(df)` statements.)
    """
    metadata = pd.read_csv(os.path.join(os.path.dirname(data_root), "metadata.csv"))
    dfg = metadata.groupby("song_id")[["stem_name", "track_inst"]]

    def make_stem_dict(song_id, track_inst, stem_names):
        # One indicator row; int() turns membership into 0/1 flags.
        d = {"song_id": song_id}
        for inst in possible_fine:
            d[inst] = int(inst in track_inst)
        for inst in possible_coarse:
            d[inst] = int(inst in stem_names)
        return d

    rows = []
    for song_id, dfgg in dfg:
        track_inst = [clean_track_inst(i) for i in set(dfgg["track_inst"].tolist())]
        stem_names = list(set(clean_track_inst(i) for i in dfgg["stem_name"].tolist()))
        rows.append(make_stem_dict(song_id, track_inst, stem_names))
    df = pd.DataFrame.from_records(rows)
    df.to_csv(os.path.join(os.path.dirname(data_root), "stems.csv"), index=False)
def get_dbfs(data_root="/home/kwatchar3/Documents/data/moisesdb/npy"):
    """Compute the overall dBFS (mean-square power in dB) of every .npy track.

    Writes dbfs.csv next to data_root and returns the records as a DataFrame.
    """
    records = []
    for path in tqdm(glob.glob(os.path.join(data_root, "**/*.npy"), recursive=True)):
        audio = np.load(path)
        records.append(
            {
                "song_id": os.path.basename(os.path.dirname(path)),
                "track_id": os.path.basename(path).split(".")[0],
                "dbfs": 10 * np.log10(np.mean(np.square(audio))),
            }
        )
    dbfs = pd.DataFrame.from_records(records)
    dbfs.to_csv(os.path.join(os.path.dirname(data_root), "dbfs.csv"), index=False)
    return dbfs
def get_dbfs_by_chunk_one(inout):
    """Compute a windowed dBFS curve for one .npy track.

    Slides a window of chunk_size seconds (hop of hop_size seconds) over the
    squared signal, averages over channels and window samples, and saves the
    dB values as <track_id>.dbfs.npy beside the source file.

    NOTE(review): inout.output_path (set by get_dbfs_by_chunk) is ignored;
    the result is always written next to the source audio — confirm intended.
    """
    audio = np.load(inout.audio_path, mmap_mode="r")
    win = int(inout.chunk_size * inout.fs)
    hop = int(inout.hop_size * inout.fs)
    n_chan, n_samples = audio.shape  # enforces a 2-D (channels, samples) array
    windows = np.lib.stride_tricks.sliding_window_view(
        np.square(audio), win, axis=1
    )[:, ::hop, :]
    mean_sq = np.mean(windows, axis=(0, 2))
    mean_sq[mean_sq == 0] = 1e-8  # floor silent windows to avoid log10(0)
    dbfs = 10 * np.log10(mean_sq)
    track_id = os.path.basename(inout.audio_path).split(".")[0]
    np.save(
        os.path.join(os.path.dirname(inout.audio_path), f"{track_id}.dbfs.npy"), dbfs
    )
def clean_data_root(data_root="/home/kwatchar3/Documents/data/moisesdb/npy"):
    """Delete derived .dbfs/.query .npy files from the data tree."""
    for path in tqdm(glob.glob(os.path.join(data_root, "**/*.npy"), recursive=True)):
        if ".dbfs" in path or ".query" in path:
            os.remove(path)
#
def get_dbfs_by_chunk(
    data_root="/home/kwatchar3/Documents/data/moisesdb/npy",
    query_root="/home/kwatchar3/Documents/data/moisesdb/npyq",
):
    """Queue get_dbfs_by_chunk_one for every .npy track (1 s window, 0.125 s hop)."""
    jobs = [
        SimpleNamespace(
            audio_path=path,
            chunk_size=1,
            hop_size=0.125,
            fs=44100,
            output_path=path.replace(data_root, query_root).replace(
                ".npy", ".query.npy"
            ),
        )
        for path in glob.glob(os.path.join(data_root, "**/*.npy"), recursive=True)
    ]
    process_map(get_dbfs_by_chunk_one, jobs, chunksize=2)
def round_samples(seconds, fs, hop_size, downsample):
    """Convert a duration to a sample count whose frame count (at hop_size)
    is rounded up to a multiple of `downsample`.

    Returns the integer number of samples spanning those frames.
    """
    frames = math.ceil(seconds * fs / hop_size) + 1
    frames = math.ceil(frames / downsample) * downsample
    return int((frames - 1) * hop_size)
def get_query_one(inout):
    """Extract the most onset-dense chunk of a track and save it as a query.

    Slides a window of chunk_size seconds over the track's onset-strength
    envelope, locates the window with the highest mean strength, and writes
    that audio segment (all channels) to inout.output_path.

    Args:
        inout: namespace with .audio_path, .chunk_size (s), .fs, .hop_size
            (samples), .round (bool) and .output_path.
    """
    audio = np.load(inout.audio_path, mmap_mode="r")
    chunk_size = inout.chunk_size
    fs = inout.fs
    output_path = inout.output_path
    hop_size = inout.hop_size
    if inout.round:
        # Round up so the chunk yields a frame count divisible by 2**6.
        chunk_size_samples = round_samples(chunk_size, fs, 512, 2**6)
    else:
        chunk_size_samples = int(chunk_size * fs)
    audio_mono = np.mean(audio, axis=0)
    # (removed: an unused librosa.onset.onset_detect call and unused track_id)
    onset_strength = librosa.onset.onset_strength(
        y=audio_mono, sr=fs, hop_length=hop_size
    )
    n_frames_per_chunk = chunk_size_samples // hop_size
    windowed = np.lib.stride_tricks.sliding_window_view(
        onset_strength, n_frames_per_chunk, axis=0
    )
    mean_strength = np.mean(windowed, axis=1)
    max_onset_frame = np.argmax(mean_strength)
    # BUGFIX: frames_to_samples defaults to hop_length=512; pass the actual
    # hop so the frame->sample conversion is correct for any hop_size.
    max_onset_samples = librosa.frames_to_samples(max_onset_frame, hop_length=hop_size)
    segment = audio[:, max_onset_samples : max_onset_samples + chunk_size_samples]
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    np.save(output_path, segment)
def get_query_from_onset(
    data_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npy2", # "/home/kwatchar3/Documents/data/moisesdb/npy",
    query_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npyq", # "/home/kwatchar3/Documents/data/moisesdb/npyq",
    query_file="query-10s",
    pmap=True,
):
    """Queue get_query_one for every non-dbfs .npy under data_root.

    Outputs mirror the input tree under query_root with the suffix
    ".{query_file}.npy". pmap toggles parallel vs sequential execution.
    """
    paths = [
        p
        for p in glob.glob(os.path.join(data_root, "**/*.npy"), recursive=True)
        if "dbfs" not in p
    ]
    jobs = [
        SimpleNamespace(
            audio_path=p,
            chunk_size=10,
            hop_size=512,
            round=False,
            fs=44100,
            output_path=p.replace(data_root, query_root).replace(
                ".npy", f".{query_file}.npy"
            ),
        )
        for p in paths
    ]
    if pmap:
        process_map(get_query_one, jobs, chunksize=2, max_workers=24)
    else:
        for job in tqdm(jobs):
            get_query_one(job)
def get_durations(data_root="/home/kwatchar3/Documents/data/moisesdb/npy"):
    """Collect per-song mixture durations (seconds, assuming 44.1 kHz).

    Writes durations.csv next to data_root and returns the DataFrame.
    """
    records = []
    for path in tqdm(glob.glob(os.path.join(data_root, "**/mixture.npy"), recursive=True)):
        audio = np.load(path, mmap_mode="r")
        records.append(
            {
                "song_id": os.path.basename(os.path.dirname(path)),
                "track_id": os.path.basename(path).split(".")[0],
                "duration": audio.shape[-1] / 44100,
            }
        )
    durations = pd.DataFrame.from_records(records)
    durations.to_csv(
        os.path.join(os.path.dirname(data_root), "durations.csv"), index=False
    )
    return durations
def clean_query_root(
    data_root="/home/kwatchar3/Documents/data/moisesdb/npy",
    query_root="/home/kwatchar3/Documents/data/moisesdb/npyq",
):
    """Move stray *.query.npy files out of data_root into the mirrored query_root."""
    for src in tqdm(glob.glob(os.path.join(data_root, "**/*.query.npy"), recursive=True)):
        dst = src.replace(data_root, query_root)
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        shutil.move(src, dst)
def make_test_indices(
    metadata_path="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/metadata.csv",
    stem_path="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/stems.csv",
    splits_path="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/splits.csv",
    test_split=5,
):
    """Pair every (test-split song, stem) with a query song from another track.

    For each stem of each song in the test split, picks a different song
    that also contains that stem, preferring (same genre, different artist)
    and relaxing the constraints step by step. Writes test_indices.csv and
    no_query.csv next to metadata_path and prints summary counts.

    NOTE(review): query_df.sample(1) is not seeded, so the generated index
    differs between runs — confirm whether that is intended.
    """
    # NOTE(review): coarse_stems / fine_stems are computed but never used.
    coarse_stems = set(taxonomy.keys())
    fine_stems = set(chain(*taxonomy.values()))
    metadata = pd.read_csv(metadata_path)
    splits = pd.read_csv(splits_path)
    stems = pd.read_csv(stem_path)
    # Restrict all three tables to the chosen test split, indexed by song_id.
    file_in_test = splits[splits["split"] == test_split]["song_id"].tolist()
    stems_test = stems[stems["song_id"].isin(file_in_test)]
    metadata_test = metadata[metadata["song_id"].isin(file_in_test)]
    splits_test = splits[splits["split"] == test_split]
    stems_test = stems_test.set_index("song_id")
    metadata_test = metadata_test.drop_duplicates("song_id").set_index("song_id")
    splits_test = splits_test.set_index("song_id")
    # Invert the 0/1 stem-indicator table into stem <-> song lookups.
    stem_to_song_id = defaultdict(list)
    song_id_to_stem = defaultdict(list)
    for song_id in file_in_test:
        stems_ = stems_test.loc[song_id]
        stem_names = stems_.T
        stem_names = stem_names[stem_names == 1].index.tolist()
        for stem in stem_names:
            stem_to_song_id[stem].append(song_id)
        song_id_to_stem[song_id] = stem_names
    indices = []
    no_query = []
    for song_id in file_in_test:
        genre = metadata_test.loc[song_id, "genre"]
        # print(genre)
        artist = metadata_test.loc[song_id, "artist"]
        # print(artist)
        stems_ = song_id_to_stem[song_id]
        for stem in stems_:
            # Candidate queries: other test songs that also contain this stem.
            possible_query = stem_to_song_id[stem]
            possible_query = [p for p in possible_query if p != song_id]
            if len(possible_query) == 0:
                print(f"No possible query for {song_id} with {stem}")
                no_query.append(
                    {
                        "song_id": song_id,
                        "stem": stem
                    }
                )
                continue
            query_df = metadata_test.loc[possible_query, ["genre", "artist"]]
            assert len(query_df) > 0
            query_df_ = query_df.copy()
            # Constraint-relaxation ladder: (same genre AND different artist)
            # -> (different artist) -> (same genre) -> (any candidate).
            same_genre = True
            different_artist = True
            query_df = query_df[(query_df["genre"] == genre) & (query_df["artist"] != artist)]
            if len(query_df) == 0:
                same_genre = False
                different_artist = True
                query_df = query_df_.copy()
                query_df = query_df[(query_df["artist"] != artist)]
            if len(query_df) == 0:
                same_genre = True
                different_artist = False
                query_df = query_df_.copy()
                query_df = query_df[(query_df["genre"] == genre)]
            if len(query_df) == 0:
                same_genre = False
                different_artist = False
                query_df = query_df_.copy()
            query_id = query_df.sample(1).index[0]
            indices.append(
                {
                    "song_id": song_id,
                    "query_id": query_id,
                    "stem": stem,
                    "same_genre": same_genre,
                    "different_artist": different_artist
                }
            )
    indices = pd.DataFrame.from_records(indices)
    no_query = pd.DataFrame.from_records(no_query)
    indices.to_csv(
        os.path.join(os.path.dirname(metadata_path), "test_indices.csv"), index=False
    )
    no_query.to_csv(
        os.path.join(os.path.dirname(metadata_path), "no_query.csv"), index=False
    )
    print("Total number of queries:", len(indices))
    print("Total number of no queries:", len(no_query))
    query_type = indices.groupby(["same_genre", "different_artist"]).size()
    print(query_type)
if __name__ == "__main__":
    # Expose every top-level function as a CLI subcommand via python-fire,
    # e.g. `python this_file.py convert_to_npy --data_root=...`.
    import fire
    fire.Fire()