from collections import defaultdict
import glob
import json
import math
import os
import shutil
from itertools import chain
from pprint import pprint
from types import SimpleNamespace

import numpy as np
import pandas as pd
from omegaconf import OmegaConf
from tqdm.contrib.concurrent import process_map
from tqdm import tqdm

import torchaudio as ta
import librosa

# NB: these strings appear to be verbatim MoisesDB taxonomy labels (typos such
# as "hapischord" and "instrings" included); they are left unedited so they
# keep matching the dataset's metadata.
taxonomy = {
    "vocals": [
        "lead male singer",
        "lead female singer",
        "human choir",
        "background vocals",
        "other (vocoder, beatboxing etc)",
    ],
    "bass": [
        "bass guitar",
        "bass synthesizer (moog etc)",
        "contrabass/double bass (bass of instrings)",
        "tuba (bass of brass)",
        "bassoon (bass of woodwind)",
    ],
    "drums": [
        "snare drum",
        "toms",
        "kick drum",
        "cymbals",
        "overheads",
        "full acoustic drumkit",
        "drum machine",
        "hi-hat",
    ],
    "other": [
        "fx/processed sound, scratches, gun shots, explosions etc",
        "click track",
    ],
    "guitar": [
        "clean electric guitar",
        "distorted electric guitar",
        "lap steel guitar or slide guitar",
        "acoustic guitar",
    ],
    "other plucked": ["banjo, mandolin, ukulele, harp etc"],
    "percussion": [
        "a-tonal percussion (claps, shakers, congas, cowbell etc)",
        "pitched percussion (mallets, glockenspiel, ...)",
    ],
    "piano": [
        "grand piano",
        "electric piano (rhodes, wurlitzer, piano sound alike)",
    ],
    "other keys": [
        "organ, electric organ",
        "synth pad",
        "synth lead",
        "other sounds (hapischord, melotron etc)",
    ],
    "bowed strings": [
        "violin (solo)",
        "viola (solo)",
        "cello (solo)",
        "violin section",
        "viola section",
        "cello section",
        "string section",
        "other strings",
    ],
    "wind": [
        "brass (trumpet, trombone, french horn, brass etc)",
        "flutes (piccolo, bamboo flute, panpipes, flutes etc)",
        "reeds (saxophone, clarinets, oboe, english horn, bagpipe)",
        "other wind",
    ],
}


def clean_npy_other_vox(data_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npyq"):
    """Rename stray `other*.npy` files to `other_vocals*.npy`."""
    npys = glob.glob(os.path.join(data_root, "**/*.npy"), recursive=True)
    npys = [npy for npy in npys if "other" in npy]
    npys = [npy for npy in npys if "vdbo_" not in npy]
    npys = [npy for npy in npys if "other_" not in npy]

    stems = set([os.path.basename(npy).split(".")[0] for npy in npys])
    assert len(stems) == 1

    for npy in tqdm(npys):
        shutil.move(npy, npy.replace("other", "other_vocals"))


def clean_track_inst(inst):
    """Normalize a raw trackType/stemName string into a snake_case label."""
    if "vocoder" in inst:
        inst = "other_vocals"

    if "fx" in inst:
        inst = "fx"

    if "contrabass_double_bass" in inst:
        inst = "double_bass"

    if "banjo" in inst:
        return "other_plucked"

    # Drop parenthesized qualifiers, e.g. "violin_(solo)" -> "violin_".
    if "(" in inst:
        inst = inst.split("(")[0]

    for s in [",", "-"]:
        if s in inst:
            inst = inst.replace(s, "")

    for s in ["/"]:
        if s in inst:
            inst = inst.replace(s, "_")

    if inst[-1] == "_":
        inst = inst[:-1]

    return inst


taxonomy = {
    k.replace(" ", "_"): [clean_track_inst(i.replace(" ", "_")) for i in v]
    for k, v in taxonomy.items()
}

fine_to_coarse = {}
for k, v in taxonomy.items():
    for vv in v:
        fine_to_coarse[vv] = k

# pprint(fine_to_coarse)


def save_taxonomy():
    with open("taxonomy.json", "w") as f:
        json.dump(taxonomy, f, indent=4)

    taxonomy_coarse = list(taxonomy.keys())
    with open("taxonomy_coarse.json", "w") as f:
        json.dump(taxonomy_coarse, f, indent=4)

    taxonomy_fine = list(chain(*taxonomy.values()))
    with open("taxonomy_fine.json", "w") as f:
        json.dump(taxonomy_fine, f, indent=4)


possible_coarse = list(taxonomy.keys())
# Sorted so downstream CSV column order is deterministic across runs.
possible_fine = sorted(set(chain(*taxonomy.values())))
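
# Illustrative sanity check of the cleaned mapping (doctest-style, run ad hoc;
# the values below follow from the taxonomy and clean_track_inst above):
#   >>> fine_to_coarse["bass_guitar"]
#   'bass'
#   >>> fine_to_coarse["snare_drum"]
#   'drums'
#   >>> possible_coarse[:3]
#   ['vocals', 'bass', 'drums']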
def trim_and_mix(audios, length_=None):
    """Trim all tracks to the shortest common length and sum them."""
    length = min([a.shape[-1] for a in audios])
    if length_ is not None:
        length = min(length, length_)

    audios = [a[..., :length] for a in audios]

    return np.sum(np.stack(audios, axis=0), axis=0), length


def retrim_npys(saved_npy, new_length):
    """Re-trim already-saved arrays when a shorter track is discovered later."""
    print("retrimming")
    for npy in saved_npy:
        audio = np.load(npy)
        audio = audio[..., :new_length]
        np.save(npy, audio)


def convert_one(inout):
    input_path = inout.input_path
    output_root = inout.output_root

    song_id = os.path.basename(input_path)
    output_root = os.path.join(output_root, song_id)
    os.makedirs(output_root, exist_ok=True)

    metadata = OmegaConf.load(os.path.join(input_path, "data.json"))
    stems = metadata.stems

    min_length = None
    saved_npy = []
    all_tracks = []
    other_tracks = []
    outfile = None

    added_tracks = set()
    duplicated_tracks = set()
    track_to_stem = defaultdict(list)

    added_stems = set()
    duplicated_stems = set()

    stem_name_to_stems = defaultdict(list)
    for stem in stems:
        stem_name_to_stems[stem.stemName].append(stem)

    for stem_name in tqdm(stem_name_to_stems):
        stem_tracks = []
        for stem in stem_name_to_stems[stem_name]:
            if stem_name in added_stems:
                print(f"Duplicate stem {stem_name} in {song_id}")
                duplicated_stems.add(stem_name)
            added_stems.add(stem_name)

            for track in stem.tracks:
                track_inst = clean_track_inst(track.trackType)

                if track_inst in added_tracks:
                    if stem_name in track_to_stem[track_inst]:
                        continue
                    print(f"Duplicate track {track_inst} in {song_id}")
                    print(f"Stems: {track_to_stem[track_inst]}")
                    duplicated_tracks.add(track_inst)
                    raise ValueError
                else:
                    added_tracks.add(track_inst)
                    track_to_stem[track_inst].append(stem_name)

                track_id = track.id
                audio, fs = ta.load(
                    os.path.join(input_path, stem_name, f"{track_id}.wav")
                )

                if fs != 44100:
                    print(f"fs is {fs} for {track_id}")
                    # Append: a song may contain more than one off-rate track.
                    with open(os.path.join(output_root, "fs.txt"), "a") as f:
                        f.write(f"{song_id}\t{track_id}\t{fs}\n")

                if min_length is None:
                    min_length = audio.shape[-1]
                elif audio.shape[-1] < min_length:
                    min_length = audio.shape[-1]
                    if len(saved_npy) > 0:
                        retrim_npys(saved_npy, min_length)

                audio = audio[..., :min_length]
                audio = audio.numpy().astype(np.float32)

                if audio.shape[0] == 1:
                    print("mono")
                if audio.shape[0] > 2:
                    print("multi channel")

                assert outfile is None
                outfile = os.path.join(output_root, f"{track_inst}.npy")
                np.save(outfile, audio)
                saved_npy.append(outfile)
                outfile = None

                stem_tracks.append(audio)

        stem_track, min_length = trim_and_mix(stem_tracks)

        assert outfile is None
        outfile = os.path.join(output_root, f"{stem_name}.npy")
        np.save(outfile, stem_track)
        saved_npy.append(outfile)
        outfile = None

        all_tracks.append(stem_track)
        if stem_name not in ["vocals", "drums", "bass"]:
            # print(f"Putting {stem_name} in other")
            other_tracks.append(stem_track)

    assert outfile is None
    all_track, min_length_ = trim_and_mix(all_tracks, min_length)
    outfile = os.path.join(output_root, "mixture.npy")
    np.save(outfile, all_track)
    if min_length_ != min_length:
        retrim_npys(saved_npy, min_length_)
        min_length = min_length_
    saved_npy.append(outfile)
    outfile = None

    if other_tracks:
        other_track, min_length_ = trim_and_mix(other_tracks, min_length)
    else:
        # No non-VDB stems: write a silent "others" track matching the mixture.
        other_track, min_length_ = np.zeros_like(all_track), min_length
    np.save(os.path.join(output_root, "vdbo_others.npy"), other_track)
    if min_length_ != min_length:
        retrim_npys(saved_npy, min_length_)
        min_length = min_length_


def convert_to_npy(
    data_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/canonical",
    output_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npy2",
):
    if output_root is None:
        output_root = os.path.join(os.path.dirname(data_root), "npy")

    files = os.listdir(data_root)
    files = [
        os.path.join(data_root, f)
        for f in files
        if os.path.isdir(os.path.join(data_root, f))
    ]

    inout = [SimpleNamespace(input_path=f, output_root=output_root) for f in files]

    process_map(convert_one, inout)
    # for io in tqdm(inout):
    #     convert_one(io)
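
# Example invocation (the script name and paths are placeholders; every
# module-level function here is exposed as a subcommand by fire.Fire() at the
# bottom of this file):
#   python moisesdb_prep.py convert_to_npy \
#       --data_root=/path/to/moisesdb/canonical \
#       --output_root=/path/to/moisesdb/npy2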
def make_others_one(input_path, dry_run=False):
    other_stems = [k for k in taxonomy.keys() if k not in ["vocals", "bass", "drums"]]

    npys = glob.glob(os.path.join(input_path, "**/*.npy"), recursive=True)
    npys = [npy for npy in npys if ".dbfs" not in npy]
    npys = [npy for npy in npys if ".query" not in npy]
    npys = [npy for npy in npys if "mixture" not in npy]
    npys = [npy for npy in npys if os.path.basename(npy).split(".")[0] in other_stems]

    print(f"Using stems: {[os.path.basename(npy).split('.')[0] for npy in npys]}")

    if len(npys) == 0:
        # No non-VDB stems: the "others" track is silence with the mixture's shape.
        audio = np.zeros_like(np.load(os.path.join(input_path, "mixture.npy")))
    else:
        audio = [np.load(npy) for npy in npys]
        audio = np.sum(np.stack(audio, axis=0), axis=0)

    assert audio.shape[0] == 2

    output = os.path.join(input_path, "vdbo_others.npy")

    if dry_run:
        return

    np.save(output, audio)


def check_vdbo_one(f):
    """SNR between the saved mixture and the sum of the four VDBO stems."""
    s = np.sum(
        np.stack(
            [
                np.load(os.path.join(f, name + ".npy"))
                for name in ["vocals", "drums", "bass", "vdbo_others"]
                if os.path.exists(os.path.join(f, name + ".npy"))
            ],
            axis=0,
        ),
        axis=0,
    )

    m = np.load(os.path.join(f, "mixture.npy"))

    snr = 10 * np.log10(np.mean(np.square(m)) / np.mean(np.square(s - m)))
    print(snr)

    return snr


def check_vdbo(data_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npy2"):
    files = os.listdir(data_root)
    files = [
        os.path.join(data_root, f)
        for f in files
        if os.path.isdir(os.path.join(data_root, f))
    ]

    snrs = process_map(check_vdbo_one, files)

    np.save("/storage/home/hcoda1/1/kwatchar3/data/vdbo.npy", np.array(snrs))


def make_others(data_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npy2"):
    files = os.listdir(data_root)
    files = [
        os.path.join(data_root, f)
        for f in files
        if os.path.isdir(os.path.join(data_root, f))
    ]

    process_map(make_others_one, files)
    # for f in tqdm(files):
    #     make_others_one(f, dry_run=False)


def extract_metadata_one(input_path):
    song_id = os.path.basename(input_path)
    metadata = OmegaConf.load(os.path.join(input_path, "data.json"))

    song = metadata.song
    artist = metadata.artist
    genre = metadata.genre
    stems = metadata.stems

    data_out = []

    for stem in stems:
        stem_name = stem.stemName
        stem_id = stem.id
        for track in stem.tracks:
            data_out.append(
                {
                    "song_id": song_id,
                    "song": song,
                    "artist": artist,
                    "genre": genre,
                    "stem_name": stem_name,
                    "stem_id": stem_id,
                    "track_inst": track.trackType,
                    "track_id": track.id,
                    "has_bleed": track.has_bleed,
                }
            )

    return data_out


def consolidate_metadata(
    data_root="/home/kwatchar3/Documents/data/moisesdb/canonical",
):
    files = os.listdir(data_root)
    files = [
        os.path.join(data_root, f)
        for f in files
        if os.path.isdir(os.path.join(data_root, f))
    ]

    data = process_map(extract_metadata_one, files)
    df = pd.DataFrame.from_records(list(chain(*data)))
    df.to_csv(os.path.join(os.path.dirname(data_root), "metadata.csv"), index=False)


def clean_canonical(data_root="/home/kwatchar3/Documents/data/moisesdb/canonical"):
    npy = glob.glob(os.path.join(data_root, "**/*.npy"), recursive=True)
    for n in tqdm(npy):
        os.remove(n)


def remove_dbfs(data_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npy"):
    npy = glob.glob(os.path.join(data_root, "**/*.dbfs.npy"), recursive=True)
    for n in tqdm(npy):
        os.remove(n)
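
# Spot-check a single song before a batch run (hypothetical path; with
# dry_run=True the function prints the stems it would sum and skips np.save):
#   >>> make_others_one("/path/to/npy2/<song_id>", dry_run=True)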
def make_split(
    metadata_path="/home/kwatchar3/Documents/data/moisesdb/metadata.csv",
    n_splits=5,
    seed=42,
):
    df = pd.read_csv(metadata_path)
    # print(df.columns)
    df = df[["song_id", "genre"]].drop_duplicates()

    genres = df["genre"].value_counts()
    # Genres too rare to stratify over are lumped into "other".
    genres_map = {g: g if c > n_splits else "other" for g, c in genres.items()}
    df["genre"] = df["genre"].map(genres_map)

    n_samples = len(df)
    n_per_split = n_samples // n_splits

    np.random.seed(seed)

    # Local import keeps sklearn optional for the rest of the script.
    from sklearn.model_selection import train_test_split

    splits = []
    df_ = df.copy()
    for i in range(n_splits - 1):
        df_, test = train_test_split(
            df_,
            test_size=n_per_split,
            random_state=seed,
            stratify=df_["genre"],
            shuffle=True,
        )
        dfs = test[["song_id"]].copy().sort_values(by="song_id")
        dfs["split"] = i + 1
        splits.append(dfs)

    # Whatever remains after the last carve-out becomes the final split.
    test = df_
    dfs = test[["song_id"]].copy().sort_values(by="song_id")
    dfs["split"] = n_splits
    splits.append(dfs)

    splits = pd.concat(splits)
    splits.to_csv(
        os.path.join(os.path.dirname(metadata_path), "splits.csv"), index=False
    )


def consolidate_stems(data_root="/home/kwatchar3/Documents/data/moisesdb/npy"):
    metadata = pd.read_csv(os.path.join(os.path.dirname(data_root), "metadata.csv"))

    dfg = metadata.groupby("song_id")[["stem_name", "track_inst"]]
    pprint(dfg)

    df = []

    def make_stem_dict(song_id, track_inst, stem_names):
        """One-hot row: fine labels from tracks, coarse labels from stems."""
        d = {"song_id": song_id}
        for inst in possible_fine:
            d[inst] = int(inst in track_inst)
        for inst in possible_coarse:
            d[inst] = int(inst in stem_names)
        return d

    for song_id, dfgg in dfg:
        track_inst = dfgg["track_inst"].tolist()
        track_inst = list(set(track_inst))
        track_inst = [clean_track_inst(inst) for inst in track_inst]

        stem_names = dfgg["stem_name"].tolist()
        stem_names = list(set([clean_track_inst(inst) for inst in stem_names]))

        d = make_stem_dict(song_id, track_inst, stem_names)
        df.append(d)

    print(df)

    df = pd.DataFrame.from_records(df)
    df.to_csv(os.path.join(os.path.dirname(data_root), "stems.csv"), index=False)


def get_dbfs(data_root="/home/kwatchar3/Documents/data/moisesdb/npy"):
    npys = glob.glob(os.path.join(data_root, "**/*.npy"), recursive=True)

    dbfs = []
    for npy in tqdm(npys):
        audio = np.load(npy)
        song_id = os.path.basename(os.path.dirname(npy))
        track_id = os.path.basename(npy).split(".")[0]
        # Floor the power so an all-silent track does not produce -inf.
        power = max(np.mean(np.square(audio)), 1e-8)
        dbfs.append(
            {
                "song_id": song_id,
                "track_id": track_id,
                "dbfs": 10 * np.log10(power),
            }
        )

    dbfs = pd.DataFrame.from_records(dbfs)
    dbfs.to_csv(os.path.join(os.path.dirname(data_root), "dbfs.csv"), index=False)

    return dbfs


def get_dbfs_by_chunk_one(inout):
    audio = np.load(inout.audio_path, mmap_mode="r")

    chunk_size = inout.chunk_size
    fs = inout.fs
    hop_size = inout.hop_size

    n_chan, n_samples = audio.shape

    chunk_size_samples = int(chunk_size * fs)
    hop_size_samples = int(hop_size * fs)

    # Windowed mean power: sliding windows of chunk_size, advanced by hop_size.
    x2win = np.lib.stride_tricks.sliding_window_view(
        np.square(audio), chunk_size_samples, axis=1
    )[:, ::hop_size_samples, :]

    x2win_mean = np.mean(x2win, axis=(0, 2))
    x2win_mean[x2win_mean == 0] = 1e-8
    dbfs = 10 * np.log10(x2win_mean)

    # song_id = os.path.basename(os.path.dirname(inout.audio_path))
    track_id = os.path.basename(inout.audio_path).split(".")[0]

    # Note: inout.output_path is ignored; the profile is saved next to the source.
    np.save(
        os.path.join(os.path.dirname(inout.audio_path), f"{track_id}.dbfs.npy"), dbfs
    )


def clean_data_root(data_root="/home/kwatchar3/Documents/data/moisesdb/npy"):
    npys = glob.glob(os.path.join(data_root, "**/*.npy"), recursive=True)
    for npy in tqdm(npys):
        if ".dbfs" in npy or ".query" in npy:
            # print("removing", npy)
            os.remove(npy)


def get_dbfs_by_chunk(
    data_root="/home/kwatchar3/Documents/data/moisesdb/npy",
    query_root="/home/kwatchar3/Documents/data/moisesdb/npyq",
):
    npys = glob.glob(os.path.join(data_root, "**/*.npy"), recursive=True)
    inout = [
        SimpleNamespace(
            audio_path=npy,
            chunk_size=1,
            hop_size=0.125,
            fs=44100,
            output_path=npy.replace(data_root, query_root).replace(
                ".npy", ".query.npy"
            ),
        )
        for npy in npys
    ]

    process_map(get_dbfs_by_chunk_one, inout, chunksize=2)
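
# Minimal sketch (illustrative only, not part of the pipeline): the same
# stride-trick windowing as get_dbfs_by_chunk_one, on synthetic audio. All
# names in this helper are hypothetical.
def _demo_dbfs_windowing():
    fs = 44100
    audio = np.random.randn(2, 10 * fs).astype(np.float32)  # 10 s stereo noise
    win = int(1.0 * fs)  # 1 s analysis window
    hop = int(0.125 * fs)  # 125 ms hop
    x2 = np.lib.stride_tricks.sliding_window_view(np.square(audio), win, axis=1)[
        :, ::hop, :
    ]
    # Mean power per window across both channels, in dB: one value per hop.
    return 10 * np.log10(np.mean(x2, axis=(0, 2)))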
def round_samples(seconds, fs, hop_size, downsample):
    """Round a duration up so the STFT frame count is a multiple of `downsample`.

    E.g. seconds=1, fs=44100, hop_size=512, downsample=64:
    ceil(44100 / 512) + 1 = 88 frames, rounded up to 128 frames,
    so n_samples = (128 - 1) * 512 = 65024 (~1.47 s).
    """
    n_frames = math.ceil(seconds * fs / hop_size) + 1
    n_frames_down = math.ceil(n_frames / downsample)
    n_frames = n_frames_down * downsample
    n_samples = (n_frames - 1) * hop_size
    return int(n_samples)


def get_query_one(inout):
    audio = np.load(inout.audio_path, mmap_mode="r")

    chunk_size = inout.chunk_size
    fs = inout.fs
    output_path = inout.output_path
    round_ = inout.round
    hop_size = inout.hop_size

    if round_:
        chunk_size_samples = round_samples(chunk_size, fs, 512, 2**6)
    else:
        chunk_size_samples = int(chunk_size * fs)

    audio_mono = np.mean(audio, axis=0)

    # unused:
    # onset = librosa.onset.onset_detect(
    #     y=audio_mono, sr=fs, units="frames", hop_length=hop_size
    # )

    onset_strength = librosa.onset.onset_strength(
        y=audio_mono, sr=fs, hop_length=hop_size
    )

    # Mean onset strength over a chunk-sized sliding window; the strongest
    # window becomes the query excerpt.
    n_frames_per_chunk = chunk_size_samples // hop_size
    onset_strength_slide = np.lib.stride_tricks.sliding_window_view(
        onset_strength, n_frames_per_chunk, axis=0
    )
    onset_strength = np.mean(onset_strength_slide, axis=1)

    max_onset_frame = np.argmax(onset_strength)
    # Convert with the same hop the strength envelope was computed with.
    max_onset_samples = librosa.frames_to_samples(max_onset_frame, hop_length=hop_size)

    segment = audio[:, max_onset_samples : max_onset_samples + chunk_size_samples]

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    np.save(output_path, segment)


def get_query_from_onset(
    data_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npy2",
    # "/home/kwatchar3/Documents/data/moisesdb/npy",
    query_root="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/npyq",
    # "/home/kwatchar3/Documents/data/moisesdb/npyq",
    query_file="query-10s",
    pmap=True,
):
    npys = glob.glob(os.path.join(data_root, "**/*.npy"), recursive=True)
    npys = [npy for npy in npys if "dbfs" not in npy]

    inout = [
        SimpleNamespace(
            audio_path=npy,
            chunk_size=10,
            hop_size=512,
            round=False,
            fs=44100,
            output_path=npy.replace(data_root, query_root).replace(
                ".npy", f".{query_file}.npy"
            ),
        )
        for npy in npys
    ]

    if pmap:
        process_map(get_query_one, inout, chunksize=2, max_workers=24)
    else:
        for io in tqdm(inout):
            get_query_one(io)


def get_durations(data_root="/home/kwatchar3/Documents/data/moisesdb/npy"):
    npys = glob.glob(os.path.join(data_root, "**/mixture.npy"), recursive=True)

    durations = []
    for npy in tqdm(npys):
        audio = np.load(npy, mmap_mode="r")
        song_id = os.path.basename(os.path.dirname(npy))
        track_id = os.path.basename(npy).split(".")[0]
        durations.append(
            {
                "song_id": song_id,
                "track_id": track_id,
                "duration": audio.shape[-1] / 44100,
            }
        )

    durations = pd.DataFrame.from_records(durations)
    durations.to_csv(
        os.path.join(os.path.dirname(data_root), "durations.csv"), index=False
    )

    return durations


def clean_query_root(
    data_root="/home/kwatchar3/Documents/data/moisesdb/npy",
    query_root="/home/kwatchar3/Documents/data/moisesdb/npyq",
):
    npys = glob.glob(os.path.join(data_root, "**/*.query.npy"), recursive=True)
    for npy in tqdm(npys):
        dst = npy.replace(data_root, query_root)
        dstdir = os.path.dirname(dst)
        os.makedirs(dstdir, exist_ok=True)
        shutil.move(npy, dst)
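
# Fallback cascade used by make_test_indices below when sampling a query song
# for each (test song, stem) pair, from strictest to loosest:
#   1. same genre, different artist
#   2. any genre,  different artist
#   3. same genre, any artist
#   4. any test-split song that has the stem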
def make_test_indices(
    metadata_path="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/metadata.csv",
    stem_path="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/stems.csv",
    splits_path="/storage/home/hcoda1/1/kwatchar3/data/data/moisesdb/splits.csv",
    test_split=5,
):
    coarse_stems = set(taxonomy.keys())
    fine_stems = set(chain(*taxonomy.values()))

    metadata = pd.read_csv(metadata_path)
    splits = pd.read_csv(splits_path)
    stems = pd.read_csv(stem_path)

    file_in_test = splits[splits["split"] == test_split]["song_id"].tolist()

    stems_test = stems[stems["song_id"].isin(file_in_test)]
    metadata_test = metadata[metadata["song_id"].isin(file_in_test)]
    splits_test = splits[splits["split"] == test_split]

    stems_test = stems_test.set_index("song_id")
    metadata_test = metadata_test.drop_duplicates("song_id").set_index("song_id")
    splits_test = splits_test.set_index("song_id")

    stem_to_song_id = defaultdict(list)
    song_id_to_stem = defaultdict(list)

    for song_id in file_in_test:
        # One-hot row -> list of the stems present in this song.
        stems_ = stems_test.loc[song_id]
        stem_names = stems_.T
        stem_names = stem_names[stem_names == 1].index.tolist()
        for stem in stem_names:
            stem_to_song_id[stem].append(song_id)
        song_id_to_stem[song_id] = stem_names

    indices = []
    no_query = []

    for song_id in file_in_test:
        genre = metadata_test.loc[song_id, "genre"]
        artist = metadata_test.loc[song_id, "artist"]
        stems_ = song_id_to_stem[song_id]

        for stem in stems_:
            possible_query = stem_to_song_id[stem]
            possible_query = [p for p in possible_query if p != song_id]

            if len(possible_query) == 0:
                print(f"No possible query for {song_id} with {stem}")
                no_query.append({"song_id": song_id, "stem": stem})
                continue

            query_df = metadata_test.loc[possible_query, ["genre", "artist"]]
            assert len(query_df) > 0
            query_df_ = query_df.copy()

            same_genre = True
            different_artist = True
            query_df = query_df[
                (query_df["genre"] == genre) & (query_df["artist"] != artist)
            ]

            if len(query_df) == 0:
                same_genre = False
                different_artist = True
                query_df = query_df_.copy()
                query_df = query_df[(query_df["artist"] != artist)]

            if len(query_df) == 0:
                same_genre = True
                different_artist = False
                query_df = query_df_.copy()
                query_df = query_df[(query_df["genre"] == genre)]

            if len(query_df) == 0:
                same_genre = False
                different_artist = False
                query_df = query_df_.copy()

            query_id = query_df.sample(1).index[0]

            indices.append(
                {
                    "song_id": song_id,
                    "query_id": query_id,
                    "stem": stem,
                    "same_genre": same_genre,
                    "different_artist": different_artist,
                }
            )

    indices = pd.DataFrame.from_records(indices)
    no_query = pd.DataFrame.from_records(no_query)

    indices.to_csv(
        os.path.join(os.path.dirname(metadata_path), "test_indices.csv"), index=False
    )
    no_query.to_csv(
        os.path.join(os.path.dirname(metadata_path), "no_query.csv"), index=False
    )

    print("Total number of queries:", len(indices))
    print("Total number of no queries:", len(no_query))

    query_type = indices.groupby(["same_genre", "different_artist"]).size()
    print(query_type)


if __name__ == "__main__":
    import fire

    fire.Fire()
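
# Typical end-to-end order (paths are placeholders; each function is a fire
# subcommand, e.g. `python <this_file>.py make_split --metadata_path=...`):
#   1. convert_to_npy        raw stems -> per-track/per-stem .npy + mixture.npy
#   2. make_others           build vdbo_others.npy from the non-VDB stems
#   3. check_vdbo            verify vocals+drums+bass+others reconstructs the mix
#   4. consolidate_metadata  per-song data.json files -> metadata.csv
#   5. consolidate_stems     metadata.csv -> one-hot stems.csv
#   6. make_split            stratified 5-fold song split -> splits.csv
#   7. get_query_from_onset  onset-based 10 s query excerpts
#   8. make_test_indices     pair each test (song, stem) with a query song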