beatalignment / alignment.py

Upload folder using huggingface_hub

151b875 verified 9 months ago

9.33 kB

	import numpy as np
	import scipy.interpolate
	from anticipation.convert import midi_to_events
	from anticipation.config import *
	from anticipation.vocab import *
	from itertools import combinations

	def load_annotation_file(file_path):
	annotations = []

	with open(file_path, 'r') as f:
	for line in f:
	parts = line.strip().split('\t')
	if len(parts) >= 3:
	timestamp = float(parts[0])
	annotation = parts[2]
	annotations.append((timestamp, annotation))

	return annotations

	def compare_annotations(file1_path, file2_path, interpolate=True):
	"""
	Creates a mapping between downbeat and beat times in two annotation files.
	Inputs are timestamps in the first file, outputs are timestamps in the second file
	"""

	annotations1 = load_annotation_file(file1_path)
	annotations2 = load_annotation_file(file2_path)

	min_length = min(len(annotations1), len(annotations2))
	if len(annotations1) != len(annotations2):
	shorter_file = file1_path if len(annotations1) == min_length else file2_path
	print(f'Number of annotations in {file1_path} and {file2_path} do not match.')
	print(f"Proceeding with the first {min_length} annotations from {shorter_file}.")


	data = []

	for i in range(min_length):
	data.append((annotations1[i][0], annotations2[i][0]))

	x,y = list(zip(*data))

	if interpolate:
	map = scipy.interpolate.interp1d(x, y)

	return map

	else:
	return x,y

	def power_set(lst, min_length=2, max_length=6):
	result = []
	# Only iterate from min_length to max_length (inclusive)
	for i in range(min_length, min(max_length + 1, len(lst) + 1)):
	result.extend(combinations(lst, i))
	return result

	def align_tokens(file1, file2, file3, file4, skip_Nones=True):
	# turn midi into events, without quantizing so we can get 16 digits of precision in arrival time
	perf = midi_to_events(file1, quantize=False)
	score = midi_to_events(file2, quantize=False)

	p_beats, s_beats = compare_annotations(file3,file4,interpolate=False)
	s_beats = np.array(s_beats)
	p_beats = np.array(p_beats)
	map = compare_annotations(file3, file4)

	# create tuples, scaling arrival time back to seconds, which is the unit the annotation mapping uses
	p_tuples = [[perf[3i]/TIME_RESOLUTION, perf[3i+1] - DUR_OFFSET, perf[3*i+2] - NOTE_OFFSET] for i in range(int(len(perf)/3))]
	s_tuples = [[score[3i]/TIME_RESOLUTION, score[3i+1] - DUR_OFFSET, score[3*i+2] - NOTE_OFFSET] for i in range(int(len(score)/3))]
	p_times = [tup[0] for tup in p_tuples]
	s_times = [tup[0] for tup in s_tuples]

	tol = 1e-4

	# match score notes with corresponding beats in annotation file
	s_tuples_b = []
	assigned = []

	for tup in s_tuples:
	mask = np.abs(tup[0] - s_beats) <= tol
	if sum(mask):
	beat = list(np.where(mask)[0])[0]
	s_tuples_b.append((tup[0], tup[1], tup[2], beat))
	assigned.append(beat)
	else:
	s_tuples_b.append(tup)

	for i in range(len(s_beats)):
	if i not in assigned:
	print(f'could not find notes in score associated with beat {i}')

	# match perf notes with corresponding beats in annotation file
	p_tuples_b = []
	assigned = []

	for tup in p_tuples:
	mask = np.abs(tup[0] - p_beats) <= tol
	if sum(mask):
	beat = list(np.where(mask)[0])[0]
	p_tuples_b.append((tup[0], tup[1], tup[2], beat))
	assigned.append(beat)
	else:
	p_tuples_b.append(tup)

	for j in [i for i in range(len(p_beats)) if i not in assigned]:
	beat = p_beats[j]

	candidates = [tup[0] for tup in p_tuples_b if len(tup)==3 and abs(tup[0]-beat)<=0.5]

	success = False
	for subset in power_set(candidates):
	if np.abs(np.average(subset) - beat) <= tol:
	for time in subset:
	k = p_times.index(time)
	p_tuples_b[k] = (p_tuples_b[k][0], p_tuples_b[k][1], p_tuples_b[k][2], j)
	success = True
	# print(f'at beat {j} succeeded in finding notes at times {subset} with average time {np.average(subset)} close to {beat}')
	break
	if not success:
	print(f'could not find notes in perf associated with beat {j}')

	# match score and perf notes that occurred on the same beats, then between
	# (almost all correctly) matched beats, we want to use the map to match off-beat notes
	# outside of the mapping range and domain, just match notes with the same pitch
	matched_tuples = []

	s_tuples_b_copy = s_tuples_b.copy()

	p_min = map.x.min()
	p_max = map.x.max()
	s_min = map.y.min()
	s_max = map.y.max()

	for i, p_tuple in enumerate(p_tuples_b):
	for j, s_tuple in enumerate(s_tuples_b_copy):

	p_time, p_note = p_tuple[0], p_tuple[2]
	s_time, s_note = s_tuple[0], s_tuple[2]

	k = s_tuples_b.index(s_tuple)

	if len(p_tuple) == 4 and len(s_tuple) == 4 and p_tuple[2:] == s_tuple[2:]:
	matched_tuples.append([p_tuple,i,s_tuple,k])
	s_tuples_b_copy.remove(s_tuple)
	elif len(p_tuple) == 3 and len(s_tuple) == 3 and p_time < p_min and s_time < s_min and p_note == s_note:
	matched_tuples.append([p_tuple,i,s_tuple,k])
	s_tuples_b_copy.remove(s_tuple)
	elif len(p_tuple) == 3 and len(s_tuple) == 3 and p_time > p_max and s_time > s_max and p_note == s_note:
	matched_tuples.append([p_tuple,i,s_tuple,k])
	s_tuples_b_copy.remove(s_tuple)
	elif len(p_tuple) == 3 and len(s_tuple) == 3 and p_min <= p_time <= p_max and s_min <= s_time <= s_max and \
	np.abs(map(p_time) - s_time) < .1 and p_note == s_note:
	matched_tuples.append([p_tuple,i,s_tuple,k])
	s_tuples_b_copy.remove(s_tuple)
	if p_tuple not in [l[0] for l in matched_tuples] and not skip_Nones:
	matched_tuples.append([p_tuple,i,[None,None,None],None])

	# revert back to token format and remove beat indices
	for i, l in enumerate(matched_tuples):
	# performance tokens should have control offset
	l[0] = [round(l[0][0]*TIME_RESOLUTION), l[0][1]+DUR_OFFSET, l[0][2]+NOTE_OFFSET]
	l[0] = [CONTROL_OFFSET + t for t in l[0]]

	if l[2][0] != None:
	l[2] = [round(l[2][0]*TIME_RESOLUTION), l[2][1]+DUR_OFFSET, l[2][2]+NOTE_OFFSET]
	matched_tuples[i] = l

	return matched_tuples

	def align_tokens2(file1, file2, file3, file4, skip_Nones=True, thres=0.1):
	# turn midi into events, without quantizing so we can get 16 digits of precision in arrival time
	perf = midi_to_events(file1, quantize=False)
	score = midi_to_events(file2, quantize=False)

	p_beats, s_beats = compare_annotations(file3,file4,interpolate=False)
	s_beats = np.array(s_beats)
	p_beats = np.array(p_beats)
	map = compare_annotations(file3, file4)

	# create tuples, scaling arrival time back to seconds, which is the unit the annotation mapping uses
	p_tuples = [[perf[3i]/TIME_RESOLUTION, perf[3i+1] - DUR_OFFSET, perf[3*i+2] - NOTE_OFFSET] for i in range(int(len(perf)/3))]
	s_tuples = [[score[3i]/TIME_RESOLUTION, score[3i+1] - DUR_OFFSET, score[3*i+2] - NOTE_OFFSET] for i in range(int(len(score)/3))]

	matched_tuples = []

	s_tuples_copy = s_tuples.copy()

	p_min = map.x.min()
	p_max = map.x.max()

	for i, p_tuple in enumerate(p_tuples):
	best_dist = np.inf
	best_match = [None, None, None]
	best_index = None

	p_time, p_note = p_tuple[0], p_tuple[2]

	if p_min <= p_time <= p_max:

	for j, s_tuple in enumerate(s_tuples_copy):

	s_time, s_note = s_tuple[0], s_tuple[2]

	k = s_tuples.index(s_tuple)

	dist = np.abs(map(p_time) - s_time)

	if p_note != s_note:
	continue # not a match (wrong pitch)

	if dist <= thres and dist <= best_dist: # found a possible match
	best_dist = dist
	best_match = s_tuple
	best_index = k

	if best_index is not None:
	matched_tuples.append([p_tuple,i,best_match,best_index])
	s_tuples_copy.remove(best_match)
	elif not skip_Nones:
	matched_tuples.append([p_tuple,i,best_match,best_index])


	# revert back to token format and remove beat indices
	for i, l in enumerate(matched_tuples):
	# performance tokens should have control offset
	l[0] = [round(l[0][0]*TIME_RESOLUTION), l[0][1]+DUR_OFFSET, l[0][2]+NOTE_OFFSET]
	l[0] = [CONTROL_OFFSET + t for t in l[0]]

	if l[2][0] != None:
	l[2] = [round(l[2][0]*TIME_RESOLUTION), l[2][1]+DUR_OFFSET, l[2][2]+NOTE_OFFSET]
	matched_tuples[i] = l

	return matched_tuples