File size: 9,329 Bytes

151b875

import numpy as np
import scipy.interpolate
from anticipation.convert import midi_to_events
from anticipation.config import *
from anticipation.vocab import *
from itertools import combinations

def load_annotation_file(file_path):
    annotations = []
    
    with open(file_path, 'r') as f:
        for line in f:
            parts = line.strip().split('\t')
            if len(parts) >= 3:
                timestamp = float(parts[0])
                annotation = parts[2]
                annotations.append((timestamp, annotation))
    
    return annotations

def compare_annotations(file1_path, file2_path, interpolate=True):
    """

    Creates a mapping between downbeat and beat times in two annotation files.

    Inputs are timestamps in the first file, outputs are timestamps in the second file

    """

    annotations1 = load_annotation_file(file1_path)
    annotations2 = load_annotation_file(file2_path)

    min_length = min(len(annotations1), len(annotations2))
    if len(annotations1) != len(annotations2):
        shorter_file = file1_path if len(annotations1) == min_length else file2_path
        print(f'Number of annotations in {file1_path} and {file2_path} do not match.')
        print(f"Proceeding with the first {min_length} annotations from {shorter_file}.")

    
    data = []

    for i in range(min_length):   
        data.append((annotations1[i][0], annotations2[i][0]))

    x,y = list(zip(*data))

    if interpolate:
        map = scipy.interpolate.interp1d(x, y)
        
        return map

    else:
        return x,y
    
def power_set(lst, min_length=2, max_length=6):
    result = []
    # Only iterate from min_length to max_length (inclusive)
    for i in range(min_length, min(max_length + 1, len(lst) + 1)):
        result.extend(combinations(lst, i))
    return result
    
def align_tokens(file1, file2, file3, file4, skip_Nones=True):
    # turn midi into events, without quantizing so we can get 16 digits of precision in arrival time
    perf = midi_to_events(file1, quantize=False)
    score = midi_to_events(file2, quantize=False)

    p_beats, s_beats = compare_annotations(file3,file4,interpolate=False)
    s_beats = np.array(s_beats)
    p_beats = np.array(p_beats)
    map = compare_annotations(file3, file4)

    # create tuples, scaling arrival time back to seconds, which is the unit the annotation mapping uses
    p_tuples = [[perf[3*i]/TIME_RESOLUTION, perf[3*i+1] - DUR_OFFSET, perf[3*i+2] - NOTE_OFFSET] for i in range(int(len(perf)/3))]
    s_tuples = [[score[3*i]/TIME_RESOLUTION, score[3*i+1] - DUR_OFFSET, score[3*i+2] - NOTE_OFFSET] for i in range(int(len(score)/3))]
    p_times = [tup[0] for tup in p_tuples]
    s_times = [tup[0] for tup in s_tuples]

    tol = 1e-4

    # match score notes with corresponding beats in annotation file
    s_tuples_b = []
    assigned = []

    for tup in s_tuples:
        mask = np.abs(tup[0] - s_beats) <= tol
        if sum(mask):
            beat = list(np.where(mask)[0])[0]
            s_tuples_b.append((tup[0], tup[1], tup[2], beat))
            assigned.append(beat)        
        else:
            s_tuples_b.append(tup)

    for i in range(len(s_beats)):
        if i not in assigned:
            print(f'could not find notes in score associated with beat {i}')

    # match perf notes with corresponding beats in annotation file
    p_tuples_b = []
    assigned = []

    for tup in p_tuples:
        mask = np.abs(tup[0] - p_beats) <= tol
        if sum(mask):
            beat = list(np.where(mask)[0])[0]
            p_tuples_b.append((tup[0], tup[1], tup[2], beat))
            assigned.append(beat)
        else:
            p_tuples_b.append(tup)

    for j in [i for i in range(len(p_beats)) if i not in assigned]:
        beat = p_beats[j]

        candidates = [tup[0] for tup in p_tuples_b if len(tup)==3 and abs(tup[0]-beat)<=0.5]

        success = False
        for subset in power_set(candidates):
            if np.abs(np.average(subset) - beat) <= tol:
                for time in subset:
                    k = p_times.index(time)
                    p_tuples_b[k] = (p_tuples_b[k][0], p_tuples_b[k][1], p_tuples_b[k][2], j)
                success = True
                # print(f'at beat {j} succeeded in finding notes at times {subset} with average time {np.average(subset)} close to {beat}')
                break
        if not success:
            print(f'could not find notes in perf associated with beat {j}')

    # match score and perf notes that occurred on the same beats, then between 
    # (almost all correctly) matched beats, we want to use the map to match off-beat notes
    # outside of the mapping range and domain, just match notes with the same pitch
    matched_tuples = []

    s_tuples_b_copy = s_tuples_b.copy()

    p_min = map.x.min()
    p_max = map.x.max()
    s_min = map.y.min()
    s_max = map.y.max()

    for i, p_tuple in enumerate(p_tuples_b):
        for j, s_tuple in enumerate(s_tuples_b_copy):

            p_time, p_note = p_tuple[0], p_tuple[2]
            s_time, s_note = s_tuple[0], s_tuple[2] 

            k = s_tuples_b.index(s_tuple)

            if len(p_tuple) == 4 and len(s_tuple) == 4 and p_tuple[2:] == s_tuple[2:]:
                matched_tuples.append([p_tuple,i,s_tuple,k])
                s_tuples_b_copy.remove(s_tuple)
            elif len(p_tuple) == 3 and len(s_tuple) == 3 and p_time < p_min and s_time < s_min and p_note == s_note:
                matched_tuples.append([p_tuple,i,s_tuple,k])
                s_tuples_b_copy.remove(s_tuple)
            elif len(p_tuple) == 3 and len(s_tuple) == 3 and p_time > p_max and s_time > s_max and p_note == s_note:
                matched_tuples.append([p_tuple,i,s_tuple,k])
                s_tuples_b_copy.remove(s_tuple)
            elif len(p_tuple) == 3 and len(s_tuple) == 3 and p_min <= p_time <= p_max and s_min <= s_time <= s_max and \
                np.abs(map(p_time) - s_time) < .1 and p_note == s_note:
                matched_tuples.append([p_tuple,i,s_tuple,k])
                s_tuples_b_copy.remove(s_tuple)
        if p_tuple not in [l[0] for l in matched_tuples] and not skip_Nones:
            matched_tuples.append([p_tuple,i,[None,None,None],None])

    # revert back to token format and remove beat indices
    for i, l in enumerate(matched_tuples):
        # performance tokens should have control offset
        l[0] = [round(l[0][0]*TIME_RESOLUTION), l[0][1]+DUR_OFFSET, l[0][2]+NOTE_OFFSET]
        l[0] = [CONTROL_OFFSET + t for t in l[0]]

        if l[2][0] != None:
            l[2] = [round(l[2][0]*TIME_RESOLUTION), l[2][1]+DUR_OFFSET, l[2][2]+NOTE_OFFSET]
        matched_tuples[i] = l

    return matched_tuples

def align_tokens2(file1, file2, file3, file4, skip_Nones=True, thres=0.1):
    # turn midi into events, without quantizing so we can get 16 digits of precision in arrival time
    perf = midi_to_events(file1, quantize=False)
    score = midi_to_events(file2, quantize=False)

    p_beats, s_beats = compare_annotations(file3,file4,interpolate=False)
    s_beats = np.array(s_beats)
    p_beats = np.array(p_beats)
    map = compare_annotations(file3, file4)

    # create tuples, scaling arrival time back to seconds, which is the unit the annotation mapping uses
    p_tuples = [[perf[3*i]/TIME_RESOLUTION, perf[3*i+1] - DUR_OFFSET, perf[3*i+2] - NOTE_OFFSET] for i in range(int(len(perf)/3))]
    s_tuples = [[score[3*i]/TIME_RESOLUTION, score[3*i+1] - DUR_OFFSET, score[3*i+2] - NOTE_OFFSET] for i in range(int(len(score)/3))]

    matched_tuples = []

    s_tuples_copy = s_tuples.copy()

    p_min = map.x.min()
    p_max = map.x.max()

    for i, p_tuple in enumerate(p_tuples):
        best_dist = np.inf
        best_match = [None, None, None]
        best_index = None

        p_time, p_note = p_tuple[0], p_tuple[2]

        if p_min <= p_time <= p_max:

            for j, s_tuple in enumerate(s_tuples_copy):

                s_time, s_note = s_tuple[0], s_tuple[2] 

                k = s_tuples.index(s_tuple)

                dist = np.abs(map(p_time) - s_time)

                if p_note != s_note:
                    continue # not a match (wrong pitch)

                if dist <= thres and dist <= best_dist: # found a possible match
                    best_dist = dist
                    best_match = s_tuple
                    best_index = k

        if best_index is not None:
            matched_tuples.append([p_tuple,i,best_match,best_index])
            s_tuples_copy.remove(best_match)
        elif not skip_Nones:
            matched_tuples.append([p_tuple,i,best_match,best_index])


    # revert back to token format and remove beat indices
    for i, l in enumerate(matched_tuples):
        # performance tokens should have control offset
        l[0] = [round(l[0][0]*TIME_RESOLUTION), l[0][1]+DUR_OFFSET, l[0][2]+NOTE_OFFSET]
        l[0] = [CONTROL_OFFSET + t for t in l[0]]

        if l[2][0] != None:
            l[2] = [round(l[2][0]*TIME_RESOLUTION), l[2][1]+DUR_OFFSET, l[2][2]+NOTE_OFFSET]
        matched_tuples[i] = l

    return matched_tuples