Spaces:

mippia
/

MPD-demo

Build error

MPD-demo / compare_utils.py

slslslrhfem

first_push

5288edb 5 months ago

12.4 kB

	import torch
	import numpy as np

	def remove_1(points):
	    """Return the entries of ``points`` whose element at index 2 is not 1.

	    The input order is preserved and a new list is returned; ``points``
	    itself is not modified.
	    """
	    kept = []
	    for entry in points:
	        if entry[2] != 1:
	            kept.append(entry)
	    return kept


	class CompareHelper:
	    """Wrapper whose ordering is defined by the first element of ``data``."""

	    def __init__(self, data):
	        # ``data`` is stored untouched; only ``data[0]`` is read for ordering.
	        self.data = data

	    def __lt__(self, other):
	        """Return True when ``self.data[0]`` is smaller than ``other.data[0]``."""
	        own_key = self.data[0]
	        other_key = other.data[0]
	        return own_key < other_key


	def get_duration_in_interval(chord, start_interval, end_interval):
	    """Return how long ``chord`` lasts inside the given interval.

	    ``chord`` is a mapping with ``'start'`` and ``'end'`` entries. The chord
	    is clipped to ``[start_interval, end_interval]`` and the clipped length
	    is returned; the value is negative when the chord and the interval do
	    not overlap.
	    """
	    clipped_end = min(chord['end'], end_interval)
	    clipped_start = max(chord['start'], start_interval)
	    return clipped_end - clipped_start


	def shift_image_optimized(image, x_shift, y_shift):
	    """Circularly shift a 4-D tensor and blank what wrapped along the last axis.

	    ``x_shift`` rolls dim 3 and ``y_shift`` rolls dim 2. After rolling, the
	    columns that wrapped around along dim 3 are set to zero; wrap-around
	    along dim 2 is left in place (the code that zeroed it is disabled in
	    the original). A new tensor is returned; ``image`` is not modified.

	    NOTE (from the original author): the x/y naming should be swapped
	    (time, pitch).
	    """
	    # The unpacking enforces a 4-D input; the sizes themselves are unused.
	    _, _, _rows, _cols = image.size()

	    # Roll both axes in one call; each shift is paired with its own dim.
	    rolled = torch.roll(image, shifts=(y_shift, x_shift), dims=(2, 3))

	    # Erase the part of the last axis that wrapped around.
	    if x_shift > 0:
	        rolled[..., :x_shift] = 0
	    elif x_shift < 0:
	        rolled[..., x_shift:] = 0

	    return rolled


	def algorithmic_collate3(batch):
	    """Flatten a batch of ``(images, labels, points)`` triples by one level.

	    Every sample in ``batch`` contributes three iterables; the items of each
	    are concatenated across samples in batch order.

	    :param batch: sequence of 3-tuples of iterables
	    :return: tuple ``(images, labels, points)`` of three flat lists
	    """
	    imgs, labels, points = zip(*batch)

	    # Unwrap one nesting level for each of the three fields.
	    flat_images = [img for group in imgs for img in group]
	    flat_labels = [lab for group in labels for lab in group]
	    flat_points = [pt for group in points for pt in group]

	    return flat_images, flat_labels, flat_points

	def quantize_image(image):
	    """
	    Quantize the given image tensor along its last axis (3 columns -> 1).

	    Output column ``i`` is 1.0 when the window of source columns
	    ``[3*i - 1, 3*i + 2)`` sums to more than zero, else 0.0. The first
	    window is clipped to ``[0, 2)``. With 192 source columns, the last
	    source column (191) falls in no window.

	    :param image: torch.Tensor, shape [1, 128, 192], binary values
	    :return: torch.Tensor, shape [1, 128, 64], quantized values
	    """
	    result = torch.zeros(1, 128, 64)

	    for col in range(64):
	        # Window bounds in the source image; the first one has only 2 columns.
	        lo = 0 if col == 0 else 3 * col - 1
	        hi = lo + (2 if col == 0 else 3)

	        # Mark the output pixel when anything in the window is set.
	        window_total = image[:, :, lo:hi].sum(dim=2)
	        result[:, :, col] = (window_total > 0).float()

	    return result

	def piano_roll_to_chroma(piano_roll):
	    """
	    Fold a piano roll tensor into a binary 12-row chroma tensor.

	    Parameters:
	    piano_roll (torch.Tensor): The piano roll tensor with shape
	    (batch_size, num_channels, num_pitches, num_frames). Any value > 0
	    counts as an active note.

	    Returns:
	    torch.Tensor: The binary chroma tensor with shape
	    (batch_size, num_channels, 12, num_frames). If the input already has
	    12 rows on dim 2 it is returned unchanged (same object).
	    """
	    if piano_roll.shape[2] == 12:
	        return piano_roll

	    # Binarize first so the per-class maximum below is 0.0 or 1.0.
	    active = (piano_roll > 0).float()

	    dims = active.shape
	    chroma = torch.zeros((dims[0], dims[1], 12, dims[3]), device=active.device)

	    # Row index modulo 12 selects the class; a class is on when any of its
	    # rows is on (maximum over the strided slice).
	    for pitch_class in range(12):
	        same_class_rows = active[:, :, pitch_class::12, :]
	        chroma[:, :, pitch_class, :] = same_class_rows.max(dim=2).values

	    return chroma

	def calculate_correlation(tensor1, tensor2, max_shift, device):
	    """Return the best cosine similarity between rows over circular shifts.

	    Every row of ``tensor1`` is compared with every row of ``tensor2``. The
	    rows of ``tensor2`` are circularly rolled along dim 1 by each offset in
	    ``[-max_shift, max_shift]`` and the element-wise maximum similarity over
	    all offsets is kept.

	    :param tensor1: 2-D tensor, one vector per row
	    :param tensor2: 2-D tensor with the same row length as ``tensor1``
	    :param max_shift: largest absolute roll offset to try
	    :param device: device on which the result matrix is allocated
	    :return: tensor of shape ``(tensor1.size(0), tensor2.size(0))``; it is
	        filled with ``-inf`` when ``max_shift`` is negative (no offset tried)

	    NOTE: an all-zero row has zero norm, so its similarities are NaN.
	    """
	    # Start from -inf so the first computed similarity always wins.
	    best = torch.full(
	        (tensor1.size(0), tensor2.size(0)), float('-inf'), device=device
	    )

	    # Normalise once: a circular roll only permutes a row's elements, so the
	    # row norm is the same for every shift and need not be recomputed.
	    unit1 = tensor1 / tensor1.norm(dim=1, keepdim=True)
	    unit2 = tensor2 / tensor2.norm(dim=1, keepdim=True)

	    for shift in range(-max_shift, max_shift + 1):
	        rolled_unit2 = torch.roll(unit2, shifts=shift, dims=1)
	        # Dot products of unit vectors are the cosine similarities.
	        similarity = torch.mm(unit1, rolled_unit2.t())
	        best = torch.max(best, similarity)

	    return best




	def infos_to_pianorolls(info, use_all):
	    """Build a four-bar vocal piano roll for every start bar of a song.

	    Start bars come from ``infos_to_startpoint(info, use_all)``. For each
	    start bar ``b`` the notes stored under the string keys ``str(b)`` ..
	    ``str(b + 3)`` of ``info['vocal_info']`` are rendered with
	    ``bar_notes_to_pianoroll``. A bar without an entry, or a
	    ``vocal_info`` of ``None``, contributes an empty note list.

	    Only the vocal track is produced; the melody, bass, boundary and piano
	    tracks and the chroma output are disabled (chroma is deprecated).

	    :param info: song description mapping; ``'vocal_info'`` is read here
	    :param use_all: forwarded to ``infos_to_startpoint``
	    :return: ``(pianorolls, start_points, CONLON_points)`` where
	        ``pianorolls['vocal']`` and ``CONLON_points['vocal']`` are dicts
	        keyed by the position of the start bar within ``start_points``
	    """
	    start_points = infos_to_startpoint(info, use_all)

	    # No shift is applied; the value is only forwarded to the renderer.
	    shift_val = 0

	    vocal_pianorolls = {}
	    vocal_CONLON_points = {}
	    for idx, first_bar in enumerate(start_points):
	        vocal_info = info['vocal_info']

	        # Notes of the four consecutive bars starting at ``first_bar``.
	        vocal = [
	            [] if vocal_info is None else vocal_info.get(str(first_bar + offset), [])
	            for offset in range(4)
	        ]

	        roll, conlon_point = bar_notes_to_pianoroll(vocal, shift_val)
	        vocal_pianorolls[idx] = roll
	        vocal_CONLON_points[idx] = conlon_point

	    pianorolls = {'vocal': vocal_pianorolls}
	    CONLON_points = {'vocal': vocal_CONLON_points}

	    return pianorolls, start_points, CONLON_points  # chroma deprecated



	def bar_notes_to_pianoroll(bars, shift_val):
	    """Render a sequence of bars into one piano roll plus a list of note points.

	    Each bar occupies 48 consecutive rows of the ``(192, 128)`` roll, with
	    bar ``j`` starting at row ``48 * j``. For every note, index 4 is read as
	    its first step inside the bar, index 5 as its last step (inclusive) and
	    index 2 as the column to fill. Rows past 191 are silently dropped by
	    the slice assignment.

	    :param bars: iterable of bars, each an iterable of indexable notes
	    :param shift_val: accepted for interface compatibility; not used here
	    :return: ``(pianoroll, conlon_points)`` where ``conlon_points`` holds one
	        ``[start_row, pitch, duration]`` entry per note, in input order
	    """
	    roll = np.zeros((192, 128))
	    points = []
	    for bar_index, notes in enumerate(bars):
	        row_offset = 48 * bar_index
	        for note in notes:
	            first_row = int(note[4]) + row_offset
	            pitch = int(note[2])
	            # Exclusive end, so the note's last step is included.
	            stop_row = int(note[5]) + row_offset + 1
	            points.append([first_row, pitch, stop_row - first_row])
	            roll[first_row:stop_row, pitch] = 1
	    return roll, points

	def infos_to_startpoint(info, use_all):
	    """Return the list of start-bar indices for a song.

	    The last bar index is estimated as
	    ``round((beat_times[-1] - downbeat_start) / (4 * first_beat_gap)) - 1``
	    where ``first_beat_gap`` is the distance between the first two beat
	    times (presumably 4 beats per bar at constant tempo; confirm with the
	    data producer). The indices ``0..boundary`` are then passed through
	    ``refine_breakpoints_custom``.

	    :param info: mapping with ``'downbeat_start'`` and ``'beat_times'``
	    :param use_all: when truthy, return every index from 0 up to (but not
	        including) the last refined start point instead of the refined list
	    :return: list of start-bar indices
	    """
	    downbeat_start = info['downbeat_start']
	    beat_times = info['beat_times']

	    song_span = beat_times[-1] - downbeat_start
	    bar_length = 4 * (beat_times[1] - beat_times[0])
	    boundary = round(song_span / bar_length) - 1

	    start_points = refine_breakpoints_custom(list(range(boundary + 1)))
	    if not use_all:
	        return start_points
	    return list(range(start_points[-1]))

	def refine_breakpoints_custom(breakpoints, interval=4):
	    """Expand a list of breakpoints into start points spaced by ``interval``.

	    Steps:
	      1. Keep only the positive breakpoints, dropping duplicates and
	         preserving first-seen order. If nothing remains, use ``[0]``.
	      2. Before the first breakpoint, add the points that share its
	         remainder modulo ``interval``
	         (``first % interval, first % interval + interval, ...``).
	      3. After each breakpoint, keep adding ``interval`` while the new point
	         stays at least ``interval`` before the next breakpoint.

	    :param breakpoints: iterable of integer bar indices
	    :param interval: spacing of the inserted points; expected to be a
	        positive integer
	    :return: non-empty list of refined start points
	    """
	    # Dropping 0 makes the start ambiguous: e.g. when a verse starts at
	    # bar 6, the window could be 0~4 or 2~6; this picks the latter.
	    seen = set()  # O(1) membership instead of rescanning the list
	    unique_breakpoints = []
	    for point in breakpoints:
	        if point > 0 and point not in seen:
	            seen.add(point)
	            unique_breakpoints.append(point)

	    if not unique_breakpoints:
	        unique_breakpoints.append(0)

	    # Lead-in: points aligned with the first breakpoint modulo ``interval``.
	    # For a positive interval the remainder is >= 0, so every generated
	    # point is non-negative and needs no extra filtering.
	    first = unique_breakpoints[0]
	    starting_point = first % interval
	    refined = []
	    if starting_point != first:
	        refined.extend(range(starting_point, first, interval))

	    last_index = len(unique_breakpoints) - 1
	    for i, current in enumerate(unique_breakpoints):
	        refined.append(current)

	        # Fill the gap up to the next breakpoint, if there is one.
	        if i < last_index:
	            following = unique_breakpoints[i + 1]
	            filler = current
	            while filler + 2 * interval <= following:
	                filler += interval
	                refined.append(filler)

	    # ``refined`` always holds at least the first breakpoint here.
	    return refined