# ccss17 - init - a377b7b (repository page header captured along with the file)
from pathlib import Path
import pandas as pd
import midii
def get_files(dir_path, type, sort=False):
    """Recursively find files under ``dir_path`` with the extension ``type``.

    Args:
        dir_path: Directory to search; every sub-directory is searched too.
        type: File extension without the leading dot (e.g. ``"mid"``).
        sort: When true, return a list ordered by file stem (the file name
            without its extension).

    Returns:
        A list of ``Path`` objects when ``sort`` is true; otherwise the lazy
        iterator produced by ``Path.rglob``, which can be consumed only once.
    """
    matches = Path(dir_path).rglob(f"*.{type}")
    if not sort:
        return matches
    return sorted(matches, key=lambda path: path.stem)
def _preprocess_sort_by_start_time(df):
    """Return the rows of ``df`` ordered by ascending ``start_time``.

    The result is a new frame with a fresh ``0..n-1`` index; the input frame
    is left untouched.
    """
    ordered = df.sort_values(by="start_time")
    return ordered.reset_index(drop=True)
def _preprocess_remove_front_back_silence(df):
    """Drop the silence rows before the first and after the last real lyric.

    A row counts as silence when its ``lyric`` is exactly a single space.
    Silence rows that sit between two lyric rows are kept.

    Args:
        df: Frame with a ``lyric`` column.

    Returns:
        A new frame spanning the first through the last non-silence row, with
        a fresh ``0..n-1`` index. When no row carries a lyric (including an
        empty input) an empty frame with the same columns is returned.
    """
    # Use row *positions*, not index labels: the slice below is positional
    # (``iloc``), so labels would select the wrong rows whenever the index is
    # not the default 0..n-1 range.
    voiced_positions = (df["lyric"] != " ").to_numpy().nonzero()[0]
    if len(voiced_positions) == 0:
        # Nothing but silence: there is no first/last lyric to trim around.
        return df.iloc[0:0].reset_index(drop=True)

    first_pos = int(voiced_positions[0])
    last_pos = int(voiced_positions[-1])
    return df.iloc[first_pos : last_pos + 1].reset_index(drop=True)
def _preprocess_silence_pitch_zero(df):
    """Set ``pitch`` to 0 on every row whose ``lyric`` is a single space.

    The frame is modified in place and the same object is returned.
    """
    silence_mask = df["lyric"].eq(" ")
    df.loc[silence_mask, "pitch"] = 0
    return df
def _preprocess_merge_silence(df):
    """Collapse each run of consecutive silence rows into a single row.

    A row is silence when its ``lyric`` is exactly a single space. A merged
    silence row starts at the first row's ``start_time``, ends at the last
    row's ``end_time``, has ``pitch`` 0, and its ``duration`` is recomputed
    as ``end_time - start_time``. Non-silence rows are copied through.

    Args:
        df: Frame with ``start_time``, ``end_time``, ``pitch``, ``lyric`` and
            ``duration`` columns.

    Returns:
        A new frame containing only those five columns, in that order. The
        columns are present even when the input has no rows.
    """
    columns = ["start_time", "end_time", "pitch", "lyric", "duration"]
    merged_notes = []
    # One pass over plain dict records avoids a per-row ``df.iloc`` lookup.
    for row in df.to_dict("records"):
        if row["lyric"] != " ":
            merged_notes.append({name: row[name] for name in columns})
            continue

        previous = merged_notes[-1] if merged_notes else None
        if previous is not None and previous["lyric"] == " ":
            # Still inside a silence run: stretch it to this row's end time.
            previous["end_time"] = row["end_time"]
            previous["duration"] = previous["end_time"] - previous["start_time"]
        else:
            # First row of a new silence run.
            merged_notes.append(
                {
                    "start_time": row["start_time"],
                    "end_time": row["end_time"],
                    "pitch": 0,
                    "lyric": " ",
                    "duration": row["end_time"] - row["start_time"],
                }
            )

    # Passing ``columns`` keeps the schema intact when there are no rows.
    return pd.DataFrame(merged_notes, columns=columns)
def _preprocess_remove_short_silence(df, threshold=0.3):
    """Remove short silence rows and give their time to the following row.

    A silence row (``lyric`` exactly a single space) whose ``duration`` is
    below ``threshold`` is dropped. The durations of the dropped rows are
    summed and subtracted from the ``start_time`` of the next kept row,
    whose ``duration`` is then recomputed as ``end_time - start_time``.
    A short silence with no kept row after it is simply dropped.

    Args:
        df: Frame with ``lyric``, ``duration``, ``start_time`` and
            ``end_time`` columns; any other columns are carried through.
        threshold: Silences strictly shorter than this are removed.

    Returns:
        A new frame with the same columns as ``df``, including when every
        row was removed or the input was empty.
    """
    kept_notes = []
    absorbed_time = 0.0
    for note in df.to_dict("records"):
        if note["lyric"] == " " and note["duration"] < threshold:
            absorbed_time += note["duration"]
            continue

        if absorbed_time > 0:
            # Start earlier so the removed silence leaves no gap.
            note["start_time"] -= absorbed_time
            note["duration"] = note["end_time"] - note["start_time"]
            absorbed_time = 0.0
        kept_notes.append(note)

    # Passing ``columns`` keeps the schema intact when nothing is kept.
    return pd.DataFrame(kept_notes, columns=df.columns)
def _preprocess_add_quantized_duration_col(df, ticks_per_beat, unit="32"):
    """Add a ``quantized_duration`` column computed with ``midii.quantize``.

    The quantization unit is ``midii.NOTE["n/<unit>"].beat`` passed through
    ``midii.beat2tick`` with the given ``ticks_per_beat``. The ``duration``
    column's values are then handed to ``midii.quantize`` with that unit,
    and the first element of the returned pair becomes the new column.

    NOTE(review): presumably ``unit="32"`` selects a 1/32-note grid and
    ``duration`` is expected in ticks - confirm against the midii docs.

    Args:
        df: Frame with a ``duration`` column; modified in place.
        ticks_per_beat: Forwarded to ``midii.beat2tick``.
        unit: Suffix of the ``midii.NOTE`` key, looked up as ``"n/<unit>"``.

    Returns:
        The same ``df`` object, now carrying ``quantized_duration``.
    """
    note_beats = midii.NOTE[f"n/{unit}"].beat
    grid_ticks = midii.beat2tick(note_beats, ticks_per_beat=ticks_per_beat)
    durations = df["duration"].values
    # The second element of the returned pair is not used here.
    quantized, _ = midii.quantize(durations, unit=grid_ticks)
    df["quantized_duration"] = quantized
    return df