slslslrhfem
committed on
Commit
ยท
07191d3
1
Parent(s):
e17c464
change delete mechanism
Browse files- segment_transcription.py +120 -65
segment_transcription.py
CHANGED
|
@@ -11,6 +11,8 @@ from madmom.features.beats import DBNBeatTrackingProcessor
|
|
| 11 |
import shutil
|
| 12 |
from madmom.features.downbeats import DBNDownBeatTrackingProcessor
|
| 13 |
from utils import vocal_midi2note, quantize, chord_quantize, save_to_json
|
|
|
|
|
|
|
| 14 |
|
| 15 |
downbeat_model = Demixed_DilatedTransformerModel(attn_len=5, instr=5, ntoken=2,
|
| 16 |
dmodel=256, nhead=8, d_hid=1024,
|
|
@@ -26,81 +28,134 @@ downbeat_tracker = DBNDownBeatTrackingProcessor(beats_per_bar=[3, 4],
|
|
| 26 |
device = 'cuda'
|
| 27 |
|
| 28 |
def segment_transcription(audio_path):
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
| 33 |
wav_path = audio_path
|
| 34 |
wav_name = os.path.splitext(os.path.basename(wav_path))[0]
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
to_name = "separated/htdemucs_6s/" + wav_name + "/" + wav_name + ".wav"
|
| 40 |
-
os.rename(others_name, to_name)
|
| 41 |
|
| 42 |
-
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
abs_file_path = os.path.abspath(wav_path)
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
}
|
| 69 |
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
sav_path = wav_path[:-4] + ".json"
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
vocal_infos = quantize(vocal_notes, beat_times, downbeat_start, chord_time_gap)
|
| 81 |
-
# chord_infos = chord_quantize(chord_info, beat_times)
|
| 82 |
-
wav_music_info = Music_info(
|
| 83 |
-
melody_info=None,
|
| 84 |
-
bass_info=None,
|
| 85 |
-
chord_info=None,
|
| 86 |
-
vocal_info=vocal_infos,
|
| 87 |
-
chart_scale=None,
|
| 88 |
-
title=str(wav_name),
|
| 89 |
-
bpm=int(bpm),
|
| 90 |
-
rhythm=int(rhythm),
|
| 91 |
-
downbeat_start=float(downbeat_start),
|
| 92 |
-
beat_times=beat_times,
|
| 93 |
-
boundaries=None,
|
| 94 |
-
segment_label=None,
|
| 95 |
-
link=None,
|
| 96 |
-
)
|
| 97 |
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
return sav_path
|
| 104 |
-
|
| 105 |
-
|
| 106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
import shutil
|
| 12 |
from madmom.features.downbeats import DBNDownBeatTrackingProcessor
|
| 13 |
from utils import vocal_midi2note, quantize, chord_quantize, save_to_json
|
| 14 |
+
import time
|
| 15 |
+
import uuid
|
| 16 |
|
| 17 |
downbeat_model = Demixed_DilatedTransformerModel(attn_len=5, instr=5, ntoken=2,
|
| 18 |
dmodel=256, nhead=8, d_hid=1024,
|
|
|
|
| 28 |
device = 'cuda'
|
| 29 |
|
| 30 |
def segment_transcription(audio_path):
    """Separate an audio file into stems, transcribe the vocals, and save the result as JSON.

    Each call works inside a uniquely-named temporary folder (timestamp + UUID) so
    concurrent invocations do not collide, and the folder is removed in ``finally``
    whether or not processing succeeds.

    Args:
        audio_path: Path to the input audio file. NOTE(review): the save path is
            derived with ``wav_path[:-4]``, so a 3-character extension (e.g. ``.wav``)
            is assumed — confirm against callers.

    Returns:
        Path of the JSON file written next to the input audio.

    Raises:
        FileNotFoundError: If an expected separated stem file is missing.
    """
    wav_path = audio_path
    wav_name = os.path.splitext(os.path.basename(wav_path))[0]

    # Unique folder name (timestamp + UUID) so parallel runs never share output dirs.
    unique_id = f"{wav_name}_{int(time.time() * 1000)}_{str(uuid.uuid4())[:8]}"
    separated_base = f"separated_{unique_id}"

    print(f"Processing {wav_name} in temporary folder: {separated_base}")

    try:
        # First separation: piano vs. everything else ("no_piano").
        print("Step 1: Separating piano...")
        demucs.separate.main([
            "--two-stems", "piano",
            "-n", "htdemucs_6s",
            "-o", separated_base,
            wav_path
        ])

        piano_wav_name = f"{separated_base}/htdemucs_6s/{wav_name}/piano.wav"
        others_name = f"{separated_base}/htdemucs_6s/{wav_name}/no_piano.wav"
        to_name = f"{separated_base}/htdemucs_6s/{wav_name}/{wav_name}.wav"

        # Rename the "no_piano" stem so the second demucs pass names its output
        # folder after the song rather than "no_piano".
        if os.path.exists(others_name):
            os.rename(others_name, to_name)
        else:
            raise FileNotFoundError(f"Expected file not found: {others_name}")

        # Second separation: vocals, drums, bass, other.
        print("Step 2: Separating vocals, drums, bass, other...")
        demucs.separate.main([
            "-n", "htdemucs",
            "-o", separated_base,
            to_name
        ])

        # Paths of the separated stem files.
        vocal_wav_name = f"{separated_base}/htdemucs/{wav_name}/vocals.wav"
        drum_wav_name = f"{separated_base}/htdemucs/{wav_name}/drums.wav"
        other_wav_name = f"{separated_base}/htdemucs/{wav_name}/other.wav"
        bass_wav_name = f"{separated_base}/htdemucs/{wav_name}/bass.wav"

        # Fail fast if any stem is missing before we start loading audio.
        required_files = [vocal_wav_name, drum_wav_name, other_wav_name,
                          bass_wav_name, piano_wav_name]
        for file_path in required_files:
            if not os.path.exists(file_path):
                raise FileNotFoundError(f"Required separated file not found: {file_path}")

        vocal_wav_path = os.path.abspath(vocal_wav_name)
        drum_wav_path = os.path.abspath(drum_wav_name)
        other_wav_path = os.path.abspath(other_wav_name)
        bass_wav_path = os.path.abspath(bass_wav_name)
        abs_wav_path = os.path.abspath(wav_path)

        print("Step 3: Loading separated audio files...")
        vocals = librosa.load(vocal_wav_name, sr=44100, mono=False)[0]
        piano = librosa.load(piano_wav_name, sr=44100, mono=False)[0]
        drums = librosa.load(drum_wav_name, sr=44100, mono=False)[0]
        bass = librosa.load(bass_wav_name, sr=44100, mono=False)[0]
        other = librosa.load(other_wav_name, sr=44100, mono=False)[0]

        # Channel-last arrays, keyed the way wav_quantizing expects.
        spleeter_dict = {
            'vocals': np.asarray(vocals).T,
            'piano': np.asarray(piano).T,
            'drums': np.asarray(drums).T,
            'bass': np.asarray(bass).T,
            'other': np.asarray(other).T
        }

        print("Step 4: Combining piano and other tracks...")
        # Reuse the arrays loaded in Step 3 instead of re-reading both files from
        # disk (the files have not changed, so the result is identical).
        real_others = piano + other
        soundfile.write(other_wav_name, real_others.T, 44100)

        print("Step 5: Quantizing audio...")
        quantize_result = wav_quantizing(wav_path, spleeter_dict, downbeat_model,
                                         beat_tracker, downbeat_tracker, device)

        print("Step 6: Transcribing vocals...")
        vocal_notes = vocal_midi2note(vocal_trans(vocal_wav_path, device=device))

        # chord_info = transcript("chord", wav_path)[1]  # intentionally disabled
        sav_path = wav_path[:-4] + ".json"

        beat_times, downbeat_start, rhythm, bpm = quantize_result[0]
        chord_time_gap = (beat_times[1] - beat_times[0]) * rhythm
        vocal_infos = quantize(vocal_notes, beat_times, downbeat_start, chord_time_gap)
        # chord_infos = chord_quantize(chord_info, beat_times)  # intentionally disabled

        print("Step 7: Creating music info object...")
        wav_music_info = Music_info(
            melody_info=None,
            bass_info=None,
            chord_info=None,
            vocal_info=vocal_infos,
            chart_scale=None,
            title=str(wav_name),
            bpm=int(bpm),
            rhythm=int(rhythm),
            downbeat_start=float(downbeat_start),
            beat_times=beat_times,
            boundaries=None,
            segment_label=None,
            link=None,
        )

        # dirname is '' for a bare filename; os.makedirs('') raises, so guard it.
        save_dir = os.path.dirname(sav_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        save_to_json(wav_music_info, sav_path)

        print(f"Successfully processed {wav_name} -> {sav_path}")
        return sav_path

    except Exception as e:
        print(f"Error processing {wav_name}: {str(e)}")
        # Bare raise preserves the original traceback; ``raise e`` would reset it.
        raise

    finally:
        # Clean up only this invocation's temporary folder.
        if os.path.exists(separated_base):
            print(f"Cleaning up temporary folder: {separated_base}")
            try:
                shutil.rmtree(separated_base)
                print(f"Successfully cleaned up: {separated_base}")
            except Exception as cleanup_error:
                print(f"Warning: Failed to clean up {separated_base}: {cleanup_error}")
        else:
            print(f"Temporary folder {separated_base} not found (already cleaned up?)")