slslslrhfem committed on
Commit
07191d3
·
1 Parent(s): e17c464

change delete mechanism

Browse files
Files changed (1) hide show
  1. segment_transcription.py +120 -65
segment_transcription.py CHANGED
@@ -11,6 +11,8 @@ from madmom.features.beats import DBNBeatTrackingProcessor
11
  import shutil
12
  from madmom.features.downbeats import DBNDownBeatTrackingProcessor
13
  from utils import vocal_midi2note, quantize, chord_quantize, save_to_json
 
 
14
 
15
  downbeat_model = Demixed_DilatedTransformerModel(attn_len=5, instr=5, ntoken=2,
16
  dmodel=256, nhead=8, d_hid=1024,
@@ -26,81 +28,134 @@ downbeat_tracker = DBNDownBeatTrackingProcessor(beats_per_bar=[3, 4],
26
  device = 'cuda'
27
 
28
  def segment_transcription(audio_path):
29
- # Make it simple, just a demucs and bpm quantization, and vocal_transcription and chord transciption only!
30
- # ...Maybe not simple
31
- # we use chord transcription from omnizart, which needs python 3.8 file
32
-
 
33
  wav_path = audio_path
34
  wav_name = os.path.splitext(os.path.basename(wav_path))[0]
35
 
36
- demucs.separate.main(["--two-stems", "piano", "-n", "htdemucs_6s", wav_path])
37
- piano_wav_name = "separated/htdemucs_6s/" + wav_name + "/piano.wav"
38
- others_name = "separated/htdemucs_6s/" + wav_name + "/no_piano.wav"
39
- to_name = "separated/htdemucs_6s/" + wav_name + "/" + wav_name + ".wav"
40
- os.rename(others_name, to_name)
41
 
42
- demucs.separate.main(["-n", "htdemucs", to_name])
43
 
44
- vocal_wav_name = "separated/htdemucs/" + wav_name + "/vocals.wav"
45
- drum_wav_name = "separated/htdemucs/" + wav_name + "/drums.wav"
46
- other_wav_name = "separated/htdemucs/" + wav_name + "/other.wav"
47
- bass_wav_name = "separated/htdemucs/" + wav_name + "/bass.wav"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- vocal_wav_path = os.path.abspath("separated/htdemucs/" + wav_name + "/vocals.wav")
50
- drum_wav_path = os.path.abspath("separated/htdemucs/" + wav_name + "/drums.wav")
51
- other_wav_path = os.path.abspath("separated/htdemucs/" + wav_name + "/other.wav")
52
- bass_wav_path = os.path.abspath("separated/htdemucs/" + wav_name + "/bass.wav")
53
- abs_wav_path = os.path.abspath(wav_path)
54
- abs_file_path = os.path.abspath(wav_path)
55
 
56
- vocals = librosa.load(vocal_wav_name, sr=44100, mono=False)[0]
57
- piano = librosa.load(piano_wav_name, sr=44100, mono=False)[0]
58
- drums = librosa.load(drum_wav_name, sr=44100, mono=False)[0]
59
- bass = librosa.load(bass_wav_name, sr=44100, mono=False)[0]
60
- other = librosa.load(other_wav_name, sr=44100, mono=False)[0]
61
 
62
- spleeter_dict = {
63
- 'vocals': np.asarray(vocals).T,
64
- 'piano': np.asarray(piano).T,
65
- 'drums': np.asarray(drums).T,
66
- 'bass': np.asarray(bass).T,
67
- 'other': np.asarray(other).T
68
- }
69
 
70
- real_others = librosa.load(piano_wav_name, sr=44100, mono=False)[0] + librosa.load(other_wav_name, sr=44100, mono=False)[0]
71
- soundfile.write(other_wav_name, real_others.T, 44100)
 
 
 
 
 
72
 
73
- quantize_result = wav_quantizing(wav_path, spleeter_dict, downbeat_model, beat_tracker, downbeat_tracker, device)
74
- vocal_notes = vocal_midi2note(vocal_trans(vocal_wav_path, device=device))
75
- #chord_info = transcript("chord", wav_path)[1]
76
- sav_path = wav_path[:-4] + ".json"
77
 
78
- beat_times, downbeat_start, rhythm, bpm = quantize_result[0]
79
- chord_time_gap = (beat_times[1] - beat_times[0]) * rhythm
80
- vocal_infos = quantize(vocal_notes, beat_times, downbeat_start, chord_time_gap)
81
- # chord_infos = chord_quantize(chord_info, beat_times)
82
- wav_music_info = Music_info(
83
- melody_info=None,
84
- bass_info=None,
85
- chord_info=None,
86
- vocal_info=vocal_infos,
87
- chart_scale=None,
88
- title=str(wav_name),
89
- bpm=int(bpm),
90
- rhythm=int(rhythm),
91
- downbeat_start=float(downbeat_start),
92
- beat_times=beat_times,
93
- boundaries=None,
94
- segment_label=None,
95
- link=None,
96
- )
97
 
98
- os.makedirs(os.path.dirname(sav_path), exist_ok=True)
99
- save_to_json(wav_music_info, sav_path)
100
- if os.path.exists("separated"):
101
- shutil.rmtree("separated")
102
-
103
- return sav_path
104
-
105
-
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  import shutil
12
  from madmom.features.downbeats import DBNDownBeatTrackingProcessor
13
  from utils import vocal_midi2note, quantize, chord_quantize, save_to_json
14
+ import time
15
+ import uuid
16
 
17
  downbeat_model = Demixed_DilatedTransformerModel(attn_len=5, instr=5, ntoken=2,
18
  dmodel=256, nhead=8, d_hid=1024,
 
28
  device = 'cuda'
29
 
30
def segment_transcription(audio_path):
    """Separate an audio file into stems, transcribe it, and save the result as JSON.

    Pipeline:
      1. Demucs pass 1 (htdemucs_6s, two-stems) splits piano vs. everything else.
      2. Demucs pass 2 (htdemucs) splits the non-piano mix into vocals/drums/bass/other.
      3. Beat/downbeat quantization (``wav_quantizing``) and vocal transcription
         (``vocal_trans`` + ``vocal_midi2note``) produce a ``Music_info`` object,
         serialized next to the input file as ``<name>.json``.

    Each call works in a unique temporary folder (timestamp + UUID suffix) so
    concurrent invocations do not clobber each other's Demucs output, and the
    folder is always removed in ``finally``.

    Args:
        audio_path: Path to the input audio file.

    Returns:
        Path of the written JSON file.

    Raises:
        FileNotFoundError: If an expected Demucs output stem is missing.
        Exception: Any error from the underlying separation/transcription steps
            is logged and re-raised.
    """
    wav_path = audio_path
    wav_name = os.path.splitext(os.path.basename(wav_path))[0]

    # Unique working folder (timestamp + UUID) so parallel runs don't collide.
    unique_id = f"{wav_name}_{int(time.time() * 1000)}_{str(uuid.uuid4())[:8]}"
    separated_base = f"separated_{unique_id}"

    print(f"Processing {wav_name} in temporary folder: {separated_base}")

    try:
        # First separation: piano vs. no_piano.
        print("Step 1: Separating piano...")
        demucs.separate.main([
            "--two-stems", "piano",
            "-n", "htdemucs_6s",
            "-o", separated_base,
            wav_path
        ])

        piano_wav_name = f"{separated_base}/htdemucs_6s/{wav_name}/piano.wav"
        others_name = f"{separated_base}/htdemucs_6s/{wav_name}/no_piano.wav"
        to_name = f"{separated_base}/htdemucs_6s/{wav_name}/{wav_name}.wav"

        # Rename so the second Demucs pass produces a folder named after the track.
        if os.path.exists(others_name):
            os.rename(others_name, to_name)
        else:
            raise FileNotFoundError(f"Expected file not found: {others_name}")

        # Second separation: vocals, drums, bass, other.
        print("Step 2: Separating vocals, drums, bass, other...")
        demucs.separate.main([
            "-n", "htdemucs",
            "-o", separated_base,
            to_name
        ])

        # Paths of the separated stems.
        vocal_wav_name = f"{separated_base}/htdemucs/{wav_name}/vocals.wav"
        drum_wav_name = f"{separated_base}/htdemucs/{wav_name}/drums.wav"
        other_wav_name = f"{separated_base}/htdemucs/{wav_name}/other.wav"
        bass_wav_name = f"{separated_base}/htdemucs/{wav_name}/bass.wav"

        # Fail fast if any stem is missing.
        required_files = [vocal_wav_name, drum_wav_name, other_wav_name, bass_wav_name, piano_wav_name]
        for file_path in required_files:
            if not os.path.exists(file_path):
                raise FileNotFoundError(f"Required separated file not found: {file_path}")

        # Only the vocal path is consumed downstream (by vocal_trans);
        # the other abspath conversions in the previous version were dead code.
        vocal_wav_path = os.path.abspath(vocal_wav_name)

        print("Step 3: Loading separated audio files...")
        vocals = librosa.load(vocal_wav_name, sr=44100, mono=False)[0]
        piano = librosa.load(piano_wav_name, sr=44100, mono=False)[0]
        drums = librosa.load(drum_wav_name, sr=44100, mono=False)[0]
        bass = librosa.load(bass_wav_name, sr=44100, mono=False)[0]
        other = librosa.load(other_wav_name, sr=44100, mono=False)[0]

        # NOTE: 'other' here is the pre-combination stem; the piano+other mix
        # written below only updates the file on disk, matching prior behavior.
        spleeter_dict = {
            'vocals': np.asarray(vocals).T,
            'piano': np.asarray(piano).T,
            'drums': np.asarray(drums).T,
            'bass': np.asarray(bass).T,
            'other': np.asarray(other).T
        }

        print("Step 4: Combining piano and other tracks...")
        real_others = librosa.load(piano_wav_name, sr=44100, mono=False)[0] + librosa.load(other_wav_name, sr=44100, mono=False)[0]
        soundfile.write(other_wav_name, real_others.T, 44100)

        print("Step 5: Quantizing audio...")
        quantize_result = wav_quantizing(wav_path, spleeter_dict, downbeat_model, beat_tracker, downbeat_tracker, device)

        print("Step 6: Transcribing vocals...")
        vocal_notes = vocal_midi2note(vocal_trans(vocal_wav_path, device=device))

        # chord_info = transcript("chord", wav_path)[1]  # intentionally disabled

        # splitext instead of wav_path[:-4]: correct for any extension length
        # (.wav, .flac, .aiff, ...), identical result for 3-char extensions.
        sav_path = os.path.splitext(wav_path)[0] + ".json"

        beat_times, downbeat_start, rhythm, bpm = quantize_result[0]
        chord_time_gap = (beat_times[1] - beat_times[0]) * rhythm
        vocal_infos = quantize(vocal_notes, beat_times, downbeat_start, chord_time_gap)
        # chord_infos = chord_quantize(chord_info, beat_times)  # intentionally disabled

        print("Step 7: Creating music info object...")
        wav_music_info = Music_info(
            melody_info=None,
            bass_info=None,
            chord_info=None,
            vocal_info=vocal_infos,
            chart_scale=None,
            title=str(wav_name),
            bpm=int(bpm),
            rhythm=int(rhythm),
            downbeat_start=float(downbeat_start),
            beat_times=beat_times,
            boundaries=None,
            segment_label=None,
            link=None,
        )

        # Guard against dirname == "" (bare filename input): os.makedirs("")
        # raises FileNotFoundError even with exist_ok=True.
        sav_dir = os.path.dirname(sav_path)
        if sav_dir:
            os.makedirs(sav_dir, exist_ok=True)
        save_to_json(wav_music_info, sav_path)

        print(f"Successfully processed {wav_name} -> {sav_path}")
        return sav_path

    except Exception as e:
        print(f"Error processing {wav_name}: {str(e)}")
        # Bare raise preserves the original traceback (unlike `raise e`).
        raise

    finally:
        # Clean up only this session's temporary folder.
        if os.path.exists(separated_base):
            print(f"Cleaning up temporary folder: {separated_base}")
            try:
                shutil.rmtree(separated_base)
                print(f"Successfully cleaned up: {separated_base}")
            except Exception as cleanup_error:
                # Best-effort cleanup: never mask the primary result/exception.
                print(f"Warning: Failed to clean up {separated_base}: {cleanup_error}")
        else:
            print(f"Temporary folder {separated_base} not found (already cleaned up?)")