Hev832 committed on
Commit
0383056
·
verified ·
1 Parent(s): a7d5cd7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -2
app.py CHANGED
@@ -1,6 +1,103 @@
1
- import os
 
 
 
 
 
 
 
 
2
 
 
3
 
 
 
 
 
4
 
 
 
 
 
 
5
 
6
- os.system("python runfile.py")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from functools import partial
3
+ from pathlib import Path
4
+ import librosa
5
+ import numpy as np
6
+ import matplotlib.pyplot as plt
7
+ import soundfile as sf
8
+ import scipy.signal as sig
9
+ import psola
10
 
11
# Number of semitones spanned by one octave; used to fold MIDI note numbers
# onto pitch classes and to raise a scale degree by an octave.
SEMITONES_IN_OCTAVE: int = 12
12
 
13
def degrees_from(scale: str):
    """Return the degrees of *scale* followed by its first degree one octave up.

    Args:
        scale: Key specification passed straight to
            ``librosa.key_to_degrees`` (e.g. ``"C:maj"``).

    Returns:
        A NumPy array holding the degrees reported by librosa with one extra
        trailing entry: the first degree plus ``SEMITONES_IN_OCTAVE``.
    """
    scale_degrees = librosa.key_to_degrees(scale)
    first_degree_octave_up = scale_degrees[0] + SEMITONES_IN_OCTAVE
    return np.concatenate((scale_degrees, [first_degree_octave_up]))
17
 
18
def closest_pitch(f0):
    """Round every pitch in *f0* to the nearest MIDI note, returned in Hz.

    Args:
        f0: Array of frequencies in Hz; NaN entries are kept as NaN.

    Returns:
        Array of the same shape with each frequency replaced by the frequency
        of its nearest whole MIDI note.
    """
    nearest_midi = np.round(librosa.hz_to_midi(f0))
    # Explicitly keep NaN wherever the input pitch was NaN.
    nearest_midi[np.isnan(f0)] = np.nan
    return librosa.midi_to_hz(nearest_midi)
23
 
24
def closest_pitch_from_scale(f0, scale):
    """Snap a single pitch to the nearest degree of *scale*.

    The nearest degree is chosen by circular (mod-octave) distance between
    pitch classes, so a pitch just below an octave boundary can snap upward
    across it regardless of which degree the key starts on. A plain linear
    distance only wraps correctly for keys whose first degree is 0.

    Args:
        f0: One frequency in Hz, or NaN.
        scale: Key specification understood by ``librosa.key_to_degrees``
            (e.g. ``"C:maj"``).

    Returns:
        The corrected frequency in Hz, or NaN when *f0* is NaN.
    """
    if np.isnan(f0):
        return np.nan

    degrees = degrees_from(scale)
    midi_note = librosa.hz_to_midi(f0)
    degree = midi_note % SEMITONES_IN_OCTAVE

    # Signed distance from each scale degree to the pitch class, wrapped into
    # [-half_octave, half_octave). The octave-up entry appended by
    # degrees_from() wraps onto the first degree, so it is harmless here.
    half_octave = SEMITONES_IN_OCTAVE / 2
    offsets = (degree - degrees + half_octave) % SEMITONES_IN_OCTAVE - half_octave
    nearest = np.argmin(np.abs(offsets))

    # Removing the offset moves the note onto the chosen degree while keeping
    # it in the octave closest to the original pitch.
    return librosa.midi_to_hz(midi_note - offsets[nearest])
34
+
35
def aclosest_pitch_from_scale(f0, scale):
    """Snap every pitch in *f0* to *scale*, then median-smooth the result.

    Args:
        f0: Array of frequencies in Hz, indexed along its first axis.
        scale: Key specification forwarded to ``closest_pitch_from_scale``.

    Returns:
        The per-element corrected pitches after an 11-sample median filter.
        Positions where the filtered value is NaN are filled with the
        unfiltered corrected pitch at that position.
    """
    snapped = np.zeros_like(f0)
    for position, pitch in enumerate(f0):
        snapped[position] = closest_pitch_from_scale(pitch, scale)

    filtered = sig.medfilt(snapped, kernel_size=11)
    # Where the filter produced NaN, fall back to the unfiltered value.
    nan_after_filter = np.isnan(filtered)
    filtered[nan_after_filter] = snapped[nan_after_filter]
    return filtered
42
+
43
def _save_pitch_plot(audio, sr, f0, corrected_f0, frame_length, hop_length, fmin, fmax):
    """Save a spectrogram with both pitch tracks overlaid to 'pitch_correction.png'."""
    # Imported explicitly: a bare `import librosa` does not guarantee that the
    # `display` submodule is loaded.
    import librosa.display

    stft = librosa.stft(audio, n_fft=frame_length, hop_length=hop_length)
    time_points = librosa.times_like(stft, sr=sr, hop_length=hop_length)
    log_stft = librosa.amplitude_to_db(np.abs(stft), ref=np.max)

    fig, ax = plt.subplots()
    try:
        img = librosa.display.specshow(log_stft, x_axis='time', y_axis='log', ax=ax, sr=sr, hop_length=hop_length, fmin=fmin, fmax=fmax)
        fig.colorbar(img, ax=ax, format="%+2.f dB")
        ax.plot(time_points, f0, label='original pitch', color='cyan', linewidth=2)
        ax.plot(time_points, corrected_f0, label='corrected pitch', color='orange', linewidth=1)
        ax.legend(loc='upper right')
        # Operate on this figure/axes directly rather than on pyplot's global
        # "current figure".
        ax.set_ylabel('Frequency [Hz]')
        ax.set_xlabel('Time [M:SS]')
        fig.savefig('pitch_correction.png', dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)


def autotune(audio, sr, correction_function, plot=False):
    """Pitch-correct *audio* using PYIN pitch tracking and PSOLA resynthesis.

    Args:
        audio: Audio samples passed to ``librosa.pyin`` and ``psola.vocode``.
        sr: Sample rate of *audio*.
        correction_function: Callable that receives the tracked pitch array
            and returns the target pitch array.
        plot: When true, also write a spectrogram with the original and
            corrected pitch tracks to ``pitch_correction.png``.

    Returns:
        The result of ``psola.vocode`` for the corrected pitch track.
    """
    frame_length = 2048
    hop_length = frame_length // 4
    # Pitch search range used for tracking, plotting and resynthesis.
    fmin = librosa.note_to_hz('C2')
    fmax = librosa.note_to_hz('C7')

    # Only the pitch track is used; the voicing outputs are discarded.
    f0, _voiced_flag, _voiced_probabilities = librosa.pyin(audio, frame_length=frame_length, hop_length=hop_length, sr=sr, fmin=fmin, fmax=fmax)
    corrected_f0 = correction_function(f0)

    if plot:
        _save_pitch_plot(audio, sr, f0, corrected_f0, frame_length, hop_length, fmin, fmax)

    return psola.vocode(audio, sample_rate=int(sr), target_pitch=corrected_f0, fmin=fmin, fmax=fmax)
68
+
69
def process_audio(vocals_file, correction_method, scale, plot):
    """Load an uploaded vocals file, pitch-correct it and write the result.

    Args:
        vocals_file: Path of the uploaded audio file.
        correction_method: ``'closest'`` selects ``closest_pitch``; any other
            value selects scale-based correction.
        scale: Key specification for scale-based correction (e.g. ``C:maj``).
        plot: Whether to also produce the pitch-correction plot.

    Returns:
        A tuple ``(audio_path, plot_path)``; ``plot_path`` is ``None`` when
        *plot* is false.

    Raises:
        gr.Error: If no file was provided, or if scale-based correction was
            requested without a scale.
    """
    # Fail early with a readable message instead of an opaque library error.
    if not vocals_file:
        raise gr.Error("Please upload a vocals file before processing.")

    if correction_method == 'closest':
        correction_function = closest_pitch
    else:
        scale = (scale or "").strip()
        if not scale:
            raise gr.Error("Please enter a scale (e.g., C:maj) for the 'scale' correction method.")
        correction_function = partial(aclosest_pitch_from_scale, scale=scale)

    y, sr = librosa.load(vocals_file, sr=None, mono=False)
    if y.ndim > 1:
        # Multi-channel input: only the first channel is processed.
        y = y[0, :]

    pitch_corrected_y = autotune(y, sr, correction_function, plot)

    output_file = "pitch_corrected_audio.wav"
    sf.write(output_file, pitch_corrected_y, sr)

    if plot:
        # autotune() writes the plot to this fixed filename when plot is true.
        return output_file, 'pitch_correction.png'
    return output_file, None
83
+
84
def main():
    """Build the Gradio interface for the auto-tune tool and launch it."""
    with gr.Blocks(title="Hex AutoTune") as app:
        gr.Markdown("# Hex Auto-Tune Audio with Pitch Correction")
        with gr.Row():
            # Left column: inputs and the trigger button.
            with gr.Column():
                vocals_input = gr.Audio(source="upload", type="filepath", label="Upload Vocals File")
                method_input = gr.Radio(["closest", "scale"], label="Correction Method", value="closest")
                scale_input = gr.Textbox(label="Scale (only for 'scale' method)", placeholder="e.g., C:maj")
                plot_input = gr.Checkbox(label="Generate Pitch Correction Plot", value=False)
                process_button = gr.Button("Process")
            # Right column: results.
            with gr.Column():
                audio_result = gr.Audio(label="Pitch Corrected Audio")
                plot_result = gr.Image(label="Pitch Correction Plot (if selected)")

        # Component order must match process_audio's parameters and return tuple.
        handler_inputs = [vocals_input, method_input, scale_input, plot_input]
        handler_outputs = [audio_result, plot_result]
        process_button.click(fn=process_audio, inputs=handler_inputs, outputs=handler_outputs)

    app.launch()


# Start the app only when this file is run as a script.
if __name__ == '__main__':
    main()