File size: 1,932 Bytes
faaf907
 
 
 
 
 
2ae534a
faaf907
 
30dc2c6
faaf907
 
30dc2c6
 
 
faaf907
30dc2c6
 
 
 
 
 
faaf907
 
 
 
 
 
 
 
 
 
 
f5bfa40
 
 
30dc2c6
2ae534a
 
 
 
30dc2c6
 
2ae534a
 
 
f5bfa40
2ae534a
 
faaf907
 
 
a7d5cd7
faaf907
 
 
 
 
 
 
2ae534a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import librosa
import numpy as np
import soundfile as sf
import gradio as gr
import argparse

def auto_tune(audio_path, target_pitch=440.0):
    """Shift a recording so its median detected pitch lands on ``target_pitch``.

    The whole signal is transposed by one constant interval; individual
    notes are not corrected separately.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.
        target_pitch: Desired median pitch in Hz. Must be positive and finite.

    Returns:
        A ``(y_tuned, sr)`` tuple: the pitch-shifted samples and the sample
        rate reported by ``librosa.load``.

    Raises:
        ValueError: If ``target_pitch`` is not a positive finite number, or
            if no pitch could be detected in the audio.
    """
    # Validate up front: a zero/negative/NaN target would turn the ratio
    # below into -inf/NaN and fail deep inside librosa with an opaque error.
    if target_pitch is None or not np.isfinite(target_pitch) or target_pitch <= 0:
        raise ValueError("target_pitch must be a positive, finite frequency in Hz.")

    # Load the audio file
    y, sr = librosa.load(audio_path)

    # Per-bin, per-frame pitch candidates and their magnitudes
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)

    # Boolean-mask indexing flattens the 2D arrays to 1D; keep only the
    # candidates whose magnitude is above the overall median.
    strong = pitches[magnitudes > np.median(magnitudes)]

    # Drop zero entries (bins/frames where no pitch was found)
    voiced = strong[strong > 0]

    if voiced.size == 0:
        raise ValueError("No pitch detected in the audio. Ensure the input audio contains a detectable pitch.")

    # Frequency ratio needed to move the median pitch onto the target
    median_pitch = np.median(voiced)
    tuning_ratio = target_pitch / median_pitch

    # pitch_shift counts n_steps in units of 1/bins_per_octave octave, so an
    # octave ratio (log2) has to be scaled by the number of steps per octave.
    steps_per_octave = 12
    n_steps = steps_per_octave * np.log2(tuning_ratio)

    # sr is passed by keyword: it is keyword-only in recent librosa releases.
    y_tuned = librosa.effects.pitch_shift(
        y, sr=sr, n_steps=n_steps, bins_per_octave=steps_per_octave
    )

    return y_tuned, sr

def run_gradio():
    """Build the Gradio interface around ``auto_tune`` and launch it.

    The UI takes an audio clip and a target pitch, and plays back the
    tuned audio. Failures are reported to the user through ``gr.Error``.
    """
    # Local imports keep this block self-contained; both are stdlib.
    import os
    import tempfile

    def gradio_interface(audio, target_pitch):
        """Tune one clip received from the UI.

        Args:
            audio: ``(sample_rate, samples)`` pair from ``gr.Audio``, or
                ``None`` when nothing was uploaded or recorded.
            target_pitch: Value of the "Target Pitch" number field.

        Returns:
            ``(sample_rate, samples)`` of the tuned audio.

        Raises:
            gr.Error: If no audio was provided or processing failed.
        """
        # The output component is gr.Audio, so a plain string cannot be
        # returned as an error; raise gr.Error to surface the message.
        if audio is None:
            raise gr.Error("Error: No audio input provided.")

        try:
            # A per-request temporary directory avoids concurrent requests
            # overwriting a shared file and removes the file afterwards.
            with tempfile.TemporaryDirectory() as work_dir:
                audio_path = os.path.join(work_dir, "input_audio.wav")

                # Save the input audio so auto_tune can load it by path
                sf.write(audio_path, audio[1], audio[0])

                # Apply auto-tune
                y_tuned, sr = auto_tune(audio_path, target_pitch=target_pitch)
        except Exception as e:
            # UI boundary: convert any failure into a user-visible error.
            raise gr.Error(f"Error occurred: {str(e)}") from e

        # Return the tuned audio to Gradio for playback
        return (sr, y_tuned)

    iface = gr.Interface(
        fn=gradio_interface,
        inputs=[gr.Audio(type="numpy"), gr.Number(value=440.0, label="Target Pitch")],
        outputs=gr.Audio(type="numpy", label="Tuned Audio"),
        title="Auto-Tune Voice",
        description="Upload or record your voice and apply auto-tune.",
    )
    iface.launch()

# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_gradio()