"""Gradio app: upload/record a voice clip and visualize its pitch contour.

The pitch track is estimated with librosa's probabilistic YIN (pyin) and
rendered on top of the waveform as a scatter plot.
"""

import matplotlib

# Select the non-GUI backend BEFORE importing pyplot — backend choice is only
# guaranteed to take effect if set prior to the first pyplot import.
matplotlib.use("Agg")

from io import BytesIO

import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image


def plot_pitch_contour(audio_file):
    """Render the pitch contour of an audio file as an RGB(A) image array.

    Parameters
    ----------
    audio_file : str or file-like
        Path (or buffer) accepted by ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        The rendered figure as an image array, directly usable by Gradio.
    """
    # sr=None preserves the file's native sampling rate.
    y, sr = librosa.load(audio_file, sr=None)

    # Typical human-voice search range: C2 (~65 Hz) to C6 (~1047 Hz).
    fmin = librosa.note_to_hz("C2")
    fmax = librosa.note_to_hz("C6")
    pitch, voiced_flag, voiced_probs = librosa.pyin(
        y, fmin=fmin, fmax=fmax, sr=sr
    )

    # Mask unvoiced/undetected frames with NaN so scatter skips them entirely.
    # (Setting them to 0 would draw a misleading row of dots along y = 0.)
    pitch = np.where(np.isfinite(pitch), pitch, np.nan)

    # Use an explicit Figure instead of pyplot's implicit global state —
    # safer when the function is called concurrently by a web server.
    fig, ax = plt.subplots(figsize=(14, 5))
    librosa.display.waveshow(y, sr=sr, ax=ax)

    times = librosa.times_like(pitch, sr=sr)
    ax.scatter(times, pitch, color="blue", s=5)  # s=5: dots large enough to see

    ax.set_title("Pitch Contour")
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Pitch (Hz)")
    # Fixed display ceiling of 350 Hz (covers most speech); note pyin may
    # report pitches up to fmax (~1047 Hz), which will fall outside the view.
    ax.set_ylim(0, 350)

    # Render to an in-memory PNG and convert to a numpy array for Gradio.
    buf = BytesIO()
    fig.savefig(buf, format="png", dpi=100)
    plt.close(fig)
    buf.seek(0)
    img_array = np.array(Image.open(buf))
    buf.close()
    return img_array


def analyze_audio(audio_data):
    """Gradio handler: echo the audio back and return its pitch-contour plot.

    ``audio_data`` is the filepath of the temporary file Gradio creates for
    the upload/recording.
    """
    plot_img_array = plot_pitch_contour(audio_data)
    return audio_data, plot_img_array


iface = gr.Interface(
    fn=analyze_audio,
    inputs=gr.Audio(type="filepath", label="Upload or Record your voice"),
    outputs=["audio", "image"],
    title="Voice Recording and Pitch Contour Visualization",
    description="Upload or record your voice and visualize the pitch contour.",
)

# Guard the launch so importing this module (e.g. for testing) does not
# start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()