# intonation / app.py — Gradio app: record/upload a voice clip and visualize its pitch contour.
# (Hugging Face Spaces page metadata removed from the source: author MK-316, commit 633f6c5 verified.)
import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from io import BytesIO
import matplotlib
matplotlib.use('Agg') # Set the backend to 'Agg' for non-GUI environments
def plot_pitch_contour(audio_file):
    """Render the waveform of *audio_file* with its pitch contour overlaid.

    Parameters
    ----------
    audio_file : str
        Path to an audio file readable by ``librosa.load`` (Gradio supplies
        a temporary file path).

    Returns
    -------
    numpy.ndarray
        The rendered figure as an image array, directly usable by a Gradio
        ``image`` output.
    """
    # librosa can handle a file path (or BytesIO) directly; sr=None keeps
    # the file's native sampling rate.
    y, sr = librosa.load(audio_file, sr=None)
    fmin = librosa.note_to_hz('C2')  # ~65 Hz, low end of speech F0
    fmax = librosa.note_to_hz('C6')  # ~1047 Hz, generous upper bound
    pitch, voiced_flag, voiced_probs = librosa.pyin(y, fmin=fmin, fmax=fmax, sr=sr)
    # BUG FIX: pyin marks unvoiced frames with NaN. The original code
    # replaced those NaNs with 0, which scatter() then drew as a spurious
    # row of dots along the x-axis. matplotlib skips NaN points, so leaving
    # them as NaN omits unvoiced frames from the plot entirely.
    pitch = np.asarray(pitch, dtype=float)

    plt.figure(figsize=(14, 5))
    try:
        librosa.display.waveshow(y, sr=sr)
        times = librosa.times_like(pitch, sr=sr)
        plt.scatter(times, pitch, color='blue', s=5)  # s=5: larger dots for visibility
        plt.title('Pitch Contour')
        plt.xlabel('Time (s)')
        plt.ylabel('Pitch (Hz)')
        plt.ylim(0, 350)  # fixed display ceiling chosen for typical speech F0
        buf = BytesIO()
        plt.savefig(buf, format="png", dpi=100)
    finally:
        # Always release the figure so repeated calls under the Agg backend
        # don't accumulate open figures (memory leak).
        plt.close()
    buf.seek(0)
    img = Image.open(buf)
    img_array = np.array(img)
    return img_array  # numpy array is directly usable by Gradio
def analyze_audio(audio_data):
    """Echo the uploaded audio back alongside its pitch-contour plot.

    Gradio passes a temporary file path for the recording; that same path
    is handed to the plotting helper and also returned so the audio player
    output can replay it.
    """
    contour_image = plot_pitch_contour(audio_data)
    return audio_data, contour_image
# Wire up the web UI: a single audio input feeds analyze_audio, which
# returns the original recording plus the rendered pitch-contour image.
voice_input = gr.Audio(type="filepath", label="Upload or Record your voice")
iface = gr.Interface(
    fn=analyze_audio,
    inputs=voice_input,
    outputs=["audio", "image"],
    title="Voice Recording and Pitch Contour Visualization",
    description="Upload or record your voice and visualize the pitch contour.",
)
iface.launch()