"""Gradio app: upload/record a voice clip and visualize its pitch contour.

The pitch track is estimated with librosa's probabilistic YIN (pyin) and
rendered on top of the waveform as a scatter plot.
"""

import matplotlib

# Select the non-GUI backend BEFORE importing pyplot — backend choice is only
# guaranteed to take effect if set prior to the first pyplot import.
matplotlib.use("Agg")

from io import BytesIO

import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image


def plot_pitch_contour(audio_file):
    """Render the pitch contour of an audio file as an RGB(A) image array.

    Parameters
    ----------
    audio_file : str or file-like
        Path (or buffer) accepted by ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        The rendered figure as an image array, directly usable by Gradio.
    """
    # sr=None preserves the file's native sampling rate.
    y, sr = librosa.load(audio_file, sr=None)

    # Typical human-voice search range: C2 (~65 Hz) to C6 (~1047 Hz).
    fmin = librosa.note_to_hz("C2")
    fmax = librosa.note_to_hz("C6")
    pitch, voiced_flag, voiced_probs = librosa.pyin(
        y, fmin=fmin, fmax=fmax, sr=sr
    )

    # Mask unvoiced/undetected frames with NaN so scatter skips them entirely.
    # (Setting them to 0 would draw a misleading row of dots along y = 0.)
    pitch = np.where(np.isfinite(pitch), pitch, np.nan)

    # Use an explicit Figure instead of pyplot's implicit global state —
    # safer when the function is called concurrently by a web server.
    fig, ax = plt.subplots(figsize=(14, 5))
    librosa.display.waveshow(y, sr=sr, ax=ax)

    times = librosa.times_like(pitch, sr=sr)
    ax.scatter(times, pitch, color="blue", s=5)  # s=5: dots large enough to see

    ax.set_title("Pitch Contour")
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Pitch (Hz)")
    # Fixed display ceiling of 350 Hz (covers most speech); note pyin may
    # report pitches up to fmax (~1047 Hz), which will fall outside the view.
    ax.set_ylim(0, 350)

    # Render to an in-memory PNG and convert to a numpy array for Gradio.
    buf = BytesIO()
    fig.savefig(buf, format="png", dpi=100)
    plt.close(fig)
    buf.seek(0)
    img_array = np.array(Image.open(buf))
    buf.close()
    return img_array


def analyze_audio(audio_data):
    """Gradio handler: echo the audio back and return its pitch-contour plot.

    ``audio_data`` is the filepath of the temporary file Gradio creates for
    the upload/recording.
    """
    plot_img_array = plot_pitch_contour(audio_data)
    return audio_data, plot_img_array


iface = gr.Interface(
    fn=analyze_audio,
    inputs=gr.Audio(type="filepath", label="Upload or Record your voice"),
    outputs=["audio", "image"],
    title="Voice Recording and Pitch Contour Visualization",
    description="Upload or record your voice and visualize the pitch contour.",
)

# Guard the launch so importing this module (e.g. for testing) does not
# start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()