import numpy as np 
import librosa
import math
from tensorflow import keras
import streamlit as st
import time

def get_mfcc(audio_signal, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
    """Extract per-segment MFCC features from an audio file.

    The audio is loaded at 22050 Hz, its duration is truncated to whole
    seconds, and that span is divided into ``num_segments`` equal pieces.
    MFCCs are computed for each piece independently.

    Args:
        audio_signal: Anything ``librosa.load`` accepts as its source
            (the callers in this file pass a path string or an uploaded
            file object).
        num_mfcc: Number of MFCC coefficients per frame.
        n_fft: FFT window size forwarded to librosa.
        hop_length: Hop length, in samples, forwarded to librosa.
        num_segments: Number of equal-length segments to cut the track into.

    Returns:
        A list with one entry per kept segment. Each entry is a nested list
        holding the transposed MFCC matrix of that segment. The list is empty
        when the audio is too short to yield a non-empty segment.
    """
    SAMPLE_RATE = 22050
    signal, sample_rate = librosa.load(audio_signal, sr=SAMPLE_RATE)

    # Keyword arguments are required here: recent librosa releases reject
    # positional audio/sample-rate arguments for these functions.
    track_duration = int(librosa.get_duration(y=signal, sr=sample_rate))  # whole seconds
    samples_per_track = sample_rate * track_duration
    samples_per_segment = int(samples_per_track / num_segments)

    # Clips shorter than one second (or with more segments than samples)
    # leave nothing to analyse; bail out instead of feeding librosa an
    # empty slice.
    if samples_per_segment <= 0:
        return []

    expected_vectors = math.ceil(samples_per_segment / hop_length)

    segments = []
    for index in range(num_segments):
        # Sample bounds of the current segment.
        start = samples_per_segment * index
        finish = start + samples_per_segment

        mfcc = librosa.feature.mfcc(
            y=signal[start:finish],
            sr=sample_rate,
            n_mfcc=num_mfcc,
            n_fft=n_fft,
            hop_length=hop_length,
        )
        mfcc = mfcc.T

        # Keep only segments with the expected number of frames so every
        # stored entry has the same length.
        if len(mfcc) == expected_vectors:
            segments.append(mfcc.tolist())

    return segments

def prediction(mfcc, model_path='music-gen-clasiify-v1.h5'):
    """Predict a single genre label for a track from its segment MFCCs.

    Each segment is classified by the saved CNN and the label chosen for the
    whole track is the one predicted for the most segments (ties resolve to
    the lowest label index).

    Args:
        mfcc: Sequence of per-segment MFCC matrices, as produced by
            ``get_mfcc``. All segments must have the same dimensions.
        model_path: Path of the saved Keras model to load.

    Returns:
        The winning class index as a NumPy integer.

    Raises:
        ValueError: If ``mfcc`` contains no segments.
    """
    features = np.asarray(mfcc)
    # Fail early and clearly, before the (slow) model load, when there is
    # nothing to classify.
    if features.size == 0:
        raise ValueError('No MFCC segments were supplied; cannot predict a genre.')

    cnn_model = keras.models.load_model(model_path)

    # Append a trailing axis of length 1 (presumably the channel axis the
    # CNN expects - confirm against the model's input shape).
    features = features[..., np.newaxis]
    probabilities = cnn_model.predict(features)

    # One label per segment, then a majority vote. Taking max() of the
    # labels would simply favour the highest class index that appears.
    segment_labels = np.argmax(probabilities, axis=1)
    return np.bincount(segment_labels).argmax()

def get_genre(prediction):
    """Translate a numeric class label into its genre name.

    Labels 0 through 9 map, in order, to the names listed below. Any other
    value yields an empty string.
    """
    genre_names = (
        'Blues',
        'Classical',
        'Country',
        'Disco',
        'Hip Hop',
        'Jazz',
        'Metal',
        'Pop',
        'Reggae',
        'Rock',
    )
    # Compare with == against each label in ascending order and stop at the
    # first match.
    for label, name in enumerate(genre_names):
        if prediction == label:
            return name
    return ''


def main():
    """Render the Streamlit page and run genre classification on demand.

    The user either uploads a .WAV file (rejected when longer than 30
    seconds) or picks one of the bundled sample files. Pressing "Submit"
    extracts MFCCs from the chosen audio, runs the CNN, and shows the
    predicted genre.

    Label order used by the model:
    0-> Blues 1-> classical 2-> country 3-> disco 4-> hiphop 5-> jazz
    6-> metal 7-> pop 8-> reggae 9-> rock
    """
    st.set_page_config(layout='wide',page_title='Genre Classification',page_icon='🎵')
    st.title('Music Genre Classifcation With CNN')
    st.markdown('We use **GTZAN** Dataset which is a very popular dataset for Audio Classification. The Uploaded sample of audio file should be of less then **30sec** and **.WAV** format for best results try to provide sections that have the most **elemental** or **instrumental ensemble** and should be of 30sec. If you want to test the model select ***Untrained Samples***. The model right now support only 10 genre which are blues, jazz, rock, metal, country, reagge, hiphop, pop, disco. A project by Tushar Nautiyal')
    selected_item = st.selectbox('Select Either Uploaded Samples or Upload your own',['Untrained Samples','Upload'])

    files = None
    # True only when `files` refers to audio that is allowed to be classified.
    ready = False

    if selected_item == 'Upload':
        files = st.file_uploader('Select .WAV File with maximum 30sec Time', type='wav', accept_multiple_files=False)

        if files is not None:
            audio, sr = librosa.load(files, sr=22050)
            duration = int(librosa.get_duration(y=audio, sr=sr))

            # Recompute the flag on every run so that a previous over-long
            # upload does not block later valid uploads.
            st.session_state['file_uploaded'] = duration <= 30

            if duration > 30:
                st.write('Reupload File as it exceeds the time limit')
                bar = st.progress(0)
                st.write('Please Reupload Files')
                for percent_complete in range(100):
                    time.sleep(0.01)
                    bar.progress(percent_complete + 1)
                st.write("Reupload files Thank You.")
            else:
                st.audio(files, format="audio/wav", start_time=0)
                ready = True

    elif selected_item == 'Untrained Samples':
        selected_file = st.selectbox("Select A Sample", ['Blues','Jazz','Country','Classical','Hiphop','Metal','Pop','Reggae','Rock'])
        files = f'{selected_file}.wav'
        st.audio(files, format="audio/wav", start_time=0)
        ready = True

    submitted = st.button("Submit")
    if not submitted:
        return

    if not ready:
        # Nothing valid to classify: no upload yet, or the upload was too long.
        st.write('Please upload a .WAV file of at most 30sec before submitting.')
        return

    # The uploaded buffer was already read once for the duration check;
    # rewind it so the second decode starts from the beginning.
    if hasattr(files, 'seek'):
        files.seek(0)

    genre = None
    with st.spinner('Model is Trying to predict your genre! Wait for it'):
        mfcc_for_track = get_mfcc(files)

        # Only run the model when at least one usable segment was extracted.
        if mfcc_for_track:
            predict = prediction(mfcc_for_track)
            genre = get_genre(int(predict))

    if genre is None:
        st.write('Could not extract features from this audio. Please try a different file.')
        return

    st.success('Yes its Done and here is the answer!')
    st.markdown(f'The Genre for your music is 🎵  : **{genre}** Music')

# Launch the app only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()