|
|
import os

import joblib
import librosa
import numpy as np
|
|
|
|
|
def features_extractor(file, duration=2, n_mfcc=40):
    """
    Load an audio file and return its time-averaged MFCC feature vector.

    Parameters
    ----------
    file : path or file-like object accepted by ``librosa.load``.
    duration : float, default 2
        Seconds of audio to load from the start of the file
        (previously hard-coded; default preserves the old behavior).
    n_mfcc : int, default 40
        Number of MFCC coefficients to compute
        (previously hard-coded; default preserves the old behavior).

    Returns
    -------
    numpy.ndarray of shape (n_mfcc,)
        The mean of each MFCC coefficient over time.
    """
    audio, sample_rate = librosa.load(file, duration=duration)
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    # Collapse the time axis so each coefficient contributes one mean value.
    return np.mean(mfccs_features.T, axis=0)
|
|
|
|
|
|
|
|
def extract_audio_features(y, sr):
    """
    Compute a dictionary of summary statistics for an audio signal.

    For each of several spectral/temporal descriptors, stores the mean and
    variance under ``'<name>_mean'`` / ``'<name>_var'`` keys; also stores the
    estimated tempo under ``'tempo'``.

    Parameters
    ----------
    y : audio time series as accepted by librosa.
    sr : sample rate of ``y``.

    Returns
    -------
    dict mapping feature names to scalar statistics (plus ``'tempo'``,
    stored exactly as returned by ``librosa.beat.beat_track``).
    """
    features = {}

    def record(name, values):
        # One mean/variance pair per descriptor, keyed by convention.
        features[f'{name}_mean'] = np.mean(values)
        features[f'{name}_var'] = np.var(values)

    record('rms', librosa.feature.rms(y=y))
    record('rolloff', librosa.feature.spectral_rolloff(y=y, sr=sr))
    record('spectral_centroid', librosa.feature.spectral_centroid(y=y, sr=sr))
    record('zero_crossing_rate', librosa.feature.zero_crossing_rate(y))
    record('harmony', librosa.effects.harmonic(y))

    # Tempo is estimated from the onset-strength envelope; the beat frame
    # positions returned alongside it are not needed here.
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    features['tempo'] = tempo

    return features
|
|
|
|
|
# Scaler fitted on the training data, loaded once at import time so every
# prediction reuses the same transform.
# NOTE(review): import-time loading means a missing/relocated pickle file
# crashes module import, and the relative path assumes a fixed working
# directory — confirm this is intended.
fit = joblib.load('models/preprocessing/scaler.pkl')
|
|
|
|
|
def predict_audio_class(model, audio_path):
    """
    Predicts the genre for a wav audio file.

    Parameters
    ----------
    model : fitted classifier exposing ``.predict``.
    audio_path : path to an audio file readable by ``librosa.load``.

    Returns
    -------
    The model's prediction for the single scaled feature row.
    """
    # sr=None keeps the file's native sample rate instead of resampling
    # (presumably matching how training features were extracted — verify).
    y, sr = librosa.load(audio_path, sr=None)

    features = extract_audio_features(y, sr)

    # NOTE: this ordering (rolloff_var AFTER the spectral-centroid stats)
    # must match the column order the scaler `fit` was trained on — do not
    # "tidy" it to group the rolloff statistics together.
    feature_array = np.array([
        features['rms_mean'],
        features['rms_var'],
        features['rolloff_mean'],
        features['spectral_centroid_mean'],
        features['spectral_centroid_var'],
        features['rolloff_var'],
        features['zero_crossing_rate_mean'],
        features['zero_crossing_rate_var'],
        features['harmony_mean'],
        features['harmony_var'],
        # librosa's beat_track returns tempo as a 1-element array in newer
        # versions but a bare float in older ones; np.ravel handles both,
        # where the previous `[0]` indexing crashed on a float.
        np.ravel(features['tempo'])[0],
    ]).reshape(1, -1)

    # Apply the training-time scaler (module-level `fit`), then classify.
    # (Leftover debug print of the scaled row removed.)
    z = fit.transform(feature_array)
    prediction = model.predict(z)
    return prediction
|
|
|
|
|
from pydub import AudioSegment |
|
|
def convert_mp3_to_wav(mp3_file, wav_file):
    """
    Decode an MP3 file and write it back out as WAV.

    Parameters
    ----------
    mp3_file : path to the source MP3 file.
    wav_file : destination path for the exported WAV file.
    """
    AudioSegment.from_mp3(mp3_file).export(wav_file, format='wav')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Class-index → genre-label mapping for the classifier's output.
# NOTE(review): jazz maps to 9, after rock at 8, breaking the otherwise
# alphabetical order — presumably this mirrors the label encoding used at
# training time; verify against the training pipeline before changing.
genres = {
    0: 'blues', 1: 'classical', 2: 'country', 3: 'disco', 4: 'hiphop',
    5: 'metal', 6: 'pop', 7: 'reggae', 8: 'rock', 9: 'jazz'
}
|
|
|
|
|
def predict_genre_LSTM(mp3_file, model):
    """
    Predict the genre of an MP3 file with an LSTM model.

    Converts the MP3 to a temporary WAV, extracts the mean-MFCC feature
    vector, reshapes it for the LSTM, and returns the raw model output.

    Parameters
    ----------
    mp3_file : path to the MP3 file to classify.
    model : Keras-style model exposing ``.predict``.

    Returns
    -------
    The model's prediction array for the single input sample.
    """
    wav_file = "temp_converted_audio.wav"
    convert_mp3_to_wav(mp3_file, wav_file)
    try:
        features = features_extractor(wav_file)
        # Add batch and channel axes: (n_mfcc,) -> (1, n_mfcc, 1), the
        # input shape the LSTM presumably expects — confirm against model.
        features = np.expand_dims(features, axis=0)
        features = np.expand_dims(features, axis=-1)
        prediction = model.predict(features)
    finally:
        # Previously the temp WAV was left behind after every call; remove
        # it even when extraction or prediction raises.
        if os.path.exists(wav_file):
            os.remove(wav_file)
    return prediction