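Modified a few files: limit the app to the single-label GTZAN models, update the model file paths, and add debug output to audio splitting and feature extraction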
Browse files
- __pycache__/audio_splitting.cpython-310.pyc +0 -0
- __pycache__/feature_extraction.cpython-310.pyc +0 -0
- app.py +34 -16
- audio_splitting.py +7 -3
- feature_extraction.py +4 -0
__pycache__/audio_splitting.cpython-310.pyc
ADDED
|
Binary file (825 Bytes). View file
|
|
|
__pycache__/feature_extraction.cpython-310.pyc
ADDED
|
Binary file (3.28 kB). View file
|
|
|
app.py
CHANGED
|
@@ -15,7 +15,8 @@ import audio_splitting
|
|
| 15 |
|
| 16 |
# Create a Streamlit web app
|
| 17 |
st.title("Music Genre Classifier")
|
| 18 |
-
|
|
|
|
| 19 |
# Upload music file
|
| 20 |
uploaded_file = st.file_uploader("Upload a music file", type=["mp3", "wav"])
|
| 21 |
|
|
@@ -23,44 +24,60 @@ if uploaded_file is not None:
|
|
| 23 |
# User selects a model
|
| 24 |
all_models = ["K-Nearest Neighbors - (Single Label)", "Logistic Regression - (Single Label)", "Support Vector Machines - (Single Label)",
|
| 25 |
"Neural Network - (Single Label)",
|
| 26 |
-
"XGB Classifier - (Single Label)"
|
| 27 |
-
"Neural Network - (Multi Label)","Batch Normalization - (Multi Label)"]
|
| 28 |
model_name = st.selectbox("Select a model", all_models)
|
| 29 |
st.write(f"Predicition of following genres")
|
|
|
|
| 30 |
multi_class_names = ["Metal", "Jazz", "Blues", "R&B", "Classical", "Reggae", "Rap & Hip-Hop", "Punk", "Rock",
|
| 31 |
"Country", "Bebop", "Pop", "Soul", "Dance & Electronic", "Folk"]
|
| 32 |
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
# Load the selected model
|
| 36 |
if model_name == "K-Nearest Neighbors - (Single Label)":
|
| 37 |
-
model = joblib.load("
|
| 38 |
elif model_name == "Logistic Regression - (Single Label)":
|
| 39 |
-
model = joblib.load("
|
| 40 |
elif model_name == "Support Vector Machines - (Single Label)":
|
| 41 |
-
model = joblib.load("
|
| 42 |
elif model_name == "Neural Network - (Single Label)":
|
| 43 |
-
model = joblib.load("
|
| 44 |
elif model_name == "XGB Classifier - (Single Label)":
|
| 45 |
-
model = joblib.load("
|
| 46 |
elif model_name == "XGB - (Multi Label)":
|
| 47 |
-
model = joblib.load("
|
| 48 |
elif model_name == "Convolutional Recurrent Neural Network - (Multi Label)":
|
| 49 |
-
model = tensorflow.keras.models.load_model("
|
| 50 |
model.compile(loss=binary_crossentropy,
|
| 51 |
optimizer=Adam(),
|
| 52 |
metrics=['accuracy'])
|
| 53 |
elif model_name == "Neural Network - (Multi Label)":
|
| 54 |
-
model = tensorflow.keras.models.load_model("
|
| 55 |
model.compile(loss=binary_crossentropy,
|
| 56 |
optimizer=Adam(),
|
| 57 |
metrics=['accuracy'])
|
| 58 |
elif model_name == "Batch Normalization - (Multi Label)":
|
| 59 |
-
model = tensorflow.keras.models.load_model("
|
| 60 |
model.compile(loss=binary_crossentropy,
|
| 61 |
optimizer=Adam(),
|
| 62 |
metrics=['accuracy'])
|
| 63 |
-
class_names = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
|
| 64 |
|
| 65 |
xgb_multi_class_names = ["Rock", "Rap & Hip-Hop", "Soul", "Classical", "Dance & Electronic", "Blues","Jazz",
|
| 66 |
"Country","Bebop","Folk","Reggae","R&B","Punk","Metal","Pop"]
|
|
@@ -72,7 +89,7 @@ if uploaded_file is not None:
|
|
| 72 |
features_list = audio_splitting.split_audio(uploaded_file)
|
| 73 |
features = feature_extraction.scale(features_list)
|
| 74 |
|
| 75 |
-
st.write(features)
|
| 76 |
# Reshape the features to match the expected shape for prediction
|
| 77 |
reshaped_features = features.reshape(1, -1)
|
| 78 |
if model_name == "XGB - (Multi Label)":
|
|
@@ -117,5 +134,6 @@ if uploaded_file is not None:
|
|
| 117 |
else:
|
| 118 |
st.write("No genre predicted above the threshold.")
|
| 119 |
else:
|
| 120 |
-
predicted_label = model.predict(
|
| 121 |
st.write(f"Predicted Genre: {predicted_label}")
|
|
|
|
|
|
| 15 |
|
| 16 |
# Create a Streamlit web app
|
| 17 |
st.title("Music Genre Classifier")
|
| 18 |
+
st.write("A single-label music genre classifier based and trained on the GTZAN Dataset available for use on "
|
| 19 |
+
"Kaggle. All the models have been trained on that dataset.")
|
| 20 |
# Upload music file
|
| 21 |
uploaded_file = st.file_uploader("Upload a music file", type=["mp3", "wav"])
|
| 22 |
|
|
|
|
| 24 |
# User selects a model
|
| 25 |
all_models = ["K-Nearest Neighbors - (Single Label)", "Logistic Regression - (Single Label)", "Support Vector Machines - (Single Label)",
|
| 26 |
"Neural Network - (Single Label)",
|
| 27 |
+
"XGB Classifier - (Single Label)"]
|
|
|
|
| 28 |
model_name = st.selectbox("Select a model", all_models)
|
| 29 |
st.write(f"Predicition of following genres")
|
| 30 |
+
|
| 31 |
multi_class_names = ["Metal", "Jazz", "Blues", "R&B", "Classical", "Reggae", "Rap & Hip-Hop", "Punk", "Rock",
|
| 32 |
"Country", "Bebop", "Pop", "Soul", "Dance & Electronic", "Folk"]
|
| 33 |
|
| 34 |
+
class_names = ["Blues", "Classical", "Country", "Disco", "HipHop",
|
| 35 |
+
"Jazz", "Metal", "Pop", "Reggae", "Rock"]
|
| 36 |
+
|
| 37 |
+
col1, col2 = st.columns(2)
|
| 38 |
+
s = ''
|
| 39 |
+
with col1:
|
| 40 |
+
for i in class_names[:5]:
|
| 41 |
+
s += "- " + i + "\n"
|
| 42 |
+
st.markdown(s)
|
| 43 |
+
|
| 44 |
+
s = ''
|
| 45 |
+
|
| 46 |
+
with col2:
|
| 47 |
+
for i in class_names[5:]:
|
| 48 |
+
s += "- " + i + "\n"
|
| 49 |
+
st.markdown(s)
|
| 50 |
+
# st.write(multi_class_names)
|
| 51 |
|
| 52 |
# Load the selected model
|
| 53 |
if model_name == "K-Nearest Neighbors - (Single Label)":
|
| 54 |
+
model = joblib.load("../models/knn.pkl")
|
| 55 |
elif model_name == "Logistic Regression - (Single Label)":
|
| 56 |
+
model = joblib.load("../models/logistic.pkl")
|
| 57 |
elif model_name == "Support Vector Machines - (Single Label)":
|
| 58 |
+
model = joblib.load("../models/svm.pkl")
|
| 59 |
elif model_name == "Neural Network - (Single Label)":
|
| 60 |
+
model = joblib.load("../models/nn.pkl")
|
| 61 |
elif model_name == "XGB Classifier - (Single Label)":
|
| 62 |
+
model = joblib.load("../models/xgb.pkl")
|
| 63 |
elif model_name == "XGB - (Multi Label)":
|
| 64 |
+
model = joblib.load("../models/xgb_mlb.pkl")
|
| 65 |
elif model_name == "Convolutional Recurrent Neural Network - (Multi Label)":
|
| 66 |
+
model = tensorflow.keras.models.load_model("../models/model_crnn1.h5", compile=False)
|
| 67 |
model.compile(loss=binary_crossentropy,
|
| 68 |
optimizer=Adam(),
|
| 69 |
metrics=['accuracy'])
|
| 70 |
elif model_name == "Neural Network - (Multi Label)":
|
| 71 |
+
model = tensorflow.keras.models.load_model("../models/model_nn.h5", compile=False)
|
| 72 |
model.compile(loss=binary_crossentropy,
|
| 73 |
optimizer=Adam(),
|
| 74 |
metrics=['accuracy'])
|
| 75 |
elif model_name == "Batch Normalization - (Multi Label)":
|
| 76 |
+
model = tensorflow.keras.models.load_model("../models/model_bn.h5", compile=False)
|
| 77 |
model.compile(loss=binary_crossentropy,
|
| 78 |
optimizer=Adam(),
|
| 79 |
metrics=['accuracy'])
|
| 80 |
+
# class_names = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
|
| 81 |
|
| 82 |
xgb_multi_class_names = ["Rock", "Rap & Hip-Hop", "Soul", "Classical", "Dance & Electronic", "Blues","Jazz",
|
| 83 |
"Country","Bebop","Folk","Reggae","R&B","Punk","Metal","Pop"]
|
|
|
|
| 89 |
features_list = audio_splitting.split_audio(uploaded_file)
|
| 90 |
features = feature_extraction.scale(features_list)
|
| 91 |
|
| 92 |
+
# st.write(features)
|
| 93 |
# Reshape the features to match the expected shape for prediction
|
| 94 |
reshaped_features = features.reshape(1, -1)
|
| 95 |
if model_name == "XGB - (Multi Label)":
|
|
|
|
| 134 |
else:
|
| 135 |
st.write("No genre predicted above the threshold.")
|
| 136 |
else:
|
| 137 |
+
predicted_label = model.predict(features)[0]
|
| 138 |
st.write(f"Predicted Genre: {predicted_label}")
|
| 139 |
+
st.metric("Predicted Genre:",{predicted_label})
|
audio_splitting.py
CHANGED
|
@@ -1,17 +1,20 @@
|
|
| 1 |
import pydub
|
|
|
|
| 2 |
from pydub import AudioSegment
|
| 3 |
import feature_extraction
|
| 4 |
import io
|
| 5 |
def split_audio(uploaded_file):
|
| 6 |
# Load your audio file
|
| 7 |
# audio = AudioSegment.from_file("classical.00000.wav", format="wav")
|
| 8 |
-
audio = AudioSegment.from_file(uploaded_file
|
|
|
|
| 9 |
# Define the duration of each segment in milliseconds (3 seconds)
|
| 10 |
segment_duration = 3 * 1000 # 3 seconds in milliseconds
|
| 11 |
|
| 12 |
# Check the total duration of the audio
|
| 13 |
audio_duration = len(audio)
|
| 14 |
|
|
|
|
| 15 |
# Check if the audio is shorter than 1 minute and 3 seconds
|
| 16 |
if audio_duration < 63 * 1000:
|
| 17 |
# If it's shorter, take audio from 0 to 3 seconds
|
|
@@ -23,13 +26,14 @@ def split_audio(uploaded_file):
|
|
| 23 |
segment = audio[start_time:end_time]
|
| 24 |
output_stream = io.BytesIO()
|
| 25 |
segment.export(output_stream, format="wav")
|
| 26 |
-
|
| 27 |
# Now you can directly use the output_stream for feature extraction
|
| 28 |
output_stream.seek(0) # Reset the stream position to the beginning
|
| 29 |
|
| 30 |
# Process and extract features from the segment
|
| 31 |
features = feature_extraction.all_feature_extraction(output_stream)
|
| 32 |
-
|
|
|
|
| 33 |
return features
|
| 34 |
# output_file = "D:/miniproject/output_segment.wav"
|
| 35 |
|
|
|
|
| 1 |
import pydub
|
| 2 |
+
import streamlit
|
| 3 |
from pydub import AudioSegment
|
| 4 |
import feature_extraction
|
| 5 |
import io
|
| 6 |
def split_audio(uploaded_file):
|
| 7 |
# Load your audio file
|
| 8 |
# audio = AudioSegment.from_file("classical.00000.wav", format="wav")
|
| 9 |
+
audio = AudioSegment.from_file(uploaded_file)
|
| 10 |
+
print("Works")
|
| 11 |
# Define the duration of each segment in milliseconds (3 seconds)
|
| 12 |
segment_duration = 3 * 1000 # 3 seconds in milliseconds
|
| 13 |
|
| 14 |
# Check the total duration of the audio
|
| 15 |
audio_duration = len(audio)
|
| 16 |
|
| 17 |
+
print("works")
|
| 18 |
# Check if the audio is shorter than 1 minute and 3 seconds
|
| 19 |
if audio_duration < 63 * 1000:
|
| 20 |
# If it's shorter, take audio from 0 to 3 seconds
|
|
|
|
| 26 |
segment = audio[start_time:end_time]
|
| 27 |
output_stream = io.BytesIO()
|
| 28 |
segment.export(output_stream, format="wav")
|
| 29 |
+
print("Works")
|
| 30 |
# Now you can directly use the output_stream for feature extraction
|
| 31 |
output_stream.seek(0) # Reset the stream position to the beginning
|
| 32 |
|
| 33 |
# Process and extract features from the segment
|
| 34 |
features = feature_extraction.all_feature_extraction(output_stream)
|
| 35 |
+
print(features)
|
| 36 |
+
streamlit.write(features)
|
| 37 |
return features
|
| 38 |
# output_file = "D:/miniproject/output_segment.wav"
|
| 39 |
|
feature_extraction.py
CHANGED
|
@@ -32,8 +32,11 @@ short_field = Fields[2:]
|
|
| 32 |
def all_feature_extraction(audio_path, sample_rate=22050):
|
| 33 |
data_list = []
|
| 34 |
audio_df, sr = librosa.load(audio_path, sr=22050)
|
|
|
|
| 35 |
data_list.append(audio_path)
|
|
|
|
| 36 |
data_list.append(len(audio_df))
|
|
|
|
| 37 |
# 1. Chroma STFT
|
| 38 |
chroma_stft = librosa.feature.chroma_stft(y=audio_df, hop_length=512)
|
| 39 |
chroma_stft_mean = np.mean(chroma_stft)
|
|
@@ -100,6 +103,7 @@ def all_feature_extraction(audio_path, sample_rate=22050):
|
|
| 100 |
for mean, var in mfcc_list:
|
| 101 |
data_list.append(mean)
|
| 102 |
data_list.append(var)
|
|
|
|
| 103 |
return data_list
|
| 104 |
|
| 105 |
def scale(initial_features):
|
|
|
|
| 32 |
def all_feature_extraction(audio_path, sample_rate=22050):
|
| 33 |
data_list = []
|
| 34 |
audio_df, sr = librosa.load(audio_path, sr=22050)
|
| 35 |
+
print("\n",audio_df)
|
| 36 |
data_list.append(audio_path)
|
| 37 |
+
print(audio_path)
|
| 38 |
data_list.append(len(audio_df))
|
| 39 |
+
print(data_list)
|
| 40 |
# 1. Chroma STFT
|
| 41 |
chroma_stft = librosa.feature.chroma_stft(y=audio_df, hop_length=512)
|
| 42 |
chroma_stft_mean = np.mean(chroma_stft)
|
|
|
|
| 103 |
for mean, var in mfcc_list:
|
| 104 |
data_list.append(mean)
|
| 105 |
data_list.append(var)
|
| 106 |
+
print(data_list)
|
| 107 |
return data_list
|
| 108 |
|
| 109 |
def scale(initial_features):
|