Hetan07 commited on
Commit
6bcf173
·
1 Parent(s): 208a636

Modified a few files

Browse files
__pycache__/audio_splitting.cpython-310.pyc ADDED
Binary file (825 Bytes). View file
 
__pycache__/feature_extraction.cpython-310.pyc ADDED
Binary file (3.28 kB). View file
 
app.py CHANGED
@@ -15,7 +15,8 @@ import audio_splitting
15
 
16
  # Create a Streamlit web app
17
  st.title("Music Genre Classifier")
18
-
 
19
  # Upload music file
20
  uploaded_file = st.file_uploader("Upload a music file", type=["mp3", "wav"])
21
 
@@ -23,44 +24,60 @@ if uploaded_file is not None:
23
  # User selects a model
24
  all_models = ["K-Nearest Neighbors - (Single Label)", "Logistic Regression - (Single Label)", "Support Vector Machines - (Single Label)",
25
  "Neural Network - (Single Label)",
26
- "XGB Classifier - (Single Label)", "Convolutional Recurrent Neural Network - (Multi Label)", "XGB - (Multi Label)",
27
- "Neural Network - (Multi Label)","Batch Normalization - (Multi Label)"]
28
  model_name = st.selectbox("Select a model", all_models)
29
  st.write(f"Prediction of following genres")
 
30
  multi_class_names = ["Metal", "Jazz", "Blues", "R&B", "Classical", "Reggae", "Rap & Hip-Hop", "Punk", "Rock",
31
  "Country", "Bebop", "Pop", "Soul", "Dance & Electronic", "Folk"]
32
 
33
- st.write(multi_class_names)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  # Load the selected model
36
  if model_name == "K-Nearest Neighbors - (Single Label)":
37
- model = joblib.load("./models/knn.pkl")
38
  elif model_name == "Logistic Regression - (Single Label)":
39
- model = joblib.load("./models/logistic.pkl")
40
  elif model_name == "Support Vector Machines - (Single Label)":
41
- model = joblib.load("./models/svm.pkl")
42
  elif model_name == "Neural Network - (Single Label)":
43
- model = joblib.load("./models/nn.pkl")
44
  elif model_name == "XGB Classifier - (Single Label)":
45
- model = joblib.load("./models/xgb.pkl")
46
  elif model_name == "XGB - (Multi Label)":
47
- model = joblib.load("./models/xgb_mlb.pkl")
48
  elif model_name == "Convolutional Recurrent Neural Network - (Multi Label)":
49
- model = tensorflow.keras.models.load_model("./models/model_crnn1.h5", compile=False)
50
  model.compile(loss=binary_crossentropy,
51
  optimizer=Adam(),
52
  metrics=['accuracy'])
53
  elif model_name == "Neural Network - (Multi Label)":
54
- model = tensorflow.keras.models.load_model("./models/model_nn.h5", compile=False)
55
  model.compile(loss=binary_crossentropy,
56
  optimizer=Adam(),
57
  metrics=['accuracy'])
58
  elif model_name == "Batch Normalization - (Multi Label)":
59
- model = tensorflow.keras.models.load_model("./models/model_bn.h5", compile=False)
60
  model.compile(loss=binary_crossentropy,
61
  optimizer=Adam(),
62
  metrics=['accuracy'])
63
- class_names = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
64
 
65
  xgb_multi_class_names = ["Rock", "Rap & Hip-Hop", "Soul", "Classical", "Dance & Electronic", "Blues","Jazz",
66
  "Country","Bebop","Folk","Reggae","R&B","Punk","Metal","Pop"]
@@ -72,7 +89,7 @@ if uploaded_file is not None:
72
  features_list = audio_splitting.split_audio(uploaded_file)
73
  features = feature_extraction.scale(features_list)
74
 
75
- st.write(features)
76
  # Reshape the features to match the expected shape for prediction
77
  reshaped_features = features.reshape(1, -1)
78
  if model_name == "XGB - (Multi Label)":
@@ -117,5 +134,6 @@ if uploaded_file is not None:
117
  else:
118
  st.write("No genre predicted above the threshold.")
119
  else:
120
- predicted_label = model.predict(reshaped_features)[0]
121
  st.write(f"Predicted Genre: {predicted_label}")
 
 
15
 
16
  # Create a Streamlit web app
17
  st.title("Music Genre Classifier")
18
+ st.write("A single-label music genre classifier based and trained on the GTZAN Dataset available for use on "
19
+ "Kaggle. All the models have been trained on that dataset.")
20
  # Upload music file
21
  uploaded_file = st.file_uploader("Upload a music file", type=["mp3", "wav"])
22
 
 
24
  # User selects a model
25
  all_models = ["K-Nearest Neighbors - (Single Label)", "Logistic Regression - (Single Label)", "Support Vector Machines - (Single Label)",
26
  "Neural Network - (Single Label)",
27
+ "XGB Classifier - (Single Label)"]
 
28
  model_name = st.selectbox("Select a model", all_models)
29
  st.write(f"Prediction of following genres")
30
+
31
  multi_class_names = ["Metal", "Jazz", "Blues", "R&B", "Classical", "Reggae", "Rap & Hip-Hop", "Punk", "Rock",
32
  "Country", "Bebop", "Pop", "Soul", "Dance & Electronic", "Folk"]
33
 
34
+ class_names = ["Blues", "Classical", "Country", "Disco", "HipHop",
35
+ "Jazz", "Metal", "Pop", "Reggae", "Rock"]
36
+
37
+ col1, col2 = st.columns(2)
38
+ s = ''
39
+ with col1:
40
+ for i in class_names[:5]:
41
+ s += "- " + i + "\n"
42
+ st.markdown(s)
43
+
44
+ s = ''
45
+
46
+ with col2:
47
+ for i in class_names[5:]:
48
+ s += "- " + i + "\n"
49
+ st.markdown(s)
50
+ # st.write(multi_class_names)
51
 
52
  # Load the selected model
53
  if model_name == "K-Nearest Neighbors - (Single Label)":
54
+ model = joblib.load("../models/knn.pkl")
55
  elif model_name == "Logistic Regression - (Single Label)":
56
+ model = joblib.load("../models/logistic.pkl")
57
  elif model_name == "Support Vector Machines - (Single Label)":
58
+ model = joblib.load("../models/svm.pkl")
59
  elif model_name == "Neural Network - (Single Label)":
60
+ model = joblib.load("../models/nn.pkl")
61
  elif model_name == "XGB Classifier - (Single Label)":
62
+ model = joblib.load("../models/xgb.pkl")
63
  elif model_name == "XGB - (Multi Label)":
64
+ model = joblib.load("../models/xgb_mlb.pkl")
65
  elif model_name == "Convolutional Recurrent Neural Network - (Multi Label)":
66
+ model = tensorflow.keras.models.load_model("../models/model_crnn1.h5", compile=False)
67
  model.compile(loss=binary_crossentropy,
68
  optimizer=Adam(),
69
  metrics=['accuracy'])
70
  elif model_name == "Neural Network - (Multi Label)":
71
+ model = tensorflow.keras.models.load_model("../models/model_nn.h5", compile=False)
72
  model.compile(loss=binary_crossentropy,
73
  optimizer=Adam(),
74
  metrics=['accuracy'])
75
  elif model_name == "Batch Normalization - (Multi Label)":
76
+ model = tensorflow.keras.models.load_model("../models/model_bn.h5", compile=False)
77
  model.compile(loss=binary_crossentropy,
78
  optimizer=Adam(),
79
  metrics=['accuracy'])
80
+ # class_names = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
81
 
82
  xgb_multi_class_names = ["Rock", "Rap & Hip-Hop", "Soul", "Classical", "Dance & Electronic", "Blues","Jazz",
83
  "Country","Bebop","Folk","Reggae","R&B","Punk","Metal","Pop"]
 
89
  features_list = audio_splitting.split_audio(uploaded_file)
90
  features = feature_extraction.scale(features_list)
91
 
92
+ # st.write(features)
93
  # Reshape the features to match the expected shape for prediction
94
  reshaped_features = features.reshape(1, -1)
95
  if model_name == "XGB - (Multi Label)":
 
134
  else:
135
  st.write("No genre predicted above the threshold.")
136
  else:
137
+ predicted_label = model.predict(features)[0]
138
  st.write(f"Predicted Genre: {predicted_label}")
139
+ st.metric("Predicted Genre:",{predicted_label})
audio_splitting.py CHANGED
@@ -1,17 +1,20 @@
1
  import pydub
 
2
  from pydub import AudioSegment
3
  import feature_extraction
4
  import io
5
  def split_audio(uploaded_file):
6
  # Load your audio file
7
  # audio = AudioSegment.from_file("classical.00000.wav", format="wav")
8
- audio = AudioSegment.from_file(uploaded_file,)
 
9
  # Define the duration of each segment in milliseconds (3 seconds)
10
  segment_duration = 3 * 1000 # 3 seconds in milliseconds
11
 
12
  # Check the total duration of the audio
13
  audio_duration = len(audio)
14
 
 
15
  # Check if the audio is shorter than 1 minute and 3 seconds
16
  if audio_duration < 63 * 1000:
17
  # If it's shorter, take audio from 0 to 3 seconds
@@ -23,13 +26,14 @@ def split_audio(uploaded_file):
23
  segment = audio[start_time:end_time]
24
  output_stream = io.BytesIO()
25
  segment.export(output_stream, format="wav")
26
-
27
  # Now you can directly use the output_stream for feature extraction
28
  output_stream.seek(0) # Reset the stream position to the beginning
29
 
30
  # Process and extract features from the segment
31
  features = feature_extraction.all_feature_extraction(output_stream)
32
-
 
33
  return features
34
  # output_file = "D:/miniproject/output_segment.wav"
35
 
 
1
  import pydub
2
+ import streamlit
3
  from pydub import AudioSegment
4
  import feature_extraction
5
  import io
6
  def split_audio(uploaded_file):
7
  # Load your audio file
8
  # audio = AudioSegment.from_file("classical.00000.wav", format="wav")
9
+ audio = AudioSegment.from_file(uploaded_file)
10
+ print("Works")
11
  # Define the duration of each segment in milliseconds (3 seconds)
12
  segment_duration = 3 * 1000 # 3 seconds in milliseconds
13
 
14
  # Check the total duration of the audio
15
  audio_duration = len(audio)
16
 
17
+ print("works")
18
  # Check if the audio is shorter than 1 minute and 3 seconds
19
  if audio_duration < 63 * 1000:
20
  # If it's shorter, take audio from 0 to 3 seconds
 
26
  segment = audio[start_time:end_time]
27
  output_stream = io.BytesIO()
28
  segment.export(output_stream, format="wav")
29
+ print("Works")
30
  # Now you can directly use the output_stream for feature extraction
31
  output_stream.seek(0) # Reset the stream position to the beginning
32
 
33
  # Process and extract features from the segment
34
  features = feature_extraction.all_feature_extraction(output_stream)
35
+ print(features)
36
+ streamlit.write(features)
37
  return features
38
  # output_file = "D:/miniproject/output_segment.wav"
39
 
feature_extraction.py CHANGED
@@ -32,8 +32,11 @@ short_field = Fields[2:]
32
  def all_feature_extraction(audio_path, sample_rate=22050):
33
  data_list = []
34
  audio_df, sr = librosa.load(audio_path, sr=22050)
 
35
  data_list.append(audio_path)
 
36
  data_list.append(len(audio_df))
 
37
  # 1. Chroma STFT
38
  chroma_stft = librosa.feature.chroma_stft(y=audio_df, hop_length=512)
39
  chroma_stft_mean = np.mean(chroma_stft)
@@ -100,6 +103,7 @@ def all_feature_extraction(audio_path, sample_rate=22050):
100
  for mean, var in mfcc_list:
101
  data_list.append(mean)
102
  data_list.append(var)
 
103
  return data_list
104
 
105
  def scale(initial_features):
 
32
  def all_feature_extraction(audio_path, sample_rate=22050):
33
  data_list = []
34
  audio_df, sr = librosa.load(audio_path, sr=22050)
35
+ print("\n",audio_df)
36
  data_list.append(audio_path)
37
+ print(audio_path)
38
  data_list.append(len(audio_df))
39
+ print(data_list)
40
  # 1. Chroma STFT
41
  chroma_stft = librosa.feature.chroma_stft(y=audio_df, hop_length=512)
42
  chroma_stft_mean = np.mean(chroma_stft)
 
103
  for mean, var in mfcc_list:
104
  data_list.append(mean)
105
  data_list.append(var)
106
+ print(data_list)
107
  return data_list
108
 
109
  def scale(initial_features):