Hetan07 commited on
Commit
6bcf173
·
1 Parent(s): 208a636

Modified a few files

Browse files
__pycache__/audio_splitting.cpython-310.pyc ADDED
Binary file (825 Bytes). View file
 
__pycache__/feature_extraction.cpython-310.pyc ADDED
Binary file (3.28 kB). View file
 
app.py CHANGED
@@ -15,7 +15,8 @@ import audio_splitting
15
 
16
  # Create a Streamlit web app
17
  st.title("Music Genre Classifier")
18
-
 
19
  # Upload music file
20
  uploaded_file = st.file_uploader("Upload a music file", type=["mp3", "wav"])
21
 
@@ -23,44 +24,60 @@ if uploaded_file is not None:
23
  # User selects a model
24
  all_models = ["K-Nearest Neighbors - (Single Label)", "Logistic Regression - (Single Label)", "Support Vector Machines - (Single Label)",
25
  "Neural Network - (Single Label)",
26
- "XGB Classifier - (Single Label)", "Convolutional Recurrent Neural Network - (Multi Label)", "XGB - (Multi Label)",
27
- "Neural Network - (Multi Label)","Batch Normalization - (Multi Label)"]
28
  model_name = st.selectbox("Select a model", all_models)
29
  st.write(f"Prediction of following genres")
 
30
  multi_class_names = ["Metal", "Jazz", "Blues", "R&B", "Classical", "Reggae", "Rap & Hip-Hop", "Punk", "Rock",
31
  "Country", "Bebop", "Pop", "Soul", "Dance & Electronic", "Folk"]
32
 
33
- st.write(multi_class_names)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  # Load the selected model
36
  if model_name == "K-Nearest Neighbors - (Single Label)":
37
- model = joblib.load("./models/knn.pkl")
38
  elif model_name == "Logistic Regression - (Single Label)":
39
- model = joblib.load("./models/logistic.pkl")
40
  elif model_name == "Support Vector Machines - (Single Label)":
41
- model = joblib.load("./models/svm.pkl")
42
  elif model_name == "Neural Network - (Single Label)":
43
- model = joblib.load("./models/nn.pkl")
44
  elif model_name == "XGB Classifier - (Single Label)":
45
- model = joblib.load("./models/xgb.pkl")
46
  elif model_name == "XGB - (Multi Label)":
47
- model = joblib.load("./models/xgb_mlb.pkl")
48
  elif model_name == "Convolutional Recurrent Neural Network - (Multi Label)":
49
- model = tensorflow.keras.models.load_model("./models/model_crnn1.h5", compile=False)
50
  model.compile(loss=binary_crossentropy,
51
  optimizer=Adam(),
52
  metrics=['accuracy'])
53
  elif model_name == "Neural Network - (Multi Label)":
54
- model = tensorflow.keras.models.load_model("./models/model_nn.h5", compile=False)
55
  model.compile(loss=binary_crossentropy,
56
  optimizer=Adam(),
57
  metrics=['accuracy'])
58
  elif model_name == "Batch Normalization - (Multi Label)":
59
- model = tensorflow.keras.models.load_model("./models/model_bn.h5", compile=False)
60
  model.compile(loss=binary_crossentropy,
61
  optimizer=Adam(),
62
  metrics=['accuracy'])
63
- class_names = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
64
 
65
  xgb_multi_class_names = ["Rock", "Rap & Hip-Hop", "Soul", "Classical", "Dance & Electronic", "Blues","Jazz",
66
  "Country","Bebop","Folk","Reggae","R&B","Punk","Metal","Pop"]
@@ -72,7 +89,7 @@ if uploaded_file is not None:
72
  features_list = audio_splitting.split_audio(uploaded_file)
73
  features = feature_extraction.scale(features_list)
74
 
75
- st.write(features)
76
  # Reshape the features to match the expected shape for prediction
77
  reshaped_features = features.reshape(1, -1)
78
  if model_name == "XGB - (Multi Label)":
@@ -117,5 +134,6 @@ if uploaded_file is not None:
117
  else:
118
  st.write("No genre predicted above the threshold.")
119
  else:
120
- predicted_label = model.predict(reshaped_features)[0]
121
  st.write(f"Predicted Genre: {predicted_label}")
 
 
15
 
16
  # Create a Streamlit web app
17
  st.title("Music Genre Classifier")
18
+ st.write("A single-label music genre classifier based and trained on the GTZAN Dataset available for use on "
19
+ "Kaggle. All the models have been trained on that dataset.")
20
  # Upload music file
21
  uploaded_file = st.file_uploader("Upload a music file", type=["mp3", "wav"])
22
 
 
24
  # User selects a model
25
  all_models = ["K-Nearest Neighbors - (Single Label)", "Logistic Regression - (Single Label)", "Support Vector Machines - (Single Label)",
26
  "Neural Network - (Single Label)",
27
+ "XGB Classifier - (Single Label)"]
 
28
  model_name = st.selectbox("Select a model", all_models)
29
  st.write(f"Prediction of following genres")
30
+
31
  multi_class_names = ["Metal", "Jazz", "Blues", "R&B", "Classical", "Reggae", "Rap & Hip-Hop", "Punk", "Rock",
32
  "Country", "Bebop", "Pop", "Soul", "Dance & Electronic", "Folk"]
33
 
34
+ class_names = ["Blues", "Classical", "Country", "Disco", "HipHop",
35
+ "Jazz", "Metal", "Pop", "Reggae", "Rock"]
36
+
37
+ col1, col2 = st.columns(2)
38
+ s = ''
39
+ with col1:
40
+ for i in class_names[:5]:
41
+ s += "- " + i + "\n"
42
+ st.markdown(s)
43
+
44
+ s = ''
45
+
46
+ with col2:
47
+ for i in class_names[5:]:
48
+ s += "- " + i + "\n"
49
+ st.markdown(s)
50
+ # st.write(multi_class_names)
51
 
52
  # Load the selected model
53
  if model_name == "K-Nearest Neighbors - (Single Label)":
54
+ model = joblib.load("../models/knn.pkl")
55
  elif model_name == "Logistic Regression - (Single Label)":
56
+ model = joblib.load("../models/logistic.pkl")
57
  elif model_name == "Support Vector Machines - (Single Label)":
58
+ model = joblib.load("../models/svm.pkl")
59
  elif model_name == "Neural Network - (Single Label)":
60
+ model = joblib.load("../models/nn.pkl")
61
  elif model_name == "XGB Classifier - (Single Label)":
62
+ model = joblib.load("../models/xgb.pkl")
63
  elif model_name == "XGB - (Multi Label)":
64
+ model = joblib.load("../models/xgb_mlb.pkl")
65
  elif model_name == "Convolutional Recurrent Neural Network - (Multi Label)":
66
+ model = tensorflow.keras.models.load_model("../models/model_crnn1.h5", compile=False)
67
  model.compile(loss=binary_crossentropy,
68
  optimizer=Adam(),
69
  metrics=['accuracy'])
70
  elif model_name == "Neural Network - (Multi Label)":
71
+ model = tensorflow.keras.models.load_model("../models/model_nn.h5", compile=False)
72
  model.compile(loss=binary_crossentropy,
73
  optimizer=Adam(),
74
  metrics=['accuracy'])
75
  elif model_name == "Batch Normalization - (Multi Label)":
76
+ model = tensorflow.keras.models.load_model("../models/model_bn.h5", compile=False)
77
  model.compile(loss=binary_crossentropy,
78
  optimizer=Adam(),
79
  metrics=['accuracy'])
80
+ # class_names = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
81
 
82
  xgb_multi_class_names = ["Rock", "Rap & Hip-Hop", "Soul", "Classical", "Dance & Electronic", "Blues","Jazz",
83
  "Country","Bebop","Folk","Reggae","R&B","Punk","Metal","Pop"]
 
89
  features_list = audio_splitting.split_audio(uploaded_file)
90
  features = feature_extraction.scale(features_list)
91
 
92
+ # st.write(features)
93
  # Reshape the features to match the expected shape for prediction
94
  reshaped_features = features.reshape(1, -1)
95
  if model_name == "XGB - (Multi Label)":
 
134
  else:
135
  st.write("No genre predicted above the threshold.")
136
  else:
137
+ predicted_label = model.predict(features)[0]
138
  st.write(f"Predicted Genre: {predicted_label}")
139
+ st.metric("Predicted Genre:",{predicted_label})
audio_splitting.py CHANGED
@@ -1,17 +1,20 @@
1
  import pydub
 
2
  from pydub import AudioSegment
3
  import feature_extraction
4
  import io
5
  def split_audio(uploaded_file):
6
  # Load your audio file
7
  # audio = AudioSegment.from_file("classical.00000.wav", format="wav")
8
- audio = AudioSegment.from_file(uploaded_file,)
 
9
  # Define the duration of each segment in milliseconds (3 seconds)
10
  segment_duration = 3 * 1000 # 3 seconds in milliseconds
11
 
12
  # Check the total duration of the audio
13
  audio_duration = len(audio)
14
 
 
15
  # Check if the audio is shorter than 1 minute and 3 seconds
16
  if audio_duration < 63 * 1000:
17
  # If it's shorter, take audio from 0 to 3 seconds
@@ -23,13 +26,14 @@ def split_audio(uploaded_file):
23
  segment = audio[start_time:end_time]
24
  output_stream = io.BytesIO()
25
  segment.export(output_stream, format="wav")
26
-
27
  # Now you can directly use the output_stream for feature extraction
28
  output_stream.seek(0) # Reset the stream position to the beginning
29
 
30
  # Process and extract features from the segment
31
  features = feature_extraction.all_feature_extraction(output_stream)
32
-
 
33
  return features
34
  # output_file = "D:/miniproject/output_segment.wav"
35
 
 
1
  import pydub
2
+ import streamlit
3
  from pydub import AudioSegment
4
  import feature_extraction
5
  import io
6
  def split_audio(uploaded_file):
7
  # Load your audio file
8
  # audio = AudioSegment.from_file("classical.00000.wav", format="wav")
9
+ audio = AudioSegment.from_file(uploaded_file)
10
+ print("Works")
11
  # Define the duration of each segment in milliseconds (3 seconds)
12
  segment_duration = 3 * 1000 # 3 seconds in milliseconds
13
 
14
  # Check the total duration of the audio
15
  audio_duration = len(audio)
16
 
17
+ print("works")
18
  # Check if the audio is shorter than 1 minute and 3 seconds
19
  if audio_duration < 63 * 1000:
20
  # If it's shorter, take audio from 0 to 3 seconds
 
26
  segment = audio[start_time:end_time]
27
  output_stream = io.BytesIO()
28
  segment.export(output_stream, format="wav")
29
+ print("Works")
30
  # Now you can directly use the output_stream for feature extraction
31
  output_stream.seek(0) # Reset the stream position to the beginning
32
 
33
  # Process and extract features from the segment
34
  features = feature_extraction.all_feature_extraction(output_stream)
35
+ print(features)
36
+ streamlit.write(features)
37
  return features
38
  # output_file = "D:/miniproject/output_segment.wav"
39
 
feature_extraction.py CHANGED
@@ -32,8 +32,11 @@ short_field = Fields[2:]
32
  def all_feature_extraction(audio_path, sample_rate=22050):
33
  data_list = []
34
  audio_df, sr = librosa.load(audio_path, sr=22050)
 
35
  data_list.append(audio_path)
 
36
  data_list.append(len(audio_df))
 
37
  # 1. Chroma STFT
38
  chroma_stft = librosa.feature.chroma_stft(y=audio_df, hop_length=512)
39
  chroma_stft_mean = np.mean(chroma_stft)
@@ -100,6 +103,7 @@ def all_feature_extraction(audio_path, sample_rate=22050):
100
  for mean, var in mfcc_list:
101
  data_list.append(mean)
102
  data_list.append(var)
 
103
  return data_list
104
 
105
  def scale(initial_features):
 
32
  def all_feature_extraction(audio_path, sample_rate=22050):
33
  data_list = []
34
  audio_df, sr = librosa.load(audio_path, sr=22050)
35
+ print("\n",audio_df)
36
  data_list.append(audio_path)
37
+ print(audio_path)
38
  data_list.append(len(audio_df))
39
+ print(data_list)
40
  # 1. Chroma STFT
41
  chroma_stft = librosa.feature.chroma_stft(y=audio_df, hop_length=512)
42
  chroma_stft_mean = np.mean(chroma_stft)
 
103
  for mean, var in mfcc_list:
104
  data_list.append(mean)
105
  data_list.append(var)
106
+ print(data_list)
107
  return data_list
108
 
109
  def scale(initial_features):