Spaces:
Runtime error
Runtime error
Commit
·
c1b97fa
1
Parent(s):
175c5b3
updated files
Browse files
app.py
CHANGED
|
@@ -1,54 +1,13 @@
|
|
| 1 |
-
|
| 2 |
-
import numpy as np
|
| 3 |
-
import tensorflow as tf
|
| 4 |
-
from scipy.io.wavfile import write
|
| 5 |
-
import keras.backend as K
|
| 6 |
-
import librosa.display
|
| 7 |
-
import cv2
|
| 8 |
-
import librosa
|
| 9 |
-
import matplotlib.pyplot as plt
|
| 10 |
-
import librosa.display
|
| 11 |
-
import numpy as np
|
| 12 |
-
from keras.applications import VGG16
|
| 13 |
import os
|
| 14 |
-
import scipy
|
| 15 |
import gradio as gr
|
| 16 |
import shutil
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
# Load the tune recognition model
|
| 19 |
-
model = tf.keras.models.load_model('embdmodel_1.hdf5')
|
| 20 |
-
embedding_model=model.layers[2]
|
| 21 |
|
| 22 |
DURATION = 10
|
| 23 |
WAVE_OUTPUT_FILE = "my_audio.wav"
|
| 24 |
|
| 25 |
-
|
| 26 |
-
# Define function to preprocess input audio
|
| 27 |
-
#convert song to mel spectogram as siamese network doesn't work on sound directly
|
| 28 |
-
def create_spectrogram(clip,sample_rate,save_path):
|
| 29 |
-
plt.interactive(False)
|
| 30 |
-
fig=plt.figure(figsize=[0.72,0.72])
|
| 31 |
-
S=librosa.feature.melspectrogram(y=clip,sr=sample_rate)
|
| 32 |
-
librosa.display.specshow(librosa.power_to_db(S,ref=np.max))
|
| 33 |
-
fig.savefig(save_path,dpi=400,bbox_inches='tight',pad_inches=0)
|
| 34 |
-
plt.close()
|
| 35 |
-
fig.clf()
|
| 36 |
-
plt.close(fig)
|
| 37 |
-
plt.close('all')
|
| 38 |
-
del save_path,clip,sample_rate,fig,S
|
| 39 |
-
|
| 40 |
-
def load_img(path):
|
| 41 |
-
img=cv2.imread(path)
|
| 42 |
-
img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
|
| 43 |
-
img=cv2.resize(img,(150,150))
|
| 44 |
-
return img
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
import pickle
|
| 48 |
-
with open('dict.pickle', 'rb') as handle:
|
| 49 |
-
songspecdict = pickle.load(handle)
|
| 50 |
-
|
| 51 |
-
|
| 52 |
def list_file_sizes():
|
| 53 |
path = "."
|
| 54 |
|
|
@@ -76,75 +35,9 @@ def main(audio):
|
|
| 76 |
|
| 77 |
list_file_sizes()
|
| 78 |
|
| 79 |
-
|
| 80 |
-
song, sr = librosa.load("my_audio.wav")
|
| 81 |
-
to_match = np.copy(song[0:220500])
|
| 82 |
-
print("Loaded data into librosa...")
|
| 83 |
-
|
| 84 |
-
# Create spectrogram image of the song to match
|
| 85 |
-
create_spectrogram(to_match, sr, 'test.png')
|
| 86 |
-
print("Created spectogram...")
|
| 87 |
-
|
| 88 |
-
# Load the spectrogram image of the song to match
|
| 89 |
-
to_match_img = load_img('test.png')
|
| 90 |
-
to_match_img = np.expand_dims(to_match_img, axis=0)
|
| 91 |
-
print("Loaded spectrum image...")
|
| 92 |
-
|
| 93 |
-
# Get the embedding of the song to match
|
| 94 |
-
to_match_emb = embedding_model.predict(to_match_img)
|
| 95 |
-
print("Get song embedding...")
|
| 96 |
-
|
| 97 |
-
# Calculate the distances between the song to match and the songs in the database
|
| 98 |
-
songsdistdict = {}
|
| 99 |
-
for key, values in songspecdict.items():
|
| 100 |
-
dist_array = []
|
| 101 |
-
for embd in values:
|
| 102 |
-
dist_array.append(np.linalg.norm(to_match_emb - embd))
|
| 103 |
-
|
| 104 |
-
songsdistdict[key] = min(dist_array)
|
| 105 |
-
song_titles=list(songsdistdict.keys())
|
| 106 |
-
distances=list(songsdistdict.values())
|
| 107 |
-
|
| 108 |
-
# Get the title and artist of the recognized song
|
| 109 |
-
recognized_song_artist, recognized_song_title = song_titles[distances.index(min(distances))].split('-')
|
| 110 |
-
recognized_song_title = os.path.splitext(recognized_song_title)[0]
|
| 111 |
-
print(f'Artist: {recognized_song_artist}')
|
| 112 |
-
print(f'Title: {recognized_song_title}')
|
| 113 |
-
|
| 114 |
-
from musixmatch import Musixmatch
|
| 115 |
-
|
| 116 |
-
# Initialize Musixmatch API
|
| 117 |
-
musixmatch = Musixmatch(apikey='2b0d0615efa782e95598a0e99bda4a60')
|
| 118 |
-
|
| 119 |
-
# Search for the recognized song
|
| 120 |
-
track_search_results = musixmatch.track_search(q_track=recognized_song_title, q_artist=recognized_song_artist, page_size=1, page=1, s_track_rating='desc')
|
| 121 |
-
|
| 122 |
-
if track_search_results['message']['header']['status_code'] == 200:
|
| 123 |
-
# Get the track ID for the top result
|
| 124 |
-
track_id = track_search_results['message']['body']['track_list'][0]['track']['track_id']
|
| 125 |
-
|
| 126 |
-
# Get the lyrics for the recognized song
|
| 127 |
-
lyrics_result = musixmatch.track_lyrics_get(track_id=track_id)
|
| 128 |
-
|
| 129 |
-
if lyrics_result['message']['header']['status_code'] == 200:
|
| 130 |
-
# Get the lyrics
|
| 131 |
-
lyrics = lyrics_result['message']['body']['lyrics']['lyrics_body']
|
| 132 |
-
# Remove the annotation tags from the lyrics
|
| 133 |
-
lyrics = lyrics.replace('******* This Lyrics is NOT for Commercial use *******', '').strip()
|
| 134 |
-
print("Lyrics:\n", lyrics)
|
| 135 |
-
else:
|
| 136 |
-
print("Couldn't find lyrics for the recognized song.")
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
# Play the recognized song
|
| 141 |
-
recognized_song_file = f'https://huggingface.co/spaces/prerna9811/Chord/tree/main/seismese_net_songs/{song_titles[distances.index(min(distances))]}'
|
| 142 |
-
recognized_song_audio, recognized_song_sr = librosa.load(recognized_song_file)
|
| 143 |
-
|
| 144 |
-
audio_file = open(recognized_song_file, 'rb') # enter the filename with filepath
|
| 145 |
-
audio_bytes = audio_file.read() # reading the file
|
| 146 |
|
| 147 |
-
return
|
| 148 |
|
| 149 |
|
| 150 |
import asyncio
|
|
@@ -155,7 +48,7 @@ demo = gr.Blocks()
|
|
| 155 |
mf_transcribe = gr.Interface(
|
| 156 |
fn=main,
|
| 157 |
inputs=gr.inputs.Audio(source="microphone", type="filepath"),
|
| 158 |
-
outputs="
|
| 159 |
layout="horizontal",
|
| 160 |
theme="huggingface",
|
| 161 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import shutil
|
| 4 |
+
from main_code import main_loop
|
| 5 |
+
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
DURATION = 10
|
| 9 |
WAVE_OUTPUT_FILE = "my_audio.wav"
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
def list_file_sizes():
|
| 12 |
path = "."
|
| 13 |
|
|
|
|
| 35 |
|
| 36 |
list_file_sizes()
|
| 37 |
|
| 38 |
+
song = main_loop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
+
return audio
|
| 41 |
|
| 42 |
|
| 43 |
import asyncio
|
|
|
|
| 48 |
mf_transcribe = gr.Interface(
|
| 49 |
fn=main,
|
| 50 |
inputs=gr.inputs.Audio(source="microphone", type="filepath"),
|
| 51 |
+
outputs="text",
|
| 52 |
layout="horizontal",
|
| 53 |
theme="huggingface",
|
| 54 |
)
|