bharatverse11 committed on
Commit
ad4ef4a
·
verified ·
1 Parent(s): d9fce80

Update dejavu/fingerprint.py

Browse files
Files changed (1) hide show
  1. dejavu/fingerprint.py +167 -167
dejavu/fingerprint.py CHANGED
@@ -1,167 +1,167 @@
1
- from __future__ import absolute_import
2
- from __future__ import unicode_literals
3
- import os
4
- import numpy as np
5
- import matplotlib.mlab as mlab
6
- import matplotlib.pyplot as plt
7
- from scipy.ndimage.filters import maximum_filter
8
- from scipy.ndimage.morphology import (
9
- generate_binary_structure, iterate_structure, binary_erosion
10
- )
11
- import hashlib
12
- from operator import itemgetter
13
- from six.moves import range
14
- from six.moves import zip
15
-
16
- IDX_FREQ_I = 0
17
- IDX_TIME_J = 1
18
-
19
- ######################################################################
20
- # Sampling rate, related to the Nyquist conditions, which affects
21
- # the range frequencies we can detect.
22
- DEFAULT_FS = os.getenv('DEFAULT_FS', 44100)
23
-
24
- ######################################################################
25
- # Size of the FFT window, affects frequency granularity
26
- DEFAULT_WINDOW_SIZE = os.getenv('DEFAULT_WINDOW_SIZE', 4096)
27
-
28
- ######################################################################
29
- # Ratio by which each sequential window overlaps the last and the
30
- # next window. Higher overlap will allow a higher granularity of offset
31
- # matching, but potentially more fingerprints.
32
- DEFAULT_OVERLAP_RATIO = os.getenv('DEFAULT_OVERLAP_RATIO', 0.5)
33
-
34
- ######################################################################
35
- # Degree to which a fingerprint can be paired with its neighbors --
36
- # higher will cause more fingerprints, but potentially better accuracy.
37
- DEFAULT_FAN_VALUE = os.getenv('DEFAULT_FAN_VALUE', 15)
38
-
39
- ######################################################################
40
- # Minimum amplitude in spectrogram in order to be considered a peak.
41
- # This can be raised to reduce number of fingerprints, but can negatively
42
- # affect accuracy.
43
- DEFAULT_AMP_MIN = os.getenv('DEFAULT_AMP_MIN', 10)
44
-
45
- ######################################################################
46
- # Number of cells around an amplitude peak in the spectrogram in order
47
- # for Dejavu to consider it a spectral peak. Higher values mean less
48
- # fingerprints and faster matching, but can potentially affect accuracy.
49
- PEAK_NEIGHBORHOOD_SIZE = os.getenv('PEAK_NEIGHBORHOOD_SIZE', 20)
50
-
51
- ######################################################################
52
- # Thresholds on how close or far fingerprints can be in time in order
53
- # to be paired as a fingerprint. If your max is too low, higher values of
54
- # DEFAULT_FAN_VALUE may not perform as expected.
55
- MIN_HASH_TIME_DELTA = os.getenv('MIN_HASH_TIME_DELTA', 0)
56
- MAX_HASH_TIME_DELTA = os.getenv('MAX_HASH_TIME_DELTA', 200)
57
-
58
- ######################################################################
59
- # If True, will sort peaks temporally for fingerprinting;
60
- # not sorting will cut down number of fingerprints, but potentially
61
- # affect performance.
62
- PEAK_SORT = True
63
-
64
- ######################################################################
65
- # Number of bits to throw away from the front of the SHA1 hash in the
66
- # fingerprint calculation. The more you throw away, the less storage, but
67
- # potentially higher collisions and misclassifications when identifying songs.
68
- FINGERPRINT_REDUCTION = os.getenv('FINGERPRINT_REDUCTION', 20)
69
-
70
-
71
- def fingerprint(
72
- channel_samples,
73
- Fs=DEFAULT_FS,
74
- wsize=DEFAULT_WINDOW_SIZE,
75
- wratio=DEFAULT_OVERLAP_RATIO,
76
- fan_value=DEFAULT_FAN_VALUE,
77
- amp_min=DEFAULT_AMP_MIN
78
- ):
79
- """
80
- FFT the channel, log transform output, find local maxima, then return
81
- locally sensitive hashes.
82
- """
83
- # FFT the signal and extract frequency components
84
- arr2D = mlab.specgram(
85
- channel_samples,
86
- NFFT=wsize,
87
- Fs=Fs,
88
- window=mlab.window_hanning,
89
- noverlap=int(wsize * wratio)
90
- )[0]
91
-
92
- # apply log transform since specgram() returns linear array
93
- arr2D = 10 * np.log10(arr2D)
94
- arr2D[arr2D == -np.inf] = 0 # replace infs with zeros
95
-
96
- # find local maxima
97
- local_maxima = get_2D_peaks(arr2D, plot=False, amp_min=amp_min)
98
-
99
- # return hashes
100
- return generate_hashes(local_maxima, fan_value=fan_value)
101
-
102
-
103
- def get_2D_peaks(arr2D, plot=False, amp_min=DEFAULT_AMP_MIN):
104
- # http://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.morphology.iterate_structure.html#scipy.ndimage.morphology.iterate_structure
105
- struct = generate_binary_structure(2, 1)
106
- neighborhood = iterate_structure(struct, PEAK_NEIGHBORHOOD_SIZE)
107
-
108
- # find local maxima using our fliter shape
109
- local_max = maximum_filter(arr2D, footprint=neighborhood) == arr2D
110
- background = (arr2D == 0)
111
- eroded_background = binary_erosion(
112
- background, structure=neighborhood, border_value=1
113
- )
114
-
115
- # Boolean mask of arr2D with True at peaks
116
- detected_peaks = local_max ^ eroded_background
117
-
118
- # extract peaks
119
- amps = arr2D[detected_peaks]
120
- j, i = np.where(detected_peaks)
121
-
122
- # filter peaks
123
- amps = amps.flatten()
124
- peaks = list(zip(i, j, amps))
125
- peaks_filtered = [x for x in peaks if x[2] > amp_min] # freq, time, amp
126
-
127
- # get indices for frequency and time
128
- frequency_idx = [x[1] for x in peaks_filtered]
129
- time_idx = [x[0] for x in peaks_filtered]
130
-
131
- if plot:
132
- # scatter of the peaks
133
- fig, ax = plt.subplots()
134
- ax.imshow(arr2D)
135
- ax.scatter(time_idx, frequency_idx)
136
- ax.set_xlabel('Time')
137
- ax.set_ylabel('Frequency')
138
- ax.set_title("Spectrogram")
139
- plt.gca().invert_yaxis()
140
- plt.show()
141
-
142
- return list(zip(frequency_idx, time_idx))
143
-
144
-
145
- def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
146
- """
147
- Hash list structure:
148
- sha1_hash[0:20] time_offset
149
- [(e05b341a9b77a51fd26, 32), ... ]
150
- """
151
- if PEAK_SORT:
152
- peaks.sort(key=itemgetter(1))
153
-
154
- for i in range(len(peaks)):
155
- for j in range(1, fan_value):
156
- if (i + j) < len(peaks):
157
-
158
- freq1 = peaks[i][IDX_FREQ_I]
159
- freq2 = peaks[i + j][IDX_FREQ_I]
160
- t1 = peaks[i][IDX_TIME_J]
161
- t2 = peaks[i + j][IDX_TIME_J]
162
- t_delta = t2 - t1
163
-
164
- if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA:
165
- key = u"{}|{}|{}".format(freq1, freq2, t_delta)
166
- h = hashlib.sha1(key.encode('utf-8'))
167
- yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1)
 
1
+ from __future__ import absolute_import
2
+ from __future__ import unicode_literals
3
+ import os
4
+ import numpy as np
5
+ import matplotlib.mlab as mlab
6
+ import matplotlib.pyplot as plt
7
+ from scipy.ndimage.filters import maximum_filter
8
+ from scipy.ndimage.morphology import (
9
+ generate_binary_structure, iterate_structure, binary_erosion
10
+ )
11
+ import hashlib
12
+ from operator import itemgetter
13
+ from six.moves import range
14
+ from six.moves import zip
15
+
16
# Positions of the frequency index and the time index inside each
# (frequency, time) peak tuple consumed by generate_hashes().
IDX_FREQ_I = 0
IDX_TIME_J = 1

######################################################################
# NOTE: os.getenv() returns a *string* whenever the variable is set in
# the environment. Every override below is therefore coerced to the
# numeric type the fingerprinting code needs (int for sizes, counts and
# indices; float for ratios and amplitude thresholds). An invalid value
# raises ValueError at import time instead of failing later, deep inside
# the fingerprinting code.

######################################################################
# Sampling rate, related to the Nyquist conditions, which affects
# the range of frequencies we can detect.
DEFAULT_FS = int(os.getenv('DEFAULT_FS', 44100))

######################################################################
# Size of the FFT window, affects frequency granularity
DEFAULT_WINDOW_SIZE = int(os.getenv('DEFAULT_WINDOW_SIZE', 4096))

######################################################################
# Ratio by which each sequential window overlaps the last and the
# next window. Higher overlap will allow a higher granularity of offset
# matching, but potentially more fingerprints.
DEFAULT_OVERLAP_RATIO = float(os.getenv('DEFAULT_OVERLAP_RATIO', 0.5))

######################################################################
# Degree to which a fingerprint can be paired with its neighbors --
# higher will cause more fingerprints, but potentially better accuracy.
DEFAULT_FAN_VALUE = int(os.getenv('DEFAULT_FAN_VALUE', 20))  # Increased from 15 for better accuracy

######################################################################
# Minimum amplitude in spectrogram in order to be considered a peak.
# This can be raised to reduce number of fingerprints, but can negatively
# affect accuracy.
DEFAULT_AMP_MIN = float(os.getenv('DEFAULT_AMP_MIN', 8))  # Lowered from 10 to detect more peaks

######################################################################
# Number of cells around an amplitude peak in the spectrogram in order
# for Dejavu to consider it a spectral peak. Higher values mean fewer
# fingerprints and faster matching, but can potentially affect accuracy.
PEAK_NEIGHBORHOOD_SIZE = int(os.getenv('PEAK_NEIGHBORHOOD_SIZE', 15))  # Reduced from 20 for more peaks

######################################################################
# Thresholds on how close or far fingerprints can be in time in order
# to be paired as a fingerprint. If your max is too low, higher values of
# DEFAULT_FAN_VALUE may not perform as expected.
MIN_HASH_TIME_DELTA = int(os.getenv('MIN_HASH_TIME_DELTA', 0))
MAX_HASH_TIME_DELTA = int(os.getenv('MAX_HASH_TIME_DELTA', 200))

######################################################################
# If True, will sort peaks temporally for fingerprinting;
# not sorting will cut down number of fingerprints, but potentially
# affect performance.
PEAK_SORT = True

######################################################################
# Number of leading hexadecimal characters of the SHA1 hex digest that
# are kept as the fingerprint hash (a full SHA1 hex digest has 40).
# Keeping fewer characters means less storage, but potentially higher
# collisions and misclassifications when identifying songs.
FINGERPRINT_REDUCTION = int(os.getenv('FINGERPRINT_REDUCTION', 20))
69
+
70
+
71
def fingerprint(
    channel_samples,
    Fs=DEFAULT_FS,
    wsize=DEFAULT_WINDOW_SIZE,
    wratio=DEFAULT_OVERLAP_RATIO,
    fan_value=DEFAULT_FAN_VALUE,
    amp_min=DEFAULT_AMP_MIN
):
    """
    FFT the channel, log transform output, find local maxima, then return
    locally sensitive hashes.

    :param channel_samples: samples of a single audio channel, handed
        unchanged to ``mlab.specgram``.
    :param Fs: sampling rate passed to ``mlab.specgram``.
    :param wsize: FFT window size (``NFFT``).
    :param wratio: fraction of each window that overlaps the next one;
        ``noverlap`` is ``int(wsize * wratio)``.
    :param fan_value: forwarded to ``generate_hashes``.
    :param amp_min: forwarded to ``get_2D_peaks`` as the minimum peak
        amplitude (compared against the log-scaled spectrogram).
    :return: the generator produced by ``generate_hashes``, yielding
        ``(hash, time_offset)`` tuples.
    """
    # FFT the signal and extract frequency components; element [0] of the
    # specgram() result is the 2D spectrum array.
    arr2D = mlab.specgram(
        channel_samples,
        NFFT=wsize,
        Fs=Fs,
        window=mlab.window_hanning,
        noverlap=int(wsize * wratio)
    )[0]

    # apply log transform since specgram() returns linear array.
    # Cells that are exactly zero produce -inf here; that is expected and
    # handled on the next line, so silence the divide-by-zero warning
    # instead of letting it leak to the caller.
    with np.errstate(divide='ignore'):
        arr2D = 10 * np.log10(arr2D)
    arr2D[arr2D == -np.inf] = 0  # replace infs with zeros

    # find local maxima
    local_maxima = get_2D_peaks(arr2D, plot=False, amp_min=amp_min)

    # return hashes
    return generate_hashes(local_maxima, fan_value=fan_value)
101
+
102
+
103
def get_2D_peaks(arr2D, plot=False, amp_min=DEFAULT_AMP_MIN):
    """
    Find the local maxima of a 2D array and keep those above ``amp_min``.

    :param arr2D: 2D array to search. Rows are treated as the frequency
        axis and columns as the time axis.
    :param plot: when True, show the array with the retained peaks drawn
        on top of it (blocks on ``plt.show()``).
    :param amp_min: a peak is kept only if its value is strictly greater
        than this threshold.
    :return: list of ``(frequency_idx, time_idx)`` tuples, i.e.
        ``(row, column)`` positions of the retained peaks.
    """
    # Footprint: the connectivity-1 structuring element dilated with itself
    # PEAK_NEIGHBORHOOD_SIZE times (see scipy.ndimage.iterate_structure).
    footprint = iterate_structure(
        generate_binary_structure(2, 1), PEAK_NEIGHBORHOOD_SIZE
    )

    # A cell is a candidate peak when it equals the maximum of its footprint.
    is_local_max = maximum_filter(arr2D, footprint=footprint) == arr2D

    # Cells that are exactly zero count as background. Flat background also
    # satisfies the "equals its neighbourhood maximum" test above, so the
    # eroded background is XOR-ed out of the candidate mask.
    is_background = (arr2D == 0)
    background_core = binary_erosion(
        is_background, structure=footprint, border_value=1
    )
    peak_mask = is_local_max ^ background_core

    # Row/column positions and amplitudes of the candidates. Both np.where
    # and boolean-mask indexing walk the array in the same row-major order,
    # so the three arrays stay aligned.
    row_idx, col_idx = np.where(peak_mask)
    strengths = arr2D[peak_mask].flatten()

    # Keep only the candidates that are loud enough.
    loud_enough = strengths > amp_min
    frequency_idx = row_idx[loud_enough]
    time_idx = col_idx[loud_enough]

    if plot:
        # Spectrogram image with the retained peaks scattered over it.
        _, axes = plt.subplots()
        axes.imshow(arr2D)
        axes.scatter(time_idx, frequency_idx)
        axes.set_xlabel('Time')
        axes.set_ylabel('Frequency')
        axes.set_title("Spectrogram")
        plt.gca().invert_yaxis()
        plt.show()

    return list(zip(frequency_idx, time_idx))
143
+
144
+
145
def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
    """
    Pair each peak with the peaks that follow it and yield one hash per pair.

    Hash list structure:
       sha1_hash[0:FINGERPRINT_REDUCTION]    time_offset
    [(e05b341a9b77a51fd26, 32), ... ]

    :param peaks: iterable of peak tuples whose element ``IDX_FREQ_I`` is
        the frequency index and element ``IDX_TIME_J`` the time index.
        The caller's object is never modified.
    :param fan_value: each peak is paired with at most ``fan_value - 1``
        of the peaks that immediately follow it.
    :return: generator of ``(hash, t1)`` tuples, where ``hash`` is the
        first ``FINGERPRINT_REDUCTION`` hex characters of the SHA1 of
        ``"freq1|freq2|t_delta"`` and ``t1`` is the time index of the
        first peak of the pair.
    """
    # Work on a private list: the original in-place peaks.sort() silently
    # reordered the caller's data and failed for non-list iterables.
    if PEAK_SORT:
        peaks = sorted(peaks, key=itemgetter(IDX_TIME_J))
    else:
        peaks = list(peaks)

    n_peaks = len(peaks)
    for i, anchor in enumerate(peaks):
        freq1 = anchor[IDX_FREQ_I]
        t1 = anchor[IDX_TIME_J]

        # Partners are the next fan_value - 1 peaks, clipped at the end of
        # the list (an empty range when fan_value <= 1).
        for k in range(i + 1, min(i + fan_value, n_peaks)):
            freq2 = peaks[k][IDX_FREQ_I]
            t_delta = peaks[k][IDX_TIME_J] - t1

            if MIN_HASH_TIME_DELTA <= t_delta <= MAX_HASH_TIME_DELTA:
                key = u"{}|{}|{}".format(freq1, freq2, t_delta)
                h = hashlib.sha1(key.encode('utf-8'))
                yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1)