Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Added comments
Browse files - sonogram.py +28 -1
sonogram.py
CHANGED
|
@@ -6,6 +6,12 @@ import torch
|
|
| 6 |
class Sonogram():
|
| 7 |
|
| 8 |
def __init__(self,enableDenoise=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
#TODO: Should these be adjustable via initialization, or constants?
|
| 10 |
self.secondDifference = 5
|
| 11 |
self.gainWindow = 4
|
|
@@ -38,7 +44,27 @@ class Sonogram():
|
|
| 38 |
self.groupClassifier = pickle.load(f)
|
| 39 |
|
| 40 |
def processFile(self,filePath):
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
waveformList, sampleRate = su.splitIntoTimeSegments(filePath,600)
|
| 43 |
print("File loaded")
|
| 44 |
waveformEnhanced = su.combineWaveforms(waveformList)
|
|
@@ -74,5 +100,6 @@ class Sonogram():
|
|
| 74 |
else:
|
| 75 |
# May not be necessary, consider using to reformat default names away from SPEAKER_XX
|
| 76 |
labelMapping[speaker] = speaker
|
|
|
|
| 77 |
annotation.rename_labels(labelMapping)
|
| 78 |
return annotation, totalTimeInSeconds, waveformGainAdjusted, sampleRate
|
|
|
|
| 6 |
class Sonogram():
|
| 7 |
|
| 8 |
def __init__(self,enableDenoise=False):
|
| 9 |
+
'''
|
| 10 |
+
Initialize Sonogram Class
|
| 11 |
+
|
| 12 |
+
enableDenoise : False|True
|
| 13 |
+
Legacy flag kept for denoise support, which is currently removed. Consider removing this parameter if denoise will not be reimplemented.
|
| 14 |
+
'''
|
| 15 |
#TODO: Should these be adjustable via initialization, or constants?
|
| 16 |
self.secondDifference = 5
|
| 17 |
self.gainWindow = 4
|
|
|
|
| 44 |
self.groupClassifier = pickle.load(f)
|
| 45 |
|
| 46 |
def processFile(self,filePath):
|
| 47 |
+
'''
|
| 48 |
+
Processes audio file to generate diarization output
|
| 49 |
+
|
| 50 |
+
filePath : string
|
| 51 |
+
Path to the audio file
|
| 52 |
+
|
| 53 |
+
Returns
|
| 54 |
+
--------
|
| 55 |
+
diarizationOutput : DiarizeOutput
|
| 56 |
+
found here https://github.com/pyannote/pyannote-audio/blob/main/src/pyannote/audio/pipelines/speaker_diarization.py#L64
|
| 57 |
+
|
| 58 |
+
totalTimeInSeconds : int
|
| 59 |
+
Approximate total seconds of audio file
|
| 60 |
+
|
| 61 |
+
waveformGainAdjusted : np.array
|
| 62 |
+
The waveform of the audio file after equalization
|
| 63 |
+
|
| 64 |
+
sampleRate : int
|
| 65 |
+
The sample rate of the audio file
|
| 66 |
+
'''
|
| 67 |
+
print(f"Loading file : {filePath}")
|
| 68 |
waveformList, sampleRate = su.splitIntoTimeSegments(filePath,600)
|
| 69 |
print("File loaded")
|
| 70 |
waveformEnhanced = su.combineWaveforms(waveformList)
|
|
|
|
| 100 |
else:
|
| 101 |
# May not be necessary, consider using to reformat default names away from SPEAKER_XX
|
| 102 |
labelMapping[speaker] = speaker
|
| 103 |
+
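# NOTE(review): intended as an in-place rename, but pyannote.core's Annotation.rename_labels defaults to copy=True and returns a renamed copy - confirm the result is not being discarded (assign it, or pass copy=False)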
|
| 104 |
annotation.rename_labels(labelMapping)
|
| 105 |
return annotation, totalTimeInSeconds, waveformGainAdjusted, sampleRate
|