czyoung committed on
Commit
ddefe81
·
verified ·
1 Parent(s): 85b7ab1

Update sonogram_utility.py

Browse files
Files changed (1) hide show
  1. sonogram_utility.py +72 -1
sonogram_utility.py CHANGED
@@ -3,6 +3,8 @@ import random
3
  import copy
4
  from pyannote.core import Annotation, Segment
5
  import numpy as np
 
 
6
 
7
  def colors(n):
8
  '''
@@ -94,4 +96,73 @@ def loadAudioRTTM(sampleRTTM):
94
  speakerList[index].append((float(speakerResult[3]),float(speakerResult[4])))
95
  prediction[Segment(start,end)] = index
96
 
97
- return speakerList, prediction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import copy
4
  from pyannote.core import Annotation, Segment
5
  import numpy as np
6
+ import torch
7
+ import torchaudio
8
 
9
  def colors(n):
10
  '''
 
96
  speakerList[index].append((float(speakerResult[3]),float(speakerResult[4])))
97
  prediction[Segment(start,end)] = index
98
 
99
+ return speakerList, prediction
100
+
101
def splitIntoTimeSegments(testFile, maxDurationInSeconds=60):
    '''
    Load an audio file and split its waveform into consecutive time chunks.

    Fixes the committed version, whose `def` line was missing the trailing
    colon (a SyntaxError) and which appended one empty segment for an
    empty waveform.

    Parameters
    ----------
    testFile : str
        Path to an audio file readable by torchaudio.load.
    maxDurationInSeconds : int, optional
        Maximum length of each chunk, in seconds (default 60).

    Returns
    -------
    (list, int)
        A list of waveform slices of shape (channels, <= maxDuration*rate),
        and the sample rate reported by torchaudio.
    '''
    waveform, sample_rate = torchaudio.load(testFile)
    audioSegments = []

    totalSamples = waveform.shape[-1]
    samplesPerSegment = maxDurationInSeconds * sample_rate
    currentStart = 0
    # Walk the waveform in fixed-size windows; the final window may be shorter.
    while currentStart < totalSamples:
        currentEnd = min(currentStart + samplesPerSegment, totalSamples)
        audioSegments.append(waveform[:, currentStart:currentEnd])
        currentStart = currentEnd
    return audioSegments, sample_rate
120
+
121
def audioNormalize(waveform, sampleRate, stepSizeInSeconds=2, dbThreshold=-50, dbTarget=-5):
    '''
    Piecewise peak-normalize a mono or stereo waveform.

    The signal is scanned in windows of stepSizeInSeconds. Each window whose
    peak level (in dB) exceeds dbThreshold is amplified so that its loudest
    sample sits at dbTarget; quieter windows (silence / noise floor) are left
    untouched. The input tensor is not modified.

    Fixes vs. the committed version:
    - The loop bound was `len(...) - 1` with an exclusive slice end, so the
      final sample was never processed; it now covers the full signal.
    - When (len - 1) was an exact multiple of the step size the loop reached
      a zero-length window and `torch.max` raised on the empty slice; the
      single-bound loop below cannot produce an empty window.
    - The duplicated channel-0 / channel-1 code is folded into one loop
      (still capped at two channels, matching the original behavior).

    Parameters
    ----------
    waveform : Tensor
        Audio of shape (channels, samples); only the first two channels are
        adjusted, as in the original.
    sampleRate : int
        Samples per second of `waveform`.
    stepSizeInSeconds : float, optional
        Window length for the per-window gain decision (default 2).
    dbThreshold : float, optional
        Windows peaking below this level are skipped (default -50 dB).
    dbTarget : float, optional
        Desired peak level for adjusted windows (default -5 dB).

    Returns
    -------
    Tensor
        A gain-adjusted copy of `waveform`.
    '''
    copyWaveform = waveform.clone().detach()
    copyWaveform_db = waveform.clone().detach()
    transform = torchaudio.transforms.AmplitudeToDB(stype="amplitude", top_db=80)
    copyWaveform_db = transform(copyWaveform_db)

    stepSamples = int(stepSizeInSeconds * sampleRate)
    totalSamples = copyWaveform_db.shape[-1]
    # The original handled at most two channels (0 and 1) explicitly.
    numChannels = min(len(copyWaveform_db), 2)

    currStart = 0
    while currStart < totalSamples:
        currEnd = min(currStart + stepSamples, totalSamples)
        for channel in range(numChannels):
            window_db = copyWaveform_db[channel][currStart:currEnd]
            if torch.max(window_db).item() > dbThreshold:
                # min(target - db) == target - max(db): the gain that lifts
                # the loudest sample of this window exactly to dbTarget.
                gain = torch.min(dbTarget - window_db)
                adjustGain = torchaudio.transforms.Vol(gain, 'db')
                copyWaveform[channel][currStart:currEnd] = adjustGain(copyWaveform[channel][currStart:currEnd])
        currStart = currEnd
    return copyWaveform
145
+
146
class equalizeVolume(torch.nn.Module):
    '''nn.Module wrapper around audioNormalize, so volume equalization can be
    dropped into a torch transform pipeline like any other module.'''

    def forward(self, waveform, sampleRate, stepSizeInSeconds, dbThreshold, dbTarget):
        '''Return a volume-equalized copy of `waveform`; see audioNormalize.'''
        return audioNormalize(waveform, sampleRate, stepSizeInSeconds, dbThreshold, dbTarget)
150
+
151
def combineWaveforms(waveformList):
    '''Concatenate channel-matched waveform tensors end-to-end along the
    time axis (dim 1) and return the joined tensor.'''
    joined = torch.cat(waveformList, dim=1)
    return joined
153
+
154
def annotationToSpeakerList(myAnnotation):
    '''
    Convert a pyannote Annotation into per-speaker (start, duration) lists.

    Speakers are indexed in the order they first appear in
    myAnnotation.labels(); entry i of the returned list holds the
    (start, duration) pairs of the support segments of speaker i.
    '''
    tempSpeakerList = []
    tempSpeakerNames = []
    for speakerName in myAnnotation.labels():
        if speakerName in tempSpeakerNames:
            speakerIndex = tempSpeakerNames.index(speakerName)
        else:
            # First time we see this speaker: give it the next slot.
            speakerIndex = len(tempSpeakerNames)
            tempSpeakerNames.append(speakerName)
            tempSpeakerList.append([])

        # label_support yields the (merged) segments where this speaker talks.
        for seg in myAnnotation.label_support(speakerName):
            tempSpeakerList[speakerIndex].append((seg.start, seg.duration))
    return tempSpeakerList