Spaces:
Build error
Fix first-speaker bug
Browse files
app.py
CHANGED
|
@@ -21,7 +21,8 @@ import numpy as np
|
|
| 21 |
model = whisper.load_model("large-v2")
|
| 22 |
embedding_model = PretrainedSpeakerEmbedding(
|
| 23 |
"speechbrain/spkrec-ecapa-voxceleb",
|
| 24 |
-
device=torch.device(
|
|
|
|
| 25 |
|
| 26 |
def transcribe(audio, num_speakers):
|
| 27 |
path = convert_to_wav(audio)
|
|
@@ -84,7 +85,7 @@ def get_output(segments):
|
|
| 84 |
for (i, segment) in enumerate(segments):
|
| 85 |
if i > 0:
|
| 86 |
output += '\n\n'
|
| 87 |
-
if segments[i - 1]["speaker"] != segment["speaker"]:
|
| 88 |
output += segment["speaker"] + ' ' + str(time(segment["start"])) + '\n\n'
|
| 89 |
output += segment["text"][1:] + ' '
|
| 90 |
return output
|
|
|
|
| 21 |
model = whisper.load_model("large-v2")
|
| 22 |
embedding_model = PretrainedSpeakerEmbedding(
|
| 23 |
"speechbrain/spkrec-ecapa-voxceleb",
|
| 24 |
+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 25 |
+
)
|
| 26 |
|
| 27 |
def transcribe(audio, num_speakers):
|
| 28 |
path = convert_to_wav(audio)
|
|
|
|
| 85 |
for (i, segment) in enumerate(segments):
|
| 86 |
if i > 0:
|
| 87 |
output += '\n\n'
|
| 88 |
+
if i == 0 or segments[i - 1]["speaker"] != segment["speaker"]:
|
| 89 |
output += segment["speaker"] + ' ' + str(time(segment["start"])) + '\n\n'
|
| 90 |
output += segment["text"][1:] + ' '
|
| 91 |
return output
|