vyluong committed on
Commit
f2fe172
·
verified ·
1 Parent(s): f14c6b9

Update app/services/processor.py

Browse files
Files changed (1) hide show
  1. app/services/processor.py +3 -6
app/services/processor.py CHANGED
@@ -8,7 +8,6 @@ from collections import defaultdict, Counter
8
 
9
  import numpy as np
10
  import librosa
11
- import torch
12
 
13
  from app.core.config import get_settings
14
  from app.services.transcription import TranscriptionService
@@ -173,12 +172,11 @@ def merge_consecutive_segments(
173
 
174
  gap = seg.start - prev.end
175
 
176
- overlap = prev.end - seg.start
177
 
178
  if (
179
  seg.speaker == prev.speaker
 
180
  and gap <= max_gap
181
- and overlap <= max_overlap
182
  ):
183
 
184
  prev.end = max(
@@ -225,7 +223,6 @@ class Processor:
225
 
226
  # 2: Load audio
227
  y, sr = librosa.load(wav_path, sr=16000, mono=True)
228
- waveform = torch.from_numpy(y).unsqueeze(0)
229
  if y.size == 0:
230
  raise ValueError("Empty audio")
231
  duration = len(y) / sr
@@ -248,8 +245,8 @@ class Processor:
248
 
249
  diarization_segments = [
250
  SpeakerSegment(
251
- start=new_start,
252
- end=new_end,
253
  speaker=s.speaker,
254
  confidence=getattr(s, "confidence", 1.0),
255
  )
 
8
 
9
  import numpy as np
10
  import librosa
 
11
 
12
  from app.core.config import get_settings
13
  from app.services.transcription import TranscriptionService
 
172
 
173
  gap = seg.start - prev.end
174
 
 
175
 
176
  if (
177
  seg.speaker == prev.speaker
178
+ and gap >= -max_overlap
179
  and gap <= max_gap
 
180
  ):
181
 
182
  prev.end = max(
 
223
 
224
  # 2: Load audio
225
  y, sr = librosa.load(wav_path, sr=16000, mono=True)
 
226
  if y.size == 0:
227
  raise ValueError("Empty audio")
228
  duration = len(y) / sr
 
245
 
246
  diarization_segments = [
247
  SpeakerSegment(
248
+ start=s.start,
249
+ end=s.end,
250
  speaker=s.speaker,
251
  confidence=getattr(s, "confidence", 1.0),
252
  )