Spaces:
Running
Running
Update app/services/processor.py
Browse files
app/services/processor.py
CHANGED
|
@@ -8,7 +8,6 @@ from collections import defaultdict, Counter
|
|
| 8 |
|
| 9 |
import numpy as np
|
| 10 |
import librosa
|
| 11 |
-
import torch
|
| 12 |
|
| 13 |
from app.core.config import get_settings
|
| 14 |
from app.services.transcription import TranscriptionService
|
|
@@ -173,12 +172,11 @@ def merge_consecutive_segments(
|
|
| 173 |
|
| 174 |
gap = seg.start - prev.end
|
| 175 |
|
| 176 |
-
overlap = prev.end - seg.start
|
| 177 |
|
| 178 |
if (
|
| 179 |
seg.speaker == prev.speaker
|
|
|
|
| 180 |
and gap <= max_gap
|
| 181 |
-
and overlap <= max_overlap
|
| 182 |
):
|
| 183 |
|
| 184 |
prev.end = max(
|
|
@@ -225,7 +223,6 @@ class Processor:
|
|
| 225 |
|
| 226 |
# 2: Load audio
|
| 227 |
y, sr = librosa.load(wav_path, sr=16000, mono=True)
|
| 228 |
-
waveform = torch.from_numpy(y).unsqueeze(0)
|
| 229 |
if y.size == 0:
|
| 230 |
raise ValueError("Empty audio")
|
| 231 |
duration = len(y) / sr
|
|
@@ -248,8 +245,8 @@ class Processor:
|
|
| 248 |
|
| 249 |
diarization_segments = [
|
| 250 |
SpeakerSegment(
|
| 251 |
-
start=
|
| 252 |
-
end=
|
| 253 |
speaker=s.speaker,
|
| 254 |
confidence=getattr(s, "confidence", 1.0),
|
| 255 |
)
|
|
|
|
| 8 |
|
| 9 |
import numpy as np
|
| 10 |
import librosa
|
|
|
|
| 11 |
|
| 12 |
from app.core.config import get_settings
|
| 13 |
from app.services.transcription import TranscriptionService
|
|
|
|
| 172 |
|
| 173 |
gap = seg.start - prev.end
|
| 174 |
|
|
|
|
| 175 |
|
| 176 |
if (
|
| 177 |
seg.speaker == prev.speaker
|
| 178 |
+
and gap >= -max_overlap
|
| 179 |
and gap <= max_gap
|
|
|
|
| 180 |
):
|
| 181 |
|
| 182 |
prev.end = max(
|
|
|
|
| 223 |
|
| 224 |
# 2: Load audio
|
| 225 |
y, sr = librosa.load(wav_path, sr=16000, mono=True)
|
|
|
|
| 226 |
if y.size == 0:
|
| 227 |
raise ValueError("Empty audio")
|
| 228 |
duration = len(y) / sr
|
|
|
|
| 245 |
|
| 246 |
diarization_segments = [
|
| 247 |
SpeakerSegment(
|
| 248 |
+
start=s.start,
|
| 249 |
+
end=s.end,
|
| 250 |
speaker=s.speaker,
|
| 251 |
confidence=getattr(s, "confidence", 1.0),
|
| 252 |
)
|