Capstone04 committed on
Commit
3e1f8e9
·
verified ·
1 Parent(s): e7d2bdc

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. asr_diarization/pipeline.py +9 -11
asr_diarization/pipeline.py CHANGED
@@ -44,7 +44,7 @@ class ASR_Diarization:
44
  def run_diarization(self, audio_path):
45
  diarization = self.diar_pipeline(audio_path)
46
  return [
47
- {"segment_start": t.start, "segment_end": t.end, "speaker": spk}
48
  for t, _, spk in diarization.itertracks(yield_label=True)
49
  ]
50
 
@@ -54,8 +54,8 @@ class ASR_Diarization:
54
  speaker_segments = {}
55
 
56
  for seg in diar_json:
57
- segment_start, segment_end, spk = seg["segment_start"], seg["segment_end"], seg["speaker"]
58
- start_sample, end_sample = int(segment_start * sr), int(segment_end * sr)
59
  chunk = audio[0, start_sample:end_sample].numpy()
60
 
61
  reduced = nr.reduce_noise(y=chunk, sr=sr)
@@ -66,21 +66,19 @@ class ASR_Diarization:
66
  for word_info in result["chunks"]:
67
  start_ts, end_ts = word_info.get("timestamp", (None, None)) or (None, None)
68
  tokens.append({
69
- "tag": "w",
70
  "start": start_ts,
71
  "end": end_ts,
72
- "text": word_info["text"]
 
73
  })
74
 
75
  seg_dict = {
76
  "speaker": spk,
77
- "segment_start": segment_start,
78
- "segment_end": segment_end,
79
  "tokens": tokens
80
  }
81
  merged_segments.append(seg_dict)
82
- print("Sample merged segment:", merged_segments[0])
83
-
84
 
85
  if spk not in speaker_segments:
86
  speaker_segments[spk] = []
@@ -101,8 +99,8 @@ class ASR_Diarization:
101
  with open(rttm_path, "w") as f:
102
  for seg in diar_json:
103
  f.write(
104
- f"SPEAKER {base_name} 1 {seg['segment_start']:.6f} "
105
- f"{seg['segment_end']-seg['segment_start']:.6f} <NA> <NA> "
106
  f"{seg['speaker']} <NA>\n"
107
  )
108
 
 
44
  def run_diarization(self, audio_path):
45
  diarization = self.diar_pipeline(audio_path)
46
  return [
47
+ {"start": t.start, "end": t.end, "speaker": spk}
48
  for t, _, spk in diarization.itertracks(yield_label=True)
49
  ]
50
 
 
54
  speaker_segments = {}
55
 
56
  for seg in diar_json:
57
+ start, end, spk = seg["start"], seg["end"], seg["speaker"]
58
+ start_sample, end_sample = int(start * sr), int(end * sr)
59
  chunk = audio[0, start_sample:end_sample].numpy()
60
 
61
  reduced = nr.reduce_noise(y=chunk, sr=sr)
 
66
  for word_info in result["chunks"]:
67
  start_ts, end_ts = word_info.get("timestamp", (None, None)) or (None, None)
68
  tokens.append({
 
69
  "start": start_ts,
70
  "end": end_ts,
71
+ "text": word_info["text"],
72
+ "tag": "w"
73
  })
74
 
75
  seg_dict = {
76
  "speaker": spk,
77
+ "start": start,
78
+ "end": end,
79
  "tokens": tokens
80
  }
81
  merged_segments.append(seg_dict)
 
 
82
 
83
  if spk not in speaker_segments:
84
  speaker_segments[spk] = []
 
99
  with open(rttm_path, "w") as f:
100
  for seg in diar_json:
101
  f.write(
102
+ f"SPEAKER {base_name} 1 {seg['start']:.6f} "
103
+ f"{seg['end']-seg['start']:.6f} <NA> <NA> "
104
  f"{seg['speaker']} <NA>\n"
105
  )
106