Capstone04 committed on
Commit
17a7b78
·
verified ·
1 Parent(s): e21ef93

Update asr_diarization/pipeline.py

Browse files
Files changed (1) hide show
  1. asr_diarization/pipeline.py +0 -28
asr_diarization/pipeline.py CHANGED
@@ -185,13 +185,6 @@ class ASR_Diarization:
185
  for t, _, spk in diarization.itertracks(yield_label=True)
186
  ]
187
 
188
- print(f"DEBUG DIARIZATION:")
189
- print(f" Raw diarization segments: {len(diar_segments)}")
190
-
191
- # Count unique speakers BEFORE any processing
192
- raw_speakers = list(set([seg['speaker'] for seg in diar_segments]))
193
- print(f" Raw unique speakers: {len(raw_speakers)} - {raw_speakers}")
194
-
195
  # Step 2: Calculate SNR for adaptive processing
196
  snr = self.calculate_snr(audio_path)
197
 
@@ -227,24 +220,6 @@ class ASR_Diarization:
227
  print(f"Final: {len(filtered_segments)} segments for Whisper")
228
  return filtered_segments
229
 
230
- def map_speaker_labels(self, segments, original_speakers=['A', 'B', 'C', 'D']):
231
- """Map SPEAKER_XX labels to A, B, C, D format to match original"""
232
- unique_speakers = list(set([seg['speaker'] for seg in segments]))
233
- speaker_map = {}
234
-
235
- # Create mapping from SPEAKER_00 -> A, SPEAKER_01 -> B, etc.
236
- for i, spk in enumerate(sorted(unique_speakers)):
237
- if i < len(original_speakers):
238
- speaker_map[spk] = original_speakers[i]
239
- else:
240
- speaker_map[spk] = f"SPK_{i}"
241
-
242
- # Apply mapping to all segments
243
- for seg in segments:
244
- seg['speaker'] = speaker_map[seg['speaker']]
245
-
246
- return segments, list(speaker_map.values())
247
-
248
  def merge_consecutive_speaker_segments(self, segments):
249
  """Merge only consecutive segments from the same speaker while preserving order"""
250
  if not segments:
@@ -384,9 +359,6 @@ class ASR_Diarization:
384
  # Merge consecutive segments by same speaker
385
  merged_segments = self.merge_consecutive_speaker_segments(merged_segments)
386
 
387
- # Map speaker labels to match original format (A, B, C, D)
388
- merged_segments, speakers = self.map_speaker_labels(merged_segments)
389
-
390
  # Combine ASR segments with NSE events if provided
391
  if nse_events:
392
  print(f"Combining {len(merged_segments)} ASR segments with {len(nse_events)} NSE events")
 
185
  for t, _, spk in diarization.itertracks(yield_label=True)
186
  ]
187
 
 
 
 
 
 
 
 
188
  # Step 2: Calculate SNR for adaptive processing
189
  snr = self.calculate_snr(audio_path)
190
 
 
220
  print(f"Final: {len(filtered_segments)} segments for Whisper")
221
  return filtered_segments
222
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  def merge_consecutive_speaker_segments(self, segments):
224
  """Merge only consecutive segments from the same speaker while preserving order"""
225
  if not segments:
 
359
  # Merge consecutive segments by same speaker
360
  merged_segments = self.merge_consecutive_speaker_segments(merged_segments)
361
 
 
 
 
362
  # Combine ASR segments with NSE events if provided
363
  if nse_events:
364
  print(f"Combining {len(merged_segments)} ASR segments with {len(nse_events)} NSE events")