liuyang committed on
Commit
0b6cc7c
·
1 Parent(s): 6475331

try using diarization segments as clip_timestamps

Browse files
Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -916,9 +916,20 @@ class WhisperTranscriber:
916
  wav_path, num_speakers, base_offset_s=base_offset_s
917
  )
918
 
 
 
 
 
 
 
 
 
 
 
 
919
  # Step 2: Transcribe full audio once
920
  transcription_results, detected_language = self.transcribe_full_audio(
921
- wav_path, language, translate, prompt, batch_size, base_offset_s=base_offset_s, clip_timestamps=diarization_segments
922
  )
923
 
924
 
 
916
  wav_path, num_speakers, base_offset_s=base_offset_s
917
  )
918
 
919
+ # Convert diarization_segments to clip_timestamps format
920
+ # Format: "start,end,start,end,..." with timestamps relative to the file (subtract base_offset_s)
921
+ clip_timestamps_list = []
922
+ for seg in diarization_segments:
923
+ # Convert global timestamps back to local file timestamps
924
+ local_start = max(0.0, float(seg["start"]) - base_offset_s)
925
+ local_end = max(local_start, float(seg["end"]) - base_offset_s)
926
+ clip_timestamps_list.extend([str(local_start), str(local_end)])
927
+
928
+ clip_timestamps = ",".join(clip_timestamps_list) if clip_timestamps_list else None
929
+
930
  # Step 2: Transcribe full audio once
931
  transcription_results, detected_language = self.transcribe_full_audio(
932
+ wav_path, language, translate, prompt, batch_size, base_offset_s=base_offset_s, clip_timestamps=clip_timestamps
933
  )
934
 
935