"""Turn per-conversation speaker segments into textual speaking-time summaries.

Reads a JSON file of conversations, groups each conversation's segments by
speaker, and writes a JSON list with one human-readable description per
conversation.
"""

import json
import random


def seconds_to_mmss(seconds):
    """Format a number of seconds as a zero-padded ``MM:SS`` string.

    Fractional seconds are truncated (not rounded), and minutes are not
    folded into hours, so ``3600`` becomes ``"60:00"``.

    Args:
        seconds: Offset in seconds, as an int or float.

    Returns:
        The offset formatted as ``MM:SS``.
    """
    minutes, remainder = divmod(seconds, 60)
    return f"{int(minutes):02d}:{int(remainder):02d}"


# Templates for speaker segment descriptions
SPEAKER_TEMPLATES = [
    "Speaker {speaker} speaks during the following periods: {times}",
    "Speaker {speaker}'s speaking segments occur at: {times}",
    "Speaker {speaker} is active in the conversation at: {times}",
    "The following time segments belong to Speaker {speaker}: {times}",
    "Speaker {speaker} participates in the dialogue at: {times}",
    "Speaker {speaker} contributes to the conversation during: {times}",
    "Speaking turns for Speaker {speaker} are at: {times}",
    "Speaker {speaker} takes the floor at: {times}",
    "The voice of Speaker {speaker} is heard at: {times}",
    "Speaker {speaker} engages in the discussion during: {times}",
]

# Base name (without extension) of the default input and output files.
file = "silence"


def _describe_speakers(segments, chooser):
    """Build the newline-joined description of who speaks when.

    Args:
        segments: Iterable of dicts carrying ``speaker``, ``start_time`` and
            ``end_time`` keys.
        chooser: Object exposing ``choice(seq)``; used to pick one template
            per speaker.

    Returns:
        One line per speaker, speakers in sorted order, each speaker's
        intervals listed in the order they appear in ``segments``.
    """
    # Group the (start, end) intervals by speaker, preserving input order.
    speaker_times = {}
    for segment in segments:
        # Times are kept as-is (possibly float) until formatting.
        interval = (segment['start_time'], segment['end_time'])
        speaker_times.setdefault(segment['speaker'], []).append(interval)

    output_lines = []
    for speaker in sorted(speaker_times):
        time_ranges = [
            f"{seconds_to_mmss(start)}-{seconds_to_mmss(end)}"
            for start, end in speaker_times[speaker]
        ]
        # A template is drawn independently for every speaker.
        template = chooser.choice(SPEAKER_TEMPLATES)
        output_lines.append(
            template.format(speaker=speaker, times=', '.join(time_ranges))
        )
    return "\n".join(output_lines)


def process_speaker_segments(input_path=None, output_path=None, rng=None):
    """Convert a conversation-segment JSON file into speaker descriptions.

    The input must be a JSON object mapping conversation ids to objects with
    an optional ``segments`` list and an optional ``stereo_audio`` value. The
    output is a JSON list of ``{"key", "audio_url", "model_output"}`` entries,
    one per conversation, in input order.

    Args:
        input_path: JSON file to read. Defaults to ``f"{file}.json"``,
            resolved when the function is called.
        output_path: JSON file to write. Defaults to
            ``f"{file}_speaker.json"``, resolved when the function is called.
        rng: Optional object with a ``choice`` method (for example a seeded
            ``random.Random``) used to pick templates. Defaults to the
            module-level ``random`` functions.

    Returns:
        The list of result entries that was written to ``output_path``.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input is not valid JSON.
        KeyError: If a segment lacks ``speaker``, ``start_time`` or
            ``end_time``.
    """
    if input_path is None:
        input_path = f'{file}.json'
    if output_path is None:
        output_path = f'{file}_speaker.json'
    chooser = random if rng is None else rng

    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # One result entry per conversation.
    results = []
    for conversation_id, conversation in data.items():
        segments = conversation.get('segments', [])
        # NOTE(review): the fallback is an empty list even though the value is
        # emitted as "audio_url"; kept as-is so existing output is unchanged.
        audio_path = conversation.get('stereo_audio', [])
        results.append({
            "key": conversation_id,
            "audio_url": audio_path,
            "model_output": _describe_speakers(segments, chooser),
        })

    # Save the results to a JSON file
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    return results


if __name__ == "__main__":
    process_speaker_segments()