import json
import os
import random

# Minimum pause, in seconds, that counts as a silence gap. Several templates
# below spell out "3 seconds" in their wording, so keep the two in sync.
SILENCE_THRESHOLD_SECONDS = 3


def seconds_to_mmss(seconds) -> str:
    """Format a duration given in seconds as a zero-padded ``MM:SS`` string.

    Fractional seconds are truncated. Minutes are not wrapped at 60, so
    3600 seconds is rendered as ``"60:00"``.
    """
    minutes, remainder = divmod(seconds, 60)
    return f"{int(minutes):02d}:{int(remainder):02d}"


# Templates for silence gap descriptions
SILENCE_TEMPLATES = [
    "Silence gaps longer than 3 seconds occur at: {gaps}",
    "The conversation contains significant pauses at: {gaps}",
    "There are silent periods of more than 3 seconds at: {gaps}",
    "The dialogue features extended pauses at: {gaps}",
    "Silent intervals exceeding 3 seconds are found at: {gaps}",
    "The conversation includes notable gaps at: {gaps}",
    "Extended periods of silence occur at: {gaps}",
    "The dialogue has significant breaks at: {gaps}",
    "Silent segments longer than 3 seconds appear at: {gaps}",
    "The conversation shows substantial pauses at: {gaps}",
]

# Templates for no silence case
NO_SILENCE_TEMPLATES = [
    "No silence gaps longer than 3 seconds were found in this conversation.",
    "The conversation flows continuously without significant pauses.",
    "No extended periods of silence were detected in this dialogue.",
    "The conversation maintains a steady pace without notable gaps.",
    "No silent intervals exceeding 3 seconds were identified.",
    "The dialogue proceeds without substantial pauses.",
    "No significant breaks in conversation were observed.",
    "The conversation shows no extended silent periods.",
    "No notable gaps in speech were detected.",
    "The dialogue continues without significant silent intervals.",
]

# Base name shared by the default input ("<file>.json") and the default
# output ("<file>_silencegap.json").
file = "silence"


def _find_silence_gaps(segments, min_gap=SILENCE_THRESHOLD_SECONDS):
    """Return ``"MM:SS-MM:SS"`` ranges with more than *min_gap* seconds of silence.

    Each segment is a mapping with numeric ``start_time`` and ``end_time``
    entries. Segments are visited in order of ``start_time`` and every start
    is compared against the latest end time seen so far, so a short segment
    that lies inside a longer, still-running one cannot produce a false gap.
    """
    gaps = []
    latest_end = None
    for segment in sorted(segments, key=lambda seg: seg['start_time']):
        start = segment['start_time']
        end = segment['end_time']
        if latest_end is not None and start - latest_end > min_gap:
            gaps.append(f"{seconds_to_mmss(latest_end)}-{seconds_to_mmss(start)}")
        # Only move the marker forward: an overlapping segment may end
        # earlier than one that started before it.
        if latest_end is None or end > latest_end:
            latest_end = end
    return gaps


def process_silence_gaps(input_path=None, output_path=None) -> None:
    """Describe the long silences of every conversation and save them as JSON.

    The input JSON is an object mapping a conversation id to an object with
    a ``segments`` list (entries carrying ``start_time`` / ``end_time``) and
    a ``stereo_audio`` value. For each conversation one randomly chosen
    sentence is produced, either listing the gaps longer than
    ``SILENCE_THRESHOLD_SECONDS`` or stating that there are none.

    Args:
        input_path: JSON file to read. Defaults to ``"<file>.json"``.
        output_path: JSON file to write. Defaults to
            ``"<file>_silencegap.json"``.

    Raises:
        KeyError: if a segment lacks ``start_time`` or ``end_time``.
        OSError: if the input cannot be read or the output cannot be written.
    """
    # Resolve the defaults at call time so a caller that rebinds the
    # module-level ``file`` still gets the paths it expects.
    if input_path is None:
        input_path = f'{file}.json'
    if output_path is None:
        output_path = f'{file}_silencegap.json'

    with open(input_path, 'r', encoding='utf-8') as f:
        silence_data = json.load(f)

    # One result entry per conversation, in input order.
    results = []
    for conversation_id, conversation in silence_data.items():
        silence_gaps = _find_silence_gaps(conversation.get('segments', []))

        if silence_gaps:
            template = random.choice(SILENCE_TEMPLATES)
            model_output = template.format(gaps=', '.join(silence_gaps))
        else:
            model_output = random.choice(NO_SILENCE_TEMPLATES)

        results.append({
            "key": conversation_id,
            # A missing path is emitted as an empty list, matching the
            # output this script has always produced.
            "audio_url": conversation.get('stereo_audio', []),
            "model_output": model_output,
        })

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"Processed {len(results)} conversations")
    print(f"Results written to {output_path}")


if __name__ == "__main__":
    process_silence_gaps()