# interactSpeech / 4JOB / process_silence.py
# Provenance (upload-page residue, commented out so the file parses):
#   uploader: Student0809 — "Add files using upload-large-folder tool"
#   commit: e791fa3 verified
import json
import os
import random
def seconds_to_mmss(seconds):
    """Format a duration given in seconds as an ``MM:SS`` string.

    Fractional seconds are truncated; minutes are not wrapped at 60,
    so e.g. 3661 seconds renders as "61:01".
    """
    mins, secs = divmod(seconds, 60)
    return f"{int(mins):02d}:{int(secs):02d}"
# Phrasing templates used when at least one silence gap was found.
# Each contains a single `{gaps}` placeholder that receives a comma-separated
# list of "MM:SS-MM:SS" intervals; one template is picked at random per
# conversation to vary the wording of the generated descriptions.
SILENCE_TEMPLATES = [
"Silence gaps longer than 3 seconds occur at: {gaps}",
"The conversation contains significant pauses at: {gaps}",
"There are silent periods of more than 3 seconds at: {gaps}",
"The dialogue features extended pauses at: {gaps}",
"Silent intervals exceeding 3 seconds are found at: {gaps}",
"The conversation includes notable gaps at: {gaps}",
"Extended periods of silence occur at: {gaps}",
"The dialogue has significant breaks at: {gaps}",
"Silent segments longer than 3 seconds appear at: {gaps}",
"The conversation shows substantial pauses at: {gaps}"
]
# Phrasing templates used when NO silence gap above the threshold was found;
# one is picked at random per conversation (no placeholders needed).
NO_SILENCE_TEMPLATES = [
"No silence gaps longer than 3 seconds were found in this conversation.",
"The conversation flows continuously without significant pauses.",
"No extended periods of silence were detected in this dialogue.",
"The conversation maintains a steady pace without notable gaps.",
"No silent intervals exceeding 3 seconds were identified.",
"The dialogue proceeds without substantial pauses.",
"No significant breaks in conversation were observed.",
"The conversation shows no extended silent periods.",
"No notable gaps in speech were detected.",
"The dialogue continues without significant silent intervals."
]
file = "silence"
def process_silence_gaps(basename=None, min_gap=3.0):
    """Detect inter-segment silence gaps and write a summary JSON file.

    Reads ``{basename}.json`` — a mapping of conversation id to a dict with
    ``segments`` (each segment has ``start_time``/``end_time`` in seconds)
    and ``stereo_audio`` keys — finds pauses between consecutive segments
    longer than ``min_gap`` seconds, and writes one summary entry per
    conversation to ``{basename}_silencegap.json``.

    Args:
        basename: Input file stem. Defaults to the module-level ``file``
            constant ("silence").
        min_gap: Minimum pause duration, in seconds, to report as a gap.
            Note the canned templates mention "3 seconds" verbatim, so a
            non-default threshold will not be reflected in their wording.
    """
    if basename is None:
        basename = file  # fall back to the module-level input stem

    with open(f'{basename}.json', 'r', encoding='utf-8') as f:
        silence_data = json.load(f)

    results = []
    for conversation_id, conversation in silence_data.items():
        segments = conversation.get('segments', [])
        # NOTE(review): a list default looks odd for a single audio path —
        # kept as-is to preserve the output schema; verify against consumers.
        audio_path = conversation.get('stereo_audio', [])

        # Gaps between consecutive segments that exceed the threshold,
        # rendered as "MM:SS-MM:SS" intervals.
        silence_gaps = [
            f"{seconds_to_mmss(cur['end_time'])}-{seconds_to_mmss(nxt['start_time'])}"
            for cur, nxt in zip(segments, segments[1:])
            if nxt['start_time'] - cur['end_time'] > min_gap
        ]

        # Randomly vary the phrasing of the generated description.
        if silence_gaps:
            template = random.choice(SILENCE_TEMPLATES)
            model_output = template.format(gaps=', '.join(silence_gaps))
        else:
            model_output = random.choice(NO_SILENCE_TEMPLATES)

        results.append({
            "key": conversation_id,
            "audio_url": audio_path,
            "model_output": model_output,
        })

    output_file = f'{basename}_silencegap.json'
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"Processed {len(results)} conversations")
    print(f"Results written to {output_file}")
# Script entry point: run the end-to-end silence-gap extraction when this
# file is executed directly (not on import).
if __name__ == "__main__":
    process_silence_gaps()