# interactSpeech / 4JOB / process_silence.py
# Provenance (upload-page residue, commented out so the file parses):
#   uploader: Student0809 — "Add files using upload-large-folder tool"
#   commit: e791fa3 verified
import json
import os
import random
def seconds_to_mmss(seconds):
    """Format a duration given in seconds as an ``MM:SS`` string.

    Fractional seconds are truncated; minutes are not wrapped at 60,
    so e.g. 3661 seconds renders as "61:01".
    """
    mins, secs = divmod(seconds, 60)
    return f"{int(mins):02d}:{int(secs):02d}"
# Phrasing templates used when at least one silence gap was found.
# Each contains a single `{gaps}` placeholder that receives a comma-separated
# list of "MM:SS-MM:SS" intervals; one template is picked at random per
# conversation to vary the wording of the generated descriptions.
SILENCE_TEMPLATES = [
"Silence gaps longer than 3 seconds occur at: {gaps}",
"The conversation contains significant pauses at: {gaps}",
"There are silent periods of more than 3 seconds at: {gaps}",
"The dialogue features extended pauses at: {gaps}",
"Silent intervals exceeding 3 seconds are found at: {gaps}",
"The conversation includes notable gaps at: {gaps}",
"Extended periods of silence occur at: {gaps}",
"The dialogue has significant breaks at: {gaps}",
"Silent segments longer than 3 seconds appear at: {gaps}",
"The conversation shows substantial pauses at: {gaps}"
]
# Phrasing templates used when NO silence gap above the threshold was found;
# one is picked at random per conversation (no placeholders needed).
NO_SILENCE_TEMPLATES = [
"No silence gaps longer than 3 seconds were found in this conversation.",
"The conversation flows continuously without significant pauses.",
"No extended periods of silence were detected in this dialogue.",
"The conversation maintains a steady pace without notable gaps.",
"No silent intervals exceeding 3 seconds were identified.",
"The dialogue proceeds without substantial pauses.",
"No significant breaks in conversation were observed.",
"The conversation shows no extended silent periods.",
"No notable gaps in speech were detected.",
"The dialogue continues without significant silent intervals."
]
file = "silence"
def process_silence_gaps(basename=None, min_gap=3.0):
    """Detect inter-segment silence gaps and write a summary JSON file.

    Reads ``{basename}.json`` — a mapping of conversation id to a dict with
    ``segments`` (each segment has ``start_time``/``end_time`` in seconds)
    and ``stereo_audio`` keys — finds pauses between consecutive segments
    longer than ``min_gap`` seconds, and writes one summary entry per
    conversation to ``{basename}_silencegap.json``.

    Args:
        basename: Input file stem. Defaults to the module-level ``file``
            constant ("silence").
        min_gap: Minimum pause duration, in seconds, to report as a gap.
            Note the canned templates mention "3 seconds" verbatim, so a
            non-default threshold will not be reflected in their wording.
    """
    if basename is None:
        basename = file  # fall back to the module-level input stem

    with open(f'{basename}.json', 'r', encoding='utf-8') as f:
        silence_data = json.load(f)

    results = []
    for conversation_id, conversation in silence_data.items():
        segments = conversation.get('segments', [])
        # NOTE(review): a list default looks odd for a single audio path —
        # kept as-is to preserve the output schema; verify against consumers.
        audio_path = conversation.get('stereo_audio', [])

        # Gaps between consecutive segments that exceed the threshold,
        # rendered as "MM:SS-MM:SS" intervals.
        silence_gaps = [
            f"{seconds_to_mmss(cur['end_time'])}-{seconds_to_mmss(nxt['start_time'])}"
            for cur, nxt in zip(segments, segments[1:])
            if nxt['start_time'] - cur['end_time'] > min_gap
        ]

        # Randomly vary the phrasing of the generated description.
        if silence_gaps:
            template = random.choice(SILENCE_TEMPLATES)
            model_output = template.format(gaps=', '.join(silence_gaps))
        else:
            model_output = random.choice(NO_SILENCE_TEMPLATES)

        results.append({
            "key": conversation_id,
            "audio_url": audio_path,
            "model_output": model_output,
        })

    output_file = f'{basename}_silencegap.json'
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"Processed {len(results)} conversations")
    print(f"Results written to {output_file}")
# Script entry point: run the end-to-end silence-gap extraction when this
# file is executed directly (not on import).
if __name__ == "__main__":
    process_silence_gaps()