Student0809
/

interactSpeech

Model card Files Files and versions

interactSpeech / 4JOB /process_speaker.py

Student0809's picture

Add files using upload-large-folder tool

b6a70f8 verified 5 months ago

history blame contribute delete

2.95 kB

	import json
	import random

	def seconds_to_mmss(seconds):
	    """Render a duration given in seconds as a zero-padded ``MM:SS`` string.

	    Fractional seconds are dropped. Minutes are never folded into hours, so
	    a duration of 100 minutes or more yields a minutes field wider than two
	    digits (e.g. ``6000`` -> ``"100:00"``).
	    """
	    whole_minutes, leftover = divmod(seconds, 60)
	    return "{:02d}:{:02d}".format(int(whole_minutes), int(leftover))

	# Sentence templates used to describe one speaker's segments. Every template
	# has two str.format fields: {speaker} (the speaker label) and {times} (a
	# comma-separated list of time ranges). process_speaker_segments picks one
	# of them at random for each speaker.
	SPEAKER_TEMPLATES = [
	"Speaker {speaker} speaks during the following periods: {times}",
	"Speaker {speaker}'s speaking segments occur at: {times}",
	"Speaker {speaker} is active in the conversation at: {times}",
	"The following time segments belong to Speaker {speaker}: {times}",
	"Speaker {speaker} participates in the dialogue at: {times}",
	"Speaker {speaker} contributes to the conversation during: {times}",
	"Speaking turns for Speaker {speaker} are at: {times}",
	"Speaker {speaker} takes the floor at: {times}",
	"The voice of Speaker {speaker} is heard at: {times}",
	"Speaker {speaker} engages in the discussion during: {times}"
	]
	# Base name (without extension) from which process_speaker_segments builds
	# its paths: "<file>.json" for the input, "<file>_speaker.json" for the output.
	file = "silence"
	def process_speaker_segments(base_name=None):
	    """Describe when each speaker talks in every conversation and save it as JSON.

	    Reads ``<base_name>.json``, which must hold a JSON object mapping a
	    conversation id to a conversation record. From each record the
	    ``segments`` list (items with ``speaker``, ``start_time`` and
	    ``end_time``) and the ``stereo_audio`` value are used. For every speaker,
	    in sorted order, one randomly chosen entry of ``SPEAKER_TEMPLATES`` is
	    filled with the speaker label and that speaker's ``MM:SS-MM:SS`` ranges.

	    The result, a list of ``{"key", "audio_url", "model_output"}`` objects
	    (one per conversation, in input order), is written to
	    ``<base_name>_speaker.json``.

	    Args:
	        base_name: Path of the input file without the ``.json`` extension.
	            When omitted, the module-level ``file`` value is used.

	    Raises:
	        KeyError: If a segment lacks ``speaker``, ``start_time`` or
	            ``end_time``.
	    """
	    if base_name is None:
	        # Looked up at call time, so reassigning the module-level name
	        # before calling still takes effect.
	        base_name = file

	    with open(f'{base_name}.json', 'r', encoding='utf-8') as f:
	        data = json.load(f)

	    results = []
	    for conversation_id, conversation in data.items():
	        # Group (start, end) intervals by speaker, keeping segment order.
	        speaker_times = {}
	        for segment in conversation.get('segments', []):
	            speaker = segment['speaker']
	            # Times stay as loaded; seconds_to_mmss does the conversion.
	            interval = (segment['start_time'], segment['end_time'])
	            speaker_times.setdefault(speaker, []).append(interval)

	        # One description line per speaker, in sorted speaker order.
	        output_lines = []
	        for speaker in sorted(speaker_times):
	            time_ranges = [
	                f"{seconds_to_mmss(start)}-{seconds_to_mmss(end)}"
	                for start, end in speaker_times[speaker]
	            ]
	            # Exactly one random draw per speaker.
	            template = random.choice(SPEAKER_TEMPLATES)
	            output_lines.append(
	                template.format(speaker=speaker, times=', '.join(time_ranges))
	            )

	        results.append({
	            "key": conversation_id,
	            # Falls back to an empty list when the record has no
	            # 'stereo_audio' entry.
	            "audio_url": conversation.get('stereo_audio', []),
	            "model_output": "\n".join(output_lines),
	        })

	    # ensure_ascii=False keeps non-ASCII text readable in the output file.
	    with open(f'{base_name}_speaker.json', 'w', encoding='utf-8') as f:
	        json.dump(results, f, indent=2, ensure_ascii=False)

	# Run the conversion only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == "__main__":
	    process_speaker_segments()