interactSpeech / cotSFT_new /process_transcription.py

Add files using upload-large-folder tool

3438cdb verified 9 months ago

2.76 kB

	import json

	def seconds_to_mmss(seconds):
	    """Format a duration in seconds as a zero-padded ``MM:SS`` string.

	    Fractional seconds are truncated rather than rounded. Minutes are not
	    wrapped into hours, so 6000 seconds is rendered as ``"100:00"``.

	    Args:
	        seconds: Duration in seconds (int or float).

	    Returns:
	        The formatted string, e.g. ``"01:05"`` for ``65``.
	    """
	    # divmod yields the same pair as (seconds // 60, seconds % 60).
	    whole_minutes, remainder = divmod(seconds, 60)
	    return f"{int(whole_minutes):02d}:{int(remainder):02d}"

	# Base name (no extension) from which process_transcriptions derives both
	# the input path ('./<filename>.json') and the output path
	# ('./<filename>_transcription.json').
	filename = "correct_output"
	def is_overlapping(current_segment, other_segments):
	    """Check if the current segment overlaps with any other segment.

	    Overlap is tested with strict comparisons
	    (``current_start < other_end and current_end > other_start``), so two
	    segments that merely touch at an endpoint are not considered
	    overlapping.

	    Entries of ``other_segments`` that compare equal (``==``) to
	    ``current_segment`` are skipped, which lets callers pass the full
	    segment list including the segment itself. Note that an exact
	    duplicate of the segment is skipped as well.

	    Args:
	        current_segment: Mapping with ``'start_time'`` and ``'end_time'``.
	        other_segments: Iterable of mappings with the same two keys.

	    Returns:
	        True if at least one other segment overlaps, otherwise False.
	    """
	    current_start = current_segment['start_time']
	    current_end = current_segment['end_time']

	    # Lazily pair up the bounds of every segment that is not the current one.
	    candidate_spans = (
	        (segment['start_time'], segment['end_time'])
	        for segment in other_segments
	        if segment != current_segment
	    )
	    return any(
	        current_start < other_end and current_end > other_start
	        for other_start, other_end in candidate_spans
	    )

	def process_transcriptions():
	    """Render a timestamped transcript per conversation and save them all.

	    Reads ``./{filename}.json``, which is iterated as a mapping from
	    conversation id to a conversation dict. For each conversation the
	    ``'segments'`` list is sorted in place by ``'start_time'`` and every
	    segment becomes one line of the form
	    ``[MM:SS - MM:SS] Speaker <speaker>: <text>``.

	    A segment that overlaps another segment of the same conversation is
	    rendered with its ``'original_text'`` (with every ``"[interrupt] "``
	    marker removed and surrounding whitespace stripped); all other
	    segments are rendered with their ``'text'``.

	    The collected entries (``key``, ``audio_url``, ``model_output``) are
	    written as a JSON list to ``./{filename}_transcription.json``.

	    Raises:
	        KeyError: If a segment lacks ``'speaker'``, ``'start_time'``,
	            ``'end_time'``, ``'text'`` or ``'original_text'``.
	    """
	    # Load the input file named after the module-level ``filename``.
	    with open(f'./{filename}.json', 'r', encoding='utf-8') as f:
	        data = json.load(f)

	    # One entry per conversation.
	    results = []

	    for conversation_id, conversation in data.items():
	        segments = conversation.get('segments', [])
	        audio_path = conversation.get('stereo_audio', [])
	        # Chronological order drives the order of the transcript lines.
	        segments.sort(key=lambda x: x['start_time'])

	        transcription_lines = []
	        for segment in segments:
	            speaker = segment['speaker']
	            start_time = segment['start_time']
	            end_time = segment['end_time']
	            text = segment['text']
	            original_text = segment['original_text']
	            original_text = original_text.replace("[interrupt] ", "").strip()

	            timestamp = f"[{seconds_to_mmss(start_time)} - {seconds_to_mmss(end_time)}]"

	            # Overlapping speech uses the original wording; everything
	            # else uses the regular text.
	            spoken = original_text if is_overlapping(segment, segments) else text
	            transcription_lines.append(f"{timestamp} Speaker {speaker}: {spoken}")

	        results.append({
	            "key": conversation_id,
	            "audio_url": audio_path,
	            "model_output": "\n".join(transcription_lines)
	        })

	    # Persist all conversations as a single JSON list.
	    output_file = f'./{filename}_transcription.json'
	    with open(output_file, 'w', encoding='utf-8') as f:
	        json.dump(results, f, indent=2, ensure_ascii=False)

	# Run the conversion only when this file is executed as a script, so that
	# importing the module has no file-system side effects.
	if __name__ == "__main__":
	    process_transcriptions()