interactSpeech / 4JOB /process_transcription.py
Student0809's picture
Add files using upload-large-folder tool
e791fa3 verified
import json
def seconds_to_mmss(seconds):
    """Format a duration given in seconds as a zero-padded ``MM:SS`` string.

    Fractional seconds are truncated rather than rounded. Minutes are not
    wrapped into hours, so 3600 seconds is rendered as ``60:00``.
    """
    whole_minutes, leftover = divmod(seconds, 60)
    return "{:02d}:{:02d}".format(int(whole_minutes), int(leftover))
# Base name (no directory, no extension) from which process_transcriptions()
# derives the JSON file it reads and the "*_transcription.json" file it writes.
filename = "silence"
def is_overlapping(current_segment, other_segments):
    """Return True if ``current_segment`` shares time with any other segment.

    Segments are mappings with ``start_time`` and ``end_time`` entries.
    Entries of ``other_segments`` that compare equal to ``current_segment``
    are skipped, so the segment may be part of the list it is checked
    against (note that an equal duplicate is skipped as well). Two segments
    that merely touch at an endpoint do not count as overlapping.
    """
    begin = current_segment['start_time']
    finish = current_segment['end_time']

    def _collides(candidate):
        # The segment itself (or an equal copy) never counts as an overlap.
        if candidate == current_segment:
            return False
        cand_begin = candidate['start_time']
        cand_finish = candidate['end_time']
        # Strict inequalities: the intervals must share more than a boundary.
        return begin < cand_finish and finish > cand_begin

    return any(_collides(candidate) for candidate in other_segments)
def process_transcriptions(input_path=None, output_path=None):
    """Convert a JSON file of conversations into per-conversation transcripts.

    The input JSON is a mapping of conversation id to a conversation object.
    For each conversation, its ``segments`` are sorted by ``start_time`` and
    rendered one per line as::

        [MM:SS - MM:SS] Speaker <speaker>: <text>

    A segment that overlaps another segment of the same conversation in time
    is rendered with its ``original_text`` (with every ``"[interrupt] "``
    marker removed and surrounding whitespace stripped); every other segment
    is rendered with its ``text``.

    The output is a JSON list with one object per conversation holding
    ``key`` (the conversation id), ``audio_url`` (the conversation's
    ``stereo_audio`` value) and ``model_output`` (the joined lines).

    Args:
        input_path: JSON file to read. Defaults to ``./<filename>.json``,
            where ``filename`` is the module-level constant.
        output_path: JSON file to write. When omitted it defaults to
            ``./<filename>_transcription.json`` if ``input_path`` was also
            omitted, otherwise to ``input_path`` with its extension replaced
            by ``_transcription.json``.

    Raises:
        KeyError: If a segment lacks ``speaker``, ``start_time``,
            ``end_time``, ``text`` or ``original_text``.
    """
    if input_path is None:
        # No arguments: reproduce the historical paths built from `filename`.
        input_path = f'./{filename}.json'
        derived_output = f'./{filename}_transcription.json'
    else:
        import os  # local import keeps this function self-contained

        # Write next to the chosen input instead of the global default, so a
        # custom input never clobbers the default output file.
        stem, _ext = os.path.splitext(input_path)
        derived_output = f'{stem}_transcription.json'
    if output_path is None:
        output_path = derived_output

    # Load the conversations keyed by conversation id.
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # One result entry per conversation.
    results = []

    for conversation_id, conversation in data.items():
        segments = conversation.get('segments', [])
        audio_path = conversation.get('stereo_audio', [])

        # Chronological order so the transcript reads top to bottom.
        segments.sort(key=lambda x: x['start_time'])

        transcription_lines = []
        for segment in segments:
            speaker = segment['speaker']
            start_time = segment['start_time']
            end_time = segment['end_time']
            text = segment['text']
            original_text = segment['original_text']
            original_text = original_text.replace("[interrupt] ", "").strip()

            timestamp = f"[{seconds_to_mmss(start_time)} - {seconds_to_mmss(end_time)}]"

            # Overlapping speech uses the original wording (minus interrupt
            # markers); non-overlapping speech uses the regular text.
            has_overlap = is_overlapping(segment, segments)
            if has_overlap:
                line = f"{timestamp} Speaker {speaker}: {original_text}"
            else:
                line = f"{timestamp} Speaker {speaker}: {text}"

            transcription_lines.append(line)

        result = {
            "key": conversation_id,
            "audio_url": audio_path,
            "model_output": "\n".join(transcription_lines)
        }
        results.append(result)

    # Persist all transcripts; keep non-ASCII text readable in the output.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
# Run the conversion only when this file is executed as a script, so that
# importing the module has no file-system side effects.
if __name__ == "__main__":
    process_transcriptions()