|
|
import json |
|
|
import random |
|
|
|
|
|
def seconds_to_mmss(seconds):
    """Format a duration in seconds as a zero-padded ``MM:SS`` string.

    Fractional seconds are truncated; minutes are not wrapped at 60
    (e.g. 3600 seconds formats as ``60:00``).
    """
    mins, secs = divmod(seconds, 60)
    return f"{int(mins):02d}:{int(secs):02d}"
|
|
|
|
|
|
|
|
# Sentence templates for describing one speaker's activity. Each template
# is filled via str.format with two keys: {speaker} (the speaker label)
# and {times} (a comma-separated list of MM:SS-MM:SS ranges). A template
# is picked at random per speaker to vary the generated text.
SPEAKER_TEMPLATES = [
    "Speaker {speaker} speaks during the following periods: {times}",
    "Speaker {speaker}'s speaking segments occur at: {times}",
    "Speaker {speaker} is active in the conversation at: {times}",
    "The following time segments belong to Speaker {speaker}: {times}",
    "Speaker {speaker} participates in the dialogue at: {times}",
    "Speaker {speaker} contributes to the conversation during: {times}",
    "Speaking turns for Speaker {speaker} are at: {times}",
    "Speaker {speaker} takes the floor at: {times}",
    "The voice of Speaker {speaker} is heard at: {times}",
    "Speaker {speaker} engages in the discussion during: {times}"
]
|
|
file = "silence" |
|
|
def process_speaker_segments(basename=None):
    """Turn diarization segments into templated per-speaker summaries.

    Reads ``{basename}.json`` — a mapping of conversation id to a dict
    with a ``segments`` list (each segment carrying ``speaker``,
    ``start_time``, ``end_time``) and a ``stereo_audio`` path — and
    writes ``{basename}_speaker.json``: a list of records with ``key``,
    ``audio_url`` and a ``model_output`` string of one randomly
    templated sentence per speaker.

    Args:
        basename: Base name (no extension) of the input/output files.
            Defaults to the module-level ``file`` constant, preserving
            the original behavior.
    """
    if basename is None:
        basename = file

    with open(f'{basename}.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    results = []

    for conversation_id, conversation in data.items():
        segments = conversation.get('segments', [])
        # NOTE(review): default of [] looks odd for a single audio path —
        # presumably this is a string; confirm against the JSON schema.
        audio_path = conversation.get('stereo_audio', [])

        # Group (start, end) pairs by speaker, keeping segment order.
        speaker_times = {}
        for segment in segments:
            speaker_times.setdefault(segment['speaker'], []).append(
                (segment['start_time'], segment['end_time'])
            )

        # One sentence per speaker, speakers in sorted order for
        # deterministic ordering of the output lines.
        output_lines = []
        for speaker in sorted(speaker_times):
            time_ranges = [
                f"{seconds_to_mmss(start)}-{seconds_to_mmss(end)}"
                for start, end in speaker_times[speaker]
            ]
            template = random.choice(SPEAKER_TEMPLATES)
            output_lines.append(
                template.format(speaker=speaker, times=', '.join(time_ranges))
            )

        results.append({
            "key": conversation_id,
            "audio_url": audio_path,
            "model_output": "\n".join(output_lines),
        })

    output_file = f'{basename}_speaker.json'
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
|
|
|
|
|
if __name__ == "__main__": |
|
|
process_speaker_segments() |