|
|
import json |
|
|
import os |
|
|
import random |
|
|
|
|
|
def seconds_to_mmss(seconds):
    """Format a duration in seconds as a zero-padded ``MM:SS`` string.

    Fractional seconds are truncated, not rounded. Minutes are not wrapped
    into hours, so values of an hour or more yield e.g. ``"60:00"``.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        The formatted ``MM:SS`` string.
    """
    whole_minutes, remainder = divmod(seconds, 60)
    return f"{int(whole_minutes):02d}:{int(remainder):02d}"
|
|
|
|
|
|
|
|
# Sentence templates describing one detected overlap. Each template carries
# {start} and {end} placeholders that are filled in with str.format().
OVERLAP_TEMPLATES = [
    "An overlap where multiple speakers talk simultaneously for more than 3 seconds starts at {start} and ends at {end}.",
    "The overlap starts at {start} and ends at {end}.",
    "Multiple speakers talk simultaneously from {start} to {end}, an overlap lasting over three seconds.",
    "A conversation overlap occurs between {start} and {end}, with multiple people speaking at once.",
    "There is a significant overlap in the conversation from {start} to {end}, where speakers talk simultaneously.",
    "During the period {start}-{end}, multiple participants speak at the same time.",
    "A 3+ second overlap is detected between {start} and {end}, with concurrent speakers.",
    "The conversation features overlapping speech from {start} to {end}.",
    "Several speakers talk over each other between {start} and {end}.",
    "An overlapping segment is identified from {start} to {end}, lasting more than 3 seconds.",
]
|
|
|
|
|
|
|
|
# Fixed sentences (no placeholders) used when a conversation has no
# qualifying overlap; one is picked at random per conversation.
NO_OVERLAP_TEMPLATES = [
    "No significant overlaps found in this conversation.",
    "The conversation shows no overlapping speech segments longer than 3 seconds.",
    "No instances of multiple speakers talking simultaneously for more than 3 seconds were detected.",
    "The dialogue proceeds without any significant overlaps between speakers.",
    "No overlapping segments exceeding 3 seconds were identified in this conversation.",
    "The speakers maintain clear turn-taking without significant overlaps.",
    "This conversation features no substantial overlapping speech periods.",
    "No overlapping speech segments of 3 seconds or longer were found.",
    "The conversation flows smoothly without any significant speaker overlaps.",
    "All speakers maintain clear speaking turns without substantial overlap.",
]
|
|
|
|
|
|
|
|
# Fixed sentences stating that a conversation does contain overlaps longer
# than 3 seconds (no timestamps).
# NOTE(review): nothing in the visible part of this file references this
# list - confirm whether another module uses it before removing.
CORRECT_OVERLAP_TEMPLATES = [
    "The conversation contains overlapping speech segments longer than 3 seconds.",
    "The dialogue features instances where speakers overlap for more than 3 seconds.",
    "The recording includes multiple overlapping exchanges exceeding 3 seconds.",
    "There are clear overlaps in speech lasting beyond 3 seconds.",
    "Speaker interruptions or overlaps exceed 3 seconds in duration.",
    "Clear evidence shows speech overlaps extending beyond 3 seconds in duration.",
    "Speaker overlap durations consistently breach the 3-second threshold.",
    "Recorded overlaps between speakers routinely last longer than 3 seconds.",
    "The interaction contains several instances where voices overlap for over 3 seconds.",
]
|
|
# Base name (without extension) of the dataset: input is read from
# "<file>.json" and results are written to "<file>_overlapgap.json".
file = "silence"
|
|
def _find_overlap_periods(segments, min_duration=3):
    """Return (start, end) pairs where consecutive segments overlap.

    Only each segment and its immediate successor in the list are compared.
    A pair qualifies when the shared interval lasts at least ``min_duration``
    seconds.
    """
    # NOTE(review): overlaps between non-adjacent segments are not detected;
    # presumably segments are ordered by start time - confirm with the data.
    periods = []
    for current, following in zip(segments, segments[1:]):
        overlap_start = max(current['start_time'], following['start_time'])
        overlap_end = min(current['end_time'], following['end_time'])
        if overlap_end - overlap_start >= min_duration:
            periods.append((overlap_start, overlap_end))
    return periods


def process_overlap_segments():
    """Generate a textual overlap description for every conversation.

    Reads ``{file}.json`` (``file`` is the module-level base name), which is
    expected to be a JSON object mapping conversation ids to dicts with an
    optional ``'segments'`` list (each segment having numeric ``'start_time'``
    and ``'end_time'``) and an optional ``'stereo_audio'`` entry.

    For each conversation, every overlap of at least 3 seconds between
    consecutive segments is described with a randomly chosen entry of
    ``OVERLAP_TEMPLATES``; the descriptions are joined with spaces. If there
    is no such overlap, a random entry of ``NO_OVERLAP_TEMPLATES`` is used.

    The list of ``{"key", "audio_url", "model_output"}`` records is written to
    ``{file}_overlapgap.json`` and a short summary is printed.

    Raises:
        FileNotFoundError: If the input file does not exist.
        json.JSONDecodeError: If the input file is not valid JSON.
        KeyError: If a segment lacks ``'start_time'`` or ``'end_time'``.
    """
    with open(f'{file}.json', 'r', encoding='utf-8') as src:
        overlap_data = json.load(src)

    results = []
    for conversation_id, conversation in overlap_data.items():
        periods = _find_overlap_periods(conversation.get('segments', []))

        if periods:
            # Keep numeric (start, end) pairs and format them only here, so
            # the timestamps never have to be re-parsed from a joined string.
            # One random.choice call per period, in order.
            model_output = " ".join(
                random.choice(OVERLAP_TEMPLATES).format(
                    start=seconds_to_mmss(start),
                    end=seconds_to_mmss(end),
                )
                for start, end in periods
            )
        else:
            model_output = random.choice(NO_OVERLAP_TEMPLATES)

        results.append({
            "key": conversation_id,
            # The default stays [] (not None/"") so the output is unchanged
            # for conversations that have no 'stereo_audio' entry.
            "audio_url": conversation.get('stereo_audio', []),
            "model_output": model_output,
        })

    output_file = f'{file}_overlapgap.json'
    with open(output_file, 'w', encoding='utf-8') as dst:
        json.dump(results, dst, indent=2, ensure_ascii=False)

    print(f"Processed {len(results)} conversations")
    print(f"Results written to {output_file}")
|
|
|
|
|
if __name__ == "__main__":
    # Run the conversion only when executed as a script, not on import.
    process_overlap_segments()