File size: 2,754 Bytes
e791fa3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import json

def seconds_to_mmss(seconds):
    minutes = int(seconds // 60)
    seconds = int(seconds % 60)
    return f"{minutes:02d}:{seconds:02d}"

filename = "silence"
def is_overlapping(current_segment, other_segments):
    """Check if the current segment overlaps with any other segment."""
    current_start = current_segment['start_time']
    current_end = current_segment['end_time']
    
    for segment in other_segments:
        if segment == current_segment:
            continue
        
        other_start = segment['start_time']
        other_end = segment['end_time']
        
        # Check if there's an overlap
        if (current_start < other_end and current_end > other_start):
            return True
    
    return False

def process_transcriptions():
    # Read the overlap_5s_716.json file
    with open(f'./{filename}.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # List to store results for all conversations
    results = []

    # Process each conversation
    for conversation_id, conversation in data.items():
        segments = conversation.get('segments', [])
        audio_path = conversation.get('stereo_audio', [])
        # Sort segments by start time
        segments.sort(key=lambda x: x['start_time'])
        
        # Process each segment and create transcription lines
        transcription_lines = []
        
        for segment in segments:
            speaker = segment['speaker']
            start_time = segment['start_time']
            end_time = segment['end_time']
            text = segment['text']
            original_text = segment['original_text']
            original_text = original_text.replace("[interrupt] ", "").strip()
            # Format timestamp
            timestamp = f"[{seconds_to_mmss(start_time)} - {seconds_to_mmss(end_time)}]"
            
            # Check if this segment overlaps with any other segment
            has_overlap = is_overlapping(segment, segments)
            
            # Format the line
            if has_overlap:
                line = f"{timestamp} Speaker {speaker}: {original_text}"
            else:
                line = f"{timestamp} Speaker {speaker}: {text}"
            
            transcription_lines.append(line)
        
        # Create result entry
        result = {
            "key": conversation_id,
            "audio_url": audio_path,
            "model_output": "\n".join(transcription_lines)
        }
        results.append(result)

    # Save the results to a JSON file
    output_file = f'./{filename}_transcription.json'
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

if __name__ == "__main__":
    process_transcriptions()