File size: 7,089 Bytes
20e9692
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""Data types for the segmentation pipeline."""

from dataclasses import dataclass
from typing import Optional


@dataclass
class VadSegment:
    """One raw segment emitted by voice-activity detection (VAD).

    Holds only timing and position information; no transcription or
    matching data is attached at this stage. Fields are accepted
    positionally in the order declared below.
    """

    # Segment boundaries. NOTE(review): units are not fixed by this class;
    # presumably seconds from the start of the audio -- confirm with caller.
    start_time: float
    end_time: float

    # Index identifying this segment among the VAD output.
    segment_idx: int


@dataclass
class SegmentInfo:
    """A segment after processing: timing, transcription and match outcome.

    The first six fields are required and positional; the remaining three
    are optional and default to "no error / no flag raised".
    """

    # --- Timing ---
    start_time: float
    end_time: float

    # --- Transcription and matching ---
    transcribed_text: str   # Text produced by transcription
    matched_text: str       # Text the transcription was matched to
    matched_ref: str        # Reference span, e.g. "2:255:1-2:255:5"
    match_score: float      # Score of the match

    # --- Optional diagnostics ---
    error: Optional[str] = None                 # Error message; None when absent
    has_missing_words: bool = False             # Flag: match has missing words
    potentially_undersegmented: bool = False    # Flag: may be under-segmented


@dataclass
class ProfilingData:
    """Profiling metrics for the processing pipeline.

    Timing fields are durations that ``summary()`` renders as seconds
    (or milliseconds for the per-segment DP figures). All fields default
    to zero so a fresh instance can be filled in incrementally.

    The list-valued fields (``asr_batch_profiling``, ``tier1_segments``,
    ``tier2_segments``) default to ``None`` rather than an empty list;
    ``summary()`` treats ``None`` and an empty list the same way.
    """
    # Preprocessing
    resample_time: float = 0.0               # Audio resampling time
    # VAD profiling
    vad_model_load_time: float = 0.0
    vad_model_move_time: float = 0.0
    vad_inference_time: float = 0.0
    vad_gpu_time: float = 0.0               # Actual GPU lease execution time
    vad_wall_time: float = 0.0              # Wall-clock time (includes queue wait)
    # Phoneme ASR profiling
    asr_time: float = 0.0                    # Wav2vec wall-clock time (includes queue wait)
    asr_gpu_time: float = 0.0               # Actual GPU lease execution time
    asr_model_move_time: float = 0.0         # ASR model GPU move time
    asr_sorting_time: float = 0.0            # Duration-sorting time
    asr_batch_build_time: float = 0.0        # Dynamic batch construction time
    # Per-batch timing details. summary() reads each entry as a mapping with
    # keys: batch_num, size, time, min_dur, max_dur, avg_dur, total_seconds,
    # pad_waste.
    asr_batch_profiling: Optional[list] = None
    # Global anchor profiling
    anchor_time: float = 0.0                 # N-gram voting anchor detection
    # Phoneme alignment profiling
    phoneme_total_time: float = 0.0          # Overall phoneme matching time
    phoneme_ref_build_time: float = 0.0      # Time to build chapter reference
    phoneme_dp_total_time: float = 0.0       # Total DP time across all segments
    phoneme_dp_min_time: float = 0.0         # Min DP time per segment
    phoneme_dp_max_time: float = 0.0         # Max DP time per segment
    phoneme_window_setup_time: float = 0.0   # Total window slicing time
    phoneme_result_build_time: float = 0.0   # Total result construction time
    phoneme_num_segments: int = 0            # Number of segments aligned
    match_wall_time: float = 0.0             # Total matching wall-clock time
    # Retry / reanchor counters
    tier1_attempts: int = 0
    tier1_passed: int = 0
    tier1_segments: Optional[list] = None
    tier2_attempts: int = 0
    tier2_passed: int = 0
    tier2_segments: Optional[list] = None
    consec_reanchors: int = 0
    segments_attempted: int = 0
    segments_passed: int = 0
    special_merges: int = 0
    # Result building profiling
    result_build_time: float = 0.0           # Total result building time
    result_audio_encode_time: float = 0.0    # Audio-to-data-URL encoding
    # Total pipeline time
    total_time: float = 0.0                  # End-to-end pipeline time

    @property
    def phoneme_dp_avg_time(self) -> float:
        """Average DP time per segment (0.0 when no segments were aligned)."""
        if self.phoneme_num_segments == 0:
            return 0.0
        return self.phoneme_dp_total_time / self.phoneme_num_segments

    @staticmethod
    def _fmt(seconds: float) -> str:
        """Format seconds as m:ss.fff when >= 60s, else as s.fffs.

        The value is rounded to milliseconds *before* it is split into
        minutes and seconds, so a value such as 119.9996 renders as
        ``2:00.000`` instead of the invalid ``1:60.000``. The 60s threshold
        is applied to the rounded value for the same reason (59.9996 renders
        as ``1:00.000``).

        Negative and non-finite values (NaN, infinity) always use the plain
        ``s.fffs`` form.
        """
        rounded = round(seconds, 3)
        # The chained comparison is False for NaN and for +inf, so both fall
        # through to the plain form instead of failing in the integer maths.
        if not 60 <= rounded < float("inf"):
            return f"{seconds:.3f}s"
        # Integer milliseconds avoid float error when splitting min/sec.
        minutes, millis = divmod(round(rounded * 1000), 60000)
        return f"{minutes}:{millis // 1000:02d}.{millis % 1000:03d}"

    def summary(self) -> str:
        """Return a formatted, multi-line profiling summary.

        The report starts with a newline and contains, in order: the
        preprocessing, VAD and phoneme-ASR sections (with one line per entry
        of ``asr_batch_profiling``), the global-anchor and phoneme-alignment
        sections, the alignment statistics, and finally the profiled sum
        compared against ``total_time``.

        "queue" figures are wall time minus GPU time for the stage.
        "unaccounted" is ``total_time`` minus the profiled sum and may be
        negative.

        Raises:
            KeyError: if an entry of ``asr_batch_profiling`` lacks one of
                the keys listed on that field.
        """
        _fmt = self._fmt
        # None and [] are reported identically: zero batches, no batch lines.
        batches = self.asr_batch_profiling or []
        lines = [
            "\n" + "=" * 60,
            "PROFILING SUMMARY",
            "=" * 60,
            "  Preprocessing:",
            f"    Resample:        {self.resample_time:.3f}s",
            f"  VAD:                                 wall {_fmt(self.vad_wall_time)}",
            f"    GPU Time:        {self.vad_gpu_time:.3f}s   (queue {self.vad_wall_time - self.vad_gpu_time:.3f}s)",
            f"    Model Load:      {self.vad_model_load_time:.3f}s",
            f"    Model Move:      {self.vad_model_move_time:.3f}s",
            f"    Inference:       {self.vad_inference_time:.3f}s",
            f"  Phoneme ASR:                         wall {_fmt(self.asr_time)}",
            f"    GPU Time:        {self.asr_gpu_time:.3f}s   (queue {self.asr_time - self.asr_gpu_time:.3f}s)",
            f"    Model Move:      {self.asr_model_move_time:.3f}s",
            f"    Sorting:         {self.asr_sorting_time:.3f}s",
            f"    Batch Build:     {self.asr_batch_build_time:.3f}s",
            f"    Batches:         {len(batches)}",
        ]
        # A = average duration, T = total seconds, W = padding waste.
        for b in batches:
            lines.append(
                f"    Batch {b['batch_num']:>2}: {b['size']:>3} segs | "
                f"{b['time']:.3f}s | "
                f"{b['min_dur']:.2f}-{b['max_dur']:.2f}s "
                f"(A {b['avg_dur']:.2f}s, T {b['total_seconds']:.1f}s, W {b['pad_waste']:.0%})"
            )
        lines += [
            "  Global Anchor:",
            f"    N-gram Voting:   {self.anchor_time:.3f}s",
            f"  Phoneme Alignment:                   wall {_fmt(self.match_wall_time)}",
            f"    Ref Build:       {self.phoneme_ref_build_time:.3f}s",
            f"    Window Setup:    {self.phoneme_window_setup_time:.3f}s",
            f"    DP Total:        {self.phoneme_dp_total_time:.3f}s",
            f"    Segments:        {self.phoneme_num_segments}",
            f"    DP Avg/segment:  {1000*self.phoneme_dp_avg_time:.3f}ms",
            f"    DP Min:          {1000*self.phoneme_dp_min_time:.3f}ms",
            f"    DP Max:          {1000*self.phoneme_dp_max_time:.3f}ms",
        ]
        # Guard the pass rate against division by zero when nothing ran.
        pct = 100 * self.segments_passed / self.segments_attempted if self.segments_attempted else 0
        t1_segs = self.tier1_segments or []
        t2_segs = self.tier2_segments or []
        lines += [
            "  Alignment Stats:",
            f"    Attempted:       {self.segments_attempted}",
            f"    Passed:          {self.segments_passed}  ({pct:.1f}%)",
            f"    Tier 1 Retries:  {self.tier1_passed}/{self.tier1_attempts} passed   segments: {t1_segs}",
            f"    Tier 2 Retries:  {self.tier2_passed}/{self.tier2_attempts} passed   segments: {t2_segs}",
            f"    Reanchors (consec failures): {self.consec_reanchors}",
            f"    Special Merges:  {self.special_merges}",
            "-" * 60,
        ]
        # Sum of the top-level stage timings; the remainder of total_time is
        # reported as "unaccounted".
        profiled_sum = (self.resample_time + self.vad_wall_time + self.asr_time
                        + self.anchor_time + self.match_wall_time + self.result_build_time)
        unaccounted = self.total_time - profiled_sum
        lines += [
            f"  PROFILED SUM:      {_fmt(profiled_sum)}",
            f"  TOTAL (wall):      {_fmt(self.total_time)}   (unaccounted: {_fmt(unaccounted)})",
            "=" * 60,
        ]
        return "\n".join(lines)