spagestic committed on
Commit
9274377
·
1 Parent(s): c9078b4

feat: implement AudioConcatenator package with audio processing utilities

Browse files
src/processors/AudioConcatenator/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # __init__.py for AudioConcatenator package
2
+
3
+ from .concatenator import AudioConcatenator
src/processors/AudioConcatenator/audio_filter.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Audio filtering utilities for AudioConcatenator."""
2
+
3
+ import numpy as np
4
+
5
class AudioFilter:
    """Post-processing filters applied to concatenated audio."""

    @staticmethod
    def remove_clicks_and_pops(
        audio_data: np.ndarray,
        sample_rate: int = 22050,
        cutoff_hz: float = 80.0,
    ) -> np.ndarray:
        """Attenuate DC offset and low-frequency content with a high-pass filter.

        A 2nd-order Butterworth high-pass is designed for ``sample_rate`` and
        applied with ``sosfilt``.

        Args:
            audio_data: Samples to filter.
            sample_rate: Sampling rate of ``audio_data`` in Hz. Defaults to
                22050, the value that was previously hard-coded, so existing
                one-argument calls behave as before.
            cutoff_hz: High-pass corner frequency in Hz (default 80).

        Returns:
            The filtered samples as ``float32``. The input is returned
            unfiltered (still cast to ``float32``) when scipy is not
            installed, when the input is empty, or when ``cutoff_hz`` is not
            strictly between 0 and the Nyquist frequency.
        """
        samples = np.asarray(audio_data)
        try:
            from scipy import signal
        except ImportError:
            # scipy is optional: without it the audio passes through untouched.
            return samples.astype(np.float32)

        # butter() rejects a digital cutoff outside (0, fs / 2); skip filtering
        # instead of raising for very low sample rates.
        if samples.size == 0 or not 0 < cutoff_hz < sample_rate / 2:
            return samples.astype(np.float32)

        sos = signal.butter(2, cutoff_hz, btype='highpass', fs=sample_rate, output='sos')
        filtered_audio = signal.sosfilt(sos, samples)
        return filtered_audio.astype(np.float32)
src/processors/AudioConcatenator/audio_utils.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Audio normalization and fade utilities for AudioConcatenator."""
2
+
3
+ import numpy as np
4
+
5
class AudioUtils:
    """Level-normalization and fade helpers used by the concatenators."""

    @staticmethod
    def normalize_audio(audio_data: np.ndarray) -> np.ndarray:
        """Scale audio so that its peak absolute value becomes 0.95.

        Args:
            audio_data: Samples of any numeric dtype.

        Returns:
            A ``float32`` array. Empty and all-zero inputs are returned
            unscaled (but still as ``float32``) because there is no peak to
            scale against.
        """
        samples = np.asarray(audio_data)
        if not np.issubdtype(samples.dtype, np.floating):
            # Widen integers first: np.abs(int16(-32768)) overflows back to
            # -32768, which would produce a wrong (negative) peak.
            samples = samples.astype(np.float64)
        if samples.size == 0:
            # np.max raises on an empty array.
            return samples.astype(np.float32)
        max_val = np.max(np.abs(samples))
        if max_val == 0:
            return samples.astype(np.float32)
        return (samples * (0.95 / max_val)).astype(np.float32)

    @staticmethod
    def apply_fade_effects(audio_data: np.ndarray, fade_duration: float, sample_rate: int) -> np.ndarray:
        """Apply a linear fade-in and fade-out to a copy of the audio.

        Args:
            audio_data: Samples; the first axis is treated as time.
            fade_duration: Length of each fade in seconds.
            sample_rate: Sampling rate in Hz, used to convert the duration
                into a sample count.

        Returns:
            A faded copy of the audio. The input object itself is returned
            when the fade covers no samples or when the audio is shorter than
            two fade lengths. Integer input is converted to ``float32`` so the
            fractional gains can be applied.
        """
        fade_samples = int(fade_duration * sample_rate)
        # With fade_samples == 0, the slice [-0:] would select the WHOLE array
        # and the multiplication by an empty ramp would fail to broadcast.
        if fade_samples <= 0 or len(audio_data) < 2 * fade_samples:
            return audio_data

        samples = np.asarray(audio_data)
        if np.issubdtype(samples.dtype, np.floating):
            audio_with_fades = samples.copy()
        else:
            # In-place multiplication of an integer array by a float ramp is
            # rejected by NumPy's casting rules.
            audio_with_fades = samples.astype(np.float32)

        # Trailing singleton axes let the ramp broadcast over extra (channel)
        # axes; for 1-D audio the reshape is a no-op.
        ramp_shape = (-1,) + (1,) * (audio_with_fades.ndim - 1)
        fade_in = np.linspace(0, 1, fade_samples).reshape(ramp_shape)
        fade_out = np.linspace(1, 0, fade_samples).reshape(ramp_shape)
        audio_with_fades[:fade_samples] *= fade_in
        audio_with_fades[-fade_samples:] *= fade_out
        return audio_with_fades
src/processors/AudioConcatenator/concatenator.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AudioConcatenator class implementation."""
2
+
3
+ import numpy as np
4
+ from typing import List, Tuple, Optional
5
+ import gradio as gr
6
+ from .audio_utils import AudioUtils
7
+ from .audio_filter import AudioFilter
8
+ from .info import AudioConcatenationInfo
9
+ from .progressive import AudioProgressiveConcatenator
10
+
11
class AudioConcatenator:
    """Joins several audio chunks into a single track separated by silence."""

    def __init__(self, silence_duration: float = 0.5, fade_duration: float = 0.1):
        # Both values are in seconds; they are converted to sample counts
        # once the sample rate of the chunks is known.
        self.silence_duration = silence_duration
        self.fade_duration = fade_duration

    def concatenate_audio_chunks(
        self,
        audio_chunks: List[Tuple[int, np.ndarray]],
        progress_callback: Optional[callable] = None
    ) -> Tuple[int, np.ndarray]:
        """Merge ``(sample_rate, samples)`` chunks into one mono track.

        Each chunk is reduced to mono (2-D input is averaged over axis 1),
        normalized and faded; chunks are then joined with a block of silence
        between neighbours, and the result is normalized and filtered.

        Args:
            audio_chunks: List of ``(sample_rate, audio_data)`` tuples.
            progress_callback: Optional callable invoked as
                ``progress_callback(fraction, desc=message)``.

        Returns:
            ``(sample_rate, audio)``. A single input chunk is returned as-is,
            without any processing.

        Raises:
            gr.Error: If there are no chunks, the sample rates differ, or a
                chunk has more than two dimensions.
        """
        def report(fraction, message):
            # Funnel every progress update through one place.
            if progress_callback:
                progress_callback(fraction, desc=message)

        if not audio_chunks:
            raise gr.Error("No audio chunks to concatenate")
        chunk_count = len(audio_chunks)
        if chunk_count == 1:
            return audio_chunks[0]

        report(0.1, "Preparing audio concatenation...")
        rates = [entry[0] for entry in audio_chunks]
        distinct_rates = set(rates)
        if len(distinct_rates) > 1:
            raise gr.Error(f"Inconsistent sample rates found: {distinct_rates}. All chunks must have the same sample rate.")
        sample_rate = rates[0]

        report(0.2, "Normalizing audio chunks...")
        prepared = []
        for position, (_, samples) in enumerate(audio_chunks, start=1):
            dims = samples.ndim
            if dims == 1:
                mono = samples
            elif dims == 2:
                mono = np.mean(samples, axis=1)
            else:
                raise gr.Error(f"Unsupported audio format in chunk {position}: {samples.shape}")
            mono = AudioUtils.normalize_audio(mono)
            mono = AudioUtils.apply_fade_effects(mono, self.fade_duration, sample_rate)
            prepared.append(mono)
            report(0.2 + (0.5 * position / chunk_count), f"Processed chunk {position}/{chunk_count}")

        report(0.7, "Creating silence segments...")
        gap = np.zeros(int(self.silence_duration * sample_rate), dtype=np.float32)

        report(0.8, "Concatenating audio segments...")
        pieces = []
        prepared_count = len(prepared)
        final_index = prepared_count - 1
        for index, piece in enumerate(prepared):
            pieces.append(piece)
            if index != final_index:
                # Silence goes between chunks only, never after the last one.
                pieces.append(gap)
            report(
                0.8 + (0.15 * (index + 1) / prepared_count),
                f"Concatenated {index + 1}/{prepared_count} chunks",
            )
        joined = np.concatenate(pieces)

        report(0.95, "Finalizing audio...")
        joined = AudioUtils.normalize_audio(joined)
        joined = AudioFilter.remove_clicks_and_pops(joined)

        report(1.0, "Audio concatenation complete!")
        return sample_rate, joined

    def get_concatenation_info(self, audio_chunks: List[Tuple[int, np.ndarray]]) -> dict:
        """Delegate to ``AudioConcatenationInfo`` using this instance's silence duration."""
        return AudioConcatenationInfo.get_concatenation_info(
            audio_chunks, self.silence_duration
        )

    def concatenate_progressive(
        self,
        new_chunk: Tuple[int, np.ndarray],
        existing_audio: Optional[Tuple[int, np.ndarray]] = None,
        progress_callback: Optional[callable] = None
    ) -> Tuple[int, np.ndarray]:
        """Delegate to ``AudioProgressiveConcatenator`` with this instance's silence and fade settings."""
        settings = {
            "silence_duration": self.silence_duration,
            "fade_duration": self.fade_duration,
            "progress_callback": progress_callback,
        }
        return AudioProgressiveConcatenator.concatenate_progressive(
            new_chunk, existing_audio, **settings
        )
src/processors/AudioConcatenator/info.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AudioConcatenator info utilities."""
2
+
3
+ import numpy as np
4
+ from typing import List, Tuple
5
+
6
class AudioConcatenationInfo:
    """Computes summary statistics for a planned concatenation."""

    @staticmethod
    def get_concatenation_info(audio_chunks: List[Tuple[int, np.ndarray]], silence_duration: float) -> dict:
        """Summarize the durations involved in concatenating ``audio_chunks``.

        Args:
            audio_chunks: List of ``(sample_rate, audio_data)`` tuples.
            silence_duration: Seconds of silence inserted between
                neighbouring chunks.

        Returns:
            An empty dict when there are no chunks; otherwise a dict with the
            keys ``num_chunks``, ``total_duration``,
            ``total_silence_duration``, ``chunk_durations``,
            ``average_chunk_duration`` and ``sample_rate`` (the rate of the
            first chunk). Durations are in seconds.
        """
        if not audio_chunks:
            return {}

        # Each chunk's duration is derived from its OWN sample rate; using the
        # first chunk's rate for all of them misreports chunks whose rate
        # differs.
        chunk_durations = [
            len(audio_data) / chunk_rate for chunk_rate, audio_data in audio_chunks
        ]

        total_silence_duration = 0
        if len(audio_chunks) > 1:
            # Silence is inserted between chunks only, hence one gap fewer
            # than there are chunks.
            total_silence_duration = (len(audio_chunks) - 1) * silence_duration
        total_duration = sum(chunk_durations) + total_silence_duration

        return {
            "num_chunks": len(audio_chunks),
            "total_duration": total_duration,
            "total_silence_duration": total_silence_duration,
            "chunk_durations": chunk_durations,
            "average_chunk_duration": np.mean(chunk_durations),
            "sample_rate": audio_chunks[0][0]
        }
src/processors/AudioConcatenator/progressive.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Progressive concatenation for AudioConcatenator."""
2
+
3
+ import numpy as np
4
+ from typing import Tuple, Optional
5
+ import gradio as gr
6
+ from .audio_utils import AudioUtils
7
+ from .audio_filter import AudioFilter
8
+
9
class AudioProgressiveConcatenator:
    """Grows an audio track by appending one chunk at a time."""

    @staticmethod
    def concatenate_progressive(
        new_chunk: Tuple[int, np.ndarray],
        existing_audio: Optional[Tuple[int, np.ndarray]] = None,
        silence_duration: float = 0.5,
        fade_duration: float = 0.1,
        progress_callback: Optional[callable] = None
    ) -> Tuple[int, np.ndarray]:
        """Append ``new_chunk`` to ``existing_audio`` with a silence gap.

        The new chunk is reduced to mono (2-D input is averaged over axis 1),
        normalized and faded. Without existing audio that prepared chunk is
        the result; otherwise it is joined to the existing samples after a
        block of silence, and the whole track is normalized and filtered.

        Args:
            new_chunk: ``(sample_rate, audio_data)`` to add.
            existing_audio: Previously accumulated ``(sample_rate, audio)``,
                or ``None`` when this is the first chunk.
            silence_duration: Seconds of silence placed before the new chunk.
            fade_duration: Seconds of fade-in/out applied to the new chunk.
            progress_callback: Optional callable invoked as
                ``progress_callback(fraction, desc=message)``.

        Returns:
            ``(sample_rate, audio)`` for the updated track.

        Raises:
            gr.Error: If the sample rates differ or the new chunk has more
                than two dimensions.
        """
        def notify(fraction, message):
            if progress_callback:
                progress_callback(fraction, desc=message)

        def prepare(samples, rate):
            # Mono conversion, level normalization and fades for one chunk.
            dims = samples.ndim
            if dims == 1:
                mono = samples
            elif dims == 2:
                mono = np.mean(samples, axis=1)
            else:
                raise gr.Error(f"Unsupported audio format: {samples.shape}")
            leveled = AudioUtils.normalize_audio(mono)
            return AudioUtils.apply_fade_effects(leveled, fade_duration, rate)

        notify(0.1, "Adding new audio chunk...")

        if existing_audio is None:
            first_rate, first_samples = new_chunk
            first_ready = prepare(first_samples, first_rate)
            notify(1.0, "First chunk ready!")
            return first_rate, first_ready

        base_rate, base_samples = existing_audio
        added_rate, added_samples = new_chunk
        if base_rate != added_rate:
            raise gr.Error(f"Sample rate mismatch: {base_rate} vs {added_rate}")

        notify(0.3, "Processing new chunk...")
        added_ready = prepare(added_samples, added_rate)

        notify(0.6, "Creating silence segment...")
        gap = np.zeros(int(silence_duration * base_rate), dtype=np.float32)

        notify(0.8, "Concatenating audio...")
        combined = np.concatenate([base_samples, gap, added_ready])
        combined = AudioUtils.normalize_audio(combined)
        combined = AudioFilter.remove_clicks_and_pops(combined)

        notify(1.0, "Progressive concatenation complete!")
        return base_rate, combined
src/processors/audio_concatenator.py CHANGED
@@ -1,194 +1,6 @@
1
  """Audio concatenation utility for combining multiple audio chunks into a single audio file."""
2
 
3
- import numpy as np
4
- from typing import List, Tuple, Optional
5
- import gradio as gr
6
 
7
-
8
class AudioConcatenator:
    """Handles concatenation of multiple audio chunks."""

    def __init__(self, silence_duration: float = 0.5, fade_duration: float = 0.1):
        """
        Initialize the audio concatenator.

        Args:
            silence_duration: Duration of silence between chunks (seconds)
            fade_duration: Duration of fade in/out effects (seconds)
        """
        self.silence_duration = silence_duration
        self.fade_duration = fade_duration

    def concatenate_audio_chunks(
        self,
        audio_chunks: List[Tuple[int, np.ndarray]],
        progress_callback: Optional[callable] = None
    ) -> Tuple[int, np.ndarray]:
        """
        Concatenate multiple audio chunks into a single audio file.

        Args:
            audio_chunks: List of (sample_rate, audio_data) tuples
            progress_callback: Optional callback for progress updates, invoked
                as ``progress_callback(fraction, desc=message)``

        Returns:
            Tuple of (sample_rate, concatenated_audio_data). A single input
            chunk is returned unchanged.

        Raises:
            gr.Error: If there are no chunks, the sample rates differ, or a
                chunk has more than two dimensions.
        """
        if not audio_chunks:
            raise gr.Error("No audio chunks to concatenate")

        if len(audio_chunks) == 1:
            return audio_chunks[0]

        if progress_callback:
            progress_callback(0.1, desc="Preparing audio concatenation...")

        # Verify all chunks have the same sample rate
        sample_rates = [chunk[0] for chunk in audio_chunks]
        if len(set(sample_rates)) > 1:
            raise gr.Error(f"Inconsistent sample rates found: {set(sample_rates)}. All chunks must have the same sample rate.")

        sample_rate = sample_rates[0]

        if progress_callback:
            progress_callback(0.2, desc="Normalizing audio chunks...")

        # Normalize and prepare audio data
        normalized_chunks = []
        for i, (_, audio_data) in enumerate(audio_chunks):
            if audio_data.ndim == 1:
                normalized_audio = audio_data
            elif audio_data.ndim == 2:
                # Convert stereo to mono by averaging channels
                normalized_audio = np.mean(audio_data, axis=1)
            else:
                raise gr.Error(f"Unsupported audio format in chunk {i + 1}: {audio_data.shape}")

            normalized_audio = self._normalize_audio(normalized_audio)
            normalized_audio = self._apply_fade_effects(normalized_audio, sample_rate)
            normalized_chunks.append(normalized_audio)

            if progress_callback:
                progress = 0.2 + (0.5 * (i + 1) / len(audio_chunks))
                progress_callback(progress, desc=f"Processed chunk {i + 1}/{len(audio_chunks)}")

        if progress_callback:
            progress_callback(0.7, desc="Creating silence segments...")

        silence_samples = int(self.silence_duration * sample_rate)
        silence = np.zeros(silence_samples, dtype=np.float32)

        if progress_callback:
            progress_callback(0.8, desc="Concatenating audio segments...")

        # Concatenate all chunks with silence in between
        concatenated_segments = []
        for i, chunk in enumerate(normalized_chunks):
            concatenated_segments.append(chunk)

            # Add silence between chunks (but not after the last chunk)
            if i < len(normalized_chunks) - 1:
                concatenated_segments.append(silence)

            if progress_callback:
                progress = 0.8 + (0.15 * (i + 1) / len(normalized_chunks))
                progress_callback(progress, desc=f"Concatenated {i + 1}/{len(normalized_chunks)} chunks")

        final_audio = np.concatenate(concatenated_segments)

        if progress_callback:
            progress_callback(0.95, desc="Finalizing audio...")

        # Final normalization and cleanup. The filter is designed for the
        # chunks' real sample rate rather than a fixed 22050 Hz.
        final_audio = self._normalize_audio(final_audio)
        final_audio = self._remove_clicks_and_pops(final_audio, sample_rate)

        if progress_callback:
            progress_callback(1.0, desc="Audio concatenation complete!")

        return sample_rate, final_audio

    def _normalize_audio(self, audio_data: np.ndarray) -> np.ndarray:
        """Normalize audio to 95% of full scale and return it as float32.

        Empty and all-zero inputs are returned unscaled (as float32).
        """
        samples = np.asarray(audio_data)
        if not np.issubdtype(samples.dtype, np.floating):
            # Widen integers first: np.abs(int16(-32768)) overflows.
            samples = samples.astype(np.float64)
        if samples.size == 0:
            # np.max raises on an empty array.
            return samples.astype(np.float32)

        max_val = np.max(np.abs(samples))
        if max_val == 0:
            return samples.astype(np.float32)

        # Normalize to 95% of maximum to leave some headroom
        return (samples * (0.95 / max_val)).astype(np.float32)

    def _apply_fade_effects(self, audio_data: np.ndarray, sample_rate: int) -> np.ndarray:
        """Apply linear fade in and fade out to reduce pops and clicks.

        The input is returned as-is when the fade covers no samples or the
        audio is shorter than two fade lengths.
        """
        fade_samples = int(self.fade_duration * sample_rate)

        # With fade_samples == 0 the slice [-0:] would select the whole array
        # and the multiplication by an empty ramp would fail to broadcast.
        if fade_samples <= 0 or len(audio_data) < 2 * fade_samples:
            return audio_data

        samples = np.asarray(audio_data)
        if np.issubdtype(samples.dtype, np.floating):
            audio_with_fades = samples.copy()
        else:
            # Integer arrays cannot be multiplied in place by a float ramp.
            audio_with_fades = samples.astype(np.float32)

        audio_with_fades[:fade_samples] *= np.linspace(0, 1, fade_samples)
        audio_with_fades[-fade_samples:] *= np.linspace(1, 0, fade_samples)

        return audio_with_fades

    def _remove_clicks_and_pops(self, audio_data: np.ndarray, sample_rate: int = 22050) -> np.ndarray:
        """Apply an 80 Hz high-pass filter to remove DC offset and low-frequency artifacts.

        Args:
            audio_data: Samples to filter.
            sample_rate: Sampling rate of ``audio_data`` in Hz (defaults to
                the previously hard-coded 22050).

        Returns:
            Filtered float32 samples; the input cast to float32 when scipy is
            unavailable, the input is empty, or 80 Hz is not below the
            Nyquist frequency for ``sample_rate``.
        """
        samples = np.asarray(audio_data)
        try:
            from scipy import signal
        except ImportError:
            # If scipy is not available, return audio as-is
            return samples.astype(np.float32)

        cutoff_hz = 80
        # butter() rejects a cutoff outside (0, fs / 2).
        if samples.size == 0 or cutoff_hz >= sample_rate / 2:
            return samples.astype(np.float32)

        sos = signal.butter(2, cutoff_hz, btype='highpass', fs=sample_rate, output='sos')
        filtered_audio = signal.sosfilt(sos, samples)
        return filtered_audio.astype(np.float32)

    def get_concatenation_info(self, audio_chunks: List[Tuple[int, np.ndarray]]) -> dict:
        """Get information about the concatenation process.

        Returns an empty dict for no chunks; otherwise chunk count, total and
        per-chunk durations in seconds, total inserted silence, the mean
        chunk duration and the first chunk's sample rate.
        """
        if not audio_chunks:
            return {}

        # Each chunk's duration uses its own sample rate.
        chunk_durations = [
            len(audio_data) / chunk_rate for chunk_rate, audio_data in audio_chunks
        ]

        # Add silence duration (between chunks)
        total_silence_duration = 0
        if len(audio_chunks) > 1:
            total_silence_duration = (len(audio_chunks) - 1) * self.silence_duration
        total_duration = sum(chunk_durations) + total_silence_duration

        return {
            "num_chunks": len(audio_chunks),
            "total_duration": total_duration,
            "total_silence_duration": total_silence_duration,
            "chunk_durations": chunk_durations,
            "average_chunk_duration": np.mean(chunk_durations),
            "sample_rate": audio_chunks[0][0]
        }
 
1
  """Audio concatenation utility for combining multiple audio chunks into a single audio file."""
2
 
3
+ # The AudioConcatenator class and related logic have been split into multiple files in the AudioConcatenator package.
4
+ # See: AudioConcatenator/concatenator.py, audio_utils.py, audio_filter.py, info.py, progressive.py
 
5
 
6
+ from .AudioConcatenator import AudioConcatenator
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/processors/parallel_processor.py CHANGED
@@ -168,3 +168,83 @@ class ParallelAudioProcessor:
168
  estimated_time = sequential_time * parallel_efficiency
169
 
170
  return estimated_time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  estimated_time = sequential_time * parallel_efficiency
169
 
170
  return estimated_time
171
+
172
def process_chunks_progressive(
    self,
    text_chunks: List[str],
    audio_generator_func: Callable,
    progress_callback: Optional[Callable] = None
):
    """
    Process multiple text chunks in parallel and yield results in order as they become available.

    All chunks are submitted to a thread pool of ``self.max_workers``
    workers. A finished chunk is held back until every earlier chunk has
    been yielded, so results always come out in input order.

    Args:
        text_chunks: List of text chunks to process
        audio_generator_func: Function to generate audio from text; called as
            ``audio_generator_func(text_chunk, None, progress=callback)``
        progress_callback: Optional callback for progress updates, invoked as
            ``progress_callback(fraction, desc=message)``. It is called from
            this generator and, if the generator function uses the callback
            it is given, from worker threads as well.

    Yields:
        Tuples of (chunk_index, audio_result, is_complete, total_chunks)
        where is_complete indicates if this is the final chunk

    Raises:
        gr.Error: If processing any chunk fails. The original exception is
            chained as the cause, and chunks that have not started yet are
            cancelled.
    """
    if not text_chunks:
        return

    total_chunks = len(text_chunks)
    completed_chunks = 0
    results = [None] * total_chunks
    completed_indices = set()
    next_index_to_yield = 0

    def update_progress(chunk_index: int, desc: str = ""):
        # Reads the enclosing counter only, so no ``nonlocal`` is needed.
        if progress_callback:
            progress = completed_chunks / total_chunks
            progress_callback(progress, desc=f"Processing chunk {completed_chunks + 1}/{total_chunks}{': ' + desc if desc else ''}")

    def process_single_chunk(chunk_index: int, text_chunk: str) -> Tuple[int, Tuple[int, np.ndarray]]:
        """Process a single chunk and return the result with its index."""
        def chunk_progress(progress: float, desc: str = ""):
            # The per-chunk fraction is not forwarded; only the description
            # is, prefixed with the chunk number.
            update_progress(chunk_index, f"Chunk {chunk_index + 1}: {desc}")

        try:
            audio_result = audio_generator_func(text_chunk, None, progress=chunk_progress)
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise Exception(f"Error processing chunk {chunk_index + 1}: {str(e)}") from e
        return chunk_index, audio_result

    # Use ThreadPoolExecutor for parallel processing
    with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
        # Submit all chunks for processing
        future_to_index = {
            executor.submit(process_single_chunk, i, chunk): i
            for i, chunk in enumerate(text_chunks)
        }

        # Collect results as they complete
        for future in concurrent.futures.as_completed(future_to_index):
            chunk_index = future_to_index[future]
            try:
                index, audio_result = future.result()
                results[index] = audio_result
                completed_indices.add(index)
                completed_chunks += 1

                if progress_callback:
                    progress = completed_chunks / total_chunks
                    progress_callback(
                        progress,
                        desc=f"Completed {completed_chunks}/{total_chunks} audio chunks"
                    )

                # Yield any chunks that are now ready in order
                while next_index_to_yield < total_chunks and next_index_to_yield in completed_indices:
                    chunk_result = results[next_index_to_yield]
                    is_complete = (next_index_to_yield == total_chunks - 1)
                    yield (next_index_to_yield, chunk_result, is_complete, total_chunks)
                    next_index_to_yield += 1

            except Exception as e:
                # Do not start work whose result will be thrown away;
                # cancel() is a no-op for futures already running or done.
                for pending in future_to_index:
                    pending.cancel()
                raise gr.Error(f"Failed to process chunk {chunk_index + 1}: {str(e)}") from e