Spaces:

vyluong
/

PoC_ASR_v5

Sleeping

colab-user committed on Feb 11

Commit

3e50eb1

1 Parent(s): 6099ac3

fix processor

Files changed (1) hide show

app/services/processor.py CHANGED Viewed

@@ -197,10 +197,31 @@ class Processor:
             raise ValueError("Empty audio")
         duration = len(y) / sr
-        # 3: Diarization
-        logger.info("Step 3: Running diarization...")
-        diarization: DiarizationResult = await DiarizationService.diarize_async(wav_path)
         diarization_segments = diarization.segments or []
         speakers = diarization.speakers or []
@@ -238,7 +259,7 @@ class Processor:
         speakers = list(speaker_map.values())
-        # 5. NORMALIZE ROLES
         speaker_duration = defaultdict(float)
         for seg in diarization_segments:
             speaker_duration[seg.speaker] += seg.end - seg.start
@@ -261,16 +282,7 @@ class Processor:
         logger.info(f"roles(mapped) = {roles}")
-        # 7: Transcribe segments after diarization
-        logger.info("Step 7: Running ASR with external VAD batch...")
-        asr_result = await TranscriptionService.transcribe_with_words_async(
-            audio_array=y,
-            model_name=model_name,
-            language=language,
-            vad_options=True
-        )
         text, raw_words = normalize_asr_result(asr_result)
         processed_segments: List[TranscriptSegment] = []

             raise ValueError("Empty audio")
         duration = len(y) / sr
+        # 3: Run diarization and ASR in parallel
+        logger.info("Step 3+7: Running diarization and ASR in parallel...")
+        diarization_task = asyncio.create_task(
+            DiarizationService.diarize_async(wav_path)
+        )
+        asr_task = asyncio.create_task(
+            TranscriptionService.transcribe_with_words_async(
+                audio_array=y,
+                model_name=model_name,
+                language=language,
+                vad_options=True
+            )
+        )
+        try:
+            diarization, asr_result = await asyncio.gather(
+                diarization_task,
+                asr_task
+            )
+        except Exception:
+            logger.exception("Parallel AI processing failed")
+            raise
         diarization_segments = diarization.segments or []
         speakers = diarization.speakers or []
         speakers = list(speaker_map.values())
+        # 5. Normalize roles
         speaker_duration = defaultdict(float)
         for seg in diarization_segments:
             speaker_duration[seg.speaker] += seg.end - seg.start
         logger.info(f"roles(mapped) = {roles}")
+        # 7: Normalize ASR result
         text, raw_words = normalize_asr_result(asr_result)
         processed_segments: List[TranscriptSegment] = []