Lior-0618 committed on
Commit
651926c
·
1 Parent(s): 63fe495

fix: route video files through async polling to restore FER display

Browse files

Split processing by file type:
- Video → /api/transcribe-diarize polling → full FER data
- Audio → /api/transcribe-stream SSE → live tokens

Files changed (1) hide show
  1. web/src/app/studio/page.tsx +89 -69
web/src/app/studio/page.tsx CHANGED
@@ -687,8 +687,8 @@ function StudioContent() {
687
  }, [sessionId])
688
 
689
  // Automatic processing for pending sessions.
690
- // Uses Modal streaming API: tokens arrive via SSE for live display,
691
- // then the full transcription is converted to a DiarizeResult.
692
  useEffect(() => {
693
  if (!session || processingRef.current || processError) return
694
 
@@ -702,79 +702,99 @@ function StudioContent() {
702
  const formData = new FormData()
703
  formData.append("audio", session.file!, session.filename)
704
 
705
- const res = await fetch("/api/transcribe-stream", {
706
- method: "POST",
707
- body: formData,
708
- })
709
-
710
- if (!res.ok) {
711
- const errData = await res.json().catch(() => ({}))
712
- throw new Error(errData.error ?? "Transcription failed")
713
- }
714
-
715
- // Consume SSE stream
716
- const reader = res.body!.getReader()
717
- const decoder = new TextDecoder()
718
- let fullText = ""
719
- let buffer = ""
720
-
721
- while (true) {
722
- const { done, value } = await reader.read()
723
- if (done) break
724
-
725
- buffer += decoder.decode(value, { stream: true })
726
- const lines = buffer.split("\n")
727
- buffer = lines.pop() ?? ""
728
-
729
- for (const line of lines) {
730
- if (!line.startsWith("data: ")) continue
731
- try {
732
- const payload = JSON.parse(line.slice(6))
733
- if (payload.token) {
734
- fullText += payload.token
735
- setStreamingText(fullText)
736
- }
737
- if (payload.done) {
738
- fullText = payload.transcription ?? fullText
739
- setStreamingText(fullText)
740
  }
741
- } catch {
742
- // skip malformed SSE lines
743
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
744
  }
745
- }
746
-
747
- // Get audio duration from the media element
748
- const mediaDuration = mediaRef.current?.duration || 0
749
 
750
- // Derive emotion from the first bracket tag in transcription
751
- const firstTagMatch = fullText.match(/\[([^\]]+)\]/)
752
- const firstTag = firstTagMatch ? getTagEntry(firstTagMatch[1]) : null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
753
 
754
- // Build DiarizeResult from plain transcription
755
- const data: DiarizeResult = {
756
- segments: fullText.trim() ? [{
757
- id: 1,
758
- speaker: "SPEAKER_00",
759
- start: 0,
760
- end: mediaDuration || 30,
 
 
 
 
 
 
761
  text: fullText.trim(),
762
- emotion: firstTag?.emotion ?? "Neutral",
763
- valence: firstTag?.valence ?? 0,
764
- arousal: firstTag?.arousal ?? 0,
765
- }] : [],
766
- duration: mediaDuration || 30,
767
- text: fullText.trim(),
768
- filename: session.filename,
769
- }
770
-
771
- updateSession(session.id, data)
772
- const updated = getSession(session.id)
773
- setSession(updated)
774
- if (updated?.data.segments && updated.data.segments.length > 0) {
775
- setActiveId(updated.data.segments[0].id)
776
  }
777
- setStreamingText(null)
778
  } catch (e) {
779
  processingRef.current = false
780
  setProcessError(e instanceof Error ? e.message : "Request failed")
 
687
  }, [sessionId])
688
 
689
  // Automatic processing for pending sessions.
690
+ // Video files → async job polling via /api/transcribe-diarize (returns full FER data).
691
+ // Audio files → Modal SSE via /api/transcribe-stream (fast token streaming).
692
  useEffect(() => {
693
  if (!session || processingRef.current || processError) return
694
 
 
702
  const formData = new FormData()
703
  formData.append("audio", session.file!, session.filename)
704
 
705
+ if (isVideoFile(session.filename)) {
706
+ // ── Video: async polling → returns full FER + diarization ──────────
707
+ const submitRes = await fetch("/api/transcribe-diarize", {
708
+ method: "POST",
709
+ body: formData,
710
+ })
711
+ if (!submitRes.ok) {
712
+ const errData = await submitRes.json().catch(() => ({}))
713
+ throw new Error(errData.error ?? "Submit failed")
714
+ }
715
+ const submitJson = await submitRes.json() as { job_id?: string } & Partial<DiarizeResult>
716
+ let data: DiarizeResult
717
+ if (submitJson.job_id) {
718
+ const job_id = submitJson.job_id
719
+ data = await new Promise<DiarizeResult>((resolve, reject) => {
720
+ const tick = async () => {
721
+ try {
722
+ const r = await fetch(`/api/job/${job_id}`)
723
+ const j = await r.json() as { status: string; data?: DiarizeResult; error?: string }
724
+ if (j.status === "done" && j.data) resolve(j.data)
725
+ else if (j.status === "error") reject(new Error(j.error ?? "Processing failed"))
726
+ else setTimeout(tick, 3000)
727
+ } catch (e) { reject(e) }
 
 
 
 
 
 
 
 
 
 
 
 
728
  }
729
+ setTimeout(tick, 3000)
730
+ })
731
+ } else {
732
+ data = submitJson as DiarizeResult
733
+ }
734
+ updateSession(session.id, data)
735
+ const updated = getSession(session.id)
736
+ setSession(updated)
737
+ if (updated?.data.segments && updated.data.segments.length > 0) {
738
+ setActiveId(updated.data.segments[0].id)
739
+ }
740
+ setStreamingText(null)
741
+ } else {
742
+ // ── Audio: Modal SSE → live token streaming ───────────────────────
743
+ const res = await fetch("/api/transcribe-stream", {
744
+ method: "POST",
745
+ body: formData,
746
+ })
747
+ if (!res.ok) {
748
+ const errData = await res.json().catch(() => ({}))
749
+ throw new Error(errData.error ?? "Transcription failed")
750
  }
 
 
 
 
751
 
752
+ // Consume SSE stream
753
+ const reader = res.body!.getReader()
754
+ const decoder = new TextDecoder()
755
+ let fullText = ""
756
+ let buffer = ""
757
+
758
+ while (true) {
759
+ const { done, value } = await reader.read()
760
+ if (done) break
761
+ buffer += decoder.decode(value, { stream: true })
762
+ const lines = buffer.split("\n")
763
+ buffer = lines.pop() ?? ""
764
+ for (const line of lines) {
765
+ if (!line.startsWith("data: ")) continue
766
+ try {
767
+ const payload = JSON.parse(line.slice(6))
768
+ if (payload.token) { fullText += payload.token; setStreamingText(fullText) }
769
+ if (payload.done) { fullText = payload.transcription ?? fullText; setStreamingText(fullText) }
770
+ } catch { /* skip malformed SSE lines */ }
771
+ }
772
+ }
773
 
774
+ const mediaDuration = mediaRef.current?.duration || 0
775
+ const firstTagMatch = fullText.match(/\[([^\]]+)\]/)
776
+ const firstTag = firstTagMatch ? getTagEntry(firstTagMatch[1]) : null
777
+ const data: DiarizeResult = {
778
+ segments: fullText.trim() ? [{
779
+ id: 1, speaker: "SPEAKER_00",
780
+ start: 0, end: mediaDuration || 30,
781
+ text: fullText.trim(),
782
+ emotion: firstTag?.emotion ?? "Neutral",
783
+ valence: firstTag?.valence ?? 0,
784
+ arousal: firstTag?.arousal ?? 0,
785
+ }] : [],
786
+ duration: mediaDuration || 30,
787
  text: fullText.trim(),
788
+ filename: session.filename,
789
+ }
790
+ updateSession(session.id, data)
791
+ const updated = getSession(session.id)
792
+ setSession(updated)
793
+ if (updated?.data.segments && updated.data.segments.length > 0) {
794
+ setActiveId(updated.data.segments[0].id)
795
+ }
796
+ setStreamingText(null)
 
 
 
 
 
797
  }
 
798
  } catch (e) {
799
  processingRef.current = false
800
  setProcessError(e instanceof Error ? e.message : "Request failed")