Spaces:
Sleeping
Sleeping
fix: route video files through async polling to restore FER display
Browse files
Split processing by file type:
- Video → /api/transcribe-diarize polling → full FER data
- Audio → /api/transcribe-stream SSE → live tokens
- web/src/app/studio/page.tsx +89 -69
web/src/app/studio/page.tsx
CHANGED
|
@@ -687,8 +687,8 @@ function StudioContent() {
|
|
| 687 |
}, [sessionId])
|
| 688 |
|
| 689 |
// Automatic processing for pending sessions.
|
| 690 |
-
//
|
| 691 |
-
//
|
| 692 |
useEffect(() => {
|
| 693 |
if (!session || processingRef.current || processError) return
|
| 694 |
|
|
@@ -702,79 +702,99 @@ function StudioContent() {
|
|
| 702 |
const formData = new FormData()
|
| 703 |
formData.append("audio", session.file!, session.filename)
|
| 704 |
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
for (const line of lines) {
|
| 730 |
-
if (!line.startsWith("data: ")) continue
|
| 731 |
-
try {
|
| 732 |
-
const payload = JSON.parse(line.slice(6))
|
| 733 |
-
if (payload.token) {
|
| 734 |
-
fullText += payload.token
|
| 735 |
-
setStreamingText(fullText)
|
| 736 |
-
}
|
| 737 |
-
if (payload.done) {
|
| 738 |
-
fullText = payload.transcription ?? fullText
|
| 739 |
-
setStreamingText(fullText)
|
| 740 |
}
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 744 |
}
|
| 745 |
-
}
|
| 746 |
-
|
| 747 |
-
// Get audio duration from the media element
|
| 748 |
-
const mediaDuration = mediaRef.current?.duration || 0
|
| 749 |
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 753 |
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 761 |
text: fullText.trim(),
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
updateSession(session.id, data)
|
| 772 |
-
const updated = getSession(session.id)
|
| 773 |
-
setSession(updated)
|
| 774 |
-
if (updated?.data.segments && updated.data.segments.length > 0) {
|
| 775 |
-
setActiveId(updated.data.segments[0].id)
|
| 776 |
}
|
| 777 |
-
setStreamingText(null)
|
| 778 |
} catch (e) {
|
| 779 |
processingRef.current = false
|
| 780 |
setProcessError(e instanceof Error ? e.message : "Request failed")
|
|
|
|
| 687 |
}, [sessionId])
|
| 688 |
|
| 689 |
// Automatic processing for pending sessions.
|
| 690 |
+
// Video files → async job polling via /api/transcribe-diarize (returns full FER data).
|
| 691 |
+
// Audio files → Modal SSE via /api/transcribe-stream (fast token streaming).
|
| 692 |
useEffect(() => {
|
| 693 |
if (!session || processingRef.current || processError) return
|
| 694 |
|
|
|
|
| 702 |
const formData = new FormData()
|
| 703 |
formData.append("audio", session.file!, session.filename)
|
| 704 |
|
| 705 |
+
if (isVideoFile(session.filename)) {
|
| 706 |
+
// ── Video: async polling → returns full FER + diarization ──────────
|
| 707 |
+
const submitRes = await fetch("/api/transcribe-diarize", {
|
| 708 |
+
method: "POST",
|
| 709 |
+
body: formData,
|
| 710 |
+
})
|
| 711 |
+
if (!submitRes.ok) {
|
| 712 |
+
const errData = await submitRes.json().catch(() => ({}))
|
| 713 |
+
throw new Error(errData.error ?? "Submit failed")
|
| 714 |
+
}
|
| 715 |
+
const submitJson = await submitRes.json() as { job_id?: string } & Partial<DiarizeResult>
|
| 716 |
+
let data: DiarizeResult
|
| 717 |
+
if (submitJson.job_id) {
|
| 718 |
+
const job_id = submitJson.job_id
|
| 719 |
+
data = await new Promise<DiarizeResult>((resolve, reject) => {
|
| 720 |
+
const tick = async () => {
|
| 721 |
+
try {
|
| 722 |
+
const r = await fetch(`/api/job/${job_id}`)
|
| 723 |
+
const j = await r.json() as { status: string; data?: DiarizeResult; error?: string }
|
| 724 |
+
if (j.status === "done" && j.data) resolve(j.data)
|
| 725 |
+
else if (j.status === "error") reject(new Error(j.error ?? "Processing failed"))
|
| 726 |
+
else setTimeout(tick, 3000)
|
| 727 |
+
} catch (e) { reject(e) }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 728 |
}
|
| 729 |
+
setTimeout(tick, 3000)
|
| 730 |
+
})
|
| 731 |
+
} else {
|
| 732 |
+
data = submitJson as DiarizeResult
|
| 733 |
+
}
|
| 734 |
+
updateSession(session.id, data)
|
| 735 |
+
const updated = getSession(session.id)
|
| 736 |
+
setSession(updated)
|
| 737 |
+
if (updated?.data.segments && updated.data.segments.length > 0) {
|
| 738 |
+
setActiveId(updated.data.segments[0].id)
|
| 739 |
+
}
|
| 740 |
+
setStreamingText(null)
|
| 741 |
+
} else {
|
| 742 |
+
// ── Audio: Modal SSE → live token streaming ───────────────────────
|
| 743 |
+
const res = await fetch("/api/transcribe-stream", {
|
| 744 |
+
method: "POST",
|
| 745 |
+
body: formData,
|
| 746 |
+
})
|
| 747 |
+
if (!res.ok) {
|
| 748 |
+
const errData = await res.json().catch(() => ({}))
|
| 749 |
+
throw new Error(errData.error ?? "Transcription failed")
|
| 750 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 751 |
|
| 752 |
+
// Consume SSE stream
|
| 753 |
+
const reader = res.body!.getReader()
|
| 754 |
+
const decoder = new TextDecoder()
|
| 755 |
+
let fullText = ""
|
| 756 |
+
let buffer = ""
|
| 757 |
+
|
| 758 |
+
while (true) {
|
| 759 |
+
const { done, value } = await reader.read()
|
| 760 |
+
if (done) break
|
| 761 |
+
buffer += decoder.decode(value, { stream: true })
|
| 762 |
+
const lines = buffer.split("\n")
|
| 763 |
+
buffer = lines.pop() ?? ""
|
| 764 |
+
for (const line of lines) {
|
| 765 |
+
if (!line.startsWith("data: ")) continue
|
| 766 |
+
try {
|
| 767 |
+
const payload = JSON.parse(line.slice(6))
|
| 768 |
+
if (payload.token) { fullText += payload.token; setStreamingText(fullText) }
|
| 769 |
+
if (payload.done) { fullText = payload.transcription ?? fullText; setStreamingText(fullText) }
|
| 770 |
+
} catch { /* skip malformed SSE lines */ }
|
| 771 |
+
}
|
| 772 |
+
}
|
| 773 |
|
| 774 |
+
const mediaDuration = mediaRef.current?.duration || 0
|
| 775 |
+
const firstTagMatch = fullText.match(/\[([^\]]+)\]/)
|
| 776 |
+
const firstTag = firstTagMatch ? getTagEntry(firstTagMatch[1]) : null
|
| 777 |
+
const data: DiarizeResult = {
|
| 778 |
+
segments: fullText.trim() ? [{
|
| 779 |
+
id: 1, speaker: "SPEAKER_00",
|
| 780 |
+
start: 0, end: mediaDuration || 30,
|
| 781 |
+
text: fullText.trim(),
|
| 782 |
+
emotion: firstTag?.emotion ?? "Neutral",
|
| 783 |
+
valence: firstTag?.valence ?? 0,
|
| 784 |
+
arousal: firstTag?.arousal ?? 0,
|
| 785 |
+
}] : [],
|
| 786 |
+
duration: mediaDuration || 30,
|
| 787 |
text: fullText.trim(),
|
| 788 |
+
filename: session.filename,
|
| 789 |
+
}
|
| 790 |
+
updateSession(session.id, data)
|
| 791 |
+
const updated = getSession(session.id)
|
| 792 |
+
setSession(updated)
|
| 793 |
+
if (updated?.data.segments && updated.data.segments.length > 0) {
|
| 794 |
+
setActiveId(updated.data.segments[0].id)
|
| 795 |
+
}
|
| 796 |
+
setStreamingText(null)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 797 |
}
|
|
|
|
| 798 |
} catch (e) {
|
| 799 |
processingRef.current = false
|
| 800 |
setProcessError(e instanceof Error ? e.message : "Request failed")
|