Lior-0618 committed on
Commit
cc028c9
Β·
1 Parent(s): a01a078

fix: async job polling for timeout + FER ONNX ort>=1.19.0

Browse files
Files changed (3) hide show
  1. api/requirements.txt +2 -2
  2. proxy/index.js +70 -49
  3. web/src/app/studio/page.tsx +42 -11
api/requirements.txt CHANGED
@@ -13,6 +13,6 @@ accelerate>=1.0.0
13
  mistral-common
14
  safetensors
15
  sentencepiece
16
- # FER inference
17
- onnxruntime>=1.16.0
18
  opencv-python-headless>=4.8.0
 
13
  mistral-common
14
  safetensors
15
  sentencepiece
16
+ # FER inference β€” model uses ONNX IR v10, requires ort>=1.19.0
17
+ onnxruntime>=1.19.0
18
  opencv-python-headless>=4.8.0
proxy/index.js CHANGED
@@ -1,6 +1,10 @@
1
  /**
2
  * Server layer: proxy client requests to Model layer (voxtral-server).
3
  * Port default 3000, Model layer default http://127.0.0.1:8000
 
 
 
 
4
  */
5
  import express from "express";
6
  import multer from "multer";
@@ -9,8 +13,8 @@ import cors from "cors";
9
  const PORT = Number(process.env.PORT) || 3000;
10
  const MODEL_URL = (process.env.MODEL_URL || "http://127.0.0.1:8000").replace(/\/$/, "");
11
  const MAX_UPLOAD_BYTES = 100 * 1024 * 1024; // 100 MB
12
- const TRANSCRIBE_TIMEOUT_MS = 30 * 60 * 1000; // 30 min (CPU inference is slow)
13
- const DIARIZE_TIMEOUT_MS = 60 * 60 * 1000; // 60 min (CPU: ~50s audio/min)
14
 
15
  const upload = multer({
16
  storage: multer.memoryStorage(),
@@ -20,12 +24,7 @@ const upload = multer({
20
  const app = express();
21
 
22
  app.use(cors({
23
- origin: [
24
- "http://localhost:3030",
25
- "http://127.0.0.1:3030",
26
- "http://localhost:3000",
27
- "http://127.0.0.1:3000",
28
- ],
29
  methods: ["GET", "POST", "OPTIONS"],
30
  allowedHeaders: ["Content-Type"],
31
  }));
@@ -38,6 +37,18 @@ app.use((req, res, next) => {
38
  next();
39
  });
40
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  // ─── /health ──────────────────────────────────────────────────────────────────
42
  app.get("/health", async (req, res) => {
43
  try {
@@ -57,36 +68,25 @@ app.get("/health", async (req, res) => {
57
  }
58
  });
59
 
60
- // ─── shared proxy helper ──────────────────────────────────────────────────────
61
- async function proxyToModel(req, res, modelPath, timeoutMs) {
62
  const reqId = `req-${Date.now().toString(36)}`;
63
  const start = Date.now();
64
-
65
- if (!req.file) {
66
- return res.status(400).json({ error: "Upload an audio file (form field: audio)" });
67
- }
68
-
69
- const { buffer, size, originalname } = req.file;
70
- if (size > MAX_UPLOAD_BYTES) {
71
- return res.status(400).json({
72
- error: `File size exceeds ${MAX_UPLOAD_BYTES / 1024 / 1024}MB limit`,
73
- });
74
- }
75
 
76
  const form = new FormData();
77
  form.append("audio", new Blob([buffer]), originalname || "audio");
78
 
79
- // Forward num_speakers query param if present
80
- const numSpeakers = req.query.num_speakers;
81
  const url = numSpeakers
82
- ? `${MODEL_URL}${modelPath}?num_speakers=${encodeURIComponent(numSpeakers)}`
83
- : `${MODEL_URL}${modelPath}`;
84
 
85
  const controller = new AbortController();
86
- const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
87
 
88
  try {
89
- console.log(`[server] ${reqId} β†’ ${url} file=${originalname} size=${size}`);
90
  const r = await fetch(url, { method: "POST", body: form, signal: controller.signal });
91
  clearTimeout(timeoutId);
92
 
@@ -96,27 +96,58 @@ async function proxyToModel(req, res, modelPath, timeoutMs) {
96
 
97
  if (!r.ok) {
98
  const errMsg = data.detail || data.error || "Failed";
99
- console.error(`[server] ${reqId} model error ${r.status}: ${errMsg} | raw=${rawText.slice(0, 300)}`);
100
- return res.status(r.status >= 500 ? 502 : r.status).json({
101
- error: typeof errMsg === "string" ? errMsg : "Model error",
102
- });
103
  }
104
 
105
- console.log(`[server] ${reqId} ok in ${Date.now() - start}ms`);
106
- res.json(data);
107
  } catch (err) {
108
  clearTimeout(timeoutId);
109
  const isAbort = err.name === "AbortError";
110
- console.error(`[server] ${reqId} ${isAbort ? "timeout" : "error"} after ${Date.now() - start}ms:`, err.message);
111
- res.status(isAbort ? 504 : 502).json({
 
112
  error: isAbort
113
- ? `Request timeout (>${timeoutMs / 60000} min); try shorter audio`
114
  : "Cannot reach Model layer; ensure voxtral-server is running",
 
115
  });
116
  }
117
  }
118
 
119
- // ─── /api/debug-inference (proxies to model /debug-inference) ────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  app.get("/api/debug-inference", async (req, res) => {
121
  try {
122
  const r = await fetch(`${MODEL_URL}/debug-inference`, { signal: AbortSignal.timeout(60000) });
@@ -127,20 +158,10 @@ app.get("/api/debug-inference", async (req, res) => {
127
  }
128
  });
129
 
130
- // ─── /api/speech-to-text ──────────────────────────────────────────────────────
131
- app.post("/api/speech-to-text", upload.single("audio"), (req, res) => {
132
- return proxyToModel(req, res, "/transcribe", TRANSCRIBE_TIMEOUT_MS);
133
- });
134
-
135
- // ─── /api/transcribe-diarize ──────────────────────────────────────────────────
136
- app.post("/api/transcribe-diarize", upload.single("audio"), (req, res) => {
137
- return proxyToModel(req, res, "/transcribe-diarize", DIARIZE_TIMEOUT_MS);
138
- });
139
-
140
  // ─── start ────────────────────────────────────────────────────────────────────
141
  app.listen(PORT, () => {
142
  console.log(`[server] Server layer listening on http://0.0.0.0:${PORT}`);
143
  console.log("[server] Model layer URL:", MODEL_URL);
144
- console.log("[server] POST /api/speech-to-text β†’ batch transcription");
145
- console.log("[server] POST /api/transcribe-diarize β†’ transcription + speaker diarization");
146
  });
 
1
  /**
2
  * Server layer: proxy client requests to Model layer (voxtral-server).
3
  * Port default 3000, Model layer default http://127.0.0.1:8000
4
+ *
5
+ * POST /api/transcribe-diarize β†’ returns {job_id} immediately (202)
6
+ * GET /api/job/:id β†’ returns {status, data?, error?}
7
+ * Polling avoids HF Spaces ~3 min proxy timeout during long CPU inference.
8
  */
9
  import express from "express";
10
  import multer from "multer";
 
13
  const PORT = Number(process.env.PORT) || 3000;
14
  const MODEL_URL = (process.env.MODEL_URL || "http://127.0.0.1:8000").replace(/\/$/, "");
15
  const MAX_UPLOAD_BYTES = 100 * 1024 * 1024; // 100 MB
16
+ const DIARIZE_TIMEOUT_MS = 60 * 60 * 1000; // 60 min (CPU: ~50s/min of audio)
17
+ const JOB_TTL_MS = 30 * 60 * 1000; // keep completed jobs 30 min then evict
18
 
19
  const upload = multer({
20
  storage: multer.memoryStorage(),
 
24
  const app = express();
25
 
26
  app.use(cors({
27
+ origin: "*",
 
 
 
 
 
28
  methods: ["GET", "POST", "OPTIONS"],
29
  allowedHeaders: ["Content-Type"],
30
  }));
 
37
  next();
38
  });
39
 
40
// ─── Job store ────────────────────────────────────────────────────────────────
/** @type {Map<string, {status:"pending"|"done"|"error", data?:object, error?:string, ts:number}>} */
const jobs = new Map();

/**
 * Evict finished jobs older than JOB_TTL_MS so the in-memory store cannot
 * grow without bound. Jobs still "pending" are never evicted here — they
 * terminate on their own when the upstream fetch completes or times out.
 */
function evictOldJobs() {
  const expiry = Date.now() - JOB_TTL_MS;
  jobs.forEach((job, id) => {
    const finished = job.status !== "pending";
    if (finished && job.ts < expiry) jobs.delete(id);
  });
}
setInterval(evictOldJobs, 5 * 60 * 1000);
+ setInterval(evictOldJobs, 5 * 60 * 1000);
51
+
52
  // ─── /health ──────────────────────────────────────────────────────────────────
53
  app.get("/health", async (req, res) => {
54
  try {
 
68
  }
69
  });
70
 
71
+ // ─── Background job processor ─────────────────────────────────────────────────
72
+ async function runDiarizeJob(jobId, file, query) {
73
  const reqId = `req-${Date.now().toString(36)}`;
74
  const start = Date.now();
75
+ const { buffer, size, originalname } = file;
 
 
 
 
 
 
 
 
 
 
76
 
77
  const form = new FormData();
78
  form.append("audio", new Blob([buffer]), originalname || "audio");
79
 
80
+ const numSpeakers = query?.num_speakers;
 
81
  const url = numSpeakers
82
+ ? `${MODEL_URL}/transcribe-diarize?num_speakers=${encodeURIComponent(numSpeakers)}`
83
+ : `${MODEL_URL}/transcribe-diarize`;
84
 
85
  const controller = new AbortController();
86
+ const timeoutId = setTimeout(() => controller.abort(), DIARIZE_TIMEOUT_MS);
87
 
88
  try {
89
+ console.log(`[server] ${reqId} job=${jobId} β†’ ${url} file=${originalname} size=${size}`);
90
  const r = await fetch(url, { method: "POST", body: form, signal: controller.signal });
91
  clearTimeout(timeoutId);
92
 
 
96
 
97
  if (!r.ok) {
98
  const errMsg = data.detail || data.error || "Failed";
99
+ console.error(`[server] ${reqId} model error ${r.status}: ${errMsg}`);
100
+ jobs.set(jobId, { status: "error", error: typeof errMsg === "string" ? errMsg : "Model error", ts: Date.now() });
101
+ return;
 
102
  }
103
 
104
+ console.log(`[server] ${reqId} job=${jobId} done in ${Date.now() - start}ms`);
105
+ jobs.set(jobId, { status: "done", data, ts: Date.now() });
106
  } catch (err) {
107
  clearTimeout(timeoutId);
108
  const isAbort = err.name === "AbortError";
109
+ console.error(`[server] ${reqId} job=${jobId} ${isAbort ? "timeout" : "error"} after ${Date.now() - start}ms:`, err.message);
110
+ jobs.set(jobId, {
111
+ status: "error",
112
  error: isAbort
113
+ ? `Request timeout (>60 min); try shorter audio`
114
  : "Cannot reach Model layer; ensure voxtral-server is running",
115
+ ts: Date.now(),
116
  });
117
  }
118
  }
119
 
120
+ // ─── /api/job/:id β€” poll for job result ───────────────────────────────────────
121
+ app.get("/api/job/:id", (req, res) => {
122
+ const job = jobs.get(req.params.id);
123
+ if (!job) return res.status(404).json({ error: "Job not found or expired" });
124
+ if (job.status === "pending") return res.json({ status: "pending" });
125
+ if (job.status === "error") return res.status(200).json({ status: "error", error: job.error });
126
+ return res.json({ status: "done", data: job.data });
127
+ });
128
+
129
+ // ─── /api/transcribe-diarize β€” submit job, return immediately ─────────────────
130
+ app.post("/api/transcribe-diarize", upload.single("audio"), (req, res) => {
131
+ if (!req.file) {
132
+ return res.status(400).json({ error: "Upload an audio file (form field: audio)" });
133
+ }
134
+ if (req.file.size > MAX_UPLOAD_BYTES) {
135
+ return res.status(400).json({ error: `File size exceeds ${MAX_UPLOAD_BYTES / 1024 / 1024}MB limit` });
136
+ }
137
+
138
+ const jobId = `job-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 6)}`;
139
+ jobs.set(jobId, { status: "pending", ts: Date.now() });
140
+
141
+ // Respond immediately β€” don't await
142
+ res.status(202).json({ job_id: jobId });
143
+
144
+ // Kick off background processing
145
+ runDiarizeJob(jobId, req.file, req.query).catch(err => {
146
+ jobs.set(jobId, { status: "error", error: err.message, ts: Date.now() });
147
+ });
148
+ });
149
+
150
+ // ─── /api/debug-inference ─────────────────────────────────────────────────────
151
  app.get("/api/debug-inference", async (req, res) => {
152
  try {
153
  const r = await fetch(`${MODEL_URL}/debug-inference`, { signal: AbortSignal.timeout(60000) });
 
158
  }
159
  });
160
 
 
 
 
 
 
 
 
 
 
 
161
// ─── start ────────────────────────────────────────────────────────────────────
/** Log the listening address and the public route map once the server is up. */
function announceStartup() {
  console.log(`[server] Server layer listening on http://0.0.0.0:${PORT}`);
  console.log("[server] Model layer URL:", MODEL_URL);
  console.log("[server] POST /api/transcribe-diarize → submit async job (202 + job_id)");
  console.log("[server] GET /api/job/:id → poll job status");
}
app.listen(PORT, announceStartup);
web/src/app/studio/page.tsx CHANGED
@@ -687,41 +687,72 @@ function StudioContent() {
687
  }
688
  }, [sessionId])
689
 
690
- // Automatic processing for pending sessions
 
 
691
  useEffect(() => {
692
  if (!session || processingRef.current || processError) return
693
 
694
- // If we have a file but no segments, it's a pending session
695
  if (session.file && session.data.segments.length === 0) {
696
- processingRef.current = true // synchronous guard β€” prevents re-entry before state update commits
697
  const process = async () => {
698
  setIsProcessing(true)
699
  setProcessError(null)
700
  try {
 
701
  const formData = new FormData()
702
  formData.append("audio", session.file!, session.filename)
703
 
704
- const res = await fetch(`${API_BASE}/api/transcribe-diarize`, {
705
  method: "POST",
706
  body: formData,
707
  })
708
 
709
- if (!res.ok) {
710
- const errData = await res.json().catch(() => ({}))
711
- throw new Error(errData.error ?? "Processing failed")
712
  }
713
 
714
- const data = await res.json() as DiarizeResult
715
- updateSession(session.id, data)
 
 
 
 
716
 
717
- // Re-fetch to update local state and trigger re-render
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
718
  const updated = getSession(session.id)
719
  setSession(updated)
720
  if (updated?.data.segments && updated.data.segments.length > 0) {
721
  setActiveId(updated.data.segments[0].id)
722
  }
723
  } catch (e) {
724
- processingRef.current = false // allow retry on error
725
  setProcessError(e instanceof Error ? e.message : "Request failed")
726
  } finally {
727
  setIsProcessing(false)
 
687
  }
688
  }, [sessionId])
689
 
690
+ // Automatic processing for pending sessions.
691
+ // Uses async job polling: POST returns {job_id} immediately, then GET /api/job/:id
692
+ // until done β€” avoids HF Spaces ~3 min proxy timeout during long CPU inference.
693
  useEffect(() => {
694
  if (!session || processingRef.current || processError) return
695
 
 
696
  if (session.file && session.data.segments.length === 0) {
697
+ processingRef.current = true
698
  const process = async () => {
699
  setIsProcessing(true)
700
  setProcessError(null)
701
  try {
702
+ // 1. Submit job β€” server responds immediately with job_id (202)
703
  const formData = new FormData()
704
  formData.append("audio", session.file!, session.filename)
705
 
706
+ const submitRes = await fetch(`${API_BASE}/api/transcribe-diarize`, {
707
  method: "POST",
708
  body: formData,
709
  })
710
 
711
+ if (!submitRes.ok) {
712
+ const errData = await submitRes.json().catch(() => ({}))
713
+ throw new Error(errData.error ?? "Submit failed")
714
  }
715
 
716
+ const { job_id } = await submitRes.json() as { job_id: string }
717
+
718
+ // 2. Poll until done (every 3s)
719
+ const POLL_INTERVAL = 3000
720
+ const MAX_POLLS = 60 * 20 // 60 min max
721
+ let polls = 0
722
 
723
+ const data = await new Promise<DiarizeResult>((resolve, reject) => {
724
+ const tick = async () => {
725
+ polls++
726
+ if (polls > MAX_POLLS) {
727
+ reject(new Error("Processing timed out after 60 minutes"))
728
+ return
729
+ }
730
+ try {
731
+ const pollRes = await fetch(`${API_BASE}/api/job/${job_id}`)
732
+ const pollData = await pollRes.json()
733
+ if (pollData.status === "done") {
734
+ resolve(pollData.data as DiarizeResult)
735
+ } else if (pollData.status === "error") {
736
+ reject(new Error(pollData.error ?? "Processing failed"))
737
+ } else {
738
+ // still pending β€” keep polling
739
+ setTimeout(tick, POLL_INTERVAL)
740
+ }
741
+ } catch (e) {
742
+ reject(e)
743
+ }
744
+ }
745
+ setTimeout(tick, POLL_INTERVAL)
746
+ })
747
+
748
+ updateSession(session.id, data)
749
  const updated = getSession(session.id)
750
  setSession(updated)
751
  if (updated?.data.segments && updated.data.segments.length > 0) {
752
  setActiveId(updated.data.segments[0].id)
753
  }
754
  } catch (e) {
755
+ processingRef.current = false
756
  setProcessError(e instanceof Error ? e.message : "Request failed")
757
  } finally {
758
  setIsProcessing(false)