/**
 * Server layer: proxy client requests to Model layer (voxtral-server).
 * Port default 3000, Model layer default http://127.0.0.1:8000
 *
 * POST /api/transcribe-diarize → returns {job_id} immediately (202)
 * GET /api/job/:id → returns {status, data?, error?}
 * Polling avoids HF Spaces ~3 min proxy timeout during long CPU inference.
 */
import express from "express";
import multer from "multer";
import cors from "cors";
// ─── Configuration ────────────────────────────────────────────────────────────
// Listen port for this proxy layer (HF Spaces injects PORT).
const PORT = Number(process.env.PORT) || 3000;
// Base URL of the Model layer; strip ALL trailing slashes (not just one) so
// path concatenation below never produces "//".
const MODEL_URL = (process.env.MODEL_URL || "http://127.0.0.1:8000").replace(/\/+$/, "");
const MAX_UPLOAD_BYTES = 100 * 1024 * 1024; // 100 MB
const DIARIZE_TIMEOUT_MS = 60 * 60 * 1000; // 60 min (CPU: ~50s/min of audio)
const JOB_TTL_MS = 30 * 60 * 1000; // keep completed jobs 30 min then evict
// Uploads are buffered in memory (no temp files); multer enforces the size
// cap while parsing the multipart body.
const uploadLimits = { fileSize: MAX_UPLOAD_BYTES };
const upload = multer({ storage: multer.memoryStorage(), limits: uploadLimits });
const app = express();

// Permissive CORS: the browser client may be served from a different origin.
app.use(cors({
  origin: "*",
  methods: ["GET", "POST", "OPTIONS"],
  allowedHeaders: ["Content-Type"],
}));

// Request logging: one line per completed response with method, path,
// status code, and wall-clock duration.
app.use((req, res, next) => {
  const startedAt = Date.now();
  res.on("finish", () => {
    const elapsedMs = Date.now() - startedAt;
    console.log("[server]", req.method, req.path, res.statusCode, `${elapsedMs}ms`);
  });
  next();
});
// ─── Job store ────────────────────────────────────────────────────────────────
/** @type {Map<string, {status:"pending"|"done"|"error", data?:object, error?:string, ts:number}>} */
const jobs = new Map();

/**
 * Drop finished (done/error) jobs whose timestamp is older than JOB_TTL_MS.
 * Pending jobs are never evicted, so an in-flight inference cannot lose its
 * slot however long it runs.
 */
function evictOldJobs() {
  const expiredBefore = Date.now() - JOB_TTL_MS;
  for (const [id, job] of jobs.entries()) {
    if (job.status === "pending") continue;
    if (job.ts < expiredBefore) {
      jobs.delete(id);
    }
  }
}
// Sweep every 5 minutes.
setInterval(evictOldJobs, 5 * 60 * 1000);
// ─── /health ──────────────────────────────────────────────────────────────────
// Liveness probe: reports this layer's health plus the Model layer's, so a
// client can tell "proxy up, model down" apart from "everything down".
app.get("/health", async (req, res) => {
  try {
    const upstream = await fetch(`${MODEL_URL}/health`, { signal: AbortSignal.timeout(5000) });
    const body = await upstream.json().catch(() => ({}));
    if (upstream.ok) {
      res.json({ ok: true, server: "ser-server", model: body });
    } else {
      res.status(502).json({ ok: false, error: "Model layer unavailable", detail: body });
    }
  } catch (err) {
    console.error("[server] health check model:", err?.message || err);
    res.status(502).json({
      ok: false,
      error: "Cannot reach Model layer; start model/voxtral-server first",
      url: MODEL_URL,
    });
  }
});
// ─── Background job processor ─────────────────────────────────────────────────
/**
 * Forward an uploaded audio file to the Model layer's /transcribe-diarize
 * endpoint and record the outcome in the `jobs` store under `jobId`.
 *
 * @param {string} jobId - key in `jobs` to update when the job settles
 * @param {{buffer: Buffer, size: number, originalname?: string}} file - multer memory-storage file
 * @param {object} [query] - original request query; only `num_speakers` is forwarded
 */
async function runDiarizeJob(jobId, file, query) {
  const reqId = `req-${Date.now().toString(36)}`;
  const start = Date.now();
  const { buffer, size, originalname } = file;

  const form = new FormData();
  form.append("audio", new Blob([buffer]), originalname || "audio");

  // Forward num_speakers only when the client supplied it.
  const numSpeakers = query?.num_speakers;
  const url = numSpeakers
    ? `${MODEL_URL}/transcribe-diarize?num_speakers=${encodeURIComponent(numSpeakers)}`
    : `${MODEL_URL}/transcribe-diarize`;

  // Manual AbortController (rather than AbortSignal.timeout) so the timer can
  // be cleared as soon as the response arrives.
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), DIARIZE_TIMEOUT_MS);
  try {
    console.log(`[server] ${reqId} job=${jobId} → ${url} file=${originalname} size=${size}`);
    const r = await fetch(url, { method: "POST", body: form, signal: controller.signal });
    clearTimeout(timeoutId);
    const rawText = await r.text().catch(() => "");
    let data = {};
    // `?? {}` guards against JSON.parse("null") returning null, which would
    // make the property reads below throw and misreport a model error as
    // "Cannot reach Model layer".
    try { data = JSON.parse(rawText) ?? {}; } catch {}
    if (!r.ok) {
      const errMsg = data.detail || data.error || "Failed";
      console.error(`[server] ${reqId} model error ${r.status}: ${errMsg}`);
      jobs.set(jobId, { status: "error", error: typeof errMsg === "string" ? errMsg : "Model error", ts: Date.now() });
      return;
    }
    console.log(`[server] ${reqId} job=${jobId} done in ${Date.now() - start}ms`);
    jobs.set(jobId, { status: "done", data, ts: Date.now() });
  } catch (err) {
    clearTimeout(timeoutId);
    const isAbort = err.name === "AbortError";
    console.error(`[server] ${reqId} job=${jobId} ${isAbort ? "timeout" : "error"} after ${Date.now() - start}ms:`, err.message);
    // Derive the minutes from DIARIZE_TIMEOUT_MS so the message stays
    // consistent if the constant changes (was hard-coded ">60 min").
    const timeoutMinutes = Math.round(DIARIZE_TIMEOUT_MS / 60000);
    jobs.set(jobId, {
      status: "error",
      error: isAbort
        ? `Request timeout (>${timeoutMinutes} min); try shorter audio`
        : "Cannot reach Model layer; ensure voxtral-server is running",
      ts: Date.now(),
    });
  }
}
// ─── /api/job/:id — poll for job result ───────────────────────────────────────
// 404 once a job has been evicted (or never existed); otherwise one of
// {status:"pending"} | {status:"error", error} | {status:"done", data}.
app.get("/api/job/:id", (req, res) => {
  const job = jobs.get(req.params.id);
  if (!job) {
    return res.status(404).json({ error: "Job not found or expired" });
  }
  switch (job.status) {
    case "pending":
      return res.json({ status: "pending" });
    case "error":
      return res.status(200).json({ status: "error", error: job.error });
    default:
      return res.json({ status: "done", data: job.data });
  }
});
// ─── /api/transcribe-diarize — submit job, return immediately ─────────────────
// Accepts a multipart upload (field "audio"), registers a pending job, and
// replies 202 with the job id before inference starts; clients then poll
// /api/job/:id for the result.
app.post("/api/transcribe-diarize", upload.single("audio"), (req, res) => {
  const file = req.file;
  if (!file) {
    return res.status(400).json({ error: "Upload an audio file (form field: audio)" });
  }
  if (file.size > MAX_UPLOAD_BYTES) {
    return res.status(400).json({ error: `File size exceeds ${MAX_UPLOAD_BYTES / 1024 / 1024}MB limit` });
  }

  const suffix = Math.random().toString(36).slice(2, 6);
  const jobId = `job-${Date.now().toString(36)}-${suffix}`;
  jobs.set(jobId, { status: "pending", ts: Date.now() });

  // Respond immediately — don't await the model call.
  res.status(202).json({ job_id: jobId });

  // Kick off background processing; record any unexpected rejection so the
  // job never stays "pending" forever.
  runDiarizeJob(jobId, file, req.query).catch((err) => {
    jobs.set(jobId, { status: "error", error: err.message, ts: Date.now() });
  });
});
// ─── /api/debug-inference ─────────────────────────────────────────────────────
// Pass-through to the Model layer's debug endpoint (60 s budget).
app.get("/api/debug-inference", async (req, res) => {
  try {
    const upstream = await fetch(`${MODEL_URL}/debug-inference`, { signal: AbortSignal.timeout(60000) });
    const payload = await upstream.json().catch(() => ({ error: "non-JSON response from model" }));
    res.json(payload);
  } catch (err) {
    res.status(502).json({ error: err.message });
  }
});
// ─── start ────────────────────────────────────────────────────────────────────
// Bind on all interfaces (Express default) and announce the API surface.
// Fixed mojibake in the startup log lines ("β" was a mis-encoded "→").
app.listen(PORT, () => {
  console.log(`[server] Server layer listening on http://0.0.0.0:${PORT}`);
  console.log("[server] Model layer URL:", MODEL_URL);
  console.log("[server] POST /api/transcribe-diarize → submit async job (202 + job_id)");
  console.log("[server] GET /api/job/:id → poll job status");
});