khushalcodiste committed on
Commit
bd23640
·
1 Parent(s): 0e79077

feat: add Langfuse tracing

Browse files
Files changed (2) hide show
  1. package.json +3 -0
  2. server.js +100 -9
package.json CHANGED
@@ -6,6 +6,9 @@
6
  "start": "node server.js"
7
  },
8
  "dependencies": {
 
 
 
9
  "@huggingface/transformers": "next",
10
  "express": "^4.21.0",
11
  "onnxruntime-node": "^1.21.0",
 
6
  "start": "node server.js"
7
  },
8
  "dependencies": {
9
+ "@langfuse/otel": "^5.0.1",
10
+ "@langfuse/tracing": "^5.0.1",
11
+ "@opentelemetry/sdk-node": "^0.206.0",
12
  "@huggingface/transformers": "next",
13
  "express": "^4.21.0",
14
  "onnxruntime-node": "^1.21.0",
server.js CHANGED
@@ -5,15 +5,24 @@ import {
5
  Qwen3_5ForConditionalGeneration,
6
  } from "@huggingface/transformers";
7
  import crypto from "crypto";
 
 
 
8
 
9
  const app = express();
10
  const PORT = 7860;
11
  const MODEL_ID = "huggingworld/Qwen3.5-0.8B-ONNX";
12
  const API_KEY = process.env.API_KEY;
 
 
 
 
 
13
 
14
  let model = null;
15
  let processor = null;
16
  let inferenceQueue = Promise.resolve();
 
17
 
18
  function log(level, event, meta = {}) {
19
  const payload = {
@@ -47,6 +56,55 @@ async function loadModel() {
47
  });
48
  }
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  async function runTextInference(prompt, maxTokens) {
51
  const conversation = [
52
  {
@@ -213,14 +271,35 @@ app.post("/prompt", requireApiKey, express.json(), async (req, res) => {
213
  }
214
 
215
  try {
216
- const start = Date.now();
217
- const response = await queueTextInference(prompt, maxTokens);
218
- log("info", "prompt_completed", {
219
- request_id: req.requestId,
220
- duration_ms: Date.now() - start,
221
- response_chars: response?.length ?? 0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  });
223
- res.json({ response });
224
  } catch (err) {
225
  log("error", "prompt_failed", {
226
  request_id: req.requestId,
@@ -231,12 +310,24 @@ app.post("/prompt", requireApiKey, express.json(), async (req, res) => {
231
  }
232
  });
233
 
234
- loadModel().then(() => {
235
- app.listen(PORT, "0.0.0.0", () => {
236
  log("info", "server_started", {
237
  host: "0.0.0.0",
238
  port: PORT,
239
  model_id: MODEL_ID,
 
240
  });
 
 
 
 
 
 
 
 
 
 
 
241
  });
242
  });
 
5
  Qwen3_5ForConditionalGeneration,
6
  } from "@huggingface/transformers";
7
  import crypto from "crypto";
8
+ import { NodeSDK } from "@opentelemetry/sdk-node";
9
+ import { LangfuseSpanProcessor } from "@langfuse/otel";
10
+ import { startActiveObservation } from "@langfuse/tracing";
11
 
12
  const app = express();
13
  const PORT = 7860;
14
  const MODEL_ID = "huggingworld/Qwen3.5-0.8B-ONNX";
15
  const API_KEY = process.env.API_KEY;
16
+ const LANGFUSE_PUBLIC_KEY = process.env.LANGFUSE_PUBLIC_KEY;
17
+ const LANGFUSE_SECRET_KEY = process.env.LANGFUSE_SECRET_KEY;
18
+ const LANGFUSE_BASE_URL = process.env.LANGFUSE_BASE_URL || "https://cloud.langfuse.com";
19
+ const LANGFUSE_ENV = process.env.LANGFUSE_ENV || process.env.NODE_ENV || "development";
20
+ const LANGFUSE_ENABLED = Boolean(LANGFUSE_PUBLIC_KEY && LANGFUSE_SECRET_KEY);
21
 
22
  let model = null;
23
  let processor = null;
24
  let inferenceQueue = Promise.resolve();
25
+ let telemetrySdk = null;
26
 
27
  function log(level, event, meta = {}) {
28
  const payload = {
 
56
  });
57
  }
58
 
59
+ async function setupTracing() {
60
+ if (!LANGFUSE_ENABLED) {
61
+ log("info", "langfuse_disabled", {
62
+ reason: "missing_langfuse_keys",
63
+ });
64
+ return;
65
+ }
66
+
67
+ const spanProcessor = new LangfuseSpanProcessor({
68
+ publicKey: LANGFUSE_PUBLIC_KEY,
69
+ secretKey: LANGFUSE_SECRET_KEY,
70
+ baseUrl: LANGFUSE_BASE_URL,
71
+ environment: LANGFUSE_ENV,
72
+ });
73
+
74
+ telemetrySdk = new NodeSDK({
75
+ spanProcessors: [spanProcessor],
76
+ });
77
+
78
+ await telemetrySdk.start();
79
+ log("info", "langfuse_enabled", {
80
+ base_url: LANGFUSE_BASE_URL,
81
+ environment: LANGFUSE_ENV,
82
+ });
83
+ }
84
+
85
/**
 * Runs `handler` inside a Langfuse root observation named "http.prompt",
 * annotating it with the request input and metadata. When Langfuse is not
 * configured, the handler is invoked directly (with no span argument).
 *
 * @param {import('express').Request} req - current request (requestId, method, originalUrl)
 * @param {string} prompt - user prompt forwarded to the model
 * @param {number} maxTokens - generation token budget
 * @param {(span?: object) => Promise<*>} handler - work to trace
 * @returns {Promise<*>} whatever `handler` resolves to
 */
async function withPromptTrace(req, prompt, maxTokens, handler) {
  // Tracing disabled: run the handler bare, no span is passed.
  if (!LANGFUSE_ENABLED) {
    return handler();
  }

  const annotateAndRun = async (span) => {
    span.update({
      input: { prompt, max_tokens: maxTokens },
      metadata: {
        request_id: req.requestId,
        method: req.method,
        path: req.originalUrl,
      },
    });
    return handler(span);
  };

  // endOnExit: the span is closed automatically when annotateAndRun settles.
  return startActiveObservation("http.prompt", annotateAndRun, {
    endOnExit: true,
  });
}
107
+
108
  async function runTextInference(prompt, maxTokens) {
109
  const conversation = [
110
  {
 
271
  }
272
 
273
  try {
274
+ await withPromptTrace(req, prompt, maxTokens, async (span) => {
275
+ const generation = span?.startObservation(
276
+ "qwen_text_generation",
277
+ {
278
+ model: MODEL_ID,
279
+ input: prompt,
280
+ modelParameters: { max_new_tokens: maxTokens, do_sample: 0 },
281
+ },
282
+ { asType: "generation" },
283
+ );
284
+
285
+ const start = Date.now();
286
+ const response = await queueTextInference(prompt, maxTokens);
287
+ const duration = Date.now() - start;
288
+
289
+ generation?.update({ output: response }).end();
290
+ span?.update({
291
+ output: { response_chars: response?.length ?? 0 },
292
+ metadata: { duration_ms: duration },
293
+ });
294
+
295
+ log("info", "prompt_completed", {
296
+ request_id: req.requestId,
297
+ duration_ms: duration,
298
+ response_chars: response?.length ?? 0,
299
+ });
300
+
301
+ res.json({ response });
302
  });
 
303
  } catch (err) {
304
  log("error", "prompt_failed", {
305
  request_id: req.requestId,
 
310
  }
311
  });
312
 
313
// Boot sequence: initialize tracing and load the model in parallel, then
// start listening. The .catch() is required — without it a rejected
// loadModel()/setupTracing() is an unhandled promise rejection and the
// process lingers without ever serving traffic.
Promise.all([setupTracing(), loadModel()])
  .then(() => {
    app.listen(PORT, "0.0.0.0", () => {
      log("info", "server_started", {
        host: "0.0.0.0",
        port: PORT,
        model_id: MODEL_ID,
        langfuse_enabled: LANGFUSE_ENABLED,
      });
    });

    // Graceful shutdown: flush buffered Langfuse spans before exiting.
    // Idempotent so a second SIGINT/SIGTERM doesn't re-enter; finally
    // guarantees the process exits even if the SDK flush throws.
    let shuttingDown = false;
    const shutdown = async (signal) => {
      if (shuttingDown) return;
      shuttingDown = true;
      log("info", "shutdown_started", { signal });
      try {
        if (telemetrySdk) {
          await telemetrySdk.shutdown();
        }
      } finally {
        process.exit(0);
      }
    };

    process.on("SIGINT", () => shutdown("SIGINT"));
    process.on("SIGTERM", () => shutdown("SIGTERM"));
  })
  .catch((err) => {
    log("error", "startup_failed", { error: err?.message ?? String(err) });
    process.exit(1);
  });