Spaces:
Sleeping
Sleeping
Commit ·
e63ecc8
1
Parent(s): f1cddcd
fix: add Node.js Express server (Dockerfile, package.json, server.js)
Browse files- Dockerfile +5 -12
- package.json +14 -0
- server.js +193 -0
Dockerfile
CHANGED
|
@@ -1,19 +1,12 @@
|
|
| 1 |
-
FROM
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
git \
|
| 8 |
-
libgl1 \
|
| 9 |
-
libglib2.0-0 \
|
| 10 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
|
| 12 |
-
COPY
|
| 13 |
-
RUN pip install --no-cache-dir -r requirements.txt
|
| 14 |
-
|
| 15 |
-
COPY app.py .
|
| 16 |
|
| 17 |
EXPOSE 7860
|
| 18 |
|
| 19 |
-
CMD ["
|
|
|
|
FROM node:20-slim

WORKDIR /app

# Run Express in production mode (disables dev-only error pages and logging).
ENV NODE_ENV=production

# Copy the manifest alone first so the npm-install layer is cached across
# source-only edits; skip dev deps and audit/funding chatter for lean builds.
COPY package.json .
RUN npm install --omit=dev --no-audit --no-fund

COPY server.js .

# 7860 is the conventional Hugging Face Spaces port (matches PORT in server.js).
EXPOSE 7860

CMD ["node", "server.js"]
|
package.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "imgvlm",
|
| 3 |
+
"version": "1.0.0",
|
| 4 |
+
"type": "module",
|
| 5 |
+
"scripts": {
|
| 6 |
+
"start": "node server.js"
|
| 7 |
+
},
|
| 8 |
+
"dependencies": {
|
| 9 |
+
"@huggingface/transformers": "next",
|
| 10 |
+
"express": "^4.21.0",
|
| 11 |
+
"multer": "^1.4.5-lts.1",
|
| 12 |
+
"swagger-ui-express": "^5.0.0"
|
| 13 |
+
}
|
| 14 |
+
}
|
server.js
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import express from "express";
|
| 2 |
+
import multer from "multer";
|
| 3 |
+
import swaggerUi from "swagger-ui-express";
|
| 4 |
+
import {
|
| 5 |
+
AutoProcessor,
|
| 6 |
+
Qwen3_5ForConditionalGeneration,
|
| 7 |
+
RawImage,
|
| 8 |
+
} from "@huggingface/transformers";
|
| 9 |
+
|
| 10 |
+
// Express app with in-memory uploads: uploaded files never touch disk, but
// each request's image is held fully in RAM while it is processed.
// NOTE(review): multer 1.x is deprecated with known advisories — consider 2.x.
const app = express();
const upload = multer({ storage: multer.memoryStorage() });

// Honor a platform-provided port (container orchestrators set PORT); fall back
// to 7860, the conventional Hugging Face Spaces port. Garbage values (NaN)
// also fall back to the default.
const PORT = Number.parseInt(process.env.PORT ?? "", 10) || 7860;
const MODEL_ID = "huggingworld/Qwen3.5-0.8B-ONNX";

// Populated once by loadModel(); route handlers treat null as "not ready yet".
let model = null;
let processor = null;
| 17 |
+
|
| 18 |
+
// Downloads and initializes the processor and the quantized model, storing
// both in the module-level `processor` / `model` slots. Any failure rejects
// and is expected to be handled by the caller.
async function loadModel() {
  console.log(`Loading model ${MODEL_ID}...`);

  processor = await AutoProcessor.from_pretrained(MODEL_ID);

  // Mixed-precision layout: 4-bit weights for token embedding and decoder,
  // fp16 for the vision encoder.
  const dtype = {
    embed_tokens: "q4",
    vision_encoder: "fp16",
    decoder_model_merged: "q4",
  };
  model = await Qwen3_5ForConditionalGeneration.from_pretrained(MODEL_ID, { dtype });

  console.log("Model loaded successfully.");
}
| 30 |
+
|
| 31 |
+
// Runs one image+text generation round trip through the loaded model and
// returns only the newly generated text (prompt tokens are stripped).
// Assumes loadModel() has already populated `model` and `processor`.
async function runInference(imageBuffer, prompt, maxTokens) {
  // Decode the raw bytes and normalize to the model's fixed 448x448 input.
  const raw = await RawImage.fromBlob(new Blob([imageBuffer]));
  const image = await raw.resize(448, 448);

  // Single-turn chat: one image slot followed by the user's text prompt.
  const conversation = [
    {
      role: "user",
      content: [{ type: "image" }, { type: "text", text: prompt }],
    },
  ];
  const text = processor.apply_chat_template(conversation, {
    add_generation_prompt: true,
  });

  const inputs = await processor(text, image);
  const output = await model.generate({ ...inputs, max_new_tokens: maxTokens });

  // The output sequence echoes the prompt; slice it off and decode only the
  // generated continuation.
  const promptLength = inputs.input_ids.dims.at(-1);
  const generated = output.slice(null, [promptLength, null]);
  const [decoded] = processor.batch_decode(generated, { skip_special_tokens: true });
  return decoded;
}
| 62 |
+
|
| 63 |
+
// Shared OpenAPI fragments for the two inference endpoints, kept in one place
// so the multipart and base64 docs cannot drift apart. Factories return fresh
// objects so swagger-ui never sees shared mutable state.
const inferenceTuningProps = () => ({
  prompt: { type: "string", default: "Describe this image in detail." },
  max_tokens: { type: "integer", default: 512 },
});

const inferenceResponses = () => ({
  200: { description: "Inference result" },
  400: { description: "Invalid input" },
  503: { description: "Model not loaded" },
});

// OpenAPI 3.0 document served interactively at /docs via swagger-ui-express.
const swaggerDoc = {
  openapi: "3.0.0",
  info: {
    title: "Qwen3.5-0.8B Vision API (ONNX)",
    version: "1.0.0",
    description: "Vision-language model API using Qwen3.5-0.8B ONNX with transformers.js",
  },
  paths: {
    "/": {
      get: {
        summary: "Root",
        responses: { 200: { description: "API status" } },
      },
    },
    "/health": {
      get: {
        summary: "Health check",
        responses: { 200: { description: "Model load status" } },
      },
    },
    "/inference": {
      post: {
        summary: "Image inference (multipart upload)",
        requestBody: {
          required: true,
          content: {
            "multipart/form-data": {
              schema: {
                type: "object",
                required: ["file"],
                properties: {
                  file: { type: "string", format: "binary", description: "Image file" },
                  ...inferenceTuningProps(),
                },
              },
            },
          },
        },
        responses: inferenceResponses(),
      },
    },
    "/inference/base64": {
      post: {
        summary: "Image inference (base64)",
        requestBody: {
          required: true,
          content: {
            "application/x-www-form-urlencoded": {
              schema: {
                type: "object",
                required: ["image_base64"],
                properties: {
                  image_base64: { type: "string", description: "Base64 encoded image" },
                  ...inferenceTuningProps(),
                },
              },
            },
          },
        },
        responses: inferenceResponses(),
      },
    },
  },
};
|
| 137 |
+
|
| 138 |
+
// Interactive API documentation.
app.use("/docs", swaggerUi.serve, swaggerUi.setup(swaggerDoc));

// Liveness: confirms the HTTP layer is up and reports which model is served.
app.get("/", (_req, res) => res.json({ status: "ok", model: MODEL_ID }));

// Readiness: reports whether the model has finished loading.
app.get("/health", (_req, res) =>
  res.json({ status: "healthy", model_loaded: model !== null }),
);
| 147 |
+
|
| 148 |
+
// POST /inference — multipart upload: "file" (image, required) plus optional
// "prompt" and "max_tokens" form fields. 503 until the model is loaded.
app.post("/inference", upload.single("file"), async (req, res) => {
  if (!model || !processor) {
    return res.status(503).json({ detail: "Model not loaded yet." });
  }
  if (!req.file) {
    return res.status(400).json({ detail: "No image file provided." });
  }

  const prompt = req.body.prompt || "Describe this image in detail.";
  // Explicit radix; missing/garbage input parses to NaN and falls back to 512.
  const maxTokens = Number.parseInt(req.body.max_tokens, 10) || 512;

  try {
    const response = await runInference(req.file.buffer, prompt, maxTokens);
    res.json({ response });
  } catch (err) {
    console.error(err);
    res.status(500).json({ detail: "Inference failed.", error: err.message });
  }
});
| 167 |
+
|
| 168 |
+
// POST /inference/base64 — urlencoded body: "image_base64" (required) plus
// optional "prompt" and "max_tokens". 503 until the model is loaded.
app.post("/inference/base64", express.urlencoded({ extended: true, limit: "50mb" }), async (req, res) => {
  if (!model || !processor) {
    return res.status(503).json({ detail: "Model not loaded yet." });
  }
  if (!req.body.image_base64) {
    return res.status(400).json({ detail: "No base64 image provided." });
  }

  const prompt = req.body.prompt || "Describe this image in detail.";
  // Explicit radix; missing/garbage input parses to NaN and falls back to 512.
  const maxTokens = Number.parseInt(req.body.max_tokens, 10) || 512;

  try {
    const imageBuffer = Buffer.from(req.body.image_base64, "base64");
    // Buffer.from silently skips invalid base64 characters; an empty result
    // means the payload carried no decodable data — that is a client error
    // (400), not an inference failure (500).
    if (imageBuffer.length === 0) {
      return res.status(400).json({ detail: "image_base64 is not valid base64 data." });
    }
    const response = await runInference(imageBuffer, prompt, maxTokens);
    res.json({ response });
  } catch (err) {
    console.error(err);
    res.status(500).json({ detail: "Inference failed.", error: err.message });
  }
});
| 188 |
+
|
| 189 |
+
// Load the model before accepting traffic. Fail fast with a nonzero exit if
// loading fails — the original chain had no .catch, so a load error became an
// unhandled rejection and the process lingered without ever listening.
loadModel()
  .then(() => {
    app.listen(PORT, "0.0.0.0", () => {
      console.log(`Server running on http://0.0.0.0:${PORT}`);
    });
  })
  .catch((err) => {
    console.error("Failed to load model:", err);
    process.exit(1);
  });
|