khushalcodiste committed
Commit e63ecc8 · 1 Parent(s): f1cddcd

fix: added

Files changed (3)
  1. Dockerfile +5 -12
  2. package.json +14 -0
  3. server.js +193 -0
Dockerfile CHANGED
@@ -1,19 +1,12 @@
-FROM python:3.11-slim
+FROM node:20-slim
 
 WORKDIR /app
 
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential \
-    git \
-    libgl1 \
-    libglib2.0-0 \
-    && rm -rf /var/lib/apt/lists/*
+COPY package.json .
+RUN npm install
 
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-
-COPY app.py .
+COPY server.js .
 
 EXPOSE 7860
 
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["node", "server.js"]
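This swaps the Python/FastAPI image for a Node one: node:20-slim needs no apt packages, only npm install for the four JavaScript dependencies. Assuming a standard Docker setup, the image can be built and run locally with docker build -t imgvlm . and docker run -p 7860:7860 imgvlm, matching the EXPOSE 7860 kept above.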
package.json ADDED
@@ -0,0 +1,14 @@
+{
+  "name": "imgvlm",
+  "version": "1.0.0",
+  "type": "module",
+  "scripts": {
+    "start": "node server.js"
+  },
+  "dependencies": {
+    "@huggingface/transformers": "next",
+    "express": "^4.21.0",
+    "multer": "^1.4.5-lts.1",
+    "swagger-ui-express": "^5.0.0"
+  }
+}
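Note that "@huggingface/transformers": "next" follows npm's next dist-tag rather than a fixed release, so each image build may resolve a different prerelease; since the Dockerfile copies only package.json (no lockfile), pinning an exact version here would make builds reproducible.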
server.js ADDED
@@ -0,0 +1,193 @@
+import express from "express";
+import multer from "multer";
+import swaggerUi from "swagger-ui-express";
+import {
+  AutoProcessor,
+  Qwen3_5ForConditionalGeneration,
+  RawImage,
+} from "@huggingface/transformers";
+
+const app = express();
+const upload = multer({ storage: multer.memoryStorage() });
+const PORT = 7860;
+const MODEL_ID = "huggingworld/Qwen3.5-0.8B-ONNX";
+
+let model = null;
+let processor = null;
+
+async function loadModel() {
+  console.log(`Loading model ${MODEL_ID}...`);
+  processor = await AutoProcessor.from_pretrained(MODEL_ID);
+  model = await Qwen3_5ForConditionalGeneration.from_pretrained(MODEL_ID, {
+    dtype: {
+      embed_tokens: "q4",
+      vision_encoder: "fp16",
+      decoder_model_merged: "q4",
+    },
+  });
+  console.log("Model loaded successfully.");
+}
+
+async function runInference(imageBuffer, prompt, maxTokens) {
+  const blob = new Blob([imageBuffer]);
+  const image = await (await RawImage.fromBlob(blob)).resize(448, 448);
+
+  const conversation = [
+    {
+      role: "user",
+      content: [
+        { type: "image" },
+        { type: "text", text: prompt },
+      ],
+    },
+  ];
+
+  const text = processor.apply_chat_template(conversation, {
+    add_generation_prompt: true,
+  });
+
+  const inputs = await processor(text, image);
+  const output = await model.generate({
+    ...inputs,
+    max_new_tokens: maxTokens,
+  });
+
+  const promptLength = inputs.input_ids.dims.at(-1);
+  const decoded = processor.batch_decode(
+    output.slice(null, [promptLength, null]),
+    { skip_special_tokens: true },
+  );
+  return decoded[0];
+}
+
+const swaggerDoc = {
+  openapi: "3.0.0",
+  info: {
+    title: "Qwen3.5-0.8B Vision API (ONNX)",
+    version: "1.0.0",
+    description: "Vision-language model API using Qwen3.5-0.8B ONNX with transformers.js",
+  },
+  paths: {
+    "/": {
+      get: {
+        summary: "Root",
+        responses: { 200: { description: "API status" } },
+      },
+    },
+    "/health": {
+      get: {
+        summary: "Health check",
+        responses: { 200: { description: "Model load status" } },
+      },
+    },
+    "/inference": {
+      post: {
+        summary: "Image inference (multipart upload)",
+        requestBody: {
+          required: true,
+          content: {
+            "multipart/form-data": {
+              schema: {
+                type: "object",
+                required: ["file"],
+                properties: {
+                  file: { type: "string", format: "binary", description: "Image file" },
+                  prompt: { type: "string", default: "Describe this image in detail." },
+                  max_tokens: { type: "integer", default: 512 },
+                },
+              },
+            },
+          },
+        },
+        responses: {
+          200: { description: "Inference result" },
+          400: { description: "Invalid input" },
+          503: { description: "Model not loaded" },
+        },
+      },
+    },
+    "/inference/base64": {
+      post: {
+        summary: "Image inference (base64)",
+        requestBody: {
+          required: true,
+          content: {
+            "application/x-www-form-urlencoded": {
+              schema: {
+                type: "object",
+                required: ["image_base64"],
+                properties: {
+                  image_base64: { type: "string", description: "Base64 encoded image" },
+                  prompt: { type: "string", default: "Describe this image in detail." },
+                  max_tokens: { type: "integer", default: 512 },
+                },
+              },
+            },
+          },
+        },
+        responses: {
+          200: { description: "Inference result" },
+          400: { description: "Invalid input" },
+          503: { description: "Model not loaded" },
+        },
+      },
+    },
+  },
+};
+
+app.use("/docs", swaggerUi.serve, swaggerUi.setup(swaggerDoc));
+
+app.get("/", (_req, res) => {
+  res.json({ status: "ok", model: MODEL_ID });
+});
+
+app.get("/health", (_req, res) => {
+  res.json({ status: "healthy", model_loaded: model !== null });
+});
+
+app.post("/inference", upload.single("file"), async (req, res) => {
+  if (!model || !processor) {
+    return res.status(503).json({ detail: "Model not loaded yet." });
+  }
+  if (!req.file) {
+    return res.status(400).json({ detail: "No image file provided." });
+  }
+
+  const prompt = req.body.prompt || "Describe this image in detail.";
+  const maxTokens = parseInt(req.body.max_tokens) || 512;
+
+  try {
+    const response = await runInference(req.file.buffer, prompt, maxTokens);
+    res.json({ response });
+  } catch (err) {
+    console.error(err);
+    res.status(500).json({ detail: "Inference failed.", error: err.message });
+  }
+});
+
+app.post("/inference/base64", express.urlencoded({ extended: true, limit: "50mb" }), async (req, res) => {
+  if (!model || !processor) {
+    return res.status(503).json({ detail: "Model not loaded yet." });
+  }
+  if (!req.body.image_base64) {
+    return res.status(400).json({ detail: "No base64 image provided." });
+  }
+
+  const prompt = req.body.prompt || "Describe this image in detail.";
+  const maxTokens = parseInt(req.body.max_tokens) || 512;
+
+  try {
+    const imageBuffer = Buffer.from(req.body.image_base64, "base64");
+    const response = await runInference(imageBuffer, prompt, maxTokens);
+    res.json({ response });
+  } catch (err) {
+    console.error(err);
+    res.status(500).json({ detail: "Inference failed.", error: err.message });
+  }
+});
+
+loadModel().then(() => {
+  app.listen(PORT, "0.0.0.0", () => {
+    console.log(`Server running on http://0.0.0.0:${PORT}`);
+  });
+});
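For reference, a minimal client sketch for the two inference endpoints. It assumes Node 20 (global fetch, FormData, and Blob), a server reachable at http://localhost:7860, and a placeholder image path cat.jpg; run it as an ES module.

import { readFile } from "node:fs/promises";

const BASE = "http://localhost:7860"; // assumed host/port mapping

// Multipart upload to /inference: the field names ("file", "prompt",
// "max_tokens") match what multer and the route handler read above.
const buf = await readFile("cat.jpg"); // placeholder image path
const form = new FormData();
form.append("file", new Blob([buf]), "cat.jpg");
form.append("prompt", "Describe this image in detail.");
form.append("max_tokens", "256");
const res = await fetch(`${BASE}/inference`, { method: "POST", body: form });
console.log((await res.json()).response);

// Base64 variant: URLSearchParams sends application/x-www-form-urlencoded,
// which is what express.urlencoded() on /inference/base64 expects.
const params = new URLSearchParams({
  image_base64: buf.toString("base64"),
  prompt: "What is in this image?",
});
const res2 = await fetch(`${BASE}/inference/base64`, { method: "POST", body: params });
console.log((await res2.json()).response);

The base64 route avoids multipart encoding at the cost of roughly 33% payload inflation, which is presumably why that handler raises the urlencoded body limit to 50mb.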