khushalcodiste commited on
Commit
78cfb21
·
1 Parent(s): fa2b547

Initial commit: Qwen3.5-0.8B Vision API

Browse files
Files changed (4) hide show
  1. Dockerfile +21 -0
  2. README.md +37 -6
  3. app.py +142 -0
  4. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.11-slim

WORKDIR /app

# Build tools: needed to compile any dependency wheels that ship without
# prebuilt binaries; apt cache removed to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install Python deps in their own layer so it is cached across code-only edits.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app.py .

# HF Spaces routes external traffic to port 7860 (matches app_port in README).
EXPOSE 7860

# Run the FastAPI app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,41 @@
1
  ---
2
- title: IMGVLM
3
- emoji: 🦀
4
- colorFrom: purple
5
- colorTo: yellow
6
  sdk: docker
7
- pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Qwen3.5-0.8B Vision API
3
+ emoji: 🔮
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
+ app_port: 7860
8
  ---
9
 
10
+ # Qwen3.5-0.8B Vision API
11
+
12
+ FastAPI service for image inference using [Qwen/Qwen3.5-0.8B](https://huggingface.co/Qwen/Qwen3.5-0.8B).
13
+
14
+ ## Endpoints
15
+
16
+ ### `POST /inference`
17
+ Upload an image file with a text prompt.
18
+
19
+ **Parameters:**
20
+ - `file` (required) - Image file upload
21
+ - `prompt` (optional) - Text prompt (default: "Describe this image in detail.")
22
+ - `max_tokens` (optional) - Max tokens to generate (default: 512)
23
+
24
+ ### `POST /inference/base64`
25
+ Send a base64-encoded image with a text prompt.
26
+
27
+ **Parameters:**
28
+ - `image_base64` (required) - Base64-encoded image
29
+ - `prompt` (optional) - Text prompt
30
+ - `max_tokens` (optional) - Max tokens to generate
31
+
32
+ ### `GET /health`
33
+ Health check endpoint.
34
+
35
+ ## Usage
36
+
37
+ ```bash
38
+ curl -X POST "https://your-space.hf.space/inference" \
39
+ -F "file=@image.png" \
40
+ -F "prompt=What is in this image?"
41
+ ```
app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import io
import base64
from contextlib import asynccontextmanager

import torch
from fastapi import FastAPI, File, Form, UploadFile, HTTPException
from fastapi.responses import JSONResponse
from PIL import Image
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info

# Model and processor are populated once by the FastAPI lifespan handler at
# startup; endpoints treat None as "not loaded yet" (HTTP 503).
model = None
processor = None

# NOTE(review): this ID names what looks like a text-only checkpoint, yet it is
# loaded with Qwen2_5_VLForConditionalGeneration (a vision-language class).
# Confirm the intended model is an actual Qwen2.5-VL checkpoint, e.g.
# "Qwen/Qwen2.5-VL-3B-Instruct" — loading will likely fail otherwise.
MODEL_ID = "Qwen/Qwen3.5-0.8B"
17
+
18
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the model/processor once at startup; release them at shutdown.

    Populates the module-level ``model`` and ``processor`` globals before the
    server starts accepting requests, and resets them after shutdown.
    """
    global model, processor
    print(f"Loading model {MODEL_ID}...")
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,  # NOTE(review): fp16 assumes GPU; confirm for CPU Spaces
        device_map="auto",
    )
    processor = AutoProcessor.from_pretrained(MODEL_ID)
    print("Model loaded successfully.")
    yield
    # Reset to None instead of `del`: deleting would unbind the global names,
    # so a late request to /health or /inference would raise NameError rather
    # than taking the intended "model not loaded" path.
    model = None
    processor = None
31
+
32
+
33
# Application object; `lifespan` loads the model at startup and frees it at shutdown.
app = FastAPI(title="Qwen3.5-0.8B Vision API", lifespan=lifespan)
34
+
35
+
36
@app.get("/")
async def root():
    """Liveness probe: report service status and the configured model ID."""
    payload = {"status": "ok", "model": MODEL_ID}
    return payload
39
+
40
+
41
@app.get("/health")
async def health():
    """Health check: report whether the startup hook has populated the model."""
    loaded = model is not None
    return {"status": "healthy", "model_loaded": loaded}
44
+
45
+
46
@app.post("/inference")
async def inference(
    file: UploadFile = File(...),
    prompt: str = Form(default="Describe this image in detail."),
    max_tokens: int = Form(default=512),
):
    """Run vision-language generation on an uploaded image.

    Parameters:
        file: Image upload in any format Pillow can decode.
        prompt: Text instruction paired with the image.
        max_tokens: Maximum number of new tokens to generate (must be >= 1).

    Returns:
        JSON body ``{"response": <generated text>}``.

    Raises:
        HTTPException: 503 if the model has not finished loading; 400 for an
            unreadable image or a non-positive ``max_tokens``.
    """
    if model is None or processor is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet.")
    if max_tokens < 1:
        raise HTTPException(status_code=400, detail="max_tokens must be >= 1.")

    try:
        image_bytes = await file.read()
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except Exception:
        # `from None`: the decoder's internal traceback is noise for API clients.
        raise HTTPException(status_code=400, detail="Invalid image file.") from None

    # Single-turn chat message combining the image and the text prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt},
            ],
        }
    ]

    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model.device)

    # inference_mode disables autograd bookkeeping entirely (cheaper than no_grad).
    with torch.inference_mode():
        generated_ids = model.generate(**inputs, max_new_tokens=max_tokens)

    # Strip the echoed prompt tokens so only newly generated text is decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    response_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]

    return JSONResponse(content={"response": response_text})
94
+
95
+
96
@app.post("/inference/base64")
async def inference_base64(
    image_base64: str = Form(...),
    prompt: str = Form(default="Describe this image in detail."),
    max_tokens: int = Form(default=512),
):
    """Run vision-language generation on a base64-encoded image.

    Parameters:
        image_base64: Base64-encoded image bytes (any format Pillow can decode).
        prompt: Text instruction paired with the image.
        max_tokens: Maximum number of new tokens to generate (must be >= 1).

    Returns:
        JSON body ``{"response": <generated text>}``.

    Raises:
        HTTPException: 503 if the model has not finished loading; 400 for
            invalid base64 / an unreadable image or a non-positive ``max_tokens``.
    """
    if model is None or processor is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet.")
    if max_tokens < 1:
        raise HTTPException(status_code=400, detail="max_tokens must be >= 1.")

    try:
        image_bytes = base64.b64decode(image_base64)
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except Exception:
        # `from None`: the decoder's internal traceback is noise for API clients.
        raise HTTPException(status_code=400, detail="Invalid base64 image.") from None

    # Single-turn chat message combining the image and the text prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt},
            ],
        }
    ]

    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model.device)

    # inference_mode disables autograd bookkeeping entirely (cheaper than no_grad).
    with torch.inference_mode():
        generated_ids = model.generate(**inputs, max_new_tokens=max_tokens)

    # Strip the echoed prompt tokens so only newly generated text is decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    response_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]

    return JSONResponse(content={"response": response_text})
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.6
2
+ uvicorn[standard]==0.34.0
3
+ transformers==4.48.1
4
+ torch==2.5.1
5
+ Pillow==11.1.0
6
+ python-multipart==0.0.20
7
+ accelerate==1.2.1
8
+ qwen-vl-utils==0.0.8