PhonePixelGhost committed on
Commit
17d2f7c
·
verified ·
1 Parent(s): 76419e9

Upload folder using huggingface_hub

Browse files
.dockerignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ tests/
2
+ .git/
3
+ .github/
4
+ *.pt
5
+ *.pth
6
+ __pycache__/
7
+ *.pyc
8
+ *.pyo
9
+ *.pyd
10
+ .pytest_cache/
11
+ .venv/
12
+ venv/
13
+ *.md
14
+ .gitignore
15
+ scripts/
.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  models/resnet18.onnx.data filter=lfs diff=lfs merge=lfs -text
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  models/resnet18.onnx.data filter=lfs diff=lfs merge=lfs -text
37
+ 0a2152e6-4aef-11f1-b222-345a603e44a9.data filter=lfs diff=lfs merge=lfs -text
38
+ 72a8c263-4aef-11f1-aad7-345a603e44a9.data filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual Environment
24
+ venv/
25
+ env/
26
+ ENV/
27
+ .venv
28
+
29
+ # IDE
30
+ .vscode/
31
+ .idea/
32
+ *.swp
33
+ *.swo
34
+ *~
35
+
36
+ # Testing
37
+ .pytest_cache/
38
+ .coverage
39
+ htmlcov/
40
+ .tox/
41
+
42
+ # Models
43
+ *.pt
44
+ *.pth
45
+ *.bin
46
+ pytorch_model/
47
+
48
+ # Logs
49
+ *.log
50
+
51
+ # OS
52
+ .DS_Store
53
+ Thumbs.db
54
+
55
+ # Test files
56
+ test.jpg
.pytest_cache/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Created by pytest automatically.
2
+ *
.pytest_cache/CACHEDIR.TAG ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Signature: 8a477f597d28d172789f06886806bc55
2
+ # This file is a cache directory tag created by pytest.
3
+ # For information about cache directory tags, see:
4
+ # https://bford.info/cachedir/spec.html
.pytest_cache/README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # pytest cache directory #
2
+
3
+ This directory contains data from the pytest's cache plugin,
4
+ which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
5
+
6
+ **Do not** commit this to version control.
7
+
8
+ See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.
.pytest_cache/v/cache/nodeids ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [
2
+ "tests/test_api.py::test_health_endpoint",
3
+ "tests/test_api.py::test_predict_rejects_corrupted_file",
4
+ "tests/test_api.py::test_predict_rejects_non_image",
5
+ "tests/test_api.py::test_predict_rejects_oversized_file",
6
+ "tests/test_api.py::test_predict_returns_valid_json"
7
+ ]
0a2152e6-4aef-11f1-b222-345a603e44a9.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:237ab7da3d82e3a5e7fbd88cb146e9ba328e7492c4c21d65b131002249cb6979
3
+ size 46735008
72a8c263-4aef-11f1-aad7-345a603e44a9.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:237ab7da3d82e3a5e7fbd88cb146e9ba328e7492c4c21d65b131002249cb6979
3
+ size 46735008
README.md CHANGED
@@ -1,10 +1,184 @@
1
- ---
2
- title: Image Classification Service
3
- emoji: 🌖
4
- colorFrom: gray
5
- colorTo: yellow
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # High-Throughput Image Classification Service
2
+
3
+ A production-ready image classification API using ResNet-18 with ONNX optimization, FastAPI, and CI/CD pipeline.
4
+
5
+ ## Features
6
+
7
+ - **Optimized Model**: ResNet-18 converted to ONNX with dynamic quantization (~70% size reduction)
8
+ - **High Performance**: ProcessPoolExecutor for concurrent request handling
9
+ - **Production Ready**: Docker containerization, comprehensive error handling
10
+ - **CI/CD Pipeline**: Automated testing and deployment to Hugging Face Spaces
11
+ - **Comprehensive Testing**: pytest unit tests with 100% endpoint coverage
12
+
13
+ ## Project Structure
14
+
15
+ ```
16
+ image-classification-service/
17
+ ├── app/
18
+ │ ├── __init__.py
19
+ │ ├── main.py # FastAPI application
20
+ │ ├── model.py # ONNX inference logic
21
+ │ └── schemas.py # Pydantic models
22
+ ├── models/
23
+ │ └── resnet18_quantized.onnx # Optimized model
24
+ ├── tests/
25
+ │ └── test_api.py # Unit tests
26
+ ├── scripts/
27
+ │ ├── 01_baseline_test.py # PyTorch baseline benchmark
28
+ │ ├── 02_export_onnx.py # Export to ONNX
29
+ │ ├── 03_quantize.py # Dynamic quantization
30
+ │ └── 04_benchmark_onnx.py # ONNX benchmark
31
+ ├── .github/
32
+ │ └── workflows/
33
+ │ └── ci-cd.yml # GitHub Actions pipeline
34
+ ├── Dockerfile
35
+ ├── .dockerignore
36
+ ├── requirements.txt
37
+ └── README.md
38
+ ```
39
+
40
+ ## Quick Start
41
+
42
+ ### 1. Install Dependencies
43
+
44
+ ```bash
45
+ pip install -r requirements.txt
46
+ ```
47
+
48
+ ### 2. Prepare the Model
49
+
50
+ Run the optimization scripts in order:
51
+
52
+ ```bash
53
+ cd scripts
54
+ python 01_baseline_test.py # Measure PyTorch baseline
55
+ python 02_export_onnx.py # Export to ONNX
56
+ python 03_quantize.py # Apply quantization
57
+ python 04_benchmark_onnx.py # Compare performance
58
+ cd ..
59
+ ```
60
+
61
+ ### 3. Run the API
62
+
63
+ ```bash
64
+ uvicorn app.main:app --host 0.0.0.0 --port 7860
65
+ ```
66
+
67
+ ### 4. Test the API
68
+
69
+ ```bash
70
+ # Health check
71
+ curl http://localhost:7860/health
72
+
73
+ # Predict
74
+ curl -X POST "http://localhost:7860/predict" \
75
+ -H "accept: application/json" \
76
+ -F "file=@/path/to/image.jpg"
77
+ ```
78
+
79
+ ## Docker Deployment
80
+
81
+ ### Build and Run
82
+
83
+ ```bash
84
+ docker build -t image-classifier .
85
+ docker run -p 7860:7860 image-classifier
86
+ ```
87
+
88
+ ## Testing
89
+
90
+ ```bash
91
+ pytest tests/ -v
92
+ ```
93
+
94
+ ## API Endpoints
95
+
96
+ ### GET /health
97
+
98
+ Health check endpoint.
99
+
100
+ **Response:**
101
+ ```json
102
+ {
103
+ "status": "ok"
104
+ }
105
+ ```
106
+
107
+ ### POST /predict
108
+
109
+ Image classification endpoint.
110
+
111
+ **Request:**
112
+ - Content-Type: `multipart/form-data`
113
+ - Body: `file` (image file)
114
+
115
+ **Response:**
116
+ ```json
117
+ {
118
+ "label": "tabby, tabby cat",
119
+ "score": 0.8234,
120
+ "label_id": 281,
121
+ "inference_time_ms": 45.123
122
+ }
123
+ ```
124
+
125
+ **Error Codes:**
126
+ - `400`: Corrupted or invalid image
127
+ - `413`: File too large (max 10MB)
128
+ - `415`: Unsupported media type
129
+ - `500`: Inference error
130
+
131
+ ## Performance Metrics
132
+
133
+ | Format | File Size | Avg Latency | P95 Latency |
134
+ |--------|-----------|-------------|-------------|
135
+ | PyTorch | ~45 MB | baseline | baseline |
136
+ | ONNX | ~45 MB | ~20% faster | - |
137
+ | ONNX Quantized | ~12 MB | ~40% faster | - |
138
+
139
+ *Run benchmark scripts to get actual measurements on your hardware*
140
+
141
+ ## CI/CD Pipeline
142
+
143
+ The GitHub Actions workflow automatically:
144
+ 1. Runs unit tests on every push/PR
145
+ 2. Deploys to Hugging Face Spaces on main branch (requires `HF_TOKEN` secret)
146
+
147
+ ### Setup Hugging Face Deployment
148
+
149
+ 1. Create a Hugging Face Space
150
+ 2. Generate an access token with write permissions
151
+ 3. Add `HF_TOKEN` to GitHub repository secrets
152
+ 4. Update `.github/workflows/ci-cd.yml` with your Space URL
153
+
154
+ ## Model Details
155
+
156
+ - **Base Model**: microsoft/resnet-18 (Hugging Face)
157
+ - **Task**: Image Classification (ImageNet-1k)
158
+ - **Input**: RGB images (224x224)
159
+ - **Output**: 1000 class probabilities
160
+ - **Optimization**: ONNX + Dynamic Quantization (QUint8)
161
+
162
+ ## Development
163
+
164
+ ### Adding New Features
165
+
166
+ 1. Update code in `app/`
167
+ 2. Add tests in `tests/`
168
+ 3. Run tests: `pytest tests/ -v`
169
+ 4. Update documentation
170
+
171
+ ### Performance Testing
172
+
173
+ Use JMeter or similar tools to test throughput:
174
+ - Concurrent users: 10, 50, 100
175
+ - Measure: TPS, P95 latency, error rate
176
+
177
+ ## License
178
+
179
+ MIT
180
+
181
+ ## Acknowledgments
182
+
183
+ - Model: microsoft/resnet-18 from Hugging Face
184
+ - Framework: FastAPI, ONNX Runtime
SKILL.md ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: image-classification-mlops
3
+ description: >
4
+ ทักษะสำหรับพัฒนาระบบ High-Throughput Image Classification Service ครบวงจร ตั้งแต่
5
+ Model Optimization, FastAPI Development, CI/CD Pipeline จนถึง Performance Testing
6
+ โดยใช้โมเดล microsoft/resnet-18 จาก Hugging Face
7
+
8
+ ใช้ skill นี้เมื่อ:
9
+ - ต้องการ Optimize โมเดล (ONNX Conversion + Dynamic Quantization)
10
+ - สร้าง FastAPI ที่รองรับ Concurrent Request ด้วย ProcessPoolExecutor
11
+ - เขียน Dockerfile สำหรับ Production
12
+ - ตั้งค่า GitHub Actions CI/CD → Deploy ไป Hugging Face Spaces
13
+ - เขียน pytest Unit Tests สำหรับ /predict endpoint
14
+ - วิเคราะห์ผล JMeter Load Test (Throughput / P95 Latency)
15
+ - เขียน Project Report หรือสร้าง System Architecture Diagram
16
+ ---
17
+
18
+ # High-Throughput Image Classification Service — MLOps Skill
19
+
20
+ ## ภาพรวมโปรเจกต์
21
+
22
+ | Phase | เนื้อหา |
23
+ |---|---|
24
+ | 1. Model Optimization | ResNet-18 → ONNX → Dynamic Quantization |
25
+ | 2. API Development | FastAPI + ProcessPoolExecutor + Pydantic |
26
+ | 3. Automation & CI/CD | pytest + GitHub Actions + HF Spaces Deploy |
27
+ | 4. Performance Testing | JMeter Load Test + TPS/P95 Analysis |
28
+
29
+ **โมเดลหลัก:** `microsoft/resnet-18` (Hugging Face)
30
+ **Stack:** Python 3.11, FastAPI, ONNX Runtime, Transformers, Docker, GitHub Actions
31
+
32
+ ---
33
+
34
+ ## Phase 1 — Model Optimization
35
+
36
+ ### 1.1 Baseline Test (Original PyTorch)
37
+
38
+ ```python
39
+ from transformers import AutoFeatureExtractor, ResNetForImageClassification
40
+ import torch, time, os
41
+ from PIL import Image
42
+
43
+ model_id = "microsoft/resnet-18"
44
+ extractor = AutoFeatureExtractor.from_pretrained(model_id)
45
+ model = ResNetForImageClassification.from_pretrained(model_id)
46
+ model.eval()
47
+
48
+ # วัด Baseline Latency (100 runs)
49
+ img = Image.open("test.jpg").convert("RGB")
50
+ inputs = extractor(images=img, return_tensors="pt")
51
+
52
+ times = []
53
+ with torch.no_grad():
54
+ for _ in range(100):
55
+ t0 = time.perf_counter()
56
+ _ = model(**inputs)
57
+ times.append(time.perf_counter() - t0)
58
+
59
+ print(f"Baseline Latency (avg): {sum(times)/len(times)*1000:.2f} ms")
60
+ print(f"Model Size: {os.path.getsize('pytorch_model.bin')/1e6:.2f} MB")
61
+ ```
62
+
63
+ ### 1.2 Export to ONNX
64
+
65
+ ```python
66
+ import torch
67
+ from transformers import AutoFeatureExtractor, ResNetForImageClassification
68
+
69
+ model_id = "microsoft/resnet-18"
70
+ extractor = AutoFeatureExtractor.from_pretrained(model_id)
71
+ model = ResNetForImageClassification.from_pretrained(model_id).eval()
72
+
73
+ dummy = torch.randn(1, 3, 224, 224)
74
+
75
+ torch.onnx.export(
76
+ model,
77
+ dummy,
78
+ "resnet18.onnx",
79
+ input_names=["pixel_values"],
80
+ output_names=["logits"],
81
+ dynamic_axes={"pixel_values": {0: "batch_size"}},
82
+ opset_version=17,
83
+ )
84
+ print("ONNX exported successfully")
85
+ ```
86
+
87
+ ### 1.3 Dynamic Quantization
88
+
89
+ ```python
90
+ from onnxruntime.quantization import quantize_dynamic, QuantType
91
+
92
+ quantize_dynamic(
93
+ model_input="resnet18.onnx",
94
+ model_output="resnet18_quantized.onnx",
95
+ weight_type=QuantType.QUint8,
96
+ )
97
+ print("Quantization complete")
98
+ ```
99
+
100
+ ### 1.4 ตารางเปรียบเทียบ (บันทึกผลจริงลงตาราง)
101
+
102
+ | Format | File Size (MB) | Avg Latency (ms) | P95 Latency (ms) |
103
+ |---|---|---|---|
104
+ | Original (PyTorch) | ~45 | baseline | baseline |
105
+ | ONNX | ~45 | คาดว่าเร็วขึ้น ~20% | - |
106
+ | ONNX Quantized | ~12 | คาดว่าเร็วขึ้น ~40% | - |
107
+
108
+ > **วิธีวัด:** รัน 100 ครั้ง → เก็บค่า avg และ percentile ด้วย `numpy.percentile(times, 95)`
109
+
110
+ ---
111
+
112
+ ## Phase 2 — API Development
113
+
114
+ ### 2.1 โครงสร้างโปรเจกต์
115
+
116
+ ```
117
+ image-classification-service/
118
+ ├── app/
119
+ │ ├── main.py # FastAPI app
120
+ │ ├── model.py # ONNX inference logic
121
+ │ └── schemas.py # Pydantic models
122
+ ├── models/
123
+ │ └── resnet18_quantized.onnx
124
+ ├── tests/
125
+ │ └── test_api.py
126
+ ├── .github/
127
+ │ └── workflows/
128
+ │ └── ci-cd.yml
129
+ ├── Dockerfile
130
+ ├── requirements.txt
131
+ └── README.md
132
+ ```
133
+
134
+ ### 2.2 Pydantic Schemas (`app/schemas.py`)
135
+
136
+ ```python
137
+ from pydantic import BaseModel
138
+ from typing import Optional
139
+
140
+ class PredictionResponse(BaseModel):
141
+ label: str
142
+ score: float
143
+ label_id: int
144
+ inference_time_ms: float
145
+
146
+ class ErrorResponse(BaseModel):
147
+ detail: str
148
+ error_code: str
149
+ ```
150
+
151
+ ### 2.3 ONNX Inference (`app/model.py`)
152
+
153
+ ```python
154
+ import onnxruntime as ort
155
+ import numpy as np
156
+ from PIL import Image
157
+ import io, time
158
+
159
+ # Labels จาก ImageNet
160
+ from transformers import AutoFeatureExtractor
161
+ extractor = AutoFeatureExtractor.from_pretrained("microsoft/resnet-18")
162
+
163
+ # โหลด session ครั้งเดียว (module-level)
164
+ session = ort.InferenceSession(
165
+ "models/resnet18_quantized.onnx",
166
+ providers=["CPUExecutionProvider"]
167
+ )
168
+
169
+ def run_inference(image_bytes: bytes) -> dict:
170
+ img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
171
+ inputs = extractor(images=img, return_tensors="np")
172
+ pixel_values = inputs["pixel_values"].astype(np.float32)
173
+
174
+ t0 = time.perf_counter()
175
+ outputs = session.run(["logits"], {"pixel_values": pixel_values})
176
+ elapsed = (time.perf_counter() - t0) * 1000
177
+
178
+ logits = outputs[0][0]
179
+ probs = np.exp(logits) / np.sum(np.exp(logits))
180
+ label_id = int(np.argmax(probs))
181
+
182
+ # ดึง label จาก model config
183
+ from transformers import ResNetForImageClassification
184
+ cfg = ResNetForImageClassification.from_pretrained("microsoft/resnet-18").config
185
+ label = cfg.id2label.get(label_id, str(label_id))
186
+
187
+ return {
188
+ "label": label,
189
+ "score": float(probs[label_id]),
190
+ "label_id": label_id,
191
+ "inference_time_ms": round(elapsed, 3),
192
+ }
193
+ ```
194
+
195
+ ### 2.4 FastAPI Main App (`app/main.py`)
196
+
197
+ ```python
198
+ from fastapi import FastAPI, File, UploadFile, HTTPException
199
+ from concurrent.futures import ProcessPoolExecutor
200
+ import asyncio
201
+ from app.model import run_inference
202
+ from app.schemas import PredictionResponse
203
+
204
+ app = FastAPI(title="ResNet-18 Image Classifier", version="1.0.0")
205
+ executor = ProcessPoolExecutor(max_workers=4)
206
+
207
+ MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
208
+ ALLOWED_CONTENT_TYPES = {"image/jpeg", "image/png", "image/webp", "image/gif"}
209
+
210
+
211
+ @app.get("/health")
212
+ async def health():
213
+ return {"status": "ok"}
214
+
215
+
216
+ @app.post("/predict", response_model=PredictionResponse)
217
+ async def predict(file: UploadFile = File(...)):
218
+ # Validate content type
219
+ if file.content_type not in ALLOWED_CONTENT_TYPES:
220
+ raise HTTPException(
221
+ status_code=415,
222
+ detail=f"Unsupported media type: {file.content_type}. Allowed: {ALLOWED_CONTENT_TYPES}"
223
+ )
224
+
225
+ image_bytes = await file.read()
226
+
227
+ # Validate file size
228
+ if len(image_bytes) > MAX_FILE_SIZE:
229
+ raise HTTPException(
230
+ status_code=413,
231
+ detail=f"File too large. Max size is {MAX_FILE_SIZE // 1024 // 1024} MB."
232
+ )
233
+
234
+ # Validate not corrupted (try opening with PIL)
235
+ try:
236
+ from PIL import Image
237
+ import io
238
+ Image.open(io.BytesIO(image_bytes)).verify()
239
+ except Exception:
240
+ raise HTTPException(status_code=400, detail="Corrupted or invalid image file.")
241
+
242
+ # Run CPU-bound inference in ProcessPoolExecutor (ไม่บล็อก event loop)
243
+ loop = asyncio.get_event_loop()
244
+ try:
245
+ result = await loop.run_in_executor(executor, run_inference, image_bytes)
246
+ except Exception as e:
247
+ raise HTTPException(status_code=500, detail=f"Inference error: {str(e)}")
248
+
249
+ return PredictionResponse(**result)
250
+ ```
251
+
252
+ ### 2.5 Error Handling Summary
253
+
254
+ | สถานการณ์ | HTTP Status | รายละเอียด |
255
+ |---|---|---|
256
+ | ไฟล์ไม่ใช่รูปภาพ | 415 Unsupported Media Type | Content-type ไม่ตรง |
257
+ | ไฟล์เสีย (Corrupted) | 400 Bad Request | PIL ไม่สามารถเปิดได้ |
258
+ | ไฟล์ใหญ่เกินไป | 413 Request Entity Too Large | เกิน 10MB |
259
+ | Inference Error | 500 Internal Server Error | โมเดลทำงานผิดพลาด |
260
+
261
+ ---
262
+
263
+ ## Phase 3 — Dockerfile
264
+
265
+ ```dockerfile
266
+ # ใช้ slim image เพื่อลด size
267
+ FROM python:3.11-slim
268
+
269
+ WORKDIR /app
270
+
271
+ # ติดตั้ง dependencies ก่อน (cache layer)
272
+ COPY requirements.txt .
273
+ RUN pip install --no-cache-dir -r requirements.txt
274
+
275
+ # Copy โค้ดและโมเดล
276
+ COPY app/ ./app/
277
+ COPY models/ ./models/
278
+
279
+ EXPOSE 7860
280
+
281
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
282
+ ```
283
+
284
+ **requirements.txt:**
285
+ ```
286
+ fastapi==0.111.0
287
+ uvicorn[standard]==0.29.0
288
+ python-multipart==0.0.9
289
+ onnxruntime==1.18.0
290
+ numpy==1.26.4
291
+ Pillow==10.3.0
292
+ transformers==4.41.0
293
+ torch==2.3.0
294
+ pydantic==2.7.1
295
+ pytest==8.2.0
296
+ httpx==0.27.0
297
+ ```
298
+
299
+ > **เทคนิคลด Docker Image Size:**
300
+ > - ใช้ `python:3.11-slim` (ไม่ใช่ full)
301
+ > - `--no-cache-dir` ใน pip
302
+ > - ลบ torch ออกหลัง export ONNX (ใน production image ไม่จำเป็น)
303
+ > - ใช้ `.dockerignore` เพื่อ exclude `tests/`, `.git/`, `*.pt`
304
+
305
+ ---
306
+
307
+ ## Phase 4 — Unit Testing (`tests/test_api.py`)
308
+
309
+ ```python
310
+ import pytest
311
+ from fastapi.testclient import TestClient
312
+ from app.main import app
313
+ from pathlib import Path
314
+
315
+ client = TestClient(app)
316
+
317
+ # --- Helper ---
318
+ def get_test_image() -> bytes:
319
+ """ใช้ภาพ test จริงหรือสร้าง dummy PNG"""
320
+ from PIL import Image
321
+ import io
322
+ img = Image.new("RGB", (224, 224), color=(128, 64, 32))
323
+ buf = io.BytesIO()
324
+ img.save(buf, format="JPEG")
325
+ return buf.getvalue()
326
+
327
+
328
+ # --- Tests ---
329
+
330
+ def test_health_endpoint():
331
+ res = client.get("/health")
332
+ assert res.status_code == 200
333
+ assert res.json() == {"status": "ok"}
334
+
335
+
336
+ def test_predict_returns_valid_json():
337
+ img_bytes = get_test_image()
338
+ res = client.post(
339
+ "/predict",
340
+ files={"file": ("test.jpg", img_bytes, "image/jpeg")}
341
+ )
342
+ assert res.status_code == 200
343
+ data = res.json()
344
+ assert "label" in data
345
+ assert "score" in data
346
+ assert isinstance(data["score"], float)
347
+ assert 0.0 <= data["score"] <= 1.0
348
+
349
+
350
+ def test_predict_rejects_non_image():
351
+ res = client.post(
352
+ "/predict",
353
+ files={"file": ("test.txt", b"not an image", "text/plain")}
354
+ )
355
+ assert res.status_code == 415
356
+
357
+
358
+ def test_predict_rejects_corrupted_file():
359
+ res = client.post(
360
+ "/predict",
361
+ files={"file": ("bad.jpg", b"\xff\xd8corrupted", "image/jpeg")}
362
+ )
363
+ assert res.status_code == 400
364
+
365
+
366
+ def test_predict_rejects_oversized_file():
367
+ huge = b"A" * (11 * 1024 * 1024) # 11MB
368
+ res = client.post(
369
+ "/predict",
370
+ files={"file": ("big.jpg", huge, "image/jpeg")}
371
+ )
372
+ assert res.status_code == 413
373
+ ```
374
+
375
+ ---
376
+
377
+ ## Phase 5 — GitHub Actions CI/CD (`.github/workflows/ci-cd.yml`)
378
+
379
+ ```yaml
380
+ name: CI/CD Pipeline
381
+
382
+ on:
383
+ push:
384
+ branches: [main]
385
+ pull_request:
386
+ branches: [main]
387
+
388
+ jobs:
389
+ test:
390
+ runs-on: ubuntu-latest
391
+ steps:
392
+ - uses: actions/checkout@v4
393
+
394
+ - name: Set up Python 3.11
395
+ uses: actions/setup-python@v5
396
+ with:
397
+ python-version: "3.11"
398
+
399
+ - name: Install dependencies
400
+ run: pip install -r requirements.txt
401
+
402
+ - name: Run Unit Tests
403
+ run: pytest tests/ -v --tb=short
404
+
405
+ deploy:
406
+ needs: test # รัน deploy เฉพาะเมื่อ test ผ่านทุก case
407
+ runs-on: ubuntu-latest
408
+ if: github.ref == 'refs/heads/main' && github.event_name == 'push'
409
+ steps:
410
+ - uses: actions/checkout@v4
411
+
412
+ - name: Push to Hugging Face Spaces
413
+ env:
414
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
415
+ run: |
416
+ git config --global user.email "ci@github.com"
417
+ git config --global user.name "GitHub Actions"
418
+ git remote add hf https://user:${HF_TOKEN}@huggingface.co/spaces/<YOUR_USERNAME>/<YOUR_SPACE_NAME>
419
+ git push hf main --force
420
+ ```
421
+
422
+ > **การตั้งค่า Secret:**
423
+ > ไปที่ GitHub Repo → Settings → Secrets → Actions → New secret
424
+ > ชื่อ: `HF_TOKEN` | ค่า: Hugging Face Access Token (write permission)
425
+
426
+ ---
427
+
428
+ ## Phase 6 — Performance Testing (JMeter)
429
+
430
+ ### 6.1 JMeter Test Plan (.jmx) — Key Settings
431
+
432
+ | Parameter | Local (Docker) | Cloud (HF Spaces) |
433
+ |---|---|---|
434
+ | Threads (Users) | 10, 50, 100 | 10, 25, 50 |
435
+ | Ramp-Up (sec) | 10 | 20 |
436
+ | Loop Count | 100 | 50 |
437
+ | Endpoint | `http://localhost:7860/predict` | `https://<space>.hf.space/predict` |
438
+
439
+ ### 6.2 Metrics ที่ต้องรายงาน
440
+
441
+ | Metric | คำอธิบาย | เป้าหมาย |
442
+ |---|---|---|
443
+ | **Throughput (TPS)** | Request ต่อวินาที | สูงที่สุด |
444
+ | **P95 Latency** | 95th percentile response time | < 2000ms |
445
+ | **Error Rate** | % ที่ได้รับ error | < 1% |
446
+ | **Avg Latency** | ค่าเฉลี่ย response time | ต่ำที่สุด |
447
+
448
+ ### 6.3 การวิเคราะห์ผล
449
+
450
+ ```
451
+ จุดที่ต้องวิเคราะห์:
452
+ 1. หา "Knee Point" — จุดที่ TPS หยุดเพิ่ม แต่ Latency เริ่มพุ่ง
453
+ 2. CPU Utilization ใน Docker stats ณ จำนวน concurrent users นั้น
454
+ 3. เปรียบเทียบ Local vs Cloud เพื่อดู overhead ของ Network/HF cold-start
455
+ ```
456
+
457
+ ---
458
+
459
+ ## Phase 7 — cURL Examples
460
+
461
+ ```bash
462
+ # Health Check
463
+ curl https://<USERNAME>-<SPACE>.hf.space/health
464
+
465
+ # Predict (ส่งไฟล์รูปภาพจริง)
466
+ curl -X POST "https://<USERNAME>-<SPACE>.hf.space/predict" \
467
+ -H "accept: application/json" \
468
+ -F "file=@/path/to/your/image.jpg"
469
+
470
+ # Postman Collection — ดูไฟล์ postman_collection.json ใน repo
471
+ ```
472
+
473
+ ---
474
+
475
+ ## Checklist Deliverables
476
+
477
+ - [ ] Project Report (PDF) — Model details, Optimization table, Error strategy, JMeter analysis, Architecture diagram
478
+ - [ ] GitHub Repo — Source code + `.github/workflows/ci-cd.yml` + `README.md`
479
+ - [ ] `resnet18_quantized.onnx` — โมเดลที่ optimize แล้ว
480
+ - [ ] `tests/test_api.py` — pytest ครอบคลุม Happy path + Error cases
481
+ - [ ] `Dockerfile` — Production-ready
482
+ - [ ] JMeter Test Plan (`.jmx`)
483
+ - [ ] Postman Collection (`.json`)
484
+ - [ ] Hugging Face Space — Live API endpoint
485
+ - [ ] Presentation Slides + Live Demo (9 พ.ค. 2569)
486
+
487
+ ---
488
+
489
+ ## Notes & Tips
490
+
491
+ - **HF Spaces Free Tier** ใช้ CPU เท่านั้น — ONNX Runtime บน CPU เหมาะสมที่สุด
492
+ - **Cold Start** ใน HF Spaces อาจทำให้ request แรกช้า — ควรระบุในรายงาน
493
+ - **ProcessPoolExecutor** ต้องระวัง: แต่ละ worker โหลด ONNX session แยกกัน (memory x workers)
494
+ - **Pydantic v2** syntax เปลี่ยนจาก v1 — ใช้ `model_config` แทน `class Config`
495
+ - ใน `pytest` ต้องมี `conftest.py` หรือ set `PYTHONPATH=.` ให้ถูกต้อง
app/main.py CHANGED
@@ -4,14 +4,17 @@ from concurrent.futures import ProcessPoolExecutor
4
  import asyncio
5
  from app.model import run_inference
6
  from app.schemas import PredictionResponse
 
7
 
8
  app = FastAPI(title="ResNet-18 Image Classifier", version="1.0.0")
9
  executor = ProcessPoolExecutor(max_workers=4)
10
 
 
11
  ALLOWED_CONTENT_TYPES = {"image/jpeg", "image/png", "image/webp", "image/gif"}
12
 
13
  @app.get("/", response_class=HTMLResponse)
14
  async def demo_ui():
 
15
  return """
16
  <!DOCTYPE html>
17
  <html>
@@ -78,6 +81,11 @@ async def demo_ui():
78
  const response = await fetch('/predict', { method: 'POST', body: formData });
79
  const data = await response.json();
80
 
 
 
 
 
 
81
  document.getElementById('res-label').innerText = data.label;
82
  document.getElementById('res-score').innerText = (data.score * 100).toFixed(2) + '%';
83
  document.getElementById('res-time').innerText = data.inference_time_ms.toFixed(2) + ' ms';
@@ -101,9 +109,23 @@ async def health():
101
 
102
  @app.post("/predict", response_model=PredictionResponse)
103
  async def predict(file: UploadFile = File(...)):
 
104
  if file.content_type not in ALLOWED_CONTENT_TYPES:
105
  raise HTTPException(status_code=415, detail="Unsupported media type")
 
 
106
  image_bytes = await file.read()
 
 
 
 
 
 
107
  loop = asyncio.get_event_loop()
108
- result = await loop.run_in_executor(executor, run_inference, image_bytes)
109
- return result
 
 
 
 
 
 
4
  import asyncio
5
  from app.model import run_inference
6
  from app.schemas import PredictionResponse
7
+ from PIL import UnidentifiedImageError
8
 
9
  app = FastAPI(title="ResNet-18 Image Classifier", version="1.0.0")
10
  executor = ProcessPoolExecutor(max_workers=4)
11
 
12
+ MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
13
  ALLOWED_CONTENT_TYPES = {"image/jpeg", "image/png", "image/webp", "image/gif"}
14
 
15
  @app.get("/", response_class=HTMLResponse)
16
  async def demo_ui():
17
+ # ... (HTML UI code remains the same)
18
  return """
19
  <!DOCTYPE html>
20
  <html>
 
81
  const response = await fetch('/predict', { method: 'POST', body: formData });
82
  const data = await response.json();
83
 
84
+ if (response.status !== 200) {
85
+ alert(data.detail || 'Prediction failed');
86
+ return;
87
+ }
88
+
89
  document.getElementById('res-label').innerText = data.label;
90
  document.getElementById('res-score').innerText = (data.score * 100).toFixed(2) + '%';
91
  document.getElementById('res-time').innerText = data.inference_time_ms.toFixed(2) + ' ms';
 
109
 
110
  @app.post("/predict", response_model=PredictionResponse)
111
  async def predict(file: UploadFile = File(...)):
112
+ # 1. ตรวจสอบ Content Type
113
  if file.content_type not in ALLOWED_CONTENT_TYPES:
114
  raise HTTPException(status_code=415, detail="Unsupported media type")
115
+
116
+ # 2. อ่านข้อมูล
117
  image_bytes = await file.read()
118
+
119
+ # 3. ตรวจสอบขนาดไฟล์ (Fix สำหรับ test_predict_rejects_oversized_file)
120
+ if len(image_bytes) > MAX_FILE_SIZE:
121
+ raise HTTPException(status_code=413, detail="File too large")
122
+
123
+ # 4. รัน Inference และดักจับ Error (Fix สำหรับ test_predict_rejects_corrupted_file)
124
  loop = asyncio.get_event_loop()
125
+ try:
126
+ result = await loop.run_in_executor(executor, run_inference, image_bytes)
127
+ return result
128
+ except UnidentifiedImageError:
129
+ raise HTTPException(status_code=400, detail="Invalid image file")
130
+ except Exception as e:
131
+ raise HTTPException(status_code=500, detail=f"Inference error: {str(e)}")
jmeter_test_plan.jmx ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <jmeterTestPlan version="1.2" properties="5.0" jmeter="5.6.3">
3
+ <hashTree>
4
+ <TestPlan guiclass="TestPlanGui" testclass="TestPlan" testname="ResNet Image Classifier Load Test">
5
+ <elementProp name="TestPlan.user_defined_variables" elementType="Arguments" guiclass="ArgumentsPanel" testclass="Arguments" testname="User Defined Variables">
6
+ <collectionProp name="Arguments.arguments"/>
7
+ </elementProp>
8
+ </TestPlan>
9
+ <hashTree>
10
+ <ThreadGroup guiclass="ThreadGroupGui" testclass="ThreadGroup" testname="Concurrent Users">
11
+ <intProp name="ThreadGroup.num_threads">60</intProp>
12
+ <intProp name="ThreadGroup.ramp_time">10</intProp>
13
+ <longProp name="ThreadGroup.duration">60</longProp>
14
+ <boolProp name="ThreadGroup.same_user_on_next_iteration">true</boolProp>
15
+ <stringProp name="ThreadGroup.on_sample_error">continue</stringProp>
16
+ <elementProp name="ThreadGroup.main_controller" elementType="LoopController" guiclass="LoopControlPanel" testclass="LoopController" testname="Loop Controller">
17
+ <intProp name="LoopController.loops">-1</intProp>
18
+ <boolProp name="LoopController.continue_forever">false</boolProp>
19
+ </elementProp>
20
+ </ThreadGroup>
21
+ <hashTree>
22
+ <HTTPSamplerProxy guiclass="HttpTestSampleGui" testclass="HTTPSamplerProxy" testname="Predict Request">
23
+ <stringProp name="HTTPSampler.domain">127.0.0.1</stringProp>
24
+ <stringProp name="HTTPSampler.port">8000</stringProp>
25
+ <stringProp name="HTTPSampler.protocol">http</stringProp>
26
+ <stringProp name="HTTPSampler.path">/predict</stringProp>
27
+ <boolProp name="HTTPSampler.follow_redirects">true</boolProp>
28
+ <stringProp name="HTTPSampler.method">POST</stringProp>
29
+ <boolProp name="HTTPSampler.use_keepalive">true</boolProp>
30
+ <boolProp name="HTTPSampler.DO_MULTIPART_POST">true</boolProp>
31
+ <elementProp name="HTTPsampler.Files" elementType="HTTPFileArgs">
32
+ <collectionProp name="HTTPFileArgs.files">
33
+ <elementProp name="C:\Yanakorn\works\Assignments\AIE494\finalproject\test.jpg" elementType="HTTPFileArg">
34
+ <stringProp name="File.mimetype">image/jpeg</stringProp>
35
+ <stringProp name="File.path">C:\Yanakorn\works\Assignments\AIE494\finalproject\test.jpg</stringProp>
36
+ <stringProp name="File.paramname">file</stringProp>
37
+ </elementProp>
38
+ </collectionProp>
39
+ </elementProp>
40
+ <boolProp name="HTTPSampler.postBodyRaw">false</boolProp>
41
+ <elementProp name="HTTPsampler.Arguments" elementType="Arguments" guiclass="HTTPArgumentsPanel" testclass="Arguments" testname="User Defined Variables">
42
+ <collectionProp name="Arguments.arguments"/>
43
+ </elementProp>
44
+ </HTTPSamplerProxy>
45
+ <hashTree/>
46
+ <ResultCollector guiclass="ViewResultsFullVisualizer" testclass="ResultCollector" testname="View Results Tree">
47
+ <boolProp name="ResultCollector.error_logging">false</boolProp>
48
+ <objProp>
49
+ <name>saveConfig</name>
50
+ <value class="SampleSaveConfiguration">
51
+ <time>true</time>
52
+ <latency>true</latency>
53
+ <timestamp>true</timestamp>
54
+ <success>true</success>
55
+ <label>true</label>
56
+ <code>true</code>
57
+ <message>true</message>
58
+ <threadName>true</threadName>
59
+ <dataType>true</dataType>
60
+ <encoding>false</encoding>
61
+ <assertions>true</assertions>
62
+ <subresults>true</subresults>
63
+ <responseData>false</responseData>
64
+ <samplerData>false</samplerData>
65
+ <xml>false</xml>
66
+ <fieldNames>true</fieldNames>
67
+ <responseHeaders>false</responseHeaders>
68
+ <requestHeaders>false</requestHeaders>
69
+ <responseDataOnError>false</responseDataOnError>
70
+ <saveAssertionResultsFailureMessage>true</saveAssertionResultsFailureMessage>
71
+ <assertionsResultsToSave>0</assertionsResultsToSave>
72
+ <bytes>true</bytes>
73
+ <sentBytes>true</sentBytes>
74
+ <url>true</url>
75
+ <threadCounts>true</threadCounts>
76
+ <idleTime>true</idleTime>
77
+ <connectTime>true</connectTime>
78
+ </value>
79
+ </objProp>
80
+ <stringProp name="filename"></stringProp>
81
+ </ResultCollector>
82
+ <hashTree/>
83
+ </hashTree>
84
+ <ResultCollector guiclass="SummaryReport" testclass="ResultCollector" testname="Summary Report">
85
+ <boolProp name="ResultCollector.error_logging">false</boolProp>
86
+ <objProp>
87
+ <name>saveConfig</name>
88
+ <value class="SampleSaveConfiguration">
89
+ <time>true</time>
90
+ <latency>true</latency>
91
+ <timestamp>true</timestamp>
92
+ <success>true</success>
93
+ <label>true</label>
94
+ <code>true</code>
95
+ <message>true</message>
96
+ <threadName>true</threadName>
97
+ <dataType>true</dataType>
98
+ <encoding>false</encoding>
99
+ <assertions>true</assertions>
100
+ <subresults>true</subresults>
101
+ <responseData>false</responseData>
102
+ <samplerData>false</samplerData>
103
+ <xml>false</xml>
104
+ <fieldNames>true</fieldNames>
105
+ <responseHeaders>false</responseHeaders>
106
+ <requestHeaders>false</requestHeaders>
107
+ <responseDataOnError>false</responseDataOnError>
108
+ <saveAssertionResultsFailureMessage>true</saveAssertionResultsFailureMessage>
109
+ <assertionsResultsToSave>0</assertionsResultsToSave>
110
+ <bytes>true</bytes>
111
+ <sentBytes>true</sentBytes>
112
+ <url>true</url>
113
+ <threadCounts>true</threadCounts>
114
+ <idleTime>true</idleTime>
115
+ <connectTime>true</connectTime>
116
+ </value>
117
+ </objProp>
118
+ <stringProp name="filename"></stringProp>
119
+ </ResultCollector>
120
+ <hashTree/>
121
+ </hashTree>
122
+ </hashTree>
123
+ </jmeterTestPlan>
postman_collection.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "info": {
3
+ "_postman_id": "8923a12b-7c45-4b2e-9d2a-8c9d9e9f9a9b",
4
+ "name": "ResNet-18 Image Classifier",
5
+ "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
6
+ },
7
+ "item": [
8
+ {
9
+ "name": "Health Check",
10
+ "request": {
11
+ "method": "GET",
12
+ "header": [],
13
+ "url": {
14
+ "raw": "{{baseUrl}}/health",
15
+ "host": [
16
+ "{{baseUrl}}"
17
+ ],
18
+ "path": [
19
+ "health"
20
+ ]
21
+ }
22
+ },
23
+ "response": []
24
+ },
25
+ {
26
+ "name": "Predict Image",
27
+ "request": {
28
+ "method": "POST",
29
+ "header": [
30
+ {
31
+ "key": "accept",
32
+ "value": "application/json"
33
+ }
34
+ ],
35
+ "body": {
36
+ "mode": "formdata",
37
+ "formdata": [
38
+ {
39
+ "key": "file",
40
+ "type": "file",
41
+ "src": ""
42
+ }
43
+ ]
44
+ },
45
+ "url": {
46
+ "raw": "{{baseUrl}}/predict",
47
+ "host": [
48
+ "{{baseUrl}}"
49
+ ],
50
+ "path": [
51
+ "predict"
52
+ ]
53
+ }
54
+ },
55
+ "response": []
56
+ }
57
+ ],
58
+ "variable": [
59
+ {
60
+ "key": "baseUrl",
61
+ "value": "http://localhost:8000",
62
+ "type": "string"
63
+ }
64
+ ]
65
+ }
scripts/01_baseline_test.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Measure baseline PyTorch latency and on-disk size for microsoft/resnet-18."""
from transformers import AutoImageProcessor, ResNetForImageClassification
import torch
import time
import os
from PIL import Image

model_id = "microsoft/resnet-18"
processor = AutoImageProcessor.from_pretrained(model_id)
model = ResNetForImageClassification.from_pretrained(model_id)
model.eval()

# Create a deterministic test image if one is not already present.
if not os.path.exists("test.jpg"):
    img = Image.new("RGB", (224, 224), color=(128, 64, 32))
    img.save("test.jpg")

# Measure baseline latency over repeated forward passes.
img = Image.open("test.jpg").convert("RGB")
inputs = processor(images=img, return_tensors="pt")

n_runs = 100
times = []
with torch.no_grad():
    for _ in range(n_runs):
        t0 = time.perf_counter()
        _ = model(**inputs)
        times.append(time.perf_counter() - t0)

# Nearest-rank P95 index (equals 94 for 100 samples, as before, but no
# longer breaks silently if n_runs changes).
p95_idx = max(0, int(0.95 * len(times)) - 1)
print(f"Baseline Latency (avg): {sum(times)/len(times)*1000:.2f} ms")
print(f"P95 Latency: {sorted(times)[p95_idx]*1000:.2f} ms")

# Save model for size measurement.
model.save_pretrained("./pytorch_model")
# BUG FIX: recent transformers versions save weights as *.safetensors by
# default, so filtering only on ".bin" reported a size of 0 MB. Count both
# weight formats.
weight_exts = (".bin", ".safetensors")
model_size = sum(
    os.path.getsize(os.path.join("./pytorch_model", f))
    for f in os.listdir("./pytorch_model")
    if f.endswith(weight_exts)
)
print(f"Model Size: {model_size/1e6:.2f} MB")
scripts/02_export_onnx.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Export microsoft/resnet-18 to ONNX via torch.onnx tracing."""
import torch
from transformers import ResNetForImageClassification
import os

model_id = "microsoft/resnet-18"
# FIX: the image processor was downloaded here but never used — removed.
model = ResNetForImageClassification.from_pretrained(model_id).eval()

# Ensure the output directory exists.
os.makedirs("models", exist_ok=True)

# Dummy input for tracing: batch=1, RGB, 224x224 (ResNet-18's input size).
dummy = torch.randn(1, 3, 224, 224)

# Export using the legacy tracing path, which is more stable for
# downstream quantization tools than the dynamo exporter.
print("Exporting model to ONNX using legacy tracing...")
torch.onnx.export(
    model,
    dummy,
    "models/resnet18.onnx",
    export_params=True,
    # FIX: stale comment claimed "Opset 11" while the code uses 18;
    # opset 18 matches current torch / onnxruntime support.
    opset_version=18,
    do_constant_folding=True,
    input_names=["pixel_values"],
    output_names=["logits"],
    # Allow variable batch size at inference time.
    dynamic_axes={"pixel_values": {0: "batch_size"}, "logits": {0: "batch_size"}},
)

print("ONNX exported successfully to models/resnet18.onnx")
scripts/03_quantize.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Dynamically quantize the exported ResNet-18 ONNX model to uint8 weights."""
import onnx
import onnx.shape_inference
from onnxruntime.quantization import quantize_dynamic, QuantType
import os
import shutil

# --- Monkey-patch onnx.shape_inference to bypass strict checks -----------
# The quantizer runs shape inference in strict mode, which can reject
# otherwise-usable exports. Force non-strict mode and, if inference still
# fails, fall back to copying the model through unchanged so quantization
# can proceed on a best-effort basis.
original_infer_shapes_path = onnx.shape_inference.infer_shapes_path

def patched_infer_shapes_path(model_path, output_path=None, check_type=False,
                              strict_mode=False, data_prop=False):
    """Run shape inference in non-strict mode; copy the model on failure."""
    try:
        # strict_mode is deliberately forced to False regardless of caller.
        return original_infer_shapes_path(model_path, output_path, check_type, False, data_prop)
    except Exception:
        if output_path:
            shutil.copy(model_path, output_path)

onnx.shape_inference.infer_shapes_path = patched_infer_shapes_path
# --------------------------------------------------------------------------

model_path = "models/resnet18.onnx"
quantized_path = "models/resnet18_quantized.onnx"

print(f"Quantizing model: {model_path}...")
try:
    quantize_dynamic(
        model_input=model_path,
        model_output=quantized_path,
        weight_type=QuantType.QUInt8,
        extra_options={
            'EnableShapeInference': False,
            # Fall back to float32 for tensors whose type cannot be inferred
            # (works around a "tensor type missing" error during quantization).
            'DefaultTensorType': onnx.TensorProto.FLOAT,
        },
    )
except Exception as e:
    print(f"Quantization failed: {e}")

if os.path.exists(quantized_path):
    print(f"Success: {quantized_path} created. Size: {os.path.getsize(quantized_path)/1e6:.2f} MB")
else:
    # Retry once with a minimal set of options.
    print("Trying one last alternative...")
    try:
        # BUG FIX: this fallback call was unprotected — a second failure
        # crashed the script with a raw traceback instead of a diagnostic.
        quantize_dynamic(
            model_input=model_path,
            model_output=quantized_path,
            weight_type=QuantType.QUInt8,
        )
    except Exception as e:
        print(f"Quantization failed: {e}")

    if os.path.exists(quantized_path):
        print(f"Success on second attempt: {quantized_path}")
scripts/04_benchmark_onnx.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Benchmark the FP32 and quantized ONNX ResNet-18 models on CPU."""
import onnxruntime as ort
import numpy as np
import time
import os
from PIL import Image
from transformers import AutoImageProcessor

# Preprocessing pipeline matching the original HF model.
processor = AutoImageProcessor.from_pretrained("microsoft/resnet-18")

# Candidate models to benchmark (missing files are skipped below).
models = {
    "ONNX": "models/resnet18.onnx",
    "ONNX Quantized": "models/resnet18_quantized.onnx"
}

# Generate a fixed solid-color test image on first run.
if not os.path.exists("test.jpg"):
    Image.new("RGB", (224, 224), color=(128, 64, 32)).save("test.jpg")

image = Image.open("test.jpg").convert("RGB")
preprocessed = processor(images=image, return_tensors="np")
pixel_values = preprocessed["pixel_values"].astype(np.float32)

for name, model_path in models.items():
    if not os.path.exists(model_path):
        print(f"Skipping {name}: {model_path} not found")
        continue

    session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])

    # Time 100 single-image inferences.
    durations = []
    for _ in range(100):
        start = time.perf_counter()
        session.run(["logits"], {"pixel_values": pixel_values})
        durations.append(time.perf_counter() - start)

    print(f"\n{name}:")
    print(f" Avg Latency: {sum(durations)/len(durations)*1000:.2f} ms")
    print(f" P95 Latency: {sorted(durations)[94]*1000:.2f} ms")
    print(f" File Size: {os.path.getsize(model_path)/1e6:.2f} MB")
sym_shape_infer_temp.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:736f6cb91a0ae100eaeb13aa7842f5e718ab67b101e2c44115f9d9fbf87e80b3
3
+ size 179747
tests/conftest.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
import sys
import os

# Put the project root (one level above tests/) on sys.path so that the
# application package is importable no matter where pytest is invoked from.
_TESTS_DIR = os.path.dirname(__file__)
sys.path.insert(0, os.path.abspath(os.path.join(_TESTS_DIR, "..")))
tests/test_api.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from fastapi.testclient import TestClient
from app.main import app
from PIL import Image
import io

client = TestClient(app)


# --- Helper ---
def get_test_image() -> bytes:
    """Render a small solid-color JPEG in memory and return its raw bytes."""
    buffer = io.BytesIO()
    Image.new("RGB", (224, 224), color=(128, 64, 32)).save(buffer, format="JPEG")
    return buffer.getvalue()


# --- Tests ---

def test_health_endpoint():
    """GET /health responds 200 with the expected status payload."""
    response = client.get("/health")
    assert response.status_code == 200
    assert response.json() == {"status": "ok"}


def test_predict_returns_valid_json():
    """POST /predict with a valid JPEG yields a label and a score in [0, 1]."""
    response = client.post(
        "/predict",
        files={"file": ("test.jpg", get_test_image(), "image/jpeg")}
    )
    assert response.status_code == 200
    payload = response.json()
    assert "label" in payload
    assert "score" in payload
    assert isinstance(payload["score"], float)
    assert 0.0 <= payload["score"] <= 1.0


def test_predict_rejects_non_image():
    """Non-image content types are refused with 415 Unsupported Media Type."""
    response = client.post(
        "/predict",
        files={"file": ("test.txt", b"not an image", "text/plain")}
    )
    assert response.status_code == 415


def test_predict_rejects_corrupted_file():
    """A truncated/corrupt JPEG body is refused with 400 Bad Request."""
    response = client.post(
        "/predict",
        files={"file": ("bad.jpg", b"\xff\xd8corrupted", "image/jpeg")}
    )
    assert response.status_code == 400


def test_predict_rejects_oversized_file():
    """Uploads above the size limit are refused with 413 Payload Too Large."""
    oversized = b"A" * (11 * 1024 * 1024)  # 11MB
    response = client.post(
        "/predict",
        files={"file": ("big.jpg", oversized, "image/jpeg")}
    )
    assert response.status_code == 413