Upload folder using huggingface_hub
Browse files- .dockerignore +15 -0
- .gitattributes +2 -0
- .gitignore +56 -0
- .pytest_cache/.gitignore +2 -0
- .pytest_cache/CACHEDIR.TAG +4 -0
- .pytest_cache/README.md +8 -0
- .pytest_cache/v/cache/nodeids +7 -0
- 0a2152e6-4aef-11f1-b222-345a603e44a9.data +3 -0
- 72a8c263-4aef-11f1-aad7-345a603e44a9.data +3 -0
- README.md +184 -10
- SKILL.md +495 -0
- app/main.py +24 -2
- jmeter_test_plan.jmx +123 -0
- postman_collection.json +65 -0
- scripts/01_baseline_test.py +35 -0
- scripts/02_export_onnx.py +29 -0
- scripts/03_quantize.py +51 -0
- scripts/04_benchmark_onnx.py +42 -0
- sym_shape_infer_temp.onnx +3 -0
- tests/conftest.py +5 -0
- tests/test_api.py +62 -0
.dockerignore
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
tests/
|
| 2 |
+
.git/
|
| 3 |
+
.github/
|
| 4 |
+
*.pt
|
| 5 |
+
*.pth
|
| 6 |
+
__pycache__/
|
| 7 |
+
*.pyc
|
| 8 |
+
*.pyo
|
| 9 |
+
*.pyd
|
| 10 |
+
.pytest_cache/
|
| 11 |
+
.venv/
|
| 12 |
+
venv/
|
| 13 |
+
*.md
|
| 14 |
+
.gitignore
|
| 15 |
+
scripts/
|
.gitattributes
CHANGED
|
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
models/resnet18.onnx.data filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
models/resnet18.onnx.data filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
0a2152e6-4aef-11f1-b222-345a603e44a9.data filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
72a8c263-4aef-11f1-aad7-345a603e44a9.data filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
build/
|
| 8 |
+
develop-eggs/
|
| 9 |
+
dist/
|
| 10 |
+
downloads/
|
| 11 |
+
eggs/
|
| 12 |
+
.eggs/
|
| 13 |
+
lib/
|
| 14 |
+
lib64/
|
| 15 |
+
parts/
|
| 16 |
+
sdist/
|
| 17 |
+
var/
|
| 18 |
+
wheels/
|
| 19 |
+
*.egg-info/
|
| 20 |
+
.installed.cfg
|
| 21 |
+
*.egg
|
| 22 |
+
|
| 23 |
+
# Virtual Environment
|
| 24 |
+
venv/
|
| 25 |
+
env/
|
| 26 |
+
ENV/
|
| 27 |
+
.venv
|
| 28 |
+
|
| 29 |
+
# IDE
|
| 30 |
+
.vscode/
|
| 31 |
+
.idea/
|
| 32 |
+
*.swp
|
| 33 |
+
*.swo
|
| 34 |
+
*~
|
| 35 |
+
|
| 36 |
+
# Testing
|
| 37 |
+
.pytest_cache/
|
| 38 |
+
.coverage
|
| 39 |
+
htmlcov/
|
| 40 |
+
.tox/
|
| 41 |
+
|
| 42 |
+
# Models
|
| 43 |
+
*.pt
|
| 44 |
+
*.pth
|
| 45 |
+
*.bin
|
| 46 |
+
pytorch_model/
|
| 47 |
+
|
| 48 |
+
# Logs
|
| 49 |
+
*.log
|
| 50 |
+
|
| 51 |
+
# OS
|
| 52 |
+
.DS_Store
|
| 53 |
+
Thumbs.db
|
| 54 |
+
|
| 55 |
+
# Test files
|
| 56 |
+
test.jpg
|
.pytest_cache/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Created by pytest automatically.
|
| 2 |
+
*
|
.pytest_cache/CACHEDIR.TAG
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Signature: 8a477f597d28d172789f06886806bc55
|
| 2 |
+
# This file is a cache directory tag created by pytest.
|
| 3 |
+
# For information about cache directory tags, see:
|
| 4 |
+
# https://bford.info/cachedir/spec.html
|
.pytest_cache/README.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pytest cache directory #
|
| 2 |
+
|
| 3 |
+
This directory contains data from the pytest's cache plugin,
|
| 4 |
+
which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
|
| 5 |
+
|
| 6 |
+
**Do not** commit this to version control.
|
| 7 |
+
|
| 8 |
+
See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.
|
.pytest_cache/v/cache/nodeids
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
"tests/test_api.py::test_health_endpoint",
|
| 3 |
+
"tests/test_api.py::test_predict_rejects_corrupted_file",
|
| 4 |
+
"tests/test_api.py::test_predict_rejects_non_image",
|
| 5 |
+
"tests/test_api.py::test_predict_rejects_oversized_file",
|
| 6 |
+
"tests/test_api.py::test_predict_returns_valid_json"
|
| 7 |
+
]
|
0a2152e6-4aef-11f1-b222-345a603e44a9.data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:237ab7da3d82e3a5e7fbd88cb146e9ba328e7492c4c21d65b131002249cb6979
|
| 3 |
+
size 46735008
|
72a8c263-4aef-11f1-aad7-345a603e44a9.data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:237ab7da3d82e3a5e7fbd88cb146e9ba328e7492c4c21d65b131002249cb6979
|
| 3 |
+
size 46735008
|
README.md
CHANGED
|
@@ -1,10 +1,184 @@
|
|
| 1 |
-
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# High-Throughput Image Classification Service
|
| 2 |
+
|
| 3 |
+
A production-ready image classification API using ResNet-18 with ONNX optimization, FastAPI, and CI/CD pipeline.
|
| 4 |
+
|
| 5 |
+
## Features
|
| 6 |
+
|
| 7 |
+
- **Optimized Model**: ResNet-18 converted to ONNX with dynamic quantization (~70% size reduction)
|
| 8 |
+
- **High Performance**: ProcessPoolExecutor for concurrent request handling
|
| 9 |
+
- **Production Ready**: Docker containerization, comprehensive error handling
|
| 10 |
+
- **CI/CD Pipeline**: Automated testing and deployment to Hugging Face Spaces
|
| 11 |
+
- **Comprehensive Testing**: pytest unit tests with 100% endpoint coverage
|
| 12 |
+
|
| 13 |
+
## Project Structure
|
| 14 |
+
|
| 15 |
+
```
|
| 16 |
+
image-classification-service/
|
| 17 |
+
├── app/
|
| 18 |
+
│ ├── __init__.py
|
| 19 |
+
│ ├── main.py # FastAPI application
|
| 20 |
+
│ ├── model.py # ONNX inference logic
|
| 21 |
+
│ └── schemas.py # Pydantic models
|
| 22 |
+
├── models/
|
| 23 |
+
│ └── resnet18_quantized.onnx # Optimized model
|
| 24 |
+
├── tests/
|
| 25 |
+
│ └── test_api.py # Unit tests
|
| 26 |
+
├── scripts/
|
| 27 |
+
│ ├── 01_baseline_test.py # PyTorch baseline benchmark
|
| 28 |
+
│ ├── 02_export_onnx.py # Export to ONNX
|
| 29 |
+
│ ├── 03_quantize.py # Dynamic quantization
|
| 30 |
+
│ └── 04_benchmark_onnx.py # ONNX benchmark
|
| 31 |
+
├── .github/
|
| 32 |
+
│ └── workflows/
|
| 33 |
+
│ └── ci-cd.yml # GitHub Actions pipeline
|
| 34 |
+
├── Dockerfile
|
| 35 |
+
├── .dockerignore
|
| 36 |
+
├── requirements.txt
|
| 37 |
+
└── README.md
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
## Quick Start
|
| 41 |
+
|
| 42 |
+
### 1. Install Dependencies
|
| 43 |
+
|
| 44 |
+
```bash
|
| 45 |
+
pip install -r requirements.txt
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
### 2. Prepare the Model
|
| 49 |
+
|
| 50 |
+
Run the optimization scripts in order:
|
| 51 |
+
|
| 52 |
+
```bash
|
| 53 |
+
cd scripts
|
| 54 |
+
python 01_baseline_test.py # Measure PyTorch baseline
|
| 55 |
+
python 02_export_onnx.py # Export to ONNX
|
| 56 |
+
python 03_quantize.py # Apply quantization
|
| 57 |
+
python 04_benchmark_onnx.py # Compare performance
|
| 58 |
+
cd ..
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
### 3. Run the API
|
| 62 |
+
|
| 63 |
+
```bash
|
| 64 |
+
uvicorn app.main:app --host 0.0.0.0 --port 7860
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
### 4. Test the API
|
| 68 |
+
|
| 69 |
+
```bash
|
| 70 |
+
# Health check
|
| 71 |
+
curl http://localhost:7860/health
|
| 72 |
+
|
| 73 |
+
# Predict
|
| 74 |
+
curl -X POST "http://localhost:7860/predict" \
|
| 75 |
+
-H "accept: application/json" \
|
| 76 |
+
-F "file=@/path/to/image.jpg"
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
## Docker Deployment
|
| 80 |
+
|
| 81 |
+
### Build and Run
|
| 82 |
+
|
| 83 |
+
```bash
|
| 84 |
+
docker build -t image-classifier .
|
| 85 |
+
docker run -p 7860:7860 image-classifier
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
## Testing
|
| 89 |
+
|
| 90 |
+
```bash
|
| 91 |
+
pytest tests/ -v
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
## API Endpoints
|
| 95 |
+
|
| 96 |
+
### GET /health
|
| 97 |
+
|
| 98 |
+
Health check endpoint.
|
| 99 |
+
|
| 100 |
+
**Response:**
|
| 101 |
+
```json
|
| 102 |
+
{
|
| 103 |
+
"status": "ok"
|
| 104 |
+
}
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
### POST /predict
|
| 108 |
+
|
| 109 |
+
Image classification endpoint.
|
| 110 |
+
|
| 111 |
+
**Request:**
|
| 112 |
+
- Content-Type: `multipart/form-data`
|
| 113 |
+
- Body: `file` (image file)
|
| 114 |
+
|
| 115 |
+
**Response:**
|
| 116 |
+
```json
|
| 117 |
+
{
|
| 118 |
+
"label": "tabby, tabby cat",
|
| 119 |
+
"score": 0.8234,
|
| 120 |
+
"label_id": 281,
|
| 121 |
+
"inference_time_ms": 45.123
|
| 122 |
+
}
|
| 123 |
+
```
|
| 124 |
+
|
| 125 |
+
**Error Codes:**
|
| 126 |
+
- `400`: Corrupted or invalid image
|
| 127 |
+
- `413`: File too large (max 10MB)
|
| 128 |
+
- `415`: Unsupported media type
|
| 129 |
+
- `500`: Inference error
|
| 130 |
+
|
| 131 |
+
## Performance Metrics
|
| 132 |
+
|
| 133 |
+
| Format | File Size | Avg Latency | P95 Latency |
|
| 134 |
+
|--------|-----------|-------------|-------------|
|
| 135 |
+
| PyTorch | ~45 MB | baseline | baseline |
|
| 136 |
+
| ONNX | ~45 MB | ~20% faster | - |
|
| 137 |
+
| ONNX Quantized | ~12 MB | ~40% faster | - |
|
| 138 |
+
|
| 139 |
+
*Run benchmark scripts to get actual measurements on your hardware*
|
| 140 |
+
|
| 141 |
+
## CI/CD Pipeline
|
| 142 |
+
|
| 143 |
+
The GitHub Actions workflow automatically:
|
| 144 |
+
1. Runs unit tests on every push/PR
|
| 145 |
+
2. Deploys to Hugging Face Spaces on main branch (requires `HF_TOKEN` secret)
|
| 146 |
+
|
| 147 |
+
### Setup Hugging Face Deployment
|
| 148 |
+
|
| 149 |
+
1. Create a Hugging Face Space
|
| 150 |
+
2. Generate an access token with write permissions
|
| 151 |
+
3. Add `HF_TOKEN` to GitHub repository secrets
|
| 152 |
+
4. Update `.github/workflows/ci-cd.yml` with your Space URL
|
| 153 |
+
|
| 154 |
+
## Model Details
|
| 155 |
+
|
| 156 |
+
- **Base Model**: microsoft/resnet-18 (Hugging Face)
|
| 157 |
+
- **Task**: Image Classification (ImageNet-1k)
|
| 158 |
+
- **Input**: RGB images (224x224)
|
| 159 |
+
- **Output**: 1000 class probabilities
|
| 160 |
+
- **Optimization**: ONNX + Dynamic Quantization (QUint8)
|
| 161 |
+
|
| 162 |
+
## Development
|
| 163 |
+
|
| 164 |
+
### Adding New Features
|
| 165 |
+
|
| 166 |
+
1. Update code in `app/`
|
| 167 |
+
2. Add tests in `tests/`
|
| 168 |
+
3. Run tests: `pytest tests/ -v`
|
| 169 |
+
4. Update documentation
|
| 170 |
+
|
| 171 |
+
### Performance Testing
|
| 172 |
+
|
| 173 |
+
Use JMeter or similar tools to test throughput:
|
| 174 |
+
- Concurrent users: 10, 50, 100
|
| 175 |
+
- Measure: TPS, P95 latency, error rate
|
| 176 |
+
|
| 177 |
+
## License
|
| 178 |
+
|
| 179 |
+
MIT
|
| 180 |
+
|
| 181 |
+
## Acknowledgments
|
| 182 |
+
|
| 183 |
+
- Model: microsoft/resnet-18 from Hugging Face
|
| 184 |
+
- Framework: FastAPI, ONNX Runtime
|
SKILL.md
ADDED
|
@@ -0,0 +1,495 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
name: image-classification-mlops
|
| 3 |
+
description: >
|
| 4 |
+
ทักษะสำหรับพัฒนาระบบ High-Throughput Image Classification Service ครบวงจร ตั้งแต่
|
| 5 |
+
Model Optimization, FastAPI Development, CI/CD Pipeline จนถึง Performance Testing
|
| 6 |
+
โดยใช้โมเดล microsoft/resnet-18 จาก Hugging Face
|
| 7 |
+
|
| 8 |
+
ใช้ skill นี้เมื่อ:
|
| 9 |
+
- ต้องการ Optimize โมเดล (ONNX Conversion + Dynamic Quantization)
|
| 10 |
+
- สร้าง FastAPI ที่รองรับ Concurrent Request ด้วย ProcessPoolExecutor
|
| 11 |
+
- เขียน Dockerfile สำหรับ Production
|
| 12 |
+
- ตั้งค่า GitHub Actions CI/CD → Deploy ไป Hugging Face Spaces
|
| 13 |
+
- เขียน pytest Unit Tests สำหรับ /predict endpoint
|
| 14 |
+
- วิเคราะห์ผล JMeter Load Test (Throughput / P95 Latency)
|
| 15 |
+
- เขียน Project Report หรือสร้าง System Architecture Diagram
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
# High-Throughput Image Classification Service — MLOps Skill
|
| 19 |
+
|
| 20 |
+
## ภาพรวมโปรเจกต์
|
| 21 |
+
|
| 22 |
+
| Phase | เนื้อหา |
|
| 23 |
+
|---|---|
|
| 24 |
+
| 1. Model Optimization | ResNet-18 → ONNX → Dynamic Quantization |
|
| 25 |
+
| 2. API Development | FastAPI + ProcessPoolExecutor + Pydantic |
|
| 26 |
+
| 3. Automation & CI/CD | pytest + GitHub Actions + HF Spaces Deploy |
|
| 27 |
+
| 4. Performance Testing | JMeter Load Test + TPS/P95 Analysis |
|
| 28 |
+
|
| 29 |
+
**โมเดลหลัก:** `microsoft/resnet-18` (Hugging Face)
|
| 30 |
+
**Stack:** Python 3.11, FastAPI, ONNX Runtime, Transformers, Docker, GitHub Actions
|
| 31 |
+
|
| 32 |
+
---
|
| 33 |
+
|
| 34 |
+
## Phase 1 — Model Optimization
|
| 35 |
+
|
| 36 |
+
### 1.1 Baseline Test (Original PyTorch)
|
| 37 |
+
|
| 38 |
+
```python
|
| 39 |
+
from transformers import AutoFeatureExtractor, ResNetForImageClassification
|
| 40 |
+
import torch, time, os
|
| 41 |
+
from PIL import Image
|
| 42 |
+
|
| 43 |
+
model_id = "microsoft/resnet-18"
|
| 44 |
+
extractor = AutoFeatureExtractor.from_pretrained(model_id)
|
| 45 |
+
model = ResNetForImageClassification.from_pretrained(model_id)
|
| 46 |
+
model.eval()
|
| 47 |
+
|
| 48 |
+
# วัด Baseline Latency (100 runs)
|
| 49 |
+
img = Image.open("test.jpg").convert("RGB")
|
| 50 |
+
inputs = extractor(images=img, return_tensors="pt")
|
| 51 |
+
|
| 52 |
+
times = []
|
| 53 |
+
with torch.no_grad():
|
| 54 |
+
for _ in range(100):
|
| 55 |
+
t0 = time.perf_counter()
|
| 56 |
+
_ = model(**inputs)
|
| 57 |
+
times.append(time.perf_counter() - t0)
|
| 58 |
+
|
| 59 |
+
print(f"Baseline Latency (avg): {sum(times)/len(times)*1000:.2f} ms")
|
| 60 |
+
print(f"Model Size: {os.path.getsize('pytorch_model.bin')/1e6:.2f} MB")
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
### 1.2 Export to ONNX
|
| 64 |
+
|
| 65 |
+
```python
|
| 66 |
+
import torch
|
| 67 |
+
from transformers import AutoFeatureExtractor, ResNetForImageClassification
|
| 68 |
+
|
| 69 |
+
model_id = "microsoft/resnet-18"
|
| 70 |
+
extractor = AutoFeatureExtractor.from_pretrained(model_id)
|
| 71 |
+
model = ResNetForImageClassification.from_pretrained(model_id).eval()
|
| 72 |
+
|
| 73 |
+
dummy = torch.randn(1, 3, 224, 224)
|
| 74 |
+
|
| 75 |
+
torch.onnx.export(
|
| 76 |
+
model,
|
| 77 |
+
dummy,
|
| 78 |
+
"resnet18.onnx",
|
| 79 |
+
input_names=["pixel_values"],
|
| 80 |
+
output_names=["logits"],
|
| 81 |
+
dynamic_axes={"pixel_values": {0: "batch_size"}},
|
| 82 |
+
opset_version=17,
|
| 83 |
+
)
|
| 84 |
+
print("ONNX exported successfully")
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
### 1.3 Dynamic Quantization
|
| 88 |
+
|
| 89 |
+
```python
|
| 90 |
+
from onnxruntime.quantization import quantize_dynamic, QuantType
|
| 91 |
+
|
| 92 |
+
quantize_dynamic(
|
| 93 |
+
model_input="resnet18.onnx",
|
| 94 |
+
model_output="resnet18_quantized.onnx",
|
| 95 |
+
weight_type=QuantType.QUint8,
|
| 96 |
+
)
|
| 97 |
+
print("Quantization complete")
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
### 1.4 ตารางเปรียบเทียบ (บันทึกผลจริงลงตาราง)
|
| 101 |
+
|
| 102 |
+
| Format | File Size (MB) | Avg Latency (ms) | P95 Latency (ms) |
|
| 103 |
+
|---|---|---|---|
|
| 104 |
+
| Original (PyTorch) | ~45 | baseline | baseline |
|
| 105 |
+
| ONNX | ~45 | คาดว่าเร็วขึ้น ~20% | - |
|
| 106 |
+
| ONNX Quantized | ~12 | คาดว่าเร็วขึ้น ~40% | - |
|
| 107 |
+
|
| 108 |
+
> **วิธีวัด:** รัน 100 ครั้ง → เก็บค่า avg และ percentile ด้วย `numpy.percentile(times, 95)`
|
| 109 |
+
|
| 110 |
+
---
|
| 111 |
+
|
| 112 |
+
## Phase 2 — API Development
|
| 113 |
+
|
| 114 |
+
### 2.1 โครงสร้างโปรเจกต์
|
| 115 |
+
|
| 116 |
+
```
|
| 117 |
+
image-classification-service/
|
| 118 |
+
├── app/
|
| 119 |
+
│ ├── main.py # FastAPI app
|
| 120 |
+
│ ├── model.py # ONNX inference logic
|
| 121 |
+
│ └── schemas.py # Pydantic models
|
| 122 |
+
├── models/
|
| 123 |
+
│ └── resnet18_quantized.onnx
|
| 124 |
+
├── tests/
|
| 125 |
+
│ └── test_api.py
|
| 126 |
+
├── .github/
|
| 127 |
+
│ └── workflows/
|
| 128 |
+
│ └── ci-cd.yml
|
| 129 |
+
├── Dockerfile
|
| 130 |
+
├── requirements.txt
|
| 131 |
+
└── README.md
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
### 2.2 Pydantic Schemas (`app/schemas.py`)
|
| 135 |
+
|
| 136 |
+
```python
|
| 137 |
+
from pydantic import BaseModel
|
| 138 |
+
from typing import Optional
|
| 139 |
+
|
| 140 |
+
class PredictionResponse(BaseModel):
|
| 141 |
+
label: str
|
| 142 |
+
score: float
|
| 143 |
+
label_id: int
|
| 144 |
+
inference_time_ms: float
|
| 145 |
+
|
| 146 |
+
class ErrorResponse(BaseModel):
|
| 147 |
+
detail: str
|
| 148 |
+
error_code: str
|
| 149 |
+
```
|
| 150 |
+
|
| 151 |
+
### 2.3 ONNX Inference (`app/model.py`)
|
| 152 |
+
|
| 153 |
+
```python
|
| 154 |
+
import onnxruntime as ort
|
| 155 |
+
import numpy as np
|
| 156 |
+
from PIL import Image
|
| 157 |
+
import io, time
|
| 158 |
+
|
| 159 |
+
# Labels จาก ImageNet
|
| 160 |
+
from transformers import AutoFeatureExtractor
|
| 161 |
+
extractor = AutoFeatureExtractor.from_pretrained("microsoft/resnet-18")
|
| 162 |
+
|
| 163 |
+
# โหลด session ครั้งเดียว (module-level)
|
| 164 |
+
session = ort.InferenceSession(
|
| 165 |
+
"models/resnet18_quantized.onnx",
|
| 166 |
+
providers=["CPUExecutionProvider"]
|
| 167 |
+
)
|
| 168 |
+
|
| 169 |
+
def run_inference(image_bytes: bytes) -> dict:
|
| 170 |
+
img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
|
| 171 |
+
inputs = extractor(images=img, return_tensors="np")
|
| 172 |
+
pixel_values = inputs["pixel_values"].astype(np.float32)
|
| 173 |
+
|
| 174 |
+
t0 = time.perf_counter()
|
| 175 |
+
outputs = session.run(["logits"], {"pixel_values": pixel_values})
|
| 176 |
+
elapsed = (time.perf_counter() - t0) * 1000
|
| 177 |
+
|
| 178 |
+
logits = outputs[0][0]
|
| 179 |
+
probs = np.exp(logits) / np.sum(np.exp(logits))
|
| 180 |
+
label_id = int(np.argmax(probs))
|
| 181 |
+
|
| 182 |
+
# ดึง label จาก model config
|
| 183 |
+
from transformers import ResNetForImageClassification
|
| 184 |
+
cfg = ResNetForImageClassification.from_pretrained("microsoft/resnet-18").config
|
| 185 |
+
label = cfg.id2label.get(label_id, str(label_id))
|
| 186 |
+
|
| 187 |
+
return {
|
| 188 |
+
"label": label,
|
| 189 |
+
"score": float(probs[label_id]),
|
| 190 |
+
"label_id": label_id,
|
| 191 |
+
"inference_time_ms": round(elapsed, 3),
|
| 192 |
+
}
|
| 193 |
+
```
|
| 194 |
+
|
| 195 |
+
### 2.4 FastAPI Main App (`app/main.py`)
|
| 196 |
+
|
| 197 |
+
```python
|
| 198 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException
|
| 199 |
+
from concurrent.futures import ProcessPoolExecutor
|
| 200 |
+
import asyncio
|
| 201 |
+
from app.model import run_inference
|
| 202 |
+
from app.schemas import PredictionResponse
|
| 203 |
+
|
| 204 |
+
app = FastAPI(title="ResNet-18 Image Classifier", version="1.0.0")
|
| 205 |
+
executor = ProcessPoolExecutor(max_workers=4)
|
| 206 |
+
|
| 207 |
+
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
|
| 208 |
+
ALLOWED_CONTENT_TYPES = {"image/jpeg", "image/png", "image/webp", "image/gif"}
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
@app.get("/health")
|
| 212 |
+
async def health():
|
| 213 |
+
return {"status": "ok"}
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
@app.post("/predict", response_model=PredictionResponse)
|
| 217 |
+
async def predict(file: UploadFile = File(...)):
|
| 218 |
+
# Validate content type
|
| 219 |
+
if file.content_type not in ALLOWED_CONTENT_TYPES:
|
| 220 |
+
raise HTTPException(
|
| 221 |
+
status_code=415,
|
| 222 |
+
detail=f"Unsupported media type: {file.content_type}. Allowed: {ALLOWED_CONTENT_TYPES}"
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
image_bytes = await file.read()
|
| 226 |
+
|
| 227 |
+
# Validate file size
|
| 228 |
+
if len(image_bytes) > MAX_FILE_SIZE:
|
| 229 |
+
raise HTTPException(
|
| 230 |
+
status_code=413,
|
| 231 |
+
detail=f"File too large. Max size is {MAX_FILE_SIZE // 1024 // 1024} MB."
|
| 232 |
+
)
|
| 233 |
+
|
| 234 |
+
# Validate not corrupted (try opening with PIL)
|
| 235 |
+
try:
|
| 236 |
+
from PIL import Image
|
| 237 |
+
import io
|
| 238 |
+
Image.open(io.BytesIO(image_bytes)).verify()
|
| 239 |
+
except Exception:
|
| 240 |
+
raise HTTPException(status_code=400, detail="Corrupted or invalid image file.")
|
| 241 |
+
|
| 242 |
+
# Run CPU-bound inference in ProcessPoolExecutor (ไม่บล็อก event loop)
|
| 243 |
+
loop = asyncio.get_event_loop()
|
| 244 |
+
try:
|
| 245 |
+
result = await loop.run_in_executor(executor, run_inference, image_bytes)
|
| 246 |
+
except Exception as e:
|
| 247 |
+
raise HTTPException(status_code=500, detail=f"Inference error: {str(e)}")
|
| 248 |
+
|
| 249 |
+
return PredictionResponse(**result)
|
| 250 |
+
```
|
| 251 |
+
|
| 252 |
+
### 2.5 Error Handling Summary
|
| 253 |
+
|
| 254 |
+
| สถานการณ์ | HTTP Status | รายละเอียด |
|
| 255 |
+
|---|---|---|
|
| 256 |
+
| ไฟล์ไม่ใช่รูปภาพ | 415 Unsupported Media Type | Content-type ไม่ตรง |
|
| 257 |
+
| ไฟล์เสีย (Corrupted) | 400 Bad Request | PIL ไม่สามารถเปิดได้ |
|
| 258 |
+
| ไฟล์ใหญ่เกินไป | 413 Request Entity Too Large | เกิน 10MB |
|
| 259 |
+
| Inference Error | 500 Internal Server Error | โมเดลทำงานผิดพลาด |
|
| 260 |
+
|
| 261 |
+
---
|
| 262 |
+
|
| 263 |
+
## Phase 3 — Dockerfile
|
| 264 |
+
|
| 265 |
+
```dockerfile
|
| 266 |
+
# ใช้ slim image เพื่อลด size
|
| 267 |
+
FROM python:3.11-slim
|
| 268 |
+
|
| 269 |
+
WORKDIR /app
|
| 270 |
+
|
| 271 |
+
# ติดตั้ง dependencies ก่อน (cache layer)
|
| 272 |
+
COPY requirements.txt .
|
| 273 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 274 |
+
|
| 275 |
+
# Copy โค้ดและโมเดล
|
| 276 |
+
COPY app/ ./app/
|
| 277 |
+
COPY models/ ./models/
|
| 278 |
+
|
| 279 |
+
EXPOSE 7860
|
| 280 |
+
|
| 281 |
+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
|
| 282 |
+
```
|
| 283 |
+
|
| 284 |
+
**requirements.txt:**
|
| 285 |
+
```
|
| 286 |
+
fastapi==0.111.0
|
| 287 |
+
uvicorn[standard]==0.29.0
|
| 288 |
+
python-multipart==0.0.9
|
| 289 |
+
onnxruntime==1.18.0
|
| 290 |
+
numpy==1.26.4
|
| 291 |
+
Pillow==10.3.0
|
| 292 |
+
transformers==4.41.0
|
| 293 |
+
torch==2.3.0
|
| 294 |
+
pydantic==2.7.1
|
| 295 |
+
pytest==8.2.0
|
| 296 |
+
httpx==0.27.0
|
| 297 |
+
```
|
| 298 |
+
|
| 299 |
+
> **เทคนิคลด Docker Image Size:**
|
| 300 |
+
> - ใช้ `python:3.11-slim` (ไม่ใช่ full)
|
| 301 |
+
> - `--no-cache-dir` ใน pip
|
| 302 |
+
> - ลบ torch ออกหลัง export ONNX (ใน production image ไม่จำเป็น)
|
| 303 |
+
> - ใช้ `.dockerignore` เพื่อ exclude `tests/`, `.git/`, `*.pt`
|
| 304 |
+
|
| 305 |
+
---
|
| 306 |
+
|
| 307 |
+
## Phase 4 — Unit Testing (`tests/test_api.py`)
|
| 308 |
+
|
| 309 |
+
```python
|
| 310 |
+
import pytest
|
| 311 |
+
from fastapi.testclient import TestClient
|
| 312 |
+
from app.main import app
|
| 313 |
+
from pathlib import Path
|
| 314 |
+
|
| 315 |
+
client = TestClient(app)
|
| 316 |
+
|
| 317 |
+
# --- Helper ---
|
| 318 |
+
def get_test_image() -> bytes:
|
| 319 |
+
"""ใช้ภาพ test จริงหรือสร้าง dummy PNG"""
|
| 320 |
+
from PIL import Image
|
| 321 |
+
import io
|
| 322 |
+
img = Image.new("RGB", (224, 224), color=(128, 64, 32))
|
| 323 |
+
buf = io.BytesIO()
|
| 324 |
+
img.save(buf, format="JPEG")
|
| 325 |
+
return buf.getvalue()
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
# --- Tests ---
|
| 329 |
+
|
| 330 |
+
def test_health_endpoint():
|
| 331 |
+
res = client.get("/health")
|
| 332 |
+
assert res.status_code == 200
|
| 333 |
+
assert res.json() == {"status": "ok"}
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
def test_predict_returns_valid_json():
|
| 337 |
+
img_bytes = get_test_image()
|
| 338 |
+
res = client.post(
|
| 339 |
+
"/predict",
|
| 340 |
+
files={"file": ("test.jpg", img_bytes, "image/jpeg")}
|
| 341 |
+
)
|
| 342 |
+
assert res.status_code == 200
|
| 343 |
+
data = res.json()
|
| 344 |
+
assert "label" in data
|
| 345 |
+
assert "score" in data
|
| 346 |
+
assert isinstance(data["score"], float)
|
| 347 |
+
assert 0.0 <= data["score"] <= 1.0
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
def test_predict_rejects_non_image():
|
| 351 |
+
res = client.post(
|
| 352 |
+
"/predict",
|
| 353 |
+
files={"file": ("test.txt", b"not an image", "text/plain")}
|
| 354 |
+
)
|
| 355 |
+
assert res.status_code == 415
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
def test_predict_rejects_corrupted_file():
|
| 359 |
+
res = client.post(
|
| 360 |
+
"/predict",
|
| 361 |
+
files={"file": ("bad.jpg", b"\xff\xd8corrupted", "image/jpeg")}
|
| 362 |
+
)
|
| 363 |
+
assert res.status_code == 400
|
| 364 |
+
|
| 365 |
+
|
| 366 |
+
def test_predict_rejects_oversized_file():
|
| 367 |
+
huge = b"A" * (11 * 1024 * 1024) # 11MB
|
| 368 |
+
res = client.post(
|
| 369 |
+
"/predict",
|
| 370 |
+
files={"file": ("big.jpg", huge, "image/jpeg")}
|
| 371 |
+
)
|
| 372 |
+
assert res.status_code == 413
|
| 373 |
+
```
|
| 374 |
+
|
| 375 |
+
---
|
| 376 |
+
|
| 377 |
+
## Phase 5 — GitHub Actions CI/CD (`.github/workflows/ci-cd.yml`)
|
| 378 |
+
|
| 379 |
+
```yaml
|
| 380 |
+
name: CI/CD Pipeline
|
| 381 |
+
|
| 382 |
+
on:
|
| 383 |
+
push:
|
| 384 |
+
branches: [main]
|
| 385 |
+
pull_request:
|
| 386 |
+
branches: [main]
|
| 387 |
+
|
| 388 |
+
jobs:
|
| 389 |
+
test:
|
| 390 |
+
runs-on: ubuntu-latest
|
| 391 |
+
steps:
|
| 392 |
+
- uses: actions/checkout@v4
|
| 393 |
+
|
| 394 |
+
- name: Set up Python 3.11
|
| 395 |
+
uses: actions/setup-python@v5
|
| 396 |
+
with:
|
| 397 |
+
python-version: "3.11"
|
| 398 |
+
|
| 399 |
+
- name: Install dependencies
|
| 400 |
+
run: pip install -r requirements.txt
|
| 401 |
+
|
| 402 |
+
- name: Run Unit Tests
|
| 403 |
+
run: pytest tests/ -v --tb=short
|
| 404 |
+
|
| 405 |
+
deploy:
|
| 406 |
+
needs: test # รัน deploy เฉพาะเมื่อ test ผ่านทุก case
|
| 407 |
+
runs-on: ubuntu-latest
|
| 408 |
+
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
| 409 |
+
steps:
|
| 410 |
+
- uses: actions/checkout@v4
|
| 411 |
+
|
| 412 |
+
- name: Push to Hugging Face Spaces
|
| 413 |
+
env:
|
| 414 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 415 |
+
run: |
|
| 416 |
+
git config --global user.email "ci@github.com"
|
| 417 |
+
git config --global user.name "GitHub Actions"
|
| 418 |
+
git remote add hf https://user:${HF_TOKEN}@huggingface.co/spaces/<YOUR_USERNAME>/<YOUR_SPACE_NAME>
|
| 419 |
+
git push hf main --force
|
| 420 |
+
```
|
| 421 |
+
|
| 422 |
+
> **การตั้งค่า Secret:**
|
| 423 |
+
> ไปที่ GitHub Repo → Settings → Secrets → Actions → New secret
|
| 424 |
+
> ชื่อ: `HF_TOKEN` | ค่า: Hugging Face Access Token (write permission)
|
| 425 |
+
|
| 426 |
+
---
|
| 427 |
+
|
| 428 |
+
## Phase 6 — Performance Testing (JMeter)
|
| 429 |
+
|
| 430 |
+
### 6.1 JMeter Test Plan (.jmx) — Key Settings
|
| 431 |
+
|
| 432 |
+
| Parameter | Local (Docker) | Cloud (HF Spaces) |
|
| 433 |
+
|---|---|---|
|
| 434 |
+
| Threads (Users) | 10, 50, 100 | 10, 25, 50 |
|
| 435 |
+
| Ramp-Up (sec) | 10 | 20 |
|
| 436 |
+
| Loop Count | 100 | 50 |
|
| 437 |
+
| Endpoint | `http://localhost:7860/predict` | `https://<space>.hf.space/predict` |
|
| 438 |
+
|
| 439 |
+
### 6.2 Metrics ที่ต้องรายงาน
|
| 440 |
+
|
| 441 |
+
| Metric | คำอธิบาย | เป้าหมาย |
|
| 442 |
+
|---|---|---|
|
| 443 |
+
| **Throughput (TPS)** | Request ต่อวินาที | สูงที่สุด |
|
| 444 |
+
| **P95 Latency** | 95th percentile response time | < 2000ms |
|
| 445 |
+
| **Error Rate** | % ที่ได้รับ error | < 1% |
|
| 446 |
+
| **Avg Latency** | ค่าเฉลี่ย response time | ต่ำที่สุด |
|
| 447 |
+
|
| 448 |
+
### 6.3 การวิเคราะห์ผล
|
| 449 |
+
|
| 450 |
+
```
|
| 451 |
+
จุดที่ต้องวิเคราะห์:
|
| 452 |
+
1. หา "Knee Point" — จุดที่ TPS หยุดเพิ่ม แต่ Latency เริ่มพุ่ง
|
| 453 |
+
2. CPU Utilization ใน Docker stats ณ จำนวน concurrent users นั้น
|
| 454 |
+
3. เปรียบเทียบ Local vs Cloud เพื่อดู overhead ของ Network/HF cold-start
|
| 455 |
+
```
|
| 456 |
+
|
| 457 |
+
---
|
| 458 |
+
|
| 459 |
+
## Phase 7 — cURL Examples
|
| 460 |
+
|
| 461 |
+
```bash
|
| 462 |
+
# Health Check
|
| 463 |
+
curl https://<USERNAME>-<SPACE>.hf.space/health
|
| 464 |
+
|
| 465 |
+
# Predict (ส่งไฟล์รูปภาพจริง)
|
| 466 |
+
curl -X POST "https://<USERNAME>-<SPACE>.hf.space/predict" \
|
| 467 |
+
-H "accept: application/json" \
|
| 468 |
+
-F "file=@/path/to/your/image.jpg"
|
| 469 |
+
|
| 470 |
+
# Postman Collection — ดูไฟล์ postman_collection.json ใน repo
|
| 471 |
+
```
|
| 472 |
+
|
| 473 |
+
---
|
| 474 |
+
|
| 475 |
+
## Checklist Deliverables
|
| 476 |
+
|
| 477 |
+
- [ ] Project Report (PDF) — Model details, Optimization table, Error strategy, JMeter analysis, Architecture diagram
|
| 478 |
+
- [ ] GitHub Repo — Source code + `.github/workflows/ci-cd.yml` + `README.md`
|
| 479 |
+
- [ ] `resnet18_quantized.onnx` — โมเดลที่ optimize แล้ว
|
| 480 |
+
- [ ] `tests/test_api.py` — pytest ครอบคลุม Happy path + Error cases
|
| 481 |
+
- [ ] `Dockerfile` — Production-ready
|
| 482 |
+
- [ ] JMeter Test Plan (`.jmx`)
|
| 483 |
+
- [ ] Postman Collection (`.json`)
|
| 484 |
+
- [ ] Hugging Face Space — Live API endpoint
|
| 485 |
+
- [ ] Presentation Slides + Live Demo (9 พ.ค. 2569)
|
| 486 |
+
|
| 487 |
+
---
|
| 488 |
+
|
| 489 |
+
## Notes & Tips
|
| 490 |
+
|
| 491 |
+
- **HF Spaces Free Tier** ใช้ CPU เท่านั้น — ONNX Runtime บน CPU เหมาะสมที่สุด
|
| 492 |
+
- **Cold Start** ใน HF Spaces อาจทำให้ request แรกช้า — ควรระบุในรายงาน
|
| 493 |
+
- **ProcessPoolExecutor** ต้องระวัง: แต่ละ worker โหลด ONNX session แยกกัน (memory x workers)
|
| 494 |
+
- **Pydantic v2** syntax เปลี่ยนจาก v1 — ใช้ `model_config` แทน `class Config`
|
| 495 |
+
- ใน `pytest` ต้องมี `conftest.py` หรือ set `PYTHONPATH=.` ให้ถูกต้อง
|
app/main.py
CHANGED
|
@@ -4,14 +4,17 @@ from concurrent.futures import ProcessPoolExecutor
|
|
| 4 |
import asyncio
|
| 5 |
from app.model import run_inference
|
| 6 |
from app.schemas import PredictionResponse
|
|
|
|
| 7 |
|
| 8 |
app = FastAPI(title="ResNet-18 Image Classifier", version="1.0.0")
|
| 9 |
executor = ProcessPoolExecutor(max_workers=4)
|
| 10 |
|
|
|
|
| 11 |
ALLOWED_CONTENT_TYPES = {"image/jpeg", "image/png", "image/webp", "image/gif"}
|
| 12 |
|
| 13 |
@app.get("/", response_class=HTMLResponse)
|
| 14 |
async def demo_ui():
|
|
|
|
| 15 |
return """
|
| 16 |
<!DOCTYPE html>
|
| 17 |
<html>
|
|
@@ -78,6 +81,11 @@ async def demo_ui():
|
|
| 78 |
const response = await fetch('/predict', { method: 'POST', body: formData });
|
| 79 |
const data = await response.json();
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
document.getElementById('res-label').innerText = data.label;
|
| 82 |
document.getElementById('res-score').innerText = (data.score * 100).toFixed(2) + '%';
|
| 83 |
document.getElementById('res-time').innerText = data.inference_time_ms.toFixed(2) + ' ms';
|
|
@@ -101,9 +109,23 @@ async def health():
|
|
| 101 |
|
| 102 |
@app.post("/predict", response_model=PredictionResponse)
|
| 103 |
async def predict(file: UploadFile = File(...)):
|
|
|
|
| 104 |
if file.content_type not in ALLOWED_CONTENT_TYPES:
|
| 105 |
raise HTTPException(status_code=415, detail="Unsupported media type")
|
|
|
|
|
|
|
| 106 |
image_bytes = await file.read()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
loop = asyncio.get_event_loop()
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import asyncio
|
| 5 |
from app.model import run_inference
|
| 6 |
from app.schemas import PredictionResponse
|
| 7 |
+
from PIL import UnidentifiedImageError
|
| 8 |
|
| 9 |
app = FastAPI(title="ResNet-18 Image Classifier", version="1.0.0")
|
| 10 |
executor = ProcessPoolExecutor(max_workers=4)
|
| 11 |
|
| 12 |
+
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
|
| 13 |
ALLOWED_CONTENT_TYPES = {"image/jpeg", "image/png", "image/webp", "image/gif"}
|
| 14 |
|
| 15 |
@app.get("/", response_class=HTMLResponse)
|
| 16 |
async def demo_ui():
|
| 17 |
+
# ... (HTML UI code remains the same)
|
| 18 |
return """
|
| 19 |
<!DOCTYPE html>
|
| 20 |
<html>
|
|
|
|
| 81 |
const response = await fetch('/predict', { method: 'POST', body: formData });
|
| 82 |
const data = await response.json();
|
| 83 |
|
| 84 |
+
if (response.status !== 200) {
|
| 85 |
+
alert(data.detail || 'Prediction failed');
|
| 86 |
+
return;
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
document.getElementById('res-label').innerText = data.label;
|
| 90 |
document.getElementById('res-score').innerText = (data.score * 100).toFixed(2) + '%';
|
| 91 |
document.getElementById('res-time').innerText = data.inference_time_ms.toFixed(2) + ' ms';
|
|
|
|
| 109 |
|
| 110 |
@app.post("/predict", response_model=PredictionResponse)
async def predict(file: UploadFile = File(...)):
    """Classify an uploaded image.

    Returns the model's top label, its score, and the inference time.
    Raises 415 for unsupported media types, 413 for oversized uploads,
    400 for undecodable images, and 500 for unexpected inference errors.
    """
    # 1. Validate the declared content type before reading the body.
    if file.content_type not in ALLOWED_CONTENT_TYPES:
        raise HTTPException(status_code=415, detail="Unsupported media type")

    # 2. Read the upload into memory.
    image_bytes = await file.read()

    # 3. Enforce the size cap (fix for test_predict_rejects_oversized_file).
    if len(image_bytes) > MAX_FILE_SIZE:
        raise HTTPException(status_code=413, detail="File too large")

    # 4. Run inference in the process pool and map known failures to HTTP
    #    errors (fix for test_predict_rejects_corrupted_file).
    #    get_running_loop() is the non-deprecated form inside a coroutine
    #    (get_event_loop() is deprecated in this context since Python 3.10).
    loop = asyncio.get_running_loop()
    try:
        result = await loop.run_in_executor(executor, run_inference, image_bytes)
        return result
    except UnidentifiedImageError:
        raise HTTPException(status_code=400, detail="Invalid image file")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Inference error: {str(e)}")
|
jmeter_test_plan.jmx
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<jmeterTestPlan version="1.2" properties="5.0" jmeter="5.6.3">
|
| 3 |
+
<hashTree>
|
| 4 |
+
<TestPlan guiclass="TestPlanGui" testclass="TestPlan" testname="ResNet Image Classifier Load Test">
|
| 5 |
+
<elementProp name="TestPlan.user_defined_variables" elementType="Arguments" guiclass="ArgumentsPanel" testclass="Arguments" testname="User Defined Variables">
|
| 6 |
+
<collectionProp name="Arguments.arguments"/>
|
| 7 |
+
</elementProp>
|
| 8 |
+
</TestPlan>
|
| 9 |
+
<hashTree>
|
| 10 |
+
<ThreadGroup guiclass="ThreadGroupGui" testclass="ThreadGroup" testname="Concurrent Users">
|
| 11 |
+
<intProp name="ThreadGroup.num_threads">60</intProp>
|
| 12 |
+
<intProp name="ThreadGroup.ramp_time">10</intProp>
|
| 13 |
+
<longProp name="ThreadGroup.duration">60</longProp>
|
| 14 |
+
<boolProp name="ThreadGroup.same_user_on_next_iteration">true</boolProp>
|
| 15 |
+
<stringProp name="ThreadGroup.on_sample_error">continue</stringProp>
|
| 16 |
+
<elementProp name="ThreadGroup.main_controller" elementType="LoopController" guiclass="LoopControlPanel" testclass="LoopController" testname="Loop Controller">
|
| 17 |
+
<intProp name="LoopController.loops">-1</intProp>
|
| 18 |
+
<boolProp name="LoopController.continue_forever">false</boolProp>
|
| 19 |
+
</elementProp>
|
| 20 |
+
</ThreadGroup>
|
| 21 |
+
<hashTree>
|
| 22 |
+
<HTTPSamplerProxy guiclass="HttpTestSampleGui" testclass="HTTPSamplerProxy" testname="Predict Request">
|
| 23 |
+
<stringProp name="HTTPSampler.domain">127.0.0.1</stringProp>
|
| 24 |
+
<stringProp name="HTTPSampler.port">8000</stringProp>
|
| 25 |
+
<stringProp name="HTTPSampler.protocol">http</stringProp>
|
| 26 |
+
<stringProp name="HTTPSampler.path">/predict</stringProp>
|
| 27 |
+
<boolProp name="HTTPSampler.follow_redirects">true</boolProp>
|
| 28 |
+
<stringProp name="HTTPSampler.method">POST</stringProp>
|
| 29 |
+
<boolProp name="HTTPSampler.use_keepalive">true</boolProp>
|
| 30 |
+
<boolProp name="HTTPSampler.DO_MULTIPART_POST">true</boolProp>
|
| 31 |
+
<elementProp name="HTTPsampler.Files" elementType="HTTPFileArgs">
|
| 32 |
+
<collectionProp name="HTTPFileArgs.files">
|
| 33 |
+
<elementProp name="test.jpg" elementType="HTTPFileArg">
|
| 34 |
+
<stringProp name="File.mimetype">image/jpeg</stringProp>
|
| 35 |
+
<stringProp name="File.path">test.jpg</stringProp>
|
| 36 |
+
<stringProp name="File.paramname">file</stringProp>
|
| 37 |
+
</elementProp>
|
| 38 |
+
</collectionProp>
|
| 39 |
+
</elementProp>
|
| 40 |
+
<boolProp name="HTTPSampler.postBodyRaw">false</boolProp>
|
| 41 |
+
<elementProp name="HTTPsampler.Arguments" elementType="Arguments" guiclass="HTTPArgumentsPanel" testclass="Arguments" testname="User Defined Variables">
|
| 42 |
+
<collectionProp name="Arguments.arguments"/>
|
| 43 |
+
</elementProp>
|
| 44 |
+
</HTTPSamplerProxy>
|
| 45 |
+
<hashTree/>
|
| 46 |
+
<ResultCollector guiclass="ViewResultsFullVisualizer" testclass="ResultCollector" testname="View Results Tree">
|
| 47 |
+
<boolProp name="ResultCollector.error_logging">false</boolProp>
|
| 48 |
+
<objProp>
|
| 49 |
+
<name>saveConfig</name>
|
| 50 |
+
<value class="SampleSaveConfiguration">
|
| 51 |
+
<time>true</time>
|
| 52 |
+
<latency>true</latency>
|
| 53 |
+
<timestamp>true</timestamp>
|
| 54 |
+
<success>true</success>
|
| 55 |
+
<label>true</label>
|
| 56 |
+
<code>true</code>
|
| 57 |
+
<message>true</message>
|
| 58 |
+
<threadName>true</threadName>
|
| 59 |
+
<dataType>true</dataType>
|
| 60 |
+
<encoding>false</encoding>
|
| 61 |
+
<assertions>true</assertions>
|
| 62 |
+
<subresults>true</subresults>
|
| 63 |
+
<responseData>false</responseData>
|
| 64 |
+
<samplerData>false</samplerData>
|
| 65 |
+
<xml>false</xml>
|
| 66 |
+
<fieldNames>true</fieldNames>
|
| 67 |
+
<responseHeaders>false</responseHeaders>
|
| 68 |
+
<requestHeaders>false</requestHeaders>
|
| 69 |
+
<responseDataOnError>false</responseDataOnError>
|
| 70 |
+
<saveAssertionResultsFailureMessage>true</saveAssertionResultsFailureMessage>
|
| 71 |
+
<assertionsResultsToSave>0</assertionsResultsToSave>
|
| 72 |
+
<bytes>true</bytes>
|
| 73 |
+
<sentBytes>true</sentBytes>
|
| 74 |
+
<url>true</url>
|
| 75 |
+
<threadCounts>true</threadCounts>
|
| 76 |
+
<idleTime>true</idleTime>
|
| 77 |
+
<connectTime>true</connectTime>
|
| 78 |
+
</value>
|
| 79 |
+
</objProp>
|
| 80 |
+
<stringProp name="filename"></stringProp>
|
| 81 |
+
</ResultCollector>
|
| 82 |
+
<hashTree/>
|
| 83 |
+
</hashTree>
|
| 84 |
+
<ResultCollector guiclass="SummaryReport" testclass="ResultCollector" testname="Summary Report">
|
| 85 |
+
<boolProp name="ResultCollector.error_logging">false</boolProp>
|
| 86 |
+
<objProp>
|
| 87 |
+
<name>saveConfig</name>
|
| 88 |
+
<value class="SampleSaveConfiguration">
|
| 89 |
+
<time>true</time>
|
| 90 |
+
<latency>true</latency>
|
| 91 |
+
<timestamp>true</timestamp>
|
| 92 |
+
<success>true</success>
|
| 93 |
+
<label>true</label>
|
| 94 |
+
<code>true</code>
|
| 95 |
+
<message>true</message>
|
| 96 |
+
<threadName>true</threadName>
|
| 97 |
+
<dataType>true</dataType>
|
| 98 |
+
<encoding>false</encoding>
|
| 99 |
+
<assertions>true</assertions>
|
| 100 |
+
<subresults>true</subresults>
|
| 101 |
+
<responseData>false</responseData>
|
| 102 |
+
<samplerData>false</samplerData>
|
| 103 |
+
<xml>false</xml>
|
| 104 |
+
<fieldNames>true</fieldNames>
|
| 105 |
+
<responseHeaders>false</responseHeaders>
|
| 106 |
+
<requestHeaders>false</requestHeaders>
|
| 107 |
+
<responseDataOnError>false</responseDataOnError>
|
| 108 |
+
<saveAssertionResultsFailureMessage>true</saveAssertionResultsFailureMessage>
|
| 109 |
+
<assertionsResultsToSave>0</assertionsResultsToSave>
|
| 110 |
+
<bytes>true</bytes>
|
| 111 |
+
<sentBytes>true</sentBytes>
|
| 112 |
+
<url>true</url>
|
| 113 |
+
<threadCounts>true</threadCounts>
|
| 114 |
+
<idleTime>true</idleTime>
|
| 115 |
+
<connectTime>true</connectTime>
|
| 116 |
+
</value>
|
| 117 |
+
</objProp>
|
| 118 |
+
<stringProp name="filename"></stringProp>
|
| 119 |
+
</ResultCollector>
|
| 120 |
+
<hashTree/>
|
| 121 |
+
</hashTree>
|
| 122 |
+
</hashTree>
|
| 123 |
+
</jmeterTestPlan>
|
postman_collection.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"info": {
|
| 3 |
+
"_postman_id": "8923a12b-7c45-4b2e-9d2a-8c9d9e9f9a9b",
|
| 4 |
+
"name": "ResNet-18 Image Classifier",
|
| 5 |
+
"schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
|
| 6 |
+
},
|
| 7 |
+
"item": [
|
| 8 |
+
{
|
| 9 |
+
"name": "Health Check",
|
| 10 |
+
"request": {
|
| 11 |
+
"method": "GET",
|
| 12 |
+
"header": [],
|
| 13 |
+
"url": {
|
| 14 |
+
"raw": "{{baseUrl}}/health",
|
| 15 |
+
"host": [
|
| 16 |
+
"{{baseUrl}}"
|
| 17 |
+
],
|
| 18 |
+
"path": [
|
| 19 |
+
"health"
|
| 20 |
+
]
|
| 21 |
+
}
|
| 22 |
+
},
|
| 23 |
+
"response": []
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"name": "Predict Image",
|
| 27 |
+
"request": {
|
| 28 |
+
"method": "POST",
|
| 29 |
+
"header": [
|
| 30 |
+
{
|
| 31 |
+
"key": "accept",
|
| 32 |
+
"value": "application/json"
|
| 33 |
+
}
|
| 34 |
+
],
|
| 35 |
+
"body": {
|
| 36 |
+
"mode": "formdata",
|
| 37 |
+
"formdata": [
|
| 38 |
+
{
|
| 39 |
+
"key": "file",
|
| 40 |
+
"type": "file",
|
| 41 |
+
"src": ""
|
| 42 |
+
}
|
| 43 |
+
]
|
| 44 |
+
},
|
| 45 |
+
"url": {
|
| 46 |
+
"raw": "{{baseUrl}}/predict",
|
| 47 |
+
"host": [
|
| 48 |
+
"{{baseUrl}}"
|
| 49 |
+
],
|
| 50 |
+
"path": [
|
| 51 |
+
"predict"
|
| 52 |
+
]
|
| 53 |
+
}
|
| 54 |
+
},
|
| 55 |
+
"response": []
|
| 56 |
+
}
|
| 57 |
+
],
|
| 58 |
+
"variable": [
|
| 59 |
+
{
|
| 60 |
+
"key": "baseUrl",
|
| 61 |
+
"value": "http://localhost:8000",
|
| 62 |
+
"type": "string"
|
| 63 |
+
}
|
| 64 |
+
]
|
| 65 |
+
}
|
scripts/01_baseline_test.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Baseline latency and size measurement for the PyTorch ResNet-18 model."""
from transformers import AutoImageProcessor, ResNetForImageClassification
import torch
import time
import os
from PIL import Image

model_id = "microsoft/resnet-18"
processor = AutoImageProcessor.from_pretrained(model_id)
model = ResNetForImageClassification.from_pretrained(model_id)
model.eval()

# Create a deterministic test image if one does not already exist.
if not os.path.exists("test.jpg"):
    img = Image.new("RGB", (224, 224), color=(128, 64, 32))
    img.save("test.jpg")

# Measure baseline latency over 100 runs.
# Preprocessing is hoisted out of the loop so only inference is timed.
img = Image.open("test.jpg").convert("RGB")
inputs = processor(images=img, return_tensors="pt")

times = []
with torch.no_grad():
    for _ in range(100):
        t0 = time.perf_counter()
        _ = model(**inputs)
        times.append(time.perf_counter() - t0)

print(f"Baseline Latency (avg): {sum(times)/len(times)*1000:.2f} ms")
print(f"P95 Latency: {sorted(times)[94]*1000:.2f} ms")

# Save the model to disk so its on-disk footprint can be measured.
# FIX: recent transformers versions save weights as .safetensors rather than
# .bin; counting only .bin files would report 0 MB. Count both extensions.
model.save_pretrained("./pytorch_model")
model_size = sum(
    os.path.getsize(os.path.join("./pytorch_model", f))
    for f in os.listdir("./pytorch_model")
    if f.endswith((".bin", ".safetensors"))
)
print(f"Model Size: {model_size/1e6:.2f} MB")
|
scripts/02_export_onnx.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Export the Hugging Face ResNet-18 model to ONNX via torch.onnx tracing."""
import torch
from transformers import AutoImageProcessor, ResNetForImageClassification
import os

model_id = "microsoft/resnet-18"
processor = AutoImageProcessor.from_pretrained(model_id)
model = ResNetForImageClassification.from_pretrained(model_id).eval()

# Ensure the output directory exists.
os.makedirs("models", exist_ok=True)

# Dummy input for tracing: (batch, channels, height, width).
dummy = torch.randn(1, 3, 224, 224)

# Export using the legacy tracing exporter, which is more stable for
# downstream quantization tooling than the dynamo-based exporter.
print("Exporting model to ONNX using legacy tracing...")
torch.onnx.export(
    model,
    dummy,
    "models/resnet18.onnx",
    export_params=True,
    opset_version=18,  # FIX: comment previously claimed opset 11; the value actually used is 18
    do_constant_folding=True,
    input_names=["pixel_values"],
    output_names=["logits"],
    # Allow variable batch size at inference time.
    dynamic_axes={"pixel_values": {0: "batch_size"}, "logits": {0: "batch_size"}},
)

print("ONNX exported successfully to models/resnet18.onnx")
|
scripts/03_quantize.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Dynamically quantize the exported ONNX model to UInt8 weights,
working around strict shape-inference failures in onnxruntime's tooling."""
import onnx
import onnx.shape_inference
from onnxruntime.quantization import quantize_dynamic, QuantType
import os

# --- Monkey-patch onnx.shape_inference to bypass strict checks -------------
original_infer_shapes_path = onnx.shape_inference.infer_shapes_path

def patched_infer_shapes_path(model_path, output_path=None, check_type=False, strict_mode=False, data_prop=False):
    """Run shape inference in non-strict mode.

    On failure, fall back to copying the model through unchanged so that
    quantization can still proceed. FIX: the failure is now reported instead
    of being silently swallowed (previously a falsy output_path meant the
    exception vanished and the caller got None with no warning).
    """
    try:
        # Force strict_mode=False regardless of what the caller passed.
        return original_infer_shapes_path(model_path, output_path, check_type, False, data_prop)
    except Exception as e:
        print(f"Shape inference failed ({e}); passing model through unchanged.")
        if output_path:
            import shutil
            shutil.copy(model_path, output_path)

onnx.shape_inference.infer_shapes_path = patched_infer_shapes_path
# --------------------------------------------------------------------------

model_path = "models/resnet18.onnx"
quantized_path = "models/resnet18_quantized.onnx"

print(f"Quantizing model: {model_path}...")
try:
    quantize_dynamic(
        model_input=model_path,
        model_output=quantized_path,
        weight_type=QuantType.QUInt8,
        extra_options={
            'EnableShapeInference': False,
            # Default missing tensor types to FLOAT (fixes tensor-type errors
            # raised by the quantizer on this traced graph).
            'DefaultTensorType': onnx.TensorProto.FLOAT,
        }
    )
except Exception as e:
    print(f"Quantization failed: {e}")

if os.path.exists(quantized_path):
    print(f"Success: {quantized_path} created. Size: {os.path.getsize(quantized_path)/1e6:.2f} MB")
else:
    # Retry once with a minimal option set before giving up.
    # FIX: guard the retry so a second failure is reported instead of
    # crashing the script with an unhandled exception.
    print("Trying one last alternative...")
    try:
        quantize_dynamic(
            model_input=model_path,
            model_output=quantized_path,
            weight_type=QuantType.QUInt8,
        )
    except Exception as e:
        print(f"Second quantization attempt failed: {e}")

    if os.path.exists(quantized_path):
        print(f"Success on second attempt: {quantized_path}")
|
scripts/04_benchmark_onnx.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Benchmark the FP32 vs. quantized ONNX models on CPU (latency + size)."""
import onnxruntime as ort
import numpy as np
import time
import os
from PIL import Image
from transformers import AutoImageProcessor

# Preprocessor shared by both model variants.
processor = AutoImageProcessor.from_pretrained("microsoft/resnet-18")

# Candidate models to benchmark (missing files are skipped, not fatal).
models = {
    "ONNX": "models/resnet18.onnx",
    "ONNX Quantized": "models/resnet18_quantized.onnx"
}

# Create test image if not exists
if not os.path.exists("test.jpg"):
    Image.new("RGB", (224, 224), color=(128, 64, 32)).save("test.jpg")

# Preprocess once up front; the same tensor is fed to every run.
img = Image.open("test.jpg").convert("RGB")
inputs = processor(images=img, return_tensors="np")
pixel_values = inputs["pixel_values"].astype(np.float32)

for name, model_path in models.items():
    if not os.path.exists(model_path):
        print(f"Skipping {name}: {model_path} not found")
        continue

    session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])

    # 100 timed runs measuring inference only.
    times = []
    for _ in range(100):
        start = time.perf_counter()
        _ = session.run(["logits"], {"pixel_values": pixel_values})
        times.append(time.perf_counter() - start)

    print(f"\n{name}:")
    print(f"  Avg Latency: {sum(times)/len(times)*1000:.2f} ms")
    print(f"  P95 Latency: {sorted(times)[94]*1000:.2f} ms")
    print(f"  File Size: {os.path.getsize(model_path)/1e6:.2f} MB")
|
sym_shape_infer_temp.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:736f6cb91a0ae100eaeb13aa7842f5e718ab67b101e2c44115f9d9fbf87e80b3
|
| 3 |
+
size 179747
|
tests/conftest.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pytest bootstrap: make the project root importable as a package root."""
import sys
import os

# Prepend the project root (parent of this directory) to sys.path so that
# imports like `from app.main import app` resolve when pytest runs the suite.
_project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, _project_root)
|
tests/test_api.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""API tests for the ResNet-18 classifier: happy path plus error cases."""
import pytest
from fastapi.testclient import TestClient
from app.main import app
from PIL import Image
import io

client = TestClient(app)

# --- Helper ---
def get_test_image() -> bytes:
    """Render a small solid-color JPEG and return its raw bytes."""
    buf = io.BytesIO()
    Image.new("RGB", (224, 224), color=(128, 64, 32)).save(buf, format="JPEG")
    return buf.getvalue()


# --- Tests ---

def test_health_endpoint():
    res = client.get("/health")
    assert res.status_code == 200
    assert res.json() == {"status": "ok"}


def test_predict_returns_valid_json():
    res = client.post(
        "/predict",
        files={"file": ("test.jpg", get_test_image(), "image/jpeg")},
    )
    assert res.status_code == 200
    payload = res.json()
    assert "label" in payload
    assert "score" in payload
    assert isinstance(payload["score"], float)
    assert 0.0 <= payload["score"] <= 1.0


def test_predict_rejects_non_image():
    res = client.post(
        "/predict",
        files={"file": ("test.txt", b"not an image", "text/plain")},
    )
    assert res.status_code == 415


def test_predict_rejects_corrupted_file():
    res = client.post(
        "/predict",
        files={"file": ("bad.jpg", b"\xff\xd8corrupted", "image/jpeg")},
    )
    assert res.status_code == 400


def test_predict_rejects_oversized_file():
    oversized = b"A" * (11 * 1024 * 1024)  # 11 MB, above the 10 MB cap
    res = client.post(
        "/predict",
        files={"file": ("big.jpg", oversized, "image/jpeg")},
    )
    assert res.status_code == 413