Reorganize repository with clean separation of concerns
Browse files- Restructure project into logical directories (src/, docker/, deployments/, scripts/, docs/, assets/)
- Separate platform-specific deployments (HuggingFace and Azure AI Foundry)
- Add platform-specific deployment scripts with dedicated READMEs
- Create usage examples in assets/examples/
- Move documentation to docs/ directory
- Update all path references in Dockerfile, scripts, and tests
- Add comprehensive dual-deployment documentation
- Validate all functionality: build, deploy, and inference working
Benefits:
- Clear separation of concerns (source, docker, deployment, docs, assets)
- Scalable structure for adding new platforms
- Easy navigation and maintenance
- Professional industry-standard layout
- Ready for CI/CD integration
Tested and validated:
✓ Docker build with new paths
✓ HuggingFace deployment successful
✓ Inference API operational (2.07s response time)
✓ Usage examples working
✓ All path resolutions correct
🤖 Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
- .gitattributes +1 -0
- README.md +317 -119
- assets/examples/usage_example.py +118 -0
- test.jpg → assets/test_images/test.jpg +0 -0
- deployments/azure/README.md +65 -0
- deployments/azure/deploy.sh +63 -0
- deployments/huggingface/README.md +39 -0
- deployments/huggingface/deploy.sh +70 -0
- Dockerfile → docker/Dockerfile +2 -2
- requirements.txt → docker/requirements.txt +5 -4
- docs/DEPLOYMENT.md +361 -0
- model/config.json +2 -2
- model/model.safetensors +2 -2
- model/processor_config.json +2 -2
- model/tokenizer_config.json +2 -2
- scripts/deploy_all.sh +162 -0
- scripts/test/test_api.py +88 -0
- app.py → src/app.py +127 -142
- test_remote.py +0 -21
|
@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 36 |
# Only track large model JSON files in LFS (tokenizer, vocab, etc.)
|
| 37 |
model/*.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
model/*.txt filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 36 |
# Only track large model JSON files in LFS (tokenizer, vocab, etc.)
|
| 37 |
model/*.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
model/*.txt filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
|
@@ -1,180 +1,378 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
---
|
| 11 |
|
| 12 |
-
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
- **VRAM-aware** concurrency control for large images
|
| 23 |
-
- **Scale-to-zero** support on Hugging Face Endpoints
|
| 24 |
-
- Optimized for **1920×1080** images on A10/L4 GPUs
|
| 25 |
|
| 26 |
-
|
| 27 |
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
docker push yourusername/sam3:latest
|
| 34 |
-
```
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
-
|
| 52 |
|
|
|
|
| 53 |
```json
|
| 54 |
{
|
| 55 |
-
"inputs": "<
|
| 56 |
-
"parameters": {
|
|
|
|
|
|
|
| 57 |
}
|
| 58 |
```
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
```json
|
| 63 |
[
|
| 64 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
]
|
| 66 |
```
|
| 67 |
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
-
|
| 71 |
|
| 72 |
-
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
-
|
| 79 |
-
docker run --gpus all -p 7860:7860 sam3:latest
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
```
|
| 84 |
|
| 85 |
-
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
```
|
| 91 |
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
```bash
|
| 94 |
-
|
| 95 |
-
-H "Content-Type: application/json" \
|
| 96 |
-
-d '{
|
| 97 |
-
"inputs": "<base64_encoded_image>",
|
| 98 |
-
"parameters": {"classes": ["pothole", "marking"]}
|
| 99 |
-
}'
|
| 100 |
```
|
| 101 |
|
| 102 |
-
|
|
|
|
| 103 |
```bash
|
| 104 |
-
|
|
|
|
| 105 |
```
|
| 106 |
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
|
|
|
|
|
|
| 109 |
```
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
| 124 |
```
|
| 125 |
|
| 126 |
-
|
|
|
|
|
|
|
| 127 |
|
| 128 |
-
|
| 129 |
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
```bash
|
| 133 |
-
|
| 134 |
-
docker login
|
| 135 |
-
docker push yourusername/sam3:latest
|
| 136 |
```
|
| 137 |
|
| 138 |
-
|
| 139 |
|
| 140 |
-
|
| 141 |
|
| 142 |
-
|
| 143 |
-
- **Inference time:** 5-10 seconds
|
| 144 |
-
- **VRAM usage:** 8-12GB per inference
|
| 145 |
-
- **Recommended GPU:** L4 (24GB) or A10G (24GB)
|
| 146 |
-
- **Max concurrent:** 1-2 requests (automatically managed)
|
| 147 |
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
-
##
|
| 151 |
|
| 152 |
-
###
|
| 153 |
|
| 154 |
-
|
| 155 |
-
- **Out of memory**: The app automatically manages VRAM. If issues persist, reduce image resolution
|
| 156 |
-
- **Model loading fails**: Verify Git LFS pulled all files (`git lfs pull`)
|
| 157 |
-
- **API timeout**: Increase timeout in endpoint config (recommend 300s for large images)
|
| 158 |
-
- **Slow inference**: First request is slower due to model warmup (~10s), subsequent requests are faster
|
| 159 |
|
| 160 |
-
|
|
|
|
| 161 |
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
| 165 |
```
|
| 166 |
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
"
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
}
|
| 180 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SAM3 Static Image Segmentation - HuggingFace Deployment
|
| 2 |
+
|
| 3 |
+
Production-ready deployment of Meta's SAM3 (Segment Anything Model 3) for text-prompted static image segmentation on HuggingFace Inference Endpoints with Azure Container Registry.
|
| 4 |
+
|
| 5 |
+
## π Quick Start
|
| 6 |
+
|
| 7 |
+
### Deployments
|
| 8 |
+
|
| 9 |
+
This repository supports deployment to **both HuggingFace and Azure AI Foundry**. See [DEPLOYMENT.md](DEPLOYMENT.md) for dual-deployment guide.
|
|
|
|
| 10 |
|
| 11 |
+
#### HuggingFace (Current)
|
| 12 |
|
| 13 |
+
**URL**: `https://yzsj8fy005ix8sje.us-east-1.aws.endpoints.huggingface.cloud`
|
| 14 |
+
**Status**: ✅ Running
|
| 15 |
+
**Model**: `facebook/sam3` (Sam3Model for static images)
|
| 16 |
+
**Hardware**: NVIDIA A10G GPU (24GB VRAM)
|
| 17 |
|
| 18 |
+
#### Azure AI Foundry (Pending GPU Quota)
|
| 19 |
|
| 20 |
+
**Registry**: `sam3acr.azurecr.io`
|
| 21 |
+
**Status**: β³ Waiting for GPU quota approval
|
| 22 |
+
**See**: [DEPLOYMENT.md](DEPLOYMENT.md) for deployment instructions
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
### Basic Usage
|
| 25 |
|
| 26 |
+
```python
|
| 27 |
+
import requests
|
| 28 |
+
import base64
|
| 29 |
+
from PIL import Image
|
| 30 |
+
import io
|
| 31 |
|
| 32 |
+
# Load and encode image
|
| 33 |
+
with open("image.jpg", "rb") as f:
|
| 34 |
+
image_b64 = base64.b64encode(f.read()).decode()
|
|
|
|
|
|
|
| 35 |
|
| 36 |
+
# Request segmentation masks
|
| 37 |
+
response = requests.post(
|
| 38 |
+
"https://yzsj8fy005ix8sje.us-east-1.aws.endpoints.huggingface.cloud",
|
| 39 |
+
json={
|
| 40 |
+
"inputs": image_b64,
|
| 41 |
+
"parameters": {
|
| 42 |
+
"classes": ["pothole", "asphalt", "yellow line", "shadow"]
|
| 43 |
+
}
|
| 44 |
+
}
|
| 45 |
+
)
|
| 46 |
|
| 47 |
+
# Process results
|
| 48 |
+
results = response.json()
|
| 49 |
+
for result in results:
|
| 50 |
+
label = result["label"]
|
| 51 |
+
score = result["score"]
|
| 52 |
+
mask_b64 = result["mask"]
|
| 53 |
|
| 54 |
+
# Decode mask (PNG image as base64)
|
| 55 |
+
mask_bytes = base64.b64decode(mask_b64)
|
| 56 |
+
mask_image = Image.open(io.BytesIO(mask_bytes))
|
| 57 |
|
| 58 |
+
print(f"Class: {label}, Score: {score}")
|
| 59 |
+
mask_image.save(f"mask_{label}.png")
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
## π API Reference
|
| 63 |
+
|
| 64 |
+
### POST `/`
|
| 65 |
|
| 66 |
+
Segment objects in an image using text prompts.
|
| 67 |
|
| 68 |
+
**Request Body**:
|
| 69 |
```json
|
| 70 |
{
|
| 71 |
+
"inputs": "<base64 encoded JPEG/PNG image>",
|
| 72 |
+
"parameters": {
|
| 73 |
+
"classes": ["object1", "object2", "object3"]
|
| 74 |
+
}
|
| 75 |
}
|
| 76 |
```
|
| 77 |
|
| 78 |
+
**Response**:
|
|
|
|
| 79 |
```json
|
| 80 |
[
|
| 81 |
+
{
|
| 82 |
+
"label": "object1",
|
| 83 |
+
"score": 1.0,
|
| 84 |
+
"mask": "<base64 encoded PNG mask>"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"label": "object2",
|
| 88 |
+
"score": 1.0,
|
| 89 |
+
"mask": "<base64 encoded PNG mask>"
|
| 90 |
+
}
|
| 91 |
]
|
| 92 |
```
|
| 93 |
|
| 94 |
+
**Mask Format**:
|
| 95 |
+
- PNG grayscale image (base64 encoded)
|
| 96 |
+
- White pixels (255) = object present
|
| 97 |
+
- Black pixels (0) = background
|
| 98 |
+
- Same dimensions as input image
|
| 99 |
|
| 100 |
+
### GET `/health`
|
| 101 |
|
| 102 |
+
Check endpoint health and GPU status.
|
| 103 |
|
| 104 |
+
**Response**:
|
| 105 |
+
```json
|
| 106 |
+
{
|
| 107 |
+
"status": "healthy",
|
| 108 |
+
"model": "Sam3Model",
|
| 109 |
+
"gpu_available": true,
|
| 110 |
+
"vram": {
|
| 111 |
+
"total_gb": 23.95,
|
| 112 |
+
"allocated_gb": 1.72,
|
| 113 |
+
"free_gb": 22.20,
|
| 114 |
+
"processing_now": 0
|
| 115 |
+
}
|
| 116 |
+
}
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
### GET `/metrics`
|
| 120 |
|
| 121 |
+
Get VRAM metrics.
|
|
|
|
| 122 |
|
| 123 |
+
**Response**:
|
| 124 |
+
```json
|
| 125 |
+
{
|
| 126 |
+
"total_gb": 23.95,
|
| 127 |
+
"allocated_gb": 1.72,
|
| 128 |
+
"free_gb": 22.20,
|
| 129 |
+
"processing_now": 0
|
| 130 |
+
}
|
| 131 |
```
|
| 132 |
|
| 133 |
+
## π οΈ Deployment Architecture
|
| 134 |
|
| 135 |
+
### Components
|
| 136 |
+
|
| 137 |
+
- **Model**: `facebook/sam3` (Sam3Model - 3.4GB)
|
| 138 |
+
- **Container**: NVIDIA CUDA 12.9.1 + Ubuntu 24.04
|
| 139 |
+
- **Registry**: Azure Container Registry `sam3acr4hf.azurecr.io`
|
| 140 |
+
- **Endpoint**: HuggingFace Inference Endpoints (Logiroad organization)
|
| 141 |
+
- **GPU**: NVIDIA A10G (24GB VRAM)
|
| 142 |
+
|
| 143 |
+
### Repository Structure
|
| 144 |
+
|
| 145 |
+
```
|
| 146 |
+
sam3_huggingface/
|
| 147 |
+
βββ src/ # Source code
|
| 148 |
+
β βββ app.py # FastAPI inference server
|
| 149 |
+
β βββ utils/ # Utility modules
|
| 150 |
+
βββ docker/ # Docker configurations
|
| 151 |
+
β βββ Dockerfile # Container definition
|
| 152 |
+
β βββ requirements.txt # Python dependencies
|
| 153 |
+
βββ deployments/ # Platform-specific deployments
|
| 154 |
+
β βββ huggingface/ # HuggingFace configuration
|
| 155 |
+
β βββ azure/ # Azure AI Foundry configuration
|
| 156 |
+
βββ scripts/ # Automation scripts
|
| 157 |
+
β βββ deploy_all.sh # Unified deployment
|
| 158 |
+
β βββ test/ # Test scripts
|
| 159 |
+
βββ docs/ # Documentation
|
| 160 |
+
β βββ DEPLOYMENT.md # Deployment guide
|
| 161 |
+
βββ assets/ # Static assets
|
| 162 |
+
β βββ test_images/ # Test images
|
| 163 |
+
β βββ examples/ # Usage examples
|
| 164 |
+
βββ model/ # SAM3 model files (3.4GB)
|
| 165 |
+
βββ README.md # This file
|
| 166 |
```
|
| 167 |
|
| 168 |
+
## π§ Local Development
|
| 169 |
+
|
| 170 |
+
### Prerequisites
|
| 171 |
+
|
| 172 |
+
- Docker with NVIDIA GPU support
|
| 173 |
+
- Azure CLI (for ACR access)
|
| 174 |
+
- Python 3.11+
|
| 175 |
+
- CUDA-compatible GPU (optional, for local testing)
|
| 176 |
+
|
| 177 |
+
### Build Docker Image
|
| 178 |
+
|
| 179 |
```bash
|
| 180 |
+
docker build -t sam3acr4hf.azurecr.io/sam3-hf:latest -f docker/Dockerfile .
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
```
|
| 182 |
|
| 183 |
+
### Run Locally (with GPU)
|
| 184 |
+
|
| 185 |
```bash
|
| 186 |
+
docker run -p 7860:7860 --gpus all \
|
| 187 |
+
sam3acr4hf.azurecr.io/sam3-hf:latest
|
| 188 |
```
|
| 189 |
|
| 190 |
+
### Test Locally
|
| 191 |
+
|
| 192 |
+
```bash
|
| 193 |
+
# Using test script
|
| 194 |
+
python3 scripts/test/test_api.py
|
| 195 |
|
| 196 |
+
# Or using example
|
| 197 |
+
python3 assets/examples/usage_example.py
|
| 198 |
```
|
| 199 |
+
|
| 200 |
+
## π’ Deployment
|
| 201 |
+
|
| 202 |
+
### Quick Deploy (Recommended)
|
| 203 |
+
|
| 204 |
+
Use the provided deployment script for easy deployment to one or both platforms:
|
| 205 |
+
|
| 206 |
+
```bash
|
| 207 |
+
# Deploy to HuggingFace only (default)
|
| 208 |
+
./deploy_all.sh --hf
|
| 209 |
+
|
| 210 |
+
# Deploy to Azure AI Foundry only
|
| 211 |
+
./deploy_all.sh --azure
|
| 212 |
+
|
| 213 |
+
# Deploy to both platforms
|
| 214 |
+
./deploy_all.sh --all
|
| 215 |
```
|
| 216 |
|
| 217 |
+
The script handles building, tagging, and pushing to both registries automatically.
|
| 218 |
+
|
| 219 |
+
### Manual Deployment
|
| 220 |
|
| 221 |
+
#### HuggingFace
|
| 222 |
|
| 223 |
+
```bash
|
| 224 |
+
./deployments/huggingface/deploy.sh
|
| 225 |
+
```
|
| 226 |
+
|
| 227 |
+
See [`deployments/huggingface/README.md`](deployments/huggingface/README.md) for details.
|
| 228 |
+
|
| 229 |
+
#### Azure AI Foundry
|
| 230 |
|
| 231 |
```bash
|
| 232 |
+
./deployments/azure/deploy.sh
|
|
|
|
|
|
|
| 233 |
```
|
| 234 |
|
| 235 |
+
See [`deployments/azure/README.md`](deployments/azure/README.md) for details.
|
| 236 |
|
| 237 |
+
For complete deployment instructions, see [`docs/DEPLOYMENT.md`](docs/DEPLOYMENT.md).
|
| 238 |
|
| 239 |
+
## π Performance
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
|
| 241 |
+
- **Inference Time**: ~2-3 seconds for 4 classes
|
| 242 |
+
- **Throughput**: Limited by GPU (24GB VRAM)
|
| 243 |
+
- **Concurrency**: 2 concurrent requests (configurable)
|
| 244 |
+
- **Image Size**: Supports up to ~2000x2000 pixels
|
| 245 |
|
| 246 |
+
## π Key Implementation Details
|
| 247 |
|
| 248 |
+
### SAM3 Model Selection
|
| 249 |
|
| 250 |
+
β οΈ **Important**: Use `Sam3Model` (static images), not `Sam3VideoModel` (video tracking).
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
+
```python
|
| 253 |
+
from transformers import Sam3Model, Sam3Processor
|
| 254 |
|
| 255 |
+
# β
Correct for static images
|
| 256 |
+
model = Sam3Model.from_pretrained("facebook/sam3")
|
| 257 |
+
processor = Sam3Processor.from_pretrained("facebook/sam3")
|
| 258 |
+
|
| 259 |
+
# β Wrong - for video tracking
|
| 260 |
+
# model = Sam3VideoModel.from_pretrained("facebook/sam3")
|
| 261 |
```
|
| 262 |
|
| 263 |
+
### Batch Processing
|
| 264 |
+
|
| 265 |
+
To segment multiple objects in ONE image, repeat the image for each text prompt:
|
| 266 |
+
|
| 267 |
+
```python
|
| 268 |
+
# For multiple classes in one image
|
| 269 |
+
images_batch = [image] * len(classes) # Repeat image
|
| 270 |
+
inputs = processor(
|
| 271 |
+
images=images_batch,
|
| 272 |
+
text=classes,
|
| 273 |
+
return_tensors="pt"
|
| 274 |
+
)
|
| 275 |
+
```
|
| 276 |
+
|
| 277 |
+
### Dtype Handling
|
| 278 |
+
|
| 279 |
+
Only convert floating-point tensors to match model dtype (float16):
|
| 280 |
+
|
| 281 |
+
```python
|
| 282 |
+
model_dtype = next(model.parameters()).dtype
|
| 283 |
+
inputs = {
|
| 284 |
+
k: v.cuda().to(model_dtype) if v.dtype.is_floating_point
|
| 285 |
+
else v.cuda()
|
| 286 |
+
for k, v in inputs.items()
|
| 287 |
+
if isinstance(v, torch.Tensor)
|
| 288 |
}
|
| 289 |
```
|
| 290 |
+
|
| 291 |
+
## π¦ Dependencies
|
| 292 |
+
|
| 293 |
+
```txt
|
| 294 |
+
fastapi==0.121.3
|
| 295 |
+
uvicorn==0.38.0
|
| 296 |
+
torch==2.9.1
|
| 297 |
+
torchvision
|
| 298 |
+
git+https://github.com/huggingface/transformers.git # SAM3 support
|
| 299 |
+
huggingface_hub>=1.0.0,<2.0
|
| 300 |
+
numpy>=2.3.0
|
| 301 |
+
pillow>=12.0.0
|
| 302 |
+
```
|
| 303 |
+
|
| 304 |
+
## π Troubleshooting
|
| 305 |
+
|
| 306 |
+
### Endpoint Stuck Initializing
|
| 307 |
+
|
| 308 |
+
The 15.7GB Docker image takes 5-10 minutes to pull and initialize. Wait patiently.
|
| 309 |
+
|
| 310 |
+
### "shape is invalid for input" Error
|
| 311 |
+
|
| 312 |
+
Ensure you're repeating the image for each class:
|
| 313 |
+
```python
|
| 314 |
+
images_batch = [image] * len(classes)
|
| 315 |
+
```
|
| 316 |
+
|
| 317 |
+
### "dtype mismatch" Error
|
| 318 |
+
|
| 319 |
+
Don't convert integer tensors (input_ids, attention_mask) to float16.
|
| 320 |
+
|
| 321 |
+
### Empty/Wrong Masks
|
| 322 |
+
|
| 323 |
+
Ensure text prompts match actual image content. SAM3 will try to find matches even for non-existent objects.
|
| 324 |
+
|
| 325 |
+
## π Example: Road Defect Detection
|
| 326 |
+
|
| 327 |
+
```python
|
| 328 |
+
import requests
|
| 329 |
+
import base64
|
| 330 |
+
from PIL import Image
|
| 331 |
+
import io
|
| 332 |
+
|
| 333 |
+
# Load road image
|
| 334 |
+
with open("road.jpg", "rb") as f:
|
| 335 |
+
image_b64 = base64.b64encode(f.read()).decode()
|
| 336 |
+
|
| 337 |
+
# Segment road defects
|
| 338 |
+
response = requests.post(
|
| 339 |
+
"https://yzsj8fy005ix8sje.us-east-1.aws.endpoints.huggingface.cloud",
|
| 340 |
+
json={
|
| 341 |
+
"inputs": image_b64,
|
| 342 |
+
"parameters": {
|
| 343 |
+
"classes": ["pothole", "crack", "debris", "patch"]
|
| 344 |
+
}
|
| 345 |
+
}
|
| 346 |
+
)
|
| 347 |
+
|
| 348 |
+
# Save masks
|
| 349 |
+
results = response.json()
|
| 350 |
+
for result in results:
|
| 351 |
+
mask_bytes = base64.b64decode(result["mask"])
|
| 352 |
+
mask_img = Image.open(io.BytesIO(mask_bytes))
|
| 353 |
+
mask_img.save(f"defect_{result['label']}.png")
|
| 354 |
+
print(f"Found {result['label']} (score: {result['score']:.2f})")
|
| 355 |
+
```
|
| 356 |
+
|
| 357 |
+
## π Resources
|
| 358 |
+
|
| 359 |
+
- **Model**: [facebook/sam3 on HuggingFace](https://huggingface.co/facebook/sam3)
|
| 360 |
+
- **Paper**: [SAM 3: Segment Anything with Concepts](https://ai.meta.com/research/publications/sam-3/)
|
| 361 |
+
- **Endpoint Management**: [HuggingFace Console](https://ui.endpoints.huggingface.co/Logiroad/endpoints/sam3-segmentation)
|
| 362 |
+
|
| 363 |
+
## π License
|
| 364 |
+
|
| 365 |
+
This deployment uses Meta's SAM3 model. See the [model card](https://huggingface.co/facebook/sam3) for license information.
|
| 366 |
+
|
| 367 |
+
## π€ Support
|
| 368 |
+
|
| 369 |
+
For issues with:
|
| 370 |
+
- **Model/Inference**: Check SAM3 documentation
|
| 371 |
+
- **Deployment**: Contact HuggingFace support
|
| 372 |
+
- **Azure Registry**: Check ACR credentials and permissions
|
| 373 |
+
|
| 374 |
+
---
|
| 375 |
+
|
| 376 |
+
**Last Updated**: 2025-11-22
|
| 377 |
+
**Status**: ✅ Production Ready
|
| 378 |
+
**Endpoint**: https://yzsj8fy005ix8sje.us-east-1.aws.endpoints.huggingface.cloud
|
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
SAM3 API Usage Example
|
| 4 |
+
|
| 5 |
+
This example shows how to use the SAM3 text-prompted segmentation API
|
| 6 |
+
for road defect detection.
|
| 7 |
+
"""
|
| 8 |
+
import requests
|
| 9 |
+
import base64
|
| 10 |
+
from PIL import Image
|
| 11 |
+
import io
|
| 12 |
+
import os
|
| 13 |
+
|
| 14 |
+
# Configuration
|
| 15 |
+
ENDPOINT_URL = "https://yzsj8fy005ix8sje.us-east-1.aws.endpoints.huggingface.cloud"
|
| 16 |
+
|
| 17 |
+
def segment_image(image_path, classes):
|
| 18 |
+
"""
|
| 19 |
+
Segment objects in an image using text prompts
|
| 20 |
+
|
| 21 |
+
Args:
|
| 22 |
+
image_path: Path to the image file
|
| 23 |
+
classes: List of object classes to segment (e.g., ["pothole", "crack"])
|
| 24 |
+
|
| 25 |
+
Returns:
|
| 26 |
+
List of dictionaries with 'label', 'mask' (base64), and 'score'
|
| 27 |
+
"""
|
| 28 |
+
# Load and encode image
|
| 29 |
+
with open(image_path, "rb") as f:
|
| 30 |
+
image_b64 = base64.b64encode(f.read()).decode()
|
| 31 |
+
|
| 32 |
+
# Make API request
|
| 33 |
+
response = requests.post(
|
| 34 |
+
ENDPOINT_URL,
|
| 35 |
+
json={
|
| 36 |
+
"inputs": image_b64,
|
| 37 |
+
"parameters": {
|
| 38 |
+
"classes": classes
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
timeout=30
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
response.raise_for_status()
|
| 45 |
+
return response.json()
|
| 46 |
+
|
| 47 |
+
def save_masks(results, output_dir="output"):
|
| 48 |
+
"""
|
| 49 |
+
Save segmentation masks as PNG files
|
| 50 |
+
|
| 51 |
+
Args:
|
| 52 |
+
results: API response (list of dictionaries)
|
| 53 |
+
output_dir: Directory to save masks
|
| 54 |
+
"""
|
| 55 |
+
os.makedirs(output_dir, exist_ok=True)
|
| 56 |
+
|
| 57 |
+
for result in results:
|
| 58 |
+
label = result["label"]
|
| 59 |
+
score = result["score"]
|
| 60 |
+
mask_b64 = result["mask"]
|
| 61 |
+
|
| 62 |
+
# Decode mask
|
| 63 |
+
mask_bytes = base64.b64decode(mask_b64)
|
| 64 |
+
mask_image = Image.open(io.BytesIO(mask_bytes))
|
| 65 |
+
|
| 66 |
+
# Save mask
|
| 67 |
+
output_path = os.path.join(output_dir, f"mask_{label}.png")
|
| 68 |
+
mask_image.save(output_path)
|
| 69 |
+
|
| 70 |
+
print(f"β Saved {label} mask: {output_path} (score: {score:.2f})")
|
| 71 |
+
|
| 72 |
+
def main():
|
| 73 |
+
"""Example: Road defect detection"""
|
| 74 |
+
|
| 75 |
+
# Example 1: Detect road defects
|
| 76 |
+
print("Example 1: Road Defect Detection")
|
| 77 |
+
print("=" * 50)
|
| 78 |
+
|
| 79 |
+
image_path = "../test_images/test.jpg"
|
| 80 |
+
classes = ["pothole", "crack", "patch", "debris"]
|
| 81 |
+
|
| 82 |
+
print(f"Image: {image_path}")
|
| 83 |
+
print(f"Classes: {classes}")
|
| 84 |
+
print()
|
| 85 |
+
|
| 86 |
+
try:
|
| 87 |
+
results = segment_image(image_path, classes)
|
| 88 |
+
print(f"Found {len(results)} segmentation masks")
|
| 89 |
+
print()
|
| 90 |
+
|
| 91 |
+
save_masks(results, output_dir="defects_output")
|
| 92 |
+
print()
|
| 93 |
+
|
| 94 |
+
except requests.exceptions.RequestException as e:
|
| 95 |
+
print(f"Error: {e}")
|
| 96 |
+
return
|
| 97 |
+
|
| 98 |
+
# Example 2: Segment specific objects
|
| 99 |
+
print("\nExample 2: Specific Object Segmentation")
|
| 100 |
+
print("=" * 50)
|
| 101 |
+
|
| 102 |
+
classes = ["asphalt", "yellow line"]
|
| 103 |
+
|
| 104 |
+
print(f"Classes: {classes}")
|
| 105 |
+
print()
|
| 106 |
+
|
| 107 |
+
try:
|
| 108 |
+
results = segment_image(image_path, classes)
|
| 109 |
+
print(f"Found {len(results)} segmentation masks")
|
| 110 |
+
print()
|
| 111 |
+
|
| 112 |
+
save_masks(results, output_dir="objects_output")
|
| 113 |
+
|
| 114 |
+
except requests.exceptions.RequestException as e:
|
| 115 |
+
print(f"Error: {e}")
|
| 116 |
+
|
| 117 |
+
if __name__ == "__main__":
|
| 118 |
+
main()
|
|
File without changes
|
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Azure AI Foundry Deployment
|
| 2 |
+
|
| 3 |
+
Deploy SAM3 to Azure AI Foundry (pending GPU quota).
|
| 4 |
+
|
| 5 |
+
## Quick Deploy
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
./deployments/azure/deploy.sh
|
| 9 |
+
```
|
| 10 |
+
|
| 11 |
+
This will build and push the image to Azure Container Registry.
|
| 12 |
+
|
| 13 |
+
## Configuration
|
| 14 |
+
|
| 15 |
+
- **Registry**: `sam3acr.azurecr.io`
|
| 16 |
+
- **Image**: `sam3-foundry:latest`
|
| 17 |
+
- **Endpoint**: `sam3-foundry` (to be created)
|
| 18 |
+
- **Resource Group**: `productionline-test`
|
| 19 |
+
- **Instance Type**: Standard_NC6s_v3 (Tesla V100) or higher
|
| 20 |
+
|
| 21 |
+
## Status
|
| 22 |
+
|
| 23 |
+
β³ **Pending GPU Quota Approval**
|
| 24 |
+
|
| 25 |
+
Once GPU quota is approved, create the endpoint:
|
| 26 |
+
|
| 27 |
+
## Create Endpoint (Azure Portal)
|
| 28 |
+
|
| 29 |
+
1. Navigate to Azure AI Foundry workspace
|
| 30 |
+
2. Go to **Endpoints** β **Real-time endpoints**
|
| 31 |
+
3. Click **Create**
|
| 32 |
+
4. Select **Custom container**
|
| 33 |
+
5. Image: `sam3acr.azurecr.io/sam3-foundry:latest`
|
| 34 |
+
6. Instance type: **Standard_NC6s_v3** or higher
|
| 35 |
+
7. Deploy
|
| 36 |
+
|
| 37 |
+
## Create Endpoint (Azure CLI)
|
| 38 |
+
|
| 39 |
+
```bash
|
| 40 |
+
# Create endpoint
|
| 41 |
+
az ml online-endpoint create \
|
| 42 |
+
--name sam3-foundry \
|
| 43 |
+
--resource-group productionline-test \
|
| 44 |
+
--workspace-name <your-workspace>
|
| 45 |
+
|
| 46 |
+
# Create deployment
|
| 47 |
+
az ml online-deployment create \
|
| 48 |
+
--name sam3-foundry-deployment \
|
| 49 |
+
--endpoint sam3-foundry \
|
| 50 |
+
--model-uri sam3acr.azurecr.io/sam3-foundry:latest \
|
| 51 |
+
--instance-type Standard_NC6s_v3 \
|
| 52 |
+
--instance-count 1
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
## Testing
|
| 56 |
+
|
| 57 |
+
Once deployed, update the endpoint URL in the test script and run:
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
python3 scripts/test/test_api.py
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
## For More Information
|
| 64 |
+
|
| 65 |
+
See `docs/DEPLOYMENT.md` for complete Azure AI Foundry deployment guide.
|
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Deploy SAM3 to Azure AI Foundry
|
| 3 |
+
set -e
|
| 4 |
+
|
| 5 |
+
echo "π· Deploying SAM3 to Azure AI Foundry..."
|
| 6 |
+
echo ""
|
| 7 |
+
|
| 8 |
+
# Configuration
|
| 9 |
+
REGISTRY="sam3acr.azurecr.io"
|
| 10 |
+
IMAGE="sam3-foundry:latest"
|
| 11 |
+
ENDPOINT_NAME="sam3-foundry"
|
| 12 |
+
RESOURCE_GROUP="productionline-test"
|
| 13 |
+
|
| 14 |
+
# Navigate to project root
|
| 15 |
+
cd "$(dirname "$0")/../.."
|
| 16 |
+
|
| 17 |
+
# Step 1: Build Docker image
|
| 18 |
+
echo "[1/3] Building Docker image..."
|
| 19 |
+
docker build -t ${REGISTRY}/${IMAGE} -f docker/Dockerfile .
|
| 20 |
+
echo "β Build complete"
|
| 21 |
+
echo ""
|
| 22 |
+
|
| 23 |
+
# Step 2: Login to ACR
|
| 24 |
+
echo "[2/3] Logging in to Azure Container Registry..."
|
| 25 |
+
az acr login --name sam3acr
|
| 26 |
+
echo "β Login successful"
|
| 27 |
+
echo ""
|
| 28 |
+
|
| 29 |
+
# Step 3: Push image
|
| 30 |
+
echo "[3/3] Pushing image to registry..."
|
| 31 |
+
docker push ${REGISTRY}/${IMAGE}
|
| 32 |
+
echo "β Push complete"
|
| 33 |
+
echo ""
|
| 34 |
+
|
| 35 |
+
echo "ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ"
|
| 36 |
+
echo "β
Image Pushed to Azure Container Registry"
|
| 37 |
+
echo "ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ"
|
| 38 |
+
echo ""
|
| 39 |
+
echo "Registry: ${REGISTRY}"
|
| 40 |
+
echo "Image: ${IMAGE}"
|
| 41 |
+
echo ""
|
| 42 |
+
echo "β οΈ Manual Step Required: Create Azure AI Foundry Endpoint"
|
| 43 |
+
echo ""
|
| 44 |
+
echo "Option 1: Azure Portal"
|
| 45 |
+
echo " 1. Navigate to your Azure AI Foundry workspace"
|
| 46 |
+
echo " 2. Go to Endpoints β Real-time endpoints"
|
| 47 |
+
echo " 3. Click 'Create'"
|
| 48 |
+
echo " 4. Select 'Custom container'"
|
| 49 |
+
echo " 5. Image: ${REGISTRY}/${IMAGE}"
|
| 50 |
+
echo " 6. Instance: Standard_NC6s_v3 or higher"
|
| 51 |
+
echo ""
|
| 52 |
+
echo "Option 2: Azure CLI"
|
| 53 |
+
echo " az ml online-endpoint create \\"
|
| 54 |
+
echo " --name ${ENDPOINT_NAME} \\"
|
| 55 |
+
echo " --resource-group ${RESOURCE_GROUP}"
|
| 56 |
+
echo ""
|
| 57 |
+
echo " az ml online-deployment create \\"
|
| 58 |
+
echo " --name ${ENDPOINT_NAME}-deployment \\"
|
| 59 |
+
echo " --endpoint ${ENDPOINT_NAME} \\"
|
| 60 |
+
echo " --model-uri ${REGISTRY}/${IMAGE} \\"
|
| 61 |
+
echo " --instance-type Standard_NC6s_v3"
|
| 62 |
+
echo ""
|
| 63 |
+
echo "For complete instructions, see: docs/DEPLOYMENT.md"
|
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# HuggingFace Deployment
|
| 2 |
+
|
| 3 |
+
Deploy SAM3 to HuggingFace Inference Endpoints.
|
| 4 |
+
|
| 5 |
+
## Quick Deploy
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
./deployments/huggingface/deploy.sh
|
| 9 |
+
```
|
| 10 |
+
|
| 11 |
+
## Configuration
|
| 12 |
+
|
| 13 |
+
- **Registry**: `sam3acr4hf.azurecr.io`
|
| 14 |
+
- **Image**: `sam3-hf:latest`
|
| 15 |
+
- **Endpoint**: `sam3-segmentation`
|
| 16 |
+
- **Organization**: `Logiroad`
|
| 17 |
+
- **Hardware**: NVIDIA A10G (24GB VRAM)
|
| 18 |
+
|
| 19 |
+
## Manual Deployment
|
| 20 |
+
|
| 21 |
+
```bash
|
| 22 |
+
# Build and push
|
| 23 |
+
docker build -t sam3acr4hf.azurecr.io/sam3-hf:latest -f docker/Dockerfile .
|
| 24 |
+
az acr login --name sam3acr4hf
|
| 25 |
+
docker push sam3acr4hf.azurecr.io/sam3-hf:latest
|
| 26 |
+
|
| 27 |
+
# Restart endpoint
|
| 28 |
+
python3 -c "from huggingface_hub import HfApi; api = HfApi(); e = api.get_inference_endpoint('sam3-segmentation', namespace='Logiroad'); e.pause(); e.resume()"
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
## Testing
|
| 32 |
+
|
| 33 |
+
```bash
|
| 34 |
+
python3 scripts/test/test_api.py
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
## Endpoint URL
|
| 38 |
+
|
| 39 |
+
https://yzsj8fy005ix8sje.us-east-1.aws.endpoints.huggingface.cloud
|
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Deploy SAM3 to HuggingFace Inference Endpoints.
#
# Steps: build the Docker image, push it to the Azure Container Registry
# that backs the HF endpoint, then pause/resume the endpoint so it pulls
# the new image. With `set -e` the script aborts on any failed step,
# including the endpoint never reaching the 'running' state.
set -e

echo "Deploying SAM3 to HuggingFace..."
echo ""

# Configuration
# NOTE: ENDPOINT_NAME/NAMESPACE are duplicated inside the quoted heredoc
# below (no shell expansion in <<'EOF'); keep the two in sync.
REGISTRY="sam3acr4hf.azurecr.io"
IMAGE="sam3-hf:latest"
ENDPOINT_NAME="sam3-segmentation"
NAMESPACE="Logiroad"

# Navigate to project root (this script lives in deployments/huggingface/)
cd "$(dirname "$0")/../.."

# Step 1: Build Docker image
echo "[1/4] Building Docker image..."
docker build -t "${REGISTRY}/${IMAGE}" -f docker/Dockerfile .
echo "Build complete"
echo ""

# Step 2: Login to ACR
echo "[2/4] Logging in to Azure Container Registry..."
az acr login --name sam3acr4hf
echo "Login successful"
echo ""

# Step 3: Push image
echo "[3/4] Pushing image to registry..."
docker push "${REGISTRY}/${IMAGE}"
echo "Push complete"
echo ""

# Step 4: Restart endpoint so it pulls the freshly pushed image.
echo "[4/4] Restarting HuggingFace endpoint..."
python3 << 'EOF'
import sys
import time

from huggingface_hub import HfApi

api = HfApi()
endpoint = api.get_inference_endpoint('sam3-segmentation', namespace='Logiroad')

print("  Pausing endpoint...")
endpoint.pause()
time.sleep(5)

print("  Resuming endpoint...")
endpoint.resume()

# Poll for up to ~5 minutes (60 polls x 5s) until the endpoint is running.
print("  Waiting for endpoint to be running...")
for i in range(60):
    endpoint = api.get_inference_endpoint('sam3-segmentation', namespace='Logiroad')
    if endpoint.status == 'running':
        print(f"  Endpoint running after {i*5}s")
        break
    time.sleep(5)
else:
    # BUG FIX: the original printed a timeout message but still exited 0,
    # so `set -e` never caught a failed restart and the script reported
    # "Deployment Complete" anyway. Exit non-zero so the deploy fails loudly.
    print("  Timeout waiting for endpoint", file=sys.stderr)
    sys.exit(1)
EOF

echo ""
echo "------------------------------------------------------------"
echo "HuggingFace Deployment Complete"
echo "------------------------------------------------------------"
echo ""
echo "Endpoint: https://yzsj8fy005ix8sje.us-east-1.aws.endpoints.huggingface.cloud"
echo ""
echo "Test with:"
echo "  python3 scripts/test/test_api.py"
|
|
@@ -18,7 +18,7 @@ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
|
|
| 18 |
WORKDIR /app
|
| 19 |
|
| 20 |
# Copy requirements first (to enable Docker cache)
|
| 21 |
-
COPY requirements.txt /app/requirements.txt
|
| 22 |
|
| 23 |
# Install PyTorch with CUDA support first (separate to use correct index URL)
|
| 24 |
RUN pip install --no-cache-dir torch==2.9.1 --index-url https://download.pytorch.org/whl/cu129 --break-system-packages
|
|
@@ -27,7 +27,7 @@ RUN pip install --no-cache-dir torch==2.9.1 --index-url https://download.pytorch
|
|
| 27 |
RUN pip install --no-cache-dir -r requirements.txt --break-system-packages
|
| 28 |
|
| 29 |
# Copy application code
|
| 30 |
-
COPY app.py /app/app.py
|
| 31 |
COPY model /app/model
|
| 32 |
|
| 33 |
# Uvicorn exposed port
|
|
|
|
| 18 |
WORKDIR /app
|
| 19 |
|
| 20 |
# Copy requirements first (to enable Docker cache)
|
| 21 |
+
COPY docker/requirements.txt /app/requirements.txt
|
| 22 |
|
| 23 |
# Install PyTorch with CUDA support first (separate to use correct index URL)
|
| 24 |
RUN pip install --no-cache-dir torch==2.9.1 --index-url https://download.pytorch.org/whl/cu129 --break-system-packages
|
|
|
|
| 27 |
RUN pip install --no-cache-dir -r requirements.txt --break-system-packages
|
| 28 |
|
| 29 |
# Copy application code
|
| 30 |
+
COPY src/app.py /app/app.py
|
| 31 |
COPY model /app/model
|
| 32 |
|
| 33 |
# Uvicorn exposed port
|
|
@@ -5,12 +5,13 @@ uvicorn==0.38.0
|
|
| 5 |
# PyTorch with CUDA 12.9 (for HF L4/A10G/A100 GPUs)
|
| 6 |
# Note: Install with: pip install torch==2.9.1 --index-url https://download.pytorch.org/whl/cu129
|
| 7 |
torch==2.9.1
|
|
|
|
| 8 |
|
| 9 |
-
# Transformers with SAM3 support
|
| 10 |
-
transformers
|
| 11 |
|
| 12 |
-
# Hugging Face Hub
|
| 13 |
-
huggingface_hub>=0.
|
| 14 |
|
| 15 |
# Core dependencies
|
| 16 |
numpy>=2.3.0
|
|
|
|
| 5 |
# PyTorch with CUDA 12.9 (for HF L4/A10G/A100 GPUs)
|
| 6 |
# Note: Install with: pip install torch==2.9.1 --index-url https://download.pytorch.org/whl/cu129
|
| 7 |
torch==2.9.1
|
| 8 |
+
torchvision
|
| 9 |
|
| 10 |
+
# Transformers with SAM3 support (install from git main for latest models)
|
| 11 |
+
git+https://github.com/huggingface/transformers.git
|
| 12 |
|
| 13 |
+
# Hugging Face Hub (updated for transformers 5.0.0.dev0)
|
| 14 |
+
huggingface_hub>=1.0.0,<2.0
|
| 15 |
|
| 16 |
# Core dependencies
|
| 17 |
numpy>=2.3.0
|
|
@@ -0,0 +1,361 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dual Deployment Guide - HuggingFace & Azure AI Foundry
|
| 2 |
+
|
| 3 |
+
This repository supports deployment to both **HuggingFace Inference Endpoints** and **Azure AI Foundry** using the same codebase and Docker image.
|
| 4 |
+
|
| 5 |
+
## π Deployment Overview
|
| 6 |
+
|
| 7 |
+
| Platform | Status | Container Registry | Endpoint |
|
| 8 |
+
|----------|--------|-------------------|----------|
|
| 9 |
+
| **HuggingFace** | ✅ Running | `sam3acr4hf.azurecr.io` | https://yzsj8fy005ix8sje.us-east-1.aws.endpoints.huggingface.cloud |
|
| 10 |
+
| **Azure AI Foundry** | β³ Pending GPU Quota | `sam3acr.azurecr.io` | To be deployed |
|
| 11 |
+
|
| 12 |
+
Both deployments use the **same Docker image** with SAM3Model for static image segmentation.
|
| 13 |
+
|
| 14 |
+
---
|
| 15 |
+
|
| 16 |
+
## π HuggingFace Deployment (Current)
|
| 17 |
+
|
| 18 |
+
### Status
|
| 19 |
+
✅ **DEPLOYED AND RUNNING**
|
| 20 |
+
|
| 21 |
+
### Registry
|
| 22 |
+
```bash
|
| 23 |
+
sam3acr4hf.azurecr.io/sam3-hf:latest
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
### Quick Deploy
|
| 27 |
+
```bash
|
| 28 |
+
# Build and push
|
| 29 |
+
docker build -t sam3acr4hf.azurecr.io/sam3-hf:latest .
|
| 30 |
+
az acr login --name sam3acr4hf
|
| 31 |
+
docker push sam3acr4hf.azurecr.io/sam3-hf:latest
|
| 32 |
+
|
| 33 |
+
# Restart endpoint
|
| 34 |
+
python3 << 'EOF'
|
| 35 |
+
from huggingface_hub import HfApi
|
| 36 |
+
api = HfApi()
|
| 37 |
+
endpoint = api.get_inference_endpoint('sam3-segmentation', namespace='Logiroad')
|
| 38 |
+
endpoint.pause()
|
| 39 |
+
endpoint.resume()
|
| 40 |
+
EOF
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
### Configuration
|
| 44 |
+
- **Hardware**: NVIDIA A10G (24GB VRAM)
|
| 45 |
+
- **Organization**: Logiroad
|
| 46 |
+
- **Access**: Public
|
| 47 |
+
- **Auto-scaling**: Enabled (0-5 replicas)
|
| 48 |
+
|
| 49 |
+
---
|
| 50 |
+
|
| 51 |
+
## π· Azure AI Foundry Deployment (Future)
|
| 52 |
+
|
| 53 |
+
### Status
|
| 54 |
+
β³ **WAITING FOR GPU QUOTA**
|
| 55 |
+
|
| 56 |
+
Once GPU quota is approved, deploy using the same Docker image:
|
| 57 |
+
|
| 58 |
+
### Registry
|
| 59 |
+
```bash
|
| 60 |
+
sam3acr.azurecr.io/sam3-foundry:latest
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
### Deployment Steps
|
| 64 |
+
|
| 65 |
+
#### 1. Build and Push to Azure ACR
|
| 66 |
+
|
| 67 |
+
```bash
|
| 68 |
+
# Login to Azure AI Foundry ACR
|
| 69 |
+
az acr login --name sam3acr
|
| 70 |
+
|
| 71 |
+
# Build with Azure AI Foundry tag
|
| 72 |
+
docker build -t sam3acr.azurecr.io/sam3-foundry:latest .
|
| 73 |
+
|
| 74 |
+
# Push to Azure ACR
|
| 75 |
+
docker push sam3acr.azurecr.io/sam3-foundry:latest
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
#### 2. Deploy to Azure AI Foundry
|
| 79 |
+
|
| 80 |
+
Using Azure CLI:
|
| 81 |
+
|
| 82 |
+
```bash
|
| 83 |
+
# Create Azure AI Foundry endpoint
|
| 84 |
+
az ml online-endpoint create \
|
| 85 |
+
--name sam3-foundry \
|
| 86 |
+
--resource-group productionline-test \
|
| 87 |
+
--workspace-name <your-workspace>
|
| 88 |
+
|
| 89 |
+
# Create deployment
|
| 90 |
+
az ml online-deployment create \
|
| 91 |
+
--name sam3-deployment \
|
| 92 |
+
--endpoint sam3-foundry \
|
| 93 |
+
--model-uri sam3acr.azurecr.io/sam3-foundry:latest \
|
| 94 |
+
--instance-type Standard_NC6s_v3 \
|
| 95 |
+
--instance-count 1
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
Or using Azure Portal:
|
| 99 |
+
1. Navigate to Azure AI Foundry workspace
|
| 100 |
+
2. Go to **Endpoints** β **Real-time endpoints**
|
| 101 |
+
3. Click **Create**
|
| 102 |
+
4. Select **Custom container**
|
| 103 |
+
5. Image: `sam3acr.azurecr.io/sam3-foundry:latest`
|
| 104 |
+
6. Instance type: **Standard_NC6s_v3** (Tesla V100)
|
| 105 |
+
7. Deploy
|
| 106 |
+
|
| 107 |
+
#### 3. Test Azure AI Foundry Endpoint
|
| 108 |
+
|
| 109 |
+
```python
|
| 110 |
+
import requests
|
| 111 |
+
import base64
|
| 112 |
+
|
| 113 |
+
# Get endpoint URL and key from Azure Portal
|
| 114 |
+
ENDPOINT_URL = "https://<your-endpoint>.azureml.net/score"
|
| 115 |
+
API_KEY = "<your-api-key>"
|
| 116 |
+
|
| 117 |
+
with open("test.jpg", "rb") as f:
|
| 118 |
+
image_b64 = base64.b64encode(f.read()).decode()
|
| 119 |
+
|
| 120 |
+
response = requests.post(
|
| 121 |
+
ENDPOINT_URL,
|
| 122 |
+
json={
|
| 123 |
+
"inputs": image_b64,
|
| 124 |
+
"parameters": {"classes": ["pothole", "asphalt"]}
|
| 125 |
+
},
|
| 126 |
+
headers={"Authorization": f"Bearer {API_KEY}"}
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
print(response.json())
|
| 130 |
+
```
|
| 131 |
+
|
| 132 |
+
---
|
| 133 |
+
|
| 134 |
+
## π Unified Deployment Workflow
|
| 135 |
+
|
| 136 |
+
Since both platforms use the **same Docker image**, you can deploy to both simultaneously:
|
| 137 |
+
|
| 138 |
+
### Option 1: Separate Tags (Recommended)
|
| 139 |
+
|
| 140 |
+
```bash
|
| 141 |
+
# Build once
|
| 142 |
+
docker build -t sam3-base:latest .
|
| 143 |
+
|
| 144 |
+
# Tag for HuggingFace
|
| 145 |
+
docker tag sam3-base:latest sam3acr4hf.azurecr.io/sam3-hf:latest
|
| 146 |
+
|
| 147 |
+
# Tag for Azure AI Foundry
|
| 148 |
+
docker tag sam3-base:latest sam3acr.azurecr.io/sam3-foundry:latest
|
| 149 |
+
|
| 150 |
+
# Push to both registries
|
| 151 |
+
az acr login --name sam3acr4hf
|
| 152 |
+
docker push sam3acr4hf.azurecr.io/sam3-hf:latest
|
| 153 |
+
|
| 154 |
+
az acr login --name sam3acr
|
| 155 |
+
docker push sam3acr.azurecr.io/sam3-foundry:latest
|
| 156 |
+
```
|
| 157 |
+
|
| 158 |
+
### Option 2: Deploy Script
|
| 159 |
+
|
| 160 |
+
Create `deploy_all.sh`:
|
| 161 |
+
|
| 162 |
+
```bash
|
| 163 |
+
#!/bin/bash
|
| 164 |
+
set -e
|
| 165 |
+
|
| 166 |
+
echo "Building Docker image..."
|
| 167 |
+
docker build -t sam3:latest .
|
| 168 |
+
|
| 169 |
+
echo "Pushing to HuggingFace ACR..."
|
| 170 |
+
docker tag sam3:latest sam3acr4hf.azurecr.io/sam3-hf:latest
|
| 171 |
+
az acr login --name sam3acr4hf
|
| 172 |
+
docker push sam3acr4hf.azurecr.io/sam3-hf:latest
|
| 173 |
+
|
| 174 |
+
echo "Pushing to Azure AI Foundry ACR..."
|
| 175 |
+
docker tag sam3:latest sam3acr.azurecr.io/sam3-foundry:latest
|
| 176 |
+
az acr login --name sam3acr
|
| 177 |
+
docker push sam3acr.azurecr.io/sam3-foundry:latest
|
| 178 |
+
|
| 179 |
+
echo "β
Deployed to both registries!"
|
| 180 |
+
```
|
| 181 |
+
|
| 182 |
+
---
|
| 183 |
+
|
| 184 |
+
## π Platform Comparison
|
| 185 |
+
|
| 186 |
+
| Feature | HuggingFace | Azure AI Foundry |
|
| 187 |
+
|---------|-------------|------------------|
|
| 188 |
+
| **GPU** | NVIDIA A10G (24GB) | Tesla V100 (16GB) or A100 |
|
| 189 |
+
| **Auto-scaling** | β
Yes (0-5 replicas) | β
Yes (configurable) |
|
| 190 |
+
| **Authentication** | Public or Token | API Key required |
|
| 191 |
+
| **Pricing** | Per-second billing | Per-hour billing |
|
| 192 |
+
| **Scale to Zero** | β
Yes | β οΈ Limited support |
|
| 193 |
+
| **Integration** | HuggingFace ecosystem | Azure ML ecosystem |
|
| 194 |
+
| **Monitoring** | HF Dashboard | Azure Monitor |
|
| 195 |
+
|
| 196 |
+
---
|
| 197 |
+
|
| 198 |
+
## π§ Configuration Differences
|
| 199 |
+
|
| 200 |
+
### API Authentication
|
| 201 |
+
|
| 202 |
+
**HuggingFace** (current - public):
|
| 203 |
+
```python
|
| 204 |
+
response = requests.post(endpoint_url, json=payload)
|
| 205 |
+
```
|
| 206 |
+
|
| 207 |
+
**Azure AI Foundry** (requires key):
|
| 208 |
+
```python
|
| 209 |
+
response = requests.post(
|
| 210 |
+
endpoint_url,
|
| 211 |
+
json=payload,
|
| 212 |
+
headers={"Authorization": f"Bearer {api_key}"}
|
| 213 |
+
)
|
| 214 |
+
```
|
| 215 |
+
|
| 216 |
+
### Environment Variables
|
| 217 |
+
|
| 218 |
+
For Azure AI Foundry, you may need to add environment variables:
|
| 219 |
+
|
| 220 |
+
```dockerfile
|
| 221 |
+
# Add to Dockerfile if needed for Azure
|
| 222 |
+
ENV AZURE_AI_FOUNDRY=true
|
| 223 |
+
ENV MLFLOW_TRACKING_URI=<your-mlflow-uri>
|
| 224 |
+
```
|
| 225 |
+
|
| 226 |
+
### Health Check Endpoints
|
| 227 |
+
|
| 228 |
+
Both platforms expect:
|
| 229 |
+
- `GET /health` - Health check
|
| 230 |
+
- `POST /` - Inference endpoint
|
| 231 |
+
|
| 232 |
+
Our current `app.py` already supports both! β
|
| 233 |
+
|
| 234 |
+
---
|
| 235 |
+
|
| 236 |
+
## π§ͺ Testing Both Deployments
|
| 237 |
+
|
| 238 |
+
Create `test_both_platforms.py`:
|
| 239 |
+
|
| 240 |
+
```python
|
| 241 |
+
import requests
|
| 242 |
+
import base64
|
| 243 |
+
|
| 244 |
+
def test_endpoint(name, url, api_key=None):
|
| 245 |
+
"""Test an endpoint"""
|
| 246 |
+
print(f"\n{'='*60}")
|
| 247 |
+
print(f"Testing {name}")
|
| 248 |
+
print(f"{'='*60}")
|
| 249 |
+
|
| 250 |
+
# Health check
|
| 251 |
+
headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
|
| 252 |
+
health = requests.get(f"{url}/health", headers=headers)
|
| 253 |
+
print(f"Health: {health.status_code}")
|
| 254 |
+
|
| 255 |
+
# Inference
|
| 256 |
+
with open("test.jpg", "rb") as f:
|
| 257 |
+
image_b64 = base64.b64encode(f.read()).decode()
|
| 258 |
+
|
| 259 |
+
response = requests.post(
|
| 260 |
+
url,
|
| 261 |
+
json={
|
| 262 |
+
"inputs": image_b64,
|
| 263 |
+
"parameters": {"classes": ["pothole", "asphalt"]}
|
| 264 |
+
},
|
| 265 |
+
headers=headers
|
| 266 |
+
)
|
| 267 |
+
|
| 268 |
+
print(f"Inference: {response.status_code}")
|
| 269 |
+
if response.status_code == 200:
|
| 270 |
+
results = response.json()
|
| 271 |
+
print(f"β
Generated {len(results)} masks")
|
| 272 |
+
else:
|
| 273 |
+
print(f"β Error: {response.text}")
|
| 274 |
+
|
| 275 |
+
# Test HuggingFace
|
| 276 |
+
test_endpoint(
|
| 277 |
+
"HuggingFace",
|
| 278 |
+
"https://yzsj8fy005ix8sje.us-east-1.aws.endpoints.huggingface.cloud"
|
| 279 |
+
)
|
| 280 |
+
|
| 281 |
+
# Test Azure AI Foundry (when deployed)
|
| 282 |
+
# test_endpoint(
|
| 283 |
+
# "Azure AI Foundry",
|
| 284 |
+
# "https://<your-endpoint>.azureml.net/score",
|
| 285 |
+
# api_key="<your-key>"
|
| 286 |
+
# )
|
| 287 |
+
```
|
| 288 |
+
|
| 289 |
+
---
|
| 290 |
+
|
| 291 |
+
## π Deployment Checklist
|
| 292 |
+
|
| 293 |
+
### HuggingFace (Complete) β
|
| 294 |
+
- [x] Azure Container Registry created (`sam3acr4hf`)
|
| 295 |
+
- [x] Docker image built and pushed
|
| 296 |
+
- [x] HuggingFace endpoint created
|
| 297 |
+
- [x] Model validated with test image
|
| 298 |
+
- [x] Documentation complete
|
| 299 |
+
|
| 300 |
+
### Azure AI Foundry (Pending GPU Quota) β³
|
| 301 |
+
- [x] Azure Container Registry exists (`sam3acr`)
|
| 302 |
+
- [ ] GPU quota approved
|
| 303 |
+
- [ ] Azure AI Foundry workspace created
|
| 304 |
+
- [ ] Docker image pushed to `sam3acr`
|
| 305 |
+
- [ ] Endpoint deployed
|
| 306 |
+
- [ ] API key obtained
|
| 307 |
+
- [ ] Endpoint validated
|
| 308 |
+
|
| 309 |
+
---
|
| 310 |
+
|
| 311 |
+
## π Troubleshooting
|
| 312 |
+
|
| 313 |
+
### HuggingFace Issues
|
| 314 |
+
See main README.md troubleshooting section.
|
| 315 |
+
|
| 316 |
+
### Azure AI Foundry Issues
|
| 317 |
+
|
| 318 |
+
**Issue**: GPU quota not available
|
| 319 |
+
- **Solution**: Request quota increase in Azure Portal β Quotas β ML quotas
|
| 320 |
+
|
| 321 |
+
**Issue**: Container registry authentication failed
|
| 322 |
+
```bash
|
| 323 |
+
az acr login --name sam3acr --expose-token
|
| 324 |
+
```
|
| 325 |
+
|
| 326 |
+
**Issue**: Endpoint deployment fails
|
| 327 |
+
- Check Azure Activity Log for detailed error
|
| 328 |
+
- Verify image is accessible: `az acr repository show --name sam3acr --image sam3-foundry:latest`
|
| 329 |
+
|
| 330 |
+
**Issue**: Model loading timeout
|
| 331 |
+
- Increase deployment timeout in Azure ML Studio
|
| 332 |
+
- Consider using smaller instance for testing
|
| 333 |
+
|
| 334 |
+
---
|
| 335 |
+
|
| 336 |
+
## π‘ Best Practices
|
| 337 |
+
|
| 338 |
+
1. **Use same Docker image** for both platforms to ensure consistency
|
| 339 |
+
2. **Tag images with versions** (e.g., `v1.0.0`) for rollback capability
|
| 340 |
+
3. **Test locally first** before pushing to registries
|
| 341 |
+
4. **Monitor costs** on both platforms (HF per-second, Azure per-hour)
|
| 342 |
+
5. **Set up alerts** for endpoint health on both platforms
|
| 343 |
+
6. **Keep API keys secure** (use Azure Key Vault for Azure AI Foundry)
|
| 344 |
+
|
| 345 |
+
---
|
| 346 |
+
|
| 347 |
+
## π Resources
|
| 348 |
+
|
| 349 |
+
### HuggingFace
|
| 350 |
+
- [Inference Endpoints Docs](https://huggingface.co/docs/inference-endpoints)
|
| 351 |
+
- [Custom Docker Images](https://huggingface.co/docs/inference-endpoints/guides/custom_container)
|
| 352 |
+
|
| 353 |
+
### Azure AI Foundry
|
| 354 |
+
- [Azure ML Endpoints](https://learn.microsoft.com/azure/machine-learning/concept-endpoints)
|
| 355 |
+
- [Deploy Custom Containers](https://learn.microsoft.com/azure/machine-learning/how-to-deploy-custom-container)
|
| 356 |
+
- [GPU Quota Requests](https://learn.microsoft.com/azure/machine-learning/how-to-manage-quotas)
|
| 357 |
+
|
| 358 |
+
---
|
| 359 |
+
|
| 360 |
+
**Last Updated**: 2025-11-22
|
| 361 |
+
**Next Step**: Deploy to Azure AI Foundry once GPU quota is approved
|
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df2aaed0e692a46c60919b999dbc2f9e99a2aa3bda4f355bac442acd1010a07f
|
| 3 |
+
size 4002
|
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1eb699cd5c7231e0ab4c8edcc05e68da9cb929ff4f3a51339efa24fb02351693
|
| 3 |
+
size 3362838680
|
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9519992cb0d55181c42779c1dd001b4adccbb513ff64cd3565cf3710e14476c4
|
| 3 |
+
size 889
|
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a636ae7273ac541b10e50fcedd5a049b610c25473893a70590dcfa105514c16
|
| 3 |
+
size 794
|
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Deploy SAM3 to both HuggingFace and Azure AI Foundry.
#
# Builds the Docker image once, then tags and pushes it to one or both
# container registries depending on the flags passed (--hf / --azure /
# --all). Defaults to HuggingFace when no flag is given.
set -e

# Colors for output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

echo -e "${BLUE}==============================================================${NC}"
echo -e "${BLUE}  SAM3 Dual Deployment Script                                 ${NC}"
echo -e "${BLUE}  HuggingFace + Azure AI Foundry                              ${NC}"
echo -e "${BLUE}==============================================================${NC}"
echo ""

# Configuration
HF_REGISTRY="sam3acr4hf.azurecr.io"
HF_IMAGE="sam3-hf:latest"
AZURE_REGISTRY="sam3acr.azurecr.io"
AZURE_IMAGE="sam3-foundry:latest"

# Parse arguments
DEPLOY_HF=false
DEPLOY_AZURE=false

while [[ $# -gt 0 ]]; do
    case $1 in
        --hf)
            DEPLOY_HF=true
            shift
            ;;
        --azure)
            DEPLOY_AZURE=true
            shift
            ;;
        --all)
            DEPLOY_HF=true
            DEPLOY_AZURE=true
            shift
            ;;
        --help)
            echo "Usage: ./deploy_all.sh [options]"
            echo ""
            echo "Options:"
            echo "  --hf      Deploy to HuggingFace only"
            echo "  --azure   Deploy to Azure AI Foundry only"
            echo "  --all     Deploy to both platforms"
            echo "  --help    Show this help message"
            echo ""
            echo "Examples:"
            echo "  ./deploy_all.sh --hf      # Deploy to HuggingFace"
            echo "  ./deploy_all.sh --azure   # Deploy to Azure AI Foundry"
            echo "  ./deploy_all.sh --all     # Deploy to both"
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            echo "Use --help for usage information"
            exit 1
            ;;
    esac
done

# Default to HuggingFace if no option specified
if [ "$DEPLOY_HF" = false ] && [ "$DEPLOY_AZURE" = false ]; then
    echo -e "${YELLOW}No deployment target specified. Defaulting to HuggingFace.${NC}"
    echo -e "${YELLOW}Use --all to deploy to both platforms.${NC}"
    echo ""
    DEPLOY_HF=true
fi

# BUG FIX: the build below references docker/Dockerfile relative to the
# repo root, but the original never changed directory, so running the
# script from anywhere but the root failed. Navigate to the project root
# (this script lives in scripts/), matching deployments/huggingface/deploy.sh.
cd "$(dirname "$0")/.."

# Step 1: Build Docker image (built once, tagged per target below)
echo -e "${BLUE}[1/4] Building Docker image...${NC}"
docker build -t sam3:latest -f docker/Dockerfile .
echo -e "${GREEN}Build complete${NC}"
echo ""

# Step 2: Deploy to HuggingFace
if [ "$DEPLOY_HF" = true ]; then
    echo -e "${BLUE}[2/4] Deploying to HuggingFace...${NC}"

    # Tag for HuggingFace
    docker tag sam3:latest "${HF_REGISTRY}/${HF_IMAGE}"
    echo "  Tagged: ${HF_REGISTRY}/${HF_IMAGE}"

    # Login to HF ACR
    echo "  Logging in to HuggingFace ACR..."
    az acr login --name sam3acr4hf

    # Push to HF ACR
    echo "  Pushing to HuggingFace ACR..."
    docker push "${HF_REGISTRY}/${HF_IMAGE}"

    echo -e "${GREEN}HuggingFace deployment complete${NC}"
    echo ""
else
    echo -e "${YELLOW}[2/4] Skipping HuggingFace deployment${NC}"
    echo ""
fi

# Step 3: Deploy to Azure AI Foundry
if [ "$DEPLOY_AZURE" = true ]; then
    echo -e "${BLUE}[3/4] Deploying to Azure AI Foundry...${NC}"

    # Tag for Azure
    docker tag sam3:latest "${AZURE_REGISTRY}/${AZURE_IMAGE}"
    echo "  Tagged: ${AZURE_REGISTRY}/${AZURE_IMAGE}"

    # Login to Azure ACR
    echo "  Logging in to Azure ACR..."
    az acr login --name sam3acr

    # Push to Azure ACR
    echo "  Pushing to Azure ACR..."
    docker push "${AZURE_REGISTRY}/${AZURE_IMAGE}"

    echo -e "${GREEN}Azure AI Foundry image pushed${NC}"
    echo -e "${YELLOW}  Note: Azure AI Foundry endpoint deployment pending GPU quota${NC}"
    echo -e "${YELLOW}  See DEPLOYMENT.md for endpoint deployment instructions${NC}"
    echo ""
else
    echo -e "${YELLOW}[3/4] Skipping Azure AI Foundry deployment${NC}"
    echo ""
fi

# Step 4: Summary
echo -e "${BLUE}[4/4] Deployment Summary${NC}"
echo "------------------------------------------------------------"

if [ "$DEPLOY_HF" = true ]; then
    echo -e "${GREEN}HuggingFace:${NC}"
    echo "  Registry: ${HF_REGISTRY}"
    echo "  Image:    ${HF_IMAGE}"
    echo "  Endpoint: https://yzsj8fy005ix8sje.us-east-1.aws.endpoints.huggingface.cloud"
    echo ""
    echo "  Restart endpoint with:"
    echo "    python3 -c 'from huggingface_hub import HfApi; api = HfApi(); e = api.get_inference_endpoint(\"sam3-segmentation\", namespace=\"Logiroad\"); e.pause(); e.resume()'"
    echo ""
fi

if [ "$DEPLOY_AZURE" = true ]; then
    echo -e "${YELLOW}Azure AI Foundry:${NC}"
    echo "  Registry: ${AZURE_REGISTRY}"
    echo "  Image:    ${AZURE_IMAGE}"
    echo "  Status:   Image ready, endpoint deployment pending GPU quota"
    echo ""
    echo "  Once GPU quota is approved, deploy with:"
    echo "    az ml online-endpoint create --name sam3-foundry ..."
    echo "    See DEPLOYMENT.md for complete instructions"
    echo ""
fi

echo "------------------------------------------------------------"
echo -e "${GREEN}Deployment complete!${NC}"
echo ""
echo "Test the deployment:"
echo "  python3 scripts/test/test_api.py"
echo ""
echo "For more information:"
echo "  cat README.md          # HuggingFace usage"
echo "  cat docs/DEPLOYMENT.md # Dual deployment guide"
|
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
Quick API test for the SAM3 HuggingFace inference endpoint.

Usage: python test_api.py

Runs a /health check and one sample inference against the live endpoint.
Exits 0 when both succeed, 1 otherwise.
"""
import base64
import os
import sys

import requests

ENDPOINT_URL = "https://yzsj8fy005ix8sje.us-east-1.aws.endpoints.huggingface.cloud"

# Generous per-request timeout (seconds) to accommodate cold starts.
REQUEST_TIMEOUT = 30


def test_health():
    """Check GET /health and report model/GPU/VRAM status.

    Returns:
        bool: True on HTTP 200, False on any other status or network error.
    """
    print("Testing /health endpoint...")
    try:
        # BUG FIX: the original call had no timeout and could hang forever,
        # and a connection error produced an unhandled traceback.
        response = requests.get(f"{ENDPOINT_URL}/health", timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Health check failed: {exc}")
        return False

    if response.status_code == 200:
        data = response.json()
        print("Health check passed")
        print(f"  Model: {data['model']}")
        print(f"  GPU: {'Available' if data['gpu_available'] else 'Not available'}")
        print(f"  VRAM: {data['vram']['free_gb']:.1f}GB free / {data['vram']['total_gb']:.1f}GB total")
        return True

    print(f"Health check failed: {response.status_code}")
    return False


def test_inference():
    """POST the bundled test image and report the returned masks.

    Returns:
        bool: True when the endpoint answers 200, False otherwise
        (missing test image, network error, or non-200 status).
    """
    print("\nTesting inference endpoint...")

    # Resolve assets/test_images/test.jpg relative to the repo root
    # (this script lives in scripts/test/, two levels below the root).
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(os.path.dirname(script_dir))
    test_image_path = os.path.join(project_root, "assets", "test_images", "test.jpg")

    try:
        with open(test_image_path, "rb") as f:
            image_b64 = base64.b64encode(f.read()).decode()
    except FileNotFoundError:
        print(f"Test image not found at: {test_image_path}")
        return False

    try:
        response = requests.post(
            ENDPOINT_URL,
            json={
                "inputs": image_b64,
                "parameters": {
                    "classes": ["pothole", "asphalt"]
                }
            },
            timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        print(f"Inference request failed: {exc}")
        return False

    if response.status_code == 200:
        results = response.json()
        print(f"Inference successful ({response.elapsed.total_seconds():.2f}s)")
        print(f"  Generated {len(results)} masks:")
        for result in results:
            # Decoded mask size gives a rough sanity check on payload content.
            mask_size = len(base64.b64decode(result['mask']))
            print(f"  - {result['label']}: {mask_size:,} bytes (score: {result['score']:.2f})")
        return True

    print(f"Inference failed: {response.status_code}")
    print(f"  Response: {response.text}")
    return False


def main():
    """Run both checks and exit 0 only when all of them pass."""
    print("=" * 60)
    print("SAM3 API Test")
    print("=" * 60)
    print(f"Endpoint: {ENDPOINT_URL}\n")

    health_ok = test_health()
    inference_ok = test_inference()

    print("\n" + "=" * 60)
    if health_ok and inference_ok:
        print("All tests passed!")
        sys.exit(0)
    else:
        print("Some tests failed")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
|
@@ -1,199 +1,202 @@
|
|
| 1 |
"""
|
| 2 |
-
SAM3
|
| 3 |
|
| 4 |
-
|
| 5 |
-
- Large images (1920x1080)
|
| 6 |
-
- A10 GPU (24GB VRAM)
|
| 7 |
-
- Automatic concurrency adjustment based on available VRAM
|
| 8 |
"""
|
| 9 |
import base64
|
| 10 |
import io
|
| 11 |
import asyncio
|
| 12 |
import torch
|
|
|
|
| 13 |
from PIL import Image
|
| 14 |
from fastapi import FastAPI, HTTPException
|
| 15 |
from pydantic import BaseModel
|
| 16 |
-
from transformers import AutoProcessor,
|
| 17 |
-
from collections import deque
|
| 18 |
import logging
|
| 19 |
|
| 20 |
logging.basicConfig(level=logging.INFO)
|
| 21 |
logger = logging.getLogger(__name__)
|
| 22 |
|
| 23 |
-
# Load SAM3 model
|
| 24 |
-
processor = AutoProcessor.from_pretrained("./model")
|
| 25 |
-
model =
|
| 26 |
"./model",
|
| 27 |
-
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
|
|
|
|
| 28 |
)
|
| 29 |
|
| 30 |
model.eval()
|
| 31 |
if torch.cuda.is_available():
|
| 32 |
model.cuda()
|
| 33 |
-
logger.info(f"GPU
|
| 34 |
-
logger.info(f"
|
| 35 |
|
| 36 |
-
|
|
|
|
|
|
|
| 37 |
class VRAMManager:
|
| 38 |
-
"""Dynamically manage concurrency based on available VRAM"""
|
| 39 |
-
|
| 40 |
def __init__(self):
|
| 41 |
-
self.
|
| 42 |
-
self.model_vram_gb = torch.cuda.memory_allocated() / 1e9 if torch.cuda.is_available() else 0
|
| 43 |
-
|
| 44 |
-
# Estimate VRAM per inference for 1920x1080 images with SAM3
|
| 45 |
-
# Conservative estimate: 8-12GB per inference at this resolution
|
| 46 |
-
self.estimated_inference_vram_gb = 10.0
|
| 47 |
-
|
| 48 |
-
# Calculate max concurrent inferences
|
| 49 |
-
available_vram = self.total_vram_gb - self.model_vram_gb - 2.0 # Keep 2GB buffer
|
| 50 |
-
self.max_concurrent = max(1, int(available_vram / self.estimated_inference_vram_gb))
|
| 51 |
-
|
| 52 |
-
self.semaphore = asyncio.Semaphore(self.max_concurrent)
|
| 53 |
-
self.request_queue = deque()
|
| 54 |
self.processing_count = 0
|
| 55 |
-
|
| 56 |
-
logger.info(f"VRAM Manager initialized:")
|
| 57 |
-
logger.info(f" Total VRAM: {self.total_vram_gb:.2f} GB")
|
| 58 |
-
logger.info(f" Model VRAM: {self.model_vram_gb:.2f} GB")
|
| 59 |
-
logger.info(f" Estimated per inference: {self.estimated_inference_vram_gb:.2f} GB")
|
| 60 |
-
logger.info(f" Max concurrent inferences: {self.max_concurrent}")
|
| 61 |
-
|
| 62 |
def get_vram_status(self):
|
| 63 |
-
"""Get current VRAM usage"""
|
| 64 |
if not torch.cuda.is_available():
|
| 65 |
return {}
|
| 66 |
-
|
| 67 |
return {
|
| 68 |
-
"total_gb":
|
| 69 |
"allocated_gb": torch.cuda.memory_allocated() / 1e9,
|
| 70 |
-
"
|
| 71 |
-
"
|
| 72 |
-
"max_concurrent": self.max_concurrent,
|
| 73 |
-
"processing_now": self.processing_count,
|
| 74 |
-
"queued": len(self.request_queue)
|
| 75 |
}
|
| 76 |
-
|
| 77 |
-
async def acquire(self,
|
| 78 |
-
"""Acquire GPU slot with VRAM check"""
|
| 79 |
-
self.request_queue.append(request_id)
|
| 80 |
-
position = len(self.request_queue)
|
| 81 |
-
|
| 82 |
-
logger.info(f"Request {request_id}: Queued at position {position}")
|
| 83 |
-
|
| 84 |
-
# Wait for semaphore slot
|
| 85 |
await self.semaphore.acquire()
|
| 86 |
-
|
| 87 |
-
# Remove from queue and increment processing count
|
| 88 |
-
if request_id in self.request_queue:
|
| 89 |
-
self.request_queue.remove(request_id)
|
| 90 |
self.processing_count += 1
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
vram_status = self.get_vram_status()
|
| 94 |
-
if vram_status.get("free_gb", 0) < 5.0: # Need at least 5GB free
|
| 95 |
-
self.processing_count -= 1
|
| 96 |
-
self.semaphore.release()
|
| 97 |
-
raise HTTPException(
|
| 98 |
-
status_code=503,
|
| 99 |
-
detail=f"Insufficient VRAM: {vram_status.get('free_gb', 0):.2f}GB free, need 5GB+"
|
| 100 |
-
)
|
| 101 |
-
|
| 102 |
-
logger.info(f"Request {request_id}: Processing started (VRAM: {vram_status['free_gb']:.2f}GB free)")
|
| 103 |
-
|
| 104 |
-
def release(self, request_id):
|
| 105 |
-
"""Release GPU slot"""
|
| 106 |
self.processing_count -= 1
|
| 107 |
self.semaphore.release()
|
| 108 |
-
|
| 109 |
-
# Clean up memory
|
| 110 |
if torch.cuda.is_available():
|
| 111 |
torch.cuda.empty_cache()
|
| 112 |
-
|
| 113 |
-
logger.info(f"Request {request_id}: Completed and released")
|
| 114 |
|
| 115 |
-
# Initialize VRAM manager
|
| 116 |
vram_manager = VRAMManager()
|
| 117 |
-
|
| 118 |
-
app = FastAPI(title="SAM3 Inference API")
|
| 119 |
-
|
| 120 |
|
| 121 |
class Request(BaseModel):
|
| 122 |
-
inputs: str
|
| 123 |
-
parameters: dict
|
| 124 |
|
| 125 |
|
| 126 |
def run_inference(image_b64: str, classes: list, request_id: str):
|
| 127 |
"""
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
| 131 |
"""
|
| 132 |
try:
|
| 133 |
# Decode image
|
| 134 |
image_bytes = base64.b64decode(image_b64)
|
| 135 |
pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
#
|
|
|
|
|
|
|
| 140 |
inputs = processor(
|
| 141 |
-
images=
|
| 142 |
-
text=classes,
|
| 143 |
return_tensors="pt"
|
| 144 |
)
|
|
|
|
|
|
|
|
|
|
| 145 |
if torch.cuda.is_available():
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
with torch.no_grad():
|
|
|
|
| 150 |
outputs = model(**inputs)
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
results = []
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
# Convert to PNG
|
| 160 |
pil_mask = Image.fromarray(binary_mask, mode="L")
|
| 161 |
buf = io.BytesIO()
|
| 162 |
pil_mask.save(buf, format="PNG")
|
| 163 |
mask_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
results.append({
|
| 166 |
"label": cls,
|
| 167 |
"mask": mask_b64,
|
| 168 |
-
"score":
|
| 169 |
})
|
| 170 |
-
|
| 171 |
-
logger.info(f"
|
| 172 |
return results
|
| 173 |
-
|
| 174 |
except Exception as e:
|
| 175 |
-
logger.error(f"
|
|
|
|
|
|
|
| 176 |
raise
|
| 177 |
|
| 178 |
|
| 179 |
@app.post("/")
|
| 180 |
async def predict(req: Request):
|
| 181 |
-
|
| 182 |
-
Predict segmentation masks for given classes
|
| 183 |
-
|
| 184 |
-
Expected performance for 1920x1080 images:
|
| 185 |
-
- Processing time: 5-10 seconds
|
| 186 |
-
- VRAM usage: 8-12GB per inference
|
| 187 |
-
- Concurrent capacity: 1-2 inferences on A10 24GB GPU
|
| 188 |
-
"""
|
| 189 |
-
request_id = str(id(req))
|
| 190 |
-
|
| 191 |
try:
|
| 192 |
-
# Acquire GPU slot (with VRAM check)
|
| 193 |
await vram_manager.acquire(request_id)
|
| 194 |
-
|
| 195 |
try:
|
| 196 |
-
# Run inference in thread pool (non-blocking)
|
| 197 |
results = await asyncio.to_thread(
|
| 198 |
run_inference,
|
| 199 |
req.inputs,
|
|
@@ -201,46 +204,28 @@ async def predict(req: Request):
|
|
| 201 |
request_id
|
| 202 |
)
|
| 203 |
return results
|
| 204 |
-
|
| 205 |
finally:
|
| 206 |
-
# Always release GPU slot
|
| 207 |
vram_manager.release(request_id)
|
| 208 |
-
|
| 209 |
-
except HTTPException:
|
| 210 |
-
raise
|
| 211 |
except Exception as e:
|
| 212 |
-
logger.error(f"
|
| 213 |
raise HTTPException(status_code=500, detail=str(e))
|
| 214 |
|
| 215 |
|
| 216 |
@app.get("/health")
|
| 217 |
async def health():
|
| 218 |
-
"""Health check endpoint"""
|
| 219 |
-
vram_status = vram_manager.get_vram_status()
|
| 220 |
-
|
| 221 |
return {
|
| 222 |
"status": "healthy",
|
|
|
|
| 223 |
"gpu_available": torch.cuda.is_available(),
|
| 224 |
-
"vram":
|
| 225 |
}
|
| 226 |
|
| 227 |
|
| 228 |
@app.get("/metrics")
|
| 229 |
async def metrics():
|
| 230 |
-
"""Detailed metrics endpoint"""
|
| 231 |
return vram_manager.get_vram_status()
|
| 232 |
|
| 233 |
|
| 234 |
if __name__ == "__main__":
|
| 235 |
import uvicorn
|
| 236 |
-
|
| 237 |
-
# Configuration for large images (1920x1080) on A10 GPU
|
| 238 |
-
uvicorn.run(
|
| 239 |
-
app,
|
| 240 |
-
host="0.0.0.0",
|
| 241 |
-
port=7860,
|
| 242 |
-
workers=1, # Single worker for single GPU
|
| 243 |
-
limit_concurrency=50, # Queue up to 50 requests
|
| 244 |
-
timeout_keep_alive=300, # 5 min keepalive for long inferences
|
| 245 |
-
log_level="info"
|
| 246 |
-
)
|
|
|
|
| 1 |
"""
|
| 2 |
+
SAM3 Static Image Segmentation - Correct Implementation
|
| 3 |
|
| 4 |
+
Uses Sam3Model (not Sam3VideoModel) for text-prompted static image segmentation.
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
import base64
|
| 7 |
import io
|
| 8 |
import asyncio
|
| 9 |
import torch
|
| 10 |
+
import numpy as np
|
| 11 |
from PIL import Image
|
| 12 |
from fastapi import FastAPI, HTTPException
|
| 13 |
from pydantic import BaseModel
|
| 14 |
+
from transformers import AutoProcessor, AutoModel
|
|
|
|
| 15 |
import logging
|
| 16 |
|
| 17 |
logging.basicConfig(level=logging.INFO)
|
| 18 |
logger = logging.getLogger(__name__)
|
| 19 |
|
| 20 |
+
# Load SAM3 model for STATIC IMAGES
|
| 21 |
+
processor = AutoProcessor.from_pretrained("./model", trust_remote_code=True)
|
| 22 |
+
model = AutoModel.from_pretrained(
|
| 23 |
"./model",
|
| 24 |
+
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
| 25 |
+
trust_remote_code=True
|
| 26 |
)
|
| 27 |
|
| 28 |
model.eval()
|
| 29 |
if torch.cuda.is_available():
|
| 30 |
model.cuda()
|
| 31 |
+
logger.info(f"GPU: {torch.cuda.get_device_name()}")
|
| 32 |
+
logger.info(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
|
| 33 |
|
| 34 |
+
logger.info(f"β Loaded {model.__class__.__name__} for static image segmentation")
|
| 35 |
+
|
| 36 |
+
# Simple concurrency control
|
| 37 |
class VRAMManager:
|
|
|
|
|
|
|
| 38 |
def __init__(self):
|
| 39 |
+
self.semaphore = asyncio.Semaphore(2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
self.processing_count = 0
|
| 41 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
def get_vram_status(self):
|
|
|
|
| 43 |
if not torch.cuda.is_available():
|
| 44 |
return {}
|
|
|
|
| 45 |
return {
|
| 46 |
+
"total_gb": torch.cuda.get_device_properties(0).total_memory / 1e9,
|
| 47 |
"allocated_gb": torch.cuda.memory_allocated() / 1e9,
|
| 48 |
+
"free_gb": (torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved()) / 1e9,
|
| 49 |
+
"processing_now": self.processing_count
|
|
|
|
|
|
|
|
|
|
| 50 |
}
|
| 51 |
+
|
| 52 |
+
async def acquire(self, rid):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
await self.semaphore.acquire()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
self.processing_count += 1
|
| 55 |
+
|
| 56 |
+
def release(self, rid):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
self.processing_count -= 1
|
| 58 |
self.semaphore.release()
|
|
|
|
|
|
|
| 59 |
if torch.cuda.is_available():
|
| 60 |
torch.cuda.empty_cache()
|
|
|
|
|
|
|
| 61 |
|
|
|
|
| 62 |
vram_manager = VRAMManager()
|
| 63 |
+
app = FastAPI(title="SAM3 Static Image API")
|
|
|
|
|
|
|
| 64 |
|
| 65 |
class Request(BaseModel):
|
| 66 |
+
inputs: str
|
| 67 |
+
parameters: dict
|
| 68 |
|
| 69 |
|
| 70 |
def run_inference(image_b64: str, classes: list, request_id: str):
|
| 71 |
"""
|
| 72 |
+
Sam3Model inference for static images with text prompts
|
| 73 |
+
|
| 74 |
+
According to HuggingFace docs, Sam3Model uses:
|
| 75 |
+
- processor(images=image, text=text_prompts)
|
| 76 |
+
- model.forward(pixel_values, input_ids, ...)
|
| 77 |
"""
|
| 78 |
try:
|
| 79 |
# Decode image
|
| 80 |
image_bytes = base64.b64decode(image_b64)
|
| 81 |
pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
|
| 82 |
+
logger.info(f"[{request_id}] Image: {pil_image.size}, Classes: {classes}")
|
| 83 |
+
|
| 84 |
+
# Process with Sam3Processor
|
| 85 |
+
# Sam3Model expects: batch of images matching text prompts
|
| 86 |
+
# For multiple objects in ONE image, repeat the image for each class
|
| 87 |
+
images_batch = [pil_image] * len(classes)
|
| 88 |
inputs = processor(
|
| 89 |
+
images=images_batch, # Repeat image for each text prompt
|
| 90 |
+
text=classes, # List of text prompts
|
| 91 |
return_tensors="pt"
|
| 92 |
)
|
| 93 |
+
logger.info(f"[{request_id}] Processing {len(classes)} classes with batched images")
|
| 94 |
+
|
| 95 |
+
# Move to GPU and match model dtype
|
| 96 |
if torch.cuda.is_available():
|
| 97 |
+
model_dtype = next(model.parameters()).dtype
|
| 98 |
+
inputs = {
|
| 99 |
+
k: v.cuda().to(model_dtype) if isinstance(v, torch.Tensor) and v.dtype.is_floating_point else v.cuda() if isinstance(v, torch.Tensor) else v
|
| 100 |
+
for k, v in inputs.items()
|
| 101 |
+
}
|
| 102 |
+
logger.info(f"[{request_id}] Moved inputs to GPU (float tensors to {model_dtype})")
|
| 103 |
+
|
| 104 |
+
logger.info(f"[{request_id}] Input keys: {list(inputs.keys())}")
|
| 105 |
+
|
| 106 |
+
# Sam3Model Inference
|
| 107 |
with torch.no_grad():
|
| 108 |
+
# Sam3Model.forward() accepts pixel_values, input_ids, etc.
|
| 109 |
outputs = model(**inputs)
|
| 110 |
+
logger.info(f"[{request_id}] Forward pass successful!")
|
| 111 |
+
|
| 112 |
+
logger.info(f"[{request_id}] Output type: {type(outputs)}")
|
| 113 |
+
logger.info(f"[{request_id}] Output attributes: {dir(outputs)}")
|
| 114 |
+
|
| 115 |
+
# Extract masks from outputs
|
| 116 |
+
# Sam3Model returns masks in outputs.pred_masks
|
| 117 |
+
if hasattr(outputs, 'pred_masks'):
|
| 118 |
+
pred_masks = outputs.pred_masks
|
| 119 |
+
logger.info(f"[{request_id}] pred_masks shape: {pred_masks.shape}")
|
| 120 |
+
elif hasattr(outputs, 'masks'):
|
| 121 |
+
pred_masks = outputs.masks
|
| 122 |
+
logger.info(f"[{request_id}] masks shape: {pred_masks.shape}")
|
| 123 |
+
elif isinstance(outputs, dict) and 'pred_masks' in outputs:
|
| 124 |
+
pred_masks = outputs['pred_masks']
|
| 125 |
+
logger.info(f"[{request_id}] pred_masks shape: {pred_masks.shape}")
|
| 126 |
+
else:
|
| 127 |
+
logger.error(f"[{request_id}] Unexpected output format")
|
| 128 |
+
logger.error(f"Output attributes: {dir(outputs) if not isinstance(outputs, dict) else outputs.keys()}")
|
| 129 |
+
raise ValueError("Cannot find masks in model output")
|
| 130 |
+
|
| 131 |
+
# Process masks
|
| 132 |
results = []
|
| 133 |
+
|
| 134 |
+
# pred_masks typically: [batch, num_objects, height, width]
|
| 135 |
+
batch_size = pred_masks.shape[0]
|
| 136 |
+
num_masks = pred_masks.shape[1] if len(pred_masks.shape) > 1 else 1
|
| 137 |
+
|
| 138 |
+
logger.info(f"[{request_id}] Batch size: {batch_size}, Num masks: {num_masks}")
|
| 139 |
+
|
| 140 |
+
for i, cls in enumerate(classes):
|
| 141 |
+
if i < num_masks:
|
| 142 |
+
# Get mask for this class/object
|
| 143 |
+
if len(pred_masks.shape) == 4: # [batch, num, h, w]
|
| 144 |
+
mask_tensor = pred_masks[0, i] # [h, w]
|
| 145 |
+
elif len(pred_masks.shape) == 3: # [num, h, w]
|
| 146 |
+
mask_tensor = pred_masks[i]
|
| 147 |
+
else:
|
| 148 |
+
mask_tensor = pred_masks
|
| 149 |
+
|
| 150 |
+
# Resize to original size if needed
|
| 151 |
+
if mask_tensor.shape[-2:] != pil_image.size[::-1]:
|
| 152 |
+
mask_tensor = torch.nn.functional.interpolate(
|
| 153 |
+
mask_tensor.unsqueeze(0).unsqueeze(0),
|
| 154 |
+
size=pil_image.size[::-1],
|
| 155 |
+
mode='bilinear',
|
| 156 |
+
align_corners=False
|
| 157 |
+
).squeeze()
|
| 158 |
+
|
| 159 |
+
# Convert to binary mask
|
| 160 |
+
binary_mask = (mask_tensor > 0.0).float().cpu().numpy().astype("uint8") * 255
|
| 161 |
+
else:
|
| 162 |
+
# No mask available for this class
|
| 163 |
+
binary_mask = np.zeros(pil_image.size[::-1], dtype="uint8")
|
| 164 |
+
|
| 165 |
# Convert to PNG
|
| 166 |
pil_mask = Image.fromarray(binary_mask, mode="L")
|
| 167 |
buf = io.BytesIO()
|
| 168 |
pil_mask.save(buf, format="PNG")
|
| 169 |
mask_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
|
| 170 |
+
|
| 171 |
+
# Get confidence score if available
|
| 172 |
+
score = 1.0
|
| 173 |
+
if hasattr(outputs, 'pred_scores') and i < outputs.pred_scores.shape[1]:
|
| 174 |
+
score = float(outputs.pred_scores[0, i].cpu())
|
| 175 |
+
elif hasattr(outputs, 'scores') and i < len(outputs.scores):
|
| 176 |
+
score = float(outputs.scores[i].cpu() if hasattr(outputs.scores[i], 'cpu') else outputs.scores[i])
|
| 177 |
+
|
| 178 |
results.append({
|
| 179 |
"label": cls,
|
| 180 |
"mask": mask_b64,
|
| 181 |
+
"score": score
|
| 182 |
})
|
| 183 |
+
|
| 184 |
+
logger.info(f"[{request_id}] Completed: {len(results)} masks generated")
|
| 185 |
return results
|
| 186 |
+
|
| 187 |
except Exception as e:
|
| 188 |
+
logger.error(f"[{request_id}] Failed: {str(e)}")
|
| 189 |
+
import traceback
|
| 190 |
+
traceback.print_exc()
|
| 191 |
raise
|
| 192 |
|
| 193 |
|
| 194 |
@app.post("/")
|
| 195 |
async def predict(req: Request):
|
| 196 |
+
request_id = str(id(req))[:8]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
try:
|
|
|
|
| 198 |
await vram_manager.acquire(request_id)
|
|
|
|
| 199 |
try:
|
|
|
|
| 200 |
results = await asyncio.to_thread(
|
| 201 |
run_inference,
|
| 202 |
req.inputs,
|
|
|
|
| 204 |
request_id
|
| 205 |
)
|
| 206 |
return results
|
|
|
|
| 207 |
finally:
|
|
|
|
| 208 |
vram_manager.release(request_id)
|
|
|
|
|
|
|
|
|
|
| 209 |
except Exception as e:
|
| 210 |
+
logger.error(f"[{request_id}] Error: {str(e)}")
|
| 211 |
raise HTTPException(status_code=500, detail=str(e))
|
| 212 |
|
| 213 |
|
| 214 |
@app.get("/health")
|
| 215 |
async def health():
|
|
|
|
|
|
|
|
|
|
| 216 |
return {
|
| 217 |
"status": "healthy",
|
| 218 |
+
"model": model.__class__.__name__,
|
| 219 |
"gpu_available": torch.cuda.is_available(),
|
| 220 |
+
"vram": vram_manager.get_vram_status()
|
| 221 |
}
|
| 222 |
|
| 223 |
|
| 224 |
@app.get("/metrics")
|
| 225 |
async def metrics():
|
|
|
|
| 226 |
return vram_manager.get_vram_status()
|
| 227 |
|
| 228 |
|
| 229 |
if __name__ == "__main__":
|
| 230 |
import uvicorn
|
| 231 |
+
uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -1,21 +0,0 @@
|
|
| 1 |
-
import requests
|
| 2 |
-
import base64
|
| 3 |
-
|
| 4 |
-
ENDPOINT = "https://YOUR-ENDPOINT"
|
| 5 |
-
TOKEN = "hf_xxx"
|
| 6 |
-
|
| 7 |
-
with open("test.jpg", "rb") as f:
|
| 8 |
-
img = base64.b64encode(f.read()).decode("utf-8")
|
| 9 |
-
|
| 10 |
-
payload = {
|
| 11 |
-
"inputs": img,
|
| 12 |
-
"parameters": {"classes": ["pothole", "marking"]}
|
| 13 |
-
}
|
| 14 |
-
|
| 15 |
-
r = requests.post(
|
| 16 |
-
ENDPOINT,
|
| 17 |
-
headers={"Authorization": f"Bearer {TOKEN}"},
|
| 18 |
-
json=payload
|
| 19 |
-
)
|
| 20 |
-
|
| 21 |
-
print(r.json())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|