Upload folder using huggingface_hub
- Dockerfile +34 -0
- README.md +47 -0
- app.py +60 -0
- handler.py +109 -0
- requirements.txt +10 -0
Dockerfile
ADDED
@@ -0,0 +1,34 @@
```dockerfile
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04

ENV PYTHONUNBUFFERED=1
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install -y \
    python3.11 \
    python3.11-venv \
    python3-pip \
    git \
    && rm -rf /var/lib/apt/lists/*

RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1
RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1

WORKDIR /app

# Upgrade pip to fix resolver bugs
RUN pip install --upgrade pip

# Install stable packages first (these should never fail)
RUN pip install --no-cache-dir torch pillow sentencepiece protobuf fastapi uvicorn

# Install HuggingFace packages from git (might be slow/flaky)
RUN pip install --no-cache-dir git+https://github.com/huggingface/peft.git
RUN pip install --no-cache-dir git+https://github.com/huggingface/transformers.git
RUN pip install --no-cache-dir git+https://github.com/huggingface/accelerate.git
RUN pip install --no-cache-dir git+https://github.com/huggingface/diffusers.git

COPY app.py .
COPY handler.py .

EXPOSE 8080
CMD ["python", "app.py"]
```
README.md
ADDED
@@ -0,0 +1,47 @@
````markdown
# Qwen Image Layered - Docker Deployment for HF Inference Endpoints

This directory contains a custom Docker-based deployment for `QwenImageLayeredPipeline`.

## Files
- `Dockerfile`: Custom container with all bleeding-edge dependencies.
- `app.py`: FastAPI server (HF-compatible API format).
- `handler.py`: Model loading and inference logic.
- `requirements.txt`: Python dependencies (all from git main).

## Deployment Steps

### 1. Build and Push Docker Image
```bash
# Login to Docker Hub (or another registry)
docker login

# Build the image
docker build -t yourusername/qwen-layered:latest .

# Push to registry
docker push yourusername/qwen-layered:latest
```

### 2. Create HF Inference Endpoint
1. Go to [HF Inference Endpoints](https://ui.endpoints.huggingface.co/)
2. Click **New Endpoint**
3. Select **Custom Container**
4. Enter your Docker image URL: `docker.io/yourusername/qwen-layered:latest`
5. Select a GPU (A10G or better, 24 GB+ VRAM)
6. Deploy

### 3. Usage
The handler expects a base64-encoded RGBA image under `inputs.image` (see `handler.py`), not a text prompt:
```bash
curl https://your-endpoint.endpoints.huggingface.cloud \
  -X POST \
  -d '{"inputs": {"image": "<base64-encoded PNG>"}, "parameters": {"layers": 4}}' \
  -H "Authorization: Bearer hf_..." \
  -H "Content-Type: application/json"
```

## Local Testing
```bash
docker build -t qwen-test .
docker run --gpus all -p 8080:8080 qwen-test
# Then: curl http://localhost:8080/health
```
````
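For programmatic access, here is a minimal Python client sketch. The endpoint URL, token, and `input.png` path are placeholders; the request and response shapes follow `handler.py` (a JSON list of `{layer_index, image}` objects with base64-encoded PNGs).

```python
# Hypothetical client sketch: URL, token, and file paths are placeholders.
import base64

import requests

with open("input.png", "rb") as f:
    payload = {
        "inputs": {"image": base64.b64encode(f.read()).decode("utf-8")},
        "parameters": {"layers": 4, "num_inference_steps": 50},
    }

resp = requests.post(
    "https://your-endpoint.endpoints.huggingface.cloud",
    json=payload,
    headers={"Authorization": "Bearer hf_..."},
)
resp.raise_for_status()

# Each item is {"layer_index": int, "image": "<base64 PNG>"}
for layer in resp.json():
    with open(f"layer_{layer['layer_index']}.png", "wb") as out:
        out.write(base64.b64decode(layer["image"]))
```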
app.py
ADDED
@@ -0,0 +1,60 @@
```python
"""
FastAPI server for Qwen Image Layered model.
Compatible with Hugging Face Inference Endpoints custom container format.
"""
from typing import Any, Dict, Optional

import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Import our handler
from handler import EndpointHandler

app = FastAPI()

# Initialize handler on startup so the server binds quickly and load errors show in logs
handler = None

@app.on_event("startup")
async def startup_event():
    global handler
    print("Initializing model...")
    handler = EndpointHandler()
    print("Model ready!")

class InferenceRequest(BaseModel):
    inputs: Dict[str, Any]
    parameters: Optional[Dict[str, Any]] = None

class HealthResponse(BaseModel):
    status: str

@app.get("/health")
async def health() -> HealthResponse:
    return HealthResponse(status="ok")

@app.get("/")
async def root():
    return {"status": "Qwen Image Layered Endpoint Ready"}

@app.post("/")
async def predict(request: InferenceRequest):
    if handler is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    data = {
        "inputs": request.inputs,
        "parameters": request.parameters or {},
    }

    try:
        result = handler(data)
        return result
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8080)
```
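For a quick in-process sanity check without Docker, FastAPI's `TestClient` can drive the app directly. This is a sketch: it assumes `httpx` is installed (a `TestClient` dependency not in `requirements.txt`) and that the model weights are reachable, since entering the client triggers the startup event and loads the full pipeline.

```python
# In-process smoke test: entering TestClient as a context manager fires the
# startup event, so this loads the full model (GPU and weights required).
from fastapi.testclient import TestClient

from app import app

with TestClient(app) as client:
    assert client.get("/health").json() == {"status": "ok"}
    assert client.get("/").json() == {"status": "Qwen Image Layered Endpoint Ready"}
```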
handler.py
ADDED
@@ -0,0 +1,109 @@
```python
"""
Handler for QwenImageLayeredPipeline.
Decomposes an input RGBA image into semantic layers (foreground, background, objects, etc.).
"""
from typing import Any, Dict, List
import base64
import io

import torch
from PIL import Image

# Try to import the specific pipeline class; fall back to auto-loading if this
# diffusers build does not ship it yet.
try:
    from diffusers import QwenImageLayeredPipeline
except ImportError:
    from diffusers import DiffusionPipeline
    QwenImageLayeredPipeline = None

class EndpointHandler:
    def __init__(self, path=""):
        # The model for layered decomposition
        model_id = "Qwen/Qwen-Image-Layered"

        print(f"Loading model {model_id}...")

        if QwenImageLayeredPipeline:
            print("Using explicit QwenImageLayeredPipeline class.")
            self.pipeline = QwenImageLayeredPipeline.from_pretrained(
                model_id,
                torch_dtype=torch.bfloat16,
            )
        else:
            print("Falling back to DiffusionPipeline auto-load.")
            self.pipeline = DiffusionPipeline.from_pretrained(
                model_id,
                trust_remote_code=True,
                torch_dtype=torch.bfloat16,
            )
        print(f"Loaded pipeline class: {type(self.pipeline).__name__}")

        if torch.cuda.is_available():
            self.pipeline.to("cuda")

        print("Model ready!")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Expects:
            inputs.image: base64-encoded RGBA image
            parameters.layers: number of layers to decompose into (default: 4)
            parameters.num_inference_steps: inference steps (default: 50)
            parameters.resolution: output resolution (default: 640)

        Returns:
            List of base64-encoded layer images
        """
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", {})

        # Parse the input image
        image_data = inputs.get("image")
        if not image_data:
            raise ValueError("Missing 'image' in inputs. Please provide a base64-encoded RGBA image.")

        try:
            image_bytes = base64.b64decode(image_data)
            image = Image.open(io.BytesIO(image_bytes)).convert("RGBA")
        except Exception as e:
            raise ValueError(f"Failed to decode image: {e}")

        # Get parameters with defaults
        layers = parameters.get("layers", 4)
        num_inference_steps = parameters.get("num_inference_steps", 50)
        resolution = parameters.get("resolution", 640)
        prompt = parameters.get("prompt", "")  # Usually empty for decomposition

        print(f"Decomposing image into {layers} layers at resolution {resolution}...")

        # Run the pipeline. Autocast only on CUDA: torch.autocast("cuda") fails
        # on CPU-only hosts, and the weights are already bfloat16.
        device_type = "cuda" if torch.cuda.is_available() else "cpu"
        with torch.inference_mode(), torch.autocast(device_type):
            output = self.pipeline(
                image,
                prompt,
                num_inference_steps=num_inference_steps,
                layers=layers,
                resolution=resolution,
                true_cfg_scale=4.0,
                cfg_normalize=False,
                use_en_prompt=True,
            )

        # Serialize output layers
        images_response = []

        if hasattr(output, "images") and output.images:
            # output.images is a list of lists (per batch); take the first batch
            layer_images = output.images[0] if isinstance(output.images[0], list) else output.images

            for i, layer_img in enumerate(layer_images):
                if isinstance(layer_img, Image.Image):
                    buffered = io.BytesIO()
                    layer_img.save(buffered, format="PNG")
                    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
                    images_response.append({
                        "layer_index": i,
                        "image": img_str,
                    })

        print(f"Returned {len(images_response)} layers.")
        return images_response
```
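To eyeball the decomposition, the returned layers can be recombined with PIL. A sketch, assuming each layer decodes to an RGBA image of the same size and that lower `layer_index` values sit further back in the stack (the actual ordering depends on the pipeline and may need flipping):

```python
# Sketch: recombine handler output into a single image. Assumes same-size RGBA
# layers composited back-to-front in layer_index order (flip if needed).
import base64
import io

from PIL import Image

def composite_layers(layers_response):
    imgs = [
        Image.open(io.BytesIO(base64.b64decode(item["image"]))).convert("RGBA")
        for item in sorted(layers_response, key=lambda x: x["layer_index"])
    ]
    canvas = Image.new("RGBA", imgs[0].size, (0, 0, 0, 0))
    for img in imgs:
        canvas = Image.alpha_composite(canvas, img)
    return canvas
```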
requirements.txt
ADDED
@@ -0,0 +1,10 @@
```text
torch
pillow
sentencepiece
protobuf
fastapi
uvicorn
git+https://github.com/huggingface/peft.git
git+https://github.com/huggingface/diffusers.git
git+https://github.com/huggingface/transformers.git
git+https://github.com/huggingface/accelerate.git
```