jide committed
Commit 06e4a45 · verified · 1 Parent(s): fd2e359

Upload folder using huggingface_hub

Files changed (5)
  1. Dockerfile +34 -0
  2. README.md +47 -0
  3. app.py +60 -0
  4. handler.py +109 -0
  5. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,34 @@
+ FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
+
+ ENV PYTHONUNBUFFERED=1
+ ENV DEBIAN_FRONTEND=noninteractive
+
+ RUN apt-get update && apt-get install -y \
+     python3.11 \
+     python3.11-venv \
+     python3-pip \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1
+
+ WORKDIR /app
+
+ # Upgrade pip to fix resolver bugs. Use `python -m pip` throughout so packages
+ # install into python3.11; plain `pip3` targets the distro default Python 3.10.
+ RUN python -m pip install --upgrade pip
+
+ # Install stable packages FIRST (these should never fail)
+ RUN python -m pip install --no-cache-dir torch pillow sentencepiece protobuf fastapi uvicorn
+
+ # Install HuggingFace packages from git (might be slow/flaky)
+ RUN python -m pip install --no-cache-dir git+https://github.com/huggingface/peft.git
+ RUN python -m pip install --no-cache-dir git+https://github.com/huggingface/transformers.git
+ RUN python -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate.git
+ RUN python -m pip install --no-cache-dir git+https://github.com/huggingface/diffusers.git
+
+ COPY app.py .
+ COPY handler.py .
+
+ EXPOSE 8080
+ CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,47 @@
+ # Qwen Image Layered - Docker Deployment for HF Inference Endpoints
+
+ This directory contains a custom Docker-based deployment for `QwenImageLayeredPipeline`.
+
+ ## Files
+ - `Dockerfile`: Custom container with all bleeding-edge dependencies.
+ - `app.py`: FastAPI server (HF-compatible API format).
+ - `handler.py`: Model loading and inference logic.
+ - `requirements.txt`: Python dependencies (all from git main).
+
+ ## Deployment Steps
+
+ ### 1. Build and Push Docker Image
+ ```bash
+ # Log in to Docker Hub (or another registry)
+ docker login
+
+ # Build the image
+ docker build -t yourusername/qwen-layered:latest .
+
+ # Push to the registry
+ docker push yourusername/qwen-layered:latest
+ ```
+
+ ### 2. Create HF Inference Endpoint
+ 1. Go to [HF Inference Endpoints](https://ui.endpoints.huggingface.co/)
+ 2. Click **New Endpoint**
+ 3. Select **Custom Container**
+ 4. Enter your Docker image URL: `docker.io/yourusername/qwen-layered:latest`
+ 5. Select a GPU (A10G or better, 24GB+ VRAM)
+ 6. Deploy
+
+ ### 3. Usage
+ The handler expects a base64-encoded RGBA image under `inputs.image` (see `handler.py`); a Python client sketch follows the curl example below.
+ ```bash
+ curl https://your-endpoint.endpoints.huggingface.cloud \
+   -X POST \
+   -d '{"inputs": {"image": "<base64-encoded RGBA image>"}, "parameters": {"layers": 4}}' \
+   -H "Authorization: Bearer hf_..." \
+   -H "Content-Type: application/json"
+ ```
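+
+ For programmatic access, here is a minimal Python client sketch matching the handler's contract (base64 image in, base64 PNG layers out). The endpoint URL and token are placeholders, and it assumes the `requests` package is available on the client:
+ ```python
+ import base64
+ import requests
+
+ ENDPOINT = "https://your-endpoint.endpoints.huggingface.cloud"  # placeholder
+ TOKEN = "hf_..."  # your HF access token
+
+ # handler.py expects a base64-encoded RGBA image under inputs.image
+ with open("input.png", "rb") as f:
+     image_b64 = base64.b64encode(f.read()).decode("utf-8")
+
+ payload = {
+     "inputs": {"image": image_b64},
+     "parameters": {"layers": 4, "num_inference_steps": 50, "resolution": 640},
+ }
+ resp = requests.post(
+     ENDPOINT,
+     json=payload,
+     headers={"Authorization": f"Bearer {TOKEN}"},
+     timeout=600,
+ )
+ resp.raise_for_status()
+
+ # The handler returns [{"layer_index": i, "image": "<base64 PNG>"}, ...]
+ for layer in resp.json():
+     with open(f"layer_{layer['layer_index']}.png", "wb") as f:
+         f.write(base64.b64decode(layer["image"]))
+ ```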
+
+ ## Local Testing
+ ```bash
+ docker build -t qwen-test .
+ docker run --gpus all -p 8080:8080 qwen-test
+ # Then: curl http://localhost:8080/health
+ ```
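+
+ To exercise the handler without Docker (assuming a local GPU and the dependencies from `requirements.txt`), a smoke test might look like this; the test image path is a placeholder:
+ ```python
+ import base64
+ from handler import EndpointHandler
+
+ handler = EndpointHandler()  # downloads and loads the model
+
+ # Encode a local test image the same way a client would
+ with open("test.png", "rb") as f:
+     image_b64 = base64.b64encode(f.read()).decode("utf-8")
+
+ layers = handler({
+     "inputs": {"image": image_b64},
+     "parameters": {"layers": 4},
+ })
+ print(f"Got {len(layers)} layers")
+ for layer in layers:
+     with open(f"layer_{layer['layer_index']}.png", "wb") as f:
+         f.write(base64.b64decode(layer["image"]))
+ ```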
app.py ADDED
@@ -0,0 +1,60 @@
+ """
+ FastAPI server for the Qwen Image Layered model.
+ Compatible with the Hugging Face Inference Endpoints custom container format.
+ """
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from typing import Optional, Dict, Any
+ import uvicorn
+
+ # Import our handler
+ from handler import EndpointHandler
+
+ app = FastAPI()
+
+ # Initialized on startup so the server can bind the port before the model loads
+ handler = None
+
+ # Note: on_event is deprecated in newer FastAPI in favor of lifespan handlers,
+ # but it still works here.
+ @app.on_event("startup")
+ async def startup_event():
+     global handler
+     print("Initializing model...")
+     handler = EndpointHandler()
+     print("Model ready!")
+
+ class InferenceRequest(BaseModel):
+     inputs: Dict[str, Any]
+     parameters: Optional[Dict[str, Any]] = None
+
+ class HealthResponse(BaseModel):
+     status: str
+
+ @app.get("/health")
+ async def health() -> HealthResponse:
+     return HealthResponse(status="ok")
+
+ @app.get("/")
+ async def root():
+     return {"status": "Qwen Image Layered Endpoint Ready"}
+
+ @app.post("/")
+ async def predict(request: InferenceRequest):
+     if handler is None:
+         raise HTTPException(status_code=503, detail="Model not loaded")
+
+     data = {
+         "inputs": request.inputs,
+         "parameters": request.parameters or {}
+     }
+
+     try:
+         result = handler(data)
+         return result
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=8080)
handler.py ADDED
@@ -0,0 +1,109 @@
+ """
+ Handler for QwenImageLayeredPipeline.
+ Decomposes an input RGBA image into semantic layers (foreground, background, objects, etc.).
+ """
+ from typing import Dict, List, Any
+ import torch
+ import base64
+ import io
+ from PIL import Image
+
+ # Try to import the specific pipeline class; fall back to auto-loading
+ # if the installed diffusers build does not ship it yet.
+ try:
+     from diffusers import QwenImageLayeredPipeline
+ except ImportError:
+     from diffusers import DiffusionPipeline
+     QwenImageLayeredPipeline = None
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         # The correct model for layered decomposition
+         model_id = "Qwen/Qwen-Image-Layered"
+
+         print(f"Loading model {model_id}...")
+
+         if QwenImageLayeredPipeline:
+             print("Using explicit QwenImageLayeredPipeline class.")
+             self.pipeline = QwenImageLayeredPipeline.from_pretrained(
+                 model_id,
+                 torch_dtype=torch.bfloat16,
+             )
+         else:
+             print("Falling back to DiffusionPipeline auto-load.")
+             self.pipeline = DiffusionPipeline.from_pretrained(
+                 model_id,
+                 trust_remote_code=True,
+                 torch_dtype=torch.bfloat16,
+             )
+         print(f"Loaded pipeline class: {type(self.pipeline).__name__}")
+
+         # Keep the device around so inference can autocast on CPU as well
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.pipeline.to(self.device)
+
+         print("Model ready!")
+
+     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+         """
+         Expects:
+             inputs.image: base64-encoded RGBA image
+             parameters.layers: number of layers to decompose into (default: 4)
+             parameters.num_inference_steps: inference steps (default: 50)
+             parameters.resolution: output resolution (default: 640)
+
+         Returns:
+             List of base64-encoded layer images
+         """
+         inputs = data.pop("inputs", data)
+         parameters = data.pop("parameters", {})
+
+         # Parse the input image
+         image_data = inputs.get("image")
+         if not image_data:
+             raise ValueError("Missing 'image' in inputs. Please provide a base64-encoded RGBA image.")
+
+         try:
+             image_bytes = base64.b64decode(image_data)
+             image = Image.open(io.BytesIO(image_bytes)).convert("RGBA")
+         except Exception as e:
+             raise ValueError(f"Failed to decode image: {e}")
+
+         # Get parameters with defaults
+         layers = parameters.get("layers", 4)
+         num_inference_steps = parameters.get("num_inference_steps", 50)
+         resolution = parameters.get("resolution", 640)
+         prompt = parameters.get("prompt", "")  # Usually empty for decomposition
+
+         print(f"Decomposing image into {layers} layers at resolution {resolution}...")
+
+         # Run the pipeline (autocast on the detected device, not hard-coded CUDA,
+         # so a CPU fallback does not crash)
+         with torch.autocast(self.device):
+             output = self.pipeline(
+                 image,
+                 prompt,
+                 num_inference_steps=num_inference_steps,
+                 layers=layers,
+                 resolution=resolution,
+                 true_cfg_scale=4.0,
+                 cfg_normalize=False,
+                 use_en_prompt=True,
+             )
+
+         # Serialize output layers
+         images_response = []
+
+         if hasattr(output, "images") and output.images:
+             # output.images is a list of lists (per batch); take the first batch
+             layer_images = output.images[0] if isinstance(output.images[0], list) else output.images
+
+             for i, layer_img in enumerate(layer_images):
+                 if isinstance(layer_img, Image.Image):
+                     buffered = io.BytesIO()
+                     layer_img.save(buffered, format="PNG")
+                     img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+                     images_response.append({
+                         "layer_index": i,
+                         "image": img_str
+                     })
+
+         print(f"Returned {len(images_response)} layers.")
+         return images_response
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ torch
+ pillow
+ sentencepiece
+ protobuf
+ fastapi
+ uvicorn
+ git+https://github.com/huggingface/peft.git
+ git+https://github.com/huggingface/diffusers.git
+ git+https://github.com/huggingface/transformers.git
+ git+https://github.com/huggingface/accelerate.git