jide committed
Commit 06e4a45 · verified · 1 Parent(s): fd2e359

Upload folder using huggingface_hub

Files changed (5)
  1. Dockerfile +34 -0
  2. README.md +47 -0
  3. app.py +60 -0
  4. handler.py +109 -0
  5. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,34 @@
+ FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
+
+ ENV PYTHONUNBUFFERED=1
+ ENV DEBIAN_FRONTEND=noninteractive
+
+ RUN apt-get update && apt-get install -y \
+     python3.11 \
+     python3.11-venv \
+     python3-pip \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1
+
+ WORKDIR /app
+
+ # Upgrade pip to fix resolver bugs. Use `python -m pip` throughout so packages
+ # install into python3.11; plain `pip3` targets the distro default Python 3.10.
+ RUN python -m pip install --upgrade pip
+
+ # Install stable packages FIRST (these should never fail)
+ RUN python -m pip install --no-cache-dir torch pillow sentencepiece protobuf fastapi uvicorn
+
+ # Install HuggingFace packages from git (might be slow/flaky)
+ RUN python -m pip install --no-cache-dir git+https://github.com/huggingface/peft.git
+ RUN python -m pip install --no-cache-dir git+https://github.com/huggingface/transformers.git
+ RUN python -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate.git
+ RUN python -m pip install --no-cache-dir git+https://github.com/huggingface/diffusers.git
+
+ COPY app.py .
+ COPY handler.py .
+
+ EXPOSE 8080
+ CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,47 @@
+ # Qwen Image Layered - Docker Deployment for HF Inference Endpoints
+
+ This directory contains a custom Docker-based deployment for `QwenImageLayeredPipeline`.
+
+ ## Files
+ - `Dockerfile`: Custom container with all bleeding-edge dependencies.
+ - `app.py`: FastAPI server (HF-compatible API format).
+ - `handler.py`: Model loading and inference logic.
+ - `requirements.txt`: Python dependencies (all from git main).
+
+ ## Deployment Steps
+
+ ### 1. Build and Push Docker Image
+ ```bash
+ # Log in to Docker Hub (or another registry)
+ docker login
+
+ # Build the image
+ docker build -t yourusername/qwen-layered:latest .
+
+ # Push to the registry
+ docker push yourusername/qwen-layered:latest
+ ```
+
+ ### 2. Create HF Inference Endpoint
+ 1. Go to [HF Inference Endpoints](https://ui.endpoints.huggingface.co/)
+ 2. Click **New Endpoint**
+ 3. Select **Custom Container**
+ 4. Enter your Docker image URL: `docker.io/yourusername/qwen-layered:latest`
+ 5. Select a GPU (A10G or better, 24GB+ VRAM)
+ 6. Deploy
+
+ ### 3. Usage
+ The handler expects a base64-encoded RGBA image under `inputs.image` (see `handler.py`); a Python client sketch follows the curl example below.
+ ```bash
+ curl https://your-endpoint.endpoints.huggingface.cloud \
+   -X POST \
+   -d '{"inputs": {"image": "<base64-encoded RGBA image>"}, "parameters": {"layers": 4}}' \
+   -H "Authorization: Bearer hf_..." \
+   -H "Content-Type: application/json"
+ ```
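+
+ For programmatic access, here is a minimal Python client sketch matching the handler's contract (base64 image in, base64 PNG layers out). The endpoint URL and token are placeholders, and it assumes the `requests` package is available on the client:
+ ```python
+ import base64
+ import requests
+
+ ENDPOINT = "https://your-endpoint.endpoints.huggingface.cloud"  # placeholder
+ TOKEN = "hf_..."  # your HF access token
+
+ # handler.py expects a base64-encoded RGBA image under inputs.image
+ with open("input.png", "rb") as f:
+     image_b64 = base64.b64encode(f.read()).decode("utf-8")
+
+ payload = {
+     "inputs": {"image": image_b64},
+     "parameters": {"layers": 4, "num_inference_steps": 50, "resolution": 640},
+ }
+ resp = requests.post(
+     ENDPOINT,
+     json=payload,
+     headers={"Authorization": f"Bearer {TOKEN}"},
+     timeout=600,
+ )
+ resp.raise_for_status()
+
+ # The handler returns [{"layer_index": i, "image": "<base64 PNG>"}, ...]
+ for layer in resp.json():
+     with open(f"layer_{layer['layer_index']}.png", "wb") as f:
+         f.write(base64.b64decode(layer["image"]))
+ ```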
+
+ ## Local Testing
+ ```bash
+ docker build -t qwen-test .
+ docker run --gpus all -p 8080:8080 qwen-test
+ # Then: curl http://localhost:8080/health
+ ```
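+
+ To exercise the handler without Docker (assuming a local GPU and the dependencies from `requirements.txt`), a smoke test might look like this; the test image path is a placeholder:
+ ```python
+ import base64
+ from handler import EndpointHandler
+
+ handler = EndpointHandler()  # downloads and loads the model
+
+ # Encode a local test image the same way a client would
+ with open("test.png", "rb") as f:
+     image_b64 = base64.b64encode(f.read()).decode("utf-8")
+
+ layers = handler({
+     "inputs": {"image": image_b64},
+     "parameters": {"layers": 4},
+ })
+ print(f"Got {len(layers)} layers")
+ for layer in layers:
+     with open(f"layer_{layer['layer_index']}.png", "wb") as f:
+         f.write(base64.b64decode(layer["image"]))
+ ```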
app.py ADDED
@@ -0,0 +1,60 @@
+ """
+ FastAPI server for the Qwen Image Layered model.
+ Compatible with the Hugging Face Inference Endpoints custom container format.
+ """
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from typing import Optional, Dict, Any
+ import uvicorn
+
+ # Import our handler
+ from handler import EndpointHandler
+
+ app = FastAPI()
+
+ # Initialized on startup so the server can bind the port before the model loads
+ handler = None
+
+ # Note: on_event is deprecated in newer FastAPI in favor of lifespan handlers,
+ # but it still works here.
+ @app.on_event("startup")
+ async def startup_event():
+     global handler
+     print("Initializing model...")
+     handler = EndpointHandler()
+     print("Model ready!")
+
+ class InferenceRequest(BaseModel):
+     inputs: Dict[str, Any]
+     parameters: Optional[Dict[str, Any]] = None
+
+ class HealthResponse(BaseModel):
+     status: str
+
+ @app.get("/health")
+ async def health() -> HealthResponse:
+     return HealthResponse(status="ok")
+
+ @app.get("/")
+ async def root():
+     return {"status": "Qwen Image Layered Endpoint Ready"}
+
+ @app.post("/")
+ async def predict(request: InferenceRequest):
+     if handler is None:
+         raise HTTPException(status_code=503, detail="Model not loaded")
+
+     data = {
+         "inputs": request.inputs,
+         "parameters": request.parameters or {}
+     }
+
+     try:
+         result = handler(data)
+         return result
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=8080)
handler.py ADDED
@@ -0,0 +1,109 @@
+ """
+ Handler for QwenImageLayeredPipeline.
+ Decomposes an input RGBA image into semantic layers (foreground, background, objects, etc.).
+ """
+ from typing import Dict, List, Any
+ import torch
+ import base64
+ import io
+ from PIL import Image
+
+ # Try to import the specific pipeline class; fall back to auto-loading
+ # if the installed diffusers build does not ship it yet.
+ try:
+     from diffusers import QwenImageLayeredPipeline
+ except ImportError:
+     from diffusers import DiffusionPipeline
+     QwenImageLayeredPipeline = None
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         # The correct model for layered decomposition
+         model_id = "Qwen/Qwen-Image-Layered"
+
+         print(f"Loading model {model_id}...")
+
+         if QwenImageLayeredPipeline:
+             print("Using explicit QwenImageLayeredPipeline class.")
+             self.pipeline = QwenImageLayeredPipeline.from_pretrained(
+                 model_id,
+                 torch_dtype=torch.bfloat16,
+             )
+         else:
+             print("Falling back to DiffusionPipeline auto-load.")
+             self.pipeline = DiffusionPipeline.from_pretrained(
+                 model_id,
+                 trust_remote_code=True,
+                 torch_dtype=torch.bfloat16,
+             )
+         print(f"Loaded pipeline class: {type(self.pipeline).__name__}")
+
+         # Keep the device around so inference can autocast on CPU as well
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.pipeline.to(self.device)
+
+         print("Model ready!")
+
+     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+         """
+         Expects:
+             inputs.image: base64-encoded RGBA image
+             parameters.layers: number of layers to decompose into (default: 4)
+             parameters.num_inference_steps: inference steps (default: 50)
+             parameters.resolution: output resolution (default: 640)
+
+         Returns:
+             List of base64-encoded layer images
+         """
+         inputs = data.pop("inputs", data)
+         parameters = data.pop("parameters", {})
+
+         # Parse the input image
+         image_data = inputs.get("image")
+         if not image_data:
+             raise ValueError("Missing 'image' in inputs. Please provide a base64-encoded RGBA image.")
+
+         try:
+             image_bytes = base64.b64decode(image_data)
+             image = Image.open(io.BytesIO(image_bytes)).convert("RGBA")
+         except Exception as e:
+             raise ValueError(f"Failed to decode image: {e}")
+
+         # Get parameters with defaults
+         layers = parameters.get("layers", 4)
+         num_inference_steps = parameters.get("num_inference_steps", 50)
+         resolution = parameters.get("resolution", 640)
+         prompt = parameters.get("prompt", "")  # Usually empty for decomposition
+
+         print(f"Decomposing image into {layers} layers at resolution {resolution}...")
+
+         # Run the pipeline (autocast on the detected device, not hard-coded CUDA,
+         # so a CPU fallback does not crash)
+         with torch.autocast(self.device):
+             output = self.pipeline(
+                 image,
+                 prompt,
+                 num_inference_steps=num_inference_steps,
+                 layers=layers,
+                 resolution=resolution,
+                 true_cfg_scale=4.0,
+                 cfg_normalize=False,
+                 use_en_prompt=True,
+             )
+
+         # Serialize output layers
+         images_response = []
+
+         if hasattr(output, "images") and output.images:
+             # output.images is a list of lists (per batch); take the first batch
+             layer_images = output.images[0] if isinstance(output.images[0], list) else output.images
+
+             for i, layer_img in enumerate(layer_images):
+                 if isinstance(layer_img, Image.Image):
+                     buffered = io.BytesIO()
+                     layer_img.save(buffered, format="PNG")
+                     img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+                     images_response.append({
+                         "layer_index": i,
+                         "image": img_str
+                     })
+
+         print(f"Returned {len(images_response)} layers.")
+         return images_response
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ torch
+ pillow
+ sentencepiece
+ protobuf
+ fastapi
+ uvicorn
+ git+https://github.com/huggingface/peft.git
+ git+https://github.com/huggingface/diffusers.git
+ git+https://github.com/huggingface/transformers.git
+ git+https://github.com/huggingface/accelerate.git