Spaces:

sidmaz666
/

imgenapi

Paused

App Files Files Community

sidmaz666 committed on Apr 19

Commit

49a8215

verified ·

1 Parent(s): 42ec191

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -141

app.py CHANGED Viewed

@@ -16,15 +16,13 @@ from fastapi.responses import FileResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
-# Diffusers imports
 from diffusers import (
-    StableDiffusionPipeline,
     StableDiffusionImg2ImgPipeline,
     StableDiffusionControlNetPipeline,
     ControlNetModel,
     LCMScheduler,
 )
-from optimum.intel import OVStableDiffusionPipeline
 from transformers import CLIPTokenizer
 # Configure logging
@@ -32,7 +30,7 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # --- Configuration ---
-MODEL_PATH = os.environ.get("OV_MODEL_PATH", "/app/models/LCM-dreamshaper-v7-openvino")
 CONTROLNET_ID = "lllyasviel/sd-controlnet-canny"
 OUTPUT_DIR = Path("/tmp/outputs")
 LORA_CACHE_DIR = Path("/app/models/loras")
@@ -42,56 +40,47 @@ LORA_CACHE_DIR.mkdir(parents=True, exist_ok=True)
 # --- Pydantic Models for API Documentation ---
 class GenerationResponse(BaseModel):
-    """Response after successful image generation."""
     status: Literal["success"] = "success"
     message: str = "Image generated successfully"
-    image_base64: Optional[str] = Field(None, description="Base64 encoded image (optional)")
-    image_url: Optional[str] = Field(None, description="Relative URL to download the image")
-    seed: int = Field(..., description="The seed used for generation (for reproducibility)")
-    parameters: dict = Field(..., description="All parameters used for this generation")
 class ErrorResponse(BaseModel):
-    """Standard error response."""
     status: Literal["error"] = "error"
     message: str
     detail: Optional[str] = None
 class LoRAInfo(BaseModel):
-    """Information about a LoRA."""
-    id: str = Field(..., description="Hugging Face repository ID")
-    name: str = Field(..., description="Display name")
-    description: str = Field(..., description="What the LoRA does")
-    suggested_strength: float = Field(..., description="Recommended strength")
-    keywords: List[str] = Field(..., description="Suggested prompt keywords")
 # --- FastAPI App Setup ---
 app = FastAPI(
     title="LCM Dreamshaper v7 Image Generation API",
     description="""
-    ## Fast, CPU-optimized image generation using LCM Dreamshaper v7 (OpenVINO)
     This API provides:
-    - **`/generate`** - Text-to-image generation (OpenVINO optimized, ~20-30s)
-    - **`/img2img`** - Image-to-image transformation (OpenVINO optimized)
-    - **`/controlnet`** - Generate with structural guidance using ControlNet (PyTorch fallback)
     - **`/loras`** - List available style LoRAs
     ### Model Information
-    - **Base Model**: `rupeshs/LCM-dreamshaper-v7-openvino` (1B parameters, 4-step LCM)
-    - **Inference Engine**: OpenVINO for basic generation; PyTorch for LoRAs/ControlNet
-    - **Average Generation Time**: 20-40 seconds on CPU (16GB RAM) for txt2img/img2img
-    - **Recommended Steps**: 4 (optimized for LCM)
     ### Usage Notes
-    - For LoRA requests, the system falls back to PyTorch (slightly slower but functional).
     - All image dimensions must be multiples of 8.
     - Seed is returned with every response; reuse it to reproduce the same image.
     """,
     version="1.0.0",
-    contact={
-        "name": "Your Name",
-        "url": "https://huggingface.co/your-space",
-    },
 )
 app.add_middleware(
@@ -102,13 +91,11 @@ app.add_middleware(
 )
 # --- Global Variables for Models ---
-ov_pipeline = None          # OpenVINO pipeline (fast, no LoRA)
-torch_txt2img = None        # PyTorch txt2img pipeline (for LoRA)
-torch_img2img = None        # PyTorch img2img pipeline (for LoRA)
-controlnet_pipeline = None  # PyTorch ControlNet pipeline
 tokenizer = None
-# --- Available LoRAs (Pre-defined) ---
 AVAILABLE_LORAS = [
     {
         "id": "prithiviraj1710/pixel-art",
@@ -163,7 +150,6 @@ AVAILABLE_LORAS = [
 # --- Helper Functions ---
 def download_lora_sync(lora_id: str) -> Path:
-    """Download a LoRA from Hugging Face if not already cached (synchronous)."""
     lora_path = LORA_CACHE_DIR / f"{lora_id.replace('/', '_')}.safetensors"
     if lora_path.exists():
         return lora_path
@@ -176,7 +162,6 @@ def download_lora_sync(lora_id: str) -> Path:
             filename="pytorch_lora_weights.safetensors",
             cache_dir=str(LORA_CACHE_DIR)
         )
-        # Create a symlink to our expected path for easy future access
         if not lora_path.exists():
             os.symlink(downloaded_path, lora_path)
         return lora_path
@@ -185,7 +170,6 @@ def download_lora_sync(lora_id: str) -> Path:
         raise HTTPException(status_code=400, detail=f"LoRA {lora_id} not found or invalid")
 def apply_loras_to_pipe(pipe, lora_ids: str, lora_scales: Optional[str] = None) -> list:
-    """Apply LoRAs to a PyTorch pipeline and return list of applied LoRAs."""
     lora_list = []
     if not lora_ids:
         return lora_list
@@ -199,7 +183,6 @@ def apply_loras_to_pipe(pipe, lora_ids: str, lora_scales: Optional[str] = None)
     if len(lora_ids_list) != len(scales_list):
         raise HTTPException(status_code=400, detail="Number of LoRA IDs must match number of scales")
-    # Download and load each LoRA synchronously
     for lora_id, scale in zip(lora_ids_list, scales_list):
         lora_path = download_lora_sync(lora_id)
         pipe.load_lora_weights(str(lora_path))
@@ -208,74 +191,58 @@ def apply_loras_to_pipe(pipe, lora_ids: str, lora_scales: Optional[str] = None)
     return lora_list
-def load_ov_pipeline():
-    """Load the OpenVINO-optimized LCM pipeline from local path."""
-    global ov_pipeline, tokenizer
-    if ov_pipeline is None:
-        logger.info(f"Loading OpenVINO pipeline from {MODEL_PATH}...")
-        ov_pipeline = OVStableDiffusionPipeline.from_pretrained(
-            MODEL_PATH,
-            ov_config={"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "INFERENCE_NUM_THREADS": "4"},
-            compile=False
-        )
-        ov_pipeline.reshape(batch_size=1, height=512, width=512, num_images_per_prompt=1)
-        ov_pipeline.compile()
-        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-        logger.info("OpenVINO pipeline loaded and compiled.")
-    return ov_pipeline
-def load_torch_pipelines():
-    """Load PyTorch pipelines for LoRA support."""
-    global torch_txt2img, torch_img2img
-    if torch_txt2img is None:
-        logger.info("Loading PyTorch pipelines (for LoRA support)...")
-        # Use the original Dreamshaper v7 model
-        model_id = "Lykon/dreamshaper-7"
-        pipe = StableDiffusionPipeline.from_pretrained(
-            model_id,
             torch_dtype=torch.float32,
             safety_checker=None
         )
-        pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-        pipe.to("cpu")
-        torch_txt2img = pipe
-        # Create img2img pipeline from same components
-        torch_img2img = StableDiffusionImg2ImgPipeline(
-            vae=pipe.vae,
-            text_encoder=pipe.text_encoder,
-            tokenizer=pipe.tokenizer,
-            unet=pipe.unet,
-            scheduler=pipe.scheduler,
             safety_checker=None,
             feature_extractor=None,
         )
-        torch_img2img.to("cpu")
-        logger.info("PyTorch pipelines loaded.")
-    return torch_txt2img, torch_img2img
 def load_controlnet_pipeline():
-    """Load the ControlNet pipeline (PyTorch)."""
-    global controlnet_pipeline
-    if controlnet_pipeline is None:
         logger.info("Loading ControlNet pipeline...")
         controlnet = ControlNetModel.from_pretrained(
             CONTROLNET_ID,
             torch_dtype=torch.float32
         )
-        controlnet_pipeline = StableDiffusionControlNetPipeline.from_pretrained(
-            "Lykon/dreamshaper-7",
             controlnet=controlnet,
             torch_dtype=torch.float32,
             safety_checker=None
         )
-        controlnet_pipeline.scheduler = LCMScheduler.from_config(controlnet_pipeline.scheduler.config)
-        controlnet_pipeline.to("cpu")
         logger.info("ControlNet pipeline loaded.")
-    return controlnet_pipeline
 def apply_canny_edge(image: Image.Image, low_threshold: int = 100, high_threshold: int = 200) -> Image.Image:
-    """Apply Canny edge detection to an image."""
     image_np = np.array(image)
     image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
     edges = cv2.Canny(image_np, low_threshold, high_threshold)
@@ -283,7 +250,6 @@ def apply_canny_edge(image: Image.Image, low_threshold: int = 100, high_threshol
     return Image.fromarray(edges)
 async def save_upload_file(upload_file: UploadFile) -> Path:
-    """Save an uploaded file to a temporary location."""
     temp_dir = Path("/tmp/uploads")
     temp_dir.mkdir(exist_ok=True)
     file_path = temp_dir / f"{uuid.uuid4()}_{upload_file.filename}"
@@ -293,13 +259,11 @@ async def save_upload_file(upload_file: UploadFile) -> Path:
     return file_path
 def image_to_base64(image: Image.Image, format: str = "PNG") -> str:
-    """Convert a PIL Image to a base64 string."""
     buffered = BytesIO()
     image.save(buffered, format=format)
     return base64.b64encode(buffered.getvalue()).decode("utf-8")
 def cleanup_temp_files(*paths: Path):
-    """Remove temporary files."""
     for path in paths:
         try:
             if path.exists():
@@ -354,26 +318,11 @@ async def text_to_image(
     generator = torch.Generator(device="cpu").manual_seed(seed)
     lora_list = []
-    # Choose pipeline based on LoRA presence
     if lora_ids:
-        # Use PyTorch pipeline with LoRA support
-        pipe_txt2img, _ = load_torch_pipelines()
-        lora_list = apply_loras_to_pipe(pipe_txt2img, lora_ids, lora_scales)
-        try:
-            image = pipe_txt2img(
-                prompt=prompt,
-                negative_prompt=negative_prompt,
-                num_inference_steps=steps,
-                guidance_scale=guidance_scale,
-                generator=generator,
-                height=height,
-                width=width
-            ).images[0]
-        finally:
-            pipe_txt2img.unfuse_lora()
-    else:
-        # Use fast OpenVINO pipeline
-        pipe = load_ov_pipeline()
         image = pipe(
             prompt=prompt,
             negative_prompt=negative_prompt,
@@ -383,6 +332,9 @@ async def text_to_image(
             height=height,
             width=width
         ).images[0]
     output_filename = f"txt2img_{uuid.uuid4()}.png"
     output_path = OUTPUT_DIR / output_filename
@@ -452,33 +404,24 @@ async def image_to_image(
     generator = torch.Generator(device="cpu").manual_seed(seed)
     lora_list = []
     try:
-        if lora_ids:
-            _, pipe_img2img = load_torch_pipelines()
-            lora_list = apply_loras_to_pipe(pipe_img2img, lora_ids, lora_scales)
-            output_image = pipe_img2img(
-                prompt=prompt,
-                image=init_image,
-                strength=strength,
-                negative_prompt=negative_prompt,
-                num_inference_steps=steps,
-                guidance_scale=guidance_scale,
-                generator=generator
-            ).images[0]
-            pipe_img2img.unfuse_lora()
-        else:
-            pipe = load_ov_pipeline()
-            output_image = pipe(
-                prompt=prompt,
-                image=init_image,
-                strength=strength,
-                negative_prompt=negative_prompt,
-                num_inference_steps=steps,
-                guidance_scale=guidance_scale,
-                generator=generator
-            ).images[0]
     finally:
         cleanup_temp_files(input_path)
     output_filename = f"img2img_{uuid.uuid4()}.png"
     output_path = OUTPUT_DIR / output_filename
@@ -617,20 +560,25 @@ async def get_image(filename: str):
     summary="Health check endpoint"
 )
 async def health_check():
-    model_index_exists = (Path(MODEL_PATH) / "model_index.json").exists()
     return {
-        "status": "healthy" if model_index_exists else "degraded",
-        "base_model_loaded": model_index_exists,
         "available_loras": len(AVAILABLE_LORAS)
     }
 @app.on_event("startup")
 async def startup_event():
-    """Pre-load OpenVINO model to reduce first-request latency."""
-    logger.info("Starting up, pre-loading OpenVINO model...")
     try:
-        load_ov_pipeline()
-        logger.info("OpenVINO pipeline ready.")
     except Exception as e:
-        logger.error(f"Failed to load OpenVINO pipeline: {e}")
-    # PyTorch pipelines are loaded on-demand to save memory

 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
 from diffusers import (
+    DiffusionPipeline,
     StableDiffusionImg2ImgPipeline,
     StableDiffusionControlNetPipeline,
     ControlNetModel,
     LCMScheduler,
 )
 from transformers import CLIPTokenizer
 # Configure logging
 logger = logging.getLogger(__name__)
 # --- Configuration ---
+MODEL_ID = "SimianLuo/LCM_Dreamshaper_v7"
 CONTROLNET_ID = "lllyasviel/sd-controlnet-canny"
 OUTPUT_DIR = Path("/tmp/outputs")
 LORA_CACHE_DIR = Path("/app/models/loras")
 # --- Pydantic Models for API Documentation ---
 class GenerationResponse(BaseModel):
     status: Literal["success"] = "success"
     message: str = "Image generated successfully"
+    image_base64: Optional[str] = Field(None)
+    image_url: Optional[str] = Field(None)
+    seed: int = Field(...)
+    parameters: dict = Field(...)
 class ErrorResponse(BaseModel):
     status: Literal["error"] = "error"
     message: str
     detail: Optional[str] = None
 class LoRAInfo(BaseModel):
+    id: str
+    name: str
+    description: str
+    suggested_strength: float
+    keywords: List[str]
 # --- FastAPI App Setup ---
 app = FastAPI(
     title="LCM Dreamshaper v7 Image Generation API",
     description="""
+    ## Fast, CPU-optimized image generation using LCM Dreamshaper v7
     This API provides:
+    - **`/generate`** - Text-to-image generation (~20-30s on CPU)
+    - **`/img2img`** - Image-to-image transformation
+    - **`/controlnet`** - Generate with structural guidance using ControlNet
     - **`/loras`** - List available style LoRAs
     ### Model Information
+    - **Base Model**: `SimianLuo/LCM_Dreamshaper_v7` (1B parameters, 4-step LCM)
+    - **Inference Engine**: PyTorch with LCM Scheduler
+    - **Average Generation Time**: 20-40 seconds on CPU (16GB RAM)
     ### Usage Notes
     - All image dimensions must be multiples of 8.
     - Seed is returned with every response; reuse it to reproduce the same image.
     """,
     version="1.0.0",
 )
 app.add_middleware(
 )
 # --- Global Variables for Models ---
+txt2img_pipe = None
+img2img_pipe = None
+controlnet_pipe = None
 tokenizer = None
 AVAILABLE_LORAS = [
     {
         "id": "prithiviraj1710/pixel-art",
 # --- Helper Functions ---
 def download_lora_sync(lora_id: str) -> Path:
     lora_path = LORA_CACHE_DIR / f"{lora_id.replace('/', '_')}.safetensors"
     if lora_path.exists():
         return lora_path
             filename="pytorch_lora_weights.safetensors",
             cache_dir=str(LORA_CACHE_DIR)
         )
         if not lora_path.exists():
             os.symlink(downloaded_path, lora_path)
         return lora_path
         raise HTTPException(status_code=400, detail=f"LoRA {lora_id} not found or invalid")
 def apply_loras_to_pipe(pipe, lora_ids: str, lora_scales: Optional[str] = None) -> list:
     lora_list = []
     if not lora_ids:
         return lora_list
     if len(lora_ids_list) != len(scales_list):
         raise HTTPException(status_code=400, detail="Number of LoRA IDs must match number of scales")
     for lora_id, scale in zip(lora_ids_list, scales_list):
         lora_path = download_lora_sync(lora_id)
         pipe.load_lora_weights(str(lora_path))
     return lora_list
+def load_txt2img_pipeline():
+    global txt2img_pipe, tokenizer
+    if txt2img_pipe is None:
+        logger.info(f"Loading text-to-image pipeline from {MODEL_ID}...")
+        txt2img_pipe = DiffusionPipeline.from_pretrained(
+            MODEL_ID,
             torch_dtype=torch.float32,
             safety_checker=None
         )
+        txt2img_pipe.to("cpu")
+        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
+        logger.info("Text-to-image pipeline loaded.")
+    return txt2img_pipe
+def load_img2img_pipeline():
+    global img2img_pipe
+    if img2img_pipe is None:
+        logger.info("Loading image-to-image pipeline...")
+        txt2img = load_txt2img_pipeline()
+        img2img_pipe = StableDiffusionImg2ImgPipeline(
+            vae=txt2img.vae,
+            text_encoder=txt2img.text_encoder,
+            tokenizer=txt2img.tokenizer,
+            unet=txt2img.unet,
+            scheduler=txt2img.scheduler,
             safety_checker=None,
             feature_extractor=None,
         )
+        img2img_pipe.to("cpu")
+        logger.info("Image-to-image pipeline loaded.")
+    return img2img_pipe
 def load_controlnet_pipeline():
+    global controlnet_pipe
+    if controlnet_pipe is None:
         logger.info("Loading ControlNet pipeline...")
         controlnet = ControlNetModel.from_pretrained(
             CONTROLNET_ID,
             torch_dtype=torch.float32
         )
+        controlnet_pipe = StableDiffusionControlNetPipeline.from_pretrained(
+            MODEL_ID,
             controlnet=controlnet,
             torch_dtype=torch.float32,
             safety_checker=None
         )
+        controlnet_pipe.scheduler = LCMScheduler.from_config(controlnet_pipe.scheduler.config)
+        controlnet_pipe.to("cpu")
         logger.info("ControlNet pipeline loaded.")
+    return controlnet_pipe
 def apply_canny_edge(image: Image.Image, low_threshold: int = 100, high_threshold: int = 200) -> Image.Image:
     image_np = np.array(image)
     image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
     edges = cv2.Canny(image_np, low_threshold, high_threshold)
     return Image.fromarray(edges)
 async def save_upload_file(upload_file: UploadFile) -> Path:
     temp_dir = Path("/tmp/uploads")
     temp_dir.mkdir(exist_ok=True)
     file_path = temp_dir / f"{uuid.uuid4()}_{upload_file.filename}"
     return file_path
 def image_to_base64(image: Image.Image, format: str = "PNG") -> str:
     buffered = BytesIO()
     image.save(buffered, format=format)
     return base64.b64encode(buffered.getvalue()).decode("utf-8")
 def cleanup_temp_files(*paths: Path):
     for path in paths:
         try:
             if path.exists():
     generator = torch.Generator(device="cpu").manual_seed(seed)
     lora_list = []
+    pipe = load_txt2img_pipeline()
     if lora_ids:
+        lora_list = apply_loras_to_pipe(pipe, lora_ids, lora_scales)
+    try:
         image = pipe(
             prompt=prompt,
             negative_prompt=negative_prompt,
             height=height,
             width=width
         ).images[0]
+    finally:
+        if lora_ids:
+            pipe.unfuse_lora()
     output_filename = f"txt2img_{uuid.uuid4()}.png"
     output_path = OUTPUT_DIR / output_filename
     generator = torch.Generator(device="cpu").manual_seed(seed)
     lora_list = []
+    pipe = load_img2img_pipeline()
+    if lora_ids:
+        lora_list = apply_loras_to_pipe(pipe, lora_ids, lora_scales)
     try:
+        output_image = pipe(
+            prompt=prompt,
+            image=init_image,
+            strength=strength,
+            negative_prompt=negative_prompt,
+            num_inference_steps=steps,
+            guidance_scale=guidance_scale,
+            generator=generator
+        ).images[0]
     finally:
         cleanup_temp_files(input_path)
+        if lora_ids:
+            pipe.unfuse_lora()
     output_filename = f"img2img_{uuid.uuid4()}.png"
     output_path = OUTPUT_DIR / output_filename
     summary="Health check endpoint"
 )
 async def health_check():
+    try:
+        _ = load_txt2img_pipeline()
+        status = "healthy"
+        model_loaded = True
+    except:
+        status = "degraded"
+        model_loaded = False
     return {
+        "status": status,
+        "base_model_loaded": model_loaded,
         "available_loras": len(AVAILABLE_LORAS)
     }
 @app.on_event("startup")
 async def startup_event():
+    logger.info("Starting up, pre-loading text-to-image model...")
     try:
+        load_txt2img_pipeline()
+        logger.info("Text-to-image model is ready.")
     except Exception as e:
+        logger.error(f"Failed to pre-load model: {e}")