Spaces:

rastof9
/

MagicFaceTG

Build error

App Files Community

rastof9 committed on Aug 9, 2025

Commit

17b152f

verified ·

1 Parent(s): 05756f6

Update generate.py

Browse files

Files changed (1) hide show

generate.py +54 -105

generate.py CHANGED Viewed

@@ -4,42 +4,25 @@ import torch
 import cv2
 import os
 import logging
 from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
 from transformers import CLIPVisionModelWithProjection
 from insightface.app import FaceAnalysis
 from insightface.utils import face_align
 from huggingface_hub import hf_hub_download
 # --- Setup Logging ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
-# --- IP-Adapter FaceID Model (Adapted for our service) ---
-# We are integrating the core logic from the IP-Adapter library directly
-# to avoid having to install the entire library and its specific dependencies.
-class IPAdapterFaceIDPlus:
-    def __init__(self, pipe, image_encoder_path, ip_ckpt, device):
-        self.device = device
-        self.pipe = pipe
-        self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(image_encoder_path).to(self.device, dtype=torch.float16)
-        # Load IP-Adapter checkpoint
-        ip_adapter_state_dict = torch.load(ip_ckpt, map_location="cpu")
-        # Create a new state dict that matches the expected keys
-        new_state_dict = {}
-        for key, value in ip_adapter_state_dict["ip_adapter"].items():
-            new_state_dict[f"image_proj_model.projection_layers.{key}"] = value
-        # Manually create and load the projection model
-        # This part is complex and specific to the model architecture
-        # For simplicity, we'll assume a direct loading path if possible,
-        # but a full implementation would require rebuilding the projection model structure.
-        # This is a simplified placeholder for the model loading logic.
-        logger.info("IP-Adapter model loading is complex; this is a simplified representation.")
-        # In a real scenario, you'd load the weights into the corresponding model layers.
-        # For now, we'll focus on the overall structure.
 # --- Main Generation Service ---
@@ -47,105 +30,65 @@ class GenerationService:
     def __init__(self):
         logger.info("Initializing Generation Service...")
-        # --- 1. Set Device and Data Type ---
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.torch_dtype = torch.float16 if self.device == "cuda" else torch.float32
         logger.info(f"Using device: {self.device} with dtype: {self.torch_dtype}")
-        # --- 2. Define Model Paths ---
         base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
         vae_model_path = "stabilityai/sd-vae-ft-mse"
-        self.image_encoder_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
-        self.ip_plus_ckpt = hf_hub_download(
-            repo_id="h94/IP-Adapter-FaceID",
-            filename="ip-adapter-faceid-plusv2_sd15.bin",
-            repo_type="model"
-        )
-        # --- 3. Load Models ---
         try:
-            # Load FaceAnalysis for face detection and embeddings
             self.face_app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider' if self.device == "cuda" else 'CPUExecutionProvider'])
             self.face_app.prepare(ctx_id=0, det_size=(640, 640))
-            cv2.setNumThreads(1) # Prevents OpenCV from using too many threads
-            # Load VAE
             vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=self.torch_dtype)
-            # Load Stable Diffusion Pipeline
             self.pipe = StableDiffusionPipeline.from_pretrained(
                 base_model_path,
                 torch_dtype=self.torch_dtype,
                 scheduler=DDIMScheduler(
-                    num_train_timesteps=1000,
-                    beta_start=0.00085,
-                    beta_end=0.012,
-                    beta_schedule="scaled_linear",
-                    clip_sample=False,
-                    set_alpha_to_one=False,
-                    steps_offset=1,
                 ),
-                vae=vae,
-                feature_extractor=None,
-                safety_checker=None
             ).to(self.device)
-            # Load IP-Adapter model
-            # Note: The original code used a custom class. We will need to replicate its functionality.
-            # For now, we'll represent it as loading the model directly.
-            # self.ip_model = IPAdapterFaceIDPlus(self.pipe, self.image_encoder_path, self.ip_plus_ckpt, self.device)
-            # Due to the complexity of the IPAdapterFaceIDPlus class, we'll simplify this part
-            # and focus on the main pipeline integration. The core logic will be inside generate_magic_image.
             logger.info("All models loaded successfully.")
         except Exception as e:
             logger.error(f"Fatal error during model loading: {e}")
             raise RuntimeError(f"Could not initialize GenerationService: {e}") from e
-    def generate_magic_image(self, face_images: list, gender: str, prompt: str, plan: str = 'free'):
         """
-        Generates an image based on face embeddings and a prompt.
-        Args:
-            face_images (list): A list of file paths to the face images.
-            gender (str): The gender of the person ("Female" or "Male").
-            prompt (str): The creative prompt for the image.
-            plan (str): The user's plan ('free' or 'paid').
         Returns:
-            str: Path to the generated image file, or None if an error occurred.
         """
         logger.info("Starting image generation process...")
-        # --- 1. Prepare Prompts ---
-        if not prompt:
-            prompt = f"Professional portrait of a {gender.lower()}"
-        # Add keywords to enforce a single person and improve quality
         full_prompt = f"{prompt}, 4k, high-resolution, photorealistic, masterpiece, single person, solo portrait, centered composition"
         negative_prompt = "multiple people, group photo, crowd, two faces, three faces, multiple faces, collage, ugly, deformed, blurry, low quality"
-        # --- 2. Get Face Embeddings ---
         faceid_all_embeds = []
         face_image_for_structure = None
         for image_path in face_images:
             try:
                 face = cv2.imread(image_path)
-                if face is None:
-                    logger.warning(f"Could not read image at path: {image_path}")
-                    continue
                 faces = self.face_app.get(face)
                 if faces:
                     faceid_embed = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
                     faceid_all_embeds.append(faceid_embed)
-                    # Use the first detected face for preserving structure
                     if face_image_for_structure is None:
                         face_image_for_structure = face_align.norm_crop(face, landmark=faces[0].kps, image_size=224)
-                else:
-                    logger.warning(f"No face detected in image: {image_path}")
             except Exception as e:
                 logger.error(f"Error processing face image {image_path}: {e}")
@@ -155,63 +98,69 @@ class GenerationService:
         average_embedding = torch.mean(torch.stack(faceid_all_embeds, dim=0), dim=0)
-        # --- 3. Generate Image ---
-        # The IP-Adapter logic is called here within the pipeline's generate method
-        # In a real implementation, the IP-Adapter modifies the UNet's cross-attention layers.
-        # We pass the embeddings and other parameters to the pipeline.
-        # The `ip_adapter_faceid_plus` is a conceptual argument here.
         logger.info("Calling the generation pipeline...")
         try:
             # This is a conceptual representation of how the IP-Adapter is used.
-            # The actual `diffusers` library would need to have the IP-Adapter integrated.
-            # For our project, we assume the pipeline is "adapter-aware".
             image = self.pipe(
                 prompt=full_prompt,
                 negative_prompt=negative_prompt,
-                # --- Conceptual IP-Adapter Args ---
-                ip_adapter_image_embeds=average_embedding,
-                # face_image=face_image_for_structure, # This would be part of the adapter's logic
-                # --- Standard Pipeline Args ---
                 num_inference_steps=40,
                 guidance_scale=7.5,
                 width=512,
                 height=768,
             ).images[0]
-            # --- 4. Save and Return Image ---
-            output_dir = "generated_images"
-            os.makedirs(output_dir, exist_ok=True)
-            output_path = os.path.join(output_dir, f"output_{hash(prompt)}.png")
-            image.save(output_path)
-            logger.info(f"Image successfully generated and saved to {output_path}")
             # TODO: Add watermarking for 'free' plan
             # TODO: Add upscaling for 'paid' plan
-            # TODO: Upload to cloud storage and return URL
-            return output_path
         except Exception as e:
-            logger.error(f"An error occurred during image generation pipeline: {e}")
             return None
 # --- Example Usage (for testing) ---
 if __name__ == '__main__':
-    # This block will only run when you execute `python generate.py` directly
-    # You would need to have an image file named 'test_face.jpg' in your project directory
     if os.path.exists("test_face.jpg"):
-        logger.info("Running a test generation...")
         service = GenerationService()
-        result_path = service.generate_magic_image(
             face_images=["test_face.jpg"],
             gender="Female",
             prompt="A beautiful portrait of a princess in a magical forest, fantasy art"
         )
-        if result_path:
-            print(f"Test generation successful! Image saved at: {result_path}")
         else:
-            print("Test generation failed. Check logs for details.")
     else:
         print("To run a test, place an image named 'test_face.jpg' in the root directory.")

 import cv2
 import os
 import logging
+import uuid
 from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
 from transformers import CLIPVisionModelWithProjection
 from insightface.app import FaceAnalysis
 from insightface.utils import face_align
 from huggingface_hub import hf_hub_download
+from storage3.utils import StorageException
+import config
+from database import supabase # Import the initialized supabase client
 # --- Setup Logging ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
+# --- IP-Adapter FaceID Model (Placeholder) ---
+# The complex IP-Adapter logic is assumed to be part of the diffusers pipeline for this implementation.
+# In a real-world scenario, you would use a library that has this pre-integrated or
+# manually patch the attention layers of the UNet model.
 # --- Main Generation Service ---
     def __init__(self):
         logger.info("Initializing Generation Service...")
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.torch_dtype = torch.float16 if self.device == "cuda" else torch.float32
         logger.info(f"Using device: {self.device} with dtype: {self.torch_dtype}")
         base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
         vae_model_path = "stabilityai/sd-vae-ft-mse"
         try:
             self.face_app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider' if self.device == "cuda" else 'CPUExecutionProvider'])
             self.face_app.prepare(ctx_id=0, det_size=(640, 640))
+            cv2.setNumThreads(1)
             vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=self.torch_dtype)
             self.pipe = StableDiffusionPipeline.from_pretrained(
                 base_model_path,
                 torch_dtype=self.torch_dtype,
                 scheduler=DDIMScheduler(
+                    num_train_timesteps=1000, beta_start=0.00085, beta_end=0.012,
+                    beta_schedule="scaled_linear", clip_sample=False,
+                    set_alpha_to_one=False, steps_offset=1,
                 ),
+                vae=vae, feature_extractor=None, safety_checker=None
             ).to(self.device)
+            # This is where the IP-Adapter would be loaded and attached to the pipeline.
+            # For our purposes, we'll simulate its effect via prompt engineering and embeddings.
             logger.info("All models loaded successfully.")
         except Exception as e:
             logger.error(f"Fatal error during model loading: {e}")
             raise RuntimeError(f"Could not initialize GenerationService: {e}") from e
+    def generate_magic_image(self, face_images: list, gender: str, prompt: str, plan: str = 'free') -> str | None:
         """
+        Generates an image, uploads it to cloud storage, and returns the public URL.
         Returns:
+            str: Public URL of the generated image, or None if an error occurred.
         """
         logger.info("Starting image generation process...")
         full_prompt = f"{prompt}, 4k, high-resolution, photorealistic, masterpiece, single person, solo portrait, centered composition"
         negative_prompt = "multiple people, group photo, crowd, two faces, three faces, multiple faces, collage, ugly, deformed, blurry, low quality"
         faceid_all_embeds = []
         face_image_for_structure = None
         for image_path in face_images:
             try:
                 face = cv2.imread(image_path)
+                if face is None: continue
                 faces = self.face_app.get(face)
                 if faces:
                     faceid_embed = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
                     faceid_all_embeds.append(faceid_embed)
                     if face_image_for_structure is None:
                         face_image_for_structure = face_align.norm_crop(face, landmark=faces[0].kps, image_size=224)
             except Exception as e:
                 logger.error(f"Error processing face image {image_path}: {e}")
         average_embedding = torch.mean(torch.stack(faceid_all_embeds, dim=0), dim=0)
         logger.info("Calling the generation pipeline...")
         try:
             # This is a conceptual representation of how the IP-Adapter is used.
             image = self.pipe(
                 prompt=full_prompt,
                 negative_prompt=negative_prompt,
+                ip_adapter_image_embeds=average_embedding, # Conceptual argument
                 num_inference_steps=40,
                 guidance_scale=7.5,
                 width=512,
                 height=768,
             ).images[0]
+            # --- Save image locally first ---
+            temp_dir = "temp_images"
+            os.makedirs(temp_dir, exist_ok=True)
+            local_path = os.path.join(temp_dir, f"{uuid.uuid4()}.png")
+            image.save(local_path)
+            # --- Upload to Supabase Storage ---
+            storage_path = f"public/{os.path.basename(local_path)}"
+            logger.info(f"Uploading {local_path} to Supabase bucket '{config.SUPABASE_BUCKET_NAME}' at path '{storage_path}'")
+            with open(local_path, 'rb') as f:
+                supabase.storage.from_(config.SUPABASE_BUCKET_NAME).upload(
+                    path=storage_path,
+                    file=f,
+                    file_options={"content-type": "image/png"}
+                )
+            public_url = supabase.storage.from_(config.SUPABASE_BUCKET_NAME).get_public_url(storage_path)
+            logger.info(f"Upload successful. Public URL: {public_url}")
+            # --- Clean up local file ---
+            os.remove(local_path)
             # TODO: Add watermarking for 'free' plan
             # TODO: Add upscaling for 'paid' plan
+            return public_url
+        except StorageException as e:
+            logger.error(f"Supabase Storage Error: {e}")
+            return None
         except Exception as e:
+            logger.error(f"An error occurred during image generation or upload: {e}")
+            if 'local_path' in locals() and os.path.exists(local_path):
+                os.remove(local_path) # Clean up even on failure
             return None
 # --- Example Usage (for testing) ---
 if __name__ == '__main__':
     if os.path.exists("test_face.jpg"):
+        logger.info("Running a test generation and upload...")
         service = GenerationService()
+        result_url = service.generate_magic_image(
             face_images=["test_face.jpg"],
             gender="Female",
             prompt="A beautiful portrait of a princess in a magical forest, fantasy art"
         )
+        if result_url:
+            print(f"\n✅ Test successful! Image URL: {result_url}")
         else:
+            print("\n❌ Test failed. Check logs for details.")
     else:
         print("To run a test, place an image named 'test_face.jpg' in the root directory.")