Spaces: xen87348/anycoder-31767185

Runtime error

App Files Community

xen87348 committed on Dec 7, 2025

Commit

56b5d8a

verified ·

1 Parent(s): f15cb9c

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -116

app.py CHANGED Viewed

@@ -1,64 +1,15 @@
-import gradio as gr
-import torch
-import spaces
-from PIL import Image
-from transformers import CLIPTokenizer, CLIPTextModel
-import numpy as np
-import os
-from typing import Literal
 # --- 1. CONFIGURATION AND MODEL PLACEHOLDERS ---
-# Define the list of styles for the Gradio dropdown
-STYLE_OPTIONS: list[str] = ["Photorealistic", "Impressionist", "Oil Painting", "Pixel Art"]
-# Dummy embeddings: in a real system, these would be loaded or calculated.
-# Using a 768-dim vector to match CLIP's output dimension.
-STYLE_EMBEDDINGS: dict[str, torch.Tensor] = {
-    "Photorealistic": torch.zeros(768),
-    "Impressionist": torch.ones(768) * 0.2,
-    "Oil Painting": torch.ones(768) * 0.5,
-    "Pixel Art": torch.ones(768) * 0.8,
-}
-class CustomTextEncoder:
-    """Wrapper for the text encoder (using CLIP) to convert prompts to embeddings."""
-    def __init__(self, device: str = "cuda"):
-        # Load pre-trained CLIP components
-        self.tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
-        self.text_model = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
-        self.device = device
-    def encode(self, prompt: str) -> torch.Tensor:
-        """Converts text prompt into a single 768-dimensional embedding vector."""
-        if not prompt:
-            # Return a zero vector for empty prompts as negative conditioning
-            return torch.zeros(1, 768, device=self.device)
-        inputs = self.tokenizer(
-            prompt,
-            padding="max_length",
-            max_length=self.tokenizer.model_max_length,
-            truncation=True,
-            return_tensors="pt"
-        ).to(self.device)
-        with torch.no_grad():
-            # Get the pooled output for a single vector representing the entire text
-            embeddings = self.text_model(**inputs).pooler_output
-        return embeddings.to(torch.float32) # Ensure output is float32 for consistency
 class GANGenerator(torch.nn.Module):
     """
-    Conditional GAN Generator Placeholder.
-    This architecture uses a simple linear layer to simulate generation based on:
-    1. Noise vector (z)
-    2. Positive Text Embedding (c_pos)
-    3. Negative Text Embedding (c_neg)
-    4. Style Embedding (s_embed)
     """
     def __init__(self, latent_dim: int = 100, embed_dim: int = 768):
         super().__init__()
-        # Total input dimension = Noise (100) + Positive (768) + Negative (768) + Style (768)
         input_dim = latent_dim + embed_dim * 3
         # Output: 3 color channels * 256 * 256 image size
@@ -68,33 +19,29 @@ class GANGenerator(torch.nn.Module):
     def forward(self, c_pos: torch.Tensor, c_neg: torch.Tensor, s_embed: torch.Tensor) -> torch.Tensor:
         batch_size = c_pos.shape[0]
-        device = c_pos.device # Ensure noise is on the correct device
-        # 1. Generate noise vector
         z = torch.randn(batch_size, self.latent_dim, device=device, dtype=torch.float32)
         # 2. Concatenate all conditioning inputs
         combined_conditioning = torch.cat([z, c_pos, c_neg, s_embed], dim=1)
-        # 3. Simple feedforward pass (Placeholder for complex GAN layers)
         x = self.fc(combined_conditioning)
-        # 4. Reshape to image format (Batch, Channels, Height, Width) and normalize to [-1, 1]
         image_tensor = x.view(batch_size, 3, 256, 256).tanh()
         return image_tensor.to(torch.float32)
-# --- 2. INITIALIZATION (Runs once on the Host/CPU, moves to GPU if available) ---
 DEVICE: str = "cuda" if torch.cuda.is_available() else "cpu"
 try:
-    # Initialize models and move them to the target device
     text_encoder = CustomTextEncoder(device=DEVICE)
     generator = GANGenerator().to(DEVICE).eval()
-    # 📝 NOTE: If you have pre-trained weights, load them here:
-    # generator.load_state_dict(torch.load("your_pretrained_weights.pth"))
     print(f"Models initialized on {DEVICE}")
 except Exception as e:
     print(f"Warning: Model initialization failed. Running with dummy data. Error: {e}")
@@ -108,75 +55,62 @@ def generate_image(positive_prompt: str, negative_prompt: str, style: str) -> Im
     """The main inference function, decorated for ZeroGPU."""
     if generator is None or text_encoder is None:
-        # Fallback for failed initialization
         return Image.fromarray(np.zeros((256, 256, 3), dtype=np.uint8))
     # 1. Encode Inputs
     c_pos = text_encoder.encode(positive_prompt)
     c_neg = text_encoder.encode(negative_prompt)
-    # Get style embedding and move it to the correct device
     s_embed = STYLE_EMBEDDINGS.get(style, STYLE_EMBEDDINGS["Photorealistic"]).to(DEVICE).unsqueeze(0)
-    # Ensure all inputs are float32
     c_pos = c_pos.to(torch.float32)
     c_neg = c_neg.to(torch.float32)
     s_embed = s_embed.to(torch.float32)
-    # 2. Generate Image (Forward Pass)
-    with torch.no_grad():
-        image_tensor = generator(c_pos, c_neg, s_embed)
-    # 3. Post-process to PIL Image
-    # Convert from [-1, 1] range to [0, 255]
-    image_tensor = (image_tensor * 0.5 + 0.5) * 255.0
-    image_tensor = image_tensor.clamp(0, 255).byte()
-    # Convert from C H W to H W C (for numpy/PIL)
-    image_numpy = image_tensor.squeeze(0).permute(1, 2, 0).cpu().numpy()
-    return Image.fromarray(image_numpy)
-# --- 4. GRADIO APP DEFINITION (FIXED for v6.x) ---
-# Removed 'theme' keyword argument from gr.Blocks() to fix the TypeError
-with gr.Blocks(
-    title="Custom Text-to-Image ZeroGPU GAN"
-) as demo:
-    gr.Markdown("## ✨ Conditional GAN with Negative and Style Prompting")
-    gr.Markdown("Enter a **Positive Description** (what you want) and an **Anti-Description** (what you *don't* want).")
-    with gr.Row():
-        positive_prompt = gr.Textbox(
-            label="1. Positive Description",
-            value="A beautiful, vibrant oil painting of a lighthouse by the sea",
-            lines=2
-        )
-        style_dropdown = gr.Dropdown(
-            label="3. Choose Style",
-            choices=STYLE_OPTIONS,
-            value=STYLE_OPTIONS[1],
-            scale=0.5
-        )
-    negative_prompt = gr.Textbox(
-        label="2. Anti-Description (Negative Prompt)",
-        value="ugly, noise, blurry, low resolution, watermark, text",
-        lines=2
-    )
-    generate_button = gr.Button("🎨 Generate Image", variant="primary")
-    output_image = gr.Image(label="Generated Image (256x256)", type="pil", height=256)
-    generate_button.click(
-        fn=generate_image,
-        inputs=[positive_prompt, negative_prompt, style_dropdown],
-        outputs=output_image
-    )
 if __name__ == "__main__":
-    # The 'theme' argument is correctly placed in the .launch() call for Gradio 6.x
     demo.launch(theme=gr.themes.Soft())

+# ... (Imports and STYLE_OPTIONS/STYLE_EMBEDDINGS are the same) ...
 # --- 1. CONFIGURATION AND MODEL PLACEHOLDERS ---
+# ... (CustomTextEncoder class is the same) ...
 class GANGenerator(torch.nn.Module):
     """
+    Conditional GAN Generator Placeholder with robust device handling.
     """
     def __init__(self, latent_dim: int = 100, embed_dim: int = 768):
         super().__init__()
         input_dim = latent_dim + embed_dim * 3
         # Output: 3 color channels * 256 * 256 image size
     def forward(self, c_pos: torch.Tensor, c_neg: torch.Tensor, s_embed: torch.Tensor) -> torch.Tensor:
         batch_size = c_pos.shape[0]
+        # Get the device from an input tensor (e.g., c_pos) to ensure consistency
+        device = c_pos.device
+        # ✅ FIX 1: Explicitly create the noise vector Z on the correct device
         z = torch.randn(batch_size, self.latent_dim, device=device, dtype=torch.float32)
         # 2. Concatenate all conditioning inputs
         combined_conditioning = torch.cat([z, c_pos, c_neg, s_embed], dim=1)
+        # 3. Feedforward pass (Placeholder)
         x = self.fc(combined_conditioning)
+        # 4. Reshape and normalize
         image_tensor = x.view(batch_size, 3, 256, 256).tanh()
         return image_tensor.to(torch.float32)
+# --- 2. INITIALIZATION (Runs once on the Host/CPU) ---
 DEVICE: str = "cuda" if torch.cuda.is_available() else "cpu"
 try:
     text_encoder = CustomTextEncoder(device=DEVICE)
     generator = GANGenerator().to(DEVICE).eval()
     print(f"Models initialized on {DEVICE}")
 except Exception as e:
     print(f"Warning: Model initialization failed. Running with dummy data. Error: {e}")
     """The main inference function, decorated for ZeroGPU."""
     if generator is None or text_encoder is None:
         return Image.fromarray(np.zeros((256, 256, 3), dtype=np.uint8))
     # 1. Encode Inputs
     c_pos = text_encoder.encode(positive_prompt)
     c_neg = text_encoder.encode(negative_prompt)
+    # ✅ FIX 2: Ensure style embedding is moved to the correct DEVICE
     s_embed = STYLE_EMBEDDINGS.get(style, STYLE_EMBEDDINGS["Photorealistic"]).to(DEVICE).unsqueeze(0)
+    # ✅ FIX 3: Explicitly cast all input tensors to float32 (standard for most GANs)
     c_pos = c_pos.to(torch.float32)
     c_neg = c_neg.to(torch.float32)
     s_embed = s_embed.to(torch.float32)
+    # --- DEBUGGING STEP: Check Shapes and Devices before generation ---
+    print("\n--- DEBUG INFO BEFORE GENERATION ---")
+    print(f"Generator device: {next(generator.parameters()).device}")
+    print(f"c_pos shape: {c_pos.shape}, device: {c_pos.device}")
+    print(f"c_neg shape: {c_neg.shape}, device: {c_neg.device}")
+    print(f"s_embed shape: {s_embed.shape}, device: {s_embed.device}")
+    print("------------------------------------\n")
+    # -----------------------------------------------------------------
+    try:
+        # 2. Generate Image (Forward Pass)
+        with torch.no_grad():
+            image_tensor = generator(c_pos, c_neg, s_embed)
+        # 3. Post-process to PIL Image (conversion code remains the same)
+        image_tensor = (image_tensor * 0.5 + 0.5) * 255.0
+        image_tensor = image_tensor.clamp(0, 255).byte()
+        # Convert from C H W to H W C (for numpy/PIL)
+        image_numpy = image_tensor.squeeze(0).permute(1, 2, 0).cpu().numpy()
+        return Image.fromarray(image_numpy)
+    except RuntimeError as e:
+        # Catch and report the specific runtime error in the logs
+        print(f"\nFATAL RUNTIME ERROR DURING GENERATION: {e}\n")
+        if "out of memory" in str(e).lower():
+            # If it's OOM, suggest resolution reduction
+            error_message = "CUDA Out of Memory Error: The model is too large for the allocated ZeroGPU memory. Try reducing the output resolution (e.g., from 256x256 to 128x128) in the GANGenerator class."
+        else:
+            # Assume device/type mismatch for other RuntimeError cases
+            error_message = f"Runtime Error: Tensors or model parameters are likely on different devices (CPU/CUDA) or have mismatched data types (float32/float64). See logs for full traceback. Error: {e}"
+        # Return a red error image to the user
+        error_img = np.full((256, 256, 3), [255, 0, 0], dtype=np.uint8)
+        return Image.fromarray(error_img)
+# --- 4. GRADIO APP DEFINITION (Same as before) ---
+# ... (The rest of the Gradio Blocks definition remains the same) ...
 if __name__ == "__main__":
     demo.launch(theme=gr.themes.Soft())