Spaces:

cronos3k
/

CharacterForgePro

Running on Zero

ghmk Claude Opus 4.5 committed on Feb 12

Commit

2ed486e

1 Parent(s): f3f2fa1

Add Scene Composer tab for multi-reference scene generation

- Tab 1: Character Sheet Generator (existing functionality)
- Tab 2: Scene Composer with up to 4 reference images
- Character sheet 1 & 2 inputs
- Background image input
- Object/prop image input
- render_scene_with_gpu() function with @spaces.GPU decorator
- Aspect ratio selection for scene output

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (1) hide show

app.py +416 -153

app.py CHANGED Viewed

@@ -458,6 +458,155 @@ def generate_with_gpu(
         return None, f"Error: {str(e)}", {}
 # =============================================================================
 # Gradio Interface Functions
 # =============================================================================
@@ -672,9 +821,9 @@ def create_ui():
         )
         gr.Markdown("# Character Sheet Pro")
-        gr.Markdown("Generate 7-view character turnaround sheets from a single input image using FLUX.2 klein.")
-        # Backend selection and controls
         with gr.Row():
             backend_dropdown = gr.Dropdown(
                 choices=[
@@ -682,7 +831,7 @@ def create_ui():
                     ("FLUX.2 klein 4B (Fast, ~13GB)", BackendType.FLUX_KLEIN.value),
                     ("Gemini Flash (Cloud - Fallback)", BackendType.GEMINI_FLASH.value),
                 ],
-                value="flux_klein_9b_fp8",  # Default to best quality
                 label="Backend",
                 scale=2
             )
@@ -695,159 +844,252 @@ def create_ui():
                 scale=2
             )
-        with gr.Row():
-            # Left column: Inputs
-            with gr.Column(scale=1):
-                gr.Markdown("### Input Settings")
-                input_type = gr.Radio(
-                    choices=["Face Only", "Full Body", "Face + Body (Separate)"],
-                    value="Face Only",
-                    label="Input Type",
-                    info="What type of image(s) are you providing?"
-                )
-                main_input = gr.Image(
-                    label="Input Image",
-                    type="pil",
-                    format="png",
-                    visible=True
-                )
-                with gr.Row(visible=False) as face_body_row:
-                    face_input = gr.Image(
-                        label="Face Reference",
-                        type="pil",
-                        format="png",
-                        visible=False
-                    )
-                    body_input = gr.Image(
-                        label="Body Reference",
-                        type="pil",
-                        format="png",
-                        visible=False
-                    )
-                gr.Markdown("### Character Details")
-                character_name = gr.Textbox(
-                    label="Character Name",
-                    placeholder="My Character",
-                    value=""
-                )
-                gender = gr.Radio(
-                    choices=["Auto/Neutral", "Male", "Female"],
-                    value="Auto/Neutral",
-                    label="Gender"
-                )
-                costume_description = gr.Textbox(
-                    label="Costume Description (Optional)",
-                    placeholder="e.g., Full plate armor with gold trim...",
-                    value="",
-                    lines=3
-                )
-                costume_image = gr.Image(
-                    label="Costume Reference Image (Optional)",
-                    type="pil",
-                    format="png"
-                )
-                gr.Markdown("### Generation Parameters")
                 with gr.Row():
-                    num_steps = gr.Number(
-                        label="Inference Steps",
-                        value=4,
-                        minimum=1,
-                        maximum=50,
-                        step=1,
-                        info="FLUX klein uses 4 steps"
-                    )
-                    guidance_scale = gr.Number(
-                        label="Guidance Scale",
-                        value=1.0,
-                        minimum=0.0,
-                        maximum=10.0,
-                        step=0.1,
-                        info="FLUX klein uses 1.0"
-                    )
-                include_costume_in_faces = gr.Checkbox(
-                    label="Include costume in face views",
-                    value=False,
-                    info="Turn OFF for FLUX (can confuse framing)"
-                )
-                # GENERATE BUTTON
-                generate_btn = gr.Button(
-                    "GENERATE CHARACTER SHEET",
-                    variant="primary",
-                    size="lg",
-                    elem_classes=["generate-btn-main"]
-                )
-            # Right column: Output
-            with gr.Column(scale=2):
-                gr.Markdown("### Generated Character Sheet")
-                output_image = gr.Image(
-                    label="Character Sheet",
-                    type="pil",
-                    format="png",
-                    elem_classes=["output-image"]
-                )
-                status_text = gr.Textbox(
-                    label="Status",
-                    interactive=False
-                )
-                # Preview gallery
-                gr.Markdown("### Individual Views Preview")
-                with gr.Row():
-                    gr.Markdown("**Face Views:**")
-                with gr.Row():
-                    preview_left_face = gr.Image(label="Left Face", type="pil", height=150, width=112)
-                    preview_front_face = gr.Image(label="Front Face", type="pil", height=150, width=112)
-                    preview_right_face = gr.Image(label="Right Face", type="pil", height=150, width=112)
                 with gr.Row():
-                    gr.Markdown("**Body Views:**")
-                with gr.Row():
-                    preview_left_body = gr.Image(label="Left Body", type="pil", height=150, width=84)
-                    preview_front_body = gr.Image(label="Front Body", type="pil", height=150, width=84)
-                    preview_right_body = gr.Image(label="Right Body", type="pil", height=150, width=84)
-                    preview_back_body = gr.Image(label="Back Body", type="pil", height=150, width=84)
-                # Downloads
-                gr.Markdown("### Downloads")
-                with gr.Row():
-                    json_download = gr.File(label="Metadata JSON", interactive=False)
-                    zip_download = gr.File(label="Complete Package (ZIP)", interactive=False)
-        # Usage instructions
-        gr.Markdown("---")
-        gr.Markdown("### How to Use")
-        gr.Markdown("""
-        1. **Upload an image** (face portrait or full body)
-        2. **Select input type** based on your image
-        3. **Optionally** add character name, gender, and costume description
-        4. **Click Generate** - the model loads automatically on first run (~30-60s)
-        5. **Wait** for all 7 views to generate (~2-3 minutes total)
-        6. **Download** the complete package
-        **GPU Notes:**
-        - Uses Zero GPU (A10G 24GB) - free but with 5-minute session limit
-        - First generation loads the model (adds ~30-60 seconds)
-        - Subsequent generations in the same session are faster
-        - If GPU unavailable, switch to Gemini Flash (requires API key)
-        """)
-        # Event handlers
         input_type.change(
             fn=update_input_visibility,
             inputs=[input_type],
@@ -892,6 +1134,27 @@ def create_ui():
             ]
         )
     return demo

         return None, f"Error: {str(e)}", {}
+# =============================================================================
+# Scene Composer GPU Function
+# =============================================================================
+@spaces.GPU(duration=120)  # 2-minute timeout for scene rendering
+def render_scene_with_gpu(
+    character_sheet_1: Optional[Image.Image],
+    character_sheet_2: Optional[Image.Image],
+    background_image: Optional[Image.Image],
+    object_image: Optional[Image.Image],
+    scene_description: str,
+    aspect_ratio: str,
+    backend_choice: str,
+    api_key: str,
+    num_steps: int,
+    guidance_scale: float
+) -> Tuple[Optional[Image.Image], str]:
+    """
+    Compose a scene from one or two character sheets plus optional references.
+    The first character sheet and a non-blank scene description are mandatory;
+    the second sheet, background and object/prop images are appended to the
+    prompt and to the reference list only when supplied. The service instance
+    is kept in the module globals _cached_service/_cached_backend and rebuilt
+    whenever the requested backend differs from the cached one.
+    Returns:
+        (image, status) as produced by the service client's generate_image(),
+        or (None, "Error: ...") on invalid input, an unsupported backend, or
+        any exception raised along the way.
+    """
+    global _cached_service, _cached_backend
+    try:
+        # Resolve the backend first; an unknown choice is reported via the except below
+        chosen_backend = BackendRouter.backend_from_string(backend_choice)
+        uses_cloud_api = chosen_backend in (BackendType.GEMINI_FLASH, BackendType.GEMINI_PRO)
+        # Guard clauses for the two mandatory inputs
+        if character_sheet_1 is None:
+            return None, "Error: Please provide at least one character sheet"
+        if not scene_description.strip():
+            return None, "Error: Please describe the scene"
+        # (Re)create the service only when nothing is cached or the backend changed
+        cache_is_stale = _cached_service is None or _cached_backend != chosen_backend
+        if cache_is_stale:
+            logger.info(f"Loading model for {chosen_backend.value}...")
+            # The API key is only handed over for the cloud (Gemini) backends
+            service_key = api_key if uses_cloud_api else None
+            _cached_service = CharacterSheetService(
+                api_key=service_key,
+                backend=chosen_backend
+            )
+            _cached_backend = chosen_backend
+        # Reference images, in the order the prompt refers to them
+        references = [character_sheet_1] + [
+            extra
+            for extra in (character_sheet_2, background_image, object_image)
+            if extra is not None
+        ]
+        # Prompt fragments; joined with ". " further down
+        fragments = ["Render the character from the first reference image"]
+        if character_sheet_2 is not None:
+            fragments.append("together with the character from the second reference image")
+        fragments.append(scene_description.strip())
+        optional_hints = (
+            (background_image, "using the background from the reference"),
+            (object_image, "incorporating the object/prop from the reference"),
+        )
+        fragments.extend(hint for ref, hint in optional_hints if ref is not None)
+        fragments.append("Maintain exact character identity and features from the character sheet(s). High quality, detailed, professional lighting.")
+        prompt = ". ".join(fragments)
+        # Output size per aspect-ratio label; unknown labels fall back to square
+        size_by_ratio = {
+            "1:1 (Square)": (1024, 1024),
+            "16:9 (Landscape)": (1344, 768),
+            "9:16 (Portrait)": (768, 1344),
+            "4:3 (Landscape)": (1152, 896),
+            "3:4 (Portrait)": (896, 1152),
+            "3:2 (Landscape)": (1248, 832),
+            "2:3 (Portrait)": (832, 1248),
+        }
+        width, height = size_by_ratio.get(aspect_ratio, (1024, 1024))
+        logger.info(f"Rendering scene: {prompt[:100]}...")
+        # Bail out when the service exposes no client capable of generate_image()
+        can_generate = hasattr(_cached_service, 'client') and hasattr(_cached_service.client, 'generate_image')
+        if not can_generate:
+            return None, "Error: Scene rendering not supported by current backend"
+        image, message = _cached_service.client.generate_image(
+            prompt=prompt,
+            input_images=references,
+            width=width,
+            height=height,
+            steps=num_steps,
+            guidance=guidance_scale
+        )
+        return image, message
+    except Exception as e:
+        # Top-level boundary for the UI: log the traceback, surface a short message
+        logger.exception(f"Scene rendering error: {e}")
+        return None, f"Error: {str(e)}"
+def render_scene(
+    character_sheet_1: Optional[Image.Image],
+    character_sheet_2: Optional[Image.Image],
+    background_image: Optional[Image.Image],
+    object_image: Optional[Image.Image],
+    scene_description: str,
+    aspect_ratio: str,
+    backend_choice: str,
+    api_key_override: str,
+    num_steps: int,
+    guidance_scale: float,
+    progress=gr.Progress()
+) -> Tuple[Optional[Image.Image], str]:
+    """
+    Wrapper for scene rendering with progress updates.
+    Checks the mandatory inputs, preprocesses the reference images, resolves
+    the API key and then delegates to render_scene_with_gpu().
+    Args:
+        character_sheet_1: Required character reference.
+        character_sheet_2: Optional second character reference.
+        background_image: Optional background reference.
+        object_image: Optional object/prop reference (preprocessed with a smaller max_size).
+        scene_description: Free-text description of the scene; must not be blank.
+        aspect_ratio: Aspect-ratio label, forwarded unchanged.
+        backend_choice: Backend identifier, forwarded unchanged.
+        api_key_override: Per-request API key; blank or None falls back to API_KEY.
+        num_steps: Inference steps (coerced to int).
+        guidance_scale: Guidance scale (coerced to float).
+        progress: Gradio progress tracker.
+    Returns:
+        (image, status) from render_scene_with_gpu(), or (None, "Error: ...")
+        when a mandatory input is missing.
+    """
+    # Reject incomplete requests before any preprocessing and before entering the
+    # @spaces.GPU-decorated function, which performs the same checks only after
+    # it has already been dispatched. Messages match the ones it returns.
+    if character_sheet_1 is None:
+        return None, "Error: Please provide at least one character sheet"
+    if not (scene_description or "").strip():
+        return None, "Error: Please describe the scene"
+    progress(0.1, desc="Preparing scene...")
+    # Preprocess images; optional references are skipped via explicit None checks
+    character_sheet_1 = ensure_png_image(character_sheet_1, max_size=1024)
+    if character_sheet_2 is not None:
+        character_sheet_2 = ensure_png_image(character_sheet_2, max_size=1024)
+    if background_image is not None:
+        background_image = ensure_png_image(background_image, max_size=1024)
+    if object_image is not None:
+        object_image = ensure_png_image(object_image, max_size=512)
+    # A blank (or missing) override falls back to the module-level API_KEY
+    api_key = (api_key_override or "").strip() or API_KEY
+    progress(0.2, desc="Allocating GPU and rendering scene...")
+    result, status = render_scene_with_gpu(
+        character_sheet_1=character_sheet_1,
+        character_sheet_2=character_sheet_2,
+        background_image=background_image,
+        object_image=object_image,
+        scene_description=scene_description,
+        aspect_ratio=aspect_ratio,
+        backend_choice=backend_choice,
+        api_key=api_key,
+        num_steps=int(num_steps),
+        guidance_scale=float(guidance_scale)
+    )
+    progress(1.0, desc="Done!")
+    return result, status
 # =============================================================================
 # Gradio Interface Functions
 # =============================================================================
         )
         gr.Markdown("# Character Sheet Pro")
+        gr.Markdown("Generate 7-view character turnaround sheets and compose scenes with your characters.")
+        # Shared controls (outside tabs)
         with gr.Row():
             backend_dropdown = gr.Dropdown(
                 choices=[
                     ("FLUX.2 klein 4B (Fast, ~13GB)", BackendType.FLUX_KLEIN.value),
                     ("Gemini Flash (Cloud - Fallback)", BackendType.GEMINI_FLASH.value),
                 ],
+                value="flux_klein_9b_fp8",
                 label="Backend",
                 scale=2
             )
                 scale=2
             )
+        with gr.Tabs():
+            # =========================================================
+            # TAB 1: Character Sheet Generator
+            # =========================================================
+            with gr.TabItem("Character Sheet Generator"):
                 with gr.Row():
+                    # Left column: Inputs
+                    with gr.Column(scale=1):
+                        gr.Markdown("### Input Settings")
+                        input_type = gr.Radio(
+                            choices=["Face Only", "Full Body", "Face + Body (Separate)"],
+                            value="Face Only",
+                            label="Input Type",
+                            info="What type of image(s) are you providing?"
+                        )
+                        main_input = gr.Image(
+                            label="Input Image",
+                            type="pil",
+                            format="png",
+                            visible=True
+                        )
+                        with gr.Row(visible=False) as face_body_row:
+                            face_input = gr.Image(
+                                label="Face Reference",
+                                type="pil",
+                                format="png",
+                                visible=False
+                            )
+                            body_input = gr.Image(
+                                label="Body Reference",
+                                type="pil",
+                                format="png",
+                                visible=False
+                            )
+                        gr.Markdown("### Character Details")
+                        character_name = gr.Textbox(
+                            label="Character Name",
+                            placeholder="My Character",
+                            value=""
+                        )
+                        gender = gr.Radio(
+                            choices=["Auto/Neutral", "Male", "Female"],
+                            value="Auto/Neutral",
+                            label="Gender"
+                        )
+                        costume_description = gr.Textbox(
+                            label="Costume Description (Optional)",
+                            placeholder="e.g., Full plate armor with gold trim...",
+                            value="",
+                            lines=3
+                        )
+                        costume_image = gr.Image(
+                            label="Costume Reference Image (Optional)",
+                            type="pil",
+                            format="png"
+                        )
+                        gr.Markdown("### Generation Parameters")
+                        with gr.Row():
+                            num_steps = gr.Number(
+                                label="Inference Steps",
+                                value=4,
+                                minimum=1,
+                                maximum=50,
+                                step=1,
+                                info="FLUX klein uses 4 steps"
+                            )
+                            guidance_scale = gr.Number(
+                                label="Guidance Scale",
+                                value=1.0,
+                                minimum=0.0,
+                                maximum=10.0,
+                                step=0.1,
+                                info="FLUX klein uses 1.0"
+                            )
+                        include_costume_in_faces = gr.Checkbox(
+                            label="Include costume in face views",
+                            value=False,
+                            info="Turn OFF for FLUX (can confuse framing)"
+                        )
+                        # GENERATE BUTTON
+                        generate_btn = gr.Button(
+                            "GENERATE CHARACTER SHEET",
+                            variant="primary",
+                            size="lg",
+                            elem_classes=["generate-btn-main"]
+                        )
+                    # Right column: Output
+                    with gr.Column(scale=2):
+                        gr.Markdown("### Generated Character Sheet")
+                        output_image = gr.Image(
+                            label="Character Sheet",
+                            type="pil",
+                            format="png",
+                            elem_classes=["output-image"]
+                        )
+                        status_text = gr.Textbox(
+                            label="Status",
+                            interactive=False
+                        )
+                        # Preview gallery
+                        gr.Markdown("### Individual Views Preview")
+                        with gr.Row():
+                            gr.Markdown("**Face Views:**")
+                        with gr.Row():
+                            preview_left_face = gr.Image(label="Left Face", type="pil", height=150, width=112)
+                            preview_front_face = gr.Image(label="Front Face", type="pil", height=150, width=112)
+                            preview_right_face = gr.Image(label="Right Face", type="pil", height=150, width=112)
+                        with gr.Row():
+                            gr.Markdown("**Body Views:**")
+                        with gr.Row():
+                            preview_left_body = gr.Image(label="Left Body", type="pil", height=150, width=84)
+                            preview_front_body = gr.Image(label="Front Body", type="pil", height=150, width=84)
+                            preview_right_body = gr.Image(label="Right Body", type="pil", height=150, width=84)
+                            preview_back_body = gr.Image(label="Back Body", type="pil", height=150, width=84)
+                        # Downloads
+                        gr.Markdown("### Downloads")
+                        with gr.Row():
+                            json_download = gr.File(label="Metadata JSON", interactive=False)
+                            zip_download = gr.File(label="Complete Package (ZIP)", interactive=False)
+            # =========================================================
+            # TAB 2: Scene Composer
+            # =========================================================
+            with gr.TabItem("Scene Composer"):
+                gr.Markdown("### Compose Scenes with Your Characters")
+                gr.Markdown("Use character sheets to render characters in custom scenes with backgrounds and props.")
                 with gr.Row():
+                    # Left column: Reference inputs
+                    with gr.Column(scale=1):
+                        gr.Markdown("### Reference Images")
+                        with gr.Row():
+                            scene_char1 = gr.Image(
+                                label="Character Sheet 1 (Required)",
+                                type="pil",
+                                format="png"
+                            )
+                            scene_char2 = gr.Image(
+                                label="Character Sheet 2 (Optional)",
+                                type="pil",
+                                format="png"
+                            )
+                        with gr.Row():
+                            scene_background = gr.Image(
+                                label="Background Image (Optional)",
+                                type="pil",
+                                format="png"
+                            )
+                            scene_object = gr.Image(
+                                label="Object/Prop (Optional)",
+                                type="pil",
+                                format="png"
+                            )
+                        gr.Markdown("### Scene Description")
+                        scene_description = gr.Textbox(
+                            label="Describe the scene",
+                            placeholder="e.g., standing on a beach at sunset, dancing in a nightclub, sitting in a cafe...",
+                            lines=3
+                        )
+                        scene_aspect_ratio = gr.Dropdown(
+                            choices=[
+                                "1:1 (Square)",
+                                "16:9 (Landscape)",
+                                "9:16 (Portrait)",
+                                "4:3 (Landscape)",
+                                "3:4 (Portrait)",
+                                "3:2 (Landscape)",
+                                "2:3 (Portrait)",
+                            ],
+                            value="16:9 (Landscape)",
+                            label="Output Aspect Ratio"
+                        )
+                        with gr.Row():
+                            scene_steps = gr.Number(
+                                label="Inference Steps",
+                                value=4,
+                                minimum=1,
+                                maximum=50,
+                                step=1
+                            )
+                            scene_guidance = gr.Number(
+                                label="Guidance Scale",
+                                value=1.0,
+                                minimum=0.0,
+                                maximum=10.0,
+                                step=0.1
+                            )
+                        render_btn = gr.Button(
+                            "RENDER SCENE",
+                            variant="primary",
+                            size="lg",
+                            elem_classes=["generate-btn-main"]
+                        )
+                    # Right column: Output
+                    with gr.Column(scale=2):
+                        gr.Markdown("### Rendered Scene")
+                        scene_output = gr.Image(
+                            label="Scene Output",
+                            type="pil",
+                            format="png",
+                            elem_classes=["output-image"]
+                        )
+                        scene_status = gr.Textbox(
+                            label="Status",
+                            interactive=False
+                        )
+                gr.Markdown("---")
+                gr.Markdown("""
+                **Tips for Scene Composer:**
+                - Upload a character sheet generated in the first tab, or use any character turnaround image
+                - Add a second character sheet to include multiple characters in the scene
+                - Background images help set the scene location and lighting
+                - Object/prop images can be items the character holds or interacts with
+                - Be descriptive in your scene description for best results
+                """)
+        # Event handlers for Tab 1
         input_type.change(
             fn=update_input_visibility,
             inputs=[input_type],
             ]
         )
+        # Event handlers for Tab 2 (Scene Composer)
+        render_btn.click(
+            fn=render_scene,
+            inputs=[
+                scene_char1,
+                scene_char2,
+                scene_background,
+                scene_object,
+                scene_description,
+                scene_aspect_ratio,
+                backend_dropdown,
+                api_key_input,
+                scene_steps,
+                scene_guidance
+            ],
+            outputs=[
+                scene_output,
+                scene_status
+            ]
+        )
     return demo