Spaces:

ysharma
/

qwen_camera_angles_custom_component

Running on Zero

App Files Files Community

ysharma HF Staff committed on Feb 20

Commit

f0d6a54

verified ·

1 Parent(s): 2f644fe

Update app.py

Browse files

Files changed (1) hide show

app.py +392 -256

app.py CHANGED Viewed

@@ -1,328 +1,464 @@
 import gradio as gr
 import base64
 from io import BytesIO
-from PIL import Image
-import torch
-from diffusers import StableDiffusionXLPipeline
-# Initialize the model (placeholder - replace with your actual model)
-def init_model():
-    """Initialize the Qwen model with camera control LoRAs"""
-    # This is a placeholder - replace with actual model initialization
-    try:
-        # pipe = StableDiffusionXLPipeline.from_pretrained(
-        #     "Qwen/Qwen2-VL-7B-Instruct",
-        #     torch_dtype=torch.float16,
-        #     device_map="auto"
-        # )
-        # return pipe
-        return None  # Placeholder
-    except:
-        return None
 # Camera parameter mappings
-AZIMUTH_MAP = {
-    0: "front view", 45: "front-right quarter view", 90: "right side view",
-    135: "back-right quarter view", 180: "back view", 225: "back-left quarter view",
-    270: "left side view", 315: "front-left quarter view"
 }
-ELEVATION_MAP = {
-    -30: "low-angle shot", 0: "eye-level shot", 30: "elevated shot", 60: "high-angle shot"
 }
-DISTANCE_MAP = {
-    0.6: "close-up", 1.0: "medium shot", 1.8: "wide shot"
 }
 def snap_to_nearest(value, steps):
-    """Snap value to nearest step."""
     return min(steps, key=lambda x: abs(x - value))
 def build_camera_prompt(azimuth, elevation, distance):
-    """Build camera prompt from parameters."""
     azimuth_steps = [0, 45, 90, 135, 180, 225, 270, 315]
     elevation_steps = [-30, 0, 30, 60]
     distance_steps = [0.6, 1.0, 1.8]
-    az_snap = snap_to_nearest(azimuth, azimuth_steps)
-    el_snap = snap_to_nearest(elevation, elevation_steps)
-    dist_snap = snap_to_nearest(distance, distance_steps)
-    az_name = AZIMUTH_MAP[az_snap]
-    el_name = ELEVATION_MAP[el_snap]
-    dist_name = DISTANCE_MAP[dist_snap]
-    return f"<sks> {az_name} {el_name} {dist_name}"
-def generate_new_view(input_image, azimuth, elevation, distance, seed, randomize_seed, guidance_scale, num_inference_steps, height, width):
-    """Generate new camera view (placeholder - replace with actual model inference)."""
-    if input_image is None:
-        return None, seed, build_camera_prompt(azimuth, elevation, distance)
-    try:
-        # Placeholder implementation - replace with actual model inference
-        prompt = build_camera_prompt(azimuth, elevation, distance)
-        # For now, just return the original image as a placeholder
-        # In real implementation, this would call your Qwen model with camera parameters
-        return input_image, seed, prompt
-    except Exception as e:
-        print(f"Generation error: {e}")
-        return None, seed, build_camera_prompt(azimuth, elevation, distance)
 def create_camera_control_app():
-    """Create the working camera control app."""
-    with gr.Blocks(title="Camera Control with Directional Arrows - WORKING VERSION") as demo:
         gr.Markdown("# 📸 Camera Control with Directional Arrows")
-        gr.Markdown("Upload an image and use arrows to control camera angles for 3D view generation")
         with gr.Row():
-            # Left column: Image upload and controls
             with gr.Column(scale=1):
                 image = gr.Image(label="Upload Image", type="pil", height=400)
-                # Camera parameter inputs (visible for debugging)
                 js_azimuth = gr.Textbox("0", visible=True, elem_id="js-azimuth", label="Azimuth")
                 js_elevation = gr.Textbox("0", visible=True, elem_id="js-elevation", label="Elevation")
                 js_distance = gr.Textbox("1.0", visible=True, elem_id="js-distance", label="Distance")
-                # Generation settings
-                with gr.Accordion("⚙️ Generation Settings", open=False):
-                    seed = gr.Slider(minimum=0, maximum=2147483647, step=1, value=42, label="Seed")
-                    randomize_seed = gr.Checkbox(True, label="Randomize seed")
-                    guidance_scale = gr.Slider(minimum=1, maximum=20, step=0.1, value=7.5, label="Guidance scale")
-                    num_inference_steps = gr.Slider(minimum=10, maximum=50, step=1, value=25, label="Number of inference steps")
-                    height = gr.Slider(minimum=256, maximum=1024, step=64, value=1024, label="Height")
-                    width = gr.Slider(minimum=256, maximum=1024, step=64, value=1024, label="Width")
                 prompt_display = gr.Textbox(
-                    label="Current Camera Prompt",
                     value="<sks> front view eye-level shot medium shot",
                     interactive=False
                 )
-            # Right column: Interactive image view
             with gr.Column(scale=1):
                 gr.Markdown("### 🎯 Interactive Image View")
-                gr.Markdown("*Upload an image, then hover to see controls and click arrows to generate new views*")
                 # Interactive HTML component using working pattern
                 result_display = gr.HTML(
-                    value="""
-                    <div style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
-                                position: relative; display: flex; align-items: center; justify-content: center;">
-                        <div style="text-align: center; color: #999;">
-                            <div style="font-size: 48px; margin-bottom: 10px;">📸</div>
                             <p>Upload an image on the left to begin</p>
-                            <p>Then hover to see camera controls</p>
                         </div>
                     </div>
-                    """,
                     elem_id="result-display"
                 )
-                # Debug output
-                debug_output = gr.Textbox(label="Debug Output", visible=True)
-    # Functions for handling interactions
-    def show_uploaded_image_with_arrows(uploaded_image):
-        """Show uploaded image with working arrow controls."""
-        if uploaded_image is None:
-            return gr.update(value="""
-            <div style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
-                        position: relative; display: flex; align-items: center; justify-content: center;">
-                <div style="text-align: center; color: #999;">
-                    <div style="font-size: 48px; margin-bottom: 10px;">📸</div>
-                    <p>Upload an image on the left to begin</p>
-                    <p>Then hover to see camera controls</p>
-                </div>
-            </div>
-            """)
-        # Convert to data URL
-        buffered = BytesIO()
-        uploaded_image.save(buffered, format="PNG")
-        img_str = base64.b64encode(buffered.getvalue()).decode()
-        data_url = f"data:image/png;base64,{img_str}"
-        # Return HTML with image and working arrow controls
-        return gr.update(value=f"""
-        <div style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
-                    position: relative; display: flex; align-items: center; justify-content: center;">
-            <!-- Uploaded image -->
-            <img src="{data_url}" style="max-width: 100%; max-height: 100%; object-fit: contain;">
-            <!-- Arrow controls with CSS hover effects and inline JavaScript -->
-            <div style="position: absolute; inset: 0; z-index: 10; opacity: 0; transition: opacity 0.3s ease;"
-                 onmouseover="this.style.opacity='1'" onmouseout="this.style.opacity='0'">
-                <!-- Left arrow (Azimuth -45°) -->
-                <button onclick="
-                    var az = parseInt(document.getElementById('js-azimuth').querySelector('input').value) - 45;
-                    if (az < 0) az += 360;
-                    document.getElementById('js-azimuth').querySelector('input').value = az;
-                    document.getElementById('js-azimuth').querySelector('input').dispatchEvent(new Event('input', {{bubbles: true}}));
-                    document.getElementById('status-az').textContent = az;
-                "
-                style="position: absolute; left: 20px; top: 50%; transform: translateY(-50%);
-                       width: 60px; height: 60px; background: rgba(0,255,136,0.9); border: none;
-                       border-radius: 50%; color: white; font-size: 24px; cursor: pointer; z-index: 11;
-                       box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
-                onmouseover="this.style.transform += ' scale(1.1)'"
-                onmouseout="this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
-                title="Rotate Left (Azimuth -45°)">
-                    ←
-                </button>
-                <!-- Right arrow (Azimuth +45°) -->
-                <button onclick="
-                    var az = (parseInt(document.getElementById('js-azimuth').querySelector('input').value) + 45) % 360;
-                    document.getElementById('js-azimuth').querySelector('input').value = az;
-                    document.getElementById('js-azimuth').querySelector('input').dispatchEvent(new Event('input', {{bubbles: true}}));
-                    document.getElementById('status-az').textContent = az;
-                "
-                style="position: absolute; right: 20px; top: 50%; transform: translateY(-50%);
-                       width: 60px; height: 60px; background: rgba(0,255,136,0.9); border: none;
-                       border-radius: 50%; color: white; font-size: 24px; cursor: pointer; z-index: 11;
-                       box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
-                onmouseover="this.style.transform += ' scale(1.1)'"
-                onmouseout="this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
-                title="Rotate Right (Azimuth +45°)">
-                    →
-                </button>
-                <!-- Up arrow (Elevation +30°) -->
-                <button onclick="
-                    var el = Math.min(60, parseInt(document.getElementById('js-elevation').querySelector('input').value) + 30);
-                    document.getElementById('js-elevation').querySelector('input').value = el;
-                    document.getElementById('js-elevation').querySelector('input').dispatchEvent(new Event('input', {{bubbles: true}}));
-                    document.getElementById('status-el').textContent = el;
-                "
-                style="position: absolute; top: 20px; left: 50%; transform: translateX(-50%);
-                       width: 60px; height: 60px; background: rgba(255,105,180,0.9); border: none;
-                       border-radius: 50%; color: white; font-size: 24px; cursor: pointer; z-index: 11;
-                       box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
-                onmouseover="this.style.transform += ' scale(1.1)'"
-                onmouseout="this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
-                title="Look Up (Elevation +30°)">
-                    ↑
-                </button>
-                <!-- Down arrow (Elevation -30°) -->
-                <button onclick="
-                    var el = Math.max(-30, parseInt(document.getElementById('js-elevation').querySelector('input').value) - 30);
-                    document.getElementById('js-elevation').querySelector('input').value = el;
-                    document.getElementById('js-elevation').querySelector('input').dispatchEvent(new Event('input', {{bubbles: true}}));
-                    document.getElementById('status-el').textContent = el;
-                "
-                style="position: absolute; bottom: 80px; left: 50%; transform: translateX(-50%);
-                       width: 60px; height: 60px; background: rgba(255,105,180,0.9); border: none;
-                       border-radius: 50%; color: white; font-size: 24px; cursor: pointer; z-index: 11;
-                       box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
-                onmouseover="this.style.transform += ' scale(1.1)'"
-                onmouseout="this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
-                title="Look Down (Elevation -30°)">
-                    ↓
-                </button>
-                <!-- Zoom controls -->
-                <div style="position: absolute; bottom: 20px; left: 50%; transform: translateX(-50%);
-                            display: flex; gap: 15px; z-index: 11;">
-                    <!-- Zoom out (Distance +0.4) -->
-                    <button onclick="
-                        var dist = Math.min(1.8, parseFloat(document.getElementById('js-distance').querySelector('input').value) + 0.4);
-                        document.getElementById('js-distance').querySelector('input').value = dist.toFixed(1);
-                        document.getElementById('js-distance').querySelector('input').dispatchEvent(new Event('input', {{bubbles: true}}));
-                        document.getElementById('status-dist').textContent = dist.toFixed(1);
-                    "
-                    style="width: 55px; height: 55px; background: rgba(255,165,0,0.9); border: none;
                            border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
-                           box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
-                    onmouseover="this.style.transform = 'scale(1.1)'"
-                    onmouseout="this.style.transform = ''"
-                    title="Zoom Out (Distance +0.4)">
-                        −
                     </button>
-                    <!-- Zoom in (Distance -0.4) -->
-                    <button onclick="
-                        var dist = Math.max(0.6, parseFloat(document.getElementById('js-distance').querySelector('input').value) - 0.4);
-                        document.getElementById('js-distance').querySelector('input').value = dist.toFixed(1);
-                        document.getElementById('js-distance').querySelector('input').dispatchEvent(new Event('input', {{bubbles: true}}));
-                        document.getElementById('status-dist').textContent = dist.toFixed(1);
-                    "
-                    style="width: 55px; height: 55px; background: rgba(255,165,0,0.9); border: none;
                            border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
-                           box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
-                    onmouseover="this.style.transform = 'scale(1.1)'"
-                    onmouseout="this.style.transform = ''"
-                    title="Zoom In (Distance -0.4)">
-                        +
                     </button>
-                </div>
-                <!-- Status display -->
-                <div style="position: absolute; top: 15px; right: 15px; background: rgba(0,0,0,0.85);
-                            color: white; padding: 10px 14px; border-radius: 8px; font-family: monospace;
-                            font-size: 13px; z-index: 11; box-shadow: 0 4px 12px rgba(0,0,0,0.4);">
-                    <div>Az: <span id="status-az">0</span>° | El: <span id="status-el">0</span>° | Dist: <span id="status-dist">1.0</span></div>
                 </div>
             </div>
-        </div>
-        """)
-    def handle_parameter_change(az, el, dist, input_image):
-        """Handle camera parameter changes and trigger generation."""
-        try:
-            azimuth = float(az)
-            elevation = float(el)
-            distance = float(dist)
-            # Build new prompt
-            prompt = build_camera_prompt(azimuth, elevation, distance)
-            # Generate new image (placeholder)
-            if input_image is not None:
-                # For now, just return the original image
-                # In real implementation, call your model here
-                new_image = input_image  # Placeholder
-                # Convert to data URL for display
-                buffered = BytesIO()
-                new_image.save(buffered, format="PNG")
-                img_str = base64.b64encode(buffered.getvalue()).decode()
-                data_url = f"data:image/png;base64,{img_str}"
-                # Update HTML with new image (keeping the same arrow structure)
-                return show_uploaded_image_with_arrows(new_image).value, prompt, f"Generated view: Az={azimuth}°, El={elevation}°, Dist={distance}"
-            return gr.update(), prompt, f"Parameters updated: Az={azimuth}°, El={elevation}°, Dist={distance}"
-        except Exception as e:
-            return gr.update(), f"Error: {str(e)}", f"Error processing parameters: {str(e)}"
-    # Set up event handlers
-    # Image upload handler
-    image.upload(
-        fn=show_uploaded_image_with_arrows,
-        inputs=[image],
-        outputs=[result_display]
-    )
-    # Parameter change handlers (triggered by arrow clicks)
-    for param_input in [js_azimuth, js_elevation, js_distance]:
-        param_input.change(
-            fn=handle_parameter_change,
-            inputs=[js_azimuth, js_elevation, js_distance, image],
-            outputs=[result_display, prompt_display, debug_output]
         )
     return demo
-if __name__ == "__main__":
     demo = create_camera_control_app()
     demo.launch()

+"""
+Camera Control App with Working Arrow Interface
+Complete version with Qwen model integration
+"""
 import gradio as gr
+import torch
+import numpy as np
+import random
+from PIL import Image
+import spaces
+from diffusers import DiffusionPipeline
 import base64
 from io import BytesIO
+# Model configuration
+# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# bfloat16 is chosen only when CUDA is present and reports bf16 support; every
+# other case (including CPU-only hosts) gets float16.
+# NOTE(review): float16 on CPU may be slow or unsupported for some ops - confirm.
+dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16
+# Largest signed 32-bit integer; used as the upper bound for seeds.
+MAX_SEED = np.iinfo(np.int32).max
+# Pipeline singleton; stays None until load_model() builds it on first use.
+pipe = None
+def load_model():
+    """Load the Qwen diffusion model with camera control LoRAs.
+
+    The pipeline is built only once and cached in the module-level ``pipe``;
+    later calls return that same object without reloading anything.
+
+    Returns:
+        The module-level pipeline object.
+    """
+    global pipe
+    if pipe is None:
+        # Base pipeline, created in the configured dtype and moved to `device`.
+        pipe = DiffusionPipeline.from_pretrained(
+            "multimodalart/qwen-image-multiple-angles-3d-camera",
+            torch_dtype=dtype,
+        ).to(device)
+        # First LoRA file is loaded (no explicit adapter name) and fused.
+        pipe.load_lora_weights("multimodalart/qwen-image-multiple-angles-3d-camera", weight_name="lightning.safetensors")
+        pipe.fuse_lora(lora_scale=1.0)
+        # Second LoRA file is loaded under the explicit adapter name "multi_angles".
+        pipe.load_lora_weights("multimodalart/qwen-image-multiple-angles-3d-camera", weight_name="multi_angles.safetensors", adapter_name="multi_angles")
+        # NOTE(review): the first LoRA was loaded without adapter_name; confirm
+        # that diffusers registers it as "default" (it may be "default_0"),
+        # otherwise this call will not find that adapter.
+        pipe.set_adapters(["default", "multi_angles"], adapter_weights=[1.0, 1.0])
+    return pipe
 # Camera parameter mappings
+# Azimuth step (degrees) -> view phrase inserted into the camera prompt.
+azimuth_mapping = {
+    0: "front view",
+    45: "front-right quarter view",
+    90: "right side view",
+    135: "back-right quarter view",
+    180: "back view",
+    225: "back-left quarter view",
+    270: "left side view",
+    315: "front-left quarter view"
 }
+# Elevation step (degrees) -> shot-angle phrase inserted into the camera prompt.
+elevation_mapping = {
+    -30: "low-angle shot",
+    0: "eye-level shot",
+    30: "elevated shot",
+    60: "high-angle shot"
 }
+# Distance step -> framing phrase inserted into the camera prompt.
+distance_mapping = {
+    0.6: "close-up",
+    1.0: "medium shot",
+    1.8: "wide shot"
 }
 def snap_to_nearest(value, steps):
+    """Return the element of ``steps`` closest to ``value``.
+
+    Closeness is the plain absolute difference ``abs(step - value)``. When two
+    steps are equally close, the one that appears first in ``steps`` is
+    returned, because ``min`` keeps the first minimal element.
+
+    Raises:
+        ValueError: if ``steps`` is empty (raised by ``min``).
+    """
     return min(steps, key=lambda x: abs(x - value))
 def build_camera_prompt(azimuth, elevation, distance):
+    """Build the camera prompt string from numerical parameters.
+
+    Each parameter is snapped to its nearest supported step and translated to
+    a phrase through the module-level mapping tables.
+
+    Args:
+        azimuth: Horizontal angle in degrees. Treated as circular: it is
+            normalised into [0, 360) and snapped by angular distance, so 350
+            resolves to the 0-degree step rather than to 315, and -45
+            resolves to 315.
+        elevation: Vertical angle in degrees; snapped by absolute difference.
+        distance: Camera distance factor; snapped by absolute difference.
+
+    Returns:
+        A string of the form
+        ``"<sks> <azimuth phrase> <elevation phrase> <distance phrase>"``.
+    """
     azimuth_steps = [0, 45, 90, 135, 180, 225, 270, 315]
     elevation_steps = [-30, 0, 30, 60]
     distance_steps = [0.6, 1.0, 1.8]
+    # Angles wrap around: compare on the circle so values close to 360 (or
+    # below 0) reach the 0-degree step instead of sticking to a list end.
+    azimuth = azimuth % 360
+    azimuth_snapped = min(
+        azimuth_steps,
+        key=lambda step: min(abs(step - azimuth), 360 - abs(step - azimuth)),
+    )
+    elevation_snapped = snap_to_nearest(elevation, elevation_steps)
+    distance_snapped = snap_to_nearest(distance, distance_steps)
+    azimuth_name = azimuth_mapping[azimuth_snapped]
+    elevation_name = elevation_mapping[elevation_snapped]
+    distance_name = distance_mapping[distance_snapped]
+    return f"<sks> {azimuth_name} {elevation_name} {distance_name}"
+# NOTE(review): the duration budget also has to cover the first-call model load
+# performed inside load_model() - confirm that 5 is sufficient.
+@spaces.GPU(duration=5)
+def infer_camera_edit(
+    image: Image.Image,
+    azimuth: float = 0.0,
+    elevation: float = 0.0,
+    distance: float = 1.0,
+    seed: int = 0,
+    randomize_seed: bool = True,
+    guidance_scale: float = 1.0,
+    num_inference_steps: int = 4,
+    height: int = 1024,
+    width: int = 1024,
+):
+    """Generate a new camera view of ``image`` using the Qwen pipeline.
+
+    Args:
+        image: Source picture; a PIL image, or anything ``Image.open`` accepts.
+        azimuth: Horizontal camera angle, forwarded to ``build_camera_prompt``.
+        elevation: Vertical camera angle, forwarded to ``build_camera_prompt``.
+        distance: Camera distance factor, forwarded to ``build_camera_prompt``.
+        seed: Seed for the generator; ignored when ``randomize_seed`` is true.
+        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
+        guidance_scale: Passed through to the pipeline unchanged.
+        num_inference_steps: Passed to the pipeline as an integer.
+        height: Output height; ``0`` or ``None`` is passed on as ``None``.
+        width: Output width; ``0`` or ``None`` is passed on as ``None``.
+
+    Returns:
+        A tuple ``(result_image, seed_used, prompt)``.
+
+    Raises:
+        gr.Error: if ``image`` is ``None``.
+    """
+    # Fail fast: nothing below is useful without an input image.
+    if image is None:
+        raise gr.Error("Please upload an image first.")
+    prompt = build_camera_prompt(azimuth, elevation, distance)
+    print(f"Generated Prompt: {prompt}")
+    # Gradio documents slider values as floats, while manual_seed() and the
+    # step/size arguments expect integers - normalise them once here.
+    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
+    num_inference_steps = int(num_inference_steps)
+    height = int(height) if height else None
+    width = int(width) if width else None
+    generator = torch.Generator(device=device).manual_seed(seed)
+    pil_image = image.convert("RGB") if isinstance(image, Image.Image) else Image.open(image).convert("RGB")
+    # Load model only when needed
+    current_pipe = load_model()
+    result = current_pipe(
+        image=[pil_image],
+        prompt=prompt,
+        height=height,
+        width=width,
+        num_inference_steps=num_inference_steps,
+        generator=generator,
+        guidance_scale=guidance_scale,
+        num_images_per_prompt=1,
+    ).images[0]
+    return result, seed, prompt
 def create_camera_control_app():
+    """Create the complete working camera control app."""
+    with gr.Blocks(title="Camera Control with Directional Arrows", theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 📸 Camera Control with Directional Arrows")
+        gr.Markdown("Upload an image and use the directional arrows to control camera angles")
         with gr.Row():
+            # Left column: Input image and settings
             with gr.Column(scale=1):
                 image = gr.Image(label="Upload Image", type="pil", height=400)
+                # Camera parameter inputs (visible for debugging, can be hidden later)
                 js_azimuth = gr.Textbox("0", visible=True, elem_id="js-azimuth", label="Azimuth")
                 js_elevation = gr.Textbox("0", visible=True, elem_id="js-elevation", label="Elevation")
                 js_distance = gr.Textbox("1.0", visible=True, elem_id="js-distance", label="Distance")
                 prompt_display = gr.Textbox(
+                    label="Current Camera Prompt",
                     value="<sks> front view eye-level shot medium shot",
                     interactive=False
                 )
+                # Advanced settings
+                with gr.Accordion("⚙️ Advanced Settings", open=False):
+                    seed = gr.Slider(
+                        label="Seed",
+                        minimum=0,
+                        maximum=MAX_SEED,
+                        step=1,
+                        value=0,
+                    )
+                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+                    with gr.Row():
+                        guidance_scale = gr.Slider(
+                            label="Guidance scale",
+                            minimum=0.1,
+                            maximum=2.0,
+                            step=0.1,
+                            value=1.0,
+                        )
+                        num_inference_steps = gr.Slider(
+                            label="Number of inference steps",
+                            minimum=1,
+                            maximum=8,
+                            step=1,
+                            value=4,
+                        )
+                    with gr.Row():
+                        height = gr.Slider(
+                            label="Height",
+                            minimum=256,
+                            maximum=1024,
+                            step=32,
+                            value=1024,
+                        )
+                        width = gr.Slider(
+                            label="Width",
+                            minimum=256,
+                            maximum=1024,
+                            step=32,
+                            value=1024,
+                        )
+            # Right column: Interactive image view
             with gr.Column(scale=1):
                 gr.Markdown("### 🎯 Interactive Image View")
+                gr.Markdown("*Upload an image, then hover to see camera controls and click arrows to generate new views*")
                 # Interactive HTML component using working pattern
                 result_display = gr.HTML(
+                    value=\"\"\"
+                    <div style=\"width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
+                                position: relative; display: flex; align-items: center; justify-content: center;\">
+                        <div style=\"text-align: center; color: #999;\">
+                            <div style=\"font-size: 48px; margin-bottom: 10px;\">📸</div>
                             <p>Upload an image on the left to begin</p>
+                            <p>Then hover here to see camera controls</p>
                         </div>
                     </div>
+                    \"\"\",
                     elem_id="result-display"
                 )
+        # ===== FUNCTIONS INSIDE BLOCKS CONTEXT =====
+        def update_dimensions_on_upload(input_image):
+            \"\"\"Compute recommended dimensions preserving aspect ratio.\"\"\"
+            if input_image is None:
+                return 1024, 1024
+            original_width, original_height = input_image.size
+            aspect_ratio = original_width / original_height
+            if aspect_ratio > 1:
+                # Landscape
+                new_width = 1024
+                new_height = round(1024 / aspect_ratio / 32) * 32
+            else:
+                # Portrait or square
+                new_height = 1024
+                new_width = round(1024 * aspect_ratio / 32) * 32
+            # Ensure minimum size
+            new_width = max(256, min(1024, new_width))
+            new_height = max(256, min(1024, new_height))
+            return new_width, new_height
+        def show_uploaded_image_with_arrows(uploaded_image):
+            \"\"\"Show uploaded image with working arrow controls.\"\"\"
+            if uploaded_image is None:
+                return gr.update(value=\"\"\"
+                <div style=\"width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
+                            position: relative; display: flex; align-items: center; justify-content: center;\">
+                    <div style=\"text-align: center; color: #999;\">
+                        <div style=\"font-size: 48px; margin-bottom: 10px;\">📸</div>
+                        <p>Upload an image on the left to begin</p>
+                        <p>Then hover here to see camera controls</p>
+                    </div>
+                </div>
+                \"\"\")
+            # Convert to data URL
+            buffered = BytesIO()
+            uploaded_image.save(buffered, format=\"PNG\")
+            img_str = base64.b64encode(buffered.getvalue()).decode()
+            data_url = f\"data:image/png;base64,{img_str}\"
+            return gr.update(value=f\"\"\"
+            <div style=\"width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
+                        position: relative; display: flex; align-items: center; justify-content: center;\"
+                 onmouseenter=\"this.querySelector('#arrow-controls').style.opacity='1'\"
+                 onmouseleave=\"this.querySelector('#arrow-controls').style.opacity='0'\">
+                <!-- Image -->
+                <img src=\"{data_url}\" style=\"max-width: 100%; max-height: 100%; object-fit: contain;\">
+                <!-- Arrow controls -->
+                <div id=\"arrow-controls\" style=\"position: absolute; inset: 0; opacity: 0; transition: opacity 0.3s ease; z-index: 10;\">
+                    <!-- Left Arrow (Azimuth -45°) -->
+                    <button onclick=\"
+                        var azInput = document.getElementById('js-azimuth').querySelector('input');
+                        var newAz = (parseInt(azInput.value) - 45 + 360) % 360;
+                        azInput.value = newAz;
+                        azInput.dispatchEvent(new Event('input', {{bubbles: true}}));
+                        document.getElementById('status-az').textContent = newAz;
+                    \"
+                    style=\"position: absolute; left: 20px; top: 50%; transform: translateY(-50%);
+                           width: 60px; height: 60px; background: rgba(0,255,136,0.95); border: none;
                            border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
+                           box-shadow: 0 6px 20px rgba(0,0,0,0.4); transition: transform 0.2s;\"
+                    onmouseover=\"this.style.transform += ' scale(1.1)'\"
+                    onmouseout=\"this.style.transform = this.style.transform.replace(' scale(1.1)', '')\"
+                    title=\"Rotate Left (Azimuth -45°)\">
+                        ←
                     </button>
+                    <!-- Right Arrow (Azimuth +45°) -->
+                    <button onclick=\"
+                        var azInput = document.getElementById('js-azimuth').querySelector('input');
+                        var newAz = (parseInt(azInput.value) + 45) % 360;
+                        azInput.value = newAz;
+                        azInput.dispatchEvent(new Event('input', {{bubbles: true}}));
+                        document.getElementById('status-az').textContent = newAz;
+                    \"
+                    style=\"position: absolute; right: 20px; top: 50%; transform: translateY(-50%);
+                           width: 60px; height: 60px; background: rgba(0,255,136,0.95); border: none;
                            border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
+                           box-shadow: 0 6px 20px rgba(0,0,0,0.4); transition: transform 0.2s;\"
+                    onmouseover=\"this.style.transform += ' scale(1.1)'\"
+                    onmouseout=\"this.style.transform = this.style.transform.replace(' scale(1.1)', '')\"
+                    title=\"Rotate Right (Azimuth +45°)\">
+                        →
                     </button>
+                    <!-- Up Arrow (Elevation +30°) -->
+                    <button onclick=\"
+                        var elInput = document.getElementById('js-elevation').querySelector('input');
+                        var newEl = Math.min(60, parseInt(elInput.value) + 30);
+                        elInput.value = newEl;
+                        elInput.dispatchEvent(new Event('input', {{bubbles: true}}));
+                        document.getElementById('status-el').textContent = newEl;
+                    \"
+                    style=\"position: absolute; top: 20px; left: 50%; transform: translateX(-50%);
+                           width: 60px; height: 60px; background: rgba(255,105,180,0.95); border: none;
+                           border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
+                           box-shadow: 0 6px 20px rgba(0,0,0,0.4); transition: transform 0.2s;\"
+                    onmouseover=\"this.style.transform += ' scale(1.1)'\"
+                    onmouseout=\"this.style.transform = this.style.transform.replace(' scale(1.1)', '')\"
+                    title=\"Look Up (Elevation +30°)\">
+                        ↑
+                    </button>
+                    <!-- Down Arrow (Elevation -30°) -->
+                    <button onclick=\"
+                        var elInput = document.getElementById('js-elevation').querySelector('input');
+                        var newEl = Math.max(-30, parseInt(elInput.value) - 30);
+                        elInput.value = newEl;
+                        elInput.dispatchEvent(new Event('input', {{bubbles: true}}));
+                        document.getElementById('status-el').textContent = newEl;
+                    \"
+                    style=\"position: absolute; bottom: 80px; left: 50%; transform: translateX(-50%);
+                           width: 60px; height: 60px; background: rgba(255,105,180,0.95); border: none;
+                           border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
+                           box-shadow: 0 6px 20px rgba(0,0,0,0.4); transition: transform 0.2s;\"
+                    onmouseover=\"this.style.transform += ' scale(1.1)'\"
+                    onmouseout=\"this.style.transform = this.style.transform.replace(' scale(1.1)', '')\"
+                    title=\"Look Down (Elevation -30°)\">
+                        ↓
+                    </button>
+                    <!-- Zoom Controls -->
+                    <div style=\"position: absolute; bottom: 20px; left: 50%; transform: translateX(-50%);
+                                display: flex; gap: 15px;\">
+                        <!-- Zoom Out -->
+                        <button onclick=\"
+                            var distInput = document.getElementById('js-distance').querySelector('input');
+                            var newDist = Math.min(1.8, parseFloat(distInput.value) + 0.4);
+                            distInput.value = newDist.toFixed(1);
+                            distInput.dispatchEvent(new Event('input', {{bubbles: true}}));
+                            document.getElementById('status-dist').textContent = newDist.toFixed(1);
+                        \"
+                        style=\"width: 55px; height: 55px; background: rgba(255,165,0,0.95); border: none;
+                               border-radius: 50%; color: white; font-size: 28px; cursor: pointer;
+                               box-shadow: 0 6px 20px rgba(0,0,0,0.4); transition: transform 0.2s;\"
+                        onmouseover=\"this.style.transform = 'scale(1.1)'\"
+                        onmouseout=\"this.style.transform = ''\"
+                        title=\"Zoom Out (Distance +0.4)\">
+                            −
+                        </button>
+                        <!-- Zoom In -->
+                        <button onclick=\"
+                            var distInput = document.getElementById('js-distance').querySelector('input');
+                            var newDist = Math.max(0.6, parseFloat(distInput.value) - 0.4);
+                            distInput.value = newDist.toFixed(1);
+                            distInput.dispatchEvent(new Event('input', {{bubbles: true}}));
+                            document.getElementById('status-dist').textContent = newDist.toFixed(1);
+                        \"
+                        style=\"width: 55px; height: 55px; background: rgba(255,165,0,0.95); border: none;
+                               border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
+                               box-shadow: 0 6px 20px rgba(0,0,0,0.4); transition: transform 0.2s;\"
+                        onmouseover=\"this.style.transform = 'scale(1.1)'\"
+                        onmouseout=\"this.style.transform = ''\"
+                        title=\"Zoom In (Distance -0.4)\">
+                            +
+                        </button>
+                    </div>
+                    <!-- Status Display -->
+                    <div style=\"position: absolute; top: 15px; right: 15px; background: rgba(0,0,0,0.9);
+                                color: white; padding: 12px 16px; border-radius: 10px; font-family: monospace;
+                                font-size: 14px; box-shadow: 0 6px 20px rgba(0,0,0,0.4); min-width: 200px;\">
+                        <div style=\"margin-bottom: 4px;\">Az: <span id=\"status-az\">0</span>° | El: <span id=\"status-el\">0</span>° | Dist: <span id=\"status-dist\">1.0</span></div>
+                        <div id=\"status-prompt\" style=\"color: #00ff88; font-size: 12px; line-height: 1.3;\">
+                            <sks> front view eye-level shot medium shot
+                        </div>
+                    </div>
                 </div>
             </div>
+            \"\"\")
+        def handle_parameter_change(az, el, dist, input_image, seed_val, randomize_seed_val, guidance_val, steps_val, h_val, w_val):
+            \"\"\"Handle camera parameter changes and generate new view.\"\"\"
+            try:
+                azimuth = float(az)
+                elevation = float(el)
+                distance = float(dist)
+                # Build prompt
+                prompt = build_camera_prompt(azimuth, elevation, distance)
+                if input_image is not None:
+                    # Generate new image using the actual Qwen model
+                    generated_image, final_seed, final_prompt = infer_camera_edit(
+                        image=input_image,
+                        azimuth=azimuth,
+                        elevation=elevation,
+                        distance=distance,
+                        seed=seed_val,
+                        randomize_seed=randomize_seed_val,
+                        guidance_scale=guidance_val,
+                        num_inference_steps=steps_val,
+                        height=h_val,
+                        width=w_val
+                    )
+                    # Show generated image with arrows
+                    html_result = show_uploaded_image_with_arrows(generated_image)
+                    return html_result.value, final_seed, final_prompt
+                return gr.update(), seed_val, prompt
+            except Exception as e:
+                print(f\"Generation error: {e}\")
+                import traceback
+                traceback.print_exc()
+                raise gr.Error(f\"Generation failed: {str(e)}\")
+        # ===== EVENT HANDLERS INSIDE BLOCKS CONTEXT =====
+        # Auto-update dimensions when image is uploaded
+        image.upload(
+            fn=update_dimensions_on_upload,
+            inputs=[image],
+            outputs=[width, height]
+        )
+        # Show uploaded image immediately
+        image.upload(
+            fn=show_uploaded_image_with_arrows,
+            inputs=[image],
+            outputs=[result_display]
         )
+        # Auto-generation handler triggered by input changes
+        def auto_generate_on_change(js_az, js_el, js_dist, input_image, seed_val, randomize_seed_val, guidance_val, steps_val, h_val, w_val):
+            \"\"\"Auto-generate when camera parameters change from arrow clicks.\"\"\"
+            if input_image is None:
+                return gr.update(), seed_val, \"<sks> front view eye-level shot medium shot\"
+            return handle_parameter_change(js_az, js_el, js_dist, input_image, seed_val, randomize_seed_val, guidance_val, steps_val, h_val, w_val)
+        # Set up auto-generation on parameter changes
+        for input_component in [js_azimuth, js_elevation, js_distance]:
+            input_component.change(
+                fn=auto_generate_on_change,
+                inputs=[js_azimuth, js_elevation, js_distance, image, seed, randomize_seed, guidance_scale, num_inference_steps, height, width],
+                outputs=[result_display, seed, prompt_display]
+            )
     return demo
+if __name__ == \"__main__\":  # entry point: runs only when executed as a script, not on import
     demo = create_camera_control_app()  # build the app object via the factory above
     demo.launch()  # start the app returned by the factory