ysharma HF Staff committed on
Commit
f0d6a54
·
verified ·
1 Parent(s): 2f644fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +392 -256
app.py CHANGED
@@ -1,328 +1,464 @@
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
2
  import base64
3
  from io import BytesIO
4
- from PIL import Image
5
- import torch
6
- from diffusers import StableDiffusionXLPipeline
7
 
8
- # Initialize the model (placeholder - replace with your actual model)
9
- def init_model():
10
- """Initialize the Qwen model with camera control LoRAs"""
11
- # This is a placeholder - replace with actual model initialization
12
- try:
13
- # pipe = StableDiffusionXLPipeline.from_pretrained(
14
- # "Qwen/Qwen2-VL-7B-Instruct",
15
- # torch_dtype=torch.float16,
16
- # device_map="auto"
17
- # )
18
- # return pipe
19
- return None # Placeholder
20
- except:
21
- return None
 
 
 
 
 
 
22
 
23
  # Camera parameter mappings
24
- AZIMUTH_MAP = {
25
- 0: "front view", 45: "front-right quarter view", 90: "right side view",
26
- 135: "back-right quarter view", 180: "back view", 225: "back-left quarter view",
27
- 270: "left side view", 315: "front-left quarter view"
 
 
 
 
 
28
  }
29
- ELEVATION_MAP = {
30
- -30: "low-angle shot", 0: "eye-level shot", 30: "elevated shot", 60: "high-angle shot"
 
 
 
 
31
  }
32
- DISTANCE_MAP = {
33
- 0.6: "close-up", 1.0: "medium shot", 1.8: "wide shot"
 
 
 
34
  }
35
 
36
  def snap_to_nearest(value, steps):
37
- """Snap value to nearest step."""
38
  return min(steps, key=lambda x: abs(x - value))
39
 
40
  def build_camera_prompt(azimuth, elevation, distance):
41
- """Build camera prompt from parameters."""
42
  azimuth_steps = [0, 45, 90, 135, 180, 225, 270, 315]
43
  elevation_steps = [-30, 0, 30, 60]
44
  distance_steps = [0.6, 1.0, 1.8]
45
 
46
- az_snap = snap_to_nearest(azimuth, azimuth_steps)
47
- el_snap = snap_to_nearest(elevation, elevation_steps)
48
- dist_snap = snap_to_nearest(distance, distance_steps)
49
 
50
- az_name = AZIMUTH_MAP[az_snap]
51
- el_name = ELEVATION_MAP[el_snap]
52
- dist_name = DISTANCE_MAP[dist_snap]
53
 
54
- return f"<sks> {az_name} {el_name} {dist_name}"
55
 
56
- def generate_new_view(input_image, azimuth, elevation, distance, seed, randomize_seed, guidance_scale, num_inference_steps, height, width):
57
- """Generate new camera view (placeholder - replace with actual model inference)."""
58
- if input_image is None:
59
- return None, seed, build_camera_prompt(azimuth, elevation, distance)
60
-
61
- try:
62
- # Placeholder implementation - replace with actual model inference
63
- prompt = build_camera_prompt(azimuth, elevation, distance)
64
-
65
- # For now, just return the original image as a placeholder
66
- # In real implementation, this would call your Qwen model with camera parameters
67
- return input_image, seed, prompt
68
-
69
- except Exception as e:
70
- print(f"Generation error: {e}")
71
- return None, seed, build_camera_prompt(azimuth, elevation, distance)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  def create_camera_control_app():
74
- """Create the working camera control app."""
75
 
76
- with gr.Blocks(title="Camera Control with Directional Arrows - WORKING VERSION") as demo:
77
  gr.Markdown("# 📸 Camera Control with Directional Arrows")
78
- gr.Markdown("Upload an image and use arrows to control camera angles for 3D view generation")
79
 
80
  with gr.Row():
81
- # Left column: Image upload and controls
82
  with gr.Column(scale=1):
83
  image = gr.Image(label="Upload Image", type="pil", height=400)
84
 
85
- # Camera parameter inputs (visible for debugging)
86
  js_azimuth = gr.Textbox("0", visible=True, elem_id="js-azimuth", label="Azimuth")
87
  js_elevation = gr.Textbox("0", visible=True, elem_id="js-elevation", label="Elevation")
88
  js_distance = gr.Textbox("1.0", visible=True, elem_id="js-distance", label="Distance")
89
 
90
- # Generation settings
91
- with gr.Accordion("⚙️ Generation Settings", open=False):
92
- seed = gr.Slider(minimum=0, maximum=2147483647, step=1, value=42, label="Seed")
93
- randomize_seed = gr.Checkbox(True, label="Randomize seed")
94
- guidance_scale = gr.Slider(minimum=1, maximum=20, step=0.1, value=7.5, label="Guidance scale")
95
- num_inference_steps = gr.Slider(minimum=10, maximum=50, step=1, value=25, label="Number of inference steps")
96
- height = gr.Slider(minimum=256, maximum=1024, step=64, value=1024, label="Height")
97
- width = gr.Slider(minimum=256, maximum=1024, step=64, value=1024, label="Width")
98
-
99
  prompt_display = gr.Textbox(
100
- label="Current Camera Prompt",
101
  value="<sks> front view eye-level shot medium shot",
102
  interactive=False
103
  )
104
 
105
- # Right column: Interactive image view
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  with gr.Column(scale=1):
107
  gr.Markdown("### 🎯 Interactive Image View")
108
- gr.Markdown("*Upload an image, then hover to see controls and click arrows to generate new views*")
109
 
110
  # Interactive HTML component using working pattern
111
  result_display = gr.HTML(
112
- value="""
113
- <div style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
114
- position: relative; display: flex; align-items: center; justify-content: center;">
115
- <div style="text-align: center; color: #999;">
116
- <div style="font-size: 48px; margin-bottom: 10px;">📸</div>
117
  <p>Upload an image on the left to begin</p>
118
- <p>Then hover to see camera controls</p>
119
  </div>
120
  </div>
121
- """,
122
  elem_id="result-display"
123
  )
124
 
125
- # Debug output
126
- debug_output = gr.Textbox(label="Debug Output", visible=True)
127
-
128
- # Functions for handling interactions
129
- def show_uploaded_image_with_arrows(uploaded_image):
130
- """Show uploaded image with working arrow controls."""
131
- if uploaded_image is None:
132
- return gr.update(value="""
133
- <div style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
134
- position: relative; display: flex; align-items: center; justify-content: center;">
135
- <div style="text-align: center; color: #999;">
136
- <div style="font-size: 48px; margin-bottom: 10px;">📸</div>
137
- <p>Upload an image on the left to begin</p>
138
- <p>Then hover to see camera controls</p>
139
- </div>
140
- </div>
141
- """)
142
 
143
- # Convert to data URL
144
- buffered = BytesIO()
145
- uploaded_image.save(buffered, format="PNG")
146
- img_str = base64.b64encode(buffered.getvalue()).decode()
147
- data_url = f"data:image/png;base64,{img_str}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
- # Return HTML with image and working arrow controls
150
- return gr.update(value=f"""
151
- <div style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
152
- position: relative; display: flex; align-items: center; justify-content: center;">
 
 
 
 
 
 
 
 
 
153
 
154
- <!-- Uploaded image -->
155
- <img src="{data_url}" style="max-width: 100%; max-height: 100%; object-fit: contain;">
 
 
 
156
 
157
- <!-- Arrow controls with CSS hover effects and inline JavaScript -->
158
- <div style="position: absolute; inset: 0; z-index: 10; opacity: 0; transition: opacity 0.3s ease;"
159
- onmouseover="this.style.opacity='1'" onmouseout="this.style.opacity='0'">
160
-
161
- <!-- Left arrow (Azimuth -45°) -->
162
- <button onclick="
163
- var az = parseInt(document.getElementById('js-azimuth').querySelector('input').value) - 45;
164
- if (az < 0) az += 360;
165
- document.getElementById('js-azimuth').querySelector('input').value = az;
166
- document.getElementById('js-azimuth').querySelector('input').dispatchEvent(new Event('input', {{bubbles: true}}));
167
- document.getElementById('status-az').textContent = az;
168
- "
169
- style="position: absolute; left: 20px; top: 50%; transform: translateY(-50%);
170
- width: 60px; height: 60px; background: rgba(0,255,136,0.9); border: none;
171
- border-radius: 50%; color: white; font-size: 24px; cursor: pointer; z-index: 11;
172
- box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
173
- onmouseover="this.style.transform += ' scale(1.1)'"
174
- onmouseout="this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
175
- title="Rotate Left (Azimuth -45°)">
176
-
177
- </button>
178
-
179
- <!-- Right arrow (Azimuth +45°) -->
180
- <button onclick="
181
- var az = (parseInt(document.getElementById('js-azimuth').querySelector('input').value) + 45) % 360;
182
- document.getElementById('js-azimuth').querySelector('input').value = az;
183
- document.getElementById('js-azimuth').querySelector('input').dispatchEvent(new Event('input', {{bubbles: true}}));
184
- document.getElementById('status-az').textContent = az;
185
- "
186
- style="position: absolute; right: 20px; top: 50%; transform: translateY(-50%);
187
- width: 60px; height: 60px; background: rgba(0,255,136,0.9); border: none;
188
- border-radius: 50%; color: white; font-size: 24px; cursor: pointer; z-index: 11;
189
- box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
190
- onmouseover="this.style.transform += ' scale(1.1)'"
191
- onmouseout="this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
192
- title="Rotate Right (Azimuth +45°)">
193
-
194
- </button>
195
-
196
- <!-- Up arrow (Elevation +30°) -->
197
- <button onclick="
198
- var el = Math.min(60, parseInt(document.getElementById('js-elevation').querySelector('input').value) + 30);
199
- document.getElementById('js-elevation').querySelector('input').value = el;
200
- document.getElementById('js-elevation').querySelector('input').dispatchEvent(new Event('input', {{bubbles: true}}));
201
- document.getElementById('status-el').textContent = el;
202
- "
203
- style="position: absolute; top: 20px; left: 50%; transform: translateX(-50%);
204
- width: 60px; height: 60px; background: rgba(255,105,180,0.9); border: none;
205
- border-radius: 50%; color: white; font-size: 24px; cursor: pointer; z-index: 11;
206
- box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
207
- onmouseover="this.style.transform += ' scale(1.1)'"
208
- onmouseout="this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
209
- title="Look Up (Elevation +30°)">
210
-
211
- </button>
212
 
213
- <!-- Down arrow (Elevation -30°) -->
214
- <button onclick="
215
- var el = Math.max(-30, parseInt(document.getElementById('js-elevation').querySelector('input').value) - 30);
216
- document.getElementById('js-elevation').querySelector('input').value = el;
217
- document.getElementById('js-elevation').querySelector('input').dispatchEvent(new Event('input', {{bubbles: true}}));
218
- document.getElementById('status-el').textContent = el;
219
- "
220
- style="position: absolute; bottom: 80px; left: 50%; transform: translateX(-50%);
221
- width: 60px; height: 60px; background: rgba(255,105,180,0.9); border: none;
222
- border-radius: 50%; color: white; font-size: 24px; cursor: pointer; z-index: 11;
223
- box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
224
- onmouseover="this.style.transform += ' scale(1.1)'"
225
- onmouseout="this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
226
- title="Look Down (Elevation -30°)">
227
-
228
- </button>
229
 
230
- <!-- Zoom controls -->
231
- <div style="position: absolute; bottom: 20px; left: 50%; transform: translateX(-50%);
232
- display: flex; gap: 15px; z-index: 11;">
233
 
234
- <!-- Zoom out (Distance +0.4) -->
235
- <button onclick="
236
- var dist = Math.min(1.8, parseFloat(document.getElementById('js-distance').querySelector('input').value) + 0.4);
237
- document.getElementById('js-distance').querySelector('input').value = dist.toFixed(1);
238
- document.getElementById('js-distance').querySelector('input').dispatchEvent(new Event('input', {{bubbles: true}}));
239
- document.getElementById('status-dist').textContent = dist.toFixed(1);
240
- "
241
- style="width: 55px; height: 55px; background: rgba(255,165,0,0.9); border: none;
 
 
242
  border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
243
- box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
244
- onmouseover="this.style.transform = 'scale(1.1)'"
245
- onmouseout="this.style.transform = ''"
246
- title="Zoom Out (Distance +0.4)">
247
-
248
  </button>
249
 
250
- <!-- Zoom in (Distance -0.4) -->
251
- <button onclick="
252
- var dist = Math.max(0.6, parseFloat(document.getElementById('js-distance').querySelector('input').value) - 0.4);
253
- document.getElementById('js-distance').querySelector('input').value = dist.toFixed(1);
254
- document.getElementById('js-distance').querySelector('input').dispatchEvent(new Event('input', {{bubbles: true}}));
255
- document.getElementById('status-dist').textContent = dist.toFixed(1);
256
- "
257
- style="width: 55px; height: 55px; background: rgba(255,165,0,0.9); border: none;
 
 
258
  border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
259
- box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
260
- onmouseover="this.style.transform = 'scale(1.1)'"
261
- onmouseout="this.style.transform = ''"
262
- title="Zoom In (Distance -0.4)">
263
- +
264
  </button>
265
- </div>
266
-
267
- <!-- Status display -->
268
- <div style="position: absolute; top: 15px; right: 15px; background: rgba(0,0,0,0.85);
269
- color: white; padding: 10px 14px; border-radius: 8px; font-family: monospace;
270
- font-size: 13px; z-index: 11; box-shadow: 0 4px 12px rgba(0,0,0,0.4);">
271
- <div>Az: <span id="status-az">0</span>° | El: <span id="status-el">0</span>° | Dist: <span id="status-dist">1.0</span></div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
  </div>
273
  </div>
274
- </div>
275
- """)
276
-
277
- def handle_parameter_change(az, el, dist, input_image):
278
- """Handle camera parameter changes and trigger generation."""
279
- try:
280
- azimuth = float(az)
281
- elevation = float(el)
282
- distance = float(dist)
283
-
284
- # Build new prompt
285
- prompt = build_camera_prompt(azimuth, elevation, distance)
286
-
287
- # Generate new image (placeholder)
288
- if input_image is not None:
289
- # For now, just return the original image
290
- # In real implementation, call your model here
291
- new_image = input_image # Placeholder
292
 
293
- # Convert to data URL for display
294
- buffered = BytesIO()
295
- new_image.save(buffered, format="PNG")
296
- img_str = base64.b64encode(buffered.getvalue()).decode()
297
- data_url = f"data:image/png;base64,{img_str}"
298
 
299
- # Update HTML with new image (keeping the same arrow structure)
300
- return show_uploaded_image_with_arrows(new_image).value, prompt, f"Generated view: Az={azimuth}°, El={elevation}°, Dist={distance}"
301
-
302
- return gr.update(), prompt, f"Parameters updated: Az={azimuth}°, El={elevation}°, Dist={distance}"
303
-
304
- except Exception as e:
305
- return gr.update(), f"Error: {str(e)}", f"Error processing parameters: {str(e)}"
306
-
307
- # Set up event handlers
308
-
309
- # Image upload handler
310
- image.upload(
311
- fn=show_uploaded_image_with_arrows,
312
- inputs=[image],
313
- outputs=[result_display]
314
- )
315
-
316
- # Parameter change handlers (triggered by arrow clicks)
317
- for param_input in [js_azimuth, js_elevation, js_distance]:
318
- param_input.change(
319
- fn=handle_parameter_change,
320
- inputs=[js_azimuth, js_elevation, js_distance, image],
321
- outputs=[result_display, prompt_display, debug_output]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
  return demo
325
 
326
- if __name__ == "__main__":
327
  demo = create_camera_control_app()
328
  demo.launch()
 
1
+ """
2
+ Camera Control App with Working Arrow Interface
3
+ Complete version with Qwen model integration
4
+ """
5
+
6
  import gradio as gr
7
+ import torch
8
+ import numpy as np
9
+ import random
10
+ from PIL import Image
11
+ import spaces
12
+ from diffusers import DiffusionPipeline
13
  import base64
14
  from io import BytesIO
 
 
 
15
 
16
# Model configuration
device = "cuda" if torch.cuda.is_available() else "cpu"

# Pick the widest-supported reduced precision on GPU. On CPU fall back to
# float32: half-precision pipelines are not reliably runnable on CPU kernels
# (diffusers warns about exactly this when a float16 pipeline is moved to CPU).
if device == "cuda":
    dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
else:
    dtype = torch.float32

# Largest seed accepted by the UI slider (signed 32-bit maximum).
MAX_SEED = np.iinfo(np.int32).max

# Lazily populated pipeline singleton; see load_model().
pipe = None
22
+
23
def load_model():
    """Return the shared diffusion pipeline, building it on the first call.

    The pipeline is stored in the module-level ``pipe`` so later calls reuse
    it. On first use the base weights are loaded, the ``lightning`` LoRA is
    loaded and fused, and the ``multi_angles`` LoRA is loaded as a named
    adapter before the adapter weights are set.
    """
    global pipe

    # Already built: hand back the cached instance.
    if pipe is not None:
        return pipe

    repo_id = "multimodalart/qwen-image-multiple-angles-3d-camera"

    pipe = DiffusionPipeline.from_pretrained(
        repo_id,
        torch_dtype=dtype,
    ).to(device)

    pipe.load_lora_weights(repo_id, weight_name="lightning.safetensors")
    pipe.fuse_lora(lora_scale=1.0)
    pipe.load_lora_weights(
        repo_id,
        weight_name="multi_angles.safetensors",
        adapter_name="multi_angles",
    )
    pipe.set_adapters(["default", "multi_angles"], adapter_weights=[1.0, 1.0])

    return pipe
36
 
37
# Camera parameter mappings
# Each table maps a supported numeric camera value to the phrase used for it
# in the generated prompt. Keys are paired positionally with the phrases.

# Azimuth in degrees, one entry every 45 degrees.
azimuth_mapping = dict(zip(
    range(0, 360, 45),
    (
        "front view",
        "front-right quarter view",
        "right side view",
        "back-right quarter view",
        "back view",
        "back-left quarter view",
        "left side view",
        "front-left quarter view",
    ),
))

# Elevation in degrees, from -30 to 60 in steps of 30.
elevation_mapping = dict(zip(
    range(-30, 90, 30),
    (
        "low-angle shot",
        "eye-level shot",
        "elevated shot",
        "high-angle shot",
    ),
))

# Camera distance factor.
distance_mapping = dict(zip(
    (0.6, 1.0, 1.8),
    (
        "close-up",
        "medium shot",
        "wide shot",
    ),
))
61
 
62
def snap_to_nearest(value, steps):
    """Return the entry of ``steps`` that lies closest to ``value``.

    When two entries are equally close, the one appearing first in ``steps``
    is returned.
    """
    def gap(candidate):
        # Absolute distance between a candidate step and the requested value.
        return abs(candidate - value)

    return min(steps, key=gap)
65
 
66
def build_camera_prompt(azimuth, elevation, distance):
    """Build the ``<sks>`` camera prompt from numerical parameters.

    Each value is snapped to the nearest supported step and translated to its
    phrase via ``azimuth_mapping``, ``elevation_mapping`` and
    ``distance_mapping``.

    Args:
        azimuth: Horizontal angle in degrees. Any real value is accepted; it
            is wrapped onto the circle first, so 350 and -10 both resolve to
            the front view rather than to 315.
        elevation: Vertical angle in degrees, snapped to -30/0/30/60.
        distance: Camera distance factor, snapped to 0.6/1.0/1.8.

    Returns:
        A string of the form ``"<sks> {azimuth} {elevation} {distance}"``.
    """
    azimuth_steps = [0, 45, 90, 135, 180, 225, 270, 315]
    elevation_steps = [-30, 0, 30, 60]
    distance_steps = [0.6, 1.0, 1.8]

    # Azimuth is circular: add 360 as a candidate so values just below a full
    # turn snap "forward" to the front view, then fold 360 back onto 0.
    azimuth_snapped = snap_to_nearest(azimuth % 360, azimuth_steps + [360]) % 360
    elevation_snapped = snap_to_nearest(elevation, elevation_steps)
    distance_snapped = snap_to_nearest(distance, distance_steps)

    azimuth_name = azimuth_mapping[azimuth_snapped]
    elevation_name = elevation_mapping[elevation_snapped]
    distance_name = distance_mapping[distance_snapped]

    return f"<sks> {azimuth_name} {elevation_name} {distance_name}"
81
 
82
@spaces.GPU(duration=5)
def infer_camera_edit(
    image: Image.Image,
    azimuth: float = 0.0,
    elevation: float = 0.0,
    distance: float = 1.0,
    seed: int = 0,
    randomize_seed: bool = True,
    guidance_scale: float = 1.0,
    num_inference_steps: int = 4,
    height: int = 1024,
    width: int = 1024,
):
    """Generate a new camera view of ``image`` with the shared pipeline.

    Args:
        image: Source picture, either a PIL image or something
            ``PIL.Image.open`` accepts.
        azimuth: Horizontal camera angle passed to ``build_camera_prompt``.
        elevation: Vertical camera angle passed to ``build_camera_prompt``.
        distance: Camera distance passed to ``build_camera_prompt``.
        seed: Seed used when ``randomize_seed`` is false.
        randomize_seed: When true, ``seed`` is replaced by a random value in
            ``[0, MAX_SEED]``.
        guidance_scale: Forwarded to the pipeline unchanged.
        num_inference_steps: Number of denoising steps.
        height: Output height; ``0`` lets the pipeline pick its default.
        width: Output width; ``0`` lets the pipeline pick its default.

    Returns:
        A ``(result_image, seed_used, prompt)`` tuple.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    # Validate before touching the RNG or allocating a device generator, so a
    # missing image fails fast and without side effects.
    if image is None:
        raise gr.Error("Please upload an image first.")

    prompt = build_camera_prompt(azimuth, elevation, distance)
    print(f"Generated Prompt: {prompt}")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI sliders may hand over floats; manual_seed only accepts integers.
    seed = int(seed)
    generator = torch.Generator(device=device).manual_seed(seed)

    if isinstance(image, Image.Image):
        pil_image = image.convert("RGB")
    else:
        pil_image = Image.open(image).convert("RGB")

    # Load model only when needed
    current_pipe = load_model()

    result = current_pipe(
        image=[pil_image],
        prompt=prompt,
        # A falsy size (0 or None) means "use the pipeline default".
        height=int(height) if height else None,
        width=int(width) if width else None,
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        guidance_scale=guidance_scale,
        num_images_per_prompt=1,
    ).images[0]

    return result, seed, prompt
123
 
124
# Inline style shared by the empty placeholder and the live viewer frame.
_VIEWER_FRAME_STYLE = (
    "width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; "
    "border-radius: 12px; position: relative; display: flex; align-items: center; "
    "justify-content: center;"
)

# Look shared by every round overlay button.
_BUTTON_CHROME = (
    "border: none; border-radius: 50%; color: white; cursor: pointer; "
    "box-shadow: 0 6px 20px rgba(0,0,0,0.4); transition: transform 0.2s;"
)

# Prompt shown before any camera parameter has been changed.
_DEFAULT_CAMERA_PROMPT = "<sks> front view eye-level shot medium shot"


def _placeholder_html():
    """Return the empty-state markup shown while no image is loaded."""
    return f"""
    <div style="{_VIEWER_FRAME_STYLE}">
        <div style="text-align: center; color: #999;">
            <div style="font-size: 48px; margin-bottom: 10px;">📸</div>
            <p>Upload an image on the left to begin</p>
            <p>Then hover here to see camera controls</p>
        </div>
    </div>
    """


def _image_to_data_url(pil_image):
    """Encode a PIL image as a PNG ``data:`` URL usable in an ``<img src>``."""
    buffer = BytesIO()
    pil_image.save(buffer, format="PNG")
    encoded = base64.b64encode(buffer.getvalue()).decode()
    return f"data:image/png;base64,{encoded}"


def _nudge_js(elem_id, status_id, parse_fn, next_expr):
    """Build the inline ``onclick`` script for one camera button.

    The script reads the Gradio textbox wrapped by ``elem_id``, evaluates
    ``next_expr`` (a JS expression over ``cur``, the parsed current value),
    writes the result back, fires an ``input`` event so Gradio notices the
    change, and mirrors the value into the ``status_id`` span.

    Only single quotes are used so the script can sit inside a
    double-quoted HTML attribute.
    """
    return (
        # NOTE(review): depending on the Gradio version a single-line Textbox
        # may render as <input> or <textarea>; match either.
        f"var field = document.getElementById('{elem_id}').querySelector('input, textarea'); "
        f"var cur = {parse_fn}(field.value); "
        f"var nextVal = {next_expr}; "
        "field.value = nextVal; "
        "field.dispatchEvent(new Event('input', {bubbles: true})); "
        f"document.getElementById('{status_id}').textContent = nextVal;"
    )


def _control_button(label, title, onclick, style, anchored):
    """Return the markup for one round overlay button.

    ``anchored`` buttons carry a positioning ``transform`` of their own, so
    the hover scale is appended to and removed from it instead of replacing
    the whole transform.
    """
    if anchored:
        hover_in = "this.style.transform += ' scale(1.1)'"
        hover_out = "this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
    else:
        hover_in = "this.style.transform = 'scale(1.1)'"
        hover_out = "this.style.transform = ''"
    return (
        f'<button onclick="{onclick}" style="{style} {_BUTTON_CHROME}" '
        f'onmouseover="{hover_in}" onmouseout="{hover_out}" '
        f'title="{title}">{label}</button>'
    )


def _viewer_html(pil_image, azimuth=0.0, elevation=0.0, distance=1.0,
                 prompt=_DEFAULT_CAMERA_PROMPT):
    """Return the viewer markup: the image plus the hover-revealed controls.

    The status panel is rendered from the values passed in, so it keeps
    showing the current camera state after the markup is regenerated.
    """
    import html  # function-scope import keeps this block self-contained

    arrow_box = "width: 60px; height: 60px; font-size: 24px;"
    rotate_color = "background: rgba(0,255,136,0.95);"
    tilt_color = "background: rgba(255,105,180,0.95);"
    zoom_color = "background: rgba(255,165,0,0.95);"

    arrows = "\n".join([
        _control_button(
            "&larr;", "Rotate Left (Azimuth -45°)",
            _nudge_js("js-azimuth", "status-az", "parseInt", "(cur - 45 + 360) % 360"),
            "position: absolute; left: 20px; top: 50%; transform: translateY(-50%); "
            f"{arrow_box} {rotate_color}",
            anchored=True,
        ),
        _control_button(
            "&rarr;", "Rotate Right (Azimuth +45°)",
            _nudge_js("js-azimuth", "status-az", "parseInt", "(cur + 45) % 360"),
            "position: absolute; right: 20px; top: 50%; transform: translateY(-50%); "
            f"{arrow_box} {rotate_color}",
            anchored=True,
        ),
        _control_button(
            "&uarr;", "Look Up (Elevation +30°)",
            _nudge_js("js-elevation", "status-el", "parseInt", "Math.min(60, cur + 30)"),
            "position: absolute; top: 20px; left: 50%; transform: translateX(-50%); "
            f"{arrow_box} {tilt_color}",
            anchored=True,
        ),
        _control_button(
            "&darr;", "Look Down (Elevation -30°)",
            _nudge_js("js-elevation", "status-el", "parseInt", "Math.max(-30, cur - 30)"),
            "position: absolute; bottom: 80px; left: 50%; transform: translateX(-50%); "
            f"{arrow_box} {tilt_color}",
            anchored=True,
        ),
    ])

    zoom_buttons = "\n".join([
        _control_button(
            "&minus;", "Zoom Out (Distance +0.4)",
            _nudge_js("js-distance", "status-dist", "parseFloat",
                      "Math.min(1.8, cur + 0.4).toFixed(1)"),
            f"width: 55px; height: 55px; font-size: 28px; {zoom_color}",
            anchored=False,
        ),
        _control_button(
            "+", "Zoom In (Distance -0.4)",
            _nudge_js("js-distance", "status-dist", "parseFloat",
                      "Math.max(0.6, cur - 0.4).toFixed(1)"),
            f"width: 55px; height: 55px; font-size: 24px; {zoom_color}",
            anchored=False,
        ),
    ])

    status_panel = (
        '<div style="position: absolute; top: 15px; right: 15px; background: rgba(0,0,0,0.9); '
        "color: white; padding: 12px 16px; border-radius: 10px; font-family: monospace; "
        'font-size: 14px; box-shadow: 0 6px 20px rgba(0,0,0,0.4); min-width: 200px;">'
        '<div style="margin-bottom: 4px;">'
        f'Az: <span id="status-az">{azimuth:g}</span>° | '
        f'El: <span id="status-el">{elevation:g}</span>° | '
        f'Dist: <span id="status-dist">{distance:.1f}</span></div>'
        '<div id="status-prompt" style="color: #00ff88; font-size: 12px; line-height: 1.3;">'
        # The prompt starts with "<sks>", which a browser would otherwise
        # parse as an unknown tag and drop from the visible text.
        f"{html.escape(prompt)}</div></div>"
    )

    return f"""
    <div style="{_VIEWER_FRAME_STYLE}"
         onmouseenter="this.querySelector('#arrow-controls').style.opacity='1'"
         onmouseleave="this.querySelector('#arrow-controls').style.opacity='0'">
        <img src="{_image_to_data_url(pil_image)}" style="max-width: 100%; max-height: 100%; object-fit: contain;">
        <div id="arrow-controls" style="position: absolute; inset: 0; opacity: 0; transition: opacity 0.3s ease; z-index: 10;">
            {arrows}
            <div style="position: absolute; bottom: 20px; left: 50%; transform: translateX(-50%); display: flex; gap: 15px;">
                {zoom_buttons}
            </div>
            {status_panel}
        </div>
    </div>
    """


def create_camera_control_app():
    """Create the complete working camera control app.

    Builds a two-column Gradio ``Blocks`` UI. The left column holds the image
    upload, three textboxes carrying azimuth/elevation/distance (written to
    by the overlay buttons' JavaScript), a read-only prompt display and the
    advanced generation settings. The right column holds an HTML viewer that
    shows the image with hover-revealed camera buttons.

    Changing any of the three camera textboxes triggers a generation through
    ``infer_camera_edit`` when an image is present.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(title="Camera Control with Directional Arrows", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 📸 Camera Control with Directional Arrows")
        gr.Markdown("Upload an image and use the directional arrows to control camera angles")

        with gr.Row():
            # Left column: input image and settings
            with gr.Column(scale=1):
                image = gr.Image(label="Upload Image", type="pil", height=400)

                # Camera parameter inputs (visible for debugging, can be hidden later).
                # The elem_ids are the hooks the overlay buttons' JS looks up.
                js_azimuth = gr.Textbox("0", visible=True, elem_id="js-azimuth", label="Azimuth")
                js_elevation = gr.Textbox("0", visible=True, elem_id="js-elevation", label="Elevation")
                js_distance = gr.Textbox("1.0", visible=True, elem_id="js-distance", label="Distance")

                prompt_display = gr.Textbox(
                    label="Current Camera Prompt",
                    value=_DEFAULT_CAMERA_PROMPT,
                    interactive=False,
                )

                # Advanced settings
                with gr.Accordion("⚙️ Advanced Settings", open=False):
                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

                    with gr.Row():
                        guidance_scale = gr.Slider(
                            label="Guidance scale", minimum=0.1, maximum=2.0, step=0.1, value=1.0,
                        )
                        num_inference_steps = gr.Slider(
                            label="Number of inference steps", minimum=1, maximum=8, step=1, value=4,
                        )

                    with gr.Row():
                        height = gr.Slider(
                            label="Height", minimum=256, maximum=1024, step=32, value=1024,
                        )
                        width = gr.Slider(
                            label="Width", minimum=256, maximum=1024, step=32, value=1024,
                        )

            # Right column: interactive image view
            with gr.Column(scale=1):
                gr.Markdown("### 🎯 Interactive Image View")
                gr.Markdown("*Upload an image, then hover to see camera controls and click arrows to generate new views*")

                result_display = gr.HTML(value=_placeholder_html(), elem_id="result-display")

        # ===== CALLBACKS =====

        def update_dimensions_on_upload(input_image):
            """Compute recommended (width, height) preserving aspect ratio.

            The longer side is set to 1024, the other side is rounded to a
            multiple of 32, and both are clamped to [256, 1024].
            """
            if input_image is None:
                return 1024, 1024

            original_width, original_height = input_image.size
            aspect_ratio = original_width / original_height

            if aspect_ratio > 1:
                # Landscape
                new_width = 1024
                new_height = round(1024 / aspect_ratio / 32) * 32
            else:
                # Portrait or square
                new_height = 1024
                new_width = round(1024 * aspect_ratio / 32) * 32

            new_width = max(256, min(1024, new_width))
            new_height = max(256, min(1024, new_height))
            return new_width, new_height

        def show_uploaded_image_with_arrows(uploaded_image, az="0", el="0", dist="1.0"):
            """Return viewer markup for a freshly uploaded image.

            The current textbox values are reflected in the status panel;
            unparsable values fall back to the defaults (0, 0, 1.0).
            """
            if uploaded_image is None:
                return _placeholder_html()
            try:
                azimuth, elevation, distance = float(az), float(el), float(dist)
            except (TypeError, ValueError):
                azimuth, elevation, distance = 0.0, 0.0, 1.0
            prompt = build_camera_prompt(azimuth, elevation, distance)
            return _viewer_html(uploaded_image, azimuth, elevation, distance, prompt)

        def handle_parameter_change(az, el, dist, input_image, seed_val, randomize_seed_val,
                                    guidance_val, steps_val, h_val, w_val):
            """Handle camera parameter changes and generate a new view.

            Returns ``(viewer_html, seed, prompt)`` for the three outputs.

            Raises:
                gr.Error: If parsing or generation fails.
            """
            try:
                azimuth = float(az)
                elevation = float(el)
                distance = float(dist)

                prompt = build_camera_prompt(azimuth, elevation, distance)

                if input_image is None:
                    # Nothing to render: leave the viewer untouched.
                    return gr.update(), seed_val, prompt

                generated_image, final_seed, final_prompt = infer_camera_edit(
                    image=input_image,
                    azimuth=azimuth,
                    elevation=elevation,
                    distance=distance,
                    seed=seed_val,
                    randomize_seed=randomize_seed_val,
                    guidance_scale=guidance_val,
                    num_inference_steps=steps_val,
                    height=h_val,
                    width=w_val,
                )

                # Return the markup string itself. gr.update() yields a plain
                # dict, so reading `.value` from it would raise.
                viewer = _viewer_html(generated_image, azimuth, elevation, distance, final_prompt)
                return viewer, final_seed, final_prompt

            except Exception as e:
                print(f"Generation error: {e}")
                import traceback
                traceback.print_exc()
                raise gr.Error(f"Generation failed: {str(e)}") from e

        def auto_generate_on_change(js_az, js_el, js_dist, input_image, seed_val,
                                    randomize_seed_val, guidance_val, steps_val, h_val, w_val):
            """Auto-generate when camera parameters change from arrow clicks."""
            if input_image is None:
                return gr.update(), seed_val, _DEFAULT_CAMERA_PROMPT

            return handle_parameter_change(
                js_az, js_el, js_dist, input_image, seed_val,
                randomize_seed_val, guidance_val, steps_val, h_val, w_val,
            )

        # ===== EVENT WIRING =====

        # Auto-update dimensions when image is uploaded
        image.upload(
            fn=update_dimensions_on_upload,
            inputs=[image],
            outputs=[width, height],
        )

        # Show uploaded image immediately
        image.upload(
            fn=show_uploaded_image_with_arrows,
            inputs=[image, js_azimuth, js_elevation, js_distance],
            outputs=[result_display],
        )

        # Any camera textbox change re-runs generation with the full settings.
        generation_inputs = [
            js_azimuth, js_elevation, js_distance, image,
            seed, randomize_seed, guidance_scale, num_inference_steps, height, width,
        ]
        for camera_field in (js_azimuth, js_elevation, js_distance):
            camera_field.change(
                fn=auto_generate_on_change,
                inputs=generation_inputs,
                outputs=[result_display, seed, prompt_display],
            )

    return demo
461
 
462
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    demo = create_camera_control_app()
    demo.launch()