ysharma HF Staff committed on
Commit
9bf1531
·
verified ·
1 Parent(s): 5f1f222

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +318 -349
app.py CHANGED
@@ -87,390 +87,359 @@ def build_camera_prompt(azimuth, elevation, distance):
87
 
88
  return f"<sks> {az_name} {el_name} {dist_name}"
89
 
90
- def gpu_decorator(func):
91
- """Decorator that applies @spaces.GPU if available, otherwise returns function unchanged."""
92
- try:
93
- import spaces
94
- return spaces.GPU(func)
95
- except ImportError:
96
- return func
97
-
98
- @gpu_decorator
99
- def infer_camera_edit(
100
- image: Image.Image,
101
- azimuth: float = 0.0,
102
- elevation: float = 0.0,
103
- distance: float = 1.0,
104
- seed: int = 0,
105
- randomize_seed: bool = True,
106
- guidance_scale: float = 1.0,
107
- num_inference_steps: int = 4,
108
- height: int = 1024,
109
- width: int = 1024,
110
- ):
111
- prompt = build_camera_prompt(azimuth, elevation, distance)
112
- print(f"Generated Prompt: {prompt}")
113
-
114
  if randomize_seed:
115
  seed = random.randint(0, MAX_SEED)
 
116
  generator = torch.Generator(device=device).manual_seed(seed)
117
-
118
- if image is None:
119
- raise gr.Error("Please upload an image first.")
120
-
121
- pil_image = image.convert("RGB") if isinstance(image, Image.Image) else Image.open(image).convert("RGB")
122
-
123
- # Load model only when needed
124
- current_pipe = load_model()
125
-
126
- result = current_pipe(
127
- image=[pil_image],
128
  prompt=prompt,
129
- height=height if height != 0 else None,
130
- width=width if width != 0 else None,
131
- num_inference_steps=num_inference_steps,
132
- generator=generator,
133
  guidance_scale=guidance_scale,
134
- num_images_per_prompt=1,
 
135
  ).images[0]
136
-
137
  return result, seed, prompt
138
 
139
- def data_url_to_pil(data_url):
140
- """Convert data URL to PIL Image."""
141
- if data_url and data_url.startswith('data:image'):
142
- header, encoded = data_url.split(',', 1)
143
- data = base64.b64decode(encoded)
144
- return Image.open(BytesIO(data))
145
- return None
146
-
147
- def generate_from_inputs(js_az, js_el, js_dist, js_img_data, seed_val, randomize_seed_val, guidance_val, steps_val, h_val, w_val):
148
- """Generate new camera view from JavaScript inputs."""
149
- try:
150
- print(f"Received inputs:")
151
- print(f" Azimuth: {js_az}")
152
- print(f" Elevation: {js_el}")
153
- print(f" Distance: {js_dist}")
154
- print(f" Image data length: {len(js_img_data) if js_img_data else 0}")
155
-
156
- azimuth = float(js_az) if js_az else 0.0
157
- elevation = float(js_el) if js_el else 0.0
158
- distance = float(js_dist) if js_dist else 1.0
159
-
160
- # Check if we have valid image data
161
- if not js_img_data or not js_img_data.startswith('data:image'):
162
- raise gr.Error("No image data received. Please upload an image and try again.")
163
-
164
- # Convert data URL to PIL image
165
- image_pil = data_url_to_pil(js_img_data)
166
-
167
- if image_pil is None:
168
- raise gr.Error("Failed to process image data. Please try uploading the image again.")
169
-
170
- print(f"Successfully converted image. Size: {image_pil.size}")
171
- return infer_camera_edit(image_pil, azimuth, elevation, distance, seed_val, randomize_seed_val, guidance_val, steps_val, h_val, w_val)
172
- except Exception as e:
173
- import traceback
174
- traceback.print_exc()
175
- raise gr.Error(f"Generation failed: {str(e)}")
176
-
177
  def create_camera_control_app():
178
- """Create the complete camera control app using the working pattern."""
179
 
180
  with gr.Blocks(title="Camera Control with Directional Arrows") as demo:
181
  gr.Markdown("# 📸 Camera Control with Directional Arrows")
182
  gr.Markdown("Upload an image and use arrows to control camera angles for 3D view generation")
183
 
184
  with gr.Row():
185
- # Left column: Image upload and camera controls
186
  with gr.Column(scale=1):
187
- gr.Markdown("### 🎮 Interactive Camera Control")
188
- gr.Markdown("*Upload an image, then hover and click arrows to control the camera*")
189
 
190
- # Combined image upload + camera control interface
191
- camera_display = gr.HTML(
192
- value="""
193
- <div id="camera-interface" style="width: 100%; height: 400px; background: #f0f0f0; border: 2px dashed #999; border-radius: 12px; position: relative; display: flex; align-items: center; justify-content: center; cursor: pointer;">
194
- <!-- Hidden file input -->
195
- <input type="file" id="file-input" accept="image/*" style="display: none;">
 
 
 
 
 
 
 
 
 
196
 
197
- <!-- Image display area -->
198
- <img id="uploaded-image" style="max-width: 100%; max-height: 100%; object-fit: contain; display: none;">
199
 
200
- <!-- Upload prompt -->
201
- <div id="upload-prompt" style="text-align: center; color: #666;">
202
- <div style="font-size: 48px; margin-bottom: 10px;">📸</div>
203
- <p><strong>Click here to upload an image</strong></p>
204
- <p>Then use the arrows to control camera angles</p>
205
- <small style="color: #999;">Supports JPG, PNG, WebP</small>
206
- </div>
 
207
 
208
- <!-- Directional arrow controls (initially hidden) -->
209
- <div id="arrow-controls" style="position: absolute; inset: 0; opacity: 0; pointer-events: none; transition: opacity 0.3s ease; display: none;">
210
- <!-- Left arrow -->
211
- <button class="arrow-btn" data-action="left"
212
- style="position: absolute; left: 20px; top: 50%; transform: translateY(-50%);
213
- width: 60px; height: 60px; background: rgba(0,255,136,0.9); border: none;
214
- border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
215
- pointer-events: auto; transition: all 0.2s; z-index: 10;
216
- box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
217
-
218
- </button>
219
-
220
- <!-- Right arrow -->
221
- <button class="arrow-btn" data-action="right"
222
- style="position: absolute; right: 20px; top: 50%; transform: translateY(-50%);
223
- width: 60px; height: 60px; background: rgba(0,255,136,0.9); border: none;
224
- border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
225
- pointer-events: auto; transition: all 0.2s; z-index: 10;
226
- box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
227
-
228
- </button>
229
-
230
- <!-- Up arrow -->
231
- <button class="arrow-btn" data-action="up"
232
- style="position: absolute; top: 20px; left: 50%; transform: translateX(-50%);
233
- width: 60px; height: 60px; background: rgba(255,105,180,0.9); border: none;
234
- border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
235
- pointer-events: auto; transition: all 0.2s; z-index: 10;
236
- box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
237
-
238
- </button>
239
-
240
- <!-- Down arrow -->
241
- <button class="arrow-btn" data-action="down"
242
- style="position: absolute; bottom: 80px; left: 50%; transform: translateX(-50%);
243
- width: 60px; height: 60px; background: rgba(255,105,180,0.9); border: none;
244
- border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
245
- pointer-events: auto; transition: all 0.2s; z-index: 10;
246
- box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
247
-
248
- </button>
249
-
250
- <!-- Zoom controls -->
251
- <div style="position: absolute; bottom: 20px; left: 50%; transform: translateX(-50%);
252
- display: flex; gap: 15px; pointer-events: auto; z-index: 10;">
253
- <button class="arrow-btn" data-action="zoom-out"
254
- style="width: 55px; height: 55px; background: rgba(255,165,0,0.9); border: none;
255
- border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
256
- transition: all 0.2s; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
257
-
258
- </button>
259
-
260
- <button class="arrow-btn" data-action="zoom-in"
261
- style="width: 55px; height: 55px; background: rgba(255,165,0,0.9); border: none;
262
- border-radius: 50%; color: white; font-size: 20px; cursor: pointer;
263
- transition: all 0.2s; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
264
- +
265
- </button>
266
- </div>
267
- </div>
268
 
269
- <!-- Status display -->
270
- <div id="status-display" style="position: absolute; top: 15px; right: 15px; background: rgba(0,0,0,0.85);
271
- color: white; padding: 10px 14px; border-radius: 8px; font-family: monospace;
272
- font-size: 13px; z-index: 10; display: none;
273
- box-shadow: 0 4px 12px rgba(0,0,0,0.4);">
274
- <div id="camera-values">Az: 0° | El: 0° | Dist: 1.0</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  </div>
276
  </div>
277
  """,
278
- js_on_load="""
279
- (() => {
280
- const container = element.querySelector('#camera-interface');
281
- if (!container) return;
282
-
283
- const fileInput = container.querySelector('#file-input');
284
- const uploadedImage = container.querySelector('#uploaded-image');
285
- const uploadPrompt = container.querySelector('#upload-prompt');
286
- const arrowControls = container.querySelector('#arrow-controls');
287
- const statusDisplay = container.querySelector('#status-display');
288
- const cameraValues = container.querySelector('#camera-values');
289
-
290
- let currentAzimuth = 0;
291
- let currentElevation = 0;
292
- let currentDistance = 1.0;
293
-
294
- function updateDisplay() {
295
- cameraValues.textContent = `Az: ${currentAzimuth}° | El: ${currentElevation}° | Dist: ${currentDistance}`;
296
- }
297
-
298
- function updateGradioInputs() {
299
- setTimeout(() => {
300
- console.log('Updating Gradio inputs...');
301
-
302
- // Find the input elements
303
- let azInput = document.getElementById('js-azimuth');
304
- let elInput = document.getElementById('js-elevation');
305
- let distInput = document.getElementById('js-distance');
306
- let imgInput = document.getElementById('js-image-data');
307
-
308
- // Look for actual input/textarea elements inside
309
- if (azInput) {
310
- const inputInside = azInput.querySelector('input, textarea');
311
- if (inputInside) {
312
- azInput = inputInside;
313
- console.log('Updating azimuth to:', currentAzimuth);
314
- azInput.value = currentAzimuth;
315
- azInput.dispatchEvent(new Event('input', { bubbles: true }));
316
- azInput.dispatchEvent(new Event('change', { bubbles: true }));
317
- }
318
- }
319
- if (elInput) {
320
- const inputInside = elInput.querySelector('input, textarea');
321
- if (inputInside) {
322
- elInput = inputInside;
323
- console.log('Updating elevation to:', currentElevation);
324
- elInput.value = currentElevation;
325
- elInput.dispatchEvent(new Event('input', { bubbles: true }));
326
- elInput.dispatchEvent(new Event('change', { bubbles: true }));
327
- }
328
- }
329
- if (distInput) {
330
- const inputInside = distInput.querySelector('input, textarea');
331
- if (inputInside) {
332
- distInput = inputInside;
333
- console.log('Updating distance to:', currentDistance);
334
- distInput.value = currentDistance;
335
- distInput.dispatchEvent(new Event('input', { bubbles: true }));
336
- distInput.dispatchEvent(new Event('change', { bubbles: true }));
337
- }
338
- }
339
- if (imgInput && uploadedImage.src) {
340
- const inputInside = imgInput.querySelector('input, textarea');
341
- if (inputInside) {
342
- imgInput = inputInside;
343
- console.log('Updating image data, length:', uploadedImage.src.length);
344
- imgInput.value = uploadedImage.src;
345
- imgInput.dispatchEvent(new Event('input', { bubbles: true }));
346
- imgInput.dispatchEvent(new Event('change', { bubbles: true }));
347
- }
348
- }
349
- }, 200);
350
- }
351
 
352
- // Click to upload
353
- container.addEventListener('click', function(e) {
354
- if (!e.target.classList.contains('arrow-btn') && uploadedImage.style.display !== 'block') {
355
- fileInput.click();
356
- }
357
- });
358
 
359
- // File upload handler
360
- fileInput.addEventListener('change', function(e) {
361
- const file = e.target.files[0];
362
- if (file) {
363
- const reader = new FileReader();
364
- reader.onload = function(e) {
365
- uploadedImage.src = e.target.result;
366
- uploadedImage.style.display = 'block';
367
- uploadPrompt.style.display = 'none';
368
- arrowControls.style.display = 'block';
369
- statusDisplay.style.display = 'block';
370
- container.style.cursor = 'default';
371
- container.style.border = '2px solid #ddd';
372
- updateDisplay();
373
- updateGradioInputs();
374
- };
375
- reader.readAsDataURL(file);
376
- }
377
- });
378
 
379
- // Hover effects for arrows
380
- container.addEventListener('mouseenter', function(e) {
381
- if (arrowControls.style.display === 'block') {
382
- arrowControls.style.opacity = '1';
383
- }
384
- });
 
385
 
386
- container.addEventListener('mouseleave', function(e) {
387
- if (arrowControls.style.display === 'block') {
388
- arrowControls.style.opacity = '0.3';
389
- }
390
- });
391
 
392
- // Arrow button handlers
393
- container.addEventListener('click', function(e) {
394
- if (e.target.classList.contains('arrow-btn')) {
395
- e.stopPropagation();
396
- const action = e.target.dataset.action;
397
-
398
- console.log('Arrow clicked:', action);
399
-
400
- switch(action) {
401
- case 'left':
402
- currentAzimuth = (currentAzimuth - 45 + 360) % 360;
403
- break;
404
- case 'right':
405
- currentAzimuth = (currentAzimuth + 45) % 360;
406
- break;
407
- case 'up':
408
- currentElevation = Math.max(-30, Math.min(60, currentElevation + 30));
409
- break;
410
- case 'down':
411
- currentElevation = Math.max(-30, Math.min(60, currentElevation - 30));
412
- break;
413
- case 'zoom-out':
414
- currentDistance = Math.max(0.6, Math.min(1.8, currentDistance + 0.4));
415
- break;
416
- case 'zoom-in':
417
- currentDistance = Math.max(0.6, Math.min(1.8, currentDistance - 0.4));
418
- break;
419
- }
420
-
421
- updateDisplay();
422
- updateGradioInputs();
423
- }
424
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
 
426
- // Button hover effects
427
- container.addEventListener('mouseover', function(e) {
428
- if (e.target.classList.contains('arrow-btn')) {
429
- e.target.style.transform += ' scale(1.1)';
430
- }
431
- });
 
 
 
 
 
 
 
 
 
 
432
 
433
- container.addEventListener('mouseout', function(e) {
434
- if (e.target.classList.contains('arrow-btn')) {
435
- e.target.style.transform = e.target.style.transform.replace(' scale(1.1)', '');
436
- }
437
- });
438
- })();
439
- """,
440
- elem_id="camera-display"
441
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
 
443
- # Debug textboxes (visible for now, can be hidden later)
444
- js_azimuth = gr.Textbox("0", label="Azimuth", elem_id="js-azimuth")
445
- js_elevation = gr.Textbox("0", label="Elevation", elem_id="js-elevation")
446
- js_distance = gr.Textbox("1.0", label="Distance", elem_id="js-distance")
447
- js_image_data = gr.Textbox("", label="Image Data", elem_id="js-image-data", max_lines=2)
448
 
449
- run_btn = gr.Button("🚀 Generate New View", variant="primary", size="lg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
 
451
- # Advanced settings
452
- with gr.Accordion("⚙️ Generation Settings", open=False):
453
- seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
454
- randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
455
- guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
456
- num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=20, step=1, value=4)
457
- height = gr.Slider(label="Height", minimum=256, maximum=2048, step=32, value=1024)
458
- width = gr.Slider(label="Width", minimum=256, maximum=2048, step=32, value=1024)
459
-
460
- # Right column: Generated result
461
- with gr.Column(scale=1):
462
- result = gr.Image(label="Generated View", height=500)
463
- prompt_display = gr.Textbox(
464
- label="Current Camera Prompt",
465
- value="<sks> front view eye-level shot medium shot",
466
- interactive=False
467
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
 
469
- # Generate button event
470
- run_btn.click(
471
- fn=generate_from_inputs,
472
- inputs=[js_azimuth, js_elevation, js_distance, js_image_data, seed, randomize_seed, guidance_scale, num_inference_steps, height, width],
473
- outputs=[result, seed, prompt_display]
474
  )
475
 
476
  return demo
 
87
 
88
  return f"<sks> {az_name} {el_name} {dist_name}"
89
 
90
+ def infer_camera_edit(image, azimuth, elevation, distance, seed, randomize_seed, guidance_scale, num_inference_steps, height, width):
91
+ """Generate new camera view using the Qwen model."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  if randomize_seed:
93
  seed = random.randint(0, MAX_SEED)
94
+
95
  generator = torch.Generator(device=device).manual_seed(seed)
96
+
97
+ # Build the camera prompt
98
+ prompt = build_camera_prompt(azimuth, elevation, distance)
99
+
100
+ # Load model if not already loaded
101
+ model = load_model()
102
+
103
+ # Generate the new view
104
+ result = model(
105
+ image=image,
 
106
  prompt=prompt,
107
+ height=height,
108
+ width=width,
 
 
109
  guidance_scale=guidance_scale,
110
+ num_inference_steps=num_inference_steps,
111
+ generator=generator
112
  ).images[0]
113
+
114
  return result, seed, prompt
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  def create_camera_control_app():
117
+ """Create the complete camera control app."""
118
 
119
  with gr.Blocks(title="Camera Control with Directional Arrows") as demo:
120
  gr.Markdown("# 📸 Camera Control with Directional Arrows")
121
  gr.Markdown("Upload an image and use arrows to control camera angles for 3D view generation")
122
 
123
  with gr.Row():
124
+ # Left column: Image upload and controls
125
  with gr.Column(scale=1):
126
+ image = gr.Image(label="Upload Image", type="pil", height=400)
 
127
 
128
+ # Camera parameter inputs (visible for debugging)
129
+ js_azimuth = gr.Textbox("0", visible=True, elem_id="js-azimuth", label="Azimuth")
130
+ js_elevation = gr.Textbox("0", visible=True, elem_id="js-elevation", label="Elevation")
131
+ js_distance = gr.Textbox("1.0", visible=True, elem_id="js-distance", label="Distance")
132
+
133
+ # Generation settings
134
+ with gr.Accordion("⚙️ Generation Settings", open=False):
135
+ seed = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, value=42, label="Seed")
136
+ randomize_seed = gr.Checkbox(True, label="Randomize seed")
137
+ guidance_scale = gr.Slider(minimum=1, maximum=20, step=0.1, value=7.5, label="Guidance scale")
138
+ num_inference_steps = gr.Slider(minimum=10, maximum=50, step=1, value=4, label="Number of inference steps")
139
+
140
+ def update_dimensions_on_upload(input_image):
141
+ if input_image is None:
142
+ return 1024, 1024
143
 
144
+ original_width, original_height = input_image.size
145
+ aspect_ratio = original_width / original_height
146
 
147
+ if aspect_ratio > 1:
148
+ # Landscape
149
+ new_width = 1024
150
+ new_height = round(1024 / aspect_ratio / 32) * 32
151
+ else:
152
+ # Portrait or square
153
+ new_height = 1024
154
+ new_width = round(1024 * aspect_ratio / 32) * 32
155
 
156
+ # Ensure minimum size
157
+ new_width = max(256, min(1024, new_width))
158
+ new_height = max(256, min(1024, new_height))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
+ return new_width, new_height
161
+
162
+ height = gr.Slider(minimum=256, maximum=1024, step=32, value=1024, label="Height")
163
+ width = gr.Slider(minimum=256, maximum=1024, step=32, value=1024, label="Width")
164
+
165
+ prompt_display = gr.Textbox(
166
+ label="Current Camera Prompt",
167
+ value="<sks> front view eye-level shot medium shot",
168
+ interactive=False
169
+ )
170
+
171
+ # Right column: Interactive image view
172
+ with gr.Column(scale=1):
173
+ gr.Markdown("### 🎯 Interactive Image View")
174
+ gr.Markdown("*Upload an image, then hover to see controls and click arrows to generate new views*")
175
+
176
+ # Interactive HTML component using working pattern
177
+ result_display = gr.HTML(
178
+ value="""
179
+ <div style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
180
+ position: relative; display: flex; align-items: center; justify-content: center;">
181
+ <div style="text-align: center; color: #999;">
182
+ <div style="font-size: 48px; margin-bottom: 10px;">📸</div>
183
+ <p>Upload an image on the left to begin</p>
184
+ <p>Then hover here to see camera controls</p>
185
  </div>
186
  </div>
187
  """,
188
+ elem_id="result-display"
189
+ )
190
+
191
+ # Debug output
192
+ debug_output = gr.Textbox(label="Debug Output", visible=True)
193
+
194
+ # Functions for handling interactions (inside Blocks context)
195
+ def show_uploaded_image_with_arrows(uploaded_image):
196
+ """Show uploaded image with working arrow controls."""
197
+ if uploaded_image is None:
198
+ return gr.update(value="""
199
+ <div style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
200
+ position: relative; display: flex; align-items: center; justify-content: center;">
201
+ <div style="text-align: center; color: #999;">
202
+ <div style="font-size: 48px; margin-bottom: 10px;">📸</div>
203
+ <p>Upload an image on the left to begin</p>
204
+ <p>Then hover here to see camera controls</p>
205
+ </div>
206
+ </div>
207
+ """)
208
+
209
+ # Convert to data URL
210
+ buffered = BytesIO()
211
+ uploaded_image.save(buffered, format="PNG")
212
+ img_str = base64.b64encode(buffered.getvalue()).decode()
213
+ data_url = f"data:image/png;base64,{img_str}"
214
+
215
+ return gr.update(value=f"""
216
+ <div style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
217
+ position: relative; display: flex; align-items: center; justify-content: center;"
218
+ onmouseenter="this.querySelector('#arrow-controls').style.opacity='1'"
219
+ onmouseleave="this.querySelector('#arrow-controls').style.opacity='0'">
220
+
221
+ <!-- Image -->
222
+ <img src="{data_url}" style="max-width: 100%; max-height: 100%; object-fit: contain;">
223
+
224
+ <!-- Arrow controls -->
225
+ <div id="arrow-controls" style="position: absolute; inset: 0; opacity: 0; transition: opacity 0.3s ease; z-index: 10;">
226
+
227
+ <!-- Left Arrow -->
228
+ <button onclick="
229
+ console.log('Left arrow clicked');
230
+ var azInputElement = document.getElementById('js-azimuth');
231
+ console.log('azInputElement:', azInputElement);
232
+ if (!azInputElement) {{ console.error('js-azimuth element not found'); return; }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
+ // Try multiple selectors for Gradio textbox input
235
+ var azInput = azInputElement.querySelector('input') ||
236
+ azInputElement.querySelector('textarea') ||
237
+ azInputElement.querySelector('[contenteditable]') ||
238
+ azInputElement.querySelector('.gr-textbox input') ||
239
+ azInputElement.querySelector('input[type=text]');
240
 
241
+ console.log('azInput:', azInput);
242
+ console.log('All inputs in element:', azInputElement.querySelectorAll('input, textarea'));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
+ if (!azInput) {{
245
+ console.error('No input found. Element HTML:', azInputElement.innerHTML);
246
+ console.error('Trying to find any input in the container...');
247
+ var allInputs = document.querySelectorAll('#js-azimuth input, #js-azimuth textarea');
248
+ console.log('All matching inputs:', allInputs);
249
+ if (allInputs.length > 0) azInput = allInputs[0];
250
+ }}
251
 
252
+ if (!azInput) {{ console.error('Still no input found'); return; }}
 
 
 
 
253
 
254
+ var currentAz = parseInt(azInput.value) || 0;
255
+ var newAz = (currentAz - 45 + 360) % 360;
256
+ console.log('Setting azimuth from', currentAz, 'to', newAz);
257
+ azInput.value = newAz;
258
+ azInput.dispatchEvent(new Event('input', {{bubbles: true}}));
259
+ azInput.dispatchEvent(new Event('change', {{bubbles: true}}));
260
+ var statusAz = document.getElementById('status-az');
261
+ if (statusAz) statusAz.textContent = newAz;
262
+ "
263
+ style="position: absolute; left: 20px; top: 50%; transform: translateY(-50%);
264
+ width: 60px; height: 60px; background: rgba(0,255,136,0.9); border: none;
265
+ border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
266
+ box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
267
+ onmouseover="this.style.transform += ' scale(1.1)'"
268
+ onmouseout="this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
269
+ title="Rotate Left">
270
+
271
+ </button>
272
+
273
+ <!-- Right Arrow -->
274
+ <button onclick="
275
+ var azInput = document.getElementById('js-azimuth').querySelector('input');
276
+ var newAz = (parseInt(azInput.value) + 45) % 360;
277
+ azInput.value = newAz;
278
+ azInput.dispatchEvent(new Event('input', {{bubbles: true}}));
279
+ var statusAz = document.getElementById('status-az');
280
+ if (statusAz) statusAz.textContent = newAz;
281
+ "
282
+ style="position: absolute; right: 20px; top: 50%; transform: translateY(-50%);
283
+ width: 60px; height: 60px; background: rgba(0,255,136,0.9); border: none;
284
+ border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
285
+ box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
286
+ onmouseover="this.style.transform += ' scale(1.1)'"
287
+ onmouseout="this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
288
+ title="Rotate Right">
289
+
290
+ </button>
291
+
292
+ <!-- Up Arrow -->
293
+ <button onclick="
294
+ var elInput = document.getElementById('js-elevation').querySelector('input');
295
+ var newEl = Math.min(60, parseInt(elInput.value) + 30);
296
+ elInput.value = newEl;
297
+ elInput.dispatchEvent(new Event('input', {{bubbles: true}}));
298
+ var statusEl = document.getElementById('status-el');
299
+ if (statusEl) statusEl.textContent = newEl;
300
+ "
301
+ style="position: absolute; top: 20px; left: 50%; transform: translateX(-50%);
302
+ width: 60px; height: 60px; background: rgba(255,105,180,0.9); border: none;
303
+ border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
304
+ box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
305
+ onmouseover="this.style.transform += ' scale(1.1)'"
306
+ onmouseout="this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
307
+ title="Look Up">
308
+
309
+ </button>
310
+
311
+ <!-- Down Arrow -->
312
+ <button onclick="
313
+ var elInput = document.getElementById('js-elevation').querySelector('input');
314
+ var newEl = Math.max(-30, parseInt(elInput.value) - 30);
315
+ elInput.value = newEl;
316
+ elInput.dispatchEvent(new Event('input', {{bubbles: true}}));
317
+ var statusEl = document.getElementById('status-el');
318
+ if (statusEl) statusEl.textContent = newEl;
319
+ "
320
+ style="position: absolute; bottom: 80px; left: 50%; transform: translateX(-50%);
321
+ width: 60px; height: 60px; background: rgba(255,105,180,0.9); border: none;
322
+ border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
323
+ box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
324
+ onmouseover="this.style.transform += ' scale(1.1)'"
325
+ onmouseout="this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
326
+ title="Look Down">
327
+
328
+ </button>
329
+
330
+ <!-- Zoom Controls -->
331
+ <div style="position: absolute; bottom: 20px; left: 50%; transform: translateX(-50%);
332
+ display: flex; gap: 15px;">
333
 
334
+ <button onclick="
335
+ var distInput = document.getElementById('js-distance').querySelector('input');
336
+ var newDist = Math.min(1.8, parseFloat(distInput.value) + 0.4);
337
+ distInput.value = newDist.toFixed(1);
338
+ distInput.dispatchEvent(new Event('input', {{bubbles: true}}));
339
+ var statusDist = document.getElementById('status-dist');
340
+ if (statusDist) statusDist.textContent = newDist.toFixed(1);
341
+ "
342
+ style="width: 55px; height: 55px; background: rgba(255,165,0,0.9); border: none;
343
+ border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
344
+ box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
345
+ onmouseover="this.style.transform = 'scale(1.1)'"
346
+ onmouseout="this.style.transform = ''"
347
+ title="Zoom Out">
348
+
349
+ </button>
350
 
351
+ <button onclick="
352
+ var distInput = document.getElementById('js-distance').querySelector('input');
353
+ var newDist = Math.max(0.6, parseFloat(distInput.value) - 0.4);
354
+ distInput.value = newDist.toFixed(1);
355
+ distInput.dispatchEvent(new Event('input', {{bubbles: true}}));
356
+ var statusDist = document.getElementById('status-dist');
357
+ if (statusDist) statusDist.textContent = newDist.toFixed(1);
358
+ "
359
+ style="width: 55px; height: 55px; background: rgba(255,165,0,0.9); border: none;
360
+ border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
361
+ box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
362
+ onmouseover="this.style.transform = 'scale(1.1)'"
363
+ onmouseout="this.style.transform = ''"
364
+ title="Zoom In">
365
+ +
366
+ </button>
367
+ </div>
368
+
369
+ <!-- Status Display -->
370
+ <div style="position: absolute; top: 15px; right: 15px; background: rgba(0,0,0,0.85);
371
+ color: white; padding: 10px 14px; border-radius: 8px; font-family: monospace;
372
+ font-size: 13px; box-shadow: 0 4px 12px rgba(0,0,0,0.4);">
373
+ <div>Az: <span id="status-az">0</span>° | El: <span id="status-el">0</span>° | Dist: <span id="status-dist">1.0</span></div>
374
+ </div>
375
+ </div>
376
+ </div>
377
+ """)
378
+
379
+ def handle_parameter_change(az, el, dist, input_image, seed_val, randomize_seed_val, guidance_val, steps_val, h_val, w_val):
380
+ """Handle camera parameter changes and generate new view."""
381
+ try:
382
+ azimuth = float(az)
383
+ elevation = float(el)
384
+ distance = float(dist)
385
 
386
+ # Build prompt
387
+ prompt = build_camera_prompt(azimuth, elevation, distance)
 
 
 
388
 
389
+ if input_image is not None:
390
+ # Generate new image using the actual Qwen model
391
+ generated_image, final_seed, final_prompt = infer_camera_edit(
392
+ image=input_image,
393
+ azimuth=azimuth,
394
+ elevation=elevation,
395
+ distance=distance,
396
+ seed=seed_val,
397
+ randomize_seed=randomize_seed_val,
398
+ guidance_scale=guidance_val,
399
+ num_inference_steps=steps_val,
400
+ height=int(h_val),
401
+ width=int(w_val)
402
+ )
403
+
404
+ # Update the HTML display with the generated image
405
+ return show_uploaded_image_with_arrows(generated_image).value, prompt, f"Generated view: Az={azimuth}°, El={elevation}°, Dist={distance}, Seed={final_seed}"
406
 
407
+ return gr.update(), prompt, f"Parameters updated: Az={azimuth}°, El={elevation}°, Dist={distance}"
408
+
409
+ except Exception as e:
410
+ return gr.update(), f"Error: {str(e)}", f"Error processing parameters: {str(e)}"
411
+
412
+ # Update dimensions when image is uploaded
413
+ image.upload(
414
+ fn=update_dimensions_on_upload,
415
+ inputs=[image],
416
+ outputs=[width, height]
417
+ )
418
+
419
+ # Image upload handler
420
+ image.upload(
421
+ fn=show_uploaded_image_with_arrows,
422
+ inputs=[image],
423
+ outputs=[result_display]
424
+ )
425
+
426
+ # Parameter change handlers (triggered by arrow clicks)
427
+ js_azimuth.change(
428
+ fn=handle_parameter_change,
429
+ inputs=[js_azimuth, js_elevation, js_distance, image, seed, randomize_seed, guidance_scale, num_inference_steps, height, width],
430
+ outputs=[result_display, prompt_display, debug_output]
431
+ )
432
+
433
+ js_elevation.change(
434
+ fn=handle_parameter_change,
435
+ inputs=[js_azimuth, js_elevation, js_distance, image, seed, randomize_seed, guidance_scale, num_inference_steps, height, width],
436
+ outputs=[result_display, prompt_display, debug_output]
437
+ )
438
 
439
+ js_distance.change(
440
+ fn=handle_parameter_change,
441
+ inputs=[js_azimuth, js_elevation, js_distance, image, seed, randomize_seed, guidance_scale, num_inference_steps, height, width],
442
+ outputs=[result_display, prompt_display, debug_output]
 
443
  )
444
 
445
  return demo