ysharma HF Staff committed on
Commit
2f644fe
·
verified ·
1 Parent(s): 9b1e72d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +244 -1293
app.py CHANGED
@@ -1,1377 +1,328 @@
1
- #fifteenth
2
-
3
-
4
  import gradio as gr
5
- import numpy as np
6
- import random
7
- import torch
8
  import base64
9
  from io import BytesIO
10
  from PIL import Image
11
- from diffusers import FlowMatchEulerDiscreteScheduler, QwenImageEditPlusPipeline
12
-
13
# Upper bound for user-selectable seeds: the largest signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# --- Model Loading ---
# Weights are loaded in bfloat16 and placed on the GPU when CUDA is available,
# otherwise on the CPU.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# The pipeline is created lazily by load_model(); None means "not loaded yet".
pipe = None
21
-
22
def load_model():
    """Return the shared editing pipeline, creating it on the first call.

    The pipeline is cached in the module-level ``pipe`` variable, so the
    weights and both LoRA adapters are only loaded once per process.

    Returns:
        The ``QwenImageEditPlusPipeline`` instance with the "lightning" and
        "angles" adapters active at weight 1.0 each.
    """
    global pipe
    if pipe is not None:
        return pipe

    pipe = QwenImageEditPlusPipeline.from_pretrained(
        "Qwen/Qwen-Image-Edit-2511",
        torch_dtype=dtype,
    ).to(device)

    # (adapter name, repository, weight file) for each LoRA, in load order:
    # first the lightning LoRA for fast inference, then the multi-angles LoRA.
    lora_sources = (
        (
            "lightning",
            "lightx2v/Qwen-Image-Edit-2511-Lightning",
            "Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors",
        ),
        (
            "angles",
            "fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA",
            "qwen-image-edit-2511-multiple-angles-lora.safetensors",
        ),
    )
    for adapter, repo_id, weight_file in lora_sources:
        pipe.load_lora_weights(
            repo_id,
            weight_name=weight_file,
            adapter_name=adapter,
        )

    pipe.set_adapters(["lightning", "angles"], adapter_weights=[1.0, 1.0])
    return pipe
47
 
48
- # --- Mappings ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
# Supported camera positions and the phrase used for each one in the prompt.
# Keys are degrees for azimuth/elevation and a unitless factor for distance.
AZIMUTH_MAP = {
    0: "front view",
    45: "front-right quarter view",
    90: "right side view",
    135: "back-right quarter view",
    180: "back view",
    225: "back-left quarter view",
    270: "left side view",
    315: "front-left quarter view",
}

ELEVATION_MAP = {
    -30: "low-angle shot",
    0: "eye-level shot",
    30: "elevated shot",
    60: "high-angle shot",
}

DISTANCE_MAP = {
    0.6: "close-up",
    1.0: "medium shot",
    1.8: "wide shot",
}


def snap_to_nearest(value, options):
    """Return the element of *options* that is numerically closest to *value*.

    When two options are equally close, the one that comes first in
    *options* wins (the behaviour of ``min``).
    """
    return min(options, key=lambda x: abs(x - value))


def _snap_azimuth(azimuth):
    """Return the AZIMUTH_MAP key closest to *azimuth* on the 360-degree circle."""
    wrapped = azimuth % 360

    def circular_gap(option):
        gap = abs(option - wrapped)
        # Going the other way round the circle may be shorter (350 -> 0).
        return min(gap, 360 - gap)

    return min(AZIMUTH_MAP, key=circular_gap)


def build_camera_prompt(azimuth: float, elevation: float, distance: float) -> str:
    """Translate numeric camera settings into the text prompt for the model.

    Each value is snapped to the nearest supported key of the corresponding
    map. Azimuth is treated as an angle, so it wraps around: 350 snaps to the
    front view (0) rather than to 315, and negative or >= 360 inputs are
    normalised first. Elevation and distance are snapped linearly, which also
    clamps out-of-range values to the nearest end of the scale.

    Args:
        azimuth: Horizontal angle in degrees.
        elevation: Vertical angle in degrees.
        distance: Camera distance factor.

    Returns:
        A prompt of the form ``"<sks> {azimuth} {elevation} {distance}"``.
    """
    azimuth_name = AZIMUTH_MAP[_snap_azimuth(azimuth)]
    elevation_name = ELEVATION_MAP[snap_to_nearest(elevation, list(ELEVATION_MAP))]
    distance_name = DISTANCE_MAP[snap_to_nearest(distance, list(DISTANCE_MAP))]

    return f"<sks> {azimuth_name} {elevation_name} {distance_name}"
86
-
87
def gpu_decorator(func):
    """Wrap *func* with ``spaces.GPU`` when the ``spaces`` package is importable.

    If an ``ImportError`` is raised while importing ``spaces`` or applying the
    decorator, *func* is returned unchanged.
    """
    try:
        import spaces
        decorated = spaces.GPU(func)
    except ImportError:
        decorated = func
    return decorated
94
-
95
@gpu_decorator
def infer_camera_edit(
    image: Image.Image,
    azimuth: float = 0.0,
    elevation: float = 0.0,
    distance: float = 1.0,
    seed: int = 0,
    randomize_seed: bool = True,
    guidance_scale: float = 1.0,
    num_inference_steps: int = 4,
    height: int = 1024,
    width: int = 1024,
):
    """Re-render *image* from the requested camera position.

    Args:
        image: Source picture, either a PIL image or something
            ``PIL.Image.open`` accepts.
        azimuth: Horizontal camera angle, passed to ``build_camera_prompt``.
        elevation: Vertical camera angle, passed to ``build_camera_prompt``.
        distance: Camera distance factor, passed to ``build_camera_prompt``.
        seed: Seed for the random generator; ignored when *randomize_seed*
            is true.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` for this call.
        guidance_scale: Forwarded to the pipeline.
        num_inference_steps: Forwarded to the pipeline.
        height: Output height; 0 (or ``None``) leaves the choice to the pipeline.
        width: Output width; 0 (or ``None``) leaves the choice to the pipeline.

    Returns:
        A ``(result_image, seed, prompt)`` tuple, where *seed* is the value
        actually used.

    Raises:
        gr.Error: If no image was supplied.
    """
    # Reject a missing image before any other work is done.
    if image is None:
        raise gr.Error("Please upload an image first.")

    prompt = build_camera_prompt(azimuth, elevation, distance)
    print(f"Generated Prompt: {prompt}")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # manual_seed() only accepts integers; callers may hand over a float.
    seed = int(seed)
    generator = torch.Generator(device=device).manual_seed(seed)

    source = image if isinstance(image, Image.Image) else Image.open(image)
    pil_image = source.convert("RGB")

    def _dimension(pixels):
        # 0 and None both mean "let the pipeline decide".
        return int(pixels) if pixels else None

    # Load model only when needed
    current_pipe = load_model()

    result = current_pipe(
        image=[pil_image],
        prompt=prompt,
        height=_dimension(height),
        width=_dimension(width),
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        guidance_scale=guidance_scale,
        num_images_per_prompt=1,
    ).images[0]

    return result, seed, prompt
135
-
136
- # --- Arrow Camera Control Function ---
137
def create_arrow_camera_control(value=None, imageUrl=None, **kwargs):
    """Create an HTML component with directional arrows overlaid on an image.

    Args:
        value: Optional dict with ``azimuth``, ``elevation`` and ``distance``
            keys, shown in the initial status read-out. Missing keys fall
            back to 0, 0 and 1.0.
        imageUrl: Optional image URL (for example a data URL). When given it
            is forwarded to ``gr.HTML`` as the ``imageUrl`` property that the
            script below reads from ``props``.
        **kwargs: Passed through unchanged to ``gr.HTML``.

    Returns:
        The ``gr.HTML`` component.
    """
    # Merge over the defaults so a partial dict cannot raise KeyError below.
    camera = {"azimuth": 0, "elevation": 0, "distance": 1.0, **(value or {})}

    # NOTE: this is an f-string, so the markup must not contain literal braces.
    # "<sks>" is written as an entity, otherwise the browser parses it as a tag.
    html_template = f"""
    <div style="position: relative; width: 100%; height: 400px; background: #f0f0f0; border-radius: 12px; overflow: hidden; border: 2px solid #ddd; min-height: 400px;">
        <!-- Image display area -->
        <div id="image-display" style="width: 100%; height: 100%; display: flex; align-items: center; justify-content: center; background: #1a1a1a;">
            <img id="preview-image" src="" alt="Upload an image" style="max-width: 100%; max-height: 100%; object-fit: contain; display: none;">
            <div id="placeholder-text" style="color: #888; font-size: 16px; text-align: center;">
                <div style="font-size: 48px; margin-bottom: 10px;">📸</div>
                Upload an image to begin
            </div>
        </div>

        <!-- Camera controls overlay -->
        <div style="position: absolute; inset: 0; pointer-events: none;">
            <!-- Left arrow (azimuth -45°) -->
            <button class="arrow-btn" data-action="azimuth" data-value="-45" aria-label="Left"
                    style="position: absolute; left: 20px; top: 50%; transform: translateY(-50%);
                           width: 50px; height: 50px; background: rgba(0,255,136,0.8); border: none;
                           border-radius: 50%; color: white; font-size: 20px; cursor: pointer;
                           pointer-events: auto; transition: all 0.2s; z-index: 10;">
                &larr;
            </button>

            <!-- Right arrow (azimuth +45°) -->
            <button class="arrow-btn" data-action="azimuth" data-value="+45" aria-label="Right"
                    style="position: absolute; right: 20px; top: 50%; transform: translateY(-50%);
                           width: 50px; height: 50px; background: rgba(0,255,136,0.8); border: none;
                           border-radius: 50%; color: white; font-size: 20px; cursor: pointer;
                           pointer-events: auto; transition: all 0.2s; z-index: 10;">
                &rarr;
            </button>

            <!-- Up arrow (elevation +30°) -->
            <button class="arrow-btn" data-action="elevation" data-value="+30" aria-label="Up"
                    style="position: absolute; top: 20px; left: 50%; transform: translateX(-50%);
                           width: 50px; height: 50px; background: rgba(255,105,180,0.8); border: none;
                           border-radius: 50%; color: white; font-size: 20px; cursor: pointer;
                           pointer-events: auto; transition: all 0.2s; z-index: 10;">
                &uarr;
            </button>

            <!-- Down arrow (elevation -30°) -->
            <button class="arrow-btn" data-action="elevation" data-value="-30" aria-label="Down"
                    style="position: absolute; bottom: 60px; left: 50%; transform: translateX(-50%);
                           width: 50px; height: 50px; background: rgba(255,105,180,0.8); border: none;
                           border-radius: 50%; color: white; font-size: 20px; cursor: pointer;
                           pointer-events: auto; transition: all 0.2s; z-index: 10;">
                &darr;
            </button>

            <!-- Zoom controls at bottom -->
            <div style="position: absolute; bottom: 10px; left: 50%; transform: translateX(-50%);
                        display: flex; gap: 10px; pointer-events: auto; z-index: 10;">
                <!-- Zoom out (-) -->
                <button class="arrow-btn" data-action="distance" data-value="+0.4" aria-label="Zoom out"
                        style="width: 45px; height: 45px; background: rgba(255,165,0,0.8); border: none;
                               border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
                               transition: all 0.2s;">
                    &minus;
                </button>

                <!-- Zoom in (+) -->
                <button class="arrow-btn" data-action="distance" data-value="-0.4" aria-label="Zoom in"
                        style="width: 45px; height: 45px; background: rgba(255,165,0,0.8); border: none;
                               border-radius: 50%; color: white; font-size: 20px; cursor: pointer;
                               transition: all 0.2s;">
                    +
                </button>
            </div>
        </div>

        <!-- Status display -->
        <div style="position: absolute; top: 10px; right: 10px; background: rgba(0,0,0,0.8);
                    color: white; padding: 8px 12px; border-radius: 6px; font-family: monospace;
                    font-size: 12px; pointer-events: none; z-index: 10;">
            <div id="camera-values">Az: {camera['azimuth']}° | El: {camera['elevation']}° | Dist: {camera['distance']}</div>
            <div id="prompt-preview" style="color: #00ff88; margin-top: 4px;">&lt;sks&gt; front view eye-level shot medium shot</div>
        </div>
    </div>
    """

    # Plain (non-f) string: the script uses JS template literals with ${...}.
    js_on_load = """
    (() => {
        const previewImage = element.querySelector('#preview-image');
        const placeholderText = element.querySelector('#placeholder-text');
        const cameraValues = element.querySelector('#camera-values');
        const promptPreview = element.querySelector('#prompt-preview');
        const arrowBtns = element.querySelectorAll('.arrow-btn');

        // Camera state
        let currentAzimuth = props.value?.azimuth || 0;
        let currentElevation = props.value?.elevation || 0;
        let currentDistance = props.value?.distance || 1.0;

        // Mappings for prompt building
        const azimuthMap = {
            0: "front view", 45: "front-right quarter view", 90: "right side view",
            135: "back-right quarter view", 180: "back view", 225: "back-left quarter view",
            270: "left side view", 315: "front-left quarter view"
        };
        const elevationMap = {
            "-30": "low-angle shot", "0": "eye-level shot", "30": "elevated shot", "60": "high-angle shot"
        };
        const distanceMap = {
            "0.6": "close-up", "1": "medium shot", "1.8": "wide shot"
        };

        function snapToNearest(value, options) {
            return options.reduce((prev, curr) => Math.abs(curr - value) < Math.abs(prev - value) ? curr : prev);
        }

        function buildCameraPrompt(azimuth, elevation, distance) {
            const azimuthSteps = [0, 45, 90, 135, 180, 225, 270, 315];
            const elevationSteps = [-30, 0, 30, 60];
            const distanceSteps = [0.6, 1.0, 1.8];

            const azSnap = snapToNearest(azimuth, azimuthSteps);
            const elSnap = snapToNearest(elevation, elevationSteps);
            const distSnap = snapToNearest(distance, distanceSteps);

            const azName = azimuthMap[azSnap];
            const elName = elevationMap[String(elSnap)];
            // String(1.0) is "1", which is the key used in distanceMap.
            const distName = distanceMap[String(distSnap)];

            return `<sks> ${azName} ${elName} ${distName}`;
        }

        function updateDisplay() {
            // Update status display
            cameraValues.textContent = `Az: ${currentAzimuth}° | El: ${currentElevation}° | Dist: ${currentDistance}`;
            promptPreview.textContent = buildCameraPrompt(currentAzimuth, currentElevation, currentDistance);

            // Update props and trigger change event
            props.value = {
                azimuth: currentAzimuth,
                elevation: currentElevation,
                distance: currentDistance
            };
            trigger('change', props.value);
        }

        // Update image from props
        function updateImage() {
            if (props.imageUrl && props.imageUrl !== '') {
                previewImage.src = props.imageUrl;
                previewImage.style.display = 'block';
                placeholderText.style.display = 'none';
            } else {
                previewImage.style.display = 'none';
                placeholderText.style.display = 'block';
            }
        }

        // Arrow button click handlers
        arrowBtns.forEach(btn => {
            btn.addEventListener('mouseenter', () => {
                btn.style.transform = btn.style.transform.replace('scale(1)', 'scale(1.1)');
                if (!btn.style.transform.includes('scale')) {
                    btn.style.transform += ' scale(1.1)';
                }
            });

            btn.addEventListener('mouseleave', () => {
                btn.style.transform = btn.style.transform.replace('scale(1.1)', 'scale(1)');
            });

            btn.addEventListener('click', (e) => {
                const action = btn.dataset.action;
                const value = parseFloat(btn.dataset.value);

                // Add click animation
                btn.style.transform += ' scale(0.95)';
                setTimeout(() => {
                    btn.style.transform = btn.style.transform.replace('scale(0.95)', '');
                }, 150);

                if (action === 'azimuth') {
                    currentAzimuth = (currentAzimuth + value + 360) % 360;
                } else if (action === 'elevation') {
                    currentElevation = Math.max(-30, Math.min(60, currentElevation + value));
                } else if (action === 'distance') {
                    const clamped = Math.max(0.6, Math.min(1.8, currentDistance + value));
                    // Round to one decimal: 1.4 + 0.4 is 1.7999999999999998 in floating point.
                    currentDistance = Math.round(clamped * 10) / 10;
                }

                updateDisplay();
            });
        });

        // Watch for external prop changes
        let lastImageUrl = props.imageUrl;
        let lastValue = JSON.stringify(props.value);

        setInterval(() => {
            // Check for image URL changes
            if (props.imageUrl !== lastImageUrl) {
                lastImageUrl = props.imageUrl;
                updateImage();
            }

            // Check for value changes from external controls
            const currentValueStr = JSON.stringify(props.value);
            if (currentValueStr !== lastValue) {
                lastValue = currentValueStr;
                if (props.value && typeof props.value === 'object') {
                    // ?? instead of ||, so an external reset to 0 is not ignored.
                    currentAzimuth = props.value.azimuth ?? currentAzimuth;
                    currentElevation = props.value.elevation ?? currentElevation;
                    currentDistance = props.value.distance ?? currentDistance;
                    updateDisplay();
                }
            }
        }, 100);

        // Initial setup
        updateImage();
        updateDisplay();
    })();
    """

    # Forward the image only when one was given, so the default call to
    # gr.HTML stays exactly as before.
    if imageUrl is not None:
        kwargs["imageUrl"] = imageUrl

    return gr.HTML(
        value=html_template,
        js_on_load=js_on_load,
        **kwargs
    )
368
-
369
- # --- UI ---
370
# Stylesheet passed to gr.Blocks(css=...); one rule per entry.
_css_rules = (
    "#col-container { max-width: 1200px; margin: 0 auto; }",
    ".dark .progress-text { color: white !important; }",
    "#arrow-camera-control { min-height: 400px; }",
    ".arrow-btn:hover { opacity: 0.9; }",
    ".arrow-btn:active { transform: scale(0.95) !important; }",
)
# Leading newline and one newline after every rule, as in a triple-quoted block.
css = "\n" + "".join(rule + "\n" for rule in _css_rules)
377
 
378
def update_camera_with_image(image):
    """Return a component update carrying *image* as a PNG data URL.

    Args:
        image: The uploaded image (an object with a PIL-style ``save``
            method), or ``None`` when the upload was cleared.

    Returns:
        ``gr.update(imageUrl=...)`` with a ``data:image/png;base64,`` URL, or
        with ``None`` when there is no image.
    """
    data_url = None
    if image is not None:
        # Encode the picture in memory; nothing is written to disk.
        png_buffer = BytesIO()
        image.save(png_buffer, format="PNG")
        encoded = base64.b64encode(png_buffer.getvalue()).decode()
        data_url = f"data:image/png;base64,{encoded}"
    return gr.update(imageUrl=data_url)
389
-
390
def update_dimensions_on_upload(image, *, max_side=1024, min_side=256, multiple=8):
    """Compute recommended output dimensions that preserve the aspect ratio.

    The longer side is set to *max_side* and the shorter side is scaled
    proportionally. Both are rounded down to a multiple of *multiple* and then
    raised to at least *min_side*, so very elongated images can no longer
    yield 0 or a value below the range of the Height/Width sliders.

    Args:
        image: Object exposing ``size`` as ``(width, height)``, or ``None``.
        max_side: Length given to the longer side.
        min_side: Smallest length returned for either side.
        multiple: Both results are multiples of this value.

    Returns:
        A ``(width, height)`` tuple; ``(max_side, max_side)`` when *image* is
        ``None``.
    """
    if image is None:
        return max_side, max_side

    original_width, original_height = image.size

    if original_width > original_height:
        new_width = max_side
        new_height = max_side * original_height // original_width
    else:
        new_height = max_side
        new_width = max_side * original_width // original_height

    # Round the floor up to a multiple so the lower bound is itself valid.
    floor_side = -(-min_side // multiple) * multiple

    def _fit(side):
        return max(floor_side, (side // multiple) * multiple)

    return _fit(new_width), _fit(new_height)
410
-
411
- with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
412
- gr.Markdown("""
413
- # 🎯 Camera Control with Directional Arrows
414
-
415
- Upload an image and use the **directional arrows** to control camera angles:
416
- - **🟢 Left/Right arrows**: Control azimuth (horizontal rotation)
417
- - **🩷 Up/Down arrows**: Control elevation (vertical angle)
418
- - **🟠 +/- buttons**: Control distance (zoom in/out)
419
-
420
- The arrows give you the feeling of moving the camera around your subject!
421
- """)
422
-
423
- with gr.Row():
424
- # Left column: Input image and settings
425
- with gr.Column(scale=1):
426
- image = gr.Image(label="Upload Image", type="pil", height=400)
427
-
428
- # Hidden textboxes for JavaScript communication
429
- js_azimuth = gr.Textbox("0", visible=False, elem_id="js-azimuth")
430
- js_elevation = gr.Textbox("0", visible=False, elem_id="js-elevation")
431
- js_distance = gr.Textbox("1.0", visible=False, elem_id="js-distance")
432
-
433
- # Hidden button for triggering generation from JavaScript
434
- hidden_generate_btn = gr.Button("Generate", visible=False, elem_id="hidden-generate-btn")
435
-
436
- prompt_display = gr.Textbox(
437
- label="Current Camera Prompt",
438
- value="<sks> front view eye-level shot medium shot",
439
- interactive=False
440
- )
441
-
442
- # Advanced settings
443
- with gr.Accordion("⚙️ Advanced Settings", open=False):
444
- seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
445
- randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
446
- guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
447
- num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=20, step=1, value=4)
448
- height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
449
- width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
450
 
451
- # Right column: Interactive image viewer with hover controls
452
- with gr.Column(scale=1):
453
- gr.Markdown("### 🎯 Interactive Image View")
454
- gr.Markdown("*Upload an image, then hover to see camera controls and click arrows to generate new views*")
455
-
456
- # HTML component for interactive image display
457
- result_display = gr.HTML(
458
- value="""
459
- <div id="result-container"
460
- style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
461
- position: relative; display: flex; align-items: center; justify-content: center; cursor: default;">
462
-
463
- <!-- Placeholder for no image -->
464
- <div id="no-result-placeholder" style="text-align: center; color: #999;">
465
- <div style="font-size: 48px; margin-bottom: 10px;">📸</div>
466
- <p>Upload an image on the left to begin</p>
467
- <p>Then hover here to see camera controls</p>
468
- </div>
469
-
470
- <!-- Image display -->
471
- <img id="result-image" style="max-width: 100%; max-height: 100%; object-fit: contain; display: none;">
472
-
473
- <!-- Hover-activated arrow controls -->
474
- <div id="hover-controls"
475
- style="position: absolute; inset: 0; pointer-events: none; opacity: 0;
476
- transition: opacity 0.3s ease-in-out; z-index: 100;">
477
-
478
- <!-- Left arrow -->
479
- <button class="hover-arrow-btn" data-action="left"
480
- style="position: absolute; left: 20px; top: 50%; transform: translateY(-50%);
481
- width: 60px; height: 60px; background: rgba(0,255,136,0.95); border: none;
482
- border-radius: 50%; color: white; font-size: 24px; font-weight: bold; cursor: pointer;
483
- pointer-events: auto; transition: all 0.2s ease; z-index: 101;
484
- box-shadow: 0 6px 20px rgba(0,0,0,0.4); display: flex; align-items: center; justify-content: center;">
485
-
486
- </button>
487
-
488
- <!-- Right arrow -->
489
- <button class="hover-arrow-btn" data-action="right"
490
- style="position: absolute; right: 20px; top: 50%; transform: translateY(-50%);
491
- width: 60px; height: 60px; background: rgba(0,255,136,0.95); border: none;
492
- border-radius: 50%; color: white; font-size: 24px; font-weight: bold; cursor: pointer;
493
- pointer-events: auto; transition: all 0.2s ease; z-index: 101;
494
- box-shadow: 0 6px 20px rgba(0,0,0,0.4); display: flex; align-items: center; justify-content: center;">
495
-
496
- </button>
497
-
498
- <!-- Up arrow -->
499
- <button class="hover-arrow-btn" data-action="up"
500
- style="position: absolute; top: 20px; left: 50%; transform: translateX(-50%);
501
- width: 60px; height: 60px; background: rgba(255,105,180,0.95); border: none;
502
- border-radius: 50%; color: white; font-size: 24px; font-weight: bold; cursor: pointer;
503
- pointer-events: auto; transition: all 0.2s ease; z-index: 101;
504
- box-shadow: 0 6px 20px rgba(0,0,0,0.4); display: flex; align-items: center; justify-content: center;">
505
-
506
- </button>
507
-
508
- <!-- Down arrow -->
509
- <button class="hover-arrow-btn" data-action="down"
510
- style="position: absolute; bottom: 80px; left: 50%; transform: translateX(-50%);
511
- width: 60px; height: 60px; background: rgba(255,105,180,0.95); border: none;
512
- border-radius: 50%; color: white; font-size: 24px; font-weight: bold; cursor: pointer;
513
- pointer-events: auto; transition: all 0.2s ease; z-index: 101;
514
- box-shadow: 0 6px 20px rgba(0,0,0,0.4); display: flex; align-items: center; justify-content: center;">
515
-
516
- </button>
517
-
518
- <!-- Zoom controls -->
519
- <div style="position: absolute; bottom: 15px; left: 50%; transform: translateX(-50%);
520
- display: flex; gap: 15px; pointer-events: auto; z-index: 101;">
521
- <button class="hover-arrow-btn" data-action="zoom-out"
522
- style="width: 55px; height: 55px; background: rgba(255,165,0,0.95); border: none;
523
- border-radius: 50%; color: white; font-size: 28px; font-weight: bold; cursor: pointer;
524
- transition: all 0.2s ease; box-shadow: 0 6px 20px rgba(0,0,0,0.4);
525
- display: flex; align-items: center; justify-content: center;">
526
-
527
- </button>
528
-
529
- <button class="hover-arrow-btn" data-action="zoom-in"
530
- style="width: 55px; height: 55px; background: rgba(255,165,0,0.95); border: none;
531
- border-radius: 50%; color: white; font-size: 24px; font-weight: bold; cursor: pointer;
532
- transition: all 0.2s ease; box-shadow: 0 6px 20px rgba(0,0,0,0.4);
533
- display: flex; align-items: center; justify-content: center;">
534
- +
535
- </button>
536
- </div>
537
-
538
- <!-- Status display -->
539
- <div id="hover-status"
540
- style="position: absolute; top: 15px; right: 15px; background: rgba(0,0,0,0.9);
541
- color: white; padding: 12px 16px; border-radius: 10px; font-family: monospace;
542
- font-size: 14px; z-index: 101; box-shadow: 0 6px 20px rgba(0,0,0,0.4);
543
- min-width: 200px;">
544
- <div id="hover-camera-values" style="margin-bottom: 4px;">Az: 0° | El: 0° | Dist: 1.0</div>
545
- <div id="hover-prompt-preview" style="color: #00ff88; font-size: 12px; line-height: 1.3;"><sks> front view eye-level shot medium shot</div>
546
- </div>
547
- </div>
548
- </div>
549
- """,
550
- js_on_load="""
551
- (() => {
552
- const container = element.querySelector('#result-container');
553
- if (!container) return;
554
-
555
- const resultImage = container.querySelector('#result-image');
556
- const noResultPlaceholder = container.querySelector('#no-result-placeholder');
557
- const hoverControls = container.querySelector('#hover-controls');
558
- const hoverCameraValues = container.querySelector('#hover-camera-values');
559
- const hoverPromptPreview = container.querySelector('#hover-prompt-preview');
560
-
561
- let currentAzimuth = 0;
562
- let currentElevation = 0;
563
- let currentDistance = 1.0;
564
- let isGenerating = false;
565
-
566
- // Camera mappings
567
- const azimuthMap = {
568
- 0: "front view", 45: "front-right quarter view", 90: "right side view",
569
- 135: "back-right quarter view", 180: "back view", 225: "back-left quarter view",
570
- 270: "left side view", 315: "front-left quarter view"
571
- };
572
- const elevationMap = {
573
- "-30": "low-angle shot", "0": "eye-level shot", "30": "elevated shot", "60": "high-angle shot"
574
- };
575
- const distanceMap = {
576
- "0.6": "close-up", "1": "medium shot", "1.8": "wide shot"
577
- };
578
-
579
- function snapToNearest(value, steps) {
580
- return steps.reduce((prev, curr) => Math.abs(curr - value) < Math.abs(prev - value) ? curr : prev);
581
- }
582
-
583
- function buildCameraPrompt(azimuth, elevation, distance) {
584
- const azimuthSteps = [0, 45, 90, 135, 180, 225, 270, 315];
585
- const elevationSteps = [-30, 0, 30, 60];
586
- const distanceSteps = [0.6, 1.0, 1.8];
587
-
588
- const azSnap = snapToNearest(azimuth, azimuthSteps);
589
- const elSnap = snapToNearest(elevation, elevationSteps);
590
- const distSnap = snapToNearest(distance, distanceSteps);
591
-
592
- const azName = azimuthMap[azSnap];
593
- const elName = elevationMap[String(elSnap)];
594
- const distName = distanceMap[distSnap === 1 ? "1" : String(distSnap)];
595
-
596
- return `<sks> ` + azName + ` ` + elName + ` ` + distName;
597
- }
598
-
599
- function updateDisplay() {
600
- hoverCameraValues.textContent = `Az: ` + currentAzimuth + `° | El: ` + currentElevation + `° | Dist: ` + currentDistance;
601
- hoverPromptPreview.textContent = buildCameraPrompt(currentAzimuth, currentElevation, currentDistance);
602
- updateGradioInputs();
603
- }
604
-
605
- function updateGradioInputs() {
606
- setTimeout(() => {
607
- const azInput = document.getElementById('js-azimuth')?.querySelector('input, textarea');
608
- const elInput = document.getElementById('js-elevation')?.querySelector('input, textarea');
609
- const distInput = document.getElementById('js-distance')?.querySelector('input, textarea');
610
-
611
- if (azInput) {
612
- azInput.value = currentAzimuth;
613
- azInput.dispatchEvent(new Event('input', { bubbles: true }));
614
- }
615
- if (elInput) {
616
- elInput.value = currentElevation;
617
- elInput.dispatchEvent(new Event('input', { bubbles: true }));
618
- }
619
- if (distInput) {
620
- distInput.value = currentDistance;
621
- distInput.dispatchEvent(new Event('input', { bubbles: true }));
622
- }
623
- }, 50);
624
- }
625
-
626
- function showImage(imageUrl) {
627
- if (imageUrl) {
628
- resultImage.src = imageUrl;
629
- resultImage.style.display = 'block';
630
- noResultPlaceholder.style.display = 'none';
631
- } else {
632
- resultImage.style.display = 'none';
633
- noResultPlaceholder.style.display = 'block';
634
- hoverControls.style.opacity = '0';
635
- }
636
- }
637
-
638
- // Global function for external updates
639
- window.updateResultImage = showImage;
640
-
641
- // Hover effects for the container
642
- container.addEventListener('mouseenter', function(e) {
643
- if (resultImage.style.display !== 'none' && resultImage.src && !isGenerating) {
644
- hoverControls.style.opacity = '1';
645
- }
646
- });
647
-
648
- container.addEventListener('mouseleave', function(e) {
649
- hoverControls.style.opacity = '0';
650
- });
651
-
652
- // Arrow button handlers with auto-generation
653
- container.addEventListener('click', function(e) {
654
- if (e.target.classList.contains('hover-arrow-btn') && !isGenerating) {
655
- e.stopPropagation();
656
- const action = e.target.dataset.action;
657
-
658
- // Add click animation
659
- e.target.style.transform += ' scale(0.9)';
660
- setTimeout(() => {
661
- e.target.style.transform = e.target.style.transform.replace(' scale(0.9)', '');
662
- }, 150);
663
-
664
- switch(action) {
665
- case 'left':
666
- currentAzimuth = (currentAzimuth - 45 + 360) % 360;
667
- break;
668
- case 'right':
669
- currentAzimuth = (currentAzimuth + 45) % 360;
670
- break;
671
- case 'up':
672
- currentElevation = Math.max(-30, Math.min(60, currentElevation + 30));
673
- break;
674
- case 'down':
675
- currentElevation = Math.max(-30, Math.min(60, currentElevation - 30));
676
- break;
677
- case 'zoom-out':
678
- currentDistance = Math.max(0.6, Math.min(1.8, currentDistance + 0.4));
679
- break;
680
- case 'zoom-in':
681
- currentDistance = Math.max(0.6, Math.min(1.8, currentDistance - 0.4));
682
- break;
683
- }
684
-
685
- updateDisplay();
686
-
687
- // Auto-trigger generation by updating inputs and calling generation function
688
- isGenerating = true;
689
- hoverControls.style.opacity = '0.3'; // Dim controls during generation
690
-
691
- // Update the hidden inputs to trigger generation
692
- setTimeout(() => {
693
- const azInput = document.getElementById('js-azimuth')?.querySelector('input, textarea');
694
- const elInput = document.getElementById('js-elevation')?.querySelector('input, textarea');
695
- const distInput = document.getElementById('js-distance')?.querySelector('input, textarea');
696
-
697
- if (azInput && elInput && distInput) {
698
- // Update all inputs
699
- azInput.value = currentAzimuth;
700
- elInput.value = currentElevation;
701
- distInput.value = currentDistance;
702
-
703
- // Trigger input events to notify Gradio
704
- azInput.dispatchEvent(new Event('input', { bubbles: true }));
705
- elInput.dispatchEvent(new Event('input', { bubbles: true }));
706
- distInput.dispatchEvent(new Event('input', { bubbles: true }));
707
-
708
- // Trigger a custom event that our Python handler can listen to
709
- const autoGenerateEvent = new CustomEvent('autoGenerate', {
710
- detail: { azimuth: currentAzimuth, elevation: currentElevation, distance: currentDistance }
711
- });
712
- window.dispatchEvent(autoGenerateEvent);
713
- }
714
-
715
- // Reset generating state after delay
716
- setTimeout(() => {
717
- isGenerating = false;
718
- hoverControls.style.opacity = '1';
719
- }, 3000);
720
- }, 100);
721
- }
722
- });
723
-
724
- // Hover effects for individual buttons
725
- container.addEventListener('mouseover', function(e) {
726
- if (e.target.classList.contains('hover-arrow-btn') && !isGenerating) {
727
- e.target.style.transform += ' scale(1.1)';
728
- }
729
- });
730
-
731
- container.addEventListener('mouseout', function(e) {
732
- if (e.target.classList.contains('hover-arrow-btn')) {
733
- e.target.style.transform = e.target.style.transform.replace(' scale(1.1)', '');
734
- }
735
- });
736
-
737
- // Initial display update
738
- updateDisplay();
739
- })();
740
- """,
741
- elem_id="result-display"
742
- )
743
-
744
- # Function to update dimensions based on uploaded image aspect ratio
745
- def update_dimensions_on_upload(input_image):
746
- """Compute recommended dimensions preserving aspect ratio."""
747
- if input_image is None:
748
- return 1024, 1024
749
-
750
- original_width, original_height = input_image.size
751
-
752
- if original_width > original_height:
753
- new_width = 1024
754
- aspect_ratio = original_height / original_width
755
- new_height = int(new_width * aspect_ratio)
756
- else:
757
- new_height = 1024
758
- aspect_ratio = original_width / original_height
759
- new_width = int(new_height * aspect_ratio)
760
-
761
- new_width = (new_width // 8) * 8
762
- new_height = (new_height // 8) * 8
763
-
764
- return new_width, new_height
765
-
766
- # Function to process the generate request with regular image input
767
- def generate_new_view(input_image, js_az, js_el, js_dist, seed_val, randomize_seed_val, guidance_val, steps_val, h_val, w_val):
768
- try:
769
- if input_image is None:
770
- raise gr.Error("Please upload an image first.")
771
 
772
- azimuth = float(js_az) if js_az else 0.0
773
- elevation = float(js_el) if js_el else 0.0
774
- distance = float(js_dist) if js_dist else 1.0
775
-
776
- # Generate the result
777
- result_image, final_seed, final_prompt = infer_camera_edit(
778
- input_image, azimuth, elevation, distance, seed_val, randomize_seed_val,
779
- guidance_val, steps_val, h_val, w_val
780
- )
781
-
782
- # Convert PIL image to base64 data URL for the HTML component
783
- import base64
784
- from io import BytesIO
785
- buffered = BytesIO()
786
- result_image.save(buffered, format="PNG")
787
- img_str = base64.b64encode(buffered.getvalue()).decode()
788
- data_url = f"data:image/png;base64,{img_str}"
789
-
790
- # Update the HTML component with the new image
791
- updated_html = f"""
792
- <div id="result-container" style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px; position: relative; display: flex; align-items: center; justify-content: center;">
793
- <!-- Generated image display -->
794
- <img id="result-image" src="{data_url}" style="max-width: 100%; max-height: 100%; object-fit: contain; display: block;">
795
 
796
- <!-- Hover-activated arrow controls -->
797
- <div id="hover-controls" style="position: absolute; inset: 0; pointer-events: none; opacity: 0; transition: opacity 0.3s ease;">
798
- <!-- Left arrow (azimuth -45°) -->
799
- <button class="hover-arrow-btn" data-action="left"
800
- style="position: absolute; left: 20px; top: 50%; transform: translateY(-50%);
801
- width: 55px; height: 55px; background: rgba(0,255,136,0.9); border: none;
802
- border-radius: 50%; color: white; font-size: 22px; cursor: pointer;
803
- pointer-events: auto; transition: all 0.2s; z-index: 10; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
804
-
805
- </button>
806
-
807
- <!-- Right arrow (azimuth +45°) -->
808
- <button class="hover-arrow-btn" data-action="right"
809
- style="position: absolute; right: 20px; top: 50%; transform: translateY(-50%);
810
- width: 55px; height: 55px; background: rgba(0,255,136,0.9); border: none;
811
- border-radius: 50%; color: white; font-size: 22px; cursor: pointer;
812
- pointer-events: auto; transition: all 0.2s; z-index: 10; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
813
-
814
- </button>
815
-
816
- <!-- Up arrow (elevation +30°) -->
817
- <button class="hover-arrow-btn" data-action="up"
818
- style="position: absolute; top: 20px; left: 50%; transform: translateX(-50%);
819
- width: 55px; height: 55px; background: rgba(255,105,180,0.9); border: none;
820
- border-radius: 50%; color: white; font-size: 22px; cursor: pointer;
821
- pointer-events: auto; transition: all 0.2s; z-index: 10; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
822
-
823
- </button>
824
-
825
- <!-- Down arrow (elevation -30°) -->
826
- <button class="hover-arrow-btn" data-action="down"
827
- style="position: absolute; bottom: 60px; left: 50%; transform: translateX(-50%);
828
- width: 55px; height: 55px; background: rgba(255,105,180,0.9); border: none;
829
- border-radius: 50%; color: white; font-size: 22px; cursor: pointer;
830
- pointer-events: auto; transition: all 0.2s; z-index: 10; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
831
-
832
- </button>
833
-
834
- <!-- Zoom controls -->
835
- <div style="position: absolute; bottom: 10px; left: 50%; transform: translateX(-50%);
836
- display: flex; gap: 12px; pointer-events: auto; z-index: 10;">
837
- <button class="hover-arrow-btn" data-action="zoom-out"
838
- style="width: 50px; height: 50px; background: rgba(255,165,0,0.9); border: none;
839
- border-radius: 50%; color: white; font-size: 26px; cursor: pointer;
840
- transition: all 0.2s; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
841
-
842
- </button>
843
-
844
- <button class="hover-arrow-btn" data-action="zoom-in"
845
- style="width: 50px; height: 50px; background: rgba(255,165,0,0.9); border: none;
846
- border-radius: 50%; color: white; font-size: 22px; cursor: pointer;
847
- transition: all 0.2s; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
848
- +
849
- </button>
850
- </div>
851
-
852
- <!-- Status display -->
853
- <div id="hover-status" style="position: absolute; top: 15px; right: 15px; background: rgba(0,0,0,0.85);
854
- color: white; padding: 10px 15px; border-radius: 8px; font-family: monospace;
855
- font-size: 13px; z-index: 10; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
856
- <div id="hover-camera-values">Az: {azimuth}° | El: {elevation}° | Dist: {distance}</div>
857
- <div id="hover-prompt-preview" style="color: #00ff88; margin-top: 5px; font-size: 11px;">{final_prompt}</div>
858
- </div>
859
- </div>
860
- </div>
861
- <script>
862
- // Re-initialize event handlers after HTML update
863
- setTimeout(() => {{
864
- const container = document.getElementById('result-container');
865
- if (!container) return;
866
-
867
- const hoverControls = container.querySelector('#hover-controls');
868
- const hoverCameraValues = container.querySelector('#hover-camera-values');
869
- const hoverPromptPreview = container.querySelector('#hover-prompt-preview');
870
-
871
- let currentAzimuth = {azimuth};
872
- let currentElevation = {elevation};
873
- let currentDistance = {distance};
874
 
875
- const azimuthMap = {{
876
- 0: "front view", 45: "front-right quarter view", 90: "right side view",
877
- 135: "back-right quarter view", 180: "back view", 225: "back-left quarter view",
878
- 270: "left side view", 315: "front-left quarter view"
879
- }};
880
- const elevationMap = {{
881
- "-30": "low-angle shot", "0": "eye-level shot", "30": "elevated shot", "60": "high-angle shot"
882
- }};
883
- const distanceMap = {{
884
- "0.6": "close-up", "1": "medium shot", "1.8": "wide shot"
885
- }};
886
 
887
- function snapToNearest(value, steps) {{
888
- return steps.reduce((prev, curr) => Math.abs(curr - value) < Math.abs(prev - value) ? curr : prev);
889
- }}
890
-
891
- function buildCameraPrompt(azimuth, elevation, distance) {{
892
- const azimuthSteps = [0, 45, 90, 135, 180, 225, 270, 315];
893
- const elevationSteps = [-30, 0, 30, 60];
894
- const distanceSteps = [0.6, 1.0, 1.8];
895
-
896
- const azSnap = snapToNearest(azimuth, azimuthSteps);
897
- const elSnap = snapToNearest(elevation, elevationSteps);
898
- const distSnap = snapToNearest(distance, distanceSteps);
899
-
900
- const azName = azimuthMap[azSnap];
901
- const elName = elevationMap[String(elSnap)];
902
- const distName = distanceMap[distSnap === 1 ? "1" : String(distSnap)];
903
-
904
- return `<sks> ` + azName + ` ` + elName + ` ` + distName;
905
- }}
906
-
907
- function updateDisplay() {{
908
- hoverCameraValues.textContent = `Az: ` + currentAzimuth + `° | El: ` + currentElevation + `° | Dist: ` + currentDistance;
909
- hoverPromptPreview.textContent = buildCameraPrompt(currentAzimuth, currentElevation, currentDistance);
910
-
911
- // Update hidden inputs
912
- setTimeout(() => {{
913
- const azInput = document.getElementById('js-azimuth')?.querySelector('input, textarea');
914
- const elInput = document.getElementById('js-elevation')?.querySelector('input, textarea');
915
- const distInput = document.getElementById('js-distance')?.querySelector('input, textarea');
916
-
917
- if (azInput) {{
918
- azInput.value = currentAzimuth;
919
- azInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
920
- }}
921
- if (elInput) {{
922
- elInput.value = currentElevation;
923
- elInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
924
- }}
925
- if (distInput) {{
926
- distInput.value = currentDistance;
927
- distInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
928
- }}
929
- }}, 50);
930
- }}
931
-
932
- // Hover effects
933
- container.addEventListener('mouseenter', function(e) {{
934
- hoverControls.style.opacity = '1';
935
- }});
936
-
937
- container.addEventListener('mouseleave', function(e) {{
938
- hoverControls.style.opacity = '0';
939
- }});
940
-
941
- // Arrow button handlers
942
- container.addEventListener('click', function(e) {{
943
- if (e.target.classList.contains('hover-arrow-btn')) {{
944
- e.stopPropagation();
945
- const action = e.target.dataset.action;
946
-
947
- switch(action) {{
948
- case 'left':
949
- currentAzimuth = (currentAzimuth - 45 + 360) % 360;
950
- break;
951
- case 'right':
952
- currentAzimuth = (currentAzimuth + 45) % 360;
953
- break;
954
- case 'up':
955
- currentElevation = Math.max(-30, Math.min(60, currentElevation + 30));
956
- break;
957
- case 'down':
958
- currentElevation = Math.max(-30, Math.min(60, currentElevation - 30));
959
- break;
960
- case 'zoom-out':
961
- currentDistance = Math.max(0.6, Math.min(1.8, currentDistance + 0.4));
962
- break;
963
- case 'zoom-in':
964
- currentDistance = Math.max(0.6, Math.min(1.8, currentDistance - 0.4));
965
- break;
966
- }}
967
-
968
- updateDisplay();
969
-
970
- // Auto-trigger generation by updating inputs
971
- setTimeout(() => {{
972
- const azInput = document.getElementById('js-azimuth')?.querySelector('input, textarea');
973
- const elInput = document.getElementById('js-elevation')?.querySelector('input, textarea');
974
- const distInput = document.getElementById('js-distance')?.querySelector('input, textarea');
975
-
976
- if (azInput && elInput && distInput) {{
977
- // Update all inputs to trigger generation
978
- azInput.value = currentAzimuth;
979
- elInput.value = currentElevation;
980
- distInput.value = currentDistance;
981
-
982
- // Trigger input events to notify Gradio of changes
983
- azInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
984
- elInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
985
- distInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
986
- }}
987
- }}, 200);
988
- }}
989
- }});
990
-
991
- // Button hover effects
992
- container.addEventListener('mouseover', function(e) {{
993
- if (e.target.classList.contains('hover-arrow-btn')) {{
994
- e.target.style.transform += ' scale(1.1)';
995
- }}
996
- }});
997
 
998
- container.addEventListener('mouseout', function(e) {{
999
- if (e.target.classList.contains('hover-arrow-btn')) {{
1000
- e.target.style.transform = e.target.style.transform.replace(' scale(1.1)', '');
1001
- }}
1002
- }});
1003
- }}, 100);
1004
- </script>
1005
- """
1006
-
1007
- return updated_html, final_seed, final_prompt
1008
-
1009
- except Exception as e:
1010
- import traceback
1011
- traceback.print_exc()
1012
- raise gr.Error(f"Generation failed: {str(e)}")
1013
 
1014
- # Function to show uploaded image immediately in HTML component
1015
- def show_uploaded_image_immediately(uploaded_image):
1016
- """When user uploads an image, immediately show it in the HTML component."""
1017
  if uploaded_image is None:
1018
- # Return to placeholder state
1019
  return gr.update(value="""
1020
- <div id="result-container"
1021
- style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
1022
- position: relative; display: flex; align-items: center; justify-content: center; cursor: default;">
1023
-
1024
- <!-- Placeholder for no image -->
1025
- <div id="no-result-placeholder" style="text-align: center; color: #999;">
1026
  <div style="font-size: 48px; margin-bottom: 10px;">📸</div>
1027
  <p>Upload an image on the left to begin</p>
1028
- <p>Then hover here to see camera controls</p>
1029
  </div>
1030
  </div>
1031
  """)
1032
 
1033
- # Convert PIL image to base64 data URL
1034
  buffered = BytesIO()
1035
  uploaded_image.save(buffered, format="PNG")
1036
  img_str = base64.b64encode(buffered.getvalue()).decode()
1037
  data_url = f"data:image/png;base64,{img_str}"
1038
 
1039
- # Return HTML with uploaded image and hover controls plus JavaScript
1040
  return gr.update(value=f"""
1041
- <div id="result-container"
1042
- style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
1043
- position: relative; display: flex; align-items: center; justify-content: center; cursor: default;">
1044
 
1045
- <!-- Uploaded image display -->
1046
- <img id="result-image" src="{data_url}" style="max-width: 100%; max-height: 100%; object-fit: contain; display: block;">
1047
 
1048
- <!-- Hover-activated arrow controls -->
1049
- <div id="hover-controls"
1050
- style="position: absolute; inset: 0; pointer-events: none; opacity: 0.7;
1051
- transition: opacity 0.3s ease-in-out; z-index: 100;">
1052
 
1053
- <!-- Left arrow -->
1054
- <button class="hover-arrow-btn" data-action="left"
1055
- style="position: absolute; left: 20px; top: 50%; transform: translateY(-50%);
1056
- width: 60px; height: 60px; background: rgba(0,255,136,0.95); border: none;
1057
- border-radius: 50%; color: white; font-size: 24px; font-weight: bold; cursor: pointer;
1058
- pointer-events: auto; transition: all 0.2s ease; z-index: 101;
1059
- box-shadow: 0 6px 20px rgba(0,0,0,0.4); display: flex; align-items: center; justify-content: center;">
 
 
 
 
 
 
 
 
1060
 
1061
  </button>
1062
 
1063
- <!-- Right arrow -->
1064
- <button class="hover-arrow-btn" data-action="right"
1065
- style="position: absolute; right: 20px; top: 50%; transform: translateY(-50%);
1066
- width: 60px; height: 60px; background: rgba(0,255,136,0.95); border: none;
1067
- border-radius: 50%; color: white; font-size: 24px; font-weight: bold; cursor: pointer;
1068
- pointer-events: auto; transition: all 0.2s ease; z-index: 101;
1069
- box-shadow: 0 6px 20px rgba(0,0,0,0.4); display: flex; align-items: center; justify-content: center;">
 
 
 
 
 
 
 
1070
 
1071
  </button>
1072
 
1073
- <!-- Up arrow -->
1074
- <button class="hover-arrow-btn" data-action="up"
1075
- style="position: absolute; top: 20px; left: 50%; transform: translateX(-50%);
1076
- width: 60px; height: 60px; background: rgba(255,105,180,0.95); border: none;
1077
- border-radius: 50%; color: white; font-size: 24px; font-weight: bold; cursor: pointer;
1078
- pointer-events: auto; transition: all 0.2s ease; z-index: 101;
1079
- box-shadow: 0 6px 20px rgba(0,0,0,0.4); display: flex; align-items: center; justify-content: center;">
 
 
 
 
 
 
 
1080
 
1081
  </button>
1082
 
1083
- <!-- Down arrow -->
1084
- <button class="hover-arrow-btn" data-action="down"
1085
- style="position: absolute; bottom: 80px; left: 50%; transform: translateX(-50%);
1086
- width: 60px; height: 60px; background: rgba(255,105,180,0.95); border: none;
1087
- border-radius: 50%; color: white; font-size: 24px; font-weight: bold; cursor: pointer;
1088
- pointer-events: auto; transition: all 0.2s ease; z-index: 101;
1089
- box-shadow: 0 6px 20px rgba(0,0,0,0.4); display: flex; align-items: center; justify-content: center;">
 
 
 
 
 
 
 
1090
 
1091
  </button>
1092
 
1093
  <!-- Zoom controls -->
1094
- <div style="position: absolute; bottom: 15px; left: 50%; transform: translateX(-50%);
1095
- display: flex; gap: 15px; pointer-events: auto; z-index: 101;">
1096
- <button class="hover-arrow-btn" data-action="zoom-out"
1097
- style="width: 55px; height: 55px; background: rgba(255,165,0,0.95); border: none;
1098
- border-radius: 50%; color: white; font-size: 28px; font-weight: bold; cursor: pointer;
1099
- transition: all 0.2s ease; box-shadow: 0 6px 20px rgba(0,0,0,0.4);
1100
- display: flex; align-items: center; justify-content: center;">
 
 
 
 
 
 
 
 
 
1101
 
1102
  </button>
1103
 
1104
- <button class="hover-arrow-btn" data-action="zoom-in"
1105
- style="width: 55px; height: 55px; background: rgba(255,165,0,0.95); border: none;
1106
- border-radius: 50%; color: white; font-size: 24px; font-weight: bold; cursor: pointer;
1107
- transition: all 0.2s ease; box-shadow: 0 6px 20px rgba(0,0,0,0.4);
1108
- display: flex; align-items: center; justify-content: center;">
 
 
 
 
 
 
 
 
1109
  +
1110
  </button>
1111
  </div>
1112
 
1113
  <!-- Status display -->
1114
- <div id="hover-status"
1115
- style="position: absolute; top: 15px; right: 15px; background: rgba(0,0,0,0.9);
1116
- color: white; padding: 12px 16px; border-radius: 10px; font-family: monospace;
1117
- font-size: 14px; z-index: 101; box-shadow: 0 6px 20px rgba(0,0,0,0.4);
1118
- min-width: 200px;">
1119
- <div id="hover-camera-values" style="margin-bottom: 4px;">Az: 0° | El: 0° | Dist: 1.0</div>
1120
- <div id="hover-prompt-preview" style="color: #00ff88; font-size: 12px; line-height: 1.3;"><sks> front view eye-level shot medium shot</div>
1121
  </div>
1122
  </div>
1123
  </div>
1124
- <script>
1125
- // Initialize event handlers for uploaded image
1126
- setTimeout(() => {{
1127
- console.log('Initializing uploaded image JavaScript...');
1128
- const container = document.getElementById('result-container');
1129
- if (!container) {{
1130
- console.error('Container not found!');
1131
- return;
1132
- }}
1133
- console.log('Container found, setting up controls...');
1134
-
1135
- const hoverControls = container.querySelector('#hover-controls');
1136
- const hoverCameraValues = container.querySelector('#hover-camera-values');
1137
- const hoverPromptPreview = container.querySelector('#hover-prompt-preview');
1138
-
1139
- console.log('Found elements:', !!hoverControls, !!hoverCameraValues, !!hoverPromptPreview);
1140
-
1141
- if (!hoverControls) {{
1142
- console.error('Hover controls not found!');
1143
- return;
1144
- }}
1145
-
1146
- let currentAzimuth = 0;
1147
- let currentElevation = 0;
1148
- let currentDistance = 1.0;
1149
- let isGenerating = false;
1150
-
1151
- // Camera mappings
1152
- const azimuthMap = {{
1153
- 0: "front view", 45: "front-right quarter view", 90: "right side view",
1154
- 135: "back-right quarter view", 180: "back view", 225: "back-left quarter view",
1155
- 270: "left side view", 315: "front-left quarter view"
1156
- }};
1157
- const elevationMap = {{
1158
- "-30": "low-angle shot", "0": "eye-level shot", "30": "elevated shot", "60": "high-angle shot"
1159
- }};
1160
- const distanceMap = {{
1161
- "0.6": "close-up", "1": "medium shot", "1.8": "wide shot"
1162
- }};
1163
-
1164
- function snapToNearest(value, steps) {{
1165
- return steps.reduce((prev, curr) => Math.abs(curr - value) < Math.abs(prev - value) ? curr : prev);
1166
- }}
1167
-
1168
- function buildCameraPrompt(azimuth, elevation, distance) {{
1169
- const azimuthSteps = [0, 45, 90, 135, 180, 225, 270, 315];
1170
- const elevationSteps = [-30, 0, 30, 60];
1171
- const distanceSteps = [0.6, 1.0, 1.8];
1172
-
1173
- const azSnap = snapToNearest(azimuth, azimuthSteps);
1174
- const elSnap = snapToNearest(elevation, elevationSteps);
1175
- const distSnap = snapToNearest(distance, distanceSteps);
1176
 
1177
- const azName = azimuthMap[azSnap];
1178
- const elName = elevationMap[String(elSnap)];
1179
- const distName = distanceMap[distSnap === 1 ? "1" : String(distSnap)];
 
 
1180
 
1181
- return `<sks> ` + azName + ` ` + elName + ` ` + distName;
1182
- }}
1183
-
1184
- function updateDisplay() {{
1185
- hoverCameraValues.textContent = `Az: ` + currentAzimuth + `° | El: ` + currentElevation + `° | Dist: ` + currentDistance;
1186
- hoverPromptPreview.textContent = buildCameraPrompt(currentAzimuth, currentElevation, currentDistance);
1187
- updateGradioInputs();
1188
- }}
1189
-
1190
- function updateGradioInputs() {{
1191
- setTimeout(() => {{
1192
- console.log('Updating Gradio inputs with:', currentAzimuth, currentElevation, currentDistance);
1193
-
1194
- // Try multiple selectors for the hidden inputs
1195
- const azInput = document.getElementById('js-azimuth')?.querySelector('input, textarea') ||
1196
- document.querySelector('#js-azimuth input, #js-azimuth textarea') ||
1197
- document.querySelector('input[data-testid*="azimuth"], textarea[data-testid*="azimuth"]');
1198
- const elInput = document.getElementById('js-elevation')?.querySelector('input, textarea') ||
1199
- document.querySelector('#js-elevation input, #js-elevation textarea') ||
1200
- document.querySelector('input[data-testid*="elevation"], textarea[data-testid*="elevation"]');
1201
- const distInput = document.getElementById('js-distance')?.querySelector('input, textarea') ||
1202
- document.querySelector('#js-distance input, #js-distance textarea') ||
1203
- document.querySelector('input[data-testid*="distance"], textarea[data-testid*="distance"]');
1204
-
1205
- console.log('Found inputs:', !!azInput, !!elInput, !!distInput);
1206
-
1207
- if (azInput) {{
1208
- azInput.value = currentAzimuth;
1209
- azInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
1210
- azInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
1211
- console.log('Updated azimuth to:', currentAzimuth);
1212
- }}
1213
- if (elInput) {{
1214
- elInput.value = currentElevation;
1215
- elInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
1216
- elInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
1217
- console.log('Updated elevation to:', currentElevation);
1218
- }}
1219
- if (distInput) {{
1220
- distInput.value = currentDistance;
1221
- distInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
1222
- distInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
1223
- console.log('Updated distance to:', currentDistance);
1224
- }}
1225
-
1226
- if (!azInput || !elInput || !distInput) {{
1227
- console.warn('Some inputs not found, retrying in 500ms...');
1228
- setTimeout(() => updateGradioInputs(), 500);
1229
- }}
1230
- }}, 100);
1231
- }}
1232
-
1233
- // Hover effects for the container - always show arrows, make brighter on hover
1234
- container.addEventListener('mouseenter', function(e) {{
1235
- hoverControls.style.opacity = '1';
1236
- }});
1237
 
1238
- container.addEventListener('mouseleave', function(e) {{
1239
- hoverControls.style.opacity = '0.7';
1240
- }});
1241
 
1242
- // Arrow button handlers with auto-generation
1243
- container.addEventListener('click', function(e) {{
1244
- if (e.target.classList.contains('hover-arrow-btn') && !isGenerating) {{
1245
- e.stopPropagation();
1246
- e.preventDefault();
1247
- const action = e.target.dataset.action;
1248
- console.log('Arrow clicked:', action, 'Current values:', currentAzimuth, currentElevation, currentDistance);
1249
-
1250
- // Add click animation
1251
- e.target.style.transform += ' scale(0.9)';
1252
- setTimeout(() => {{
1253
- e.target.style.transform = e.target.style.transform.replace(' scale(0.9)', '');
1254
- }}, 150);
1255
-
1256
- const oldValues = {{ az: currentAzimuth, el: currentElevation, dist: currentDistance }};
1257
-
1258
- switch(action) {{
1259
- case 'left':
1260
- currentAzimuth = (currentAzimuth - 45 + 360) % 360;
1261
- break;
1262
- case 'right':
1263
- currentAzimuth = (currentAzimuth + 45) % 360;
1264
- break;
1265
- case 'up':
1266
- currentElevation = Math.max(-30, Math.min(60, currentElevation + 30));
1267
- break;
1268
- case 'down':
1269
- currentElevation = Math.max(-30, Math.min(60, currentElevation - 30));
1270
- break;
1271
- case 'zoom-out':
1272
- currentDistance = Math.max(0.6, Math.min(1.8, currentDistance + 0.4));
1273
- break;
1274
- case 'zoom-in':
1275
- currentDistance = Math.max(0.6, Math.min(1.8, currentDistance - 0.4));
1276
- break;
1277
- }}
1278
-
1279
- console.log('Values changed from:', oldValues, 'to:', {{ az: currentAzimuth, el: currentElevation, dist: currentDistance }});
1280
-
1281
- updateDisplay();
1282
-
1283
- // Auto-trigger generation
1284
- isGenerating = true;
1285
- hoverControls.style.opacity = '0.3';
1286
- console.log('Starting generation...');
1287
-
1288
- // Force update of Gradio inputs multiple times to ensure it works
1289
- updateGradioInputs();
1290
- setTimeout(() => updateGradioInputs(), 200);
1291
- setTimeout(() => updateGradioInputs(), 500);
1292
-
1293
- // Also try clicking the hidden generate button as fallback
1294
- setTimeout(() => {{
1295
- const hiddenBtn = document.getElementById('hidden-generate-btn')?.querySelector('button') ||
1296
- document.querySelector('#hidden-generate-btn button') ||
1297
- document.querySelector('button[data-testid*="hidden-generate"]');
1298
- if (hiddenBtn) {{
1299
- console.log('Clicking hidden generate button');
1300
- hiddenBtn.click();
1301
- }} else {{
1302
- console.warn('Hidden generate button not found');
1303
- }}
1304
- }}, 300);
1305
-
1306
- // Reset generating state
1307
- setTimeout(() => {{
1308
- isGenerating = false;
1309
- hoverControls.style.opacity = '1';
1310
- console.log('Generation timeout ended, re-enabling controls');
1311
- }}, 5000);
1312
- }}
1313
- }});
1314
-
1315
- // Hover effects for individual buttons
1316
- container.addEventListener('mouseover', function(e) {{
1317
- if (e.target.classList.contains('hover-arrow-btn') && !isGenerating) {{
1318
- e.target.style.transform += ' scale(1.1)';
1319
- }}
1320
- }});
1321
-
1322
- container.addEventListener('mouseout', function(e) {{
1323
- if (e.target.classList.contains('hover-arrow-btn')) {{
1324
- e.target.style.transform = e.target.style.transform.replace(' scale(1.1)', '');
1325
- }}
1326
- }});
1327
-
1328
- // Force arrows to be visible immediately for testing
1329
- hoverControls.style.opacity = '0.8';
1330
- console.log('Arrows should now be visible with opacity:', hoverControls.style.opacity);
1331
-
1332
- // Initial display update
1333
- updateDisplay();
1334
-
1335
- console.log('JavaScript initialization completed');
1336
- }}, 100);
1337
- </script>
1338
- """)
1339
 
1340
- # Auto-update dimensions and show image immediately when uploaded
1341
- image.upload(
1342
- fn=update_dimensions_on_upload,
1343
- inputs=[image],
1344
- outputs=[width, height]
1345
- )
1346
 
 
1347
  image.upload(
1348
- fn=show_uploaded_image_immediately,
1349
  inputs=[image],
1350
  outputs=[result_display]
1351
  )
1352
 
1353
- # Auto-generation handler triggered by input changes
1354
- def auto_generate_on_change(js_az, js_el, js_dist, input_image, seed_val, randomize_seed_val, guidance_val, steps_val, h_val, w_val):
1355
- """Auto-generate when camera parameters change from arrow clicks."""
1356
- if input_image is None:
1357
- return gr.update(), seed_val, "<sks> front view eye-level shot medium shot"
1358
-
1359
- return generate_new_view(input_image, js_az, js_el, js_dist, seed_val, randomize_seed_val, guidance_val, steps_val, h_val, w_val)
1360
-
1361
- # Set up auto-generation on parameter changes (keep this as fallback)
1362
- for input_component in [js_azimuth, js_elevation, js_distance]:
1363
- input_component.change(
1364
- fn=auto_generate_on_change,
1365
- inputs=[js_azimuth, js_elevation, js_distance, image, seed, randomize_seed, guidance_scale, num_inference_steps, height, width],
1366
- outputs=[result_display, seed, prompt_display]
1367
  )
1368
 
1369
- # Set up hidden button click for direct generation triggering
1370
- hidden_generate_btn.click(
1371
- fn=auto_generate_on_change,
1372
- inputs=[js_azimuth, js_elevation, js_distance, image, seed, randomize_seed, guidance_scale, num_inference_steps, height, width],
1373
- outputs=[result_display, seed, prompt_display]
1374
- )
1375
 
1376
  if __name__ == "__main__":
1377
- demo.launch(debug=True, show_error=True)
 
 
 
 
 
1
  import gradio as gr
 
 
 
2
  import base64
3
  from io import BytesIO
4
  from PIL import Image
5
+ import torch
6
+ from diffusers import StableDiffusionXLPipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ # Initialize the model (placeholder - replace with your actual model)
9
def init_model():
    """Return the image-generation pipeline, or ``None`` while none is wired in.

    This is a placeholder: no model is loaded and the function always
    returns ``None``.

    Returns:
        None: always, until real model loading is implemented.
    """
    # TODO: load the real pipeline (base model plus camera-control LoRAs)
    # here and return it. When that code is added, catch only the specific
    # exceptions the loader is expected to raise; a bare ``except:`` would
    # also swallow KeyboardInterrupt/SystemExit and hide real failures.
    return None
22
+
23
+ # Camera parameter mappings
24
# Azimuth (degrees) -> view name used in the camera prompt.
AZIMUTH_MAP = {
    0: "front view",
    45: "front-right quarter view",
    90: "right side view",
    135: "back-right quarter view",
    180: "back view",
    225: "back-left quarter view",
    270: "left side view",
    315: "front-left quarter view",
}

# Elevation (degrees) -> shot-angle name used in the camera prompt.
ELEVATION_MAP = {
    -30: "low-angle shot",
    0: "eye-level shot",
    30: "elevated shot",
    60: "high-angle shot",
}

# Distance setting -> framing name used in the camera prompt.
DISTANCE_MAP = {
    0.6: "close-up",
    1.0: "medium shot",
    1.8: "wide shot",
}
35
 
36
def snap_to_nearest(value, steps):
    """Return the element of ``steps`` with the smallest absolute gap to ``value``.

    When several elements are equally close, the one that appears first in
    ``steps`` is returned. An empty ``steps`` raises ``ValueError``.
    """

    def gap(step):
        # Absolute distance between a candidate step and the requested value.
        return abs(step - value)

    return min(steps, key=gap)
39
 
40
def build_camera_prompt(azimuth, elevation, distance):
    """Build the ``<sks>`` camera prompt for the given camera parameters.

    Each parameter is snapped to the nearest key of its lookup table
    (``AZIMUTH_MAP``, ``ELEVATION_MAP``, ``DISTANCE_MAP``) and the three
    matching names are joined into one prompt string.

    Args:
        azimuth: Horizontal angle in degrees, as a number or numeric string.
            Any angle is accepted; it is wrapped into ``[0, 360)`` first.
        elevation: Vertical angle in degrees, as a number or numeric string.
            Values outside the table snap to its lowest/highest entry.
        distance: Distance setting, as a number or numeric string. Values
            outside the table snap to its lowest/highest entry.

    Returns:
        str: ``"<sks> {view} {shot} {framing}"``.

    Raises:
        TypeError, ValueError: if a parameter cannot be converted to ``float``.
    """
    # float() lets callers pass the raw text held by the Textbox-backed
    # camera inputs as well as plain numbers.
    azimuth = float(azimuth) % 360
    elevation = float(elevation)
    distance = float(distance)

    # Azimuth is circular: 350 degrees is closer to the front view (0) than
    # to 315. Offering 360 as an extra candidate and folding it back to 0
    # makes the snap wrap around instead of stopping at the last table key.
    az_snap = snap_to_nearest(azimuth, [*AZIMUTH_MAP, 360]) % 360
    # Candidates come straight from the tables so the two cannot drift apart.
    el_snap = snap_to_nearest(elevation, list(ELEVATION_MAP))
    dist_snap = snap_to_nearest(distance, list(DISTANCE_MAP))

    az_name = AZIMUTH_MAP[az_snap]
    el_name = ELEVATION_MAP[el_snap]
    dist_name = DISTANCE_MAP[dist_snap]

    return f"<sks> {az_name} {el_name} {dist_name}"
55
+
56
def generate_new_view(
    input_image,
    azimuth,
    elevation,
    distance,
    seed,
    randomize_seed,
    guidance_scale,
    num_inference_steps,
    height,
    width,
):
    """Return the view for the requested camera pose (placeholder).

    No model inference happens yet: the input image is handed back unchanged
    together with the camera prompt that would be sent to the model.

    Args:
        input_image: Source image, or ``None`` when nothing has been uploaded.
        azimuth: Camera azimuth, passed to ``build_camera_prompt``.
        elevation: Camera elevation, passed to ``build_camera_prompt``.
        distance: Camera distance, passed to ``build_camera_prompt``.
        seed: Seed value; returned unchanged.
        randomize_seed: Accepted for interface compatibility; currently unused.
        guidance_scale: Accepted for interface compatibility; currently unused.
        num_inference_steps: Accepted for interface compatibility; currently
            unused.
        height: Accepted for interface compatibility; currently unused.
        width: Accepted for interface compatibility; currently unused.

    Returns:
        tuple: ``(image, seed, prompt)`` where ``image`` is ``input_image``
        (``None`` when no image was supplied).

    Raises:
        Whatever ``build_camera_prompt`` raises for invalid camera values.
    """
    # Build the prompt once, up front. It is the only step that can fail
    # here, and an error handler that calls it again as a "fallback" would
    # simply raise the same error a second time.
    prompt = build_camera_prompt(azimuth, elevation, distance)

    if input_image is None:
        return None, seed, prompt

    # TODO: run the real model with ``prompt`` and the generation settings.
    # Wrap that call (not the prompt building above) in error handling.
    return input_image, seed, prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
+ def create_camera_control_app():
74
+ """Create the working camera control app."""
 
 
75
 
76
+ with gr.Blocks(title="Camera Control with Directional Arrows - WORKING VERSION") as demo:
77
+ gr.Markdown("# 📸 Camera Control with Directional Arrows")
78
+ gr.Markdown("Upload an image and use arrows to control camera angles for 3D view generation")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
+ with gr.Row():
81
+ # Left column: Image upload and controls
82
+ with gr.Column(scale=1):
83
+ image = gr.Image(label="Upload Image", type="pil", height=400)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
+ # Camera parameter inputs (visible for debugging)
86
+ js_azimuth = gr.Textbox("0", visible=True, elem_id="js-azimuth", label="Azimuth")
87
+ js_elevation = gr.Textbox("0", visible=True, elem_id="js-elevation", label="Elevation")
88
+ js_distance = gr.Textbox("1.0", visible=True, elem_id="js-distance", label="Distance")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
+ # Generation settings
91
+ with gr.Accordion("⚙️ Generation Settings", open=False):
92
+ seed = gr.Slider(minimum=0, maximum=2147483647, step=1, value=42, label="Seed")
93
+ randomize_seed = gr.Checkbox(True, label="Randomize seed")
94
+ guidance_scale = gr.Slider(minimum=1, maximum=20, step=0.1, value=7.5, label="Guidance scale")
95
+ num_inference_steps = gr.Slider(minimum=10, maximum=50, step=1, value=25, label="Number of inference steps")
96
+ height = gr.Slider(minimum=256, maximum=1024, step=64, value=1024, label="Height")
97
+ width = gr.Slider(minimum=256, maximum=1024, step=64, value=1024, label="Width")
98
+
99
+ prompt_display = gr.Textbox(
100
+ label="Current Camera Prompt",
101
+ value="<sks> front view eye-level shot medium shot",
102
+ interactive=False
103
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
+ # Right column: Interactive image view
106
+ with gr.Column(scale=1):
107
+ gr.Markdown("### 🎯 Interactive Image View")
108
+ gr.Markdown("*Upload an image, then hover to see controls and click arrows to generate new views*")
 
 
 
 
 
 
 
109
 
110
+ # Interactive HTML component using working pattern
111
+ result_display = gr.HTML(
112
+ value="""
113
+ <div style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
114
+ position: relative; display: flex; align-items: center; justify-content: center;">
115
+ <div style="text-align: center; color: #999;">
116
+ <div style="font-size: 48px; margin-bottom: 10px;">📸</div>
117
+ <p>Upload an image on the left to begin</p>
118
+ <p>Then hover to see camera controls</p>
119
+ </div>
120
+ </div>
121
+ """,
122
+ elem_id="result-display"
123
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
+ # Debug output
126
+ debug_output = gr.Textbox(label="Debug Output", visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
+ # Functions for handling interactions
129
def _placeholder_html():
    """Return the empty-state panel shown until an image has been uploaded."""
    return """
    <div style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
                position: relative; display: flex; align-items: center; justify-content: center;">
        <div style="text-align: center; color: #999;">
            <div style="font-size: 48px; margin-bottom: 10px;">📸</div>
            <p>Upload an image on the left to begin</p>
            <p>Then hover to see camera controls</p>
        </div>
    </div>
    """


def _step_onclick_js(elem_id, status_id, next_value_js, shown_js="next"):
    """Build the inline ``onclick`` JavaScript for one camera control button.

    The generated script reads the current value of the Gradio textbox whose
    ``elem_id`` is *elem_id*, computes the next value, writes it back, fires an
    ``input`` event so Gradio notices the change, and mirrors the value into
    the status read-out span *status_id*.

    Args:
        elem_id: ``elem_id`` of the Gradio textbox wrapper to update.
        status_id: DOM id of the ``<span>`` in the status read-out.
        next_value_js: JS expression for the new value; may refer to ``cur``
            (the current textbox string).
        shown_js: JS expression for the text written back; may refer to
            ``next``. Defaults to the raw value.

    Returns:
        A JS snippet that uses only single quotes, so it can be embedded in a
        double-quoted HTML attribute.
    """
    return (
        f"var host = document.getElementById('{elem_id}'); "
        # A single-line gr.Textbox may be rendered as <input> or <textarea>
        # depending on its lines/max_lines settings, so accept either.
        "var box = host ? host.querySelector('input, textarea') : null; "
        "if (!box) return; "
        "var cur = box.value; "
        f"var next = {next_value_js}; "
        f"box.value = {shown_js}; "
        "box.dispatchEvent(new Event('input', {bubbles: true})); "
        f"document.getElementById('{status_id}').textContent = {shown_js};"
    )


def _control_button_html(label, title, onclick_js, color, size_px, placement=""):
    """Render one round overlay button.

    Args:
        label: Visible button text (a single glyph).
        title: Tooltip text.
        onclick_js: Inline JS for the click handler (single quotes only).
        color: CSS background colour.
        size_px: Button width and height in pixels.
        placement: Optional absolute-positioning CSS (ending in ``;``). When
            given, the button is positioned absolutely with ``z-index: 11``.

    Returns:
        The ``<button>`` element as an HTML string.
    """
    if placement:
        # Positioned buttons already carry a translate() transform, so the
        # hover effect appends/removes scale() instead of overwriting it.
        position_css = f"position: absolute; {placement} z-index: 11; "
        hover_on = "this.style.transform += ' scale(1.1)'"
        hover_off = "this.style.transform = this.style.transform.replace(' scale(1.1)', '')"
    else:
        position_css = ""
        hover_on = "this.style.transform = 'scale(1.1)'"
        hover_off = "this.style.transform = ''"

    return f"""
            <button onclick="{onclick_js}"
                    style="{position_css}width: {size_px}px; height: {size_px}px; background: {color}; border: none;
                           border-radius: 50%; color: white; font-size: 24px; cursor: pointer;
                           box-shadow: 0 4px 12px rgba(0,0,0,0.3); transition: transform 0.2s;"
                    onmouseover="{hover_on}"
                    onmouseout="{hover_off}"
                    title="{title}">
                {label}
            </button>"""


def show_uploaded_image_with_arrows(uploaded_image):
    """Render the uploaded image with hover-activated camera controls.

    The image is embedded as a base64 PNG data URL. Six overlay buttons step
    the ``js-azimuth`` (±45, wrapping at 360), ``js-elevation`` (±30, clamped
    to -30..60) and ``js-distance`` (±0.4, clamped to 0.6..1.8) textboxes from
    the browser; each click fires an ``input`` event on the textbox.

    NOTE: the status read-out is always rendered with the defaults
    (Az 0 / El 0 / Dist 1.0); it is only kept current by the click handlers.

    Args:
        uploaded_image: Image object exposing ``save(fp, format=...)`` (the
            value of the image input), or ``None`` when nothing is uploaded.

    Returns:
        A ``gr.update(value=...)`` payload carrying the HTML for the display:
        the placeholder panel when *uploaded_image* is ``None``, otherwise the
        image with its control overlay.
    """
    if uploaded_image is None:
        return gr.update(value=_placeholder_html())

    # Convert to data URL
    buffered = BytesIO()
    uploaded_image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    data_url = f"data:image/png;base64,{img_str}"

    azimuth_color = "rgba(0,255,136,0.9)"
    elevation_color = "rgba(255,105,180,0.9)"
    zoom_color = "rgba(255,165,0,0.9)"

    arrow_buttons = "".join([
        # +315 is -45 modulo 360, which keeps the result non-negative for
        # in-range azimuths without a separate wrap-around branch.
        _control_button_html(
            "←", "Rotate Left (Azimuth -45°)",
            _step_onclick_js("js-azimuth", "status-az", "(parseInt(cur) + 315) % 360"),
            azimuth_color, 60, "left: 20px; top: 50%; transform: translateY(-50%);",
        ),
        _control_button_html(
            "→", "Rotate Right (Azimuth +45°)",
            _step_onclick_js("js-azimuth", "status-az", "(parseInt(cur) + 45) % 360"),
            azimuth_color, 60, "right: 20px; top: 50%; transform: translateY(-50%);",
        ),
        _control_button_html(
            "↑", "Look Up (Elevation +30°)",
            _step_onclick_js("js-elevation", "status-el", "Math.min(60, parseInt(cur) + 30)"),
            elevation_color, 60, "top: 20px; left: 50%; transform: translateX(-50%);",
        ),
        # Sits at bottom: 80px so it clears the zoom row at bottom: 20px.
        _control_button_html(
            "↓", "Look Down (Elevation -30°)",
            _step_onclick_js("js-elevation", "status-el", "Math.max(-30, parseInt(cur) - 30)"),
            elevation_color, 60, "bottom: 80px; left: 50%; transform: translateX(-50%);",
        ),
    ])

    zoom_buttons = "".join([
        _control_button_html(
            "−", "Zoom Out (Distance +0.4)",
            _step_onclick_js(
                "js-distance", "status-dist",
                "Math.min(1.8, parseFloat(cur) + 0.4)", "next.toFixed(1)",
            ),
            zoom_color, 55,
        ),
        _control_button_html(
            "+", "Zoom In (Distance -0.4)",
            _step_onclick_js(
                "js-distance", "status-dist",
                "Math.max(0.6, parseFloat(cur) - 0.4)", "next.toFixed(1)",
            ),
            zoom_color, 55,
        ),
    ])

    # Return HTML with image and arrow controls
    return gr.update(value=f"""
    <div style="width: 100%; height: 500px; background: #f8f8f8; border: 2px solid #e0e0e0; border-radius: 12px;
                position: relative; display: flex; align-items: center; justify-content: center;">

        <!-- Uploaded image -->
        <img src="{data_url}" style="max-width: 100%; max-height: 100%; object-fit: contain;">

        <!-- Control overlay, revealed on hover -->
        <div style="position: absolute; inset: 0; z-index: 10; opacity: 0; transition: opacity 0.3s ease;"
             onmouseover="this.style.opacity='1'" onmouseout="this.style.opacity='0'">
            {arrow_buttons}

            <!-- Zoom controls -->
            <div style="position: absolute; bottom: 20px; left: 50%; transform: translateX(-50%);
                        display: flex; gap: 15px; z-index: 11;">
                {zoom_buttons}
            </div>

            <!-- Status display -->
            <div style="position: absolute; top: 15px; right: 15px; background: rgba(0,0,0,0.85);
                        color: white; padding: 10px 14px; border-radius: 8px; font-family: monospace;
                        font-size: 13px; z-index: 11; box-shadow: 0 4px 12px rgba(0,0,0,0.4);">
                <div>Az: <span id="status-az">0</span>° | El: <span id="status-el">0</span>° | Dist: <span id="status-dist">1.0</span></div>
            </div>
        </div>
    </div>
    """)
276
+
277
def handle_parameter_change(az, el, dist, input_image):
    """Handle a camera parameter change and refresh the view.

    Args:
        az: Azimuth textbox value; parsed with ``float``.
        el: Elevation textbox value; parsed with ``float``.
        dist: Distance textbox value; parsed with ``float``.
        input_image: The currently uploaded image, or ``None``.

    Returns:
        A 3-tuple of ``(display update, prompt text, debug message)``. When no
        image is loaded the display is left untouched (``gr.update()``). On
        any error the display is left untouched and both text slots carry the
        error message.
    """
    try:
        azimuth = float(az)
        elevation = float(el)
        distance = float(dist)

        # Build new prompt
        prompt = build_camera_prompt(azimuth, elevation, distance)
        summary = f"Az={azimuth}°, El={elevation}°, Dist={distance}"

        if input_image is None:
            return gr.update(), prompt, f"Parameters updated: {summary}"

        # Placeholder: no model call yet, so the "generated" view is simply
        # the original image.
        new_image = input_image

        # show_uploaded_image_with_arrows already returns a gr.update(...)
        # payload, which can be handed back as the output as-is. Reading a
        # `.value` attribute from it raised (the payload is a dict) and the
        # catch-all below turned every arrow click into an error message.
        return (
            show_uploaded_image_with_arrows(new_image),
            prompt,
            f"Generated view: {summary}",
        )

    except Exception as e:
        # Event-handler boundary: report the problem in the UI instead of
        # letting the callback fail.
        return gr.update(), f"Error: {str(e)}", f"Error processing parameters: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
+ # Set up event handlers
 
 
 
 
 
308
 
309
+ # Image upload handler
310
  image.upload(
311
+ fn=show_uploaded_image_with_arrows,
312
  inputs=[image],
313
  outputs=[result_display]
314
  )
315
 
316
+ # Parameter change handlers (triggered by arrow clicks)
317
+ for param_input in [js_azimuth, js_elevation, js_distance]:
318
+ param_input.change(
319
+ fn=handle_parameter_change,
320
+ inputs=[js_azimuth, js_elevation, js_distance, image],
321
+ outputs=[result_display, prompt_display, debug_output]
 
 
 
 
 
 
 
 
322
  )
323
 
324
+ return demo
 
 
 
 
 
325
 
326
if __name__ == "__main__":
    # Script entry point: build the app via create_camera_control_app() and
    # start the Gradio server.
    demo = create_camera_control_app()
    demo.launch()