File size: 18,611 Bytes
a12c07f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
import os
import sys
import subprocess
import argparse
import time
import json
from PIL import Image
import shutil
from torchvision import transforms


env = os.environ.copy()

## Focus on model swapping; make a default_BA.py (all BA-based structure) that can reproduce our results; also allow a customized system.
##

def BlenderAlchemy_run(blender_file_path, start_script, start_render, goal_render, blender_render_script_path, task_instance_id, task, infinigen_installation_path, generator_type, evaluator_type, starter_time=None, tree_dims=(4, 8)):
    '''
    Run one BlenderAlchemy generation (and selection) pass of the VLM system.

    Writes a temporary YAML config, shells out to `system/main.py`, then
    collects the proposal scripts/renders and the winner of the final
    search iteration.

    Inputs:
        blender_file_path: file path to the .blend base file
        start_script: file path to start.py, the script for the start scene
        start_render: dir path to the rendered images of the start scene
            (NOTE(review): not referenced anywhere in this function body)
        goal_render: dir path to the rendered images of the goal scene;
            `{goal_render}/render1.png` is used as the target image
        blender_render_script_path: file path to the render script of the blender scene
        task_instance_id: f'{task}{i}', like `placement1`, `geometry2`
        task: name of the task, like `geometry`, `placement`
        infinigen_installation_path: file/dir path to the infinigen blender
            executable used for background rendering
        generator_type: edit-generator model identifier forwarded into the config
        evaluator_type: state-evaluator model identifier forwarded into the config
        starter_time: optional tag; outputs go to outputs/outputs_<starter_time>
        tree_dims: (depth, breadth) of the search tree, default (4, 8)

    Outputs:
        proposal_edits_paths: list of file paths to proposal scripts from the VLM system
        proposal_renders_paths: list of file paths to the proposal renders
            (a flat list, parallel in origin but NOT keyed by edit path)
        selected_edit_path: file path to the winning proposal script of the
            last iteration (read from the thought_process JSON)
        selected_render_path: file path to the render of the winning proposal
    '''

    # Map public task names onto the internal task identifiers used by system/main.py.
    task_translate = {
            'geometry': 'geonodes',
            'material': 'material',
            'blendshape': 'shapekey',
            'placement': 'placement',
            'lighting': 'lighting'
    }

    task = task_translate[task]
    variants = ['tune_leap']

    # To automatically differentiate the inference results
    if starter_time:
        output_folder_name = f"outputs/outputs_{starter_time}"
    else:
        output_folder_name = "outputs/outputs_test"

    config_dict = {     # This should allow plug-in for different models
        'task':{'type': task},
        'credentials':{
            'openai': 'credentials/openai_api.txt',
            'claude': 'credentials/claude_api.txt',
            'gemini': 'credentials/gemini_api.txt',
        },
        'input':{
            'text_prompt': None,
            'input_image': f'{goal_render}/render1.png',
            'target_code': None,
        },
        'output':{
            'output_dir': f"{output_folder_name}/{task_instance_id}/"
        },
        'run_config':{
            'blender_command': infinigen_installation_path,
            'edit_style': "edit_code",
            'num_tries': 1,
            'enable_visual_imagination': False, 
            'enable_hypothesis_reversion': True,
            'variants': variants,
            'tree_dims': [
                f"{tree_dims[0]}x{tree_dims[1]}"
            ],
            'edit_generator_type': generator_type,
            'state_evaluator_type': evaluator_type,
            'max_concurrent_rendering_processes': 1,
            'max_concurrent_evaluation_requests': 1,
            'max_concurrent_generator_requests': 1
        }
    }
    import yaml
    # NOTE(review): a fixed temp.yml in the CWD is not safe for concurrent runs.
    config_file_path = os.path.abspath('temp.yml')

    with open(config_file_path, 'w') as file:
        yaml.dump(config_dict, file)

    # NOTE(review): shell=True with interpolated paths — breaks if any path
    # contains spaces or shell metacharacters.
    command = f'''
        cd system && \

        python main.py \
            --starter_blend {blender_file_path} \
            --blender_base {blender_render_script_path} \
            --blender_script {start_script} \
            --config {config_file_path}
    '''

    print(f'config_dict: {config_dict}')
    print(f'command: {command}')

    subprocess.run(command, shell=True, env=env)

    # Output layout produced by system/main.py: <variant>_d<depth>_b<breadth>/...
    proposal_edits_dir_path = f'system/{output_folder_name}/{task_instance_id}/instance0/{variants[0]}_d{tree_dims[0]}_b{tree_dims[1]}/scripts'
    proposal_renders_dir_path = f'system/{output_folder_name}/{task_instance_id}/instance0/{variants[0]}_d{tree_dims[0]}_b{tree_dims[1]}/renders'
    proposal_edits_paths = [os.path.join(proposal_edits_dir_path, edit_path) for edit_path in os.listdir(proposal_edits_dir_path)]
    proposal_renders_paths = [os.path.join(proposal_renders_dir_path, render_path) for render_path in os.listdir(proposal_renders_dir_path)]

    # Selected edit of the final iteration (iterations are 0-indexed, so the
    # last one is depth - 1); its last entry holds the tournament winner.
    last_iter_info = f'system/{output_folder_name}/{task_instance_id}/instance0/{variants[0]}_d{tree_dims[0]}_b{tree_dims[1]}/thought_process/iteration_{tree_dims[0]-1}.json'
    with open(last_iter_info, 'r') as file:
        info = json.load(file)
    
    # Paths inside the JSON are relative to the system/ directory.
    selected_edit_path = "system/" + info[-1]['winner_code']
    selected_render_path = "system/" + info[-1]['winner_image']

    return proposal_edits_paths, proposal_renders_paths, selected_edit_path, selected_render_path

def merge_images_in_directory(directory, saved_to_local=True, merge_dir_into_image=True):
    '''
    Merge all images in the given directory horizontally into a single image.

    Inputs:
        directory: dir path containing the images (png/jpg/jpeg/webp)
        saved_to_local: if True, save the merged image to disk
        merge_dir_into_image: if True (and saving), delete the directory and
            save the merged image at the directory's own path; if False, save
            `merged_image.png` inside the directory

    Outputs:
        (merged PIL.Image or None, saved path or None). Returns (None, None)
        when the directory contains no images.
    '''
    # Collect candidate image paths.
    image_paths = [os.path.join(directory, f) for f in os.listdir(directory)
                   if f.endswith(('png', 'jpg', 'jpeg', 'webp'))]

    # Empty directory: nothing to merge. (The original crashed here — an
    # unpack of zip(*[]) raises ValueError.)
    if not image_paths:
        return None, None

    images = [Image.open(p) for p in image_paths]
    widths, heights = zip(*(img.size for img in images))

    total_width = sum(widths)
    max_height = max(heights)

    if total_width != 0 and max_height != 0:
        # Paste the images side by side onto a blank canvas.
        new_image = Image.new('RGB', (total_width, max_height))
        x_offset = 0
        for img in images:
            new_image.paste(img, (x_offset, 0))
            x_offset += img.width
    else:
        # Degenerate zero-area inputs: no merged image can be produced.
        # (The original still ran the paste loop and raised AttributeError.)
        new_image = None

    # Release source file handles before any directory deletion below.
    for img in images:
        img.close()

    if not saved_to_local:
        return new_image, None

    if not merge_dir_into_image:    # Preserve the dir, adding the merged image to it
        merged_image_path = os.path.join(directory, 'merged_image.png')
    else:   # Delete the dir, and save the merged image under the dir's name
        shutil.rmtree(directory)
        merged_image_path = directory

    if new_image:
        new_image.save(merged_image_path)
        print(f"Merged image saved to {merged_image_path}")
    return new_image, merged_image_path



def blender_step(infinigen_installation_path, blender_file_path, blender_render_script_path, script_path, render_dir, merge_all_renders=False, replace_if_overlap=True, merge_dir_into_image=False):

    '''
    Generate a rendered image with given script_path at render_dir.

    Inputs:
        infinigen_installation_path: path to the blender executable used for rendering
        blender_file_path: file path to the .blend base file
        blender_render_script_path: file path to the render script of blender scene
        script_path: file path to the script we want to render
        render_dir: dir path to save the rendered images
        merge_all_renders[optional]: True will merge all images in render_dir
        replace_if_overlap[optional]: False will skip if the render_dir exists
            and is non-empty, and True will proceed to replace every
            overlapping render
        merge_dir_into_image[optional]: True will delete the render_dir and
            replace it with the merged image

    Outputs:
        None if skipped, False if the render produced no output, True otherwise.

    Raises:
        subprocess.CalledProcessError if the blender process exits non-zero.
    '''

    def is_directory_empty(directory_path):
        # Check if the directory exists and is indeed a directory
        if not os.path.isdir(directory_path):
            raise ValueError(f"{directory_path} is not a valid directory path.")

        # List the contents of the directory
        return len(os.listdir(directory_path)) == 0

    assert blender_file_path is not None and blender_render_script_path is not None

    if replace_if_overlap:  # Just overwrite the files
        os.makedirs(render_dir, exist_ok=True)
    else:
        if os.path.isdir(render_dir) and not is_directory_empty(render_dir): # If such dir already exists and is non-empty, skip
            return None

        os.makedirs(render_dir, exist_ok=True)

    print('blender_render_script_path: ', blender_render_script_path)
    print('script_path: ', script_path)
    print('render_dir: ', render_dir)

    # Run blender headless. Pass the argv list directly (shell=False): the
    # original joined it into a shell string, which breaks on paths containing
    # spaces and is open to shell injection through file names.
    command = [infinigen_installation_path, "--background", blender_file_path,
                    "--python", blender_render_script_path,
                    "--", script_path, render_dir]
    subprocess.run(command, check=True)

    if is_directory_empty(render_dir):
        print(f"The following bpy script didn't run correctly in blender:{script_path}")
        return False
        # raise CodeExecutionException
    else:
        if merge_all_renders:
            merge_images_in_directory(render_dir, saved_to_local=True, merge_dir_into_image=merge_dir_into_image)

    return True

import sys
import numpy as np
from PIL import Image

import torch
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from transformers import CLIPProcessor, CLIPModel



# Cache of loaded CLIP (model, processor) pairs, keyed by model name.
# Loading from disk/hub is expensive; the original reloaded on every call.
_CLIP_CACHE = {}


def _get_clip(model_name):
    """Return a memoized (CLIPModel, CLIPProcessor) pair for `model_name`."""
    if model_name not in _CLIP_CACHE:
        _CLIP_CACHE[model_name] = (
            CLIPModel.from_pretrained(model_name),
            CLIPProcessor.from_pretrained(model_name),
        )
    return _CLIP_CACHE[model_name]


def clip_similarity(image1, image2):
    """
    Compute the CLIP similarity between two PIL images.

    Args:
    image1 (PIL.Image): The first input image.
    image2 (PIL.Image): The second input image.

    Returns:
    float: The CLIP similarity between the two images.
    """
    # Resize the second image so both go through the processor at one size.
    if image1.size != image2.size:
        image2 = image2.resize(image1.size)

    # Load (cached) CLIP model and processor.
    model, processor = _get_clip("openai/clip-vit-base-patch32")

    # Preprocess the images
    inputs = processor(images=[image1, image2], return_tensors="pt")

    # Compute the features for the images
    with torch.no_grad():
        features = model.get_image_features(**inputs)

    # Compute the cosine similarity between the image features
    sim = torch.nn.functional.cosine_similarity(features[0], features[1], dim=-1)

    return sim.item()

def photometric_loss(image1:Image.Image, image2:Image.Image) -> float:
    """
    Mean squared error between two PIL images in normalized RGB space.

    Args:
    image1 (PIL.Image): The first input image.
    image2 (PIL.Image): The second input image; resized to image1's size
        when the sizes differ.

    Returns:
    float: The photometric (MSE) loss between the two images.
    """

    # Bring both images to a common resolution before comparing.
    if image1.size != image2.size:
        image2 = image2.resize(image1.size)

    # Keep only the RGB channels (drops alpha if present), then map the
    # 0-255 pixel values into [0, 1] as float32.
    first = np.array(image1)[:, :, :3].astype(np.float32) / 255.0
    second = np.array(image2)[:, :, :3].astype(np.float32) / 255.0

    # Mean of the per-pixel squared differences.
    return np.mean(np.square(first - second))

    
def img2text_clip_similarity(image, text):
    """
    Compute the CLIP (ViT-L/14) similarity between a PIL image and a text.

    Args:
    image (PIL.Image): The input image.
    text (str): The input text.

    Returns:
    float: The cosine similarity between the image and text embeddings.
    """

    checkpoint = "openai/clip-vit-large-patch14"

    # Model and matching processor for the large CLIP variant.
    model = CLIPModel.from_pretrained(checkpoint)
    processor = CLIPProcessor.from_pretrained(checkpoint)

    # Tokenize the text and preprocess the image in one call.
    inputs = processor(text=text, images=image, return_tensors="pt")

    # Embed both modalities without tracking gradients.
    with torch.no_grad():
        img_features = model.get_image_features(pixel_values=inputs.pixel_values)
        txt_features = model.get_text_features(input_ids=inputs.input_ids)

    # Cosine similarity between the image and text embeddings.
    return torch.nn.functional.cosine_similarity(img_features, txt_features, dim=-1).item()

    
def img2img_clip_similarity(image1, image2):
    """
    Compute the CLIP similarity between two PIL images.

    Args:
    image1 (PIL.Image): The first input image.
    image2 (PIL.Image): The second input image.

    Returns:
    float: The CLIP similarity between the two images.

    Fix: the original converted the PIL images with torchvision's ToTensor
    (which scales pixels to [0, 1]) and then fed the resulting tensors to
    CLIPProcessor, which by default rescales inputs by 1/255 again —
    double-normalizing the pixels. Passing the PIL images straight to the
    processor (as `clip_similarity` does) lets it handle scaling once.
    """

    # Bring both images to the same size before preprocessing.
    if image1.size != image2.size:
        image2 = image2.resize(image1.size)

    # Load the CLIP model
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

    # Load the CLIP processor
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    # Preprocess the images directly — the processor handles resizing,
    # rescaling and normalization itself.
    inputs = processor(images=[image1, image2], return_tensors="pt")

    # Compute the features for the images
    with torch.no_grad():
        features = model.get_image_features(**inputs)

    # Compute the cosine similarity between the image features
    sim = torch.nn.functional.cosine_similarity(features[0], features[1], dim=-1)

    return sim.item()

def tree_dim_parse(tree_dims):
    """
    Parse a tree-dimension string of the form "DxB" into an (int, int) tuple.

    Args:
    tree_dims (str): dimensions string, e.g. "4x8".

    Returns:
    tuple[int, int]: (depth, breadth).

    Raises:
    ValueError: if the input is not exactly two integers separated by 'x'.
    """
    try:
        depth, breadth = tree_dims.split('x')
        return (int(depth), int(breadth))
    # Catch only the expected failures (bad split/int, or a non-string input);
    # the original bare `except:` swallowed everything, including
    # KeyboardInterrupt. Chain the cause for easier debugging.
    except (ValueError, AttributeError, TypeError) as exc:
        raise ValueError('The tree_dims input format is not correct! Please make sure you enter something like "dxb". ') from exc

# class VLMSystem():
#     def __init__(self) -> None:
#         self.env = os.environ.copy()

#     def run(self, blender_file_path, start_script, start_render, goal_render, blender_render_script_path, task_instance_id, task, infinigen_installation_path):
#         '''
#         Generation and potentially selection process of the VLM system.

#         Inputs:
#             blender_file_path: file path to the .blend base file
#             start_file_path: file path to the start.py, the script for start scene
#             start_render_path: dir path to the rendered images of start scene
#             goal_render: dir path to the rendered images of goal scene
#             blender_render_script_path: file path to the render script of blender scene
#             task: name of the task, like `geometry`, `placement`
#             task_instance_id: f'{task}{i}', like `placement1`, `geometry2`
#             infinigen_installation_path: file/dir path to infinigen blender executable file for background rendering

#         Outputs:
#             proposal_edits_paths: a list of file paths to proposal scripts from the VLM system 
#             proposal_renders_paths: a dictionary with proposal_edits_paths as keys and paths to their respective renders as values 
#             selected_edit_path[optional]: if applicable, the file path to the VLM-system-selected proposal script
#             selected_render_path[optional]: if applicable, the dir path to the renders of the VLM-system-selected proposal script
#         '''

#         task_translate = {
#                 'geometry': 'geonodes',
#                 'material': 'material',
#                 'blendshape': 'shapekey',
#                 'placement': 'placement',
#                 'lighting': 'lighting'
#         }

#         task = task_translate[task]

#         config_dict = {
#             'task':{'type': task},
#             'credentials':{
#                 'openai': '/home/richard/Documents/system/openai_api.txt'
#             },
#             'input':{
#                 'text_prompt': None,
#                 'input_image': f'{start_render}/render1.png',
#                 'target_code': None,
#             },
#             'output':{
#                 'output_dir': f"output/{task_instance_id}/"
#             },
#             'run_config':{
#                 'blender_command': infinigen_installation_path,
#                 'edit_style': "rewrite_code",
#                 'num_tries': 1,
#                 'enable_visual_imagination': False, 
#                 'enable_hypothesis_reversion': True,
#                 'variants': [
#                     "tune"
#                 ],
#                 'tree_dims': [
#                     "2x2"
#                 ],
#                 'edit_generator_type': "GPT4V",
#                 'state_evaluator_type': "GPT4V",
#                 'max_concurrent_rendering_processes': 4,
#                 'max_concurrent_evaluation_requests': 2,
#                 'max_concurrent_generator_requests': 4
#             }
#         }
#         import yaml
#         config_file_path = '/home/richard/Documents/blendergym_test/temp.yml'

#         with open(config_file_path, 'w') as file:
#             yaml.dump(config_dict, file)

#         command = f'''
#             cd /home/richard/Documents/system && \

#             python main.py \
#                 --starter_blend {blender_file_path} \
#                 --blender_base {blender_render_script_path} \
#                 --blender_script {start_script} \
#                 --config {config_file_path}
#         '''

#         # print(f'config_dict: {config_dict}')
#         # print(f'command: {command}')

#         # subprocess.run(command, shell=True, env=self.env)

#         proposal_edits_dir_path = f'/home/richard/Documents/system/output/{task_instance_id}/instance0/tune_d2_b2/scripts'
#         proposal_renders_dir_path = f'/home/richard/Documents/system/output/{task_instance_id}/instance0/tune_d2_b2/renders'
#         proposal_edits_paths = [os.path.join(proposal_edits_dir_path, edit_path) for edit_path in os.listdir(proposal_edits_dir_path)]
#         proposal_renders_paths = [os.path.join(proposal_renders_dir_path, render_path) for render_path in os.listdir(proposal_renders_dir_path)]
#         # selected_edit_path = '/home/richard/Documents/system/output/task_instance_id/instance0/tune_d2_b3/renders'
#         # selected_render_path = '/home/richard/Documents/system/output/task_instance_id/instance0/tune_d2_b3/renders'
#         return proposal_edits_paths, proposal_renders_paths, None, None