File size: 18,611 Bytes
a12c07f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
import os
import sys
import subprocess
import argparse
import time
import json
from PIL import Image
import shutil
from torchvision import transforms


env = os.environ.copy()

## Focus on model swapping; make a default_BA.py (all BA-based structure) that can reproduce our results; also allow a customized system.
##

def BlenderAlchemy_run(blender_file_path, start_script, start_render, goal_render, blender_render_script_path, task_instance_id, task, infinigen_installation_path, generator_type, evaluator_type, starter_time=None, tree_dims=(4, 8)):
    '''
    Run one BlenderAlchemy generation (and selection) pass of the VLM system.

    Writes a temporary YAML config, shells out to `system/main.py`, then
    collects the proposal scripts/renders and the winner of the final
    search iteration.

    Inputs:
        blender_file_path: file path to the .blend base file
        start_script: file path to start.py, the script for the start scene
        start_render: dir path to the rendered images of the start scene
            (NOTE(review): not referenced anywhere in this function body)
        goal_render: dir path to the rendered images of the goal scene;
            `{goal_render}/render1.png` is used as the target image
        blender_render_script_path: file path to the render script of the blender scene
        task_instance_id: f'{task}{i}', like `placement1`, `geometry2`
        task: name of the task, like `geometry`, `placement`
        infinigen_installation_path: file/dir path to the infinigen blender
            executable used for background rendering
        generator_type: edit-generator model identifier forwarded into the config
        evaluator_type: state-evaluator model identifier forwarded into the config
        starter_time: optional tag; outputs go to outputs/outputs_<starter_time>
        tree_dims: (depth, breadth) of the search tree, default (4, 8)

    Outputs:
        proposal_edits_paths: list of file paths to proposal scripts from the VLM system
        proposal_renders_paths: list of file paths to the proposal renders
            (a flat list, parallel in origin but NOT keyed by edit path)
        selected_edit_path: file path to the winning proposal script of the
            last iteration (read from the thought_process JSON)
        selected_render_path: file path to the render of the winning proposal
    '''

    # Map public task names onto the internal task identifiers used by system/main.py.
    task_translate = {
            'geometry': 'geonodes',
            'material': 'material',
            'blendshape': 'shapekey',
            'placement': 'placement',
            'lighting': 'lighting'
    }

    task = task_translate[task]
    variants = ['tune_leap']

    # To automatically differentiate the inference results
    if starter_time:
        output_folder_name = f"outputs/outputs_{starter_time}"
    else:
        output_folder_name = "outputs/outputs_test"

    config_dict = {     # This should allow plug-in for different models
        'task':{'type': task},
        'credentials':{
            'openai': 'credentials/openai_api.txt',
            'claude': 'credentials/claude_api.txt',
            'gemini': 'credentials/gemini_api.txt',
        },
        'input':{
            'text_prompt': None,
            'input_image': f'{goal_render}/render1.png',
            'target_code': None,
        },
        'output':{
            'output_dir': f"{output_folder_name}/{task_instance_id}/"
        },
        'run_config':{
            'blender_command': infinigen_installation_path,
            'edit_style': "edit_code",
            'num_tries': 1,
            'enable_visual_imagination': False, 
            'enable_hypothesis_reversion': True,
            'variants': variants,
            'tree_dims': [
                f"{tree_dims[0]}x{tree_dims[1]}"
            ],
            'edit_generator_type': generator_type,
            'state_evaluator_type': evaluator_type,
            'max_concurrent_rendering_processes': 1,
            'max_concurrent_evaluation_requests': 1,
            'max_concurrent_generator_requests': 1
        }
    }
    import yaml
    # NOTE(review): a fixed temp.yml in the CWD is not safe for concurrent runs.
    config_file_path = os.path.abspath('temp.yml')

    with open(config_file_path, 'w') as file:
        yaml.dump(config_dict, file)

    # NOTE(review): shell=True with interpolated paths — breaks if any path
    # contains spaces or shell metacharacters.
    command = f'''
        cd system && \

        python main.py \
            --starter_blend {blender_file_path} \
            --blender_base {blender_render_script_path} \
            --blender_script {start_script} \
            --config {config_file_path}
    '''

    print(f'config_dict: {config_dict}')
    print(f'command: {command}')

    subprocess.run(command, shell=True, env=env)

    # Output layout produced by system/main.py: <variant>_d<depth>_b<breadth>/...
    proposal_edits_dir_path = f'system/{output_folder_name}/{task_instance_id}/instance0/{variants[0]}_d{tree_dims[0]}_b{tree_dims[1]}/scripts'
    proposal_renders_dir_path = f'system/{output_folder_name}/{task_instance_id}/instance0/{variants[0]}_d{tree_dims[0]}_b{tree_dims[1]}/renders'
    proposal_edits_paths = [os.path.join(proposal_edits_dir_path, edit_path) for edit_path in os.listdir(proposal_edits_dir_path)]
    proposal_renders_paths = [os.path.join(proposal_renders_dir_path, render_path) for render_path in os.listdir(proposal_renders_dir_path)]

    # Selected edit of the final iteration (iterations are 0-indexed, so the
    # last one is depth - 1); its last entry holds the tournament winner.
    last_iter_info = f'system/{output_folder_name}/{task_instance_id}/instance0/{variants[0]}_d{tree_dims[0]}_b{tree_dims[1]}/thought_process/iteration_{tree_dims[0]-1}.json'
    with open(last_iter_info, 'r') as file:
        info = json.load(file)
    
    # Paths inside the JSON are relative to the system/ directory.
    selected_edit_path = "system/" + info[-1]['winner_code']
    selected_render_path = "system/" + info[-1]['winner_image']

    return proposal_edits_paths, proposal_renders_paths, selected_edit_path, selected_render_path

def merge_images_in_directory(directory, saved_to_local=True, merge_dir_into_image=True):
    '''
    Merge all images in the given directory horizontally into a single image.

    Inputs:
        directory: dir path containing the images (png/jpg/jpeg/webp)
        saved_to_local: if True, save the merged image to disk
        merge_dir_into_image: if True (and saving), delete the directory and
            save the merged image at the directory's own path; if False, save
            `merged_image.png` inside the directory

    Outputs:
        (merged PIL.Image or None, saved path or None). Returns (None, None)
        when the directory contains no images.
    '''
    # Collect candidate image paths.
    image_paths = [os.path.join(directory, f) for f in os.listdir(directory)
                   if f.endswith(('png', 'jpg', 'jpeg', 'webp'))]

    # Empty directory: nothing to merge. (The original crashed here — an
    # unpack of zip(*[]) raises ValueError.)
    if not image_paths:
        return None, None

    images = [Image.open(p) for p in image_paths]
    widths, heights = zip(*(img.size for img in images))

    total_width = sum(widths)
    max_height = max(heights)

    if total_width != 0 and max_height != 0:
        # Paste the images side by side onto a blank canvas.
        new_image = Image.new('RGB', (total_width, max_height))
        x_offset = 0
        for img in images:
            new_image.paste(img, (x_offset, 0))
            x_offset += img.width
    else:
        # Degenerate zero-area inputs: no merged image can be produced.
        # (The original still ran the paste loop and raised AttributeError.)
        new_image = None

    # Release source file handles before any directory deletion below.
    for img in images:
        img.close()

    if not saved_to_local:
        return new_image, None

    if not merge_dir_into_image:    # Preserve the dir, adding the merged image to it
        merged_image_path = os.path.join(directory, 'merged_image.png')
    else:   # Delete the dir, and save the merged image under the dir's name
        shutil.rmtree(directory)
        merged_image_path = directory

    if new_image:
        new_image.save(merged_image_path)
        print(f"Merged image saved to {merged_image_path}")
    return new_image, merged_image_path



def blender_step(infinigen_installation_path, blender_file_path, blender_render_script_path, script_path, render_dir, merge_all_renders=False, replace_if_overlap=True, merge_dir_into_image=False):

    '''
    Generate a rendered image with given script_path at render_dir.

    Inputs:
        infinigen_installation_path: path to the blender executable used for rendering
        blender_file_path: file path to the .blend base file
        blender_render_script_path: file path to the render script of blender scene
        script_path: file path to the script we want to render
        render_dir: dir path to save the rendered images
        merge_all_renders[optional]: True will merge all images in render_dir
        replace_if_overlap[optional]: False will skip if the render_dir exists
            and is non-empty, and True will proceed to replace every
            overlapping render
        merge_dir_into_image[optional]: True will delete the render_dir and
            replace it with the merged image

    Outputs:
        None if skipped, False if the render produced no output, True otherwise.

    Raises:
        subprocess.CalledProcessError if the blender process exits non-zero.
    '''

    def is_directory_empty(directory_path):
        # Check if the directory exists and is indeed a directory
        if not os.path.isdir(directory_path):
            raise ValueError(f"{directory_path} is not a valid directory path.")

        # List the contents of the directory
        return len(os.listdir(directory_path)) == 0

    assert blender_file_path is not None and blender_render_script_path is not None

    if replace_if_overlap:  # Just overwrite the files
        os.makedirs(render_dir, exist_ok=True)
    else:
        if os.path.isdir(render_dir) and not is_directory_empty(render_dir): # If such dir already exists and is non-empty, skip
            return None

        os.makedirs(render_dir, exist_ok=True)

    print('blender_render_script_path: ', blender_render_script_path)
    print('script_path: ', script_path)
    print('render_dir: ', render_dir)

    # Run blender headless. Pass the argv list directly (shell=False): the
    # original joined it into a shell string, which breaks on paths containing
    # spaces and is open to shell injection through file names.
    command = [infinigen_installation_path, "--background", blender_file_path,
                    "--python", blender_render_script_path,
                    "--", script_path, render_dir]
    subprocess.run(command, check=True)

    if is_directory_empty(render_dir):
        print(f"The following bpy script didn't run correctly in blender:{script_path}")
        return False
        # raise CodeExecutionException
    else:
        if merge_all_renders:
            merge_images_in_directory(render_dir, saved_to_local=True, merge_dir_into_image=merge_dir_into_image)

    return True

import sys
import numpy as np
from PIL import Image

import torch
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from transformers import CLIPProcessor, CLIPModel



# Cache of loaded CLIP (model, processor) pairs, keyed by model name.
# Loading from disk/hub is expensive; the original reloaded on every call.
_CLIP_CACHE = {}


def _get_clip(model_name):
    """Return a memoized (CLIPModel, CLIPProcessor) pair for `model_name`."""
    if model_name not in _CLIP_CACHE:
        _CLIP_CACHE[model_name] = (
            CLIPModel.from_pretrained(model_name),
            CLIPProcessor.from_pretrained(model_name),
        )
    return _CLIP_CACHE[model_name]


def clip_similarity(image1, image2):
    """
    Compute the CLIP similarity between two PIL images.

    Args:
    image1 (PIL.Image): The first input image.
    image2 (PIL.Image): The second input image.

    Returns:
    float: The CLIP similarity between the two images.
    """
    # Resize the second image so both go through the processor at one size.
    if image1.size != image2.size:
        image2 = image2.resize(image1.size)

    # Load (cached) CLIP model and processor.
    model, processor = _get_clip("openai/clip-vit-base-patch32")

    # Preprocess the images
    inputs = processor(images=[image1, image2], return_tensors="pt")

    # Compute the features for the images
    with torch.no_grad():
        features = model.get_image_features(**inputs)

    # Compute the cosine similarity between the image features
    sim = torch.nn.functional.cosine_similarity(features[0], features[1], dim=-1)

    return sim.item()

def photometric_loss(image1:Image.Image, image2:Image.Image) -> float:
    """
    Mean squared error between two PIL images in normalized RGB space.

    Args:
    image1 (PIL.Image): The first input image.
    image2 (PIL.Image): The second input image; resized to image1's size
        when the sizes differ.

    Returns:
    float: The photometric (MSE) loss between the two images.
    """

    # Bring both images to a common resolution before comparing.
    if image1.size != image2.size:
        image2 = image2.resize(image1.size)

    # Keep only the RGB channels (drops alpha if present), then map the
    # 0-255 pixel values into [0, 1] as float32.
    first = np.array(image1)[:, :, :3].astype(np.float32) / 255.0
    second = np.array(image2)[:, :, :3].astype(np.float32) / 255.0

    # Mean of the per-pixel squared differences.
    return np.mean(np.square(first - second))

    
def img2text_clip_similarity(image, text):
    """
    Compute the CLIP (ViT-L/14) similarity between a PIL image and a text.

    Args:
    image (PIL.Image): The input image.
    text (str): The input text.

    Returns:
    float: The cosine similarity between the image and text embeddings.
    """

    checkpoint = "openai/clip-vit-large-patch14"

    # Model and matching processor for the large CLIP variant.
    model = CLIPModel.from_pretrained(checkpoint)
    processor = CLIPProcessor.from_pretrained(checkpoint)

    # Tokenize the text and preprocess the image in one call.
    inputs = processor(text=text, images=image, return_tensors="pt")

    # Embed both modalities without tracking gradients.
    with torch.no_grad():
        img_features = model.get_image_features(pixel_values=inputs.pixel_values)
        txt_features = model.get_text_features(input_ids=inputs.input_ids)

    # Cosine similarity between the image and text embeddings.
    return torch.nn.functional.cosine_similarity(img_features, txt_features, dim=-1).item()

    
def img2img_clip_similarity(image1, image2):
    """
    Compute the CLIP similarity between two PIL images.

    Args:
    image1 (PIL.Image): The first input image.
    image2 (PIL.Image): The second input image.

    Returns:
    float: The CLIP similarity between the two images.

    Fix: the original converted the PIL images with torchvision's ToTensor
    (which scales pixels to [0, 1]) and then fed the resulting tensors to
    CLIPProcessor, which by default rescales inputs by 1/255 again —
    double-normalizing the pixels. Passing the PIL images straight to the
    processor (as `clip_similarity` does) lets it handle scaling once.
    """

    # Bring both images to the same size before preprocessing.
    if image1.size != image2.size:
        image2 = image2.resize(image1.size)

    # Load the CLIP model
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

    # Load the CLIP processor
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    # Preprocess the images directly — the processor handles resizing,
    # rescaling and normalization itself.
    inputs = processor(images=[image1, image2], return_tensors="pt")

    # Compute the features for the images
    with torch.no_grad():
        features = model.get_image_features(**inputs)

    # Compute the cosine similarity between the image features
    sim = torch.nn.functional.cosine_similarity(features[0], features[1], dim=-1)

    return sim.item()

def tree_dim_parse(tree_dims):
    """
    Parse a tree-dimension string of the form "DxB" into an (int, int) tuple.

    Args:
    tree_dims (str): dimensions string, e.g. "4x8".

    Returns:
    tuple[int, int]: (depth, breadth).

    Raises:
    ValueError: if the input is not exactly two integers separated by 'x'.
    """
    try:
        depth, breadth = tree_dims.split('x')
        return (int(depth), int(breadth))
    # Catch only the expected failures (bad split/int, or a non-string input);
    # the original bare `except:` swallowed everything, including
    # KeyboardInterrupt. Chain the cause for easier debugging.
    except (ValueError, AttributeError, TypeError) as exc:
        raise ValueError('The tree_dims input format is not correct! Please make sure you enter something like "dxb". ') from exc

# class VLMSystem():
#     def __init__(self) -> None:
#         self.env = os.environ.copy()

#     def run(self, blender_file_path, start_script, start_render, goal_render, blender_render_script_path, task_instance_id, task, infinigen_installation_path):
#         '''
#         Generation and potentially selection process of the VLM system.

#         Inputs:
#             blender_file_path: file path to the .blend base file
#             start_file_path: file path to the start.py, the script for start scene
#             start_render_path: dir path to the rendered images of start scene
#             goal_render: dir path to the rendered images of goal scene
#             blender_render_script_path: file path to the render script of blender scene
#             task: name of the task, like `geometry`, `placement`
#             task_instance_id: f'{task}{i}', like `placement1`, `geometry2`
#             infinigen_installation_path: file/dir path to infinigen blender executable file for background rendering

#         Outputs:
#             proposal_edits_paths: a list of file paths to proposal scripts from the VLM system 
#             proposal_renders_paths: a dictionary with proposal_edits_paths as keys and paths to their respective renders as values 
#             selected_edit_path[optional]: if applicable, the file path to the VLM-system-selected proposal script
#             selected_render_path[optional]: if applicable, the dir path to the renders of the VLM-system-selected proposal script
#         '''

#         task_translate = {
#                 'geometry': 'geonodes',
#                 'material': 'material',
#                 'blendshape': 'shapekey',
#                 'placement': 'placement',
#                 'lighting': 'lighting'
#         }

#         task = task_translate[task]

#         config_dict = {
#             'task':{'type': task},
#             'credentials':{
#                 'openai': '/home/richard/Documents/system/openai_api.txt'
#             },
#             'input':{
#                 'text_prompt': None,
#                 'input_image': f'{start_render}/render1.png',
#                 'target_code': None,
#             },
#             'output':{
#                 'output_dir': f"output/{task_instance_id}/"
#             },
#             'run_config':{
#                 'blender_command': infinigen_installation_path,
#                 'edit_style': "rewrite_code",
#                 'num_tries': 1,
#                 'enable_visual_imagination': False, 
#                 'enable_hypothesis_reversion': True,
#                 'variants': [
#                     "tune"
#                 ],
#                 'tree_dims': [
#                     "2x2"
#                 ],
#                 'edit_generator_type': "GPT4V",
#                 'state_evaluator_type': "GPT4V",
#                 'max_concurrent_rendering_processes': 4,
#                 'max_concurrent_evaluation_requests': 2,
#                 'max_concurrent_generator_requests': 4
#             }
#         }
#         import yaml
#         config_file_path = '/home/richard/Documents/blendergym_test/temp.yml'

#         with open(config_file_path, 'w') as file:
#             yaml.dump(config_dict, file)

#         command = f'''
#             cd /home/richard/Documents/system && \

#             python main.py \
#                 --starter_blend {blender_file_path} \
#                 --blender_base {blender_render_script_path} \
#                 --blender_script {start_script} \
#                 --config {config_file_path}
#         '''

#         # print(f'config_dict: {config_dict}')
#         # print(f'command: {command}')

#         # subprocess.run(command, shell=True, env=self.env)

#         proposal_edits_dir_path = f'/home/richard/Documents/system/output/{task_instance_id}/instance0/tune_d2_b2/scripts'
#         proposal_renders_dir_path = f'/home/richard/Documents/system/output/{task_instance_id}/instance0/tune_d2_b2/renders'
#         proposal_edits_paths = [os.path.join(proposal_edits_dir_path, edit_path) for edit_path in os.listdir(proposal_edits_dir_path)]
#         proposal_renders_paths = [os.path.join(proposal_renders_dir_path, render_path) for render_path in os.listdir(proposal_renders_dir_path)]
#         # selected_edit_path = '/home/richard/Documents/system/output/task_instance_id/instance0/tune_d2_b3/renders'
#         # selected_render_path = '/home/richard/Documents/system/output/task_instance_id/instance0/tune_d2_b3/renders'
#         return proposal_edits_paths, proposal_renders_paths, None, None