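# Source: MarioDiffusion-MLM-regular0 / text_to_level_diffusion.py
# Hugging Face file-page header (not part of the program), kept for provenance:
#   schrum2's picture
#   Commit message: "Loading into root will supposedly make them easier to find"
#   Commit: a09cfc1 (verified)
#   Page links: raw, history, blame
#   File size: 8.59 kB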
from interactive_generation import InteractiveGeneration
import torch
from level_dataset import visualize_samples, convert_to_level_format, positive_negative_caption_split
from caption_match import compare_captions, process_scene_segments
from create_ascii_captions import assign_caption
from util import extract_tileset
from sampler import scene_to_ascii
import argparse
import common_settings as common_settings
from sampler import SampleOutput
from pipeline_loader import get_pipeline
def parse_args(argv=None):
    """Parse the command-line options for interactive level generation.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, which is what the
            zero-argument call has always done.

    Returns:
        argparse.Namespace with the attributes ``model_path``, ``tileset``,
        ``describe_absence``, ``automatic_negative_captions`` and ``game``.
    """
    parser = argparse.ArgumentParser(description="Generate levels using a trained diffusion model")

    # Model and generation parameters
    parser.add_argument("--model_path", type=str, required=True, help="Path to the trained diffusion model")
    # Raw string: the Windows-style path contains backslashes that are not valid
    # escape sequences (\T, \S, \s). The value is unchanged, but the raw form
    # avoids the invalid-escape warning raised by newer Python versions.
    parser.add_argument("--tileset", default=r'..\TheVGLC\Super Mario Bros\smb.json', help="Descriptions of individual tile types")
    #parser.add_argument("--describe_locations", action="store_true", default=False, help="Include location descriptions in the captions")
    parser.add_argument("--describe_absence", action="store_true", default=False, help="Indicate when there are no occurrences of an item or structure")
    parser.add_argument("--automatic_negative_captions", action="store_true", default=False, help="Automatically create negative captions for prompts so the user doesn't have to")
    parser.add_argument(
        "--game",
        type=str,
        default="Mario",
        choices=["Mario", "LR"],
        help="Which game to create a model for (affects sample style and tile count)"
    )

    return parser.parse_args(argv)
class InteractiveLevelGeneration(InteractiveGeneration):
    """Interactive text-to-level generator built on a diffusion pipeline.

    Loads the pipeline found at ``args.model_path``, generates a level for each
    set of user-supplied parameters, prints how well the generated level's
    caption matches the prompt, and optionally runs ``SampleOutput.run_astar``
    on the result.
    """

    def __init__(self, args):
        """Register the prompt parameters, then load the pipeline and tileset.

        Args:
            args: Parsed command-line namespace. This constructor reads
                ``game``, ``model_path``, ``automatic_negative_captions`` and
                ``tileset``.

        Raises:
            ValueError: If automatic negative captions are requested but the
                loaded pipeline does not support negative prompts.
        """
        # Derive the default width from the selected game rather than from a
        # module-level ``width`` global, which only exists when this file is
        # executed as a script.
        if args.game == "LR":
            default_width = common_settings.LR_WIDTH
        else:
            default_width = common_settings.MARIO_WIDTH

        super().__init__(
            {
                "caption": str,
                "width": int,
                "negative_prompt": str,
                "start_seed": int,
                "end_seed": int,
                "num_inference_steps": int,
                "guidance_scale": float
            },
            default_parameters={
                "width": default_width,
                "start_seed": 1,
                "end_seed": 1,  # Will be set to start_seed if blank
                "num_inference_steps": common_settings.NUM_INFERENCE_STEPS,
                "guidance_scale": common_settings.GUIDANCE_SCALE,
                "caption": "",
                "negative_prompt": ""
            }
        )
        self.args = args

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.pipe = get_pipeline(args.model_path).to(self.device)
        self.pipe.print_unet_architecture()
        #self.pipe.save_unet_architecture_pdf(height, width)

        supports_negative = self.pipe.supports_negative_prompt
        if args.automatic_negative_captions and not supports_negative:
            raise ValueError("Automatic negative caption generation is not possible with a model that doesn't support it")

        # The user is not asked for a negative prompt when it is generated
        # automatically or when the model cannot use one.
        if args.automatic_negative_captions or not supports_negative:
            self.input_parameters.pop('negative_prompt', None)
            self.default_parameters.pop('negative_prompt', None)

        if args.tileset:
            _, self.id_to_char, self.char_to_id, self.tile_descriptors = extract_tileset(args.tileset)

        # Lode Runner width is set in get_extra_params, so it is not prompted for.
        if self.args.game == "LR":
            del self.input_parameters["width"]

        print(f"Tileset in use: {self.args.tileset}")

    def generate_image(self, param_values, generator, **extra_params):
        """Generate one level, report caption agreement, and return its image.

        Args:
            param_values: Keyword arguments forwarded to the pipeline call.
                Modified in place when automatic negative captions are enabled
                (a ``negative_prompt`` entry is added).
            generator: Passed to the pipeline as ``generator``.
            **extra_params: Accepted for interface compatibility; unused here.

        Returns:
            The result of ``visualize_samples`` for the generated images.

        Raises:
            ValueError: If ``self.args.game`` is neither ``"Mario"`` nor
                ``"LR"``, or if the answer to the play prompt is neither
                ``y`` nor ``n``.
        """
        # A missing caption is treated as an empty prompt everywhere below.
        prompt = param_values.get("caption", "")

        if self.args.automatic_negative_captions:
            _, negative = positive_negative_caption_split(prompt, True)
            param_values["negative_prompt"] = negative

        images = self.pipe(
            generator=generator,
            **param_values
        ).images

        # Convert the first sample to a grid of tile indices
        first_sample = images[0].unsqueeze(0)
        scene = convert_to_level_format(first_sample)[0].tolist()

        # Pick the captioning/scoring helpers of whichever game is being played
        game = self.args.game
        if game == "Mario":
            caption_fn = assign_caption
            compare_fn = compare_captions
            segments_fn = process_scene_segments
            level_width = common_settings.MARIO_WIDTH
        elif game == "LR":
            tile_count = common_settings.LR_TILE_COUNT
            scene = [[tile % tile_count for tile in row] for row in scene]
            # NOTE(review): the lr_* helpers are not imported in the visible part
            # of this file, so this branch raises NameError unless they are
            # provided elsewhere -- confirm and add the missing imports.
            caption_fn = lr_assign_caption
            compare_fn = lr_compare_captions
            segments_fn = lr_process_scene_segments
            level_width = common_settings.LR_WIDTH
        else:
            raise ValueError(f"Unknown game: {game}")

        actual_caption = caption_fn(scene, self.id_to_char, self.char_to_id, self.tile_descriptors, False, self.args.describe_absence)

        # Only the Lode Runner path prints the full caption of the result.
        if game == "LR":
            print(f"Describe resulting image: {actual_caption}")

        compare_score = compare_fn(prompt, actual_caption)
        print(f"Comparison score: {compare_score}")

        # Called with verbose=True; the returned scores are not used here.
        segments_fn(
            scene=scene,
            segment_width=level_width,
            prompt=prompt,
            id_to_char=self.id_to_char,
            char_to_id=self.char_to_id,
            tile_descriptors=self.tile_descriptors,
            describe_locations=False,  #self.args.describe_locations,
            describe_absence=self.args.describe_absence,
            verbose=True
        )

        # Ask if user wants to play level
        play_level = input("Do you want to play this level? (y/n): ").strip().lower()
        if play_level == 'y':
            print("Playing level...")
            char_grid = scene_to_ascii(scene, self.id_to_char, False)
            level = SampleOutput(level=char_grid, use_snes_graphics=False)
            console_output = level.run_astar()
            print(console_output)
        elif play_level == 'n':
            print("Level not played.")
        else:
            raise ValueError(f"Unknown input: {play_level}")

        return visualize_samples(images)

    def get_extra_params(self, param_values):
        """Normalise ``param_values`` in place before generation.

        Drops an empty ``negative_prompt`` or ``caption`` entry, sets
        ``output_type`` to ``"tensor"``, and for Lode Runner sets ``height``
        and ``width`` from ``common_settings``.

        Args:
            param_values: Parameter dictionary to adjust; mutated in place.

        Returns:
            An empty dict.
        """
        # .get() compares against "" without raising when a key is absent.
        if param_values.get("negative_prompt") == "":
            del param_values["negative_prompt"]

        if param_values.get("caption") == "":
            del param_values["caption"]

        param_values["output_type"] = "tensor"

        # Lode Runner
        if self.args.game == "LR":
            param_values["height"] = common_settings.LR_HEIGHT
            param_values["width"] = common_settings.LR_WIDTH

        return {}
if __name__ == "__main__":
    # Script entry point: parse options, apply per-game settings, then start
    # the interactive generation loop.
    args = parse_args()

    # ``height`` and ``width`` are kept as module-level names so that any code
    # reading them as globals keeps working.
    # NOTE(review): args.tileset is overwritten in both branches, so a
    # user-supplied --tileset value is ignored -- confirm this is intended.
    # The tileset paths are raw strings: they contain backslashes that are not
    # valid escape sequences, and the raw form keeps the same value without
    # triggering an invalid-escape warning.
    if args.game == "Mario":
        args.num_tiles = common_settings.MARIO_TILE_COUNT
        height = common_settings.MARIO_HEIGHT
        width = common_settings.MARIO_WIDTH
        args.tile_size = common_settings.MARIO_TILE_PIXEL_DIM
        args.tileset = r'..\TheVGLC\Super Mario Bros\smb.json'
    elif args.game == "LR":
        args.num_tiles = common_settings.LR_TILE_COUNT
        height = common_settings.LR_HEIGHT
        width = common_settings.LR_WIDTH
        args.tile_size = common_settings.LR_TILE_PIXEL_DIM
        args.tileset = r'..\TheVGLC\Lode Runner\Loderunner.json'
    else:
        raise ValueError(f"Unknown game: {args.game}")

    ig = InteractiveLevelGeneration(args)
    ig.start()