Deleting directories, moving files into root
Browse files- captions/caption_match.py +0 -247
- captions/evaluate_caption_order_tolerance.py +0 -288
- captions/util.py +0 -216
captions/caption_match.py
DELETED
|
@@ -1,247 +0,0 @@
|
|
| 1 |
-
from create_ascii_captions import assign_caption
|
| 2 |
-
|
| 3 |
-
# Quantity order for scoring partial matches
|
| 4 |
-
QUANTITY_TERMS = ["one", "two", "a few", "several", "many"]
|
| 5 |
-
|
| 6 |
-
# Topics to compare
|
| 7 |
-
TOPIC_KEYWORDS = [
|
| 8 |
-
#"giant gap", # I think all gaps are subsumed by the floor topic
|
| 9 |
-
"floor", "ceiling",
|
| 10 |
-
"broken pipe", "upside down pipe", "pipe",
|
| 11 |
-
"coin line", "coin",
|
| 12 |
-
"platform", "tower", #"wall",
|
| 13 |
-
"broken cannon", "cannon",
|
| 14 |
-
"ascending staircase", "descending staircase",
|
| 15 |
-
"rectangular",
|
| 16 |
-
"irregular",
|
| 17 |
-
"question block", "loose block",
|
| 18 |
-
"enem" # catch "enemy"/"enemies"
|
| 19 |
-
]
|
| 20 |
-
|
| 21 |
-
# Need list because the order matters
|
| 22 |
-
KEYWORD_TO_NEGATED_PLURAL = [
|
| 23 |
-
(" broken pipe.", ""), # If not the first phrase
|
| 24 |
-
("broken pipe. ", ""), # If the first phrase (after removing all others)
|
| 25 |
-
(" broken cannon.", ""), # If not the first phrase
|
| 26 |
-
("broken cannon. ", ""), # If the first phrase (after removing all others)
|
| 27 |
-
("pipe", "pipes"),
|
| 28 |
-
("cannon", "cannons"),
|
| 29 |
-
("platform", "platforms"),
|
| 30 |
-
("tower", "towers"),
|
| 31 |
-
("staircase", "staircases"),
|
| 32 |
-
("enem", "enemies"),
|
| 33 |
-
("rectangular", "rectangular block clusters"),
|
| 34 |
-
("irregular", "irregular block clusters"),
|
| 35 |
-
("coin line", "coin lines"),
|
| 36 |
-
("coin.", "coins."), # Need period to avoid matching "coin line"
|
| 37 |
-
("question block", "question blocks"),
|
| 38 |
-
("loose block", "loose blocks")
|
| 39 |
-
]
|
| 40 |
-
|
| 41 |
-
BROKEN_TOPICS = 2 # Number of topics that are considered "broken" (e.g., "broken pipe", "broken cannon")
|
| 42 |
-
|
| 43 |
-
# Plural normalization map (irregulars)
|
| 44 |
-
PLURAL_EXCEPTIONS = {
|
| 45 |
-
"enemies": "enemy",
|
| 46 |
-
}
|
| 47 |
-
|
| 48 |
-
def normalize_plural(phrase):
|
| 49 |
-
# Normalize known irregular plurals
|
| 50 |
-
for plural, singular in PLURAL_EXCEPTIONS.items():
|
| 51 |
-
phrase = phrase.replace(plural, singular)
|
| 52 |
-
|
| 53 |
-
# Normalize regular plurals (basic "s" endings)
|
| 54 |
-
words = phrase.split()
|
| 55 |
-
normalized_words = []
|
| 56 |
-
for word in words:
|
| 57 |
-
if word.endswith('s') and not word.endswith('ss'): # avoid "class", "boss"
|
| 58 |
-
singular = word[:-1]
|
| 59 |
-
normalized_words.append(singular)
|
| 60 |
-
else:
|
| 61 |
-
normalized_words.append(word)
|
| 62 |
-
return ' '.join(normalized_words)
|
| 63 |
-
|
| 64 |
-
def extract_phrases(caption, debug=False):
|
| 65 |
-
phrases = [phrase.strip() for phrase in caption.split('.') if phrase.strip()]
|
| 66 |
-
topic_to_phrase = {}
|
| 67 |
-
already_matched_phrases = set() # Track phrases that have been matched
|
| 68 |
-
|
| 69 |
-
for topic in TOPIC_KEYWORDS:
|
| 70 |
-
matching_phrases = []
|
| 71 |
-
|
| 72 |
-
for p in phrases:
|
| 73 |
-
# Only consider phrases that haven't been matched to longer topics
|
| 74 |
-
if topic in p and p not in already_matched_phrases:
|
| 75 |
-
matching_phrases.append(p)
|
| 76 |
-
|
| 77 |
-
if matching_phrases:
|
| 78 |
-
# Filter out "no ..." phrases as equivalent to absence
|
| 79 |
-
phrase = matching_phrases[0]
|
| 80 |
-
if phrase.lower().startswith("no "):
|
| 81 |
-
topic_to_phrase[topic] = None
|
| 82 |
-
if debug:
|
| 83 |
-
print(f"[Extract] Topic '{topic}': detected 'no ...', treating as None")
|
| 84 |
-
else:
|
| 85 |
-
topic_to_phrase[topic] = phrase
|
| 86 |
-
already_matched_phrases.add(phrase) # Mark this phrase as matched
|
| 87 |
-
if debug:
|
| 88 |
-
print(f"[Extract] Topic '{topic}': found phrase '{phrase}'")
|
| 89 |
-
else:
|
| 90 |
-
topic_to_phrase[topic] = None
|
| 91 |
-
if debug:
|
| 92 |
-
print(f"[Extract] Topic '{topic}': no phrase found")
|
| 93 |
-
|
| 94 |
-
return topic_to_phrase
|
| 95 |
-
|
| 96 |
-
def quantity_score(phrase1, phrase2, debug=False):
|
| 97 |
-
def find_quantity(phrase):
|
| 98 |
-
for term in QUANTITY_TERMS:
|
| 99 |
-
if term in phrase:
|
| 100 |
-
return term
|
| 101 |
-
return None
|
| 102 |
-
|
| 103 |
-
qty1 = find_quantity(phrase1)
|
| 104 |
-
qty2 = find_quantity(phrase2)
|
| 105 |
-
|
| 106 |
-
if debug:
|
| 107 |
-
print(f"[Quantity] Comparing quantities: '{qty1}' vs. '{qty2}'")
|
| 108 |
-
|
| 109 |
-
if qty1 and qty2:
|
| 110 |
-
idx1 = QUANTITY_TERMS.index(qty1)
|
| 111 |
-
idx2 = QUANTITY_TERMS.index(qty2)
|
| 112 |
-
diff = abs(idx1 - idx2)
|
| 113 |
-
max_diff = len(QUANTITY_TERMS) - 1
|
| 114 |
-
score = 1.0 - (diff / max_diff)
|
| 115 |
-
if debug:
|
| 116 |
-
print(f"[Quantity] Quantity indices: {idx1} vs. {idx2}, diff: {diff}, score: {score:.2f}")
|
| 117 |
-
return score
|
| 118 |
-
if debug:
|
| 119 |
-
print("[Quantity] At least one quantity missing, assigning partial score 0.1")
|
| 120 |
-
return 0.1
|
| 121 |
-
|
| 122 |
-
def compare_captions(correct_caption, generated_caption, debug=False, return_matches=False):
|
| 123 |
-
correct_phrases = extract_phrases(correct_caption, debug=debug)
|
| 124 |
-
generated_phrases = extract_phrases(generated_caption, debug=debug)
|
| 125 |
-
|
| 126 |
-
total_score = 0.0
|
| 127 |
-
num_topics = len(TOPIC_KEYWORDS)
|
| 128 |
-
|
| 129 |
-
exact_matches = []
|
| 130 |
-
partial_matches = []
|
| 131 |
-
excess_phrases = []
|
| 132 |
-
|
| 133 |
-
if debug:
|
| 134 |
-
print("\n--- Starting Topic Comparison ---\n")
|
| 135 |
-
|
| 136 |
-
for topic in TOPIC_KEYWORDS:
|
| 137 |
-
correct = correct_phrases[topic]
|
| 138 |
-
generated = generated_phrases[topic]
|
| 139 |
-
|
| 140 |
-
if debug:
|
| 141 |
-
print(f"[Topic: {topic}] Correct: {correct} | Generated: {generated}")
|
| 142 |
-
|
| 143 |
-
if correct is None and generated is None:
|
| 144 |
-
total_score += 1.0
|
| 145 |
-
if debug:
|
| 146 |
-
print(f"[Topic: {topic}] Both None — full score: 1.0\n")
|
| 147 |
-
elif correct is None or generated is None:
|
| 148 |
-
total_score += -1.0
|
| 149 |
-
if generated is not None: # Considered an excess phrase
|
| 150 |
-
excess_phrases.append(generated)
|
| 151 |
-
if debug:
|
| 152 |
-
print(f"[Topic: {topic}] One is None — penalty: -1.0\n")
|
| 153 |
-
else:
|
| 154 |
-
# Normalize pluralization before comparison
|
| 155 |
-
norm_correct = normalize_plural(correct)
|
| 156 |
-
norm_generated = normalize_plural(generated)
|
| 157 |
-
|
| 158 |
-
if debug:
|
| 159 |
-
print(f"[Topic: {topic}] Normalized: Correct: '{norm_correct}' | Generated: '{norm_generated}'")
|
| 160 |
-
|
| 161 |
-
if norm_correct == norm_generated:
|
| 162 |
-
total_score += 1.0
|
| 163 |
-
exact_matches.append(generated)
|
| 164 |
-
if debug:
|
| 165 |
-
print(f"[Topic: {topic}] Exact match — score: 1.0\n")
|
| 166 |
-
elif any(term in norm_correct for term in QUANTITY_TERMS) and any(term in norm_generated for term in QUANTITY_TERMS):
|
| 167 |
-
qty_score = quantity_score(norm_correct, norm_generated, debug=debug)
|
| 168 |
-
total_score += qty_score
|
| 169 |
-
partial_matches.append(generated)
|
| 170 |
-
if debug:
|
| 171 |
-
print(f"[Topic: {topic}] Quantity-based partial score: {qty_score:.2f}\n")
|
| 172 |
-
else:
|
| 173 |
-
total_score += 0.1
|
| 174 |
-
partial_matches.append(generated)
|
| 175 |
-
if debug:
|
| 176 |
-
print(f"[Topic: {topic}] Partial match (topic overlap) — score: 0.1\n")
|
| 177 |
-
|
| 178 |
-
if debug:
|
| 179 |
-
print(f"[Topic: {topic}] Current total score: {total_score:.4f}\n")
|
| 180 |
-
|
| 181 |
-
if debug:
|
| 182 |
-
print("total_score before normalization:", total_score)
|
| 183 |
-
print(f"Number of topics: {num_topics}")
|
| 184 |
-
|
| 185 |
-
final_score = total_score / num_topics
|
| 186 |
-
if debug:
|
| 187 |
-
print(f"--- Final score: {final_score:.4f} ---\n")
|
| 188 |
-
|
| 189 |
-
if return_matches:
|
| 190 |
-
return final_score, exact_matches, partial_matches, excess_phrases
|
| 191 |
-
|
| 192 |
-
return final_score
|
| 193 |
-
|
| 194 |
-
def process_scene_segments(scene, segment_width, prompt, id_to_char, char_to_id, tile_descriptors, describe_locations, describe_absence, verbose=False):
|
| 195 |
-
"""
|
| 196 |
-
Process a scene by partitioning it into segments, assigning captions, and computing comparison scores.
|
| 197 |
-
|
| 198 |
-
Args:
|
| 199 |
-
scene (list): The scene to process, represented as a 2D list.
|
| 200 |
-
segment_width (int): The width of each segment.
|
| 201 |
-
prompt (str): The prompt to compare captions against.
|
| 202 |
-
id_to_char (dict): Mapping from tile IDs to characters.
|
| 203 |
-
char_to_id (dict): Mapping from characters to tile IDs.
|
| 204 |
-
tile_descriptors (dict): Descriptions of individual tile types.
|
| 205 |
-
describe_locations (bool): Whether to include location descriptions in captions.
|
| 206 |
-
describe_absence (bool): Whether to indicate absence of items in captions.
|
| 207 |
-
verbose (bool): If True, print captions and scores for each segment.
|
| 208 |
-
|
| 209 |
-
Returns:
|
| 210 |
-
tuple: A tuple containing the average comparison score, captions for each segment, and scores for each segment.
|
| 211 |
-
"""
|
| 212 |
-
# Partition the scene into segments of the specified width
|
| 213 |
-
segments = [
|
| 214 |
-
[row[i:i+segment_width] for row in scene] # Properly slice each row of the scene
|
| 215 |
-
for i in range(0, len(scene[0]), segment_width)
|
| 216 |
-
]
|
| 217 |
-
|
| 218 |
-
# Assign captions and compute scores for each segment
|
| 219 |
-
segment_scores = []
|
| 220 |
-
segment_captions = []
|
| 221 |
-
for idx, segment in enumerate(segments):
|
| 222 |
-
segment_caption = assign_caption(segment, id_to_char, char_to_id, tile_descriptors, describe_locations, describe_absence)
|
| 223 |
-
segment_score = compare_captions(prompt, segment_caption)
|
| 224 |
-
segment_scores.append(segment_score)
|
| 225 |
-
segment_captions.append(segment_caption)
|
| 226 |
-
|
| 227 |
-
if verbose:
|
| 228 |
-
print(f"Segment {idx + 1} caption: {segment_caption}")
|
| 229 |
-
print(f"Segment {idx + 1} comparison score: {segment_score}")
|
| 230 |
-
|
| 231 |
-
# Compute the average comparison score
|
| 232 |
-
average_score = sum(segment_scores) / len(segment_scores) if segment_scores else 0
|
| 233 |
-
|
| 234 |
-
if verbose:
|
| 235 |
-
print(f"Average comparison score across all segments: {average_score}")
|
| 236 |
-
|
| 237 |
-
return average_score, segment_captions, segment_scores
|
| 238 |
-
|
| 239 |
-
if __name__ == '__main__':
|
| 240 |
-
|
| 241 |
-
ref = "floor with one gap. two enemies. one platform. one tower."
|
| 242 |
-
gen = "giant gap with one chunk of floor. two enemies. one platform. one tower."
|
| 243 |
-
|
| 244 |
-
score = compare_captions(ref, gen, debug=True)
|
| 245 |
-
print(f"Should be: {ref}")
|
| 246 |
-
print(f" but was: {gen}")
|
| 247 |
-
print(f"Score: {score}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
captions/evaluate_caption_order_tolerance.py
DELETED
|
@@ -1,288 +0,0 @@
|
|
| 1 |
-
import argparse
|
| 2 |
-
import itertools
|
| 3 |
-
import os
|
| 4 |
-
import random
|
| 5 |
-
from collections import defaultdict
|
| 6 |
-
import sys, os
|
| 7 |
-
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
| 8 |
-
import util.common_settings as common_settings # adjust import if needed
|
| 9 |
-
from level_dataset import LevelDataset, visualize_samples, colors, mario_tiles # adjust import if needed
|
| 10 |
-
from torch.utils.data import DataLoader
|
| 11 |
-
from evaluate_caption_adherence import calculate_caption_score_and_samples # adjust import if needed
|
| 12 |
-
import matplotlib.pyplot as plt
|
| 13 |
-
import matplotlib
|
| 14 |
-
import json
|
| 15 |
-
from tqdm import tqdm
|
| 16 |
-
|
| 17 |
-
import numpy as np
|
| 18 |
-
import torch
|
| 19 |
-
from tqdm import tqdm
|
| 20 |
-
|
| 21 |
-
from captions.util import extract_tileset
|
| 22 |
-
from models.pipeline_loader import get_pipeline
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
def parse_args():
|
| 26 |
-
parser = argparse.ArgumentParser(description="Evaluate caption order tolerance for a diffusion model.")
|
| 27 |
-
parser.add_argument("--model_path", type=str, required=True, help="Path to the trained diffusion model")
|
| 28 |
-
parser.add_argument("--caption", type=str, required=False, default=None, help="Caption to evaluate, phrases separated by periods")
|
| 29 |
-
parser.add_argument("--tileset", type=str, help="Path to the tileset JSON file")
|
| 30 |
-
#parser.add_argument("--json", type=str, default="datasets\\Test_for_caption_order_tolerance.json", help="Path to dataset json file")
|
| 31 |
-
#parser.add_argument("--json", type=str, default="datasets\\SMB1_LevelsAndCaptions-regular-test.json", help="Path to dataset json file")
|
| 32 |
-
parser.add_argument("--json", type=str, default="datasets\\Mar1and2_LevelsAndCaptions-regular.json", help="Path to dataset json file")
|
| 33 |
-
#parser.add_argument("--trials", type=int, default=3, help="Number of times to evaluate each caption permutation")
|
| 34 |
-
parser.add_argument("--inference_steps", type=int, default=common_settings.NUM_INFERENCE_STEPS)
|
| 35 |
-
parser.add_argument("--guidance_scale", type=float, default=common_settings.GUIDANCE_SCALE)
|
| 36 |
-
parser.add_argument("--seed", type=int, default=42)
|
| 37 |
-
parser.add_argument("--game", type=str, choices=["Mario", "LR"], default="Mario", help="Game to evaluate (Mario or Lode Runner)")
|
| 38 |
-
parser.add_argument("--describe_absence", action="store_true", default=False, help="Indicate when there are no occurrences of an item or structure")
|
| 39 |
-
parser.add_argument("--save_as_json", action="store_true", help="Save generated levels as JSON")
|
| 40 |
-
parser.add_argument("--output_dir", type=str, default="visualizations", help="Output directory if not comparing checkpoints (subdir of model directory)")
|
| 41 |
-
parser.add_argument("--max_permutations", type=int, default=5, help="Maximum amount of permutations that can be made per caption")
|
| 42 |
-
return parser.parse_args()
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
def setup_environment(seed):
|
| 46 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 47 |
-
random.seed(seed)
|
| 48 |
-
np.random.seed(seed)
|
| 49 |
-
torch.manual_seed(seed)
|
| 50 |
-
if torch.cuda.is_available():
|
| 51 |
-
torch.cuda.manual_seed_all(seed)
|
| 52 |
-
return device
|
| 53 |
-
|
| 54 |
-
def load_captions_from_json(json_path):
|
| 55 |
-
with open(json_path, 'r', encoding='utf-8') as f:
|
| 56 |
-
data = json.load(f)
|
| 57 |
-
# If the JSON is a list of dicts with a "caption" key
|
| 58 |
-
captions = [entry["caption"] for entry in data if "caption" in entry]
|
| 59 |
-
return captions
|
| 60 |
-
|
| 61 |
-
def creation_of_parameters(caption, max_permutations):
|
| 62 |
-
args = parse_args()
|
| 63 |
-
device = setup_environment(args.seed)
|
| 64 |
-
|
| 65 |
-
if args.game == "Mario":
|
| 66 |
-
num_tiles = common_settings.MARIO_TILE_COUNT
|
| 67 |
-
tileset = '..\TheVGLC\Super Mario Bros\smb.json'
|
| 68 |
-
elif args.game == "LR":
|
| 69 |
-
num_tiles = common_settings.LR_TILE_COUNT
|
| 70 |
-
tileset = '..\TheVGLC\Lode Runner\Loderunner.json'
|
| 71 |
-
else:
|
| 72 |
-
raise ValueError(f"Unknown game: {args.game}")
|
| 73 |
-
|
| 74 |
-
# Load pipeline
|
| 75 |
-
pipe = get_pipeline(args.model_path).to(device)
|
| 76 |
-
|
| 77 |
-
# Load tile metadata
|
| 78 |
-
tile_chars, id_to_char, char_to_id, tile_descriptors = extract_tileset(tileset)
|
| 79 |
-
|
| 80 |
-
perm_captions = []
|
| 81 |
-
if isinstance(caption, list):
|
| 82 |
-
# captions is a list of caption strings
|
| 83 |
-
phrases_per_caption = [
|
| 84 |
-
[p.strip() for p in cap.split('.') if p.strip()]
|
| 85 |
-
for cap in caption
|
| 86 |
-
]
|
| 87 |
-
permutations = []
|
| 88 |
-
for phrases in phrases_per_caption:
|
| 89 |
-
perms = list(itertools.permutations(phrases))
|
| 90 |
-
if len(perms) > max_permutations:
|
| 91 |
-
perms = random.sample(perms, max_permutations)
|
| 92 |
-
permutations.append(perms)
|
| 93 |
-
perm_captions = ['.'.join(perm) + '.' for perms in permutations for perm in perms]
|
| 94 |
-
elif isinstance(caption, str):
|
| 95 |
-
# Split caption into phrases and get all permutations
|
| 96 |
-
phrase = [p.strip() for p in caption.split('.') if p.strip()]
|
| 97 |
-
permutations_cap = []
|
| 98 |
-
perms = list(itertools.permutations(phrase))
|
| 99 |
-
if len(perms) > max_permutations:
|
| 100 |
-
perms = random.sample(perms, max_permutations)
|
| 101 |
-
permutations_cap.append(perms)
|
| 102 |
-
|
| 103 |
-
perm_captions = ['.'.join(perm) + '.' for perms in permutations_cap for perm in perms]
|
| 104 |
-
|
| 105 |
-
# Create a list of dicts as expected by LevelDataset
|
| 106 |
-
caption_data = [{"scene": None, "caption": cap} for cap in perm_captions]
|
| 107 |
-
|
| 108 |
-
# Initialize dataset
|
| 109 |
-
dataset = LevelDataset(
|
| 110 |
-
data_as_list=caption_data,
|
| 111 |
-
shuffle=False,
|
| 112 |
-
mode="text",
|
| 113 |
-
augment=False,
|
| 114 |
-
num_tiles=common_settings.MARIO_TILE_COUNT,
|
| 115 |
-
negative_captions=False,
|
| 116 |
-
block_embeddings=None
|
| 117 |
-
)
|
| 118 |
-
|
| 119 |
-
# Create dataloader
|
| 120 |
-
dataloader = DataLoader(
|
| 121 |
-
dataset,
|
| 122 |
-
batch_size=min(16, len(perm_captions)),
|
| 123 |
-
shuffle=False,
|
| 124 |
-
num_workers=4,
|
| 125 |
-
drop_last=False,
|
| 126 |
-
persistent_workers=True
|
| 127 |
-
)
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
return pipe, device, id_to_char, char_to_id, tile_descriptors, num_tiles, dataloader, perm_captions, caption_data
|
| 131 |
-
|
| 132 |
-
def statistics_of_captions(captions, dataloader, compare_all_scores, pipe=None, device=None, id_to_char=None, char_to_id=None, tile_descriptors=None, num_tiles=None):
|
| 133 |
-
"""
|
| 134 |
-
Calculate statistics of the captions.
|
| 135 |
-
Returns average, standard deviation, minimum, maximum, and median of caption scores.
|
| 136 |
-
"""
|
| 137 |
-
args = parse_args()
|
| 138 |
-
if not captions:
|
| 139 |
-
print("No captions found in the provided JSON file.")
|
| 140 |
-
return
|
| 141 |
-
print(f"\nLoaded {len(captions)} captions from {args.json}")
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
avg_score = np.mean(compare_all_scores)
|
| 145 |
-
std_dev_score = np.std(compare_all_scores)
|
| 146 |
-
min_score = np.min(compare_all_scores)
|
| 147 |
-
max_score = np.max(compare_all_scores)
|
| 148 |
-
median_score = np.median(compare_all_scores)
|
| 149 |
-
|
| 150 |
-
print("\n-----Scores for each caption permutation-----")
|
| 151 |
-
for i, score in enumerate(compare_all_scores):
|
| 152 |
-
print(f"Scores for caption {i + 1}:", score)
|
| 153 |
-
|
| 154 |
-
print("\n-----Statistics of captions-----")
|
| 155 |
-
print(f"Average score: {avg_score:.4f}")
|
| 156 |
-
print(f"Standard deviation: {std_dev_score:.4f}")
|
| 157 |
-
print(f"Minimum score: {min_score:.4f}")
|
| 158 |
-
print(f"Maximum score: {max_score:.4f}")
|
| 159 |
-
print(f"Median score: {median_score:.4f}")
|
| 160 |
-
|
| 161 |
-
return compare_all_scores, avg_score, std_dev_score, min_score, max_score, median_score
|
| 162 |
-
|
| 163 |
-
def main():
|
| 164 |
-
args = parse_args()
|
| 165 |
-
if args.caption is None or args.caption == "":
|
| 166 |
-
caption = load_captions_from_json(args.json)
|
| 167 |
-
else:
|
| 168 |
-
caption = args.caption
|
| 169 |
-
#caption = ("many pipes. many coins. , many enemies. many blocks. , many platforms. many question blocks.").split(',')
|
| 170 |
-
|
| 171 |
-
all_scores = []
|
| 172 |
-
all_avg_scores = []
|
| 173 |
-
all_std_dev_scores = []
|
| 174 |
-
all_min_scores = []
|
| 175 |
-
all_max_scores = []
|
| 176 |
-
all_median_scores = []
|
| 177 |
-
all_captions = [item.strip() for s in caption for item in s.split(",")]
|
| 178 |
-
|
| 179 |
-
one_caption = []
|
| 180 |
-
count = 0
|
| 181 |
-
|
| 182 |
-
output_jsonl_path = os.path.join(args.output_dir, "evaluation_caption_order_results.jsonl")
|
| 183 |
-
with open(output_jsonl_path, "w") as f:
|
| 184 |
-
for cap in all_captions:
|
| 185 |
-
one_caption = cap
|
| 186 |
-
|
| 187 |
-
# Initialize dataset
|
| 188 |
-
pipe, device, id_to_char, char_to_id, tile_descriptors, num_tiles, dataloader, perm_caption, caption_data = creation_of_parameters(one_caption, args.max_permutations)
|
| 189 |
-
if not pipe:
|
| 190 |
-
print("Failed to create pipeline.")
|
| 191 |
-
return
|
| 192 |
-
|
| 193 |
-
avg_score, all_samples, all_prompts, compare_all_scores = calculate_caption_score_and_samples(device, pipe, dataloader, args.inference_steps, args.guidance_scale, args.seed, id_to_char, char_to_id, tile_descriptors, args.describe_absence, output=True, height=common_settings.MARIO_HEIGHT, width=common_settings.MARIO_WIDTH)
|
| 194 |
-
scores, avg_score, std_dev_score, min_score, max_score, median_score = statistics_of_captions(perm_caption, dataloader, compare_all_scores, pipe, device, id_to_char, char_to_id, tile_descriptors, num_tiles)
|
| 195 |
-
|
| 196 |
-
if args.save_as_json:
|
| 197 |
-
result_entry = {
|
| 198 |
-
"Caption": one_caption,
|
| 199 |
-
"Average score for all permutations": avg_score,
|
| 200 |
-
"Standard deviation": std_dev_score,
|
| 201 |
-
"Minimum score": min_score,
|
| 202 |
-
"Maximum score": max_score,
|
| 203 |
-
"Median score": median_score
|
| 204 |
-
#"samples": all_samples[i].tolist() if hasattr(all_samples, "__getitem__") else None,
|
| 205 |
-
#"prompt": all_prompts[i] if i < len(all_prompts) else "N/A"
|
| 206 |
-
}
|
| 207 |
-
f.write(json.dumps(result_entry) + "\n")
|
| 208 |
-
|
| 209 |
-
all_avg_scores.append(avg_score)
|
| 210 |
-
|
| 211 |
-
#scores, avg_score, std_dev_score, min_score, max_score, median_score = statistics_of_captions(perm_caption, dataloader, compare_all_scores, pipe, device, id_to_char, char_to_id, tile_descriptors, num_tiles)
|
| 212 |
-
for score in enumerate(scores):
|
| 213 |
-
all_scores.append(score)
|
| 214 |
-
all_std_dev_scores.append(std_dev_score)
|
| 215 |
-
all_min_scores.append(min_score)
|
| 216 |
-
all_max_scores.append(max_score)
|
| 217 |
-
all_median_scores.append(median_score)
|
| 218 |
-
if (count % 10) == 0:
|
| 219 |
-
f.flush() # Ensure each result is written immediately
|
| 220 |
-
os.fsync(f.fileno()) # Ensure file is flushed to disk
|
| 221 |
-
count = count + 1
|
| 222 |
-
|
| 223 |
-
print(f"\nAverage score across all captions: {avg_score:.4f}")
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
visualizations_dir = os.path.join(os.path.dirname(__file__), "visualizations")
|
| 228 |
-
if args.caption is not None or "":
|
| 229 |
-
caption_folder = args.caption.replace(" ", "_").replace(".", "_")
|
| 230 |
-
output_directory = os.path.join(visualizations_dir, caption_folder)
|
| 231 |
-
|
| 232 |
-
visualize_samples(
|
| 233 |
-
all_samples,
|
| 234 |
-
output_dir=output_directory,
|
| 235 |
-
prompts=all_prompts[0] if all_prompts else "No prompts available"
|
| 236 |
-
)
|
| 237 |
-
print(f"\nVisualizations saved to: {output_directory}")
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
print("\nAll samples shape:", all_samples.shape)
|
| 241 |
-
print("\nAll prompts:", all_prompts)
|
| 242 |
-
|
| 243 |
-
all_avg_score = np.mean(all_avg_scores)
|
| 244 |
-
all_std_dev_score = np.std(all_std_dev_scores)
|
| 245 |
-
all_min_score = np.min(all_min_scores)
|
| 246 |
-
all_max_score = np.max(all_max_scores)
|
| 247 |
-
all_median_score = np.median(all_median_scores)
|
| 248 |
-
|
| 249 |
-
if args.save_as_json:
|
| 250 |
-
output_jsonl_path = os.path.join(args.output_dir, "evaluation_caption_order_results.jsonl")
|
| 251 |
-
with open(output_jsonl_path, "w") as f:
|
| 252 |
-
if isinstance(caption, list) or (args.caption is None or args.caption == ""):
|
| 253 |
-
# Multiple captions (permuted)
|
| 254 |
-
for i, score in enumerate(all_avg_scores):
|
| 255 |
-
result_entry = {
|
| 256 |
-
"Caption": caption[i] if i < len(caption) else "N/A",
|
| 257 |
-
"Average score for all permutations": score,
|
| 258 |
-
#"samples": all_samples[i].tolist() if hasattr(all_samples, "__getitem__") else None,
|
| 259 |
-
#"prompt": all_prompts[i] if i < len(all_prompts) else "N/A"
|
| 260 |
-
}
|
| 261 |
-
f.write(json.dumps(result_entry) + "\n")
|
| 262 |
-
else:
|
| 263 |
-
# Single caption
|
| 264 |
-
result_entry = {
|
| 265 |
-
"caption": caption,
|
| 266 |
-
"avg_score": avg_score,
|
| 267 |
-
"samples": all_samples.tolist(),
|
| 268 |
-
"prompts": all_prompts
|
| 269 |
-
}
|
| 270 |
-
f.write(json.dumps(result_entry) + "\n")
|
| 271 |
-
|
| 272 |
-
results = {
|
| 273 |
-
|
| 274 |
-
"Scores of all captions": {
|
| 275 |
-
"Scores": all_scores,
|
| 276 |
-
"Number of captions": len(all_scores),
|
| 277 |
-
"Average of all permutations": all_avg_score,
|
| 278 |
-
"Standard deviation of all permutations": all_std_dev_score,
|
| 279 |
-
"Min score of all permutations": all_min_score,
|
| 280 |
-
"Max score of all permutations": all_max_score,
|
| 281 |
-
"Median score of all permutations": all_median_score
|
| 282 |
-
},
|
| 283 |
-
}
|
| 284 |
-
json.dump(results, f, indent=4)
|
| 285 |
-
|
| 286 |
-
print(f"Results saved to {output_jsonl_path}")
|
| 287 |
-
if __name__ == "__main__":
|
| 288 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
captions/util.py
DELETED
|
@@ -1,216 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
import sys
|
| 3 |
-
import os
|
| 4 |
-
from collections import Counter
|
| 5 |
-
|
| 6 |
-
# This file contains utility functions for analyzing and describing levels in both Lode Runner and Super Mario Bros.
|
| 7 |
-
|
| 8 |
-
# Could define these via the command line, but for now they are hardcoded
|
| 9 |
-
coarse_locations = True
|
| 10 |
-
coarse_counts = True
|
| 11 |
-
pluralize = True
|
| 12 |
-
give_staircase_lengths = False
|
| 13 |
-
|
| 14 |
-
def describe_size(count):
|
| 15 |
-
if count <= 4: return "small"
|
| 16 |
-
else: return "big"
|
| 17 |
-
|
| 18 |
-
def describe_quantity(count):
|
| 19 |
-
if count == 0: return "no"
|
| 20 |
-
elif count == 1: return "one"
|
| 21 |
-
elif count == 2: return "two"
|
| 22 |
-
elif count < 5: return "a few"
|
| 23 |
-
elif count < 10: return "several"
|
| 24 |
-
else: return "many"
|
| 25 |
-
|
| 26 |
-
def get_tile_descriptors(tileset):
|
| 27 |
-
"""Creates a mapping from tile character to its list of descriptors."""
|
| 28 |
-
result = {char: set(attrs) for char, attrs in tileset["tiles"].items()}
|
| 29 |
-
# Fake tiles. Should these contain anything? Note that code elsewhere expects everything to be passable or solid
|
| 30 |
-
result["!"] = {"passable"}
|
| 31 |
-
result["*"] = {"passable"}
|
| 32 |
-
return result
|
| 33 |
-
|
| 34 |
-
def analyze_floor(scene, id_to_char, tile_descriptors, describe_absence):
|
| 35 |
-
"""Analyzes the last row of the 32x32 scene and generates a floor description."""
|
| 36 |
-
WIDTH = len(scene[0])
|
| 37 |
-
last_row = scene[-1] # The FLOOR row of the scene
|
| 38 |
-
solid_count = sum(
|
| 39 |
-
1 for tile in last_row
|
| 40 |
-
if tile in id_to_char and (
|
| 41 |
-
"solid" in tile_descriptors.get(id_to_char[tile], []) or
|
| 42 |
-
"diggable" in tile_descriptors.get(id_to_char[tile], [])
|
| 43 |
-
)
|
| 44 |
-
)
|
| 45 |
-
passable_count = sum(
|
| 46 |
-
1 for tile in last_row if "passable" in tile_descriptors.get(id_to_char[tile], [])
|
| 47 |
-
)
|
| 48 |
-
|
| 49 |
-
if solid_count == WIDTH:
|
| 50 |
-
return "full floor"
|
| 51 |
-
elif passable_count == WIDTH:
|
| 52 |
-
if describe_absence:
|
| 53 |
-
return "no floor"
|
| 54 |
-
else:
|
| 55 |
-
return ""
|
| 56 |
-
elif solid_count > passable_count:
|
| 57 |
-
# Count contiguous groups of passable tiles
|
| 58 |
-
gaps = 0
|
| 59 |
-
in_gap = False
|
| 60 |
-
for tile in last_row:
|
| 61 |
-
# Enemies are also a gap since they immediately fall into the gap
|
| 62 |
-
if "passable" in tile_descriptors.get(id_to_char[tile], []) or "enemy" in tile_descriptors.get(id_to_char[tile], []):
|
| 63 |
-
if not in_gap:
|
| 64 |
-
gaps += 1
|
| 65 |
-
in_gap = True
|
| 66 |
-
elif "solid" in tile_descriptors.get(id_to_char[tile], []):
|
| 67 |
-
in_gap = False
|
| 68 |
-
else:
|
| 69 |
-
print("error")
|
| 70 |
-
print(tile)
|
| 71 |
-
print(id_to_char[tile])
|
| 72 |
-
print(tile_descriptors)
|
| 73 |
-
print(tile_descriptors.get(id_to_char[tile], []))
|
| 74 |
-
raise ValueError("Every tile should be passable, solid, or enemy")
|
| 75 |
-
return f"floor with {describe_quantity(gaps) if coarse_counts else gaps} gap" + ("s" if pluralize and gaps != 1 else "")
|
| 76 |
-
else:
|
| 77 |
-
# Count contiguous groups of solid tiles
|
| 78 |
-
chunks = 0
|
| 79 |
-
in_chunk = False
|
| 80 |
-
for tile in last_row:
|
| 81 |
-
if "solid" in tile_descriptors.get(id_to_char[tile], []):
|
| 82 |
-
if not in_chunk:
|
| 83 |
-
chunks += 1
|
| 84 |
-
in_chunk = True
|
| 85 |
-
elif "passable" in tile_descriptors.get(id_to_char[tile], []) or "enemy" in tile_descriptors.get(id_to_char[tile], []):
|
| 86 |
-
in_chunk = False
|
| 87 |
-
else:
|
| 88 |
-
print("error")
|
| 89 |
-
print(tile)
|
| 90 |
-
print(tile_descriptors)
|
| 91 |
-
print(tile_descriptors.get(tile, []))
|
| 92 |
-
raise ValueError("Every tile should be either passable or solid")
|
| 93 |
-
return f"giant gap with {describe_quantity(chunks) if coarse_counts else chunks} chunk"+("s" if pluralize and chunks != 1 else "")+" of floor"
|
| 94 |
-
|
| 95 |
-
def count_in_scene(scene, tiles, exclude=frozenset()):
    """
    Count standalone tiles in the scene.

    Args:
        scene: 2D grid (list of rows) of tile ids.
        tiles: collection of tile ids to count.
        exclude: set of (row, col) coordinates to skip even if they match.

    Returns:
        Number of matching tiles outside the excluded coordinates.
    """
    # frozenset default avoids the shared-mutable-default pitfall;
    # behavior is unchanged since the set was only read, never mutated.
    return sum(
        1
        for r, row in enumerate(scene)
        for c, t in enumerate(row)
        if (r, c) not in exclude and t in tiles
    )
|
| 106 |
-
|
| 107 |
-
def count_caption_phrase(scene, tiles, name, names, offset=0, describe_absence=False, exclude=set()):
    """
    Build a caption fragment describing how many matching tiles the scene has.

    `offset` is added to the raw tile count before phrasing. When nothing is
    found, an explicit " no <names>." phrase is produced only if
    `describe_absence` is set; otherwise the fragment is empty.
    """
    total = count_in_scene(scene, tiles, exclude) + offset
    if total <= 0:
        return f" no {names}." if describe_absence else ""
    quantity = describe_quantity(total) if coarse_counts else total
    label = names if pluralize and total > 1 else name
    return f" {quantity} {label}."
|
| 117 |
-
|
| 118 |
-
def in_column(scene, x, tile):
    """
    Report whether `tile` appears anywhere in column `x` of the scene.

    Args:
        scene: 2D grid (list of rows) of tile ids.
        x: column index to scan.
        tile: tile id to look for.

    Returns:
        True if any row holds `tile` at column `x`, else False.
    """
    # any() short-circuits exactly like the original early-return loop.
    return any(row[x] == tile for row in scene)
|
| 124 |
-
|
| 125 |
-
def analyze_ceiling(scene, id_to_char, tile_descriptors, describe_absence, ceiling_row=1):
    """
    Analyze the given row (0-based index) of the scene to detect a ceiling.

    Returns a caption phrase, or an empty string when fewer than half the
    tiles are solid and absence should not be described.
    """
    width = len(scene[0])
    descriptors = [tile_descriptors.get(id_to_char[t], []) for t in scene[ceiling_row]]
    solid_total = sum(1 for d in descriptors if "solid" in d)

    if solid_total == width:
        return " full ceiling."
    if solid_total <= width // 2:
        # Not enough solid tiles to count as a ceiling at all.
        return " no ceiling." if describe_absence else ""

    # Count contiguous runs of non-solid tiles. Enemies count as gaps too:
    # they are tagged "moving" rather than "passable" and fall straight through.
    gap_total = 0
    inside_gap = False
    for desc in descriptors:
        if "passable" in desc or "moving" in desc:
            if not inside_gap:
                gap_total += 1
            inside_gap = True
        else:
            inside_gap = False

    amount = describe_quantity(gap_total) if coarse_counts else gap_total
    suffix = "s" if pluralize and gap_total != 1 else ""
    return f" ceiling with {amount} gap{suffix}."
|
| 162 |
-
|
| 163 |
-
def extract_tileset(tileset_path):
    """
    Load a tileset JSON file and build the character/id lookup tables.

    Tile ids are assigned by the sorted order of the tile characters.

    Returns:
        (tile_chars, id_to_char, char_to_id, tile_descriptors)
    """
    with open(tileset_path, "r") as f:
        tileset = json.load(f)

    tile_chars = sorted(tileset['tiles'].keys())
    id_to_char = dict(enumerate(tile_chars))
    char_to_id = {ch: i for i, ch in enumerate(tile_chars)}
    tile_descriptors = get_tile_descriptors(tileset)

    return tile_chars, id_to_char, char_to_id, tile_descriptors
|
| 182 |
-
|
| 183 |
-
def flood_fill(scene, visited, start_row, start_col, id_to_char, tile_descriptors, excluded, pipes=False, target_descriptor=None):
    """
    Collect the connected structure of tiles reachable from (start_row, start_col).

    Explicit-stack depth-first search. When `target_descriptor` is given, only
    tiles carrying that descriptor join the structure; otherwise the legacy
    rule applies: solid non-pipe tiles when pipes=False, solid pipe tiles when
    pipes=True. Coordinates in `excluded` are skipped and `visited` is updated
    in place.
    """
    structure = []
    stack = [(start_row, start_col)]

    while stack:
        r, c = stack.pop()
        if (r, c) in visited or (r, c) in excluded:
            continue

        char = id_to_char[scene[r][c]]
        descriptors = tile_descriptors.get(char, [])
        if target_descriptor is not None:
            if target_descriptor not in descriptors:
                continue
        # Legacy solid/pipe rule: the tile must be solid and its pipe-ness
        # must match the `pipes` flag.
        elif "solid" not in descriptors or ("pipe" in descriptors) != pipes:
            continue

        visited.add((r, c))
        structure.append((r, c))

        for d_row, d_col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            # Special case so adjacent pipes stay separate: never step
            # sideways off the matching edge of a pipe tile.
            if char in ('>', ']') and d_col == 1:
                continue
            if char in ('<', '[') and d_col == -1:
                continue

            n_row, n_col = r + d_row, c + d_col
            if 0 <= n_row < len(scene) and 0 <= n_col < len(scene[0]):
                stack.append((n_row, n_col))

    return structure
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|