from utils.poster_eval_utils import get_poster_text from pathlib import Path import argparse import os def write_poster_texts(root_folder: str | Path) -> dict: """ For each immediate subfolder of `root_folder`, look for a file named `poster.png`. If found, call `get_poster_text(path)` and save the returned text to `poster_text.md` in that same subfolder. Returns a summary dict with counts and any errors. """ root = Path(root_folder) processed = 0 missing = 0 errors: list[tuple[Path, str]] = [] for subdir in root.iterdir(): if not subdir.is_dir(): continue if os.path.exists(subdir / "poster_text.md"): print(f"Skipping {subdir.name} as poster_text.md already exists.") continue print(f"Processing {subdir.name}...") poster_path = subdir / "poster.png" if not poster_path.exists(): print(f"Missing poster.png in {subdir.name}.") missing += 1 continue try: text = get_poster_text(poster_path, False) # assumes this function is available out_path = subdir / "poster_text.md" # Ensure we always write UTF-8 with a trailing newline. Path(out_path).write_text((text or "").rstrip() + "\n", encoding="utf-8") processed += 1 except Exception as e: # keep going even if one folder fails errors.append((poster_path, str(e))) return { "processed": processed, "missing_poster_png": missing, "errors": errors, } if __name__ == "__main__": parser = argparse.ArgumentParser(description="Extract poster texts from images.") parser.add_argument("--root_folder", type=str, help="Root folder containing subfolders with posters.") args = parser.parse_args() result = write_poster_texts(args.root_folder) print(f"Processed {result['processed']} posters.") print(f"Missing poster.png files: {result['missing_poster_png']}") if result['errors']: print("Errors encountered:") for path, error in result['errors']: print(f" {path}: {error}")