File size: 2,165 Bytes
7c08dc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from utils.poster_eval_utils import get_poster_text

from pathlib import Path
import argparse
import os

def write_poster_texts(root_folder: str | Path) -> dict:
    """
    For each immediate subfolder of `root_folder`, look for a file named `poster.png`.
    If found, call `get_poster_text(path)` and save the returned text to `poster_text.md`
    in that same subfolder.

    Returns a summary dict with counts and any errors.
    """
    root = Path(root_folder)
    processed = 0
    missing = 0
    errors: list[tuple[Path, str]] = []

    for subdir in root.iterdir():
        if not subdir.is_dir():
            continue
        if os.path.exists(subdir / "poster_text.md"):
            print(f"Skipping {subdir.name} as poster_text.md already exists.")
            continue
        print(f"Processing {subdir.name}...")

        poster_path = subdir / "poster.png"
        if not poster_path.exists():
            print(f"Missing poster.png in {subdir.name}.")
            missing += 1
            continue

        try:
            text = get_poster_text(poster_path, False)  # assumes this function is available
            out_path = subdir / "poster_text.md"
            # Ensure we always write UTF-8 with a trailing newline.
            Path(out_path).write_text((text or "").rstrip() + "\n", encoding="utf-8")
            processed += 1
        except Exception as e:  # keep going even if one folder fails
            errors.append((poster_path, str(e)))

    return {
        "processed": processed,
        "missing_poster_png": missing,
        "errors": errors,
    }

if __name__ == "__main__":
    # Command-line entry point: run the extraction over --root_folder and
    # print a summary of what was written, what was missing, and what failed.
    parser = argparse.ArgumentParser(description="Extract poster texts from images.")
    # required=True: if the flag were optional and omitted, args.root_folder
    # would be None and Path(None) inside write_poster_texts would raise a
    # TypeError instead of argparse printing a usage message.
    parser.add_argument(
        "--root_folder",
        type=str,
        required=True,
        help="Root folder containing subfolders with posters.",
    )
    args = parser.parse_args()

    result = write_poster_texts(args.root_folder)
    print(f"Processed {result['processed']} posters.")
    print(f"Missing poster.png files: {result['missing_poster_png']}")
    if result['errors']:
        print("Errors encountered:")
        # Each entry is a (poster_path, message) tuple.
        for path, error in result['errors']:
            print(f"  {path}: {error}")