| | |
| | """ |
| | Decode base64 file names inside the Chinese celeb dataset directory. |
| | |
| | Default target: /Users/chenchaoyun/Downloads/chinese_celeb_dataset. |
| | Use --root to override; --dry-run only prints the plan. |
| | """ |
| | import argparse |
| | import base64 |
| | from pathlib import Path |
| | import sys |
| |
|
# Dataset location used when --root is not given on the command line.
DEFAULT_ROOT = Path("/Users/chenchaoyun/Downloads") / "chinese_celeb_dataset"
| |
|
| |
|
def _decode_basename(encoded: str) -> str:
    """Return the UTF-8 text encoded in a URL-safe base64 file stem.

    Missing ``=`` padding is restored before decoding, so stems that had
    their padding stripped still decode.

    Args:
        encoded: File name without its extension.

    Returns:
        The decoded text, or ``encoded`` unchanged when it is not ASCII, is
        not valid base64, does not decode to UTF-8, or decodes to something
        that cannot be used as a single file-name component (empty, ``.``,
        ``..``, or containing a path separator or NUL character).
    """
    padding = "=" * (-len(encoded) % 4)
    try:
        raw = base64.urlsafe_b64decode((encoded + padding).encode("ascii"))
        decoded = raw.decode("utf-8")
    except ValueError:
        # binascii.Error, UnicodeEncodeError and UnicodeDecodeError are all
        # ValueError subclasses; anything else is a real bug and should surface.
        return encoded

    # Path(...).name differs from the input whenever the text is not exactly
    # one path component on the current platform (separators, ".", drives).
    unusable = (
        not decoded
        or decoded == ".."
        or "\x00" in decoded
        or Path(decoded).name != decoded
    )
    return encoded if unusable else decoded
| |
|
| |
|
def rename_dataset(root: Path, dry_run: bool = False) -> int:
    """Rename files under *root* whose stem is a base64-encoded name.

    Every regular file found recursively below ``root`` has its stem passed
    to ``_decode_basename``; when that yields a different name the file is
    renamed in place, keeping its suffix. If the target name is already
    taken, ``_1``, ``_2``, ... is appended to the decoded stem until a free
    name is found. Each rename is printed as ``old -> new``.

    Args:
        root: Dataset directory to walk.
        dry_run: When true, only print the planned renames; nothing on disk
            is changed and the reported count stays at zero.

    Returns:
        ``1`` if ``root`` does not exist or is not a directory, else ``0``.

    Raises:
        OSError: If an individual rename fails; files renamed before the
            failure stay renamed.
    """
    if not root.exists():
        print(f"Directory does not exist: {root}", file=sys.stderr)
        return 1
    if not root.is_dir():
        print(f"Not a directory: {root}", file=sys.stderr)
        return 1

    renamed = 0
    # Targets already assigned during this run. In a real run they exist on
    # disk and exists() would catch them; in a dry run nothing is created, so
    # without this set two files decoding to the same name would be shown the
    # same target and the printed plan would differ from the real outcome.
    claimed = set()

    # sorted() materialises the listing before any rename takes place.
    for file_path in sorted(root.rglob("*")):
        if not file_path.is_file():
            continue
        decoded = _decode_basename(file_path.stem)
        if decoded == file_path.stem:
            continue

        try:
            new_path = file_path.with_name(f"{decoded}{file_path.suffix}")
        except ValueError:
            # with_name() rejects names that are not a single path component
            # (e.g. a decoded string containing a separator). Skip this file
            # instead of aborting the whole run.
            print(
                f"Skipping {file_path}: decoded name {decoded!r} "
                "is not a valid file name",
                file=sys.stderr,
            )
            continue
        if new_path == file_path:
            continue

        # Resolve collisions by appending an increasing numeric suffix.
        counter = 1
        while (
            (new_path.exists() or new_path in claimed)
            and new_path != file_path
        ):
            new_path = file_path.with_name(
                f"{decoded}_{counter}{file_path.suffix}"
            )
            counter += 1
        claimed.add(new_path)

        print(f"{file_path} -> {new_path}")
        if dry_run:
            continue
        file_path.rename(new_path)
        renamed += 1

    print(f"Renamed {renamed} files")
    return 0
| |
|
| |
|
def parse_args() -> argparse.Namespace:
    """Parse the command line of the rename script.

    Returns:
        A namespace with ``root`` (a ``Path``, defaulting to
        ``DEFAULT_ROOT``) and ``dry_run`` (``True`` when ``--dry-run`` was
        passed, otherwise ``False``).
    """
    cli = argparse.ArgumentParser(
        description="Decode chinese_celeb_dataset file names",
    )

    # Where to look for encoded file names.
    root_help = "Dataset root directory (default: %(default)s)"
    cli.add_argument("--root", type=Path, default=DEFAULT_ROOT, help=root_help)

    # Preview mode: show the plan without renaming anything.
    dry_run_help = "Only print planned renames without applying them"
    cli.add_argument("--dry-run", action="store_true", help=dry_run_help)

    namespace = cli.parse_args()
    return namespace
| |
|
| |
|
def main() -> int:
    """Run the rename from the command line and return its exit status.

    The ``--root`` argument has ``~`` expanded and is resolved to an
    absolute path before being handed to ``rename_dataset``.
    """
    options = parse_args()
    dataset_root = options.root.expanduser().resolve()
    status = rename_dataset(dataset_root, options.dry_run)
    return status
| |
|
| |
|
# Script entry point: the status returned by main() becomes the process
# exit code.
if __name__ == "__main__":
    raise SystemExit(main())
| |
|