#!/usr/bin/env python3
"""
Decode base64 file names inside the Chinese celeb dataset directory.

Default target: /Users/chenchaoyun/Downloads/chinese_celeb_dataset.
Use --root to override; --dry-run only prints the plan.
"""

import argparse
import base64
import sys
from pathlib import Path
from typing import Optional, Sequence, Set

DEFAULT_ROOT = Path("/Users/chenchaoyun/Downloads/chinese_celeb_dataset")


def _is_safe_filename(name: str) -> bool:
    """Return True if *name* can be used as a single path component."""
    if not name or name in {".", ".."}:
        return False
    # Control characters (including NUL) almost always mean the input was
    # not really base64 and merely happened to decode.
    if any(ord(ch) < 32 or ord(ch) == 127 for ch in name):
        return False
    # If the platform's path parser splits or trims the text, it contains a
    # separator (or a drive prefix) and is not a plain file name.
    return Path(name).name == name


def _decode_basename(encoded: str) -> str:
    """Decode a base64 (standard or URL-safe alphabet) file stem.

    Missing ``=`` padding is restored before decoding.

    Args:
        encoded: File name without its extension.

    Returns:
        The decoded UTF-8 text, or *encoded* unchanged when it is not valid
        base64, does not decode to UTF-8, or decodes to something that is
        not usable as a file name.
    """
    padding = "=" * (-len(encoded) % 4)
    try:
        # validate=True rejects characters outside the alphabet instead of
        # silently discarding them, which avoids accidental "decodes".
        raw = base64.b64decode(
            (encoded + padding).encode("ascii"),
            altchars=b"-_",
            validate=True,
        )
        decoded = raw.decode("utf-8")
    except ValueError:
        # Covers binascii.Error, UnicodeEncodeError and UnicodeDecodeError.
        return encoded
    return decoded if _is_safe_filename(decoded) else encoded


def _pick_target(file_path: Path, decoded: str, claimed: Set[Path]) -> Path:
    """Choose a free destination next to *file_path* for the decoded name.

    A ``_<counter>`` suffix is appended while the candidate already exists on
    disk or has been claimed by an earlier rename in the same run.
    """
    suffix = file_path.suffix
    candidate = file_path.with_name(f"{decoded}{suffix}")
    counter = 1
    while candidate != file_path and (
        candidate in claimed or candidate.exists()
    ):
        candidate = file_path.with_name(f"{decoded}_{counter}{suffix}")
        counter += 1
    return candidate


def rename_dataset(root: Path, dry_run: bool = False) -> int:
    """Rename every base64-named file under *root* to its decoded name.

    Files whose stem does not decode are left alone. Each planned rename is
    printed as ``old -> new``.

    Args:
        root: Directory to walk recursively.
        dry_run: When True, only print the plan; nothing is renamed.

    Returns:
        0 on success; 1 if *root* is missing, is not a directory, or at
        least one rename failed.
    """
    if not root.exists():
        print(f"Directory does not exist: {root}", file=sys.stderr)
        return 1
    if not root.is_dir():
        print(f"Not a directory: {root}", file=sys.stderr)
        return 1

    # Targets already handed out, so a dry run (where nothing appears on
    # disk) resolves collisions the same way a real run would.
    claimed: Set[Path] = set()
    planned = 0
    renamed = 0
    failed = 0

    # Materialize the listing first: renaming while walking would otherwise
    # change the directory contents under the iterator.
    for file_path in sorted(root.rglob("*")):
        if not file_path.is_file():
            continue

        stem = file_path.stem
        decoded = _decode_basename(stem)
        if decoded == stem:
            continue

        new_path = _pick_target(file_path, decoded, claimed)
        if new_path == file_path:
            continue

        claimed.add(new_path)
        planned += 1
        print(f"{file_path} -> {new_path}")
        if dry_run:
            continue

        try:
            file_path.rename(new_path)
        except OSError as exc:
            # Keep going: one bad file should not abort the whole dataset.
            failed += 1
            print(f"Failed to rename {file_path}: {exc}", file=sys.stderr)
        else:
            renamed += 1

    if dry_run:
        print(f"Would rename {planned} files")
    else:
        print(f"Renamed {renamed} files")
    return 1 if failed else 0


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse command-line options.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="Decode chinese_celeb_dataset file names")
    parser.add_argument(
        "--root",
        type=Path,
        default=DEFAULT_ROOT,
        help="Dataset root directory (default: %(default)s)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Only print planned renames without applying them",
    )
    return parser.parse_args(argv)


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Entry point; returns the process exit code."""
    args = parse_args(argv)
    return rename_dataset(args.root.expanduser().resolve(), args.dry_run)


if __name__ == "__main__":
    sys.exit(main())