Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| """ | |
| Decode base64 file names inside the Chinese celeb dataset directory. | |
| Default target: /Users/chenchaoyun/Downloads/chinese_celeb_dataset. | |
| Use --root to override; --dry-run only prints the plan. | |
| """ | |
| import argparse | |
| import base64 | |
| from pathlib import Path | |
| import sys | |
# Dataset directory used as the default value of the --root option.
DEFAULT_ROOT = Path("/Users/chenchaoyun/Downloads/chinese_celeb_dataset")
def _decode_basename(encoded: str) -> str:
    """Decode a URL-safe base64 file stem into UTF-8 text.

    Missing ``=`` padding is restored before decoding, so stems whose
    padding was stripped still decode.

    Args:
        encoded: File stem (name without suffix) that may be base64.

    Returns:
        The decoded text, or ``encoded`` unchanged when it is not valid
        base64/UTF-8, or when the decoded text cannot be used as a file
        name (empty, or containing a path separator or a NUL character).
    """
    padding = "=" * (-len(encoded) % 4)
    try:
        raw = base64.urlsafe_b64decode((encoded + padding).encode("ascii"))
        decoded = raw.decode("utf-8")
    except ValueError:
        # binascii.Error, UnicodeEncodeError and UnicodeDecodeError are all
        # ValueError subclasses; any other exception is a genuine bug and
        # should not be silently swallowed.
        return encoded
    # A decoded value that is empty or holds a separator/NUL would make the
    # later Path.with_name()/rename() calls fail, so keep the original stem.
    if not decoded or any(ch in decoded for ch in ("/", "\\", "\0")):
        return encoded
    return decoded
def rename_dataset(root: Path, dry_run: bool = False) -> int:
    """Rename base64-named files under ``root`` to their decoded names.

    Every regular file below ``root`` (recursively) whose stem decodes to
    something different is renamed to ``<decoded><suffix>``. If that name is
    already taken, ``_1``, ``_2``, ... is appended to the decoded stem until
    a free name is found.

    Args:
        root: Directory to scan.
        dry_run: When true, only print the planned renames.

    Returns:
        0 on success; 1 when ``root`` is missing or not a directory, or when
        at least one file could not be renamed.
    """
    if not root.exists():
        print(f"Directory does not exist: {root}", file=sys.stderr)
        return 1
    if not root.is_dir():
        print(f"Not a directory: {root}", file=sys.stderr)
        return 1

    renamed = 0
    failed = 0
    # Targets already assigned during this run. In dry-run mode nothing is
    # created on disk, so exists() alone would let two sources that decode
    # to the same name be reported with the same target.
    claimed = set()
    # sorted() snapshots the tree first, so files renamed below are not
    # picked up again by the traversal.
    for file_path in sorted(root.rglob("*")):
        if not file_path.is_file():
            continue
        stem, suffix = file_path.stem, file_path.suffix
        decoded = _decode_basename(stem)
        if decoded == stem:
            continue
        try:
            new_path = file_path.with_name(f"{decoded}{suffix}")
            counter = 1
            # Append a counter while the decoded target is already taken.
            while new_path.exists() or new_path in claimed:
                new_path = file_path.with_name(
                    f"{decoded}_{counter}{suffix}"
                )
                counter += 1
        except (OSError, ValueError) as exc:
            # with_name() rejects names containing a path separator and
            # exists() may fail on names the OS refuses; skip this file
            # instead of aborting the run half-way through.
            print(f"Skipping {file_path}: {exc}", file=sys.stderr)
            failed += 1
            continue
        claimed.add(new_path)
        print(f"{file_path} -> {new_path}")
        if dry_run:
            continue
        try:
            file_path.rename(new_path)
        except (OSError, ValueError) as exc:
            print(f"Failed to rename {file_path}: {exc}", file=sys.stderr)
            failed += 1
            continue
        renamed += 1
    print(f"Renamed {renamed} files")
    if failed:
        print(f"{failed} files could not be renamed", file=sys.stderr)
        return 1
    return 0
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the script.

    Returns:
        Namespace with ``root`` (a ``Path``, defaulting to ``DEFAULT_ROOT``)
        and ``dry_run`` (a bool, false unless ``--dry-run`` is given).
    """
    cli = argparse.ArgumentParser(
        description="Decode chinese_celeb_dataset file names"
    )
    # One (flag, settings) entry per supported option.
    option_table = (
        (
            "--root",
            {
                "type": Path,
                "default": DEFAULT_ROOT,
                "help": "Dataset root directory (default: %(default)s)",
            },
        ),
        (
            "--dry-run",
            {
                "action": "store_true",
                "help": "Only print planned renames without applying them",
            },
        ),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    namespace = cli.parse_args()
    return namespace
def main() -> int:
    """Run the renamer from the command line and return its exit status."""
    options = parse_args()
    # Expand "~" and make the path absolute before handing it on.
    dataset_root = options.root.expanduser().resolve()
    status = rename_dataset(dataset_root, options.dry_run)
    return status
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())