import os
import argparse
import posixpath
from collections import defaultdict

from src.logger_config import logger
from src.google_src.gcloud_wrapper import get_default_wrapper
from src.config import get_config_value


def _summarize_blobs(blobs):
    """Aggregate an iterable of blobs in a single streaming pass.

    Args:
        blobs: Iterable of objects exposing ``name`` and ``size`` attributes
            (here, whatever ``client.list_blobs`` yields).

    Returns:
        A ``(file_count, total_bytes, folder_counts)`` tuple where
        ``file_count`` counts every blob seen, ``total_bytes`` sums the
        truthy ``size`` values, and ``folder_counts`` maps each parent
        "folder" (or ``"(root)"``) to the number of blobs directly in it.
    """
    file_count = 0
    total_bytes = 0
    folder_counts = defaultdict(int)

    for blob in blobs:
        file_count += 1
        # A falsy size (None or 0) contributes nothing to the total.
        total_bytes += blob.size or 0

        name = blob.name
        # Names ending in "/" are folder placeholders: they count towards the
        # overall total above but are not attributed to any folder.
        if name.endswith('/'):
            continue

        # Object names always use "/" as separator, so use posixpath rather
        # than os.path, whose behaviour depends on the host platform.
        folder = posixpath.dirname(name) or "(root)"
        folder_counts[folder] += 1

    return file_count, total_bytes, folder_counts


def get_bucket_stats(
    account_name: str = "final_data"
) -> None:
    """
    List GCS buckets and provide statistics (file counts, subfolders).

    Iterates through ALL available buckets for the account. For each bucket
    the total number of objects, their combined size in MB, and the number of
    objects per folder are written to the log. Nothing is returned.

    Failures are never propagated to the caller: a failure to list buckets or
    any unexpected error is logged as an error and ends the run, while a
    failure on an individual bucket is logged as a warning and the remaining
    buckets are still processed.

    Args:
        account_name: Account identifier passed to the wrapper's
            ``get_storage_client`` to obtain the storage client.
    """
    try:
        wrapper = get_default_wrapper()
        client = wrapper.get_storage_client(account_name)

        logger.info(f"Fetching GCS Stats for Account: {account_name}")

        try:
            buckets = list(client.list_buckets())
        except Exception as e:
            logger.error(f"Failed to list buckets: {e}")
            return

        if not buckets:
            logger.info("No buckets found.")
            return

        logger.info(f"Found {len(buckets)} buckets. Analyzing...")

        for bucket_resource in buckets:
            bucket_name = bucket_resource.name
            logger.info(f"Bucket: {bucket_name}")

            try:
                bucket = client.bucket(bucket_name)
                # Stream the listing instead of materialising every blob, so
                # memory use does not grow with the size of the bucket.
                total_files, total_size, folder_stats = _summarize_blobs(
                    client.list_blobs(bucket)
                )
            except Exception as e:
                # Best effort: one inaccessible bucket must not stop the
                # others, but the failure should be visible above INFO level.
                logger.warning(f" Access Denied or Error: {e}")
                continue

            logger.info(f" Files: {total_files}, Size: {total_size / (1024*1024):.2f} MB")

            for folder, count in sorted(folder_stats.items()):
                logger.info(f" {folder}: {count} files")

    except Exception as e:
        logger.error(f"❌ Error getting GCS stats: {e}")


if __name__ == "__main__":
    # Removed CLI args for bucket/prefix as requested signature is simplified
    get_bucket_stats()