# Tools/src/google_src/gcs_stats.py
# Author: jebin2
# ci: Run GCS stats script with verbose output. (commit c25e6e9)
import os
import argparse
from collections import defaultdict
from src.logger_config import logger
from src.google_src.gcloud_wrapper import get_default_wrapper
from src.config import get_config_value
def _report_bucket(client, bucket_name: str) -> None:
    """Log file count, total size, and per-folder file counts for one bucket.

    Access errors on an individual bucket (e.g. permission denied on a
    bucket the account can list but not read) are logged at info level
    so the overall scan continues with the remaining buckets.

    Args:
        client: An authenticated google.cloud.storage.Client.
        bucket_name: Name of the bucket to analyze.
    """
    try:
        # list_blobs accepts a bucket name directly; no need to build a
        # Bucket object first.
        blobs = list(client.list_blobs(bucket_name))
        total_files = len(blobs)
        # Some blobs may report size as None/0; skip falsy sizes in the sum.
        total_size = sum(b.size for b in blobs if b.size)
        logger.info(f"  Files: {total_files}, Size: {total_size / (1024*1024):.2f} MB")

        # Count real files per immediate parent folder. Names ending in '/'
        # are zero-byte "folder" placeholders, not files; root-level files
        # are grouped under "(root)".
        folder_stats = defaultdict(int)
        for blob in blobs:
            name = blob.name
            if name.endswith('/'):
                continue
            folder_stats[os.path.dirname(name) or "(root)"] += 1

        for folder, count in sorted(folder_stats.items()):
            logger.info(f"  {folder}: {count} files")
    except Exception as e:
        logger.info(f"  Access Denied or Error: {e}")


def get_bucket_stats(
    account_name: str = "final_data"
):
    """
    List GCS buckets and provide statistics (file counts, subfolders).
    Iterates through ALL available buckets for the account.

    Args:
        account_name: Key identifying the service account used to build
            the storage client. Defaults to "final_data".

    Returns:
        None. All results are emitted via the module logger.
    """
    try:
        wrapper = get_default_wrapper()
        client = wrapper.get_storage_client(account_name)
        logger.info(f"Fetching GCS Stats for Account: {account_name}")
        try:
            buckets = list(client.list_buckets())
        except Exception as e:
            # logger.exception records the full traceback, which
            # logger.error alone would drop.
            logger.exception(f"Failed to list buckets: {e}")
            return
        if not buckets:
            logger.info("No buckets found.")
            return
        logger.info(f"Found {len(buckets)} buckets. Analyzing...")
        for bucket_resource in buckets:
            logger.info(f"Bucket: {bucket_resource.name}")
            _report_bucket(client, bucket_resource.name)
    except Exception as e:
        # Top-level boundary: log with traceback rather than crash the CLI.
        logger.exception(f"❌ Error getting GCS stats: {e}")
if __name__ == "__main__":
    # CLI arguments for bucket/prefix were deliberately dropped; the
    # simplified entry point always scans the default account.
    get_bucket_stats()