|
|
import os |
|
|
import argparse |
|
|
from collections import defaultdict |
|
|
from src.logger_config import logger |
|
|
from src.google_src.gcloud_wrapper import get_default_wrapper |
|
|
from src.config import get_config_value |
|
|
|
|
|
def _summarize_blobs(blobs):
    """Tally object count, total bytes, and per-folder file counts in one pass.

    Args:
        blobs: Iterable of objects exposing ``name`` and ``size`` attributes.
            It is consumed lazily, so a paged listing is never fully
            materialized in memory.

    Returns:
        A ``(total_files, total_size, folder_counts)`` tuple where
        ``folder_counts`` maps each parent prefix (or ``"(root)"``) to the
        number of objects directly under it. Names ending in ``/`` are counted
        in ``total_files`` but excluded from ``folder_counts``.
    """
    # Object names always use "/" as the separator, so use the POSIX flavour
    # explicitly instead of the platform-dependent os.path.
    import posixpath

    total_files = 0
    total_size = 0
    folder_counts = defaultdict(int)

    for blob in blobs:
        total_files += 1
        # size may be None/0; treat both as contributing nothing.
        total_size += blob.size or 0

        name = blob.name
        if name.endswith("/"):
            # Skip names ending in "/" in the per-folder breakdown.
            continue
        folder_counts[posixpath.dirname(name) or "(root)"] += 1

    return total_files, total_size, folder_counts


def get_bucket_stats(
    account_name: str = "final_data"
):
    """List GCS buckets and log statistics (file counts, sizes, subfolders).

    Iterates through ALL buckets returned by the storage client for the
    account. Results are written to the module logger; nothing is returned.

    Args:
        account_name: Name passed to the wrapper's ``get_storage_client`` to
            select which account's storage client is used.

    Returns:
        None. Failures are logged rather than raised: a failure to list
        buckets aborts the run, while a failure on a single bucket is logged
        as a warning and the remaining buckets are still processed.
    """
    try:
        wrapper = get_default_wrapper()
        client = wrapper.get_storage_client(account_name)

        logger.info(f"Fetching GCS Stats for Account: {account_name}")

        try:
            buckets = list(client.list_buckets())
        except Exception as e:
            logger.error(f"Failed to list buckets: {e}")
            return

        if not buckets:
            logger.info("No buckets found.")
            return

        logger.info(f"Found {len(buckets)} buckets. Analyzing...")

        for bucket_resource in buckets:
            bucket_name = bucket_resource.name
            logger.info(f"Bucket: {bucket_name}")

            try:
                bucket = client.bucket(bucket_name)
                # Stream the listing instead of building a list of every blob.
                total_files, total_size, folder_stats = _summarize_blobs(
                    client.list_blobs(bucket)
                )
            except Exception as e:
                # One unreadable bucket must not stop the report; log it at a
                # level that is visible and move on to the next bucket.
                logger.warning(f" Access Denied or Error: {e}")
                continue

            logger.info(f" Files: {total_files}, Size: {total_size / (1024*1024):.2f} MB")

            for folder, count in sorted(folder_stats.items()):
                logger.info(f" {folder}: {count} files")

    except Exception as e:
        # Top-level boundary: this is a reporting utility, so log and return
        # rather than propagate.
        logger.error(f"❌ Error getting GCS stats: {e}")
|
|
|
|
|
if __name__ == "__main__":
    # Let the target account be chosen on the command line. The default is the
    # same as get_bucket_stats(), so running the script with no arguments
    # behaves exactly as before.
    parser = argparse.ArgumentParser(
        description="List GCS buckets and log per-bucket file statistics."
    )
    parser.add_argument(
        "--account-name",
        default="final_data",
        help="Account whose storage client is used (default: %(default)s).",
    )
    args = parser.parse_args()

    get_bucket_stats(account_name=args.account_name)
|
|
|