File size: 2,218 Bytes
8b6d4cc
 
 
f20025d
8b6d4cc
 
 
 
 
 
 
 
 
 
 
 
 
 
c25e6e9
503d4ac
8b6d4cc
 
 
503d4ac
8b6d4cc
 
 
c25e6e9
8b6d4cc
 
c25e6e9
8b6d4cc
 
 
c25e6e9
503d4ac
8b6d4cc
 
 
503d4ac
8b6d4cc
 
503d4ac
c25e6e9
503d4ac
8b6d4cc
 
 
 
 
 
503d4ac
8b6d4cc
 
 
c25e6e9
503d4ac
8b6d4cc
c25e6e9
8b6d4cc
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import argparse
import os
import posixpath
from collections import defaultdict

from src.logger_config import logger
from src.google_src.gcloud_wrapper import get_default_wrapper
from src.config import get_config_value

def _summarize_blobs(blobs):
    """Aggregate object count, total bytes and per-folder file counts in one pass.

    Args:
        blobs: Iterable of objects exposing ``name`` and ``size`` attributes.
            It is consumed lazily, so a paged listing never has to be held
            in memory as a whole.

    Returns:
        A tuple ``(total_files, total_size, folder_counts)``:
        ``total_files`` counts every object seen, ``total_size`` sums the
        truthy ``size`` values, and ``folder_counts`` maps each parent path
        (``"(root)"`` when there is none) to the number of objects directly
        under it whose name does not end in ``/``.
    """
    total_files = 0
    total_size = 0
    folder_counts = defaultdict(int)

    for blob in blobs:
        total_files += 1
        # ``size`` may be None/0; treat both as contributing nothing.
        total_size += blob.size or 0

        name = blob.name
        # Names ending in "/" are presumably folder placeholder objects:
        # they count towards the total but not towards any folder's files.
        if name.endswith("/"):
            continue

        # Object names always use "/" as separator, so split with posixpath
        # instead of the host OS rules that os.path would apply.
        folder_counts[posixpath.dirname(name) or "(root)"] += 1

    return total_files, total_size, folder_counts


def get_bucket_stats(
    account_name: str = "final_data"
) -> None:
    """
    List GCS buckets and provide statistics (file counts, subfolders).
    Iterates through ALL available buckets for the account.

    The storage client is obtained from ``get_default_wrapper()`` via
    ``get_storage_client(account_name)``. For every bucket the total object
    count, total size in MB and per-folder file counts are written to the
    log; nothing is returned.

    Args:
        account_name: Account identifier passed to ``get_storage_client``.

    Returns:
        None. All results and errors are reported through ``logger``; no
        exception derived from ``Exception`` propagates to the caller.
    """
    try:
        wrapper = get_default_wrapper()
        client = wrapper.get_storage_client(account_name)

        logger.info(f"Fetching GCS Stats for Account: {account_name}")

        try:
            buckets = list(client.list_buckets())
        except Exception as e:
            logger.error(f"Failed to list buckets: {e}")
            return

        if not buckets:
            logger.info("No buckets found.")
            return

        logger.info(f"Found {len(buckets)} buckets. Analyzing...")

        for bucket_resource in buckets:
            bucket_name = bucket_resource.name
            logger.info(f"Bucket: {bucket_name}")

            try:
                # Stream the listing straight into the aggregator instead of
                # materialising every blob of the bucket in a list first.
                total_files, total_size, folder_counts = _summarize_blobs(
                    client.list_blobs(client.bucket(bucket_name))
                )
            except Exception as e:
                # One unreadable bucket must not abort the whole report, but
                # it should stand out from the regular info lines.
                logger.warning(f"   Access Denied or Error: {e}")
                continue

            logger.info(f"   Files: {total_files}, Size: {total_size / (1024*1024):.2f} MB")

            for folder, count in sorted(folder_counts.items()):
                logger.info(f"   {folder}: {count} files")

    except Exception as e:
        logger.error(f"❌ Error getting GCS stats: {e}")

if __name__ == "__main__":
    # Script entry point: run the report with the function's default account.
    # The CLI arguments for bucket/prefix were removed because the requested
    # function signature was simplified, so nothing is parsed here.
    get_bucket_stats()