Spaces:

NoSlop4U
/

ncdu

Sleeping

File size: 9,889 Bytes

167baca

import gradio as gr
from huggingface_hub import HfApi
import math

def format_size(size_bytes):
    """Convert a size in bytes to a human-readable string (B, KB, MB, GB, TB).

    Binary units are used (1 KB = 1024 B) and the value is rounded to two
    decimal places.

    Args:
        size_bytes: Number of bytes. ``None`` and ``0`` are both reported
            as ``"0 B"``.

    Returns:
        A string such as ``"1.5 KB"``. Sizes of 1024 TB or more stay
        expressed in TB (the largest unit available) instead of indexing
        past the unit table; negative sizes are formatted by magnitude
        with a leading ``-``.
    """
    if size_bytes is None or size_bytes == 0:
        return "0 B"
    size_name = ("B", "KB", "MB", "GB", "TB")
    sign = "-" if size_bytes < 0 else ""
    value = float(abs(size_bytes))
    unit_index = 0
    # Repeated division (exact for a power-of-two divisor) replaces
    # math.log, which produced an out-of-range index for very large sizes,
    # a negative index for sizes below one byte, and raised on negatives.
    while value >= 1024 and unit_index < len(size_name) - 1:
        value /= 1024
        unit_index += 1
    return f"{sign}{round(value, 2)} {size_name[unit_index]}"

def check_models(token, progress=gr.Progress()):
    """Build a Markdown storage report for the token owner's models.

    Args:
        token: HuggingFace API token; surrounding whitespace is stripped
            before use.
        progress: Gradio progress tracker, updated as the report is built.

    Returns:
        A ``(summary, table)`` pair of Markdown strings. When the token is
        blank, the user has no models, or an exception escapes, the first
        element carries the message and the second is an empty string.
    """
    if not (token and token.strip()):
        return "❌ Please enter your HuggingFace API token", ""

    try:
        progress(0, desc="Authenticating...")
        api = HfApi(token=token.strip())
        username = api.whoami()["name"]

        progress(0.2, desc="Fetching model list...")
        listed = list(api.list_models(author=username))
        if not listed:
            return f"✅ User: **{username}**\n\nNo models found.", ""

        # Each repository is queried individually for its storage figure;
        # repositories that fail or report no figure are left out.
        sized = []  # (repo id, used storage) pairs
        count = len(listed)
        for position, model in enumerate(listed, start=1):
            fraction = 0.2 + (0.7 * (position - 1) / count)
            progress(fraction, desc=f"Fetching storage info: {position}/{count}")
            try:
                info = api.model_info(model.id, expand=["usedStorage"])
                if getattr(info, 'usedStorage', None) is not None:
                    sized.append((info.id, info.usedStorage))
            except Exception as e:
                print(f"Error fetching {model.id}: {e}")

        # Largest repositories first.
        ranked = sorted(sized, key=lambda pair: pair[1], reverse=True)
        total_bytes = sum(size for _, size in ranked)

        summary = f"""# 🤗 Models for **{username}**

📦 **Total Models:** {len(ranked)} (with storage info)
💾 **Total Storage:** {format_size(total_bytes)}
"""

        if ranked:
            rows = [
                f"| {format_size(size)} | [{repo_id}](https://huggingface.co/{repo_id}) |"
                for repo_id, size in ranked
            ]
            table = "| Size | Model |\n|------|-------|\n" + "\n".join(rows)
        else:
            table = "No models with storage information found."

        progress(1.0, desc="Done!")
        return summary, table

    except Exception as e:
        return f"❌ Error: {str(e)}", ""

def check_datasets(token, progress=gr.Progress()):
    """Build a Markdown storage report for the token owner's datasets.

    Args:
        token: HuggingFace API token; surrounding whitespace is stripped
            before use.
        progress: Gradio progress tracker, updated as the report is built.

    Returns:
        A ``(summary, table)`` pair of Markdown strings. When the token is
        blank, the user has no datasets, or an exception escapes, the first
        element carries the message and the second is an empty string.
    """
    if not (token and token.strip()):
        return "❌ Please enter your HuggingFace API token", ""

    try:
        progress(0, desc="Authenticating...")
        api = HfApi(token=token.strip())
        username = api.whoami()["name"]

        progress(0.2, desc="Fetching dataset list...")
        listed = list(api.list_datasets(author=username))
        if not listed:
            return f"✅ User: **{username}**\n\nNo datasets found.", ""

        # Each repository is queried individually for its storage figure;
        # repositories that fail or report no figure are left out.
        sized = []  # (repo id, used storage) pairs
        count = len(listed)
        for position, dataset in enumerate(listed, start=1):
            fraction = 0.2 + (0.7 * (position - 1) / count)
            progress(fraction, desc=f"Fetching storage info: {position}/{count}")
            try:
                info = api.dataset_info(dataset.id, expand=["usedStorage"])
                if getattr(info, 'usedStorage', None) is not None:
                    sized.append((info.id, info.usedStorage))
            except Exception as e:
                print(f"Error fetching {dataset.id}: {e}")

        # Largest repositories first.
        ranked = sorted(sized, key=lambda pair: pair[1], reverse=True)
        total_bytes = sum(size for _, size in ranked)

        summary = f"""# 📊 Datasets for **{username}**

📦 **Total Datasets:** {len(ranked)} (with storage info)
💾 **Total Storage:** {format_size(total_bytes)}
"""

        if ranked:
            rows = [
                f"| {format_size(size)} | [{repo_id}](https://huggingface.co/datasets/{repo_id}) |"
                for repo_id, size in ranked
            ]
            table = "| Size | Dataset |\n|------|-------|\n" + "\n".join(rows)
        else:
            table = "No datasets with storage information found."

        progress(1.0, desc="Done!")
        return summary, table

    except Exception as e:
        return f"❌ Error: {str(e)}", ""

def check_both(token, progress=gr.Progress()):
    """Build a combined Markdown storage summary for models and datasets.

    Args:
        token: HuggingFace API token; surrounding whitespace is stripped
            before use.
        progress: Gradio progress tracker, updated as the report is built.

    Returns:
        A single Markdown string: the storage report on success, or a
        message starting with "❌" when the token is blank or an exception
        escapes. Only repositories that report a storage figure are counted.
    """
    if not token or not token.strip():
        # Return one string like every other path: this function feeds a
        # single output component, so a 2-tuple here would not render.
        return "❌ Please enter your HuggingFace API token"

    try:
        progress(0, desc="Authenticating...")
        api = HfApi(token=token.strip())
        user_info = api.whoami()
        username = user_info["name"]

        # Models occupy the 0.10-0.45 span of the progress bar.
        progress(0.1, desc="Fetching models...")
        all_models = list(api.list_models(author=username))
        model_count = len(all_models)
        detailed_models = []

        for idx, model in enumerate(all_models):
            # The loop body only runs when model_count >= 1, so the
            # division is safe.
            progress((0.1 + (0.35 * idx / model_count)),
                     desc=f"Fetching model info: {idx + 1}/{model_count}")
            try:
                info = api.model_info(model.id, expand=["usedStorage"])
                if hasattr(info, 'usedStorage') and info.usedStorage is not None:
                    detailed_models.append({'id': info.id, 'usedStorage': info.usedStorage})
            except Exception as e:
                # Best effort: skip this repository, but log the failure
                # rather than swallowing it (and never trap
                # KeyboardInterrupt/SystemExit as a bare except would).
                print(f"Error fetching {model.id}: {e}")
                continue

        # Datasets occupy the 0.45-0.80 span of the progress bar.
        progress(0.45, desc="Fetching datasets...")
        all_datasets = list(api.list_datasets(author=username))
        dataset_count = len(all_datasets)
        detailed_datasets = []

        for idx, dataset in enumerate(all_datasets):
            progress((0.45 + (0.35 * idx / dataset_count)),
                     desc=f"Fetching dataset info: {idx + 1}/{dataset_count}")
            try:
                info = api.dataset_info(dataset.id, expand=["usedStorage"])
                if hasattr(info, 'usedStorage') and info.usedStorage is not None:
                    detailed_datasets.append({'id': info.id, 'usedStorage': info.usedStorage})
            except Exception as e:
                print(f"Error fetching {dataset.id}: {e}")
                continue

        model_bytes = sum(m['usedStorage'] for m in detailed_models)
        dataset_bytes = sum(d['usedStorage'] for d in detailed_datasets)
        total_bytes = model_bytes + dataset_bytes

        summary = f"""# 🎯 Complete Storage Report for **{username}**

## Models
📦 **Count:** {len(detailed_models)}
💾 **Storage:** {format_size(model_bytes)}

## Datasets
📊 **Count:** {len(detailed_datasets)}
💾 **Storage:** {format_size(dataset_bytes)}

---

## 🔥 Total Storage Used: {format_size(total_bytes)}
"""

        progress(1.0, desc="Done!")
        return summary

    except Exception as e:
        # Top-level UI boundary: surface the failure as the report text.
        return f"❌ Error: {str(e)}"

# Create Gradio interface
# Layout: intro text, one shared token field, three tabs (overview, models,
# datasets) that each pair a button with Markdown output, then a footer.
with gr.Blocks(title="HuggingFace Storage Checker", theme=gr.themes.Soft()) as demo:
    # Introductory text with instructions for obtaining a token.
    gr.Markdown("""
    # 🤗 HuggingFace Storage Checker
    
    Check your total storage usage for models and datasets.
    
    ### How to get your API token:
    1. Go to [HuggingFace Settings > Tokens](https://huggingface.co/settings/tokens)
    2. Create a new token with **READ** access
    3. Copy and paste it below
    
    ⚠️ **Your token is processed securely and never stored.**
    """)
    
    # Single token field shared by all tabs: every button below passes its
    # value as the sole input to its handler.
    token_input = gr.Textbox(
        label="HuggingFace API Token",
        placeholder="hf_...",
        type="password",
        info="Your token is only used to fetch your storage information"
    )
    
    with gr.Tabs():
        with gr.Tab("📊 Overview"):
            overview_btn = gr.Button("Check Total Storage", variant="primary", size="lg")
            overview_output = gr.Markdown(label="Summary")
            
            # check_both feeds a single output component.
            overview_btn.click(
                fn=check_both,
                inputs=[token_input],
                outputs=[overview_output]
            )
        
        with gr.Tab("🤖 Models"):
            models_btn = gr.Button("Check Models", variant="primary", size="lg")
            models_summary = gr.Markdown(label="Summary")
            models_table = gr.Markdown(label="Models by Size")
            
            # check_models returns (summary, table), mapped onto the two
            # Markdown components in that order.
            models_btn.click(
                fn=check_models,
                inputs=[token_input],
                outputs=[models_summary, models_table]
            )
        
        with gr.Tab("📁 Datasets"):
            datasets_btn = gr.Button("Check Datasets", variant="primary", size="lg")
            datasets_summary = gr.Markdown(label="Summary")
            datasets_table = gr.Markdown(label="Datasets by Size")
            
            # check_datasets returns (summary, table), mapped onto the two
            # Markdown components in that order.
            datasets_btn.click(
                fn=check_datasets,
                inputs=[token_input],
                outputs=[datasets_summary, datasets_table]
            )
    
    # Footer explaining the purpose of the tool.
    gr.Markdown("""
    ---
    
    ### Why use this?
    HuggingFace recently reduced free storage quotas. This tool helps you:
    - 🔍 See which models/datasets use the most storage
    - 🗑️ Identify items you can delete to free up space
    - 📈 Track your total storage usage
    
    **Note:** Only shows items where storage information is available.
    """)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()