"""Gradio app that reports HuggingFace Hub storage usage for a user's models and datasets."""

import math

import gradio as gr
from huggingface_hub import HfApi


def format_size(size_bytes):
    """Converts a size in bytes to a human-readable format (KB, MB, GB)."""
    if size_bytes is None or size_bytes == 0:
        return "0 B"
    size_name = ("B", "KB", "MB", "GB", "TB")
    # Clamp the unit index so sizes >= 1 PB don't index past the tuple.
    i = min(int(math.floor(math.log(size_bytes, 1024))), len(size_name) - 1)
    p = math.pow(1024, i)
    s = round(size_bytes / p, 2)
    return f"{s} {size_name[i]}"


def check_models(token, progress=gr.Progress()):
    """List the user's models sorted by storage size.

    Args:
        token: HuggingFace API token with read access.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        Tuple of (summary markdown, table markdown). On error the first
        element carries the error message and the second is empty.
    """
    if not token or not token.strip():
        return "❌ Please enter your HuggingFace API token", ""

    try:
        progress(0, desc="Authenticating...")
        api = HfApi(token=token.strip())
        user_info = api.whoami()
        username = user_info["name"]

        progress(0.2, desc="Fetching model list...")
        all_models_basic = list(api.list_models(author=username))

        if not all_models_basic:
            return f"✅ User: **{username}**\n\nNo models found.", ""

        # Fetch per-model storage info; `usedStorage` is only populated when
        # explicitly requested via `expand`.
        detailed_models = []
        total_models = len(all_models_basic)

        for idx, model in enumerate(all_models_basic):
            progress(
                (0.2 + (0.7 * idx / total_models)),
                desc=f"Fetching storage info: {idx + 1}/{total_models}",
            )
            try:
                info = api.model_info(model.id, expand=["usedStorage"])
                if hasattr(info, 'usedStorage') and info.usedStorage is not None:
                    detailed_models.append({
                        'id': info.id,
                        'usedStorage': info.usedStorage,
                    })
            except Exception as e:
                # Best-effort: skip repos whose info cannot be fetched.
                print(f"Error fetching {model.id}: {e}")
                continue

        # Largest first.
        detailed_models.sort(key=lambda x: x['usedStorage'], reverse=True)

        total_bytes = sum(m['usedStorage'] for m in detailed_models)

        summary = f"""# 🤗 Models for **{username}**

📦 **Total Models:** {len(detailed_models)} (with storage info)
💾 **Total Storage:** {format_size(total_bytes)}
"""

        table_header = "| Size | Model |\n|------|-------|\n"
        table_rows = "\n".join([
            f"| {format_size(m['usedStorage'])} | [{m['id']}](https://huggingface.co/{m['id']}) |"
            for m in detailed_models
        ])
        if detailed_models:
            table = table_header + table_rows
        else:
            table = "No models with storage information found."

        progress(1.0, desc="Done!")
        return summary, table

    except Exception as e:
        return f"❌ Error: {str(e)}", ""


def check_datasets(token, progress=gr.Progress()):
    """List the user's datasets sorted by storage size.

    Args:
        token: HuggingFace API token with read access.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        Tuple of (summary markdown, table markdown). On error the first
        element carries the error message and the second is empty.
    """
    if not token or not token.strip():
        return "❌ Please enter your HuggingFace API token", ""

    try:
        progress(0, desc="Authenticating...")
        api = HfApi(token=token.strip())
        user_info = api.whoami()
        username = user_info["name"]

        progress(0.2, desc="Fetching dataset list...")
        all_datasets_basic = list(api.list_datasets(author=username))

        if not all_datasets_basic:
            return f"✅ User: **{username}**\n\nNo datasets found.", ""

        # Fetch per-dataset storage info (see check_models for the pattern).
        detailed_datasets = []
        total_datasets = len(all_datasets_basic)

        for idx, dataset in enumerate(all_datasets_basic):
            progress(
                (0.2 + (0.7 * idx / total_datasets)),
                desc=f"Fetching storage info: {idx + 1}/{total_datasets}",
            )
            try:
                info = api.dataset_info(dataset.id, expand=["usedStorage"])
                if hasattr(info, 'usedStorage') and info.usedStorage is not None:
                    detailed_datasets.append({
                        'id': info.id,
                        'usedStorage': info.usedStorage,
                    })
            except Exception as e:
                # Best-effort: skip repos whose info cannot be fetched.
                print(f"Error fetching {dataset.id}: {e}")
                continue

        # Largest first.
        detailed_datasets.sort(key=lambda x: x['usedStorage'], reverse=True)

        total_bytes = sum(d['usedStorage'] for d in detailed_datasets)

        summary = f"""# 📊 Datasets for **{username}**

📦 **Total Datasets:** {len(detailed_datasets)} (with storage info)
💾 **Total Storage:** {format_size(total_bytes)}
"""

        table_header = "| Size | Dataset |\n|------|-------|\n"
        table_rows = "\n".join([
            f"| {format_size(d['usedStorage'])} | [{d['id']}](https://huggingface.co/datasets/{d['id']}) |"
            for d in detailed_datasets
        ])
        if detailed_datasets:
            table = table_header + table_rows
        else:
            table = "No datasets with storage information found."

        progress(1.0, desc="Done!")
        return summary, table

    except Exception as e:
        return f"❌ Error: {str(e)}", ""


def check_both(token, progress=gr.Progress()):
    """Produce a combined storage report covering both models and datasets.

    Args:
        token: HuggingFace API token with read access.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        A single markdown string (this function feeds one Gradio output).
    """
    if not token or not token.strip():
        # NOTE: must return exactly one value — this function is wired to a
        # single Markdown output component.
        return "❌ Please enter your HuggingFace API token"

    try:
        progress(0, desc="Authenticating...")
        api = HfApi(token=token.strip())
        user_info = api.whoami()
        username = user_info["name"]

        # Models
        progress(0.1, desc="Fetching models...")
        all_models = list(api.list_models(author=username))
        detailed_models = []

        for idx, model in enumerate(all_models):
            progress(
                (0.1 + (0.35 * idx / max(len(all_models), 1))),
                desc=f"Fetching model info: {idx + 1}/{len(all_models)}",
            )
            try:
                info = api.model_info(model.id, expand=["usedStorage"])
                if hasattr(info, 'usedStorage') and info.usedStorage is not None:
                    detailed_models.append({'id': info.id, 'usedStorage': info.usedStorage})
            except Exception as e:
                # Best-effort: skip repos whose info cannot be fetched.
                print(f"Error fetching {model.id}: {e}")
                continue

        # Datasets
        progress(0.45, desc="Fetching datasets...")
        all_datasets = list(api.list_datasets(author=username))
        detailed_datasets = []

        for idx, dataset in enumerate(all_datasets):
            progress(
                (0.45 + (0.35 * idx / max(len(all_datasets), 1))),
                desc=f"Fetching dataset info: {idx + 1}/{len(all_datasets)}",
            )
            try:
                info = api.dataset_info(dataset.id, expand=["usedStorage"])
                if hasattr(info, 'usedStorage') and info.usedStorage is not None:
                    detailed_datasets.append({'id': info.id, 'usedStorage': info.usedStorage})
            except Exception as e:
                # Best-effort: skip repos whose info cannot be fetched.
                print(f"Error fetching {dataset.id}: {e}")
                continue

        model_bytes = sum(m['usedStorage'] for m in detailed_models)
        dataset_bytes = sum(d['usedStorage'] for d in detailed_datasets)
        total_bytes = model_bytes + dataset_bytes

        summary = f"""# 🎯 Complete Storage Report for **{username}**

## Models
📦 **Count:** {len(detailed_models)}
💾 **Storage:** {format_size(model_bytes)}

## Datasets
📊 **Count:** {len(detailed_datasets)}
💾 **Storage:** {format_size(dataset_bytes)}

---

## 🔥 Total Storage Used: {format_size(total_bytes)}
"""

        progress(1.0, desc="Done!")
        return summary

    except Exception as e:
        return f"❌ Error: {str(e)}"


# Create Gradio interface
with gr.Blocks(title="HuggingFace Storage Checker", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🤗 HuggingFace Storage Checker

Check your total storage usage for models and datasets.

### How to get your API token:
1. Go to [HuggingFace Settings > Tokens](https://huggingface.co/settings/tokens)
2. Create a new token with **READ** access
3. Copy and paste it below

⚠️ **Your token is processed securely and never stored.**
""")

    token_input = gr.Textbox(
        label="HuggingFace API Token",
        placeholder="hf_...",
        type="password",
        info="Your token is only used to fetch your storage information"
    )

    with gr.Tabs():
        with gr.Tab("📊 Overview"):
            overview_btn = gr.Button("Check Total Storage", variant="primary", size="lg")
            overview_output = gr.Markdown(label="Summary")
            overview_btn.click(
                fn=check_both,
                inputs=[token_input],
                outputs=[overview_output]
            )

        with gr.Tab("🤖 Models"):
            models_btn = gr.Button("Check Models", variant="primary", size="lg")
            models_summary = gr.Markdown(label="Summary")
            models_table = gr.Markdown(label="Models by Size")
            models_btn.click(
                fn=check_models,
                inputs=[token_input],
                outputs=[models_summary, models_table]
            )

        with gr.Tab("📁 Datasets"):
            datasets_btn = gr.Button("Check Datasets", variant="primary", size="lg")
            datasets_summary = gr.Markdown(label="Summary")
            datasets_table = gr.Markdown(label="Datasets by Size")
            datasets_btn.click(
                fn=check_datasets,
                inputs=[token_input],
                outputs=[datasets_summary, datasets_table]
            )

    gr.Markdown("""
---
### Why use this?
HuggingFace recently reduced free storage quotas. This tool helps you:
- 🔍 See which models/datasets use the most storage
- 🗑️ Identify items you can delete to free up space
- 📈 Track your total storage usage

**Note:** Only shows items where storage information is available.
""")

if __name__ == "__main__":
    demo.launch()