ncdu / app.py
gghfez's picture
Create app.py
167baca verified
import gradio as gr
from huggingface_hub import HfApi
import math
def format_size(size_bytes):
    """Convert a byte count into a human-readable string with 1024-based units.

    Args:
        size_bytes: Number of bytes (int or float). ``None`` and ``0`` are
            both rendered as ``"0 B"``.

    Returns:
        A string such as ``"1.5 KB"``: the scaled value rounded to two
        decimals followed by the unit. Negative inputs keep their sign, and
        values beyond the largest known unit are expressed in that unit
        instead of raising.
    """
    if size_bytes is None or size_bytes == 0:
        return "0 B"
    units = ("B", "KB", "MB", "GB", "TB", "PB", "EB")
    sign = "-" if size_bytes < 0 else ""
    value = float(abs(size_bytes))
    index = 0
    # Repeated division (exact for powers of two) replaces math.log, which
    # raises on negatives, yields a negative index for values below 1, and
    # can land on the wrong side of an exact power of 1024. The bound on
    # ``index`` keeps very large sizes from running off the unit table.
    while value >= 1024 and index < len(units) - 1:
        value /= 1024
        index += 1
    return f"{sign}{round(value, 2)} {units[index]}"
def check_models(token, progress=gr.Progress()):
    """Build a Markdown storage report for the models owned by the token's user.

    Args:
        token: HuggingFace API token; surrounding whitespace is stripped.
        progress: Progress callback, invoked as ``progress(fraction, desc=...)``
            while the scan advances.

    Returns:
        A ``(summary, table)`` pair of Markdown strings. When the token is
        missing, the user has no models, or any step fails, the first element
        carries the message and the second is an empty string.
    """
    cleaned_token = token.strip() if token else ""
    if not cleaned_token:
        return "❌ Please enter your HuggingFace API token", ""

    try:
        progress(0, desc="Authenticating...")
        api = HfApi(token=cleaned_token)
        username = api.whoami()["name"]

        progress(0.2, desc="Fetching model list...")
        listed = list(api.list_models(author=username))
        if not listed:
            return f"βœ… User: **{username}**\n\nNo models found.", ""

        # One info request per model; failures are logged and skipped so a
        # single bad repo does not abort the whole report.
        listed_count = len(listed)
        sized = []
        for position, entry in enumerate(listed):
            progress(
                (0.2 + (0.7 * position / listed_count)),
                desc=f"Fetching storage info: {position + 1}/{listed_count}",
            )
            try:
                details = api.model_info(entry.id, expand=["usedStorage"])
                used = getattr(details, 'usedStorage', None)
                if used is not None:
                    sized.append({'id': details.id, 'usedStorage': used})
            except Exception as err:
                print(f"Error fetching {entry.id}: {err}")
                continue

        # Largest first.
        ranked = sorted(sized, key=lambda item: item['usedStorage'], reverse=True)
        combined_bytes = sum(item['usedStorage'] for item in ranked)

        summary = (
            f"# πŸ€— Models for **{username}**\n"
            f"πŸ“¦ **Total Models:** {len(ranked)} (with storage info)\n"
            f"πŸ’Ύ **Total Storage:** {format_size(combined_bytes)}\n"
        )

        if ranked:
            rows = [
                f"| {format_size(item['usedStorage'])} | [{item['id']}](https://huggingface.co/{item['id']}) |"
                for item in ranked
            ]
            table = "| Size | Model |\n|------|-------|\n" + "\n".join(rows)
        else:
            table = "No models with storage information found."

        progress(1.0, desc="Done!")
        return summary, table
    except Exception as err:
        return f"❌ Error: {err}", ""
def check_datasets(token, progress=gr.Progress()):
    """Build a Markdown storage report for the datasets owned by the token's user.

    Args:
        token: HuggingFace API token; surrounding whitespace is stripped.
        progress: Progress callback, invoked as ``progress(fraction, desc=...)``
            while the scan advances.

    Returns:
        A ``(summary, table)`` pair of Markdown strings. When the token is
        missing, the user has no datasets, or any step fails, the first
        element carries the message and the second is an empty string.
    """
    cleaned_token = token.strip() if token else ""
    if not cleaned_token:
        return "❌ Please enter your HuggingFace API token", ""

    try:
        progress(0, desc="Authenticating...")
        api = HfApi(token=cleaned_token)
        username = api.whoami()["name"]

        progress(0.2, desc="Fetching dataset list...")
        listed = list(api.list_datasets(author=username))
        if not listed:
            return f"βœ… User: **{username}**\n\nNo datasets found.", ""

        # One info request per dataset; failures are logged and skipped so a
        # single bad repo does not abort the whole report.
        listed_count = len(listed)
        sized = []
        for position, entry in enumerate(listed):
            progress(
                (0.2 + (0.7 * position / listed_count)),
                desc=f"Fetching storage info: {position + 1}/{listed_count}",
            )
            try:
                details = api.dataset_info(entry.id, expand=["usedStorage"])
                used = getattr(details, 'usedStorage', None)
                if used is not None:
                    sized.append({'id': details.id, 'usedStorage': used})
            except Exception as err:
                print(f"Error fetching {entry.id}: {err}")
                continue

        # Largest first.
        ranked = sorted(sized, key=lambda item: item['usedStorage'], reverse=True)
        combined_bytes = sum(item['usedStorage'] for item in ranked)

        summary = (
            f"# πŸ“Š Datasets for **{username}**\n"
            f"πŸ“¦ **Total Datasets:** {len(ranked)} (with storage info)\n"
            f"πŸ’Ύ **Total Storage:** {format_size(combined_bytes)}\n"
        )

        if ranked:
            rows = [
                f"| {format_size(item['usedStorage'])} | [{item['id']}](https://huggingface.co/datasets/{item['id']}) |"
                for item in ranked
            ]
            table = "| Size | Dataset |\n|------|-------|\n" + "\n".join(rows)
        else:
            table = "No datasets with storage information found."

        progress(1.0, desc="Done!")
        return summary, table
    except Exception as err:
        return f"❌ Error: {err}", ""
def _collect_used_storage(repos, fetch_info, progress, start, span, label):
    """Query ``usedStorage`` for each repo, skipping the ones that fail.

    Args:
        repos: Listed repos; each item must expose an ``id`` attribute.
        fetch_info: Callable invoked as
            ``fetch_info(repo_id, expand=["usedStorage"])``.
        progress: Progress callback invoked as ``progress(fraction, desc=...)``.
        start: Progress fraction reported for the first repo.
        span: Width of the progress range spread across all repos.
        label: Noun used in the progress text (e.g. ``"model"``).

    Returns:
        A list of ``{'id': ..., 'usedStorage': ...}`` dicts for the repos
        whose info carried a non-``None`` ``usedStorage``.
    """
    total = len(repos)
    collected = []
    for idx, repo in enumerate(repos):
        progress((start + (span * idx / max(total, 1))),
                 desc=f"Fetching {label} info: {idx + 1}/{total}")
        try:
            info = fetch_info(repo.id, expand=["usedStorage"])
            used = getattr(info, 'usedStorage', None)
            if used is not None:
                collected.append({'id': info.id, 'usedStorage': used})
        except Exception as e:
            # Best-effort per repo: log and move on, but let
            # KeyboardInterrupt/SystemExit propagate (a bare ``except:``
            # would swallow those too).
            print(f"Error fetching {repo.id}: {e}")
            continue
    return collected


def check_both(token, progress=gr.Progress()):
    """Build a combined Markdown storage summary for models and datasets.

    Args:
        token: HuggingFace API token; surrounding whitespace is stripped.
        progress: Progress callback, invoked as ``progress(fraction, desc=...)``
            while the scan advances.

    Returns:
        A single Markdown string on every path: the report on success, or a
        message when the token is missing or a step fails.
    """
    if not token or not token.strip():
        # Return one string, like every other path: this handler is wired to
        # a single output component, so a 2-tuple here does not fit.
        return "❌ Please enter your HuggingFace API token"
    try:
        progress(0, desc="Authenticating...")
        api = HfApi(token=token.strip())
        username = api.whoami()["name"]

        # Models occupy the 0.10-0.45 slice of the progress bar.
        progress(0.1, desc="Fetching models...")
        all_models = list(api.list_models(author=username))
        detailed_models = _collect_used_storage(
            all_models, api.model_info, progress, 0.1, 0.35, "model"
        )

        # Datasets occupy the 0.45-0.80 slice.
        progress(0.45, desc="Fetching datasets...")
        all_datasets = list(api.list_datasets(author=username))
        detailed_datasets = _collect_used_storage(
            all_datasets, api.dataset_info, progress, 0.45, 0.35, "dataset"
        )

        model_bytes = sum(m['usedStorage'] for m in detailed_models)
        dataset_bytes = sum(d['usedStorage'] for d in detailed_datasets)
        total_bytes = model_bytes + dataset_bytes

        summary = (
            f"# 🎯 Complete Storage Report for **{username}**\n"
            f"## Models\n"
            f"πŸ“¦ **Count:** {len(detailed_models)}\n"
            f"πŸ’Ύ **Storage:** {format_size(model_bytes)}\n"
            f"## Datasets\n"
            f"πŸ“Š **Count:** {len(detailed_datasets)}\n"
            f"πŸ’Ύ **Storage:** {format_size(dataset_bytes)}\n"
            f"---\n"
            f"## πŸ”₯ Total Storage Used: {format_size(total_bytes)}\n"
        )
        progress(1.0, desc="Done!")
        return summary
    except Exception as e:
        return f"❌ Error: {str(e)}"
# Create Gradio interface
# Module-level UI definition: the Blocks app is bound to `demo`, which the
# guard at the bottom of the file launches when run as a script.
with gr.Blocks(title="HuggingFace Storage Checker", theme=gr.themes.Soft()) as demo:
    # Intro text: what the tool does and how to obtain a token.
    gr.Markdown("""
# πŸ€— HuggingFace Storage Checker
Check your total storage usage for models and datasets.
### How to get your API token:
1. Go to [HuggingFace Settings > Tokens](https://huggingface.co/settings/tokens)
2. Create a new token with **READ** access
3. Copy and paste it below
⚠️ **Your token is processed securely and never stored.**
""")
    # Single token field; all three click handlers below take it as their only input.
    token_input = gr.Textbox(
        label="HuggingFace API Token",
        placeholder="hf_...",
        type="password",
        info="Your token is only used to fetch your storage information"
    )
    with gr.Tabs():
        # Overview tab: check_both returns one Markdown string -> one output.
        with gr.Tab("πŸ“Š Overview"):
            overview_btn = gr.Button("Check Total Storage", variant="primary", size="lg")
            overview_output = gr.Markdown(label="Summary")
            overview_btn.click(
                fn=check_both,
                inputs=[token_input],
                outputs=[overview_output]
            )
        # Models tab: check_models returns (summary, table) -> two outputs.
        with gr.Tab("πŸ€– Models"):
            models_btn = gr.Button("Check Models", variant="primary", size="lg")
            models_summary = gr.Markdown(label="Summary")
            models_table = gr.Markdown(label="Models by Size")
            models_btn.click(
                fn=check_models,
                inputs=[token_input],
                outputs=[models_summary, models_table]
            )
        # Datasets tab: check_datasets returns (summary, table) -> two outputs.
        with gr.Tab("πŸ“ Datasets"):
            datasets_btn = gr.Button("Check Datasets", variant="primary", size="lg")
            datasets_summary = gr.Markdown(label="Summary")
            datasets_table = gr.Markdown(label="Datasets by Size")
            datasets_btn.click(
                fn=check_datasets,
                inputs=[token_input],
                outputs=[datasets_summary, datasets_table]
            )
    # Footer text explaining the motivation for the tool.
    gr.Markdown("""
---
### Why use this?
HuggingFace recently reduced free storage quotas. This tool helps you:
- πŸ” See which models/datasets use the most storage
- πŸ—‘οΈ Identify items you can delete to free up space
- πŸ“ˆ Track your total storage usage
**Note:** Only shows items where storage information is available.
""")
# Start the app only when executed directly, not when imported.
if __name__ == "__main__":
    demo.launch()