|
|
import gradio as gr |
|
|
from huggingface_hub import HfApi |
|
|
import math |
|
|
|
|
|
def format_size(size_bytes):
    """Convert a size in bytes to a human-readable string (B, KB, MB, GB, TB).

    Args:
        size_bytes: Number of bytes. ``None`` and ``0`` are both rendered
            as ``"0 B"``.

    Returns:
        The size scaled by powers of 1024 and rounded to two decimals,
        followed by its unit, e.g. ``"1.5 KB"``. Sizes beyond the largest
        unit stay expressed in TB (``1024**5`` -> ``"1024.0 TB"``), values
        below one byte stay in B, and negative values keep their sign.
    """
    if not size_bytes:
        return "0 B"

    units = ("B", "KB", "MB", "GB", "TB")
    value = float(size_bytes)
    unit_index = 0
    # Scale step by step instead of taking a logarithm: the index can never
    # leave the bounds of ``units`` (huge, fractional or negative inputs), and
    # dividing by a power of two is exact in floating point.
    while abs(value) >= 1024 and unit_index < len(units) - 1:
        value /= 1024
        unit_index += 1

    return f"{round(value, 2)} {units[unit_index]}"
|
|
|
|
|
def check_models(token, progress=gr.Progress()):
    """Build a Markdown storage report for the token owner's models.

    Args:
        token: Hugging Face API token as typed by the user; surrounding
            whitespace is stripped before use.
        progress: Gradio progress tracker, called with a completion fraction
            and a description while the report is built.

    Returns:
        A ``(summary, table)`` pair of Markdown strings. When the token is
        blank, the user has no models, or any step fails, the first element
        carries the message and the second is an empty string.
    """
    # NOTE(review): the emoji in the strings below were reconstructed from
    # mis-encoded text in the source; confirm the exact glyphs.
    if not token or not token.strip():
        return "❌ Please enter your HuggingFace API token", ""

    try:
        progress(0, desc="Authenticating...")
        api = HfApi(token=token.strip())
        username = api.whoami()["name"]

        progress(0.2, desc="Fetching model list...")
        all_models_basic = list(api.list_models(author=username))
        if not all_models_basic:
            return f"✅ User: **{username}**\n\nNo models found.", ""

        detailed_models = []
        total_models = len(all_models_basic)
        for idx, model in enumerate(all_models_basic):
            # The per-model lookups occupy the 20%-90% span of the progress bar.
            progress(
                0.2 + (0.7 * idx / total_models),
                desc=f"Fetching storage info: {idx + 1}/{total_models}",
            )
            try:
                info = api.model_info(model.id, expand=["usedStorage"])
                used_storage = getattr(info, "usedStorage", None)
                if used_storage is not None:
                    detailed_models.append(
                        {"id": info.id, "usedStorage": used_storage}
                    )
            except Exception as e:
                # Best effort: one failing repository must not abort the report.
                print(f"Error fetching {model.id}: {e}")
                continue

        # Largest repositories first.
        detailed_models.sort(key=lambda m: m["usedStorage"], reverse=True)
        total_bytes = sum(m["usedStorage"] for m in detailed_models)

        summary = (
            f"# 🤖 Models for **{username}**\n"
            "\n"
            f"📦 **Total Models:** {len(detailed_models)} (with storage info)\n"
            f"💾 **Total Storage:** {format_size(total_bytes)}\n"
        )

        if detailed_models:
            table_header = "| Size | Model |\n|------|-------|\n"
            table_rows = "\n".join(
                f"| {format_size(m['usedStorage'])} | [{m['id']}](https://huggingface.co/{m['id']}) |"
                for m in detailed_models
            )
            table = table_header + table_rows
        else:
            table = "No models with storage information found."

        progress(1.0, desc="Done!")
        return summary, table

    except Exception as e:
        # UI boundary: surface any failure (bad token, network) as a message.
        return f"❌ Error: {e}", ""
|
|
|
|
|
def check_datasets(token, progress=gr.Progress()):
    """Build a Markdown storage report for the token owner's datasets.

    Args:
        token: Hugging Face API token as typed by the user; surrounding
            whitespace is stripped before use.
        progress: Gradio progress tracker, called with a completion fraction
            and a description while the report is built.

    Returns:
        A ``(summary, table)`` pair of Markdown strings. When the token is
        blank, the user has no datasets, or any step fails, the first element
        carries the message and the second is an empty string.
    """
    # NOTE(review): the emoji in the strings below were reconstructed from
    # mis-encoded text in the source; confirm the exact glyphs.
    if not token or not token.strip():
        return "❌ Please enter your HuggingFace API token", ""

    try:
        progress(0, desc="Authenticating...")
        api = HfApi(token=token.strip())
        username = api.whoami()["name"]

        progress(0.2, desc="Fetching dataset list...")
        all_datasets_basic = list(api.list_datasets(author=username))
        if not all_datasets_basic:
            return f"✅ User: **{username}**\n\nNo datasets found.", ""

        detailed_datasets = []
        total_datasets = len(all_datasets_basic)
        for idx, dataset in enumerate(all_datasets_basic):
            # The per-dataset lookups occupy the 20%-90% span of the progress bar.
            progress(
                0.2 + (0.7 * idx / total_datasets),
                desc=f"Fetching storage info: {idx + 1}/{total_datasets}",
            )
            try:
                info = api.dataset_info(dataset.id, expand=["usedStorage"])
                used_storage = getattr(info, "usedStorage", None)
                if used_storage is not None:
                    detailed_datasets.append(
                        {"id": info.id, "usedStorage": used_storage}
                    )
            except Exception as e:
                # Best effort: one failing repository must not abort the report.
                print(f"Error fetching {dataset.id}: {e}")
                continue

        # Largest repositories first.
        detailed_datasets.sort(key=lambda d: d["usedStorage"], reverse=True)
        total_bytes = sum(d["usedStorage"] for d in detailed_datasets)

        summary = (
            f"# 📊 Datasets for **{username}**\n"
            "\n"
            f"📦 **Total Datasets:** {len(detailed_datasets)} (with storage info)\n"
            f"💾 **Total Storage:** {format_size(total_bytes)}\n"
        )

        if detailed_datasets:
            table_header = "| Size | Dataset |\n|------|-------|\n"
            table_rows = "\n".join(
                f"| {format_size(d['usedStorage'])} | [{d['id']}](https://huggingface.co/datasets/{d['id']}) |"
                for d in detailed_datasets
            )
            table = table_header + table_rows
        else:
            table = "No datasets with storage information found."

        progress(1.0, desc="Done!")
        return summary, table

    except Exception as e:
        # UI boundary: surface any failure (bad token, network) as a message.
        return f"❌ Error: {e}", ""
|
|
|
|
|
def check_both(token, progress=gr.Progress()):
    """Build a combined Markdown storage report for models and datasets.

    Args:
        token: Hugging Face API token as typed by the user; surrounding
            whitespace is stripped before use.
        progress: Gradio progress tracker, called with a completion fraction
            and a description while the report is built.

    Returns:
        A single Markdown string: the report, or an error message when the
        token is blank or any step fails. Every path returns exactly one
        string because the handler feeds a single output component.
    """
    # NOTE(review): the emoji in the strings below were reconstructed from
    # mis-encoded text in the source; confirm the exact glyphs.
    if not token or not token.strip():
        # One value only: returning a (message, "") pair here would not match
        # the single output this function is wired to.
        return "❌ Please enter your HuggingFace API token"

    try:
        progress(0, desc="Authenticating...")
        api = HfApi(token=token.strip())
        username = api.whoami()["name"]

        # Models take the 10%-45% span of the progress bar.
        progress(0.1, desc="Fetching models...")
        all_models = list(api.list_models(author=username))
        model_count = len(all_models)
        detailed_models = []
        for idx, model in enumerate(all_models):
            progress(
                0.1 + (0.35 * idx / max(model_count, 1)),
                desc=f"Fetching model info: {idx + 1}/{model_count}",
            )
            try:
                info = api.model_info(model.id, expand=["usedStorage"])
                used_storage = getattr(info, "usedStorage", None)
                if used_storage is not None:
                    detailed_models.append(
                        {"id": info.id, "usedStorage": used_storage}
                    )
            except Exception as e:
                # Best effort, but log the failure instead of hiding it.
                print(f"Error fetching {model.id}: {e}")
                continue

        # Datasets take the 45%-80% span of the progress bar.
        progress(0.45, desc="Fetching datasets...")
        all_datasets = list(api.list_datasets(author=username))
        dataset_count = len(all_datasets)
        detailed_datasets = []
        for idx, dataset in enumerate(all_datasets):
            progress(
                0.45 + (0.35 * idx / max(dataset_count, 1)),
                desc=f"Fetching dataset info: {idx + 1}/{dataset_count}",
            )
            try:
                info = api.dataset_info(dataset.id, expand=["usedStorage"])
                used_storage = getattr(info, "usedStorage", None)
                if used_storage is not None:
                    detailed_datasets.append(
                        {"id": info.id, "usedStorage": used_storage}
                    )
            except Exception as e:
                # Best effort, but log the failure instead of hiding it.
                print(f"Error fetching {dataset.id}: {e}")
                continue

        model_bytes = sum(m["usedStorage"] for m in detailed_models)
        dataset_bytes = sum(d["usedStorage"] for d in detailed_datasets)
        total_bytes = model_bytes + dataset_bytes

        summary = (
            f"# 🎯 Complete Storage Report for **{username}**\n"
            "\n"
            "## Models\n"
            f"📦 **Count:** {len(detailed_models)}\n"
            f"💾 **Storage:** {format_size(model_bytes)}\n"
            "\n"
            "## Datasets\n"
            f"📊 **Count:** {len(detailed_datasets)}\n"
            f"💾 **Storage:** {format_size(dataset_bytes)}\n"
            "\n"
            "---\n"
            "\n"
            f"## 🔥 Total Storage Used: {format_size(total_bytes)}\n"
        )

        progress(1.0, desc="Done!")
        return summary

    except Exception as e:
        # UI boundary: surface any failure (bad token, network) as a message.
        return f"❌ Error: {e}"
|
|
|
|
|
|
|
|
# Gradio UI: one shared token field feeding three report tabs.
# NOTE(review): the emoji in the labels and Markdown below were reconstructed
# from mis-encoded text in the source; confirm the exact glyphs.
with gr.Blocks(title="HuggingFace Storage Checker", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🤗 HuggingFace Storage Checker

    Check your total storage usage for models and datasets.

    ### How to get your API token:
    1. Go to [HuggingFace Settings > Tokens](https://huggingface.co/settings/tokens)
    2. Create a new token with **READ** access
    3. Copy and paste it below

    ⚠️ **Your token is processed securely and never stored.**
    """)

    # Password-type textbox so the token is masked while typing.
    token_input = gr.Textbox(
        label="HuggingFace API Token",
        placeholder="hf_...",
        type="password",
        info="Your token is only used to fetch your storage information",
    )

    with gr.Tabs():
        with gr.Tab("📊 Overview"):
            overview_btn = gr.Button("Check Total Storage", variant="primary", size="lg")
            overview_output = gr.Markdown(label="Summary")

            # check_both feeds this single Markdown output.
            overview_btn.click(
                fn=check_both,
                inputs=[token_input],
                outputs=[overview_output],
            )

        with gr.Tab("🤖 Models"):
            models_btn = gr.Button("Check Models", variant="primary", size="lg")
            models_summary = gr.Markdown(label="Summary")
            models_table = gr.Markdown(label="Models by Size")

            # check_models returns (summary, table), one value per output.
            models_btn.click(
                fn=check_models,
                inputs=[token_input],
                outputs=[models_summary, models_table],
            )

        with gr.Tab("📁 Datasets"):
            datasets_btn = gr.Button("Check Datasets", variant="primary", size="lg")
            datasets_summary = gr.Markdown(label="Summary")
            datasets_table = gr.Markdown(label="Datasets by Size")

            # check_datasets returns (summary, table), one value per output.
            datasets_btn.click(
                fn=check_datasets,
                inputs=[token_input],
                outputs=[datasets_summary, datasets_table],
            )

    gr.Markdown("""
    ---

    ### Why use this?
    HuggingFace recently reduced free storage quotas. This tool helps you:
    - 🔍 See which models/datasets use the most storage
    - 🗑️ Identify items you can delete to free up space
    - 📊 Track your total storage usage

    **Note:** Only shows items where storage information is available.
    """)


# Start the app only when the file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()