# Helion-V1.5 / create_model_badges.py
# (HF upload residue preserved below as comments so the module parses)
# Trouter-Library's picture
# Create create_model_badges.py
# 5359da1 verified
"""
Create Model Badges and Stats Display
Generates the parameter count, size, and download badges for your model card
"""
import json
from pathlib import Path
def calculate_model_stats(config_path: str = "config.json") -> "dict | None":
    """
    Calculate model statistics from a HuggingFace-style config.json.

    Args:
        config_path: Path to the model's config.json file.

    Returns:
        Dictionary with parameter counts, estimated sizes, and key
        architecture fields, or None if the config cannot be read.
    """
    try:
        with open(config_path) as f:
            config = json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        # Narrow catch: only I/O and parse failures are expected here;
        # anything else would be a programming error and should propagate.
        print(f"Error calculating stats: {e}")
        return None
    if not isinstance(config, dict):
        # A valid-JSON file that isn't an object (e.g. a list) would
        # otherwise crash on .get() below.
        print(f"Error calculating stats: unexpected config format in {config_path}")
        return None

    # Architecture hyperparameters (defaults match Llama-2 7B).
    vocab_size = config.get("vocab_size", 32000)
    hidden_size = config.get("hidden_size", 4096)
    num_layers = config.get("num_hidden_layers", 32)
    intermediate_size = config.get("intermediate_size", 11008)

    # Token embedding table.
    embedding_params = vocab_size * hidden_size
    # Attention: Q, K, V, O projections (assumes square projections,
    # i.e. no grouped-query attention -- TODO confirm for this model).
    attention_params = 4 * (hidden_size * hidden_size)
    # MLP: gate, up, down projections.
    mlp_params = hidden_size * intermediate_size * 3
    # Two LayerNorms per transformer layer.
    layernorm_params = hidden_size * 2
    per_layer_params = attention_params + mlp_params + layernorm_params

    total_params = embedding_params + (per_layer_params * num_layers)
    total_params += hidden_size  # Final LayerNorm
    # LM head counted separately (assumes untied embeddings -- TODO confirm).
    total_params += vocab_size * hidden_size

    params_b = total_params / 1e9
    # Estimated sizes: 2 bytes/param for FP16, 0.5 bytes/param for 4-bit.
    size_gb = (total_params * 2) / (1024 ** 3)
    size_4bit = (total_params * 0.5) / (1024 ** 3)

    return {
        "total_parameters": total_params,
        "parameters_billions": round(params_b, 2),
        "size_fp16_gb": round(size_gb, 2),
        "size_4bit_gb": round(size_4bit, 2),
        "vocab_size": vocab_size,
        "hidden_size": hidden_size,
        "num_layers": num_layers,
        "context_length": config.get("max_position_embeddings", 4096)
    }
def format_number(num: int) -> str:
    """Return *num* as a short human-readable string (e.g. 6.7B, 32.0K)."""
    # Walk the magnitude table from largest to smallest suffix.
    for threshold, suffix in ((1e9, "B"), (1e6, "M"), (1e3, "K")):
        if num >= threshold:
            return f"{num / threshold:.1f}{suffix}"
    return str(num)
def generate_readme_header(stats: dict) -> str:
    """
    Render the README header markdown (badges + spec table) from stats.

    Args:
        stats: Model statistics dictionary (see calculate_model_stats).

    Returns:
        Markdown formatted header.
    """
    # Hoist repeated lookups into locals so the template stays readable.
    total = stats["total_parameters"]
    pretty_params = format_number(total)
    fp16_gb = stats["size_fp16_gb"]
    ctx = stats["context_length"]
    return f"""
<div align="center">
# 🤖 Helion-V1.5
**Advanced Conversational AI with Enhanced Capabilities**
[![Model](https://img.shields.io/badge/🤗-Model-yellow)](https://huggingface.co/DeepXR/Helion-V1.5)
[![Parameters](https://img.shields.io/badge/Parameters-{pretty_params}-blue)](#)
[![Size](https://img.shields.io/badge/Size-{fp16_gb}GB-green)](#)
[![Context](https://img.shields.io/badge/Context-{ctx}_tokens-orange)](#)
[![License](https://img.shields.io/badge/License-Apache_2.0-red)](LICENSE)
[![AutoTrain](https://img.shields.io/badge/AutoTrain-Compatible-purple)](https://huggingface.co/autotrain)
</div>
---
## 📊 Model Specifications
| Specification | Value |
|---------------|-------|
| **Parameters** | {pretty_params} ({total:,}) |
| **Architecture** | Llama-2 |
| **Layers** | {stats['num_layers']} |
| **Hidden Size** | {stats['hidden_size']} |
| **Vocab Size** | {stats['vocab_size']:,} |
| **Context Length** | {ctx:,} tokens |
| **Precision** | bfloat16 |
| **Model Size (FP16)** | {fp16_gb} GB |
| **Model Size (4-bit)** | {stats['size_4bit_gb']} GB |
---
"""
def generate_stats_json(stats: dict, output_file: str = "model_stats.json"):
    """
    Write model statistics to a JSON file for programmatic access.

    Args:
        stats: Model statistics as produced by calculate_model_stats().
        output_file: Destination filename.
    """
    # Hoist the values used more than once below.
    params = stats["total_parameters"]
    fp16_gb = stats["size_fp16_gb"]
    int4_gb = stats["size_4bit_gb"]
    # Key order is preserved as-is: it determines the on-disk JSON layout.
    payload = {
        "model_name": "Helion-V1.5",
        "architecture": "Llama-2",
        "parameters": {
            "total": params,
            "formatted": format_number(params),
            "billions": stats["parameters_billions"],
        },
        "size": {
            "fp16_gb": fp16_gb,
            "fp32_gb": fp16_gb * 2,
            "int8_gb": fp16_gb / 2,
            "int4_gb": int4_gb,
        },
        "architecture_details": {
            "num_layers": stats["num_layers"],
            "hidden_size": stats["hidden_size"],
            "vocab_size": stats["vocab_size"],
            "context_length": stats["context_length"],
        },
        "inference": {
            "recommended_gpu_memory": f"{fp16_gb * 1.5:.1f}GB+",
            "minimum_gpu_memory": f"{int4_gb * 1.2:.1f}GB",
            "recommended_gpus": [
                "NVIDIA A100 (40GB)",
                "NVIDIA A6000 (48GB)",
                "NVIDIA RTX 4090 (24GB)",
                "NVIDIA RTX 3090 (24GB)",
            ],
        },
        "tags": ["llama-2", "7b", "conversational", "text-generation", "autotrain"],
    }
    with open(output_file, 'w') as f:
        json.dump(payload, f, indent=2)
    print(f"✅ Model stats saved to {output_file}")
def update_readme_with_stats(readme_path: str = "README.md"):
    """
    Print the generated README header and write model_stats.json.

    NOTE(review): despite the name, this function never writes to
    ``readme_path`` -- the parameter is unused and the generated header
    is only printed for manual copy/paste. Confirm whether an in-place
    README update was intended.

    Args:
        readme_path: Path to README file (currently unused).
    """
    # Reads config.json from the current directory (default path).
    stats = calculate_model_stats()
    if not stats:
        print("❌ Failed to calculate stats")
        return
    header = generate_readme_header(stats)
    # Human-readable summary on stdout.
    print("\n" + "="*60)
    print("Model Statistics Calculated")
    print("="*60)
    print(f"Total Parameters: {format_number(stats['total_parameters'])}")
    print(f"Exact Count: {stats['total_parameters']:,}")
    print(f"Size (FP16): {stats['size_fp16_gb']} GB")
    print(f"Size (4-bit): {stats['size_4bit_gb']} GB")
    print(f"Context Length: {stats['context_length']:,} tokens")
    print("="*60)
    # Generate stats JSON (side effect: writes model_stats.json to cwd).
    generate_stats_json(stats)
    print("\n📋 README Header Section:")
    print(header)
    print("\n💡 Copy the header above and paste it at the top of your README.md!")
    # NOTE(review): the suggested --update-readme flag also only prints
    # the header; see main().
    print(" Or run: python create_model_badges.py --update-readme")
def generate_huggingface_metadata() -> str:
    """
    Generate HuggingFace model card YAML metadata.

    Returns:
        YAML formatted metadata front-matter block.

    Raises:
        RuntimeError: If config.json cannot be read. (Bug fix: previously
            a missing/invalid config produced a cryptic TypeError when
            subscripting the None returned by calculate_model_stats.)
    """
    stats = calculate_model_stats()
    if stats is None:
        raise RuntimeError("Cannot generate metadata: failed to read config.json")
    metadata = f"""---
language:
- en
license: apache-2.0
library_name: transformers
tags:
- text-generation
- conversational
- llama-2
- {format_number(stats['total_parameters']).lower()}
- causal-lm
base_model: meta-llama/Llama-2-7b-hf
pipeline_tag: text-generation
# Model Card Metadata
model-index:
- name: Helion-V1.5
  results:
  - task:
      type: text-generation
    dataset:
      name: MT-Bench
      type: mt-bench
    metrics:
    - type: score
      value: 7.2
      name: MT-Bench Score
# Model Size Info
model_size: {stats['parameters_billions']}B
architecture: llama-2
context_length: {stats['context_length']}
precision: bfloat16
---
"""
    return metadata
def main():
    """CLI entry point: print stats, write JSON, optionally emit README/metadata."""
    import argparse

    parser = argparse.ArgumentParser(
        description="Generate model statistics and badges"
    )
    parser.add_argument("--config", default="config.json", help="Path to config.json")
    parser.add_argument("--update-readme", action="store_true",
                        help="Update README.md with stats")
    parser.add_argument("--generate-metadata", action="store_true",
                        help="Generate HuggingFace metadata")
    opts = parser.parse_args()

    model_stats = calculate_model_stats(opts.config)
    if not model_stats:
        print("❌ Failed to calculate model statistics")
        return

    # Stats summary is always printed, regardless of flags.
    bar = "=" * 60
    print("\n" + bar)
    print("📊 Helion-V1.5 Model Statistics")
    print(bar)
    print(f"\n🔢 Parameters:")
    print(f" Total: {model_stats['total_parameters']:,}")
    print(f" Formatted: {format_number(model_stats['total_parameters'])}")
    print(f" Billions: {model_stats['parameters_billions']}B")
    print(f"\n💾 Model Size:")
    print(f" FP16: {model_stats['size_fp16_gb']} GB")
    print(f" 4-bit: {model_stats['size_4bit_gb']} GB")
    print(f" Recommended VRAM: {model_stats['size_fp16_gb'] * 1.5:.1f} GB")
    print(f"\n🏗️ Architecture:")
    print(f" Layers: {model_stats['num_layers']}")
    print(f" Hidden Size: {model_stats['hidden_size']}")
    print(f" Vocab Size: {model_stats['vocab_size']:,}")
    print(f" Context: {model_stats['context_length']:,} tokens")
    print(bar + "\n")

    # Always write machine-readable stats to model_stats.json.
    generate_stats_json(model_stats)

    if opts.update_readme:
        readme_header = generate_readme_header(model_stats)
        print("\n📄 README Header Generated:")
        print(readme_header)

    if opts.generate_metadata:
        card_meta = generate_huggingface_metadata()
        print("\n🤗 HuggingFace Metadata:")
        print(card_meta)
        with open("model_card_metadata.yaml", 'w') as f:
            f.write(card_meta)
        print("✅ Saved to model_card_metadata.yaml")
# Allow use both as a script (python create_model_badges.py [--flags])
# and as an importable module.
if __name__ == "__main__":
    main()