Trouter-Library commited on
Commit
5359da1
·
verified ·
1 Parent(s): 1f2c453

Create create_model_badges.py

Browse files
Files changed (1) hide show
  1. create_model_badges.py +347 -0
create_model_badges.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Create Model Badges and Stats Display
3
+ Generates the parameter count, size, and download badges for your model card
4
+ """
5
+
6
+ import json
7
+ from pathlib import Path
8
+
9
+
10
def calculate_model_stats(config_path: str = "config.json") -> "dict | None":
    """
    Calculate model statistics from a HuggingFace-style config.json.

    The parameter count follows the Llama-2 layout used by this repo:
    token embeddings, per-layer attention (Q/K/V/O) and gated MLP
    (gate/up/down) projections, two normalization weight vectors per layer,
    a final norm, and an untied LM head. Bias terms are not counted.

    Args:
        config_path: Path to the model's config.json.

    Returns:
        Dictionary with model stats, or None if the config could not be
        read or parsed (an error message is printed in that case).
    """
    try:
        with open(config_path) as f:
            config = json.load(f)

        # Architecture hyperparameters (defaults match Llama-2-7B).
        vocab_size = config.get("vocab_size", 32000)
        hidden_size = config.get("hidden_size", 4096)
        num_layers = config.get("num_hidden_layers", 32)
        intermediate_size = config.get("intermediate_size", 11008)

        # Token embedding table.
        embedding_params = vocab_size * hidden_size

        # Attention: Q, K, V, O projections (square weight matrices).
        attention_params = 4 * (hidden_size * hidden_size)

        # MLP: gate, up, down projections.
        mlp_params = hidden_size * intermediate_size * 3

        # Two norm weight vectors per layer (pre-attention and pre-MLP).
        layernorm_params = hidden_size * 2

        # Total per layer.
        per_layer_params = attention_params + mlp_params + layernorm_params

        # Total parameters across all layers.
        total_params = embedding_params + (per_layer_params * num_layers)

        # Add final norm and (untied) LM head.
        total_params += hidden_size  # Final LayerNorm
        total_params += vocab_size * hidden_size  # LM head

        # Convert to billions.
        params_b = total_params / 1e9

        # Sizes assume 2 bytes/param (FP16) and 0.5 bytes/param (4-bit).
        size_gb = (total_params * 2) / (1024 ** 3)
        size_4bit = (total_params * 0.5) / (1024 ** 3)

        return {
            "total_parameters": total_params,
            "parameters_billions": round(params_b, 2),
            "size_fp16_gb": round(size_gb, 2),
            "size_4bit_gb": round(size_4bit, 2),
            "vocab_size": vocab_size,
            "hidden_size": hidden_size,
            "num_layers": num_layers,
            "context_length": config.get("max_position_embeddings", 4096)
        }

    # Narrowed from a blanket `except Exception` so genuine programming
    # errors are not silently swallowed. json.JSONDecodeError subclasses
    # ValueError; TypeError covers non-numeric values in the config.
    except (OSError, ValueError, TypeError) as e:
        print(f"Error calculating stats: {e}")
        return None
74
+
75
+
76
def format_number(num: int) -> str:
    """Return *num* as a short human-readable string (e.g. 7000000000 -> '7.0B')."""
    # Check thresholds from largest to smallest; first match wins.
    for threshold, suffix in ((1e9, "B"), (1e6, "M"), (1e3, "K")):
        if num >= threshold:
            return f"{num / threshold:.1f}{suffix}"
    return str(num)
85
+
86
+
87
def generate_readme_header(stats: dict) -> str:
    """
    Generate README header section with model stats.

    Args:
        stats: Model statistics dictionary (as produced by
            calculate_model_stats); keys used here: total_parameters,
            size_fp16_gb, size_4bit_gb, context_length, num_layers,
            hidden_size, vocab_size.

    Returns:
        Markdown formatted header
    """
    # Human-readable parameter count, e.g. "6.7B".
    params_str = format_number(stats["total_parameters"])

    # Template lines are flush-left so the emitted markdown is not indented
    # (leading spaces would render as a code block).
    header = f"""
<div align="center">

# 🤖 Helion-V1.5

**Advanced Conversational AI with Enhanced Capabilities**

[![Model](https://img.shields.io/badge/🤗-Model-yellow)](https://huggingface.co/DeepXR/Helion-V1.5)
[![Parameters](https://img.shields.io/badge/Parameters-{params_str}-blue)](#)
[![Size](https://img.shields.io/badge/Size-{stats['size_fp16_gb']}GB-green)](#)
[![Context](https://img.shields.io/badge/Context-{stats['context_length']}_tokens-orange)](#)
[![License](https://img.shields.io/badge/License-Apache_2.0-red)](LICENSE)
[![AutoTrain](https://img.shields.io/badge/AutoTrain-Compatible-purple)](https://huggingface.co/autotrain)

</div>

---

## 📊 Model Specifications

| Specification | Value |
|---------------|-------|
| **Parameters** | {params_str} ({stats['total_parameters']:,}) |
| **Architecture** | Llama-2 |
| **Layers** | {stats['num_layers']} |
| **Hidden Size** | {stats['hidden_size']} |
| **Vocab Size** | {stats['vocab_size']:,} |
| **Context Length** | {stats['context_length']:,} tokens |
| **Precision** | bfloat16 |
| **Model Size (FP16)** | {stats['size_fp16_gb']} GB |
| **Model Size (4-bit)** | {stats['size_4bit_gb']} GB |

---
"""

    return header
135
+
136
+
137
def generate_stats_json(stats: dict, output_file: str = "model_stats.json"):
    """
    Generate a JSON file with model statistics for programmatic access.

    Args:
        stats: Model statistics (as produced by calculate_model_stats)
        output_file: Output filename
    """
    total = stats["total_parameters"]
    fp16_gb = stats["size_fp16_gb"]
    int4_gb = stats["size_4bit_gb"]

    # Assemble each top-level section separately, then combine. Insertion
    # order matters: it determines key order in the written JSON.
    parameters = {
        "total": total,
        "formatted": format_number(total),
        "billions": stats["parameters_billions"],
    }
    size = {
        "fp16_gb": fp16_gb,
        "fp32_gb": fp16_gb * 2,
        "int8_gb": fp16_gb / 2,
        "int4_gb": int4_gb,
    }
    architecture_details = {
        "num_layers": stats["num_layers"],
        "hidden_size": stats["hidden_size"],
        "vocab_size": stats["vocab_size"],
        "context_length": stats["context_length"],
    }
    inference = {
        "recommended_gpu_memory": f"{fp16_gb * 1.5:.1f}GB+",
        "minimum_gpu_memory": f"{int4_gb * 1.2:.1f}GB",
        "recommended_gpus": [
            "NVIDIA A100 (40GB)",
            "NVIDIA A6000 (48GB)",
            "NVIDIA RTX 4090 (24GB)",
            "NVIDIA RTX 3090 (24GB)",
        ],
    }

    stats_json = {
        "model_name": "Helion-V1.5",
        "architecture": "Llama-2",
        "parameters": parameters,
        "size": size,
        "architecture_details": architecture_details,
        "inference": inference,
        "tags": [
            "llama-2",
            "7b",
            "conversational",
            "text-generation",
            "autotrain",
        ],
    }

    with open(output_file, 'w') as f:
        json.dump(stats_json, f, indent=2)

    print(f"✅ Model stats saved to {output_file}")
188
+
189
+
190
def update_readme_with_stats(readme_path: str = "README.md"):
    """
    Print model statistics and the generated README header section.

    Despite the name, this does NOT modify the README file: it prints the
    header for manual copy/paste and writes model_stats.json as a side
    effect (via generate_stats_json).

    Args:
        readme_path: Path to README file
    """
    # NOTE(review): readme_path is never used below — the README is not
    # actually rewritten. Either wire it up or drop the parameter.
    stats = calculate_model_stats()

    if not stats:
        print("❌ Failed to calculate stats")
        return

    header = generate_readme_header(stats)

    # Human-readable summary of the computed statistics.
    print("\n" + "="*60)
    print("Model Statistics Calculated")
    print("="*60)
    print(f"Total Parameters: {format_number(stats['total_parameters'])}")
    print(f"Exact Count: {stats['total_parameters']:,}")
    print(f"Size (FP16): {stats['size_fp16_gb']} GB")
    print(f"Size (4-bit): {stats['size_4bit_gb']} GB")
    print(f"Context Length: {stats['context_length']:,} tokens")
    print("="*60)

    # Side effect: writes model_stats.json to the current directory.
    generate_stats_json(stats)

    print("\n📋 README Header Section:")
    print(header)

    print("\n💡 Copy the header above and paste it at the top of your README.md!")
    print(" Or run: python create_model_badges.py --update-readme")
223
+
224
+
225
def generate_huggingface_metadata() -> str:
    """
    Generate HuggingFace model card metadata (YAML front matter).

    Reads config.json from the current directory via calculate_model_stats.

    Returns:
        YAML formatted metadata

    Raises:
        RuntimeError: If model statistics could not be calculated
            (e.g. config.json is missing or malformed).
    """
    stats = calculate_model_stats()
    if stats is None:
        # Previously a None here fell through to an opaque TypeError on
        # stats['total_parameters']; fail with a clear message instead.
        raise RuntimeError(
            "Could not calculate model stats; check that config.json exists "
            "and is valid JSON"
        )

    metadata = f"""---
language:
- en
license: apache-2.0
library_name: transformers
tags:
- text-generation
- conversational
- llama-2
- {format_number(stats['total_parameters']).lower()}
- causal-lm
base_model: meta-llama/Llama-2-7b-hf
pipeline_tag: text-generation

# Model Card Metadata
model-index:
- name: Helion-V1.5
  results:
  - task:
      type: text-generation
    dataset:
      name: MT-Bench
      type: mt-bench
    metrics:
    - type: score
      value: 7.2
      name: MT-Bench Score

# Model Size Info
model_size: {stats['parameters_billions']}B
architecture: llama-2
context_length: {stats['context_length']}
precision: bfloat16
---
"""

    return metadata
271
+
272
+
273
def main():
    """CLI entry point: print model stats and optionally emit artifacts."""
    import argparse

    parser = argparse.ArgumentParser(
        description="Generate model statistics and badges"
    )
    parser.add_argument("--config", default="config.json",
                        help="Path to config.json")
    parser.add_argument("--update-readme", action="store_true",
                        help="Update README.md with stats")
    parser.add_argument("--generate-metadata", action="store_true",
                        help="Generate HuggingFace metadata")
    args = parser.parse_args()

    # Stats are required for everything below; bail out early on failure.
    stats = calculate_model_stats(args.config)
    if not stats:
        print("❌ Failed to calculate model statistics")
        return

    bar = "=" * 60

    # Unconditional summary of the computed statistics.
    print("\n" + bar)
    print("📊 Helion-V1.5 Model Statistics")
    print(bar)
    print(f"\n🔢 Parameters:")
    print(f" Total: {stats['total_parameters']:,}")
    print(f" Formatted: {format_number(stats['total_parameters'])}")
    print(f" Billions: {stats['parameters_billions']}B")

    print(f"\n💾 Model Size:")
    print(f" FP16: {stats['size_fp16_gb']} GB")
    print(f" 4-bit: {stats['size_4bit_gb']} GB")
    print(f" Recommended VRAM: {stats['size_fp16_gb'] * 1.5:.1f} GB")

    print(f"\n🏗️ Architecture:")
    print(f" Layers: {stats['num_layers']}")
    print(f" Hidden Size: {stats['hidden_size']}")
    print(f" Vocab Size: {stats['vocab_size']:,}")
    print(f" Context: {stats['context_length']:,} tokens")
    print(bar + "\n")

    # Always write the machine-readable stats file.
    generate_stats_json(stats)

    if args.update_readme:
        print("\n📄 README Header Generated:")
        print(generate_readme_header(stats))

    if args.generate_metadata:
        metadata = generate_huggingface_metadata()
        print("\n🤗 HuggingFace Metadata:")
        print(metadata)
        with open("model_card_metadata.yaml", 'w') as f:
            f.write(metadata)
        print("✅ Saved to model_card_metadata.yaml")
344
+
345
+
346
# Run the CLI only when executed as a script, not on import.
if __name__ == "__main__":
    main()