Trouter-Library committed on
Commit
4f1d5cf
·
verified ·
1 Parent(s): 940292e

Create MODEL_CARD.json

Browse files
Files changed (1) hide show
  1. MODEL_CARD.json +231 -0
MODEL_CARD.json ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_details": {
3
+ "name": "Helion-2.5-Rnd",
4
+ "version": "2.5.0-rnd",
5
+ "full_name": "DeepXR/Helion-2.5-Rnd",
6
+ "description": "Advanced research language model for reasoning, code generation, and multilingual understanding",
7
+ "organization": "DeepXR",
8
+ "license": "Apache-2.0",
9
+ "status": "research",
10
+ "release_date": "2025-01-30",
11
+ "model_type": "causal language model",
12
+ "architecture": "LLaMA",
13
+ "parameters": "70B+",
14
+ "base_model": "meta-llama/Meta-Llama-3.1-70B"
15
+ },
16
+ "intended_use": {
17
+ "primary_uses": [
18
+ "Research in natural language processing",
19
+ "Advanced reasoning and problem-solving",
20
+ "Code generation and programming assistance",
21
+ "Mathematical computation and proof generation",
22
+ "Multilingual text understanding and generation",
23
+ "Scientific analysis and research assistance",
24
+ "Educational applications"
25
+ ],
26
+ "primary_users": [
27
+ "AI researchers",
28
+ "Software developers",
29
+ "Data scientists",
30
+ "Academic researchers",
31
+ "Students and educators"
32
+ ],
33
+ "out_of_scope": [
34
+ "Production systems without extensive validation",
35
+ "Critical decision-making without human oversight",
36
+ "Medical diagnosis or treatment recommendations",
37
+ "Legal advice or financial guidance",
38
+ "Real-time safety-critical applications"
39
+ ]
40
+ },
41
+ "factors": {
42
+ "relevant_factors": [
43
+ "Input language and complexity",
44
+ "Task domain and specialization",
45
+ "Context length requirements",
46
+ "Computational resources available",
47
+ "User expertise and validation capability"
48
+ ],
49
+ "evaluation_factors": [
50
+ "Accuracy on benchmark datasets",
51
+ "Reasoning capability",
52
+ "Code correctness",
53
+ "Mathematical precision",
54
+ "Multilingual performance",
55
+ "Context utilization",
56
+ "Generation quality"
57
+ ]
58
+ },
59
+ "metrics": {
60
+ "reasoning": {
61
+ "MMLU": 0.847,
62
+ "ARC-Challenge": 0.834,
63
+ "HellaSwag": 0.889,
64
+ "WinoGrande": 0.823
65
+ },
66
+ "mathematics": {
67
+ "GSM8K": 0.892,
68
+ "MATH": 0.567,
69
+ "Minerva": 0.534
70
+ },
71
+ "code": {
72
+ "HumanEval": 0.756,
73
+ "MBPP": 0.723,
74
+ "DS-1000": 0.645
75
+ },
76
+ "knowledge": {
77
+ "TruthfulQA": 0.612
78
+ },
79
+ "perplexity": 2.34
80
+ },
81
+ "training_data": {
82
+ "note": "Training data information is proprietary to DeepXR Research",
83
+ "preprocessing": [
84
+ "Quality filtering",
85
+ "Deduplication",
86
+ "PII removal",
87
+ "Format standardization",
88
+ "Language identification",
89
+ "Toxicity filtering"
90
+ ]
91
+ },
92
+ "ethical_considerations": {
93
+ "risks": [
94
+ "Potential for generating biased content",
95
+ "May produce factually incorrect information",
96
+ "Could be misused for harmful content generation",
97
+ "Privacy concerns with training data",
98
+ "Environmental impact of training and inference"
99
+ ],
100
+ "mitigations": [
101
+ "Content filtering mechanisms",
102
+ "Regular bias auditing",
103
+ "Clear documentation of limitations",
104
+ "User education on responsible use",
105
+ "Toxicity detection and prevention",
106
+ "PII detection in outputs"
107
+ ],
108
+ "recommendations": [
109
+ "Implement additional safety layers for production use",
110
+ "Regular monitoring and evaluation of outputs",
111
+ "Human oversight for critical applications",
112
+ "Transparency about model capabilities and limitations",
113
+ "Respect for user privacy and data protection"
114
+ ]
115
+ },
116
+ "caveats_and_recommendations": {
117
+ "limitations": [
118
+ "Research model - requires validation before production use",
119
+ "May exhibit biases present in training data",
120
+ "Can generate plausible but incorrect information",
121
+ "Performance varies across specialized domains",
122
+ "Long context performance degrades beyond 64K tokens",
123
+ "Computational requirements are substantial",
124
+ "Not optimized for real-time applications"
125
+ ],
126
+ "recommendations": [
127
+ "Always verify outputs for critical applications",
128
+ "Implement appropriate content filtering",
129
+ "Monitor for bias in specific use cases",
130
+ "Test thoroughly before deployment",
131
+ "Use temperature=0 for deterministic tasks",
132
+ "Implement retry logic for API failures",
133
+ "Consider quantization for resource constraints"
134
+ ]
135
+ },
136
+ "technical_specifications": {
137
+ "context_window": 131072,
138
+ "vocabulary_size": 128256,
139
+ "hidden_size": 8192,
140
+ "num_layers": 80,
141
+ "num_attention_heads": 64,
142
+ "num_key_value_heads": 8,
143
+ "intermediate_size": 28672,
144
+ "rope_theta": 500000.0,
145
+ "rope_scaling": {
146
+ "type": "yarn",
147
+ "factor": 8.0,
148
+ "original_max_position_embeddings": 16384
149
+ },
150
+ "weight_format": "safetensors",
151
+ "supported_precisions": [
152
+ "fp16"
153
+ ],
154
+ "quantization": "none",
155
+ "safetensors_shards": 82,
156
+ "shard_naming": "shard_01 to shard_82",
157
+ "shard_size_gb": 1.57,
158
+ "supported_frameworks": [
159
+ "transformers",
160
+ "vllm",
161
+ "text-generation-inference"
162
+ ]
163
+ },
164
+ "hardware_requirements": {
165
+ "minimum": {
166
+ "gpu": "2x NVIDIA A100 80GB",
167
+ "vram": "160GB",
168
+ "ram": "256GB",
169
+ "storage": "500GB NVMe"
170
+ },
171
+ "recommended": {
172
+ "gpu": "4x NVIDIA H100 80GB",
173
+ "vram": "320GB",
174
+ "ram": "512GB",
175
+ "storage": "1TB+ NVMe"
176
+ },
177
+ "inference_speed": {
178
+ "tokens_per_second": "30-50 (depending on hardware)",
179
+ "latency": "100-300ms first token",
180
+ "throughput": "High with batch processing"
181
+ }
182
+ },
183
+ "model_sources": {
184
+ "repository": "https://huggingface.co/DeepXR/Helion-2.5-Rnd",
185
+ "paper": null,
186
+ "demo": null,
187
+ "organization": "https://deepxr.ai"
188
+ },
189
+ "citation": {
190
+ "bibtex": "@misc{helion-2.5-rnd-2025,\n title={Helion-2.5-Rnd: Advanced Research Language Model},\n author={DeepXR Research Team},\n year={2025},\n publisher={DeepXR},\n url={https://huggingface.co/DeepXR/Helion-2.5-Rnd}\n}",
191
+ "apa": "DeepXR Research Team. (2025). Helion-2.5-Rnd: Advanced Research Language Model. DeepXR. https://huggingface.co/DeepXR/Helion-2.5-Rnd"
192
+ },
193
+ "contact": {
194
+ "email": "research@deepxr.ai",
195
+ "website": "https://deepxr.ai",
196
+ "github": "https://github.com/DeepXR",
197
+ "support": "support@deepxr.ai"
198
+ },
199
+ "additional_information": {
200
+ "languages_supported": [
201
+ "English", "Spanish", "French", "German", "Italian", "Portuguese",
202
+ "Chinese (Simplified)", "Chinese (Traditional)", "Japanese", "Korean",
203
+ "Russian", "Arabic", "Hindi", "Bengali", "Turkish", "Vietnamese",
204
+ "Polish", "Ukrainian", "Romanian", "Dutch", "Greek", "Czech",
205
+ "Swedish", "Hungarian", "Finnish", "Norwegian", "Danish", "Hebrew",
206
+ "Thai", "Indonesian", "Malay", "Filipino", "Persian", "Urdu",
207
+ "Tamil", "Telugu", "Kannada", "Malayalam", "Gujarati", "Marathi",
208
+ "Punjabi", "Swahili", "Amharic", "Yoruba", "Igbo", "Hausa"
209
+ ],
210
+ "programming_languages": [
211
+ "Python", "JavaScript", "TypeScript", "Java", "C++", "C#", "Go",
212
+ "Rust", "Swift", "Kotlin", "Ruby", "PHP", "Scala", "R", "MATLAB",
213
+ "SQL", "Shell", "PowerShell", "HTML", "CSS", "LaTeX"
214
+ ],
215
+ "deployment_options": [
216
+ "Docker containers",
217
+ "Kubernetes clusters",
218
+ "Cloud platforms (AWS, GCP, Azure)",
219
+ "On-premise servers",
220
+ "API endpoints",
221
+ "Batch processing pipelines"
222
+ ],
223
+ "monitoring_tools": [
224
+ "Prometheus metrics",
225
+ "Grafana dashboards",
226
+ "Custom logging",
227
+ "Performance profiling",
228
+ "Token usage tracking"
229
+ ]
230
+ }
231
+ }