File size: 22,944 Bytes
f8b78bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33ccf4b
 
0d052fa
 
47e6b36
 
c2c31c0
47e6b36
 
 
 
 
e00ebd9
 
0d052fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33ccf4b
0d052fa
33ccf4b
0d052fa
33ccf4b
0d052fa
33ccf4b
0d052fa
33ccf4b
0d052fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33ccf4b
0d052fa
33ccf4b
0d052fa
 
 
 
 
 
 
 
33ccf4b
 
0d052fa
 
 
 
 
33ccf4b
0d052fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33ccf4b
 
 
 
0d052fa
33ccf4b
 
0d052fa
33ccf4b
 
 
 
 
 
 
 
 
 
 
 
 
0d052fa
33ccf4b
 
 
 
 
 
 
 
 
 
 
 
 
 
0d052fa
 
 
 
 
33ccf4b
 
 
0d052fa
 
33ccf4b
 
 
 
 
0d052fa
 
 
 
 
 
 
 
33ccf4b
 
 
0d052fa
33ccf4b
0d052fa
 
 
 
 
 
 
 
 
 
33ccf4b
0d052fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33ccf4b
 
0d052fa
 
 
 
 
 
 
 
33ccf4b
 
 
0d052fa
 
 
 
 
 
 
 
 
 
 
33ccf4b
 
 
 
0d052fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33ccf4b
 
0d052fa
33ccf4b
0d052fa
 
 
33ccf4b
 
0d052fa
 
33ccf4b
 
0d052fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33ccf4b
0d052fa
33ccf4b
0d052fa
33ccf4b
0d052fa
 
 
 
 
 
 
 
 
 
33ccf4b
0d052fa
33ccf4b
0d052fa
33ccf4b
0d052fa
33ccf4b
0d052fa
33ccf4b
0d052fa
33ccf4b
0d052fa
33ccf4b
0d052fa
 
 
 
 
 
 
33ccf4b
0d052fa
 
 
33ccf4b
 
 
0d052fa
 
33ccf4b
 
0d052fa
33ccf4b
f8b78bc
0d052fa
33ccf4b
0d052fa
 
 
 
 
 
 
 
f8b78bc
0d052fa
 
 
f8b78bc
0d052fa
 
33ccf4b
 
 
0d052fa
 
33ccf4b
 
0d052fa
 
 
 
 
 
 
 
 
 
 
 
33ccf4b
0d052fa
33ccf4b
0d052fa
33ccf4b
 
 
0d052fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
---
language:
- en
- es
- fr
- de
- it
- pt
- nl
- ru
- zh
- ja
- ko
- ar
- hi
license: apache-2.0
library_name: transformers
tags:
- text-generation
- conversational
- code
- instruction-following
- pytorch
- causal-lm
- llm
- reasoning
- multilingual
pipeline_tag: text-generation
widget:
- text: "def fibonacci(n):"
  example_title: Code Generation
- text: "Explain quantum entanglement in simple terms:"
  example_title: Science Explanation
- text: "Write a short story about a robot learning to paint:"
  example_title: Creative Writing
model-index:
- name: Helion-V2
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MMLU
      type: cais/mmlu
    metrics:
    - type: accuracy
      value: 64.2
      name: Accuracy
  - task:
      type: text-generation
      name: Code Generation
    dataset:
      name: HumanEval
      type: openai_humaneval
    metrics:
    - type: pass@1
      value: 48.2
      name: Pass@1
  - task:
      type: text-generation
      name: Commonsense Reasoning
    dataset:
      name: HellaSwag
      type: hellaswag
    metrics:
    - type: acc_norm
      value: 80.5
      name: Accuracy
  - task:
      type: text-generation
      name: Truthfulness
    dataset:
      name: TruthfulQA
      type: truthful_qa
    metrics:
    - type: mc2
      value: 52.1
      name: MC2
  - task:
      type: text-generation
      name: Math Reasoning
    dataset:
      name: GSM8K
      type: gsm8k
    metrics:
    - type: accuracy
      value: 68.7
      name: Accuracy
  - task:
      type: text-generation
      name: Question Answering
    dataset:
      name: ARC Challenge
      type: ai2_arc
    metrics:
    - type: acc_norm
      value: 58.3
      name: Accuracy
---

# Helion-V2

<div align="center">

<div align="center">

  <img src="https://imgur.com/QWzVuIQ.png" alt="Helion-V2 Logo" width="100%"/>

</div>

---

**A State-of-the-Art 7.2B Parameter Language Model for Daily Use**

[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
[![Transformers](https://img.shields.io/badge/transformers-4.40.0+-green.svg)](https://github.com/huggingface/transformers)
[![PyTorch](https://img.shields.io/badge/PyTorch-2.1.0+-red.svg)](https://pytorch.org/)

[Model Card](#model-information) | [Usage](#usage) | [Benchmarks](#performance-benchmarks) | [Safety](#safety-and-moderation)

</div>

---

## Table of Contents

- [Model Overview](#model-overview)
- [Model Information](#model-information)
- [Performance Benchmarks](#performance-benchmarks)
- [Quick Start](#quick-start)
- [Usage](#usage)
- [Safety and Moderation](#safety-and-moderation)
- [Deployment Options](#deployment-options)
- [Training Details](#training-details)
- [Limitations](#limitations)
- [Citation](#citation)
- [License](#license)
- [Acknowledgments](#acknowledgments)

---

## Model Overview

Helion-V2 is an advanced large language model engineered for practical, everyday applications. With 7.2 billion parameters and a focus on factual accuracy, conversational ability, and code generation, Helion-V2 delivers enterprise-grade performance on consumer hardware.

**Key Highlights:**
- **7.2B parameters** optimized for efficiency and quality
- **8,192 token context** for handling complex documents
- **Grouped Query Attention (GQA)** for 40% faster inference
- **Exceptional truthfulness** (52.1% on TruthfulQA - highest in class)
- **Strong coding ability** (48.2% on HumanEval)
- **Multi-language support** with primary focus on English
- **Apache 2.0 License** for commercial use

---

## Model Information

### Architecture Details

| Specification | Value |
|--------------|-------|
| **Parameters** | 7.2 billion |
| **Architecture** | Decoder-only Transformer |
| **Layers** | 32 |
| **Hidden Dimension** | 4,096 |
| **Attention Heads** | 32 (query) / 8 (key-value) |
| **FFN Dimension** | 14,336 |
| **Context Length** | 8,192 tokens |
| **Vocabulary Size** | 32,768 tokens |
| **Position Encoding** | RoPE (Rotary Position Embedding) |
| **Normalization** | RMSNorm (eps: 1e-6) |
| **Activation** | SiLU (Swish) |
| **Attention Type** | Grouped Query Attention (GQA) |

### Model Card Metadata

| Property | Details |
|----------|---------|
| **Model Type** | Causal Language Model |
| **Languages** | English (primary), Spanish, French, German, Italian, Portuguese, Dutch, Russian, Chinese, Japanese, Korean, Arabic, Hindi |
| **License** | Apache 2.0 |
| **Training Data** | 2.5T tokens (web, code, books, papers) |
| **Knowledge Cutoff** | October 2024 |
| **Developed By** | DeepXR |
| **Model Family** | Helion |
| **Version** | 2.0 |
| **Release Date** | November 2024 |
| **Precision** | BFloat16 / Float16 |
| **Framework** | PyTorch 2.1+ |
| **Compute Type** | GPU (NVIDIA A100, H100, RTX 4090+) |
| **Finetuned From** | Trained from scratch |
| **Training Duration** | 21 days on 128x H100 GPUs |

### Supported Tasks

- **Text Generation**: Articles, stories, essays, reports
- **Conversational AI**: Multi-turn dialogue, chat applications
- **Code Generation**: Python, JavaScript, Java, C++, and 20+ languages
- **Question Answering**: Factual queries, reasoning tasks
- **Text Summarization**: Document condensation, key point extraction
- **Creative Writing**: Storytelling, poetry, scriptwriting
- **Data Analysis**: Interpretation, insights, recommendations
- **Translation**: Between the 13 supported languages (quality varies by language pair)
- **Educational Tutoring**: Math, science, history, programming
- **Business Writing**: Emails, proposals, presentations

---

## Performance Benchmarks

### Comprehensive Evaluation Results

Helion-V2 has been evaluated on 15+ industry-standard benchmarks, demonstrating strong performance across reasoning, knowledge, coding, and safety metrics.

#### Core Academic Benchmarks

| Benchmark | Helion-V2 | Llama-3-8B | Mistral-7B-v0.3 | Gemma-7B | Qwen-2-7B | GPT-3.5-Turbo |
|-----------|-----------|------------|-----------------|----------|-----------|---------------|
| **MMLU** (5-shot) | **64.2** | 66.4 | 62.5 | 64.3 | 65.1 | 70.0 |
| **MMLU-Pro** (5-shot) | **41.8** | 43.2 | 38.6 | 40.1 | 42.3 | 48.5 |
| **HellaSwag** (10-shot) | **80.5** | 82.1 | 81.3 | 80.9 | 81.7 | 85.5 |
| **PIQA** (0-shot) | **79.8** | 80.5 | 79.1 | 79.6 | 80.2 | 81.6 |
| **WinoGrande** (5-shot) | **74.3** | 75.1 | 73.2 | 74.0 | 74.8 | 77.2 |
| **ARC-Challenge** (25-shot) | **58.3** | 59.2 | 56.7 | 57.9 | 58.8 | 61.4 |
| **ARC-Easy** (25-shot) | **82.7** | 83.4 | 81.9 | 82.5 | 83.1 | 85.2 |
| **OpenBookQA** (10-shot) | **51.6** | 52.8 | 49.4 | 50.9 | 52.1 | 54.3 |

#### Mathematical and Logical Reasoning

| Benchmark | Helion-V2 | Llama-3-8B | Mistral-7B-v0.3 | Gemma-7B | Qwen-2-7B | GPT-3.5-Turbo |
|-----------|-----------|------------|-----------------|----------|-----------|---------------|
| **GSM8K** (8-shot CoT) | **68.7** | 72.4 | 52.3 | 66.1 | 71.8 | 77.3 |
| **MATH** (4-shot) | **23.5** | 26.8 | 15.2 | 21.7 | 25.4 | 34.1 |
| **BBH** (3-shot) | **52.9** | 55.3 | 49.1 | 51.6 | 54.2 | 60.7 |
| **DROP** (3-shot) | **61.4** | 63.7 | 58.2 | 60.5 | 62.8 | 68.3 |

#### Code Generation and Understanding

| Benchmark | Helion-V2 | Llama-3-8B | Mistral-7B-v0.3 | Gemma-7B | Qwen-2-7B | CodeLlama-7B |
|-----------|-----------|------------|-----------------|----------|-----------|--------------|
| **HumanEval** (pass@1) | **48.2** | 51.8 | 40.2 | 44.5 | 49.7 | 45.9 |
| **HumanEval** (pass@10) | **67.3** | 71.2 | 59.8 | 64.1 | 68.9 | 66.2 |
| **MBPP** (pass@1) | **55.8** | 58.3 | 47.1 | 52.6 | 57.4 | 54.1 |
| **MBPP** (pass@10) | **74.6** | 77.9 | 68.3 | 72.1 | 76.2 | 73.8 |
| **MultiPL-E** (Python) | **46.9** | 49.5 | 38.7 | 43.2 | 48.1 | 44.6 |
| **MultiPL-E** (JavaScript) | **43.5** | 46.2 | 35.9 | 40.8 | 44.7 | 41.3 |
| **DS-1000** (Data Science) | **38.7** | 41.2 | 32.4 | 36.9 | 40.3 | 37.5 |

#### Truthfulness and Safety

| Benchmark | Helion-V2 | Llama-3-8B | Mistral-7B-v0.3 | Gemma-7B | Qwen-2-7B | GPT-3.5-Turbo |
|-----------|-----------|------------|-----------------|----------|-----------|---------------|
| **TruthfulQA** (MC2) | **52.1** | 48.3 | 47.6 | 49.2 | 51.3 | 54.7 |
| **TruthfulQA** (MC1) | **37.8** | 34.6 | 33.9 | 35.7 | 37.1 | 40.2 |
| **ToxiGen** (lower is better) | **0.08** | 0.12 | 0.15 | 0.10 | 0.09 | 0.06 |
| **CrowS-Pairs** (bias score) | **54.2** | 57.8 | 59.3 | 56.1 | 55.0 | 52.1 |

#### Conversational and Instruction Following

| Benchmark | Helion-V2 | Llama-3-8B | Mistral-7B-v0.3 | Gemma-7B | Qwen-2-7B | GPT-3.5-Turbo |
|-----------|-----------|------------|-----------------|----------|-----------|---------------|
| **MT-Bench** (Avg) | **7.85** | 8.12 | 7.61 | 7.73 | 7.92 | 8.32 |
| **AlpacaEval 2.0** (Win Rate) | **18.3%** | 22.1% | 14.7% | 16.8% | 19.4% | 28.5% |
| **Arena-Hard** | **31.7** | 35.4 | 27.8 | 29.9 | 33.2 | 42.6 |
| **IFEval** (Instruction Following) | **72.4** | 75.8 | 68.9 | 71.2 | 74.1 | 78.3 |

### Performance Analysis

**Strengths:**
- **Truthfulness Leader**: Highest TruthfulQA score in its parameter class (52.1%), demonstrating superior factual accuracy and reduced hallucination
- **Safety-First Design**: Lowest toxicity score among the compared open-weight models (0.08 on ToxiGen; only GPT-3.5-Turbo is lower at 0.06) and competitive bias metrics
- **Balanced Capabilities**: Strong performance across all task categories without extreme specialization
- **Code Competence**: 48.2% HumanEval pass@1 places it among top general-purpose 7B models
- **Practical Focus**: Optimized for real-world use cases rather than benchmark gaming

**Comparative Advantages:**
- ~8% higher (relative) TruthfulQA MC2 score than Llama-3-8B (52.1 vs 48.3)
- ~47% lower ToxiGen toxicity score than Mistral-7B-v0.3 (0.08 vs 0.15)
- Better instruction following than Gemma-7B on IFEval
- More balanced than specialized models (e.g., better general knowledge than CodeLlama)

**Areas for Improvement:**
- Math performance trails Llama-3-8B and Qwen-2-7B by roughly 2-4 points (GSM8K: 68.7 vs 72.4 / 71.8; MATH: 23.5 vs 26.8 / 25.4)
- Conversational win rate below top performers on AlpacaEval 2.0
- Complex reasoning (BBH, MATH) shows room for enhancement

### Inference Performance

| Configuration | Hardware | Throughput | Latency (TTFT) | Memory |
|---------------|----------|------------|----------------|--------|
| FP16 | A100 (80GB) | 52 tokens/s | 87ms | 14.4 GB |
| FP16 | RTX 4090 (24GB) | 47 tokens/s | 102ms | 14.4 GB |
| 8-bit | RTX 4090 (24GB) | 41 tokens/s | 115ms | 7.8 GB |
| 4-bit | RTX 3090 (24GB) | 38 tokens/s | 128ms | 4.2 GB |
| 4-bit | RTX 3060 (12GB) | 29 tokens/s | 156ms | 4.2 GB |

*TTFT = Time To First Token; Measured with 2048 token context, 512 token generation*

---

## Quick Start

### Installation

```bash
# Minimum versions follow the requirements stated in this card (Transformers 4.40.0+, PyTorch 2.1.0+).
pip install "transformers>=4.40.0" "torch>=2.1.0" accelerate bitsandbytes safetensors
```

### Basic Usage

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub repository that provides both the tokenizer and the model weights.
model_name = "DeepXR/Helion-V2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load the weights in float16; device_map="auto" delegates device placement to the library.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")

prompt = "Explain the theory of relativity in simple terms:"
# Tokenize to PyTorch tensors and move them to the device the model lives on.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Sample up to 256 new tokens using temperature and nucleus (top-p) sampling.
outputs = model.generate(**inputs, max_new_tokens=256, temperature=0.7, top_p=0.9, do_sample=True)

# Decode the first returned sequence, dropping special tokens, and show it.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
```

---

## Usage

### Chat Interface

```python
# Reuses `tokenizer` and `model` as loaded in the Basic Usage example.
messages = [
    {"role": "system", "content": "You are a helpful, respectful, and honest AI assistant."},
    {"role": "user", "content": "Write a Python function to calculate fibonacci numbers."}
]

# Render the conversation with the tokenizer's chat template as plain text;
# add_generation_prompt=True appends the prompt for the assistant's reply.
input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    do_sample=True,  # temperature/top_p are only applied when sampling is enabled
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1
)

# Decodes the full sequence (prompt followed by the generated reply).
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

### Advanced Generation Parameters

```python
# All presets reuse `model` and `inputs` from the examples above.
# do_sample=True is required in each preset: without it temperature, top_p
# and top_k are not applied and the presets would decode identically.

# For creative writing: higher temperature and a wider nucleus for variety
outputs = model.generate(
    **inputs,
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.9,
    top_p=0.95,
    top_k=50,
    repetition_penalty=1.15
)

# For factual/technical content: low temperature for more conservative output
outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.3,
    top_p=0.85,
    repetition_penalty=1.05
)

# For code generation: lowest temperature of the three presets
outputs = model.generate(
    **inputs,
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.2,
    top_p=0.9,
    repetition_penalty=1.1
)
```

### Quantization for Efficient Deployment

#### 4-bit Quantization (Recommended)

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit NF4 quantization with double quantization; computation runs in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

# The imports above make this snippet runnable on its own, without the
# Basic Usage example having been executed first.
model = AutoModelForCausalLM.from_pretrained(
    "DeepXR/Helion-V2",
    quantization_config=quantization_config,
    device_map="auto"
)
```

#### 8-bit Quantization

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Pass the 8-bit setting through BitsAndBytesConfig (same mechanism as the
# 4-bit example); the bare `load_in_8bit=True` keyword on from_pretrained is
# deprecated in recent Transformers releases.
model = AutoModelForCausalLM.from_pretrained(
    "DeepXR/Helion-V2",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto"
)
```

### Streaming Generation

```python
from threading import Thread

from transformers import TextIteratorStreamer

# Reuses `tokenizer`, `model` and `inputs` from the examples above.
# The streamer is iterated below to receive text as generation progresses.
streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

# `inputs` is expanded into keyword arguments alongside the generation settings.
generation_kwargs = dict(
    inputs,
    streamer=streamer,
    max_new_tokens=512,
    do_sample=True,  # temperature/top_p are only applied when sampling is enabled
    temperature=0.7,
    top_p=0.9
)

# Run generation in a background thread so the main thread can consume the streamer.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

for new_text in streamer:
    print(new_text, end="", flush=True)

# Wait for the generation thread to finish before continuing.
thread.join()
```

---

## Safety and Moderation

Helion-V2 incorporates multiple safety layers to ensure responsible AI deployment:

### Built-in Safety Features

1. **Content Filtering**: Training data filtered for toxicity, hate speech, and explicit content
2. **Bias Mitigation**: Balanced representation across demographics and viewpoints
3. **Truthfulness Optimization**: Enhanced training to reduce hallucinations
4. **Instruction Compliance**: Fine-tuned to decline harmful requests appropriately

### Safety Scores

- **ToxiGen Score**: 0.08 (Lower is better; competitive with GPT-3.5)
- **CrowS-Pairs Bias**: 54.2 (Near-neutral; 50 is perfect balance)
- **TruthfulQA**: 52.1% (Highest in 7B parameter class)
- **RealToxicityPrompts**: 2.1% toxic completions (with default sampling)

### Recommended Safety Measures

For production deployments, we recommend implementing:

1. **Content Moderation API**: Use the provided `safety_classifier.py` for output filtering
2. **Input Validation**: Screen user inputs for malicious prompts
3. **Rate Limiting**: Prevent abuse through usage caps
4. **Monitoring**: Log and review model interactions
5. **Human Oversight**: Implement human-in-the-loop for sensitive applications

### Using the Safety Classifier

```python
# Illustrative moderation flow around generation. `safety_classifier` is the
# module shipped alongside this model card; `user_input` and `model` must
# already be defined by the surrounding application.
from safety_classifier import SafetyClassifier

safety = SafetyClassifier()

# Check if prompt is safe
# check_prompt is unpacked as a (flag, category) pair.
is_safe, category = safety.check_prompt(user_input)
if not is_safe:
    print(f"Unsafe prompt detected: {category}")
    # Handle appropriately

# Check model output
# `...` is a placeholder for the real generation arguments.
# NOTE(review): model.generate returns token ids in Transformers; check_response
# presumably expects decoded text -- confirm against safety_classifier.py.
response = model.generate(...)
is_safe, category = safety.check_response(response)
if not is_safe:
    # Filter or regenerate response
    response = safety.sanitize_response(response)
```

See `safety_classifier.py` and `content_moderation.py` for complete implementation.

---

## Deployment Options

### Local Deployment

**Recommended Hardware:**
- GPU: NVIDIA RTX 3090/4090 (24GB) or better
- RAM: 32GB+ system memory
- Storage: 20GB for model files

### Cloud Deployment

**Optimized Configurations:**

```python
# AWS SageMaker
import sagemaker
from sagemaker.huggingface import HuggingFaceModel

# IAM role SageMaker assumes for the endpoint. get_execution_role() resolves it
# inside SageMaker notebooks/Studio; elsewhere, assign the role ARN string instead.
role = sagemaker.get_execution_role()

huggingface_model = HuggingFaceModel(
    # Placeholder: replace with the S3 location of your packaged model artifact.
    model_data="s3://your-bucket/helion-v2",
    role=role,
    transformers_version="4.40",
    pytorch_version="2.1",
    py_version="py310",
)

# Create a real-time endpoint backed by a single GPU instance.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.2xlarge"
)
```

### API Server

```python
# Using FastAPI
# Expects `tokenizer` and `model` to be loaded at module level, as in the
# Basic Usage example.
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class GenerationRequest(BaseModel):
    """Request body for POST /generate."""
    prompt: str
    max_tokens: int = 256      # upper bound on newly generated tokens
    temperature: float = 0.7   # sampling temperature; values <= 0 select greedy decoding

@app.post("/generate")
async def generate(request: GenerationRequest):
    """Generate a completion for the prompt and return it as {"response": text}."""
    # Use the model's own device; no separate `device` variable is defined.
    inputs = tokenizer(request.prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=request.max_tokens,
        # Temperature is only applied when sampling; fall back to greedy
        # decoding for non-positive temperatures.
        do_sample=request.temperature > 0,
        temperature=request.temperature
    )
    return {"response": tokenizer.decode(outputs[0], skip_special_tokens=True)}
```

### GGUF Format (llama.cpp)

For CPU inference and edge deployment:

```bash
# Fetch the quantized GGUF file from the Hugging Face Hub
wget https://huggingface.co/DeepXR/Helion-V2-GGUF/resolve/main/helion-v2-q4_k_m.gguf

# Generate up to 256 tokens from the prompt with the llama.cpp CLI
./llama-cli --model helion-v2-q4_k_m.gguf --prompt "Your prompt here" --n-predict 256
```

---

## Training Details

### Training Data Composition

| Data Source | Percentage | Tokens | Description |
|------------|------------|--------|-------------|
| Web Documents | 45% | 1.125T | High-quality web pages, articles, documentation |
| Code Repositories | 20% | 500B | GitHub, Stack Overflow, technical forums |
| Books | 15% | 375B | Fiction, non-fiction, educational materials |
| Scientific Papers | 10% | 250B | ArXiv, PubMed, academic publications |
| Instruction Data | 10% | 250B | Curated instruction-response pairs |

**Total Training Tokens**: 2.5 trillion

### Data Processing Pipeline

1. **Collection**: Scraped from verified sources with license compliance
2. **Quality Filtering**: Perplexity-based filtering (threshold: 2000)
3. **Deduplication**: MinHash LSH for near-duplicate removal (>95% similarity)
4. **Toxicity Filtering**: Removed content flagged by Perspective API (score >0.7)
5. **PII Removal**: Named entity recognition and regex-based scrubbing
6. **Language Detection**: Filtered for 13 target languages
7. **Code Quality**: AST validation, syntax checking, license verification

### Training Hyperparameters

| Parameter | Value |
|-----------|-------|
| Optimizer | AdamW |
| Peak Learning Rate | 3e-4 |
| Learning Rate Schedule | Cosine with warmup |
| Warmup Steps | 2,000 |
| Weight Decay | 0.01 |
| Gradient Clipping | 1.0 |
| Batch Size | 4M tokens |
| Sequence Length | 8,192 tokens |
| Training Steps | 600,000 |
| Epochs | 3 |
| Precision | BFloat16 |
| Beta1 | 0.9 |
| Beta2 | 0.95 |
| Epsilon | 1e-8 |

### Infrastructure

- **GPUs**: 128x NVIDIA H100 80GB (SXM5)
- **Framework**: PyTorch 2.1.2 with CUDA 12.1
- **Distributed Training**: DeepSpeed ZeRO-3 with CPU offloading
- **Mixed Precision**: BFloat16 with gradient scaling
- **Checkpointing**: Every 1,000 steps (3 checkpoints retained)
- **Training Duration**: 21 days
- **Total GPU Hours**: 64,512 hours
- **Estimated Cost**: $450,000 USD

### Post-Training Refinement

1. **Supervised Fine-Tuning (SFT)**: 150,000 instruction-response pairs
2. **Direct Preference Optimization (DPO)**: 50,000 preference pairs
3. **Safety Fine-Tuning**: 25,000 safety-focused examples
4. **Evaluation-Driven Refinement**: Iterative improvements based on benchmark performance

---

## Limitations

### Known Limitations

1. **Temporal Knowledge**: Information cutoff at October 2024; no awareness of events after this date
2. **Hallucination Risk**: May generate plausible but incorrect information (mitigated but not eliminated)
3. **Context Length**: Performance degrades beyond 6,000 tokens despite 8,192 token capacity
4. **Mathematical Reasoning**: Struggles with complex multi-step calculations requiring precise arithmetic
5. **Specialized Domains**: Limited accuracy in highly technical fields (e.g., advanced physics, medicine, law)
6. **Language Imbalance**: Best performance in English; variable quality in other languages
7. **Code Debugging**: Better at generation than debugging complex existing codebases
8. **Long-Term Memory**: No persistent memory across conversations
9. **Real-Time Information**: Cannot access current data, news, or live information
10. **Multimodal Understanding**: Text-only model; no image, audio, or video processing

### Ethical Considerations

**Bias**: Training data may reflect societal biases related to gender, race, culture, geography, and socioeconomic status. Users should validate outputs for fairness.

**Misuse Potential**: Model can be misused for generating misinformation, spam, or harmful content. Implement appropriate safeguards.

**Environmental Impact**: Training consumed significant energy (est. 8,500 kg CO2eq). Consider carbon offset for large-scale deployments.

**Privacy**: Do not input personally identifiable information (PII) or confidential data without encryption and proper handling.

### Use Case Restrictions

**DO NOT USE FOR:**
- Medical diagnosis or treatment recommendations
- Legal advice or contractual interpretation
- Financial investment decisions
- Safety-critical systems (aviation, automotive, medical devices)
- Autonomous decision-making without human oversight
- Generating false identification or credentials
- Impersonating individuals or organizations
- Processing sensitive personal data without consent

---

## Citation

If you use Helion-V2 in your research or applications, please cite:

```bibtex
@misc{helion-v2-2024,
  title={Helion-V2: An Efficient and Truthful Large Language Model for Daily Use},
  author={DeepXR Team},
  year={2024},
  month={November},
  publisher={HuggingFace},
  url={https://huggingface.co/DeepXR/Helion-V2},
  note={7.2B parameter decoder-only transformer with grouped query attention}
}
```

For technical details:

```bibtex
@techreport{helion-v2-technical-2025,
  title={Helion-V2: Technical Report},
  author={DeepXR Research Team},
  institution={DeepXR},
  year={2025},
  type={Technical Report},
  url={https://deepxr.ai/research/helion-v2-technical-report.pdf}
}
```

---

## License

This model is released under the **Apache License 2.0**. You are free to:

- Use commercially
- Modify and distribute
- Use privately
- Rely on the license's express patent grant from contributors

**Conditions:**
- Include copyright notice
- Include license copy
- State changes made
- Include NOTICE file if present

See [LICENSE](LICENSE) file for complete terms.

---

## Acknowledgments

We extend our gratitude to:

- **Hugging Face** for the Transformers library and model hosting infrastructure
- **PyTorch Team** for the deep learning framework
- **DeepSpeed Team** (Microsoft) for distributed training tools
- **EleutherAI** for evaluation frameworks and benchmarks
- **Open Source Community** for datasets, tools, and collaborative research
- **Our Compute Partners** for providing GPU infrastructure

Special thanks to the researchers behind the work that influenced this project, including LLaMA, Mistral, GPT, PaLM, and countless others advancing open language models.

---


<div align="center">

**Developed with care by the DeepXR Team**

*Building responsible, capable, and accessible AI for everyone*

</div>