DeepXR
/

Helion-V2

+# Model Card for Helion-V2
+# Core identity and size of the released checkpoint.
+model_details:
+  name: Helion-V2
+  # Quoted so the version stays the string "2.0" rather than a float.
+  version: "2.0"
+  # Folded scalar: the lines below join with single spaces into one line.
+  description: >-
+    A 7.2B parameter large language model optimized for daily use,
+    featuring strong performance across reasoning, coding, and
+    conversational tasks.
+  organization: DeepXR
+  license: Apache-2.0
+  # Quoted so loaders that resolve timestamps keep this as a plain string.
+  release_date: "2024-11-15"
+  model_type: Causal Language Model
+  architecture: Decoder-only Transformer
+  # Parameter count as an integer (7.2 billion, matching the description).
+  parameters: 7200000000
+  precision: bfloat16
+# Intended audience, supported applications, and explicitly excluded uses.
+intended_use:
+  # Folded scalar: the two lines below join into a single-line string.
+  primary_use: >-
+    General-purpose language model for conversational AI, code generation,
+    and text completion
+  intended_users: 'Developers, researchers, businesses, and individuals'
+  # Applications the model is meant to support.
+  use_cases:
+    - 'Conversational AI assistants'
+    - 'Code generation and debugging'
+    - 'Content creation and writing assistance'
+    - 'Question answering systems'
+    - 'Educational tutoring'
+    - 'Data analysis and summarization'
+  # Uses the card declares out of scope.
+  out_of_scope:
+    - 'Medical diagnosis or treatment recommendations'
+    - 'Legal advice or contractual interpretation'
+    - 'Financial investment decisions'
+    - 'Safety-critical systems'
+    - 'Autonomous decision-making without human oversight'
+    - 'Generating harmful or illegal content'
+# Factors that affect model behavior: languages, demographics, hard limits.
+factors:
+  # Listed languages; English is marked as the primary one.
+  languages:
+    - 'English (primary)'
+    - 'Spanish'
+    - 'French'
+    - 'German'
+    - 'Italian'
+    - 'Portuguese'
+    - 'Dutch'
+    - 'Russian'
+    - 'Chinese'
+    - 'Japanese'
+    - 'Korean'
+    - 'Arabic'
+    - 'Hindi'
+  demographic_considerations:
+    - 'Model trained on diverse global data sources'
+    - 'Performance may vary across languages and cultural contexts'
+    - 'English language performance is strongest'
+  # The context length and knowledge cutoff below restate values given in
+  # training_procedure.sequence_length and training_data.knowledge_cutoff.
+  technical_limitations:
+    - 'Context length limited to 8,192 tokens'
+    - 'Knowledge cutoff at October 2024'
+    - 'May generate plausible but incorrect information'
+    - 'Performance degrades with highly specialized technical content'
+# Reported benchmark results. Each entry has a numeric score, the metric
+# type, and a short description (with shot count where the card gives one).
+metrics:
+  evaluation_benchmarks:
+    MMLU:
+      score: 64.2
+      type: accuracy
+      description: Massive Multitask Language Understanding (5-shot)
+    HumanEval:
+      score: 48.2
+      type: pass@1
+      description: Code generation benchmark
+    HellaSwag:
+      score: 80.5
+      type: accuracy
+      description: Commonsense reasoning (10-shot)
+    # Cited as a percentage (52.1%) under ethical_considerations.
+    TruthfulQA:
+      score: 52.1
+      type: mc2_accuracy
+      description: Truthfulness and factual accuracy
+    GSM8K:
+      score: 68.7
+      type: accuracy
+      description: Grade school math (8-shot, chain-of-thought)
+    ARC_Challenge:
+      score: 58.3
+      type: accuracy
+      description: AI2 Reasoning Challenge (25-shot)
+    # NOTE(review): rating scale is not stated here; presumably 1-10 - confirm.
+    MT_Bench:
+      score: 7.85
+      type: rating
+      description: Multi-turn conversation quality
+    # Unlike the other entries, a lower score is better for this benchmark.
+    ToxiGen:
+      score: 0.08
+      type: toxicity
+      description: Toxicity detection (lower is better)
+# Composition and cleaning of the pre-training corpus.
+training_data:
+  description: Diverse corpus of approximately 2.5 trillion tokens
+  # Share of the corpus per source; the percentages add up to 100.
+  sources:
+    - Web documents and articles (45%)
+    - Code repositories (20%)
+    - Books and educational materials (15%)
+    - Scientific papers (10%)
+    - Instruction-following datasets (10%)
+  # Cleaning steps listed for the corpus.
+  preprocessing:
+    - Quality filtering with perplexity thresholds
+    - Deduplication using MinHash LSH
+    - Toxicity filtering with Perspective API
+    - PII removal and scrubbing
+    - License compliance verification
+  # Quoted so loaders that resolve timestamps keep this as a plain string.
+  knowledge_cutoff: "2024-10-31"
+# Optimization hyperparameters and compute used for pre-training.
+training_procedure:
+  optimizer: AdamW
+  learning_rate: 0.0003
+  lr_schedule: Cosine with warmup
+  warmup_steps: 2000
+  # Tokens per optimizer step: 4194304 = 512 sequences of 8192 tokens.
+  batch_size: 4194304 tokens
+  sequence_length: 8192
+  # 600000 steps x 4194304 tokens is about 2.5 trillion tokens in total.
+  training_steps: 600000
+  # NOTE(review): that total equals one pass over the ~2.5 trillion token
+  # corpus in training_data, which does not match 3 epochs - confirm.
+  epochs: 3
+  precision: bfloat16
+  hardware: 128x NVIDIA H100 80GB GPUs
+  framework: PyTorch 2.1.2
+  distributed_strategy: DeepSpeed ZeRO-3
+  training_time: 21 days
+  # 128 GPUs x 21 days x 24 hours = 64512.
+  compute_hours: 64512 GPU-hours
+# Datasets used for evaluation. This list is longer than the set of
+# benchmarks that have scores under metrics.evaluation_benchmarks.
+evaluation_data:
+  datasets:
+    - 'MMLU (Massive Multitask Language Understanding)'
+    - 'HumanEval (Code generation)'
+    - 'MBPP (Mostly Basic Python Problems)'
+    - 'HellaSwag (Commonsense reasoning)'
+    - 'PIQA (Physical commonsense)'
+    - 'WinoGrande (Coreference resolution)'
+    - 'ARC (AI2 Reasoning Challenge)'
+    - 'TruthfulQA (Truthfulness)'
+    - 'GSM8K (Math reasoning)'
+    - 'MATH (Advanced mathematics)'
+    - 'BBH (Big-Bench Hard)'
+    - 'MT-Bench (Multi-turn conversation)'
+    - 'AlpacaEval (Instruction following)'
+    - 'ToxiGen (Toxicity detection)'
+    - 'CrowS-Pairs (Bias detection)'
+# Known biases, risks, mitigations, and safety-related properties.
+ethical_considerations:
+  bias_analysis:
+    - 'Training data may contain societal biases'
+    - 'Gender, racial, cultural, and geographic biases possible'
+    - 'Users should validate outputs for fairness'
+    - 'Ongoing monitoring and evaluation recommended'
+  risks_and_limitations:
+    - 'May generate plausible but incorrect information'
+    - 'Can be misused for generating harmful content'
+    - 'Should not replace professional advice'
+    - 'Requires appropriate safeguards for production use'
+  mitigation_strategies:
+    - 'Content filtering during training'
+    - 'Safety fine-tuning with human feedback'
+    - 'Built-in refusal mechanisms'
+    - 'Comprehensive safety documentation'
+    - 'Rate limiting and monitoring recommended'
+  # The ToxiGen and TruthfulQA figures repeat the scores listed under
+  # metrics.evaluation_benchmarks.
+  safety_features:
+    - 'Low toxicity score (0.08 on ToxiGen)'
+    - 'High truthfulness (52.1% on TruthfulQA)'
+    - 'Content moderation capabilities'
+    - 'PII detection and redaction'
+    - 'Crisis detection and resource routing'
+# Estimated footprint of the training run.
+environmental_impact:
+  carbon_emissions: 8500 kg CO2eq
+  compute_region: United States
+  # hardware and training_hours repeat the values in training_procedure
+  # (hardware, compute_hours); keep them in sync when either changes.
+  hardware: 128x NVIDIA H100 80GB GPUs
+  training_hours: 64512 GPU-hours
+  estimated_cost: 450000 USD
+  # Measures listed for reducing the footprint.
+  mitigation:
+    - Support for quantization (4-bit, 8-bit)
+    - Efficient inference optimization
+    - Carbon offset programs recommended
+# How to cite the model.
+citations:
+  # Literal block scalar: line breaks are kept exactly. Do not place YAML
+  # comments inside it, because they would become part of the BibTeX text.
+  bibtex: |
+    @misc{helion-v2-2024,
+      title={Helion-V2: An Efficient and Truthful Large Language Model for Daily Use},
+      author={DeepXR Team},
+      year={2024},
+      month={November},
+      publisher={HuggingFace},
+      url={https://huggingface.co/DeepXR/Helion-V2},
+      note={7.2B parameter decoder-only transformer with grouped query attention}
+    }
+# Support and community channels.
+contact:
+  email: 'contact@deepxr.ai'
+  github: 'https://github.com/DeepXR/Helion-V2'
+  issues: 'https://github.com/DeepXR/Helion-V2/issues'
+  # Must stay quoted: a plain scalar cannot begin with "@".
+  twitter: '@DeepXR_AI'
+  discord: 'https://discord.gg/deepxr'
+# Full license details. model_details.license carries the short identifier
+# (Apache-2.0) for the same license.
+license:
+  name: Apache License 2.0
+  url: https://www.apache.org/licenses/LICENSE-2.0
+  # Permission flags, written as canonical lowercase booleans.
+  commercial_use: true
+  modifications_allowed: true
+  distribution_allowed: true
+  patent_use: true
+# Metadata about this model card document itself.
+# Quoted so the card version stays the string "1.0" rather than a float.
+model_card_version: "1.0"
+model_card_authors:
+  - DeepXR Team
+model_card_contact: contact@deepxr.ai
+# Quoted so loaders that resolve timestamps keep this as a plain string.
+model_card_date: "2024-11-15"