DeepXR
/

Helion-OSC

@@ -1,14 +1,14 @@
 {
   "model_type": "helion-osc",
   "architectures": ["HelionOSCForCausalLM"],
-  "vocab_size": 50280,
-  "hidden_size": 4096,
-  "num_hidden_layers": 32,
-  "num_attention_heads": 32,
   "num_key_value_heads": 8,
-  "intermediate_size": 14336,
-  "hidden_act": "silu",
-  "max_position_embeddings": 8192,
   "initializer_range": 0.02,
   "rms_norm_eps": 1e-6,
   "use_cache": true,
@@ -17,45 +17,199 @@
   "eos_token_id": 2,
   "tie_word_embeddings": false,
   "rope_theta": 10000.0,
-  "rope_scaling": null,
   "attention_bias": false,
   "attention_dropout": 0.0,
   "mlp_bias": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.36.0",
   "task_specific_params": {
     "code_generation": {
-      "max_length": 2048,
       "temperature": 0.7,
       "top_p": 0.95,
-      "do_sample": true
     },
     "mathematical_reasoning": {
-      "max_length": 1024,
       "temperature": 0.3,
       "top_p": 0.9,
-      "do_sample": false
     }
   },
   "specialization": {
     "domain": "coding_and_mathematics",
     "languages_supported": [
       "python",
       "javascript",
       "typescript",
       "java",
-      "c++",
-      "rust",
       "go",
-      "sql"
     ],
     "features": [
       "code_generation",
       "code_completion",
       "bug_detection",
       "mathematical_reasoning",
       "algorithm_design",
-      "code_optimization"
     ]
   }
 }

 {
   "model_type": "helion-osc",
   "architectures": ["HelionOSCForCausalLM"],
+  "vocab_size": 102400,
+  "hidden_size": 5120,
+  "num_hidden_layers": 48,
+  "num_attention_heads": 40,
   "num_key_value_heads": 8,
+  "intermediate_size": 18432,
+  "hidden_act": "swiglu",
+  "max_position_embeddings": 16384,
   "initializer_range": 0.02,
   "rms_norm_eps": 1e-6,
   "use_cache": true,
   "eos_token_id": 2,
   "tie_word_embeddings": false,
   "rope_theta": 10000.0,
+  "rope_scaling": {
+    "type": "linear",
+    "factor": 2.0
+  },
   "attention_bias": false,
   "attention_dropout": 0.0,
   "mlp_bias": false,
   "torch_dtype": "bfloat16",
+  "transformers_version": "4.40.0",
+  "model_version": "1.0",
+  "use_flash_attention": true,
+  "sliding_window": null,
+  "gradient_checkpointing": false,
   "task_specific_params": {
     "code_generation": {
+      "max_length": 4096,
       "temperature": 0.7,
       "top_p": 0.95,
+      "top_k": 50,
+      "do_sample": true,
+      "repetition_penalty": 1.05,
+      "length_penalty": 1.0
     },
     "mathematical_reasoning": {
+      "max_length": 2048,
       "temperature": 0.3,
       "top_p": 0.9,
+      "top_k": 40,
+      "do_sample": false,
+      "repetition_penalty": 1.0,
+      "length_penalty": 1.2
+    },
+    "code_completion": {
+      "max_length": 1024,
+      "temperature": 0.6,
+      "top_p": 0.92,
+      "top_k": 45,
+      "do_sample": true,
+      "repetition_penalty": 1.03,
+      "stop_sequences": ["\n\n", "```", "###"]
+    },
+    "algorithm_design": {
+      "max_length": 3072,
+      "temperature": 0.5,
+      "top_p": 0.93,
+      "top_k": 50,
+      "do_sample": true,
+      "repetition_penalty": 1.08
+    },
+    "debugging": {
+      "max_length": 2048,
+      "temperature": 0.4,
+      "top_p": 0.88,
+      "do_sample": false,
+      "repetition_penalty": 1.0
     }
   },
   "specialization": {
     "domain": "coding_and_mathematics",
+    "primary_focus": "code_generation_with_mathematical_reasoning",
+    "verification_enabled": true,
+    "step_by_step_reasoning": true,
     "languages_supported": [
       "python",
       "javascript",
       "typescript",
       "java",
+      "c",
+      "cpp",
+      "csharp",
       "go",
+      "rust",
+      "ruby",
+      "php",
+      "swift",
+      "kotlin",
+      "scala",
+      "r",
+      "sql",
+      "bash",
+      "shell"
     ],
     "features": [
       "code_generation",
       "code_completion",
       "bug_detection",
+      "bug_fixing",
       "mathematical_reasoning",
+      "theorem_proving",
       "algorithm_design",
+      "algorithm_optimization",
+      "code_refactoring",
+      "documentation_generation",
+      "test_generation",
+      "complexity_analysis"
+    ],
+    "mathematical_capabilities": [
+      "arithmetic",
+      "algebra",
+      "calculus",
+      "discrete_mathematics",
+      "linear_algebra",
+      "probability",
+      "statistics",
+      "number_theory",
+      "graph_theory",
+      "combinatorics"
+    ]
+  },
+  "training_config": {
+    "training_precision": "bf16",
+    "optimizer": "adamw",
+    "learning_rate": 2e-5,
+    "warmup_steps": 2000,
+    "weight_decay": 0.01,
+    "max_grad_norm": 1.0
+  },
+  "quantization_config": {
+    "quant_method": "bitsandbytes",
+    "load_in_8bit": false,
+    "load_in_4bit": false,
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_use_double_quant": true,
+    "bnb_4bit_quant_type": "nf4"
+  },
+  "generation_config": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "top_k": 50,
+    "do_sample": true,
+    "max_new_tokens": 2048,
+    "min_new_tokens": 1,
+    "num_beams": 1,
+    "early_stopping": false,
+    "no_repeat_ngram_size": 3,
+    "encoder_no_repeat_ngram_size": 0,
+    "diversity_penalty": 0.0,
+    "repetition_penalty": 1.05,
+    "length_penalty": 1.0,
+    "exponential_decay_length_penalty": null
+  },
+  "special_tokens": {
+    "pad_token": "<|pad|>",
+    "bos_token": "<|begin_of_text|>",
+    "eos_token": "<|end_of_text|>",
+    "unk_token": "<|unk|>",
+    "code_start_token": "<|code_start|>",
+    "code_end_token": "<|code_end|>",
+    "math_start_token": "<|math_start|>",
+    "math_end_token": "<|math_end|>",
+    "reasoning_start_token": "<|reasoning_start|>",
+    "reasoning_end_token": "<|reasoning_end|>",
+    "explanation_start_token": "<|explanation_start|>",
+    "explanation_end_token": "<|explanation_end|>"
+  },
+  "supported_frameworks": [
+    "pytorch",
+    "tensorflow",
+    "onnx",
+    "jax"
+  ],
+  "evaluation_metrics": {
+    "humaneval_pass_at_1": 0.852,
+    "humaneval_pass_at_10": 0.928,
+    "mbpp_pass_at_1": 0.795,
+    "mbpp_pass_at_10": 0.891,
+    "gsm8k_accuracy": 0.785,
+    "math_accuracy": 0.623,
+    "apps_accuracy": 0.412
+  },
+  "hardware_requirements": {
+    "minimum_vram_gb": 16,
+    "recommended_vram_gb": 24,
+    "minimum_ram_gb": 32,
+    "recommended_ram_gb": 64,
+    "cpu_cores": 8,
+    "gpu_support": true,
+    "multi_gpu_support": true,
+    "cpu_only_support": true
+  },
+  "deployment_options": {
+    "inference_frameworks": [
+      "vllm",
+      "text-generation-inference",
+      "ollama",
+      "llama.cpp"
+    ],
+    "optimization_support": [
+      "quantization",
+      "pruning",
+      "distillation",
+      "tensorrt",
+      "onnx_runtime"
     ]
   }
 }