{
  "model_name": "Echo-TDM",
  "architecture": "EchoLLM",
  "vocab_size": 32000,
  "max_position_embeddings": 8192,
  "d_model": 768,
  "num_layers": 12,
  "num_heads": 12,
  "dim_feedforward": 3072,
  "dropout": 0.1,
  "activation": "gelu",
  "num_experts": 9,
  "use_memory": true,
  "memory_size": 2048,
  "batch_size": 8,
  "learning_rate": 5e-05,
  "num_epochs": 10,
  "optimizer": "Adafactor",
  "scheduler": "cosine",
  "warmup_steps": 1000,
  "weight_decay": 0.01,
  "curriculum_learning": true,
  "loss_function": "CrossEntropyLoss",
  "eval_metric": "perplexity",
  "output_dir": "EchoTDM_Model\\model",
  "checkpoint_frequency": 1,
  "save_best_model": true,
  "load_from_checkpoint": null,
  "use_gradient_checkpointing": true,
  "use_mixed_precision": true,
  "gradient_accumulation_steps": 4,
  "export_formats": [
    "safetensors"
  ],
  "precision": "float16",
  "lora_rank": 4,
  "lora_alpha": 16,
  "lora_dropout": 0.1,
  "temp_memory_threshold": 100,
  "perm_memory_limit": 0.05,
  "use_gamification": true,
  "reward_points": 10,
  "user_profile_enabled": true,
  "max_token_length": 2048
}