{
  "checkpoint_path": "/QuasarV4/checkpoints/step_220000",
  "analysis_timestamp": "0",
  "model_architecture": "HFA (Hierarchical Flow Anchoring)",
  "parameter_mappings": {
    "checkpoint_params": [
      "token_embedding.weight",
      "blocks.0.attention.hierarchical_flow.evolution_rate",
      "blocks.0.attention.hierarchical_flow.memory_decay",
      "blocks.0.attention.hierarchical_flow.attention_memory",
      "blocks.0.attention.hierarchical_flow.q_proj.weight",
      "blocks.0.attention.hierarchical_flow.k_proj.weight",
      "blocks.0.attention.hierarchical_flow.v_proj.weight",
      "blocks.0.attention.hierarchical_flow.out_proj.weight",
      "blocks.0.attention.hierarchical_flow.attention_evolution.weight",
      "blocks.0.attention.hierarchical_flow.attention_evolution.bias",
      "blocks.0.attention.hierarchical_flow.memory_gate.weight",
      "blocks.0.attention.hierarchical_flow.memory_gate.bias",
      "blocks.0.attention.hierarchical_flow.temporal_dynamics.weight",
      "blocks.0.attention.hierarchical_flow.temporal_dynamics.bias",
      "blocks.0.attention.hierarchical_flow.checkpoint_trigger.checkpoint_frequency",
      "blocks.0.attention.hierarchical_flow.checkpoint_trigger.entropy_analyzer.weight",
      "blocks.0.attention.hierarchical_flow.checkpoint_trigger.entropy_analyzer.bias",
      "blocks.0.attention.hierarchical_flow.checkpoint_trigger.semantic_detector.0.weight",
      "blocks.0.attention.hierarchical_flow.checkpoint_trigger.semantic_detector.0.bias",
      "blocks.0.attention.hierarchical_flow.checkpoint_trigger.semantic_detector.2.weight"
    ]
  },
  "checkpoint_structure": {
    "type": "nested_model_state_dict",
    "num_parameters": 274
  },
  "loading_instructions": [
    "1. Load the checkpoint with torch.load()",
    "2. Extract model_state_dict from the checkpoint dictionary",
    "3. Map parameter names:",
    "   - hfa_layers.X -> blocks.X.attention.hierarchical_flow",
    "   - token_embedding -> token_embedding (direct match)",
    "   - lm_head -> lm_head (direct match)",
    "   - layer_norm -> layer_norm (direct match)",
    "4. Use strict=False when loading to handle mismatches (see the Python sketch after this file)"
  ],
  "training_metadata": {
    "step": 220000,
    "epoch": 1,
    "train_loss": 4.591190338134766,
    "val_loss": 4.591190338134766,
    "timestamp": 1757907906.1673536,
    "save_duration": 2.1279516220092773,
    "checkpoint_type": "hybrid_fast",
    "file_size_mb": 881.7255353927612
  }
}
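
The loading_instructions above translate almost line for line into PyTorch. The following is a minimal sketch, not a verified loader: `HFAModel` is a hypothetical stand-in for the actual model class, the key-rename direction follows step 3 verbatim, and the `"model_state_dict"` key comes from step 2. On PyTorch 2.6+ you may also need `weights_only=False` in `torch.load()` for a pickled checkpoint dict like this one.

```python
import re

import torch


def remap_key(name: str) -> str:
    """Rename hfa_layers.X parameters to blocks.X.attention.hierarchical_flow
    per step 3; token_embedding, lm_head, and layer_norm pass through unchanged."""
    return re.sub(r"^hfa_layers\.(\d+)\.",
                  r"blocks.\1.attention.hierarchical_flow.", name)


# 1. Load the checkpoint (path taken from checkpoint_path above).
checkpoint = torch.load("/QuasarV4/checkpoints/step_220000", map_location="cpu")

# 2. The checkpoint is a nested dict; pull out the model weights.
state_dict = checkpoint["model_state_dict"]
print(len(state_dict))  # expect 274, per checkpoint_structure.num_parameters

# 3. Apply the documented key renames.
state_dict = {remap_key(k): v for k, v in state_dict.items()}

# 4. Load non-strictly so any leftover naming mismatches don't abort the load.
model = HFAModel()  # hypothetical constructor; substitute the real model class
incompatible = model.load_state_dict(state_dict, strict=False)
print("missing:", incompatible.missing_keys)
print("unexpected:", incompatible.unexpected_keys)
```

With `strict=False`, `load_state_dict` reports keys that still do not line up instead of raising, so inspecting `missing_keys` and `unexpected_keys` afterwards is the practical check that the remap in step 3 actually matched the checkpoint's parameter names.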