{ "version": "DomainEmbedder-v2.6", "step": 4000, "base_model": "FireDevourerEmbedder-RL-v3.6", "avg_reward": 1.5269566774368286, "accuracy": 92.5, "all_scores": { "avg_reward": 1.5269566774368286, "accuracy": 92.5 }, "timestamp": "2026-02-10T02:01:18.178319", "method": "TRUE LoRA (PEFT) + SUPERVISED RL + CURRICULUM", "config": { "lora_rank": 16, "lora_alpha": 32, "lora_target_modules": [ "query", "value" ], "rl_algorithm": "Supervised A2C Policy Gradient", "rl_total_steps": 5000, "rl_gamma": 0.99, "rl_entropy_coef": 0.1, "correctness_bonus": 1.0, "correctness_penalty": 0.5, "curriculum_learning": true, "domains": [ "medical", "legal", "code", "finance", "scientific" ] }, "files": { "base_model": "FireDevourerEmbedder-RL-v3.6.pt", "rl_policy": "rl_policy.pt", "lora_adapters": { "medical": "medical_lora/", "legal": "legal_lora/", "code": "code_lora/", "finance": "finance_lora/", "scientific": "scientific_lora/" } } }