{
  "version": "DomainEmbedder-v2.6",
  "step": 4000,
  "base_model": "FireDevourerEmbedder-RL-v3.6",
  "avg_reward": 1.5269566774368286,
  "accuracy": 92.5,
  "all_scores": {
    "avg_reward": 1.5269566774368286,
    "accuracy": 92.5
  },
  "timestamp": "2026-02-10T02:01:18.178319",
  "method": "TRUE LoRA (PEFT) + SUPERVISED RL + CURRICULUM",
  "config": {
    "lora_rank": 16,
    "lora_alpha": 32,
    "lora_target_modules": [
      "query",
      "value"
    ],
    "rl_algorithm": "Supervised A2C Policy Gradient",
    "rl_total_steps": 5000,
    "rl_gamma": 0.99,
    "rl_entropy_coef": 0.1,
    "correctness_bonus": 1.0,
    "correctness_penalty": 0.5,
    "curriculum_learning": true,
    "domains": [
      "medical",
      "legal",
      "code",
      "finance",
      "scientific"
    ]
  },
  "files": {
    "base_model": "FireDevourerEmbedder-RL-v3.6.pt",
    "rl_policy": "rl_policy.pt",
    "lora_adapters": {
      "medical": "medical_lora/",
      "legal": "legal_lora/",
      "code": "code_lora/",
      "finance": "finance_lora/",
      "scientific": "scientific_lora/"
    }
  }
}