AnonyRepo committed on
Commit
54a07de
·
verified ·
1 Parent(s): 5b737c8

Initial release: ormprotocol-causal-lasttoken-s42

Browse files
Files changed (3) hide show
  1. README.md +10 -0
  2. adapter.safetensors +3 -0
  3. config.json +32 -0
README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ base_model: Dream-org/Dream-v0-Instruct-7B
4
+ tags: [process-reward-model, discrete-diffusion, gsm8k, lora]
5
+ library_name: peft
6
+ ---
7
+
8
+ # ormprotocol-causal-lasttoken
9
+
10
+ ORM-protocol causal LoRA adapter with last-token pooling (seed 42). Trained on final states only (no step embedding; 8407 training steps). Final accuracy = **0.842** at mask=0. Decision-tree Outcome B evidence: confirms that the architectural effect persists when the training protocol is matched with that of the bidirectional ORM.
adapter.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6c4c34e3a13a70d81e45f8f2b6573e50d35e38871b9f7562b595eef9cd0f807
3
+ size 34890548
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "source_checkpoint_size_gb": 15.266361777,
3
+ "num_kept_params": 116,
4
+ "num_total_params": 455,
5
+ "kept_size_mb": 34.873348,
6
+ "extracted_with": "extract_lora_only.py",
7
+ "parameter_prefixes_kept": [
8
+ "lora_A",
9
+ "lora_B",
10
+ "reward_head",
11
+ "step_proj",
12
+ "step_embed"
13
+ ],
14
+ "training_config": {
15
+ "batch_size": 4,
16
+ "grad_accum": 8,
17
+ "lr": 1e-05,
18
+ "seed": 42,
19
+ "lora_r": 16,
20
+ "lora_alpha": 32,
21
+ "lora_dropout": 0.05,
22
+ "step_embed_dim": 256,
23
+ "reward_hidden": 1024,
24
+ "min_mask_ratio": 0.0,
25
+ "max_mask_ratio": 0.0,
26
+ "causal": true,
27
+ "no_step_embed": true,
28
+ "no_mask_aware": false,
29
+ "pool_strategy": "last_token",
30
+ "max_steps": 15000
31
+ }
32
+ }