explcre committed on
Commit
a04c578
·
verified ·
1 Parent(s): a3337b2

Upload _paper_results/reasoning_rl_multiseed_summary.json with huggingface_hub

Browse files
_paper_results/reasoning_rl_multiseed_summary.json CHANGED
@@ -44,6 +44,24 @@
44
  },
45
  "t2": {
46
  "per_seed": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  {
48
  "seed": 42,
49
  "score_path": "/workspace/dnathinker/runs/eval_reasoning_t2_v7r128_postRL_alpha1_20260506_004858/score.json",
@@ -55,12 +73,14 @@
55
  }
56
  ],
57
  "tfg_stats": {
58
- "n": 1,
59
- "mean": 0.3650301689387592,
60
- "std": 0.0,
61
- "min": 0.3650301689387592,
62
  "max": 0.3650301689387592,
63
  "values": [
 
 
64
  0.3650301689387592
65
  ]
66
  }
 
44
  },
45
  "t2": {
46
  "per_seed": [
47
+ {
48
+ "seed": 2,
49
+ "score_path": "/workspace/dnathinker/runs/eval_reasoning_t2_v7r128_postRL_alpha1_s2_20260507_023054/score.json",
50
+ "tfg": 0.33895446884229713,
51
+ "n_cited": 16.22,
52
+ "n_grounded": 5.84,
53
+ "n_halluc": 10.22,
54
+ "reasoning_tags_rate": 0.82
55
+ },
56
+ {
57
+ "seed": 3,
58
+ "score_path": "/workspace/dnathinker/runs/eval_reasoning_t2_v7r128_postRL_alpha1_s3_20260507_044835/score.json",
59
+ "tfg": 0.2666117252466633,
60
+ "n_cited": 15.2,
61
+ "n_grounded": 4.7,
62
+ "n_halluc": 10.16,
63
+ "reasoning_tags_rate": 0.74
64
+ },
65
  {
66
  "seed": 42,
67
  "score_path": "/workspace/dnathinker/runs/eval_reasoning_t2_v7r128_postRL_alpha1_20260506_004858/score.json",
 
73
  }
74
  ],
75
  "tfg_stats": {
76
+ "n": 3,
77
+ "mean": 0.3235321210092399,
78
+ "std": 0.05098954916653428,
79
+ "min": 0.2666117252466633,
80
  "max": 0.3650301689387592,
81
  "values": [
82
+ 0.33895446884229713,
83
+ 0.2666117252466633,
84
  0.3650301689387592
85
  ]
86
  }