Upload _paper_results/reasoning_rl_multiseed_summary.json with huggingface_hub
Browse files
_paper_results/reasoning_rl_multiseed_summary.json
CHANGED
|
@@ -44,6 +44,24 @@
|
|
| 44 |
},
|
| 45 |
"t2": {
|
| 46 |
"per_seed": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
{
|
| 48 |
"seed": 42,
|
| 49 |
"score_path": "/workspace/dnathinker/runs/eval_reasoning_t2_v7r128_postRL_alpha1_20260506_004858/score.json",
|
|
@@ -55,12 +73,14 @@
|
|
| 55 |
}
|
| 56 |
],
|
| 57 |
"tfg_stats": {
|
| 58 |
-
"n":
|
| 59 |
-
"mean": 0.
|
| 60 |
-
"std": 0.
|
| 61 |
-
"min": 0.
|
| 62 |
"max": 0.3650301689387592,
|
| 63 |
"values": [
|
|
|
|
|
|
|
| 64 |
0.3650301689387592
|
| 65 |
]
|
| 66 |
}
|
|
|
|
| 44 |
},
|
| 45 |
"t2": {
|
| 46 |
"per_seed": [
|
| 47 |
+
{
|
| 48 |
+
"seed": 2,
|
| 49 |
+
"score_path": "/workspace/dnathinker/runs/eval_reasoning_t2_v7r128_postRL_alpha1_s2_20260507_023054/score.json",
|
| 50 |
+
"tfg": 0.33895446884229713,
|
| 51 |
+
"n_cited": 16.22,
|
| 52 |
+
"n_grounded": 5.84,
|
| 53 |
+
"n_halluc": 10.22,
|
| 54 |
+
"reasoning_tags_rate": 0.82
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"seed": 3,
|
| 58 |
+
"score_path": "/workspace/dnathinker/runs/eval_reasoning_t2_v7r128_postRL_alpha1_s3_20260507_044835/score.json",
|
| 59 |
+
"tfg": 0.2666117252466633,
|
| 60 |
+
"n_cited": 15.2,
|
| 61 |
+
"n_grounded": 4.7,
|
| 62 |
+
"n_halluc": 10.16,
|
| 63 |
+
"reasoning_tags_rate": 0.74
|
| 64 |
+
},
|
| 65 |
{
|
| 66 |
"seed": 42,
|
| 67 |
"score_path": "/workspace/dnathinker/runs/eval_reasoning_t2_v7r128_postRL_alpha1_20260506_004858/score.json",
|
|
|
|
| 73 |
}
|
| 74 |
],
|
| 75 |
"tfg_stats": {
|
| 76 |
+
"n": 3,
|
| 77 |
+
"mean": 0.3235321210092399,
|
| 78 |
+
"std": 0.05098954916653428,
|
| 79 |
+
"min": 0.2666117252466633,
|
| 80 |
"max": 0.3650301689387592,
|
| 81 |
"values": [
|
| 82 |
+
0.33895446884229713,
|
| 83 |
+
0.2666117252466633,
|
| 84 |
0.3650301689387592
|
| 85 |
]
|
| 86 |
}
|