explcre commited on
Commit
b459e4c
·
verified ·
1 Parent(s): 65223fb

Upload runs/exp_t2_grid_separatedQA_20260426_h100_vllm_full/zs_raw/metrics.json with huggingface_hub

Browse files
runs/exp_t2_grid_separatedQA_20260426_h100_vllm_full/zs_raw/metrics.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "pair_prediction-zeroshot-raw",
3
+ "task_type": "pair_prediction",
4
+ "model_tag": "Qwen3.5-2B-zeroshot-raw",
5
+ "metrics": {
6
+ "accuracy": 0.5000080599661482,
7
+ "f1": 6.984928673132204e-05,
8
+ "precision": 0.65,
9
+ "recall": 3.492651997528277e-05,
10
+ "parse_rate": 1.0,
11
+ "n": 744420.0
12
+ },
13
+ "per_cell_type": {
14
+ "Ast": {
15
+ "accuracy": 0.4999794336013821,
16
+ "f1": 0.0,
17
+ "precision": 0.0,
18
+ "recall": 0.0,
19
+ "parse_rate": 1.0,
20
+ "n": 97246.0
21
+ },
22
+ "End": {
23
+ "accuracy": 0.5,
24
+ "f1": 0.0,
25
+ "precision": 0.0,
26
+ "recall": 0.0,
27
+ "parse_rate": 1.0,
28
+ "n": 16718.0
29
+ },
30
+ "Ex": {
31
+ "accuracy": 0.5000116160208159,
32
+ "f1": 9.292169024554559e-05,
33
+ "precision": 0.6666666666666666,
34
+ "recall": 4.646408326363721e-05,
35
+ "parse_rate": 1.0,
36
+ "n": 172176.0
37
+ },
38
+ "In": {
39
+ "accuracy": 0.5000196571788017,
40
+ "f1": 0.00011793379982703042,
41
+ "precision": 0.75,
42
+ "recall": 5.897153640509514e-05,
43
+ "parse_rate": 1.0,
44
+ "n": 101744.0
45
+ },
46
+ "Mic": {
47
+ "accuracy": 0.5,
48
+ "f1": 2.672724842977416e-05,
49
+ "precision": 0.5,
50
+ "recall": 1.3363981397337895e-05,
51
+ "parse_rate": 1.0,
52
+ "n": 149656.0
53
+ },
54
+ "OPC": {
55
+ "accuracy": 0.5000124495792042,
56
+ "f1": 4.979707691158529e-05,
57
+ "precision": 1.0,
58
+ "recall": 2.4899158408445795e-05,
59
+ "parse_rate": 1.0,
60
+ "n": 80324.0
61
+ },
62
+ "Oli": {
63
+ "accuracy": 0.5000237049211417,
64
+ "f1": 0.000126416257130667,
65
+ "precision": 0.8,
66
+ "recall": 6.321312304434401e-05,
67
+ "parse_rate": 1.0,
68
+ "n": 126556.0
69
+ }
70
+ },
71
+ "wall_seconds": 6943.698348045349,
72
+ "config": {
73
+ "task": "pair_prediction",
74
+ "mode": "zeroshot",
75
+ "prompt": "raw",
76
+ "model": "Qwen/Qwen3.5-2B",
77
+ "eval_jsonl": "/dev/shm/dnathinker/data/prod_full_test/jsonl/test.pair_prediction.jsonl",
78
+ "output_dir": "/dev/shm/dnathinker/runs/exp_t2_grid_separatedQA_20260426_h100_vllm_full/zs_raw",
79
+ "n": 744420,
80
+ "max_new_tokens": 128,
81
+ "max_model_len": 4096,
82
+ "gpu_mem_frac": 0.85,
83
+ "temperature": 0.0,
84
+ "engine": "vllm"
85
+ }
86
+ }