roonbug commited on
Commit
dd40c8b
·
verified ·
1 Parent(s): e29c0c2

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. debug-internal.log +31 -0
  2. debug.log +0 -0
  3. run-20260503_070020-tsnlx6qq/files/config.yaml +91 -0
  4. run-20260503_070020-tsnlx6qq/files/output.log +1 -0
  5. run-20260503_070020-tsnlx6qq/files/requirements.txt +235 -0
  6. run-20260503_070020-tsnlx6qq/files/wandb-metadata.json +61 -0
  7. run-20260503_070020-tsnlx6qq/files/wandb-summary.json +1 -0
  8. run-20260503_070020-tsnlx6qq/logs/debug-core.log +18 -0
  9. run-20260503_070020-tsnlx6qq/logs/debug-internal.log +13 -0
  10. run-20260503_070020-tsnlx6qq/logs/debug.log +28 -0
  11. run-20260503_070023-mw4gx9uu/files/config.yaml +988 -0
  12. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_491_084cda5c92c1f5fd2472.table.json +1 -0
  13. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_562_108153824aefe6a35fdd.table.json +1 -0
  14. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_633_e4d7691c028a3169be82.table.json +1 -0
  15. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_704_0336dd8b3d6193775ae7.table.json +1 -0
  16. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_775_addf4a7b0a6d8223c671.table.json +1 -0
  17. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_846_d7d02a8f538364d9f02a.table.json +1 -0
  18. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_917_a4f00630ffe30bcfe311.table.json +1 -0
  19. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_1060_ce30e646b2fea156817c.table.json +1 -0
  20. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_492_a5bd2a0cd4f63e67f54f.table.json +1 -0
  21. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_634_45d88133b373103983fc.table.json +1 -0
  22. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_705_28e93bfb4e3763324925.table.json +1 -0
  23. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_776_45c8a9e1988a9e5162c8.table.json +1 -0
  24. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_847_b866a7ce448652f4313e.table.json +1 -0
  25. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_918_aae2930a5651916a7b9a.table.json +1 -0
  26. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_quality_table_1274_c5d49b99b4447dcec7d0.table.json +1 -0
  27. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_quality_table_635_f1adc7800bed015619df.table.json +1 -0
  28. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_quality_table_706_015e2abd3e6840446f0f.table.json +1 -0
  29. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_quality_table_777_a9acd45c2c3a794d7ac0.table.json +1 -0
  30. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_quality_table_919_26642653f74eb2bf8591.table.json +1 -0
  31. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_quality_table_990_a2a5f913c3c71513f5ed.table.json +1 -0
  32. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_relevance_table_494_4b847cef10f29786285c.table.json +1 -0
  33. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_relevance_table_707_4af47f1d164b1922273e.table.json +1 -0
  34. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_relevance_table_778_69c097d2ff89d09048df.table.json +1 -0
  35. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_relevance_table_849_ba26a186fec558583166.table.json +1 -0
  36. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_relevance_table_920_16cc28b57865b6315a7c.table.json +1 -0
  37. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_relevance_table_991_0bdb5d9afaf2ce8e262c.table.json +1 -0
  38. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_1268_51c4585a1cd6c16e6bc9.table.json +1 -0
  39. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_1339_467f1c0991e2d2337613.table.json +1 -0
  40. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_629_cf12fa69829dc430ebe7.table.json +1 -0
  41. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_700_47843788eded784426dd.table.json +1 -0
  42. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_842_44dc3853fad038817bde.table.json +1 -0
  43. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_913_96dd3762242b64ea2815.table.json +1 -0
  44. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_984_4f82c1c9ddad62217707.table.json +1 -0
  45. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_ground_truth_similarity_table_62_0a4adde2ad357263e181.table.json +1 -0
  46. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_ground_truth_similarity_table_630_d53d8487574f1fce4344.table.json +1 -0
  47. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_ground_truth_similarity_table_701_f1064f103daee387dbef.table.json +1 -0
  48. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_ground_truth_similarity_table_772_2f2f6fbfd27737267a0d.table.json +1 -0
  49. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_ground_truth_similarity_table_843_4e9fe9898744c743d126.table.json +1 -0
  50. run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_ground_truth_similarity_table_914_5741354b8d0daacf5ca6.table.json +1 -0
debug-internal.log ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-05-03T07:00:23.785699801Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2026-05-03T07:00:23.958219939Z","level":"INFO","msg":"stream: created new stream","id":"mw4gx9uu"}
3
+ {"time":"2026-05-03T07:00:23.958276475Z","level":"INFO","msg":"stream: started","id":"mw4gx9uu"}
4
+ {"time":"2026-05-03T07:00:23.958304176Z","level":"INFO","msg":"writer: Do: started","stream_id":"mw4gx9uu"}
5
+ {"time":"2026-05-03T07:00:23.958314151Z","level":"INFO","msg":"handler: started","stream_id":"mw4gx9uu"}
6
+ {"time":"2026-05-03T07:00:23.958344878Z","level":"INFO","msg":"sender: started","stream_id":"mw4gx9uu"}
7
+ {"time":"2026-05-03T13:26:54.494721949Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/arunasank/sae-scoping-stemqa-math/mw4gx9uu/file_stream\": dial tcp 34.8.250.101:443: connect: connection timed out"}
8
+ {"time":"2026-05-03T17:25:58.687617346Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/arunasank/sae-scoping-stemqa-math/mw4gx9uu/file_stream\": dial tcp 34.8.250.101:443: connect: connection timed out"}
9
+ {"time":"2026-05-03T19:37:07.102657924Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/arunasank/sae-scoping-stemqa-math/mw4gx9uu/file_stream\": dial tcp 34.8.250.101:443: connect: connection timed out"}
10
+ {"time":"2026-05-04T00:46:01.356251288Z","level":"INFO","msg":"api: retrying HTTP error","status":500,"url":"https://api.wandb.ai/graphql","body":"{\"errors\":[{\"message\":\"context deadline exceeded\",\"path\":[\"project\",\"run\"]}],\"data\":{\"project\":{\"run\":null}}}"}
11
+ {"time":"2026-05-04T14:12:56.636196125Z","level":"INFO","msg":"api: retrying HTTP error","status":500,"url":"https://api.wandb.ai/graphql","body":"{\"errors\":[{\"message\":\"context deadline exceeded\",\"path\":[\"project\",\"run\"]}],\"data\":{\"project\":{\"run\":null}}}"}
12
+ {"time":"2026-05-04T14:14:02.105541639Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
13
+ {"time":"2026-05-04T14:16:17.107568772Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
14
+ {"time":"2026-05-04T14:17:01.982714038Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/arunasank/sae-scoping-stemqa-math/mw4gx9uu/file_stream\": dial tcp 34.8.250.101:443: connect: connection timed out"}
15
+ {"time":"2026-05-04T14:17:15.928608343Z","level":"INFO","msg":"api: retrying HTTP error","status":500,"url":"https://api.wandb.ai/graphql","body":"{\"errors\":[{\"message\":\"context deadline exceeded\",\"path\":[\"project\",\"run\"]}],\"data\":{\"project\":{\"run\":null}}}"}
16
+ {"time":"2026-05-04T14:18:17.109847444Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
17
+ {"time":"2026-05-04T14:18:47.906654656Z","level":"INFO","msg":"api: retrying HTTP error","status":500,"url":"https://api.wandb.ai/graphql","body":"{\"errors\":[{\"message\":\"context deadline exceeded\",\"path\":[\"project\",\"run\"]}],\"data\":{\"project\":{\"run\":null}}}"}
18
+ {"time":"2026-05-04T14:23:17.112654257Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
19
+ {"time":"2026-05-04T14:27:02.115392245Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
20
+ {"time":"2026-05-04T14:28:32.116573806Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
21
+ {"time":"2026-05-04T14:29:04.148670003Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
22
+ {"time":"2026-05-04T14:32:02.121811383Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
23
+ {"time":"2026-05-04T14:33:47.122883619Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
24
+ {"time":"2026-05-04T14:34:19.448738564Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
25
+ {"time":"2026-05-04T14:37:02.124111251Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
26
+ {"time":"2026-05-04T20:47:39.170254747Z","level":"INFO","msg":"stream: closing","id":"mw4gx9uu"}
27
+ {"time":"2026-05-04T20:47:39.42687988Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
28
+ {"time":"2026-05-04T20:47:39.567118263Z","level":"INFO","msg":"handler: closed","stream_id":"mw4gx9uu"}
29
+ {"time":"2026-05-04T20:47:39.56718921Z","level":"INFO","msg":"writer: Close: closed","stream_id":"mw4gx9uu"}
30
+ {"time":"2026-05-04T20:47:39.567208689Z","level":"INFO","msg":"sender: closed","stream_id":"mw4gx9uu"}
31
+ {"time":"2026-05-04T20:47:39.567425296Z","level":"INFO","msg":"stream: closed","id":"mw4gx9uu"}
debug.log ADDED
The diff for this file is too large to render. See raw diff
 
run-20260503_070020-tsnlx6qq/files/config.yaml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ kxtjeo9jni30ggc0c02r09gslwxc3kka:
6
+ args:
7
+ - --train-domain
8
+ - math
9
+ - --attack-domain
10
+ - physics
11
+ - --stage
12
+ - attack
13
+ - --hf-recover-repo
14
+ - arunasank/fitoo774
15
+ - --checkpoint
16
+ - "2000"
17
+ - --dev
18
+ - --device
19
+ - cuda
20
+ - --firing-rate-threshold
21
+ - "1e-4"
22
+ - --gemma3
23
+ - --max-steps-attack
24
+ - "10000"
25
+ - --skip-pre-training-eval
26
+ codePath: experiments/script_scoping_pipeline_stemqa.py
27
+ codePathLocal: experiments/script_scoping_pipeline_stemqa.py
28
+ cpu_count: 64
29
+ cpu_count_logical: 128
30
+ cudaVersion: "13.0"
31
+ disk:
32
+ /:
33
+ total: "536870912000"
34
+ used: "73938042880"
35
+ email: aruna.evam@gmail.com
36
+ executable: /root/miniconda3/envs/sae/bin/python
37
+ git:
38
+ commit: 89bc64f41f54ca1f08c9d90c5c2ea13f98123c1b
39
+ remote: git@github.com:4gatepylon/SAEScoping.git
40
+ gpu: NVIDIA RTX PRO 6000 Blackwell Server Edition
41
+ gpu_count: 1
42
+ gpu_nvidia:
43
+ - architecture: Blackwell
44
+ cudaCores: 24064
45
+ memoryTotal: "102641958912"
46
+ name: NVIDIA RTX PRO 6000 Blackwell Server Edition
47
+ uuid: GPU-1e3dcc13-cf37-b8fe-2425-456c8d1b35d2
48
+ host: 80e02dee87f9
49
+ memory:
50
+ total: "1622884343808"
51
+ os: Linux-6.8.0-106-generic-x86_64-with-glibc2.39
52
+ program: /root/SAEScoping/experiments/script_scoping_pipeline_stemqa.py
53
+ python: CPython 3.12.13
54
+ root: /root/SAEScoping
55
+ startedAt: "2026-05-03T07:00:20.452873Z"
56
+ writerId: kxtjeo9jni30ggc0c02r09gslwxc3kka
57
+ m: []
58
+ python_version: 3.12.13
59
+ t:
60
+ "1":
61
+ - 1
62
+ - 5
63
+ - 11
64
+ - 49
65
+ - 51
66
+ - 53
67
+ - 71
68
+ - 84
69
+ - 95
70
+ - 98
71
+ - 105
72
+ "2":
73
+ - 1
74
+ - 5
75
+ - 11
76
+ - 49
77
+ - 51
78
+ - 53
79
+ - 71
80
+ - 84
81
+ - 95
82
+ - 98
83
+ - 105
84
+ "3":
85
+ - 2
86
+ - 13
87
+ "4": 3.12.13
88
+ "5": 0.21.0
89
+ "6": 4.56.1
90
+ "12": 0.21.0
91
+ "13": linux-x86_64
run-20260503_070020-tsnlx6qq/files/output.log ADDED
@@ -0,0 +1 @@
 
 
1
+ Pruning: keeping 1684/16384 neurons (threshold=0.0001)
run-20260503_070020-tsnlx6qq/files/requirements.txt ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ packaging==26.0
2
+ pytz==2026.1.post1
3
+ pure_eval==0.2.3
4
+ wheel==0.46.3
5
+ pip==26.0.1
6
+ nvidia-cusparselt-cu13==0.8.0
7
+ mpmath==1.3.0
8
+ cuda-toolkit==12.8.1
9
+ typing_extensions==4.15.0
10
+ sympy==1.14.0
11
+ ptyprocess==0.7.0
12
+ tzdata==2026.2
13
+ setuptools==81.0.0
14
+ nvidia-nvtx==13.0.85
15
+ nvidia-nvshmem-cu13==3.4.5
16
+ nvidia-nvjitlink==13.0.88
17
+ nvidia-nccl-cu13==2.28.9
18
+ nvidia-curand==10.4.0.35
19
+ nvidia-cufile==1.15.1.6
20
+ nvidia-cuda-runtime==13.0.96
21
+ nvidia-cuda-nvrtc==13.0.88
22
+ nvidia-cuda-cupti==13.0.85
23
+ nvidia-cublas==13.1.0.3
24
+ networkx==3.6.1
25
+ mypy_extensions==1.1.0
26
+ MarkupSafe==3.0.3
27
+ hf_transfer==0.1.9
28
+ filelock==3.29.0
29
+ cuda-pathfinder==1.5.4
30
+ annotated-types==0.7.0
31
+ typing-inspection==0.4.2
32
+ typing-inspect==0.9.0
33
+ typeguard==4.5.1
34
+ nvidia-cusparse==12.6.3.3
35
+ nvidia-cufft==12.0.0.61
36
+ nvidia-cudnn-cu13==9.19.0.56
37
+ Jinja2==3.1.6
38
+ nvidia-cusolver==12.0.4.66
39
+ pandera==0.31.1
40
+ nvidia-nvshmem-cu12==3.4.5
41
+ tokenizers==0.22.2
42
+ bitsandbytes==0.49.2
43
+ webencodings==0.5.1
44
+ nvidia-cusparselt-cu12==0.7.1
45
+ i2==0.1.63
46
+ fastjsonschema==2.21.2
47
+ better-abc==0.0.3
48
+ zipp==3.23.1
49
+ xxhash==3.7.0
50
+ websocket-client==1.9.0
51
+ webcolors==25.10.0
52
+ wcwidth==0.7.0
53
+ wadler_lindig==0.1.7
54
+ urllib3==2.6.3
55
+ uri-template==1.3.0
56
+ idna==3.13
57
+ nvidia-nvtx-cu12==12.8.90
58
+ traitlets==5.14.3
59
+ tqdm==4.67.3
60
+ tornado==6.5.5
61
+ tinycss2==1.4.0
62
+ threadpoolctl==3.6.0
63
+ tenacity==9.1.4
64
+ soupsieve==2.8.3
65
+ sniffio==1.3.1
66
+ smmap==5.0.3
67
+ six==1.17.0
68
+ sentencepiece==0.2.1
69
+ Send2Trash==2.1.0
70
+ safetensors==0.5.3
71
+ ruff==0.14.2
72
+ rpds-py==0.30.0
73
+ rfc3986-validator==0.1.1
74
+ regex==2026.4.4
75
+ pyzmq==27.1.0
76
+ PyYAML==6.0.3
77
+ python-json-logger==4.1.0
78
+ python-dotenv==1.2.2
79
+ pyparsing==3.3.2
80
+ Pygments==2.20.0
81
+ pydantic_core==2.41.5
82
+ pycparser==3.0
83
+ pyarrow==24.0.0
84
+ psutil==7.2.2
85
+ protobuf==6.33.6
86
+ propcache==0.4.1
87
+ prometheus_client==0.25.0
88
+ pluggy==1.6.0
89
+ platformdirs==4.9.6
90
+ pillow==12.2.0
91
+ pexpect==4.9.0
92
+ parso==0.8.7
93
+ pandocfilters==1.5.1
94
+ orjson==3.11.0
95
+ nvidia-nvjitlink-cu12==12.8.93
96
+ nvidia-nccl-cu12==2.28.9
97
+ nvidia-curand-cu12==10.3.9.90
98
+ nvidia-cufile-cu12==1.13.1.3
99
+ nvidia-cuda-runtime-cu12==12.8.90
100
+ nvidia-cuda-nvrtc-cu12==12.8.93
101
+ nvidia-cuda-cupti-cu12==12.8.90
102
+ nvidia-cublas-cu12==12.8.4.1
103
+ numpy==1.26.4
104
+ nest-asyncio==1.6.0
105
+ natsort==8.4.0
106
+ narwhals==2.20.0
107
+ multidict==6.7.1
108
+ mistune==3.2.0
109
+ mdurl==0.1.2
110
+ lark==1.3.1
111
+ kiwisolver==1.5.0
112
+ jupyterlab_pygments==0.3.0
113
+ jsonpointer==3.1.1
114
+ json5==0.14.0
115
+ joblib==1.5.3
116
+ jiter==0.14.0
117
+ iniconfig==2.3.0
118
+ importlib_resources==7.1.0
119
+ hf-xet==1.4.3
120
+ h11==0.16.0
121
+ config2py==0.1.47
122
+ fsspec==2025.3.0
123
+ frozenlist==1.8.0
124
+ fqdn==1.5.1
125
+ fonttools==4.62.1
126
+ fancy-einsum==0.0.3
127
+ executing==2.2.1
128
+ einops==0.8.2
129
+ dol==0.3.41
130
+ docstring_parser==0.18.0
131
+ distro==1.9.0
132
+ dill==0.3.8
133
+ defusedxml==0.7.1
134
+ decorator==5.2.1
135
+ debugpy==1.8.20
136
+ cycler==0.12.1
137
+ comm==0.2.3
138
+ click==8.2.1
139
+ charset-normalizer==3.4.7
140
+ certifi==2026.4.22
141
+ bleach==6.3.0
142
+ beartype==0.14.1
143
+ babel==2.18.0
144
+ attrs==26.1.0
145
+ async-lru==2.3.0
146
+ asttokens==3.0.1
147
+ aiohappyeyeballs==2.6.1
148
+ yarl==1.23.0
149
+ terminado==0.18.1
150
+ stack-data==0.6.3
151
+ simple-parsing==0.1.8
152
+ sentry-sdk==2.58.0
153
+ scipy==1.17.1
154
+ rfc3987-syntax==1.1.0
155
+ rfc3339-validator==0.1.4
156
+ requests==2.32.4
157
+ referencing==0.37.0
158
+ python-dateutil==2.9.0.post0
159
+ pytest==8.4.1
160
+ pydantic==2.12.4
161
+ prompt_toolkit==3.0.52
162
+ plotly==6.3.0
163
+ patsy==1.0.2
164
+ nvidia-cusparse-cu12==12.5.8.93
165
+ nvidia-cudnn-cu12==9.19.0.56
166
+ torch==2.11.0+cu128
167
+ nltk==3.9.4
168
+ multiprocess==0.70.16
169
+ matplotlib-inline==0.2.1
170
+ markdown-it-py==4.0.0
171
+ jupyter_core==5.9.1
172
+ jedi==0.20.0
173
+ jaxtyping==0.3.2
174
+ ipython_pygments_lexers==1.1.1
175
+ importlib_metadata==9.0.0
176
+ httpcore==1.0.9
177
+ gitdb==4.0.12
178
+ contourpy==1.3.3
179
+ cffi==2.0.0
180
+ beautifulsoup4==4.14.3
181
+ anyio==4.13.0
182
+ aiosignal==1.4.0
183
+ tiktoken==0.12.0
184
+ scikit-learn==1.7.1
185
+ rich==15.0.0
186
+ py2store==0.1.22
187
+ pandas==2.3.1
188
+ nvidia-cusolver-cu12==11.7.3.90
189
+ matplotlib==3.10.9
190
+ jupyter_server_terminals==0.5.4
191
+ jupyter_client==8.8.0
192
+ jsonschema-specifications==2025.9.1
193
+ ipython==9.4.0
194
+ huggingface_hub==0.36.2
195
+ httpx==0.28.1
196
+ graze==0.1.39
197
+ GitPython==3.1.49
198
+ arrow==1.4.0
199
+ argon2-cffi-bindings==25.1.0
200
+ aiohttp==3.13.5
201
+ wandb==0.21.0
202
+ sae-scoping==2020.0.0
203
+ statsmodels==0.14.6
204
+ seaborn==0.13.2
205
+ openai==2.33.0
206
+ jsonschema==4.26.0
207
+ isoduration==20.11.0
208
+ ipykernel==7.2.0
209
+ babe==0.0.7
210
+ argon2-cffi==25.1.0
211
+ transformers==4.56.1
212
+ schedulefree==1.4.1
213
+ plotly-express==0.4.1
214
+ nbformat==5.10.4
215
+ litellm==1.74.7
216
+ datasets==4.0.0
217
+ accelerate==1.13.0
218
+ trl==0.22.2
219
+ transformers-stream-generator==0.0.5
220
+ peft==0.16.0
221
+ nbclient==0.10.4
222
+ jupyter-events==0.12.1
223
+ eai-sparsify==1.3.0
224
+ transformer-lens==2.17.0
225
+ nbconvert==7.17.1
226
+ sae-lens==6.43.0
227
+ jupyter_server==2.17.0
228
+ notebook_shim==0.2.4
229
+ jupyterlab_server==2.28.0
230
+ jupyter-lsp==2.3.1
231
+ jupyterlab==4.4.10
232
+ notebook==7.4.4
233
+ triton==3.6.0
234
+ cuda-bindings==12.9.4
235
+ nvidia-cufft-cu12==11.3.3.83
run-20260503_070020-tsnlx6qq/files/wandb-metadata.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-106-generic-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.12.13",
4
+ "startedAt": "2026-05-03T07:00:20.452873Z",
5
+ "args": [
6
+ "--train-domain",
7
+ "math",
8
+ "--attack-domain",
9
+ "physics",
10
+ "--stage",
11
+ "attack",
12
+ "--hf-recover-repo",
13
+ "arunasank/fitoo774",
14
+ "--checkpoint",
15
+ "2000",
16
+ "--dev",
17
+ "--device",
18
+ "cuda",
19
+ "--firing-rate-threshold",
20
+ "1e-4",
21
+ "--gemma3",
22
+ "--max-steps-attack",
23
+ "10000",
24
+ "--skip-pre-training-eval"
25
+ ],
26
+ "program": "/root/SAEScoping/experiments/script_scoping_pipeline_stemqa.py",
27
+ "codePath": "experiments/script_scoping_pipeline_stemqa.py",
28
+ "codePathLocal": "experiments/script_scoping_pipeline_stemqa.py",
29
+ "git": {
30
+ "remote": "git@github.com:4gatepylon/SAEScoping.git",
31
+ "commit": "89bc64f41f54ca1f08c9d90c5c2ea13f98123c1b"
32
+ },
33
+ "email": "aruna.evam@gmail.com",
34
+ "root": "/root/SAEScoping",
35
+ "host": "80e02dee87f9",
36
+ "executable": "/root/miniconda3/envs/sae/bin/python",
37
+ "cpu_count": 64,
38
+ "cpu_count_logical": 128,
39
+ "gpu": "NVIDIA RTX PRO 6000 Blackwell Server Edition",
40
+ "gpu_count": 1,
41
+ "disk": {
42
+ "/": {
43
+ "total": "536870912000",
44
+ "used": "73938042880"
45
+ }
46
+ },
47
+ "memory": {
48
+ "total": "1622884343808"
49
+ },
50
+ "gpu_nvidia": [
51
+ {
52
+ "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition",
53
+ "memoryTotal": "102641958912",
54
+ "cudaCores": 24064,
55
+ "architecture": "Blackwell",
56
+ "uuid": "GPU-1e3dcc13-cf37-b8fe-2425-456c8d1b35d2"
57
+ }
58
+ ],
59
+ "cudaVersion": "13.0",
60
+ "writerId": "kxtjeo9jni30ggc0c02r09gslwxc3kka"
61
+ }
run-20260503_070020-tsnlx6qq/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"true_baseline/llm_judge/chemistry/out_of_scope/ground_truth_similarity":0.845,"true_baseline/llm_judge/physics/out_of_scope/fluency":0.965,"true_baseline/llm_judge/math/in_scope/fluency":0.96,"_runtime":2,"true_baseline/llm_judge/physics/out_of_scope/relevance":0.97,"true_baseline/llm_judge/math/in_scope/quality":0.93,"llm_judge/math/in_scope/quality_pre_scoping":0.93,"llm_judge/math/in_scope/ground_truth_similarity_pre_scoping":0.835,"llm_judge/physics/out_of_scope/fluency_pre_scoping":0.965,"llm_judge/biology/out_of_scope/relevance_pre_scoping":1,"true_baseline/llm_judge/math/in_scope/ground_truth_similarity":0.835,"true_baseline/llm_judge/chemistry/out_of_scope/fluency":0.975,"trainer/global_step":0,"true_baseline/llm_judge/biology/out_of_scope/relevance":1,"llm_judge/chemistry/out_of_scope/ground_truth_similarity_pre_scoping":0.845,"llm_judge/physics/out_of_scope/ground_truth_similarity_pre_scoping":0.67,"true_baseline/llm_judge/biology/out_of_scope/fluency":0.995,"llm_judge/math/in_scope/fluency_pre_scoping":0.96,"llm_judge/chemistry/out_of_scope/quality_pre_scoping":0.9366666666666666,"_step":1,"true_baseline/llm_judge/biology/out_of_scope/quality":0.9383333333333334,"llm_judge/biology/out_of_scope/quality_pre_scoping":0.9383333333333334,"true_baseline/llm_judge/chemistry/out_of_scope/relevance":0.99,"llm_judge/chemistry/out_of_scope/relevance_pre_scoping":0.99,"true_baseline/llm_judge/biology/out_of_scope/ground_truth_similarity":0.82,"llm_judge/physics/out_of_scope/relevance_pre_scoping":0.97,"llm_judge/physics/out_of_scope/quality_pre_scoping":0.8683333333333333,"true_baseline/llm_judge/chemistry/out_of_scope/quality":0.9366666666666666,"llm_judge/biology/out_of_scope/ground_truth_similarity_pre_scoping":0.82,"true_baseline/llm_judge/physics/out_of_scope/ground_truth_similarity":0.67,"llm_judge/chemistry/out_of_scope/fluency_pre_scoping":0.975,"_wandb":{"runtime":2},"llm_judge/biology/out_of_scope/fluency_pre_scoping":0.995,"llm_judge/math/in_scope/relevance_pre_scoping":0.995,"true_baseline/llm_judge/physics/out_of_scope/quality":0.8683333333333333,"true_baseline/llm_judge/math/in_scope/relevance":0.995,"_timestamp":1.7777916212437994e+09}
run-20260503_070020-tsnlx6qq/logs/debug-core.log ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-05-03T07:00:20.469220164Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp_j91eab8/port-10341.txt","pid":10341,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2026-05-03T07:00:20.46970843Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":10341}
3
+ {"time":"2026-05-03T07:00:20.469687729Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-10341-15586-3861218357/socket","Net":"unix"}}
4
+ {"time":"2026-05-03T07:00:20.657887643Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2026-05-03T07:00:20.660790402Z","level":"INFO","msg":"handleInformInit: received","streamId":"tsnlx6qq","id":"1(@)"}
6
+ {"time":"2026-05-03T07:00:20.848571595Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"tsnlx6qq","id":"1(@)"}
7
+ {"time":"2026-05-03T07:00:23.783791062Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"tsnlx6qq","id":"1(@)"}
8
+ {"time":"2026-05-03T07:00:23.785249021Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"tsnlx6qq","id":"1(@)"}
9
+ {"time":"2026-05-03T07:00:23.785623376Z","level":"INFO","msg":"handleInformInit: received","streamId":"mw4gx9uu","id":"1(@)"}
10
+ {"time":"2026-05-03T07:00:23.958283535Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"mw4gx9uu","id":"1(@)"}
11
+ {"time":"2026-05-04T20:47:39.170132472Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
12
+ {"time":"2026-05-04T20:47:39.170220385Z","level":"INFO","msg":"server is shutting down"}
13
+ {"time":"2026-05-04T20:47:39.170208517Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
14
+ {"time":"2026-05-04T20:47:39.170282418Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
15
+ {"time":"2026-05-04T20:47:39.170328918Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-10341-15586-3861218357/socket","Net":"unix"}}
16
+ {"time":"2026-05-04T20:47:39.568705145Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
17
+ {"time":"2026-05-04T20:47:39.568786097Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
18
+ {"time":"2026-05-04T20:47:39.568805747Z","level":"INFO","msg":"server is closed"}
run-20260503_070020-tsnlx6qq/logs/debug-internal.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-05-03T07:00:20.660976072Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2026-05-03T07:00:20.848539286Z","level":"INFO","msg":"stream: created new stream","id":"tsnlx6qq"}
3
+ {"time":"2026-05-03T07:00:20.848568149Z","level":"INFO","msg":"stream: started","id":"tsnlx6qq"}
4
+ {"time":"2026-05-03T07:00:20.84858856Z","level":"INFO","msg":"writer: Do: started","stream_id":"tsnlx6qq"}
5
+ {"time":"2026-05-03T07:00:20.8486014Z","level":"INFO","msg":"handler: started","stream_id":"tsnlx6qq"}
6
+ {"time":"2026-05-03T07:00:20.848590483Z","level":"INFO","msg":"sender: started","stream_id":"tsnlx6qq"}
7
+ {"time":"2026-05-03T07:00:23.585072348Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
8
+ {"time":"2026-05-03T07:00:23.782187102Z","level":"INFO","msg":"handler: operation stats","stats":{}}
9
+ {"time":"2026-05-03T07:00:23.783806655Z","level":"INFO","msg":"stream: closing","id":"tsnlx6qq"}
10
+ {"time":"2026-05-03T07:00:23.783814807Z","level":"INFO","msg":"handler: closed","stream_id":"tsnlx6qq"}
11
+ {"time":"2026-05-03T07:00:23.783820416Z","level":"INFO","msg":"writer: Close: closed","stream_id":"tsnlx6qq"}
12
+ {"time":"2026-05-03T07:00:23.783824061Z","level":"INFO","msg":"sender: closed","stream_id":"tsnlx6qq"}
13
+ {"time":"2026-05-03T07:00:23.783862599Z","level":"INFO","msg":"stream: closed","id":"tsnlx6qq"}
run-20260503_070020-tsnlx6qq/logs/debug.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-05-03 07:00:20,453 INFO MainThread:10341 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2026-05-03 07:00:20,453 INFO MainThread:10341 [wandb_setup.py:_flush():80] Configure stats pid to 10341
3
+ 2026-05-03 07:00:20,453 INFO MainThread:10341 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2026-05-03 07:00:20,453 INFO MainThread:10341 [wandb_setup.py:_flush():80] Loading settings from /root/SAEScoping/wandb/settings
5
+ 2026-05-03 07:00:20,453 INFO MainThread:10341 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2026-05-03 07:00:20,454 INFO MainThread:10341 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /root/SAEScoping/wandb/run-20260503_070020-tsnlx6qq/logs/debug.log
7
+ 2026-05-03 07:00:20,454 INFO MainThread:10341 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /root/SAEScoping/wandb/run-20260503_070020-tsnlx6qq/logs/debug-internal.log
8
+ 2026-05-03 07:00:20,454 INFO MainThread:10341 [wandb_init.py:init():830] calling init triggers
9
+ 2026-05-03 07:00:20,454 INFO MainThread:10341 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2026-05-03 07:00:20,454 INFO MainThread:10341 [wandb_init.py:init():871] starting backend
12
+ 2026-05-03 07:00:20,658 INFO MainThread:10341 [wandb_init.py:init():874] sending inform_init request
13
+ 2026-05-03 07:00:20,659 INFO MainThread:10341 [wandb_init.py:init():882] backend started and connected
14
+ 2026-05-03 07:00:20,660 INFO MainThread:10341 [wandb_init.py:init():953] updated telemetry
15
+ 2026-05-03 07:00:20,663 INFO MainThread:10341 [wandb_init.py:init():977] communicating run to backend with 180.0 second timeout
16
+ 2026-05-03 07:00:21,168 INFO MainThread:10341 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2026-05-03 07:00:21,242 INFO MainThread:10341 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2026-05-03 07:00:21,242 INFO MainThread:10341 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2026-05-03 07:00:21,242 INFO MainThread:10341 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2026-05-03 07:00:21,242 INFO MainThread:10341 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2026-05-03 07:00:21,243 INFO MainThread:10341 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2026-05-03 07:00:23,258 INFO MainThread:10341 [wandb_run.py:_finish():2224] finishing run arunasank/sae-scoping-stemqa-math/tsnlx6qq
23
+ 2026-05-03 07:00:23,258 INFO MainThread:10341 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
24
+ 2026-05-03 07:00:23,258 INFO MainThread:10341 [wandb_run.py:_restore():2405] restore
25
+ 2026-05-03 07:00:23,258 INFO MainThread:10341 [wandb_run.py:_restore():2411] restore done
26
+ 2026-05-03 07:00:23,783 INFO MainThread:10341 [wandb_run.py:_footer_history_summary_info():3903] rendering history
27
+ 2026-05-03 07:00:23,783 INFO MainThread:10341 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
28
+ 2026-05-03 07:00:23,783 INFO MainThread:10341 [wandb_run.py:_footer_sync_info():3864] logging synced files
run-20260503_070023-mw4gx9uu/files/config.yaml ADDED
@@ -0,0 +1,988 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _name_or_path:
2
+ value: /workspace/.cache/huggingface/hub/models--arunasank--fitoo774/snapshots/fadf3c3034b2218f784375223d93790921fec9ff/checkpoint-2000
3
+ _wandb:
4
+ value:
5
+ cli_version: 0.21.0
6
+ e:
7
+ k9ay0f3la4y2006c5cqjs8lajyxtc4og:
8
+ args:
9
+ - --train-domain
10
+ - math
11
+ - --attack-domain
12
+ - physics
13
+ - --stage
14
+ - attack
15
+ - --hf-recover-repo
16
+ - arunasank/fitoo774
17
+ - --checkpoint
18
+ - "2000"
19
+ - --dev
20
+ - --device
21
+ - cuda
22
+ - --firing-rate-threshold
23
+ - "1e-4"
24
+ - --gemma3
25
+ - --max-steps-attack
26
+ - "10000"
27
+ - --skip-pre-training-eval
28
+ codePath: experiments/script_scoping_pipeline_stemqa.py
29
+ codePathLocal: experiments/script_scoping_pipeline_stemqa.py
30
+ cpu_count: 64
31
+ cpu_count_logical: 128
32
+ cudaVersion: "13.0"
33
+ disk:
34
+ /:
35
+ total: "536870912000"
36
+ used: "73938083840"
37
+ email: aruna.evam@gmail.com
38
+ executable: /root/miniconda3/envs/sae/bin/python
39
+ git:
40
+ commit: 89bc64f41f54ca1f08c9d90c5c2ea13f98123c1b
41
+ remote: git@github.com:4gatepylon/SAEScoping.git
42
+ gpu: NVIDIA RTX PRO 6000 Blackwell Server Edition
43
+ gpu_count: 1
44
+ gpu_nvidia:
45
+ - architecture: Blackwell
46
+ cudaCores: 24064
47
+ memoryTotal: "102641958912"
48
+ name: NVIDIA RTX PRO 6000 Blackwell Server Edition
49
+ uuid: GPU-1e3dcc13-cf37-b8fe-2425-456c8d1b35d2
50
+ host: 80e02dee87f9
51
+ memory:
52
+ total: "1622884343808"
53
+ os: Linux-6.8.0-106-generic-x86_64-with-glibc2.39
54
+ program: /root/SAEScoping/experiments/script_scoping_pipeline_stemqa.py
55
+ python: CPython 3.12.13
56
+ root: /root/SAEScoping
57
+ startedAt: "2026-05-03T07:00:23.784582Z"
58
+ writerId: k9ay0f3la4y2006c5cqjs8lajyxtc4og
59
+ m:
60
+ - "1": train/global_step
61
+ "6":
62
+ - 3
63
+ "7": []
64
+ - "2": '*'
65
+ "5": 1
66
+ "6":
67
+ - 1
68
+ "7": []
69
+ python_version: 3.12.13
70
+ t:
71
+ "1":
72
+ - 1
73
+ - 5
74
+ - 11
75
+ - 49
76
+ - 51
77
+ - 53
78
+ - 71
79
+ - 84
80
+ - 95
81
+ - 98
82
+ - 105
83
+ "2":
84
+ - 1
85
+ - 5
86
+ - 11
87
+ - 49
88
+ - 51
89
+ - 53
90
+ - 71
91
+ - 84
92
+ - 95
93
+ - 98
94
+ - 105
95
+ "3":
96
+ - 7
97
+ - 13
98
+ - 14
99
+ - 19
100
+ - 62
101
+ - 66
102
+ "4": 3.12.13
103
+ "5": 0.21.0
104
+ "6": 4.56.1
105
+ "9":
106
+ "1": transformers_trainer
107
+ "12": 0.21.0
108
+ "13": linux-x86_64
109
+ visualize:
110
+ charts/llm_judge_diff_baseline_fluency:
111
+ panel_config:
112
+ fieldSettings:
113
+ lineKey: lineKey
114
+ lineVal: lineVal
115
+ step: step
116
+ panelDefId: wandb/lineseries/v0
117
+ stringSettings:
118
+ title: 'LLM Judge diff vs baseline: fluency'
119
+ xname: Training Step
120
+ transform:
121
+ name: tableWithLeafColNames
122
+ userQuery:
123
+ queryFields:
124
+ - args:
125
+ - name: runSets
126
+ value: ${runSets}
127
+ fields:
128
+ - fields: []
129
+ name: id
130
+ - fields: []
131
+ name: name
132
+ - fields: []
133
+ name: _defaultColorIndex
134
+ - args:
135
+ - name: tableKey
136
+ value: charts/llm_judge_diff_baseline_fluency_table
137
+ fields: []
138
+ name: summaryTable
139
+ name: runSets
140
+ panel_type: Vega2
141
+ charts/llm_judge_diff_baseline_ground_truth_similarity:
142
+ panel_config:
143
+ fieldSettings:
144
+ lineKey: lineKey
145
+ lineVal: lineVal
146
+ step: step
147
+ panelDefId: wandb/lineseries/v0
148
+ stringSettings:
149
+ title: 'LLM Judge diff vs baseline: ground_truth_similarity'
150
+ xname: Training Step
151
+ transform:
152
+ name: tableWithLeafColNames
153
+ userQuery:
154
+ queryFields:
155
+ - args:
156
+ - name: runSets
157
+ value: ${runSets}
158
+ fields:
159
+ - fields: []
160
+ name: id
161
+ - fields: []
162
+ name: name
163
+ - fields: []
164
+ name: _defaultColorIndex
165
+ - args:
166
+ - name: tableKey
167
+ value: charts/llm_judge_diff_baseline_ground_truth_similarity_table
168
+ fields: []
169
+ name: summaryTable
170
+ name: runSets
171
+ panel_type: Vega2
172
+ charts/llm_judge_diff_baseline_quality:
173
+ panel_config:
174
+ fieldSettings:
175
+ lineKey: lineKey
176
+ lineVal: lineVal
177
+ step: step
178
+ panelDefId: wandb/lineseries/v0
179
+ stringSettings:
180
+ title: 'LLM Judge diff vs baseline: quality'
181
+ xname: Training Step
182
+ transform:
183
+ name: tableWithLeafColNames
184
+ userQuery:
185
+ queryFields:
186
+ - args:
187
+ - name: runSets
188
+ value: ${runSets}
189
+ fields:
190
+ - fields: []
191
+ name: id
192
+ - fields: []
193
+ name: name
194
+ - fields: []
195
+ name: _defaultColorIndex
196
+ - args:
197
+ - name: tableKey
198
+ value: charts/llm_judge_diff_baseline_quality_table
199
+ fields: []
200
+ name: summaryTable
201
+ name: runSets
202
+ panel_type: Vega2
203
+ charts/llm_judge_diff_baseline_relevance:
204
+ panel_config:
205
+ fieldSettings:
206
+ lineKey: lineKey
207
+ lineVal: lineVal
208
+ step: step
209
+ panelDefId: wandb/lineseries/v0
210
+ stringSettings:
211
+ title: 'LLM Judge diff vs baseline: relevance'
212
+ xname: Training Step
213
+ transform:
214
+ name: tableWithLeafColNames
215
+ userQuery:
216
+ queryFields:
217
+ - args:
218
+ - name: runSets
219
+ value: ${runSets}
220
+ fields:
221
+ - fields: []
222
+ name: id
223
+ - fields: []
224
+ name: name
225
+ - fields: []
226
+ name: _defaultColorIndex
227
+ - args:
228
+ - name: tableKey
229
+ value: charts/llm_judge_diff_baseline_relevance_table
230
+ fields: []
231
+ name: summaryTable
232
+ name: runSets
233
+ panel_type: Vega2
234
+ charts/llm_judge_fluency:
235
+ panel_config:
236
+ fieldSettings:
237
+ lineKey: lineKey
238
+ lineVal: lineVal
239
+ step: step
240
+ panelDefId: wandb/lineseries/v0
241
+ stringSettings:
242
+ title: 'LLM Judge: fluency'
243
+ xname: Training Step
244
+ transform:
245
+ name: tableWithLeafColNames
246
+ userQuery:
247
+ queryFields:
248
+ - args:
249
+ - name: runSets
250
+ value: ${runSets}
251
+ fields:
252
+ - fields: []
253
+ name: id
254
+ - fields: []
255
+ name: name
256
+ - fields: []
257
+ name: _defaultColorIndex
258
+ - args:
259
+ - name: tableKey
260
+ value: charts/llm_judge_fluency_table
261
+ fields: []
262
+ name: summaryTable
263
+ name: runSets
264
+ panel_type: Vega2
265
+ charts/llm_judge_ground_truth_similarity:
266
+ panel_config:
267
+ fieldSettings:
268
+ lineKey: lineKey
269
+ lineVal: lineVal
270
+ step: step
271
+ panelDefId: wandb/lineseries/v0
272
+ stringSettings:
273
+ title: 'LLM Judge: ground_truth_similarity'
274
+ xname: Training Step
275
+ transform:
276
+ name: tableWithLeafColNames
277
+ userQuery:
278
+ queryFields:
279
+ - args:
280
+ - name: runSets
281
+ value: ${runSets}
282
+ fields:
283
+ - fields: []
284
+ name: id
285
+ - fields: []
286
+ name: name
287
+ - fields: []
288
+ name: _defaultColorIndex
289
+ - args:
290
+ - name: tableKey
291
+ value: charts/llm_judge_ground_truth_similarity_table
292
+ fields: []
293
+ name: summaryTable
294
+ name: runSets
295
+ panel_type: Vega2
296
+ charts/llm_judge_quality:
297
+ panel_config:
298
+ fieldSettings:
299
+ lineKey: lineKey
300
+ lineVal: lineVal
301
+ step: step
302
+ panelDefId: wandb/lineseries/v0
303
+ stringSettings:
304
+ title: 'LLM Judge: quality'
305
+ xname: Training Step
306
+ transform:
307
+ name: tableWithLeafColNames
308
+ userQuery:
309
+ queryFields:
310
+ - args:
311
+ - name: runSets
312
+ value: ${runSets}
313
+ fields:
314
+ - fields: []
315
+ name: id
316
+ - fields: []
317
+ name: name
318
+ - fields: []
319
+ name: _defaultColorIndex
320
+ - args:
321
+ - name: tableKey
322
+ value: charts/llm_judge_quality_table
323
+ fields: []
324
+ name: summaryTable
325
+ name: runSets
326
+ panel_type: Vega2
327
+ charts/llm_judge_relevance:
328
+ panel_config:
329
+ fieldSettings:
330
+ lineKey: lineKey
331
+ lineVal: lineVal
332
+ step: step
333
+ panelDefId: wandb/lineseries/v0
334
+ stringSettings:
335
+ title: 'LLM Judge: relevance'
336
+ xname: Training Step
337
+ transform:
338
+ name: tableWithLeafColNames
339
+ userQuery:
340
+ queryFields:
341
+ - args:
342
+ - name: runSets
343
+ value: ${runSets}
344
+ fields:
345
+ - fields: []
346
+ name: id
347
+ - fields: []
348
+ name: name
349
+ - fields: []
350
+ name: _defaultColorIndex
351
+ - args:
352
+ - name: tableKey
353
+ value: charts/llm_judge_relevance_table
354
+ fields: []
355
+ name: summaryTable
356
+ name: runSets
357
+ panel_type: Vega2
358
+ accelerator_config:
359
+ value:
360
+ dispatch_batches: null
361
+ even_batches: true
362
+ gradient_accumulation_kwargs: null
363
+ non_blocking: false
364
+ split_batches: false
365
+ use_seedable_sampler: true
366
+ activation_offloading:
367
+ value: false
368
+ adafactor:
369
+ value: false
370
+ adam_beta1:
371
+ value: 0.9
372
+ adam_beta2:
373
+ value: 0.999
374
+ adam_epsilon:
375
+ value: 1e-08
376
+ add_cross_attention:
377
+ value: false
378
+ architectures:
379
+ value:
380
+ - Gemma3ForConditionalGeneration
381
+ assistant_only_loss:
382
+ value: false
383
+ auto_find_batch_size:
384
+ value: false
385
+ average_tokens_across_devices:
386
+ value: false
387
+ bad_words_ids:
388
+ value: null
389
+ batch_eval_metrics:
390
+ value: false
391
+ begin_suppress_tokens:
392
+ value: null
393
+ bf16:
394
+ value: true
395
+ bf16_full_eval:
396
+ value: false
397
+ boi_token_index:
398
+ value: 255999
399
+ bos_token_id:
400
+ value: 2
401
+ chat_template_path:
402
+ value: null
403
+ chunk_size_feed_forward:
404
+ value: 0
405
+ completion_only_loss:
406
+ value: null
407
+ cross_attention_hidden_size:
408
+ value: null
409
+ data_seed:
410
+ value: null
411
+ dataloader_drop_last:
412
+ value: false
413
+ dataloader_num_workers:
414
+ value: 0
415
+ dataloader_persistent_workers:
416
+ value: false
417
+ dataloader_pin_memory:
418
+ value: true
419
+ dataloader_prefetch_factor:
420
+ value: null
421
+ dataset_kwargs:
422
+ value: null
423
+ dataset_num_proc:
424
+ value: null
425
+ dataset_text_field:
426
+ value: text
427
+ ddp_backend:
428
+ value: null
429
+ ddp_broadcast_buffers:
430
+ value: null
431
+ ddp_bucket_cap_mb:
432
+ value: null
433
+ ddp_find_unused_parameters:
434
+ value: null
435
+ ddp_timeout:
436
+ value: 1800
437
+ debug:
438
+ value: []
439
+ decoder_start_token_id:
440
+ value: null
441
+ deepspeed:
442
+ value: null
443
+ disable_tqdm:
444
+ value: false
445
+ diversity_penalty:
446
+ value: 0
447
+ do_eval:
448
+ value: true
449
+ do_predict:
450
+ value: false
451
+ do_sample:
452
+ value: false
453
+ do_train:
454
+ value: false
455
+ dtype:
456
+ value: bfloat16
457
+ early_stopping:
458
+ value: false
459
+ encoder_no_repeat_ngram_size:
460
+ value: 0
461
+ eoi_token_index:
462
+ value: 256000
463
+ eos_token:
464
+ value: <EOS_TOKEN>
465
+ eos_token_id:
466
+ value: 1
467
+ eval_accumulation_steps:
468
+ value: 16
469
+ eval_delay:
470
+ value: 0
471
+ eval_do_concat_batches:
472
+ value: true
473
+ eval_on_start:
474
+ value: false
475
+ eval_packing:
476
+ value: null
477
+ eval_steps:
478
+ value: 100
479
+ eval_strategy:
480
+ value: steps
481
+ eval_use_gather_object:
482
+ value: false
483
+ exponential_decay_length_penalty:
484
+ value: null
485
+ finetuning_task:
486
+ value: null
487
+ forced_bos_token_id:
488
+ value: null
489
+ forced_eos_token_id:
490
+ value: null
491
+ fp16:
492
+ value: false
493
+ fp16_backend:
494
+ value: auto
495
+ fp16_full_eval:
496
+ value: false
497
+ fp16_opt_level:
498
+ value: O1
499
+ fsdp:
500
+ value: []
501
+ fsdp_config:
502
+ value:
503
+ min_num_params: 0
504
+ xla: false
505
+ xla_fsdp_grad_ckpt: false
506
+ xla_fsdp_v2: false
507
+ fsdp_min_num_params:
508
+ value: 0
509
+ fsdp_transformer_layer_cls_to_wrap:
510
+ value: null
511
+ full_determinism:
512
+ value: false
513
+ gradient_accumulation_steps:
514
+ value: 16
515
+ gradient_checkpointing:
516
+ value: true
517
+ gradient_checkpointing_kwargs:
518
+ value:
519
+ use_reentrant: false
520
+ greater_is_better:
521
+ value: null
522
+ group_by_length:
523
+ value: false
524
+ half_precision_backend:
525
+ value: auto
526
+ hub_always_push:
527
+ value: false
528
+ hub_model_id:
529
+ value: null
530
+ hub_private_repo:
531
+ value: null
532
+ hub_revision:
533
+ value: null
534
+ hub_strategy:
535
+ value: every_save
536
+ hub_token:
537
+ value: <HUB_TOKEN>
538
+ id2label:
539
+ value:
540
+ "0": LABEL_0
541
+ "1": LABEL_1
542
+ ignore_data_skip:
543
+ value: false
544
+ image_token_index:
545
+ value: 262144
546
+ include_for_metrics:
547
+ value: []
548
+ include_inputs_for_metrics:
549
+ value: false
550
+ include_num_input_tokens_seen:
551
+ value: false
552
+ include_tokens_per_second:
553
+ value: false
554
+ initializer_range:
555
+ value: 0.02
556
+ is_decoder:
557
+ value: false
558
+ is_encoder_decoder:
559
+ value: false
560
+ jit_mode_eval:
561
+ value: false
562
+ label_names:
563
+ value: null
564
+ label_smoothing_factor:
565
+ value: 0
566
+ label2id:
567
+ value:
568
+ LABEL_0: 0
569
+ LABEL_1: 1
570
+ learning_rate:
571
+ value: 2e-05
572
+ length_column_name:
573
+ value: length
574
+ length_penalty:
575
+ value: 1
576
+ liger_kernel_config:
577
+ value: null
578
+ load_best_model_at_end:
579
+ value: false
580
+ local_rank:
581
+ value: 0
582
+ log_level:
583
+ value: passive
584
+ log_level_replica:
585
+ value: warning
586
+ log_on_each_node:
587
+ value: true
588
+ logging_dir:
589
+ value: /root/SAEScoping/experiments/outputs_scoping/google--gemma-3-12b-it/layer_31--width_16k--canonical/math/h0.0001/k1684/mv6qs85r/attack/physics/mw4gx9uu/runs/May03_07-00-24_80e02dee87f9
590
+ logging_first_step:
591
+ value: false
592
+ logging_nan_inf_filter:
593
+ value: true
594
+ logging_steps:
595
+ value: 10
596
+ logging_strategy:
597
+ value: steps
598
+ lr_scheduler_type:
599
+ value: linear
600
+ max_grad_norm:
601
+ value: 1
602
+ max_length:
603
+ value: 1024
604
+ max_steps:
605
+ value: 10000
606
+ metric_for_best_model:
607
+ value: null
608
+ min_length:
609
+ value: 0
610
+ mm_tokens_per_image:
611
+ value: 256
612
+ model/num_parameters:
613
+ value: 12187325040
614
+ model_init_kwargs:
615
+ value: null
616
+ model_type:
617
+ value: gemma3
618
+ mp_parameters:
619
+ value: ""
620
+ neftune_noise_alpha:
621
+ value: null
622
+ no_cuda:
623
+ value: false
624
+ no_repeat_ngram_size:
625
+ value: 0
626
+ num_beam_groups:
627
+ value: 1
628
+ num_beams:
629
+ value: 1
630
+ num_return_sequences:
631
+ value: 1
632
+ num_train_epochs:
633
+ value: 1
634
+ optim:
635
+ value: paged_adamw_8bit
636
+ optim_args:
637
+ value: null
638
+ optim_target_modules:
639
+ value: null
640
+ output_attentions:
641
+ value: false
642
+ output_dir:
643
+ value: /root/SAEScoping/experiments/outputs_scoping/google--gemma-3-12b-it/layer_31--width_16k--canonical/math/h0.0001/k1684/mv6qs85r/attack/physics/mw4gx9uu
644
+ output_hidden_states:
645
+ value: false
646
+ output_scores:
647
+ value: false
648
+ overwrite_output_dir:
649
+ value: false
650
+ packing:
651
+ value: false
652
+ packing_strategy:
653
+ value: bfd
654
+ pad_to_multiple_of:
655
+ value: null
656
+ pad_token:
657
+ value: <PAD_TOKEN>
658
+ pad_token_id:
659
+ value: 0
660
+ padding_free:
661
+ value: false
662
+ parallelism_config:
663
+ value: null
664
+ past_index:
665
+ value: -1
666
+ per_device_eval_batch_size:
667
+ value: 4
668
+ per_device_train_batch_size:
669
+ value: 4
670
+ per_gpu_eval_batch_size:
671
+ value: null
672
+ per_gpu_train_batch_size:
673
+ value: null
674
+ prediction_loss_only:
675
+ value: false
676
+ prefix:
677
+ value: null
678
+ problem_type:
679
+ value: null
680
+ push_to_hub:
681
+ value: false
682
+ push_to_hub_model_id:
683
+ value: null
684
+ push_to_hub_organization:
685
+ value: null
686
+ push_to_hub_token:
687
+ value: <PUSH_TO_HUB_TOKEN>
688
+ ray_scope:
689
+ value: last
690
+ remove_invalid_values:
691
+ value: false
692
+ remove_unused_columns:
693
+ value: true
694
+ repetition_penalty:
695
+ value: 1
696
+ report_to:
697
+ value:
698
+ - wandb
699
+ restore_callback_states_from_checkpoint:
700
+ value: false
701
+ resume_from_checkpoint:
702
+ value: true
703
+ return_dict:
704
+ value: true
705
+ return_dict_in_generate:
706
+ value: false
707
+ run_name:
708
+ value: null
709
+ save_on_each_node:
710
+ value: false
711
+ save_only_model:
712
+ value: false
713
+ save_safetensors:
714
+ value: true
715
+ save_steps:
716
+ value: 500
717
+ save_strategy:
718
+ value: steps
719
+ save_total_limit:
720
+ value: 5
721
+ seed:
722
+ value: 42
723
+ sep_token_id:
724
+ value: null
725
+ skip_memory_metrics:
726
+ value: true
727
+ suppress_tokens:
728
+ value: null
729
+ task_specific_params:
730
+ value: null
731
+ temperature:
732
+ value: 1
733
+ text_config:
734
+ value:
735
+ _name_or_path: ""
736
+ _sliding_window_pattern: 6
737
+ add_cross_attention: false
738
+ architectures: null
739
+ attention_bias: false
740
+ attention_dropout: 0
741
+ attn_logit_softcapping: null
742
+ bad_words_ids: null
743
+ begin_suppress_tokens: null
744
+ bos_token_id: 2
745
+ chunk_size_feed_forward: 0
746
+ cross_attention_hidden_size: null
747
+ decoder_start_token_id: null
748
+ diversity_penalty: 0
749
+ do_sample: false
750
+ dtype: bfloat16
751
+ early_stopping: false
752
+ encoder_no_repeat_ngram_size: 0
753
+ eos_token_id: 1
754
+ exponential_decay_length_penalty: null
755
+ final_logit_softcapping: null
756
+ finetuning_task: null
757
+ forced_bos_token_id: null
758
+ forced_eos_token_id: null
759
+ head_dim: 256
760
+ hidden_activation: gelu_pytorch_tanh
761
+ hidden_size: 3840
762
+ id2label:
763
+ "0": LABEL_0
764
+ "1": LABEL_1
765
+ initializer_range: 0.02
766
+ intermediate_size: 15360
767
+ is_decoder: false
768
+ is_encoder_decoder: false
769
+ label2id:
770
+ LABEL_0: 0
771
+ LABEL_1: 1
772
+ layer_types:
773
+ - sliding_attention
774
+ - sliding_attention
775
+ - sliding_attention
776
+ - sliding_attention
777
+ - sliding_attention
778
+ - full_attention
779
+ - sliding_attention
780
+ - sliding_attention
781
+ - sliding_attention
782
+ - sliding_attention
783
+ - sliding_attention
784
+ - full_attention
785
+ - sliding_attention
786
+ - sliding_attention
787
+ - sliding_attention
788
+ - sliding_attention
789
+ - sliding_attention
790
+ - full_attention
791
+ - sliding_attention
792
+ - sliding_attention
793
+ - sliding_attention
794
+ - sliding_attention
795
+ - sliding_attention
796
+ - full_attention
797
+ - sliding_attention
798
+ - sliding_attention
799
+ - sliding_attention
800
+ - sliding_attention
801
+ - sliding_attention
802
+ - full_attention
803
+ - sliding_attention
804
+ - sliding_attention
805
+ - sliding_attention
806
+ - sliding_attention
807
+ - sliding_attention
808
+ - full_attention
809
+ - sliding_attention
810
+ - sliding_attention
811
+ - sliding_attention
812
+ - sliding_attention
813
+ - sliding_attention
814
+ - full_attention
815
+ - sliding_attention
816
+ - sliding_attention
817
+ - sliding_attention
818
+ - sliding_attention
819
+ - sliding_attention
820
+ - full_attention
821
+ length_penalty: 1
822
+ max_length: 20
823
+ max_position_embeddings: 131072
824
+ min_length: 0
825
+ model_type: gemma3_text
826
+ no_repeat_ngram_size: 0
827
+ num_attention_heads: 16
828
+ num_beam_groups: 1
829
+ num_beams: 1
830
+ num_hidden_layers: 48
831
+ num_key_value_heads: 8
832
+ num_return_sequences: 1
833
+ output_attentions: false
834
+ output_hidden_states: false
835
+ output_scores: false
836
+ pad_token_id: 0
837
+ prefix: null
838
+ problem_type: null
839
+ query_pre_attn_scalar: 256
840
+ remove_invalid_values: false
841
+ repetition_penalty: 1
842
+ return_dict: true
843
+ return_dict_in_generate: false
844
+ rms_norm_eps: 1e-06
845
+ rope_local_base_freq: 10000
846
+ rope_scaling:
847
+ factor: 8
848
+ rope_type: linear
849
+ rope_theta: 1e+06
850
+ sep_token_id: null
851
+ sliding_window: 1024
852
+ suppress_tokens: null
853
+ task_specific_params: null
854
+ temperature: 1
855
+ tf_legacy_loss: false
856
+ tie_encoder_decoder: false
857
+ tie_word_embeddings: true
858
+ tokenizer_class: null
859
+ top_k: 50
860
+ top_p: 1
861
+ torchscript: false
862
+ typical_p: 1
863
+ use_bfloat16: false
864
+ use_cache: true
865
+ vocab_size: 262208
866
+ tf_legacy_loss:
867
+ value: false
868
+ tf32:
869
+ value: null
870
+ tie_encoder_decoder:
871
+ value: false
872
+ tie_word_embeddings:
873
+ value: true
874
+ tokenizer_class:
875
+ value: null
876
+ top_k:
877
+ value: 50
878
+ top_p:
879
+ value: 1
880
+ torch_compile:
881
+ value: false
882
+ torch_compile_backend:
883
+ value: null
884
+ torch_compile_mode:
885
+ value: null
886
+ torch_empty_cache_steps:
887
+ value: null
888
+ torchdynamo:
889
+ value: null
890
+ torchscript:
891
+ value: false
892
+ tpu_metrics_debug:
893
+ value: false
894
+ tpu_num_cores:
895
+ value: null
896
+ transformers_version:
897
+ value: 4.56.1
898
+ typical_p:
899
+ value: 1
900
+ use_bfloat16:
901
+ value: false
902
+ use_cpu:
903
+ value: false
904
+ use_ipex:
905
+ value: false
906
+ use_legacy_prediction_loop:
907
+ value: false
908
+ use_liger_kernel:
909
+ value: false
910
+ use_mps_device:
911
+ value: false
912
+ vision_config:
913
+ value:
914
+ _name_or_path: ""
915
+ add_cross_attention: false
916
+ architectures: null
917
+ attention_dropout: 0
918
+ bad_words_ids: null
919
+ begin_suppress_tokens: null
920
+ bos_token_id: null
921
+ chunk_size_feed_forward: 0
922
+ cross_attention_hidden_size: null
923
+ decoder_start_token_id: null
924
+ diversity_penalty: 0
925
+ do_sample: false
926
+ dtype: bfloat16
927
+ early_stopping: false
928
+ encoder_no_repeat_ngram_size: 0
929
+ eos_token_id: null
930
+ exponential_decay_length_penalty: null
931
+ finetuning_task: null
932
+ forced_bos_token_id: null
933
+ forced_eos_token_id: null
934
+ hidden_act: gelu_pytorch_tanh
935
+ hidden_size: 1152
936
+ id2label:
937
+ "0": LABEL_0
938
+ "1": LABEL_1
939
+ image_size: 896
940
+ intermediate_size: 4304
941
+ is_decoder: false
942
+ is_encoder_decoder: false
943
+ label2id:
944
+ LABEL_0: 0
945
+ LABEL_1: 1
946
+ layer_norm_eps: 1e-06
947
+ length_penalty: 1
948
+ max_length: 20
949
+ min_length: 0
950
+ model_type: siglip_vision_model
951
+ no_repeat_ngram_size: 0
952
+ num_attention_heads: 16
953
+ num_beam_groups: 1
954
+ num_beams: 1
955
+ num_channels: 3
956
+ num_hidden_layers: 27
957
+ num_return_sequences: 1
958
+ output_attentions: false
959
+ output_hidden_states: false
960
+ output_scores: false
961
+ pad_token_id: null
962
+ patch_size: 14
963
+ prefix: null
964
+ problem_type: null
965
+ remove_invalid_values: false
966
+ repetition_penalty: 1
967
+ return_dict: true
968
+ return_dict_in_generate: false
969
+ sep_token_id: null
970
+ suppress_tokens: null
971
+ task_specific_params: null
972
+ temperature: 1
973
+ tf_legacy_loss: false
974
+ tie_encoder_decoder: false
975
+ tie_word_embeddings: true
976
+ tokenizer_class: null
977
+ top_k: 50
978
+ top_p: 1
979
+ torchscript: false
980
+ typical_p: 1
981
+ use_bfloat16: false
982
+ vision_use_head: false
983
+ warmup_ratio:
984
+ value: 0.1
985
+ warmup_steps:
986
+ value: 0
987
+ weight_decay:
988
+ value: 0.1
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_491_084cda5c92c1f5fd2472.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.07499999999999996], [1000, "math/in_scope", -0.06499999999999995], [1500, "math/in_scope", -0.08499999999999996], [2000, "math/in_scope", -0.13], [2500, "math/in_scope", -0.09999999999999998], [3000, "math/in_scope", -0.20999999999999996], [3500, "math/in_scope", -0.19499999999999995], [500, "physics/attack_scope", -0.45499999999999996], [1000, "physics/attack_scope", -0.25], [1500, "physics/attack_scope", -0.21999999999999997], [2000, "physics/attack_scope", -0.11499999999999999], [2500, "physics/attack_scope", -0.12], [3000, "physics/attack_scope", -0.15000000000000002], [3500, "physics/attack_scope", -0.245]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_562_108153824aefe6a35fdd.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.07499999999999996], [1000, "math/in_scope", -0.06499999999999995], [1500, "math/in_scope", -0.08499999999999996], [2000, "math/in_scope", -0.13], [2500, "math/in_scope", -0.09999999999999998], [3000, "math/in_scope", -0.20999999999999996], [3500, "math/in_scope", -0.19499999999999995], [4000, "math/in_scope", -0.15999999999999992], [500, "physics/attack_scope", -0.45499999999999996], [1000, "physics/attack_scope", -0.25], [1500, "physics/attack_scope", -0.21999999999999997], [2000, "physics/attack_scope", -0.11499999999999999], [2500, "physics/attack_scope", -0.12], [3000, "physics/attack_scope", -0.15000000000000002], [3500, "physics/attack_scope", -0.245], [4000, "physics/attack_scope", -0.21999999999999997]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_633_e4d7691c028a3169be82.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.07499999999999996], [1000, "math/in_scope", -0.06499999999999995], [1500, "math/in_scope", -0.08499999999999996], [2000, "math/in_scope", -0.13], [2500, "math/in_scope", -0.09999999999999998], [3000, "math/in_scope", -0.20999999999999996], [3500, "math/in_scope", -0.19499999999999995], [4000, "math/in_scope", -0.15999999999999992], [4500, "math/in_scope", -0.18999999999999995], [500, "physics/attack_scope", -0.45499999999999996], [1000, "physics/attack_scope", -0.25], [1500, "physics/attack_scope", -0.21999999999999997], [2000, "physics/attack_scope", -0.11499999999999999], [2500, "physics/attack_scope", -0.12], [3000, "physics/attack_scope", -0.15000000000000002], [3500, "physics/attack_scope", -0.245], [4000, "physics/attack_scope", -0.21999999999999997], [4500, "physics/attack_scope", -0.245]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_704_0336dd8b3d6193775ae7.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.07499999999999996], [1000, "math/in_scope", -0.06499999999999995], [1500, "math/in_scope", -0.08499999999999996], [2000, "math/in_scope", -0.13], [2500, "math/in_scope", -0.09999999999999998], [3000, "math/in_scope", -0.20999999999999996], [3500, "math/in_scope", -0.19499999999999995], [4000, "math/in_scope", -0.15999999999999992], [4500, "math/in_scope", -0.18999999999999995], [5000, "math/in_scope", -0.20499999999999996], [500, "physics/attack_scope", -0.45499999999999996], [1000, "physics/attack_scope", -0.25], [1500, "physics/attack_scope", -0.21999999999999997], [2000, "physics/attack_scope", -0.11499999999999999], [2500, "physics/attack_scope", -0.12], [3000, "physics/attack_scope", -0.15000000000000002], [3500, "physics/attack_scope", -0.245], [4000, "physics/attack_scope", -0.21999999999999997], [4500, "physics/attack_scope", -0.245], [5000, "physics/attack_scope", -0.20999999999999996]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_775_addf4a7b0a6d8223c671.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.07499999999999996], [1000, "math/in_scope", -0.06499999999999995], [1500, "math/in_scope", -0.08499999999999996], [2000, "math/in_scope", -0.13], [2500, "math/in_scope", -0.09999999999999998], [3000, "math/in_scope", -0.20999999999999996], [3500, "math/in_scope", -0.19499999999999995], [4000, "math/in_scope", -0.15999999999999992], [4500, "math/in_scope", -0.18999999999999995], [5000, "math/in_scope", -0.20499999999999996], [5500, "math/in_scope", -0.1499999999999999], [500, "physics/attack_scope", -0.45499999999999996], [1000, "physics/attack_scope", -0.25], [1500, "physics/attack_scope", -0.21999999999999997], [2000, "physics/attack_scope", -0.11499999999999999], [2500, "physics/attack_scope", -0.12], [3000, "physics/attack_scope", -0.15000000000000002], [3500, "physics/attack_scope", -0.245], [4000, "physics/attack_scope", -0.21999999999999997], [4500, "physics/attack_scope", -0.245], [5000, "physics/attack_scope", -0.20999999999999996], [5500, "physics/attack_scope", -0.19499999999999995]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_846_d7d02a8f538364d9f02a.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.07499999999999996], [1000, "math/in_scope", -0.06499999999999995], [1500, "math/in_scope", -0.08499999999999996], [2000, "math/in_scope", -0.13], [2500, "math/in_scope", -0.09999999999999998], [3000, "math/in_scope", -0.20999999999999996], [3500, "math/in_scope", -0.19499999999999995], [4000, "math/in_scope", -0.15999999999999992], [4500, "math/in_scope", -0.18999999999999995], [5000, "math/in_scope", -0.20499999999999996], [5500, "math/in_scope", -0.1499999999999999], [6000, "math/in_scope", -0.235], [500, "physics/attack_scope", -0.45499999999999996], [1000, "physics/attack_scope", -0.25], [1500, "physics/attack_scope", -0.21999999999999997], [2000, "physics/attack_scope", -0.11499999999999999], [2500, "physics/attack_scope", -0.12], [3000, "physics/attack_scope", -0.15000000000000002], [3500, "physics/attack_scope", -0.245], [4000, "physics/attack_scope", -0.21999999999999997], [4500, "physics/attack_scope", -0.245], [5000, "physics/attack_scope", -0.20999999999999996], [5500, "physics/attack_scope", -0.19499999999999995], [6000, "physics/attack_scope", -0.20499999999999996]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_fluency_table_917_a4f00630ffe30bcfe311.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.07499999999999996], [1000, "math/in_scope", -0.06499999999999995], [1500, "math/in_scope", -0.08499999999999996], [2000, "math/in_scope", -0.13], [2500, "math/in_scope", -0.09999999999999998], [3000, "math/in_scope", -0.20999999999999996], [3500, "math/in_scope", -0.19499999999999995], [4000, "math/in_scope", -0.15999999999999992], [4500, "math/in_scope", -0.18999999999999995], [5000, "math/in_scope", -0.20499999999999996], [5500, "math/in_scope", -0.1499999999999999], [6000, "math/in_scope", -0.235], [6500, "math/in_scope", -0.20499999999999996], [500, "physics/attack_scope", -0.45499999999999996], [1000, "physics/attack_scope", -0.25], [1500, "physics/attack_scope", -0.21999999999999997], [2000, "physics/attack_scope", -0.11499999999999999], [2500, "physics/attack_scope", -0.12], [3000, "physics/attack_scope", -0.15000000000000002], [3500, "physics/attack_scope", -0.245], [4000, "physics/attack_scope", -0.21999999999999997], [4500, "physics/attack_scope", -0.245], [5000, "physics/attack_scope", -0.20999999999999996], [5500, "physics/attack_scope", -0.19499999999999995], [6000, "physics/attack_scope", -0.20499999999999996], [6500, "physics/attack_scope", -0.18999999999999995]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_1060_ce30e646b2fea156817c.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.1499999999999999], [1000, "math/in_scope", -0.14500000000000002], [1500, "math/in_scope", -0.125], [2000, "math/in_scope", -0.20499999999999996], [2500, "math/in_scope", -0.125], [3000, "math/in_scope", -0.21499999999999997], [3500, "math/in_scope", -0.19999999999999996], [4000, "math/in_scope", -0.20499999999999996], [4500, "math/in_scope", -0.22499999999999998], [5000, "math/in_scope", -0.2899999999999999], [5500, "math/in_scope", -0.22999999999999998], [6000, "math/in_scope", -0.22999999999999998], [6500, "math/in_scope", -0.24], [7000, "math/in_scope", -0.235], [7500, "math/in_scope", -0.22999999999999998], [500, "physics/attack_scope", -0.42500000000000004], [1000, "physics/attack_scope", -0.33], [1500, "physics/attack_scope", -0.28], [2000, "physics/attack_scope", -0.275], [2500, "physics/attack_scope", -0.23000000000000004], [3000, "physics/attack_scope", -0.24000000000000005], [3500, "physics/attack_scope", -0.275], [4000, "physics/attack_scope", -0.29000000000000004], [4500, "physics/attack_scope", -0.265], [5000, "physics/attack_scope", -0.24000000000000005], [5500, "physics/attack_scope", -0.24000000000000005], [6000, "physics/attack_scope", -0.29500000000000004], [6500, "physics/attack_scope", -0.21000000000000002], [7000, "physics/attack_scope", -0.30500000000000005], [7500, "physics/attack_scope", -0.24000000000000005]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_492_a5bd2a0cd4f63e67f54f.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.1499999999999999], [1000, "math/in_scope", -0.14500000000000002], [1500, "math/in_scope", -0.125], [2000, "math/in_scope", -0.20499999999999996], [2500, "math/in_scope", -0.125], [3000, "math/in_scope", -0.21499999999999997], [3500, "math/in_scope", -0.19999999999999996], [500, "physics/attack_scope", -0.42500000000000004], [1000, "physics/attack_scope", -0.33], [1500, "physics/attack_scope", -0.28], [2000, "physics/attack_scope", -0.275], [2500, "physics/attack_scope", -0.23000000000000004], [3000, "physics/attack_scope", -0.24000000000000005], [3500, "physics/attack_scope", -0.275]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_634_45d88133b373103983fc.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.1499999999999999], [1000, "math/in_scope", -0.14500000000000002], [1500, "math/in_scope", -0.125], [2000, "math/in_scope", -0.20499999999999996], [2500, "math/in_scope", -0.125], [3000, "math/in_scope", -0.21499999999999997], [3500, "math/in_scope", -0.19999999999999996], [4000, "math/in_scope", -0.20499999999999996], [4500, "math/in_scope", -0.22499999999999998], [500, "physics/attack_scope", -0.42500000000000004], [1000, "physics/attack_scope", -0.33], [1500, "physics/attack_scope", -0.28], [2000, "physics/attack_scope", -0.275], [2500, "physics/attack_scope", -0.23000000000000004], [3000, "physics/attack_scope", -0.24000000000000005], [3500, "physics/attack_scope", -0.275], [4000, "physics/attack_scope", -0.29000000000000004], [4500, "physics/attack_scope", -0.265]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_705_28e93bfb4e3763324925.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.1499999999999999], [1000, "math/in_scope", -0.14500000000000002], [1500, "math/in_scope", -0.125], [2000, "math/in_scope", -0.20499999999999996], [2500, "math/in_scope", -0.125], [3000, "math/in_scope", -0.21499999999999997], [3500, "math/in_scope", -0.19999999999999996], [4000, "math/in_scope", -0.20499999999999996], [4500, "math/in_scope", -0.22499999999999998], [5000, "math/in_scope", -0.2899999999999999], [500, "physics/attack_scope", -0.42500000000000004], [1000, "physics/attack_scope", -0.33], [1500, "physics/attack_scope", -0.28], [2000, "physics/attack_scope", -0.275], [2500, "physics/attack_scope", -0.23000000000000004], [3000, "physics/attack_scope", -0.24000000000000005], [3500, "physics/attack_scope", -0.275], [4000, "physics/attack_scope", -0.29000000000000004], [4500, "physics/attack_scope", -0.265], [5000, "physics/attack_scope", -0.24000000000000005]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_776_45c8a9e1988a9e5162c8.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.1499999999999999], [1000, "math/in_scope", -0.14500000000000002], [1500, "math/in_scope", -0.125], [2000, "math/in_scope", -0.20499999999999996], [2500, "math/in_scope", -0.125], [3000, "math/in_scope", -0.21499999999999997], [3500, "math/in_scope", -0.19999999999999996], [4000, "math/in_scope", -0.20499999999999996], [4500, "math/in_scope", -0.22499999999999998], [5000, "math/in_scope", -0.2899999999999999], [5500, "math/in_scope", -0.22999999999999998], [500, "physics/attack_scope", -0.42500000000000004], [1000, "physics/attack_scope", -0.33], [1500, "physics/attack_scope", -0.28], [2000, "physics/attack_scope", -0.275], [2500, "physics/attack_scope", -0.23000000000000004], [3000, "physics/attack_scope", -0.24000000000000005], [3500, "physics/attack_scope", -0.275], [4000, "physics/attack_scope", -0.29000000000000004], [4500, "physics/attack_scope", -0.265], [5000, "physics/attack_scope", -0.24000000000000005], [5500, "physics/attack_scope", -0.24000000000000005]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_847_b866a7ce448652f4313e.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.1499999999999999], [1000, "math/in_scope", -0.14500000000000002], [1500, "math/in_scope", -0.125], [2000, "math/in_scope", -0.20499999999999996], [2500, "math/in_scope", -0.125], [3000, "math/in_scope", -0.21499999999999997], [3500, "math/in_scope", -0.19999999999999996], [4000, "math/in_scope", -0.20499999999999996], [4500, "math/in_scope", -0.22499999999999998], [5000, "math/in_scope", -0.2899999999999999], [5500, "math/in_scope", -0.22999999999999998], [6000, "math/in_scope", -0.22999999999999998], [500, "physics/attack_scope", -0.42500000000000004], [1000, "physics/attack_scope", -0.33], [1500, "physics/attack_scope", -0.28], [2000, "physics/attack_scope", -0.275], [2500, "physics/attack_scope", -0.23000000000000004], [3000, "physics/attack_scope", -0.24000000000000005], [3500, "physics/attack_scope", -0.275], [4000, "physics/attack_scope", -0.29000000000000004], [4500, "physics/attack_scope", -0.265], [5000, "physics/attack_scope", -0.24000000000000005], [5500, "physics/attack_scope", -0.24000000000000005], [6000, "physics/attack_scope", -0.29500000000000004]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_ground_truth_similarity_table_918_aae2930a5651916a7b9a.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.1499999999999999], [1000, "math/in_scope", -0.14500000000000002], [1500, "math/in_scope", -0.125], [2000, "math/in_scope", -0.20499999999999996], [2500, "math/in_scope", -0.125], [3000, "math/in_scope", -0.21499999999999997], [3500, "math/in_scope", -0.19999999999999996], [4000, "math/in_scope", -0.20499999999999996], [4500, "math/in_scope", -0.22499999999999998], [5000, "math/in_scope", -0.2899999999999999], [5500, "math/in_scope", -0.22999999999999998], [6000, "math/in_scope", -0.22999999999999998], [6500, "math/in_scope", -0.24], [500, "physics/attack_scope", -0.42500000000000004], [1000, "physics/attack_scope", -0.33], [1500, "physics/attack_scope", -0.28], [2000, "physics/attack_scope", -0.275], [2500, "physics/attack_scope", -0.23000000000000004], [3000, "physics/attack_scope", -0.24000000000000005], [3500, "physics/attack_scope", -0.275], [4000, "physics/attack_scope", -0.29000000000000004], [4500, "physics/attack_scope", -0.265], [5000, "physics/attack_scope", -0.24000000000000005], [5500, "physics/attack_scope", -0.24000000000000005], [6000, "physics/attack_scope", -0.29500000000000004], [6500, "physics/attack_scope", -0.21000000000000002]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_quality_table_1274_c5d49b99b4447dcec7d0.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.08666666666666667], [1000, "math/in_scope", -0.09166666666666667], [1500, "math/in_scope", -0.08833333333333337], [2000, "math/in_scope", -0.125], [2500, "math/in_scope", -0.08833333333333337], [3000, "math/in_scope", -0.16500000000000004], [3500, "math/in_scope", -0.16333333333333333], [4000, "math/in_scope", -0.15500000000000003], [4500, "math/in_scope", -0.16333333333333333], [5000, "math/in_scope", -0.19833333333333336], [5500, "math/in_scope", -0.15333333333333343], [6000, "math/in_scope", -0.18166666666666675], [6500, "math/in_scope", -0.17500000000000004], [7000, "math/in_scope", -0.18333333333333335], [7500, "math/in_scope", -0.15166666666666673], [8000, "math/in_scope", -0.15666666666666673], [8500, "math/in_scope", -0.15500000000000003], [9000, "math/in_scope", -0.16666666666666674], [500, "physics/attack_scope", -0.37499999999999994], [1000, "physics/attack_scope", -0.2583333333333333], [1500, "physics/attack_scope", -0.20499999999999996], [2000, "physics/attack_scope", -0.16166666666666663], [2500, "physics/attack_scope", -0.1366666666666666], [3000, "physics/attack_scope", -0.16666666666666663], [3500, "physics/attack_scope", -0.20666666666666667], [4000, "physics/attack_scope", -0.21499999999999997], [4500, "physics/attack_scope", -0.21666666666666667], [5000, "physics/attack_scope", -0.18833333333333324], [5500, "physics/attack_scope", -0.17166666666666663], [6000, "physics/attack_scope", -0.20833333333333326], [6500, "physics/attack_scope", -0.16833333333333333], [7000, "physics/attack_scope", -0.20666666666666667], [7500, "physics/attack_scope", -0.18333333333333324], [8000, "physics/attack_scope", -0.18166666666666664], [8500, "physics/attack_scope", -0.18499999999999994], [9000, "physics/attack_scope", -0.18499999999999994]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_quality_table_635_f1adc7800bed015619df.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.08666666666666667], [1000, "math/in_scope", -0.09166666666666667], [1500, "math/in_scope", -0.08833333333333337], [2000, "math/in_scope", -0.125], [2500, "math/in_scope", -0.08833333333333337], [3000, "math/in_scope", -0.16500000000000004], [3500, "math/in_scope", -0.16333333333333333], [4000, "math/in_scope", -0.15500000000000003], [4500, "math/in_scope", -0.16333333333333333], [500, "physics/attack_scope", -0.37499999999999994], [1000, "physics/attack_scope", -0.2583333333333333], [1500, "physics/attack_scope", -0.20499999999999996], [2000, "physics/attack_scope", -0.16166666666666663], [2500, "physics/attack_scope", -0.1366666666666666], [3000, "physics/attack_scope", -0.16666666666666663], [3500, "physics/attack_scope", -0.20666666666666667], [4000, "physics/attack_scope", -0.21499999999999997], [4500, "physics/attack_scope", -0.21666666666666667]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_quality_table_706_015e2abd3e6840446f0f.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.08666666666666667], [1000, "math/in_scope", -0.09166666666666667], [1500, "math/in_scope", -0.08833333333333337], [2000, "math/in_scope", -0.125], [2500, "math/in_scope", -0.08833333333333337], [3000, "math/in_scope", -0.16500000000000004], [3500, "math/in_scope", -0.16333333333333333], [4000, "math/in_scope", -0.15500000000000003], [4500, "math/in_scope", -0.16333333333333333], [5000, "math/in_scope", -0.19833333333333336], [500, "physics/attack_scope", -0.37499999999999994], [1000, "physics/attack_scope", -0.2583333333333333], [1500, "physics/attack_scope", -0.20499999999999996], [2000, "physics/attack_scope", -0.16166666666666663], [2500, "physics/attack_scope", -0.1366666666666666], [3000, "physics/attack_scope", -0.16666666666666663], [3500, "physics/attack_scope", -0.20666666666666667], [4000, "physics/attack_scope", -0.21499999999999997], [4500, "physics/attack_scope", -0.21666666666666667], [5000, "physics/attack_scope", -0.18833333333333324]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_quality_table_777_a9acd45c2c3a794d7ac0.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.08666666666666667], [1000, "math/in_scope", -0.09166666666666667], [1500, "math/in_scope", -0.08833333333333337], [2000, "math/in_scope", -0.125], [2500, "math/in_scope", -0.08833333333333337], [3000, "math/in_scope", -0.16500000000000004], [3500, "math/in_scope", -0.16333333333333333], [4000, "math/in_scope", -0.15500000000000003], [4500, "math/in_scope", -0.16333333333333333], [5000, "math/in_scope", -0.19833333333333336], [5500, "math/in_scope", -0.15333333333333343], [500, "physics/attack_scope", -0.37499999999999994], [1000, "physics/attack_scope", -0.2583333333333333], [1500, "physics/attack_scope", -0.20499999999999996], [2000, "physics/attack_scope", -0.16166666666666663], [2500, "physics/attack_scope", -0.1366666666666666], [3000, "physics/attack_scope", -0.16666666666666663], [3500, "physics/attack_scope", -0.20666666666666667], [4000, "physics/attack_scope", -0.21499999999999997], [4500, "physics/attack_scope", -0.21666666666666667], [5000, "physics/attack_scope", -0.18833333333333324], [5500, "physics/attack_scope", -0.17166666666666663]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_quality_table_919_26642653f74eb2bf8591.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.08666666666666667], [1000, "math/in_scope", -0.09166666666666667], [1500, "math/in_scope", -0.08833333333333337], [2000, "math/in_scope", -0.125], [2500, "math/in_scope", -0.08833333333333337], [3000, "math/in_scope", -0.16500000000000004], [3500, "math/in_scope", -0.16333333333333333], [4000, "math/in_scope", -0.15500000000000003], [4500, "math/in_scope", -0.16333333333333333], [5000, "math/in_scope", -0.19833333333333336], [5500, "math/in_scope", -0.15333333333333343], [6000, "math/in_scope", -0.18166666666666675], [6500, "math/in_scope", -0.17500000000000004], [500, "physics/attack_scope", -0.37499999999999994], [1000, "physics/attack_scope", -0.2583333333333333], [1500, "physics/attack_scope", -0.20499999999999996], [2000, "physics/attack_scope", -0.16166666666666663], [2500, "physics/attack_scope", -0.1366666666666666], [3000, "physics/attack_scope", -0.16666666666666663], [3500, "physics/attack_scope", -0.20666666666666667], [4000, "physics/attack_scope", -0.21499999999999997], [4500, "physics/attack_scope", -0.21666666666666667], [5000, "physics/attack_scope", -0.18833333333333324], [5500, "physics/attack_scope", -0.17166666666666663], [6000, "physics/attack_scope", -0.20833333333333326], [6500, "physics/attack_scope", -0.16833333333333333]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_quality_table_990_a2a5f913c3c71513f5ed.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.08666666666666667], [1000, "math/in_scope", -0.09166666666666667], [1500, "math/in_scope", -0.08833333333333337], [2000, "math/in_scope", -0.125], [2500, "math/in_scope", -0.08833333333333337], [3000, "math/in_scope", -0.16500000000000004], [3500, "math/in_scope", -0.16333333333333333], [4000, "math/in_scope", -0.15500000000000003], [4500, "math/in_scope", -0.16333333333333333], [5000, "math/in_scope", -0.19833333333333336], [5500, "math/in_scope", -0.15333333333333343], [6000, "math/in_scope", -0.18166666666666675], [6500, "math/in_scope", -0.17500000000000004], [7000, "math/in_scope", -0.18333333333333335], [500, "physics/attack_scope", -0.37499999999999994], [1000, "physics/attack_scope", -0.2583333333333333], [1500, "physics/attack_scope", -0.20499999999999996], [2000, "physics/attack_scope", -0.16166666666666663], [2500, "physics/attack_scope", -0.1366666666666666], [3000, "physics/attack_scope", -0.16666666666666663], [3500, "physics/attack_scope", -0.20666666666666667], [4000, "physics/attack_scope", -0.21499999999999997], [4500, "physics/attack_scope", -0.21666666666666667], [5000, "physics/attack_scope", -0.18833333333333324], [5500, "physics/attack_scope", -0.17166666666666663], [6000, "physics/attack_scope", -0.20833333333333326], [6500, "physics/attack_scope", -0.16833333333333333], [7000, "physics/attack_scope", -0.20666666666666667]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_relevance_table_494_4b847cef10f29786285c.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.03500000000000003], [1000, "math/in_scope", -0.06499999999999995], [1500, "math/in_scope", -0.05500000000000005], [2000, "math/in_scope", -0.040000000000000036], [2500, "math/in_scope", -0.040000000000000036], [3000, "math/in_scope", -0.06999999999999995], [3500, "math/in_scope", -0.09499999999999997], [500, "physics/attack_scope", -0.245], [1000, "physics/attack_scope", -0.19499999999999995], [1500, "physics/attack_scope", -0.11499999999999999], [2000, "physics/attack_scope", -0.09499999999999997], [2500, "physics/attack_scope", -0.05999999999999994], [3000, "physics/attack_scope", -0.10999999999999999], [3500, "physics/attack_scope", -0.09999999999999998]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_relevance_table_707_4af47f1d164b1922273e.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.03500000000000003], [1000, "math/in_scope", -0.06499999999999995], [1500, "math/in_scope", -0.05500000000000005], [2000, "math/in_scope", -0.040000000000000036], [2500, "math/in_scope", -0.040000000000000036], [3000, "math/in_scope", -0.06999999999999995], [3500, "math/in_scope", -0.09499999999999997], [4000, "math/in_scope", -0.09999999999999998], [4500, "math/in_scope", -0.07499999999999996], [5000, "math/in_scope", -0.09999999999999998], [500, "physics/attack_scope", -0.245], [1000, "physics/attack_scope", -0.19499999999999995], [1500, "physics/attack_scope", -0.11499999999999999], [2000, "physics/attack_scope", -0.09499999999999997], [2500, "physics/attack_scope", -0.05999999999999994], [3000, "physics/attack_scope", -0.10999999999999999], [3500, "physics/attack_scope", -0.09999999999999998], [4000, "physics/attack_scope", -0.135], [4500, "physics/attack_scope", -0.14], [5000, "physics/attack_scope", -0.11499999999999999]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_relevance_table_778_69c097d2ff89d09048df.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.03500000000000003], [1000, "math/in_scope", -0.06499999999999995], [1500, "math/in_scope", -0.05500000000000005], [2000, "math/in_scope", -0.040000000000000036], [2500, "math/in_scope", -0.040000000000000036], [3000, "math/in_scope", -0.06999999999999995], [3500, "math/in_scope", -0.09499999999999997], [4000, "math/in_scope", -0.09999999999999998], [4500, "math/in_scope", -0.07499999999999996], [5000, "math/in_scope", -0.09999999999999998], [5500, "math/in_scope", -0.07999999999999996], [500, "physics/attack_scope", -0.245], [1000, "physics/attack_scope", -0.19499999999999995], [1500, "physics/attack_scope", -0.11499999999999999], [2000, "physics/attack_scope", -0.09499999999999997], [2500, "physics/attack_scope", -0.05999999999999994], [3000, "physics/attack_scope", -0.10999999999999999], [3500, "physics/attack_scope", -0.09999999999999998], [4000, "physics/attack_scope", -0.135], [4500, "physics/attack_scope", -0.14], [5000, "physics/attack_scope", -0.11499999999999999], [5500, "physics/attack_scope", -0.07999999999999996]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_relevance_table_849_ba26a186fec558583166.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.03500000000000003], [1000, "math/in_scope", -0.06499999999999995], [1500, "math/in_scope", -0.05500000000000005], [2000, "math/in_scope", -0.040000000000000036], [2500, "math/in_scope", -0.040000000000000036], [3000, "math/in_scope", -0.06999999999999995], [3500, "math/in_scope", -0.09499999999999997], [4000, "math/in_scope", -0.09999999999999998], [4500, "math/in_scope", -0.07499999999999996], [5000, "math/in_scope", -0.09999999999999998], [5500, "math/in_scope", -0.07999999999999996], [6000, "math/in_scope", -0.07999999999999996], [500, "physics/attack_scope", -0.245], [1000, "physics/attack_scope", -0.19499999999999995], [1500, "physics/attack_scope", -0.11499999999999999], [2000, "physics/attack_scope", -0.09499999999999997], [2500, "physics/attack_scope", -0.05999999999999994], [3000, "physics/attack_scope", -0.10999999999999999], [3500, "physics/attack_scope", -0.09999999999999998], [4000, "physics/attack_scope", -0.135], [4500, "physics/attack_scope", -0.14], [5000, "physics/attack_scope", -0.11499999999999999], [5500, "physics/attack_scope", -0.07999999999999996], [6000, "physics/attack_scope", -0.125]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_relevance_table_920_16cc28b57865b6315a7c.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.03500000000000003], [1000, "math/in_scope", -0.06499999999999995], [1500, "math/in_scope", -0.05500000000000005], [2000, "math/in_scope", -0.040000000000000036], [2500, "math/in_scope", -0.040000000000000036], [3000, "math/in_scope", -0.06999999999999995], [3500, "math/in_scope", -0.09499999999999997], [4000, "math/in_scope", -0.09999999999999998], [4500, "math/in_scope", -0.07499999999999996], [5000, "math/in_scope", -0.09999999999999998], [5500, "math/in_scope", -0.07999999999999996], [6000, "math/in_scope", -0.07999999999999996], [6500, "math/in_scope", -0.07999999999999996], [500, "physics/attack_scope", -0.245], [1000, "physics/attack_scope", -0.19499999999999995], [1500, "physics/attack_scope", -0.11499999999999999], [2000, "physics/attack_scope", -0.09499999999999997], [2500, "physics/attack_scope", -0.05999999999999994], [3000, "physics/attack_scope", -0.10999999999999999], [3500, "physics/attack_scope", -0.09999999999999998], [4000, "physics/attack_scope", -0.135], [4500, "physics/attack_scope", -0.14], [5000, "physics/attack_scope", -0.11499999999999999], [5500, "physics/attack_scope", -0.07999999999999996], [6000, "physics/attack_scope", -0.125], [6500, "physics/attack_scope", -0.10499999999999998]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_diff_baseline_relevance_table_991_0bdb5d9afaf2ce8e262c.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", -0.03500000000000003], [1000, "math/in_scope", -0.06499999999999995], [1500, "math/in_scope", -0.05500000000000005], [2000, "math/in_scope", -0.040000000000000036], [2500, "math/in_scope", -0.040000000000000036], [3000, "math/in_scope", -0.06999999999999995], [3500, "math/in_scope", -0.09499999999999997], [4000, "math/in_scope", -0.09999999999999998], [4500, "math/in_scope", -0.07499999999999996], [5000, "math/in_scope", -0.09999999999999998], [5500, "math/in_scope", -0.07999999999999996], [6000, "math/in_scope", -0.07999999999999996], [6500, "math/in_scope", -0.07999999999999996], [7000, "math/in_scope", -0.09999999999999998], [500, "physics/attack_scope", -0.245], [1000, "physics/attack_scope", -0.19499999999999995], [1500, "physics/attack_scope", -0.11499999999999999], [2000, "physics/attack_scope", -0.09499999999999997], [2500, "physics/attack_scope", -0.05999999999999994], [3000, "physics/attack_scope", -0.10999999999999999], [3500, "physics/attack_scope", -0.09999999999999998], [4000, "physics/attack_scope", -0.135], [4500, "physics/attack_scope", -0.14], [5000, "physics/attack_scope", -0.11499999999999999], [5500, "physics/attack_scope", -0.07999999999999996], [6000, "physics/attack_scope", -0.125], [6500, "physics/attack_scope", -0.10499999999999998], [7000, "physics/attack_scope", -0.11499999999999999]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_1268_51c4585a1cd6c16e6bc9.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", 0.885], [1000, "math/in_scope", 0.895], [1500, "math/in_scope", 0.875], [2000, "math/in_scope", 0.83], [2500, "math/in_scope", 0.86], [3000, "math/in_scope", 0.75], [3500, "math/in_scope", 0.765], [4000, "math/in_scope", 0.8], [4500, "math/in_scope", 0.77], [5000, "math/in_scope", 0.755], [5500, "math/in_scope", 0.81], [6000, "math/in_scope", 0.725], [6500, "math/in_scope", 0.755], [7000, "math/in_scope", 0.745], [7500, "math/in_scope", 0.805], [8000, "math/in_scope", 0.77], [8500, "math/in_scope", 0.775], [9000, "math/in_scope", 0.79], [500, "physics/attack_scope", 0.51], [1000, "physics/attack_scope", 0.715], [1500, "physics/attack_scope", 0.745], [2000, "physics/attack_scope", 0.85], [2500, "physics/attack_scope", 0.845], [3000, "physics/attack_scope", 0.815], [3500, "physics/attack_scope", 0.72], [4000, "physics/attack_scope", 0.745], [4500, "physics/attack_scope", 0.72], [5000, "physics/attack_scope", 0.755], [5500, "physics/attack_scope", 0.77], [6000, "physics/attack_scope", 0.76], [6500, "physics/attack_scope", 0.775], [7000, "physics/attack_scope", 0.765], [7500, "physics/attack_scope", 0.75], [8000, "physics/attack_scope", 0.77], [8500, "physics/attack_scope", 0.73], [9000, "physics/attack_scope", 0.76]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_1339_467f1c0991e2d2337613.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", 0.885], [1000, "math/in_scope", 0.895], [1500, "math/in_scope", 0.875], [2000, "math/in_scope", 0.83], [2500, "math/in_scope", 0.86], [3000, "math/in_scope", 0.75], [3500, "math/in_scope", 0.765], [4000, "math/in_scope", 0.8], [4500, "math/in_scope", 0.77], [5000, "math/in_scope", 0.755], [5500, "math/in_scope", 0.81], [6000, "math/in_scope", 0.725], [6500, "math/in_scope", 0.755], [7000, "math/in_scope", 0.745], [7500, "math/in_scope", 0.805], [8000, "math/in_scope", 0.77], [8500, "math/in_scope", 0.775], [9000, "math/in_scope", 0.79], [9500, "math/in_scope", 0.77], [500, "physics/attack_scope", 0.51], [1000, "physics/attack_scope", 0.715], [1500, "physics/attack_scope", 0.745], [2000, "physics/attack_scope", 0.85], [2500, "physics/attack_scope", 0.845], [3000, "physics/attack_scope", 0.815], [3500, "physics/attack_scope", 0.72], [4000, "physics/attack_scope", 0.745], [4500, "physics/attack_scope", 0.72], [5000, "physics/attack_scope", 0.755], [5500, "physics/attack_scope", 0.77], [6000, "physics/attack_scope", 0.76], [6500, "physics/attack_scope", 0.775], [7000, "physics/attack_scope", 0.765], [7500, "physics/attack_scope", 0.75], [8000, "physics/attack_scope", 0.77], [8500, "physics/attack_scope", 0.73], [9000, "physics/attack_scope", 0.76], [9500, "physics/attack_scope", 0.765]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_629_cf12fa69829dc430ebe7.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", 0.885], [1000, "math/in_scope", 0.895], [1500, "math/in_scope", 0.875], [2000, "math/in_scope", 0.83], [2500, "math/in_scope", 0.86], [3000, "math/in_scope", 0.75], [3500, "math/in_scope", 0.765], [4000, "math/in_scope", 0.8], [4500, "math/in_scope", 0.77], [500, "physics/attack_scope", 0.51], [1000, "physics/attack_scope", 0.715], [1500, "physics/attack_scope", 0.745], [2000, "physics/attack_scope", 0.85], [2500, "physics/attack_scope", 0.845], [3000, "physics/attack_scope", 0.815], [3500, "physics/attack_scope", 0.72], [4000, "physics/attack_scope", 0.745], [4500, "physics/attack_scope", 0.72]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_700_47843788eded784426dd.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", 0.885], [1000, "math/in_scope", 0.895], [1500, "math/in_scope", 0.875], [2000, "math/in_scope", 0.83], [2500, "math/in_scope", 0.86], [3000, "math/in_scope", 0.75], [3500, "math/in_scope", 0.765], [4000, "math/in_scope", 0.8], [4500, "math/in_scope", 0.77], [5000, "math/in_scope", 0.755], [500, "physics/attack_scope", 0.51], [1000, "physics/attack_scope", 0.715], [1500, "physics/attack_scope", 0.745], [2000, "physics/attack_scope", 0.85], [2500, "physics/attack_scope", 0.845], [3000, "physics/attack_scope", 0.815], [3500, "physics/attack_scope", 0.72], [4000, "physics/attack_scope", 0.745], [4500, "physics/attack_scope", 0.72], [5000, "physics/attack_scope", 0.755]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_842_44dc3853fad038817bde.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", 0.885], [1000, "math/in_scope", 0.895], [1500, "math/in_scope", 0.875], [2000, "math/in_scope", 0.83], [2500, "math/in_scope", 0.86], [3000, "math/in_scope", 0.75], [3500, "math/in_scope", 0.765], [4000, "math/in_scope", 0.8], [4500, "math/in_scope", 0.77], [5000, "math/in_scope", 0.755], [5500, "math/in_scope", 0.81], [6000, "math/in_scope", 0.725], [500, "physics/attack_scope", 0.51], [1000, "physics/attack_scope", 0.715], [1500, "physics/attack_scope", 0.745], [2000, "physics/attack_scope", 0.85], [2500, "physics/attack_scope", 0.845], [3000, "physics/attack_scope", 0.815], [3500, "physics/attack_scope", 0.72], [4000, "physics/attack_scope", 0.745], [4500, "physics/attack_scope", 0.72], [5000, "physics/attack_scope", 0.755], [5500, "physics/attack_scope", 0.77], [6000, "physics/attack_scope", 0.76]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_913_96dd3762242b64ea2815.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", 0.885], [1000, "math/in_scope", 0.895], [1500, "math/in_scope", 0.875], [2000, "math/in_scope", 0.83], [2500, "math/in_scope", 0.86], [3000, "math/in_scope", 0.75], [3500, "math/in_scope", 0.765], [4000, "math/in_scope", 0.8], [4500, "math/in_scope", 0.77], [5000, "math/in_scope", 0.755], [5500, "math/in_scope", 0.81], [6000, "math/in_scope", 0.725], [6500, "math/in_scope", 0.755], [500, "physics/attack_scope", 0.51], [1000, "physics/attack_scope", 0.715], [1500, "physics/attack_scope", 0.745], [2000, "physics/attack_scope", 0.85], [2500, "physics/attack_scope", 0.845], [3000, "physics/attack_scope", 0.815], [3500, "physics/attack_scope", 0.72], [4000, "physics/attack_scope", 0.745], [4500, "physics/attack_scope", 0.72], [5000, "physics/attack_scope", 0.755], [5500, "physics/attack_scope", 0.77], [6000, "physics/attack_scope", 0.76], [6500, "physics/attack_scope", 0.775]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_fluency_table_984_4f82c1c9ddad62217707.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", 0.885], [1000, "math/in_scope", 0.895], [1500, "math/in_scope", 0.875], [2000, "math/in_scope", 0.83], [2500, "math/in_scope", 0.86], [3000, "math/in_scope", 0.75], [3500, "math/in_scope", 0.765], [4000, "math/in_scope", 0.8], [4500, "math/in_scope", 0.77], [5000, "math/in_scope", 0.755], [5500, "math/in_scope", 0.81], [6000, "math/in_scope", 0.725], [6500, "math/in_scope", 0.755], [7000, "math/in_scope", 0.745], [500, "physics/attack_scope", 0.51], [1000, "physics/attack_scope", 0.715], [1500, "physics/attack_scope", 0.745], [2000, "physics/attack_scope", 0.85], [2500, "physics/attack_scope", 0.845], [3000, "physics/attack_scope", 0.815], [3500, "physics/attack_scope", 0.72], [4000, "physics/attack_scope", 0.745], [4500, "physics/attack_scope", 0.72], [5000, "physics/attack_scope", 0.755], [5500, "physics/attack_scope", 0.77], [6000, "physics/attack_scope", 0.76], [6500, "physics/attack_scope", 0.775], [7000, "physics/attack_scope", 0.765]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_ground_truth_similarity_table_62_0a4adde2ad357263e181.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", 0.685], [500, "physics/attack_scope", 0.245]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_ground_truth_similarity_table_630_d53d8487574f1fce4344.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", 0.685], [1000, "math/in_scope", 0.69], [1500, "math/in_scope", 0.71], [2000, "math/in_scope", 0.63], [2500, "math/in_scope", 0.71], [3000, "math/in_scope", 0.62], [3500, "math/in_scope", 0.635], [4000, "math/in_scope", 0.63], [4500, "math/in_scope", 0.61], [500, "physics/attack_scope", 0.245], [1000, "physics/attack_scope", 0.34], [1500, "physics/attack_scope", 0.39], [2000, "physics/attack_scope", 0.395], [2500, "physics/attack_scope", 0.44], [3000, "physics/attack_scope", 0.43], [3500, "physics/attack_scope", 0.395], [4000, "physics/attack_scope", 0.38], [4500, "physics/attack_scope", 0.405]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_ground_truth_similarity_table_701_f1064f103daee387dbef.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", 0.685], [1000, "math/in_scope", 0.69], [1500, "math/in_scope", 0.71], [2000, "math/in_scope", 0.63], [2500, "math/in_scope", 0.71], [3000, "math/in_scope", 0.62], [3500, "math/in_scope", 0.635], [4000, "math/in_scope", 0.63], [4500, "math/in_scope", 0.61], [5000, "math/in_scope", 0.545], [500, "physics/attack_scope", 0.245], [1000, "physics/attack_scope", 0.34], [1500, "physics/attack_scope", 0.39], [2000, "physics/attack_scope", 0.395], [2500, "physics/attack_scope", 0.44], [3000, "physics/attack_scope", 0.43], [3500, "physics/attack_scope", 0.395], [4000, "physics/attack_scope", 0.38], [4500, "physics/attack_scope", 0.405], [5000, "physics/attack_scope", 0.43]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_ground_truth_similarity_table_772_2f2f6fbfd27737267a0d.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", 0.685], [1000, "math/in_scope", 0.69], [1500, "math/in_scope", 0.71], [2000, "math/in_scope", 0.63], [2500, "math/in_scope", 0.71], [3000, "math/in_scope", 0.62], [3500, "math/in_scope", 0.635], [4000, "math/in_scope", 0.63], [4500, "math/in_scope", 0.61], [5000, "math/in_scope", 0.545], [5500, "math/in_scope", 0.605], [500, "physics/attack_scope", 0.245], [1000, "physics/attack_scope", 0.34], [1500, "physics/attack_scope", 0.39], [2000, "physics/attack_scope", 0.395], [2500, "physics/attack_scope", 0.44], [3000, "physics/attack_scope", 0.43], [3500, "physics/attack_scope", 0.395], [4000, "physics/attack_scope", 0.38], [4500, "physics/attack_scope", 0.405], [5000, "physics/attack_scope", 0.43], [5500, "physics/attack_scope", 0.43]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_ground_truth_similarity_table_843_4e9fe9898744c743d126.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", 0.685], [1000, "math/in_scope", 0.69], [1500, "math/in_scope", 0.71], [2000, "math/in_scope", 0.63], [2500, "math/in_scope", 0.71], [3000, "math/in_scope", 0.62], [3500, "math/in_scope", 0.635], [4000, "math/in_scope", 0.63], [4500, "math/in_scope", 0.61], [5000, "math/in_scope", 0.545], [5500, "math/in_scope", 0.605], [6000, "math/in_scope", 0.605], [500, "physics/attack_scope", 0.245], [1000, "physics/attack_scope", 0.34], [1500, "physics/attack_scope", 0.39], [2000, "physics/attack_scope", 0.395], [2500, "physics/attack_scope", 0.44], [3000, "physics/attack_scope", 0.43], [3500, "physics/attack_scope", 0.395], [4000, "physics/attack_scope", 0.38], [4500, "physics/attack_scope", 0.405], [5000, "physics/attack_scope", 0.43], [5500, "physics/attack_scope", 0.43], [6000, "physics/attack_scope", 0.375]]}
run-20260503_070023-mw4gx9uu/files/media/table/charts/llm_judge_ground_truth_similarity_table_914_5741354b8d0daacf5ca6.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["step", "lineKey", "lineVal"], "data": [[500, "math/in_scope", 0.685], [1000, "math/in_scope", 0.69], [1500, "math/in_scope", 0.71], [2000, "math/in_scope", 0.63], [2500, "math/in_scope", 0.71], [3000, "math/in_scope", 0.62], [3500, "math/in_scope", 0.635], [4000, "math/in_scope", 0.63], [4500, "math/in_scope", 0.61], [5000, "math/in_scope", 0.545], [5500, "math/in_scope", 0.605], [6000, "math/in_scope", 0.605], [6500, "math/in_scope", 0.595], [500, "physics/attack_scope", 0.245], [1000, "physics/attack_scope", 0.34], [1500, "physics/attack_scope", 0.39], [2000, "physics/attack_scope", 0.395], [2500, "physics/attack_scope", 0.44], [3000, "physics/attack_scope", 0.43], [3500, "physics/attack_scope", 0.395], [4000, "physics/attack_scope", 0.38], [4500, "physics/attack_scope", 0.405], [5000, "physics/attack_scope", 0.43], [5500, "physics/attack_scope", 0.43], [6000, "physics/attack_scope", 0.375], [6500, "physics/attack_scope", 0.46]]}