yuccaaa committed on
Commit
9440cb3
·
verified ·
1 Parent(s): c633a73

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. EasyR1-new/examples/baselines/qwen2_5_vl_3b_clevr.sh +19 -0
  2. EasyR1-new/examples/baselines/qwen2_5_vl_3b_geoqa8k.sh +19 -0
  3. EasyR1-new/examples/format_prompt/bio_format.jinja +2 -0
  4. EasyR1-new/examples/format_prompt/dapo.jinja +1 -0
  5. EasyR1-new/examples/format_prompt/math.jinja +1 -0
  6. EasyR1-new/examples/format_prompt/r1v.jinja +1 -0
  7. EasyR1-new/examples/qwen2_5_vl_7b_multi_image.sh +18 -0
  8. EasyR1-new/examples/qwen3_14b_dapo17k_dapo.sh +43 -0
  9. EasyR1-new/examples/qwen3_4b_math_grpo.sh +13 -0
  10. EasyR1-new/examples/reward_function/__pycache__/bio.cpython-310.pyc +0 -0
  11. EasyR1-new/examples/reward_function/bio.py +183 -0
  12. EasyR1-new/examples/reward_function/dapo.py +163 -0
  13. EasyR1-new/examples/reward_function/math.py +49 -0
  14. EasyR1-new/examples/reward_function/r1v.py +50 -0
  15. EasyR1-new/examples/runtime_env.yaml +9 -0
  16. EasyR1-new/examples/wandb/debug-internal.log +13 -0
  17. EasyR1-new/examples/wandb/debug.log +28 -0
  18. EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/output.log +2 -0
  19. EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/requirements.txt +295 -0
  20. EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/wandb-metadata.json +71 -0
  21. EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log +6 -0
  22. EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log +21 -0
  23. EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/run-lkflebyj.wandb +0 -0
  24. EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/config.yaml +322 -0
  25. EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/output.log +72 -0
  26. EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/requirements.txt +295 -0
  27. EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-metadata.json +92 -0
  28. EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-summary.json +1 -0
  29. EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log +15 -0
  30. EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log +28 -0
  31. EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/run-nji9xqxs.wandb +0 -0
  32. EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/output.log +0 -0
  33. EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/requirements.txt +295 -0
  34. EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/wandb-metadata.json +36 -0
  35. EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log +6 -0
  36. EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log +21 -0
  37. EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/run-wmarwr6l.wandb +0 -0
  38. EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/output.log +2 -0
  39. EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/requirements.txt +295 -0
  40. EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/wandb-metadata.json +92 -0
  41. EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log +6 -0
  42. EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log +21 -0
  43. EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/run-zkytrm61.wandb +0 -0
  44. EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/output.log +2 -0
  45. EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/requirements.txt +295 -0
  46. EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/wandb-metadata.json +92 -0
  47. EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log +6 -0
  48. EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log +21 -0
  49. EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/run-eo9xzqez.wandb +0 -0
  50. EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/output.log +2 -0
EasyR1-new/examples/baselines/qwen2_5_vl_3b_clevr.sh ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Launches verl.trainer.main with examples/config.yaml on the
# BUAADreamer/clevr_count_70k dataset, using the r1v prompt template and the
# r1v reward function in sequential reward mode.

set -x  # echo every command before running it

export PYTHONUNBUFFERED=1

MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct  # replace it with your local file path

# MODEL_PATH is quoted so that a local path containing spaces or glob
# characters is passed to the trainer as a single argument.
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=BUAADreamer/clevr_count_70k@train \
    data.val_files=BUAADreamer/clevr_count_70k@test \
    data.format_prompt=./examples/format_prompt/r1v.jinja \
    worker.actor.model.model_path="${MODEL_PATH}" \
    worker.rollout.tensor_parallel_size=1 \
    worker.reward.reward_type=sequential \
    worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \
    trainer.experiment_name=qwen2_5_vl_3b_clevr \
    trainer.n_gpus_per_node=2
EasyR1-new/examples/baselines/qwen2_5_vl_3b_geoqa8k.sh ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Launches verl.trainer.main with examples/config.yaml on the
# leonardPKU/GEOQA_8K_R1V dataset, using the r1v prompt template and the
# r1v reward function in sequential reward mode.

set -x  # echo every command before running it

export PYTHONUNBUFFERED=1

MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct  # replace it with your local file path

# MODEL_PATH is quoted so that a local path containing spaces or glob
# characters is passed to the trainer as a single argument.
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=leonardPKU/GEOQA_8K_R1V@train \
    data.val_files=leonardPKU/GEOQA_8K_R1V@test \
    data.format_prompt=./examples/format_prompt/r1v.jinja \
    worker.actor.model.model_path="${MODEL_PATH}" \
    worker.rollout.tensor_parallel_size=1 \
    worker.reward.reward_type=sequential \
    worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \
    trainer.experiment_name=qwen2_5_vl_3b_geoqa8k \
    trainer.n_gpus_per_node=8
EasyR1-new/examples/format_prompt/bio_format.jinja ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {{ content | trim }} You must first reason through the question step by step, as if you're thinking aloud. Enclose your full reasoning process within <think> </think> tags. After your reasoning, output only the number corresponding to the final answer choice inside <answer> </answer> tags. For example: <think> reasoning process </think> <answer>result number</answer>
2
+
EasyR1-new/examples/format_prompt/dapo.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ Solve the following math problem step by step. The last line of your response should be of the form Answer: $Answer (without quotes) where $Answer is the answer to the problem.\n\n{{ content | trim }}\n\nRemember to put your answer on its own line after "Answer:".
EasyR1-new/examples/format_prompt/math.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {{ content | trim }} You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}.
EasyR1-new/examples/format_prompt/r1v.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {{ content | trim }} A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>
EasyR1-new/examples/qwen2_5_vl_7b_multi_image.sh ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# REMINDER: this script uses test data split and should ONLY be used for debugging. DO NOT use for training.
#
# Launches verl.trainer.main with examples/config.yaml on the
# hiyouga/journeybench-multi-image-vqa dataset, allowing up to two images
# per prompt (worker.rollout.limit_images=2).

set -x  # echo every command before running it

export PYTHONUNBUFFERED=1

MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct  # replace it with your local file path

# MODEL_PATH is quoted so that a local path containing spaces or glob
# characters is passed to the trainer as a single argument.
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/journeybench-multi-image-vqa@train \
    data.val_files=hiyouga/journeybench-multi-image-vqa@test \
    data.rollout_batch_size=256 \
    worker.actor.model.model_path="${MODEL_PATH}" \
    worker.rollout.limit_images=2 \
    trainer.experiment_name=qwen2_5_vl_7b_multi_image \
    trainer.n_gpus_per_node=8
EasyR1-new/examples/qwen3_14b_dapo17k_dapo.sh ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Launches verl.trainer.main with examples/config.yaml on the
# Saigyouji-Yuyuko1000/dapo17k dataset, using the dapo prompt template and
# the dapo reward function, with online filtering on `accuracy_normalized`
# and the KL term disabled.

set -x  # echo every command before running it

export PYTHONUNBUFFERED=1

MODEL_PATH=Qwen/Qwen3-14B-Base  # replace it with your local file path

# MODEL_PATH is quoted so that a local path containing spaces or glob
# characters is passed to the trainer as a single argument.
# The reward kwargs repeat max_response_length (20480) so the overlong
# penalty in dapo.py uses the same limit as data.max_response_length.
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=Saigyouji-Yuyuko1000/dapo17k@train \
    data.val_files=Saigyouji-Yuyuko1000/dapo17k@test \
    data.format_prompt=./examples/format_prompt/dapo.jinja \
    data.max_prompt_length=2048 \
    data.max_response_length=20480 \
    data.rollout_batch_size=512 \
    data.mini_rollout_batch_size=256 \
    worker.actor.micro_batch_size_per_device_for_update=1 \
    worker.actor.micro_batch_size_per_device_for_experience=8 \
    worker.actor.model.model_path="${MODEL_PATH}" \
    worker.actor.fsdp.torch_dtype=bf16 \
    worker.actor.optim.strategy=adamw_bf16 \
    worker.actor.optim.weight_decay=0.1 \
    worker.actor.optim.lr_warmup_steps=10 \
    worker.actor.global_batch_size=32 \
    worker.actor.clip_ratio_low=0.2 \
    worker.actor.clip_ratio_high=0.28 \
    worker.actor.clip_ratio_dual=10.0 \
    worker.rollout.n=16 \
    worker.rollout.max_num_batched_tokens=22528 \
    worker.rollout.val_override_config='{"n":16,"temperature":1.0,"top_p":0.7}' \
    worker.rollout.gpu_memory_utilization=0.8 \
    worker.reward.reward_function=./examples/reward_function/dapo.py:compute_score \
    worker.reward.reward_function_kwargs='{"max_response_length":20480,"overlong_buffer_length":4096,"overlong_penalty_factor":1.0}' \
    algorithm.disable_kl=True \
    algorithm.online_filtering=True \
    algorithm.filter_key=accuracy_normalized \
    algorithm.filter_low=0.01 \
    algorithm.filter_high=0.99 \
    trainer.total_epochs=10 \
    trainer.max_try_make_batch=10 \
    trainer.experiment_name=qwen3_14b_dapo17k_dapo \
    trainer.n_gpus_per_node=8
EasyR1-new/examples/qwen3_4b_math_grpo.sh ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Launches verl.trainer.main with examples/config.yaml, overriding only the
# model path, the maximum response length and the experiment name.

set -x  # echo every command before running it

export PYTHONUNBUFFERED=1

MODEL_PATH=Qwen/Qwen3-4B  # replace it with your local file path

# MODEL_PATH is quoted so that a local path containing spaces or glob
# characters is passed to the trainer as a single argument.
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.max_response_length=4096 \
    worker.actor.model.model_path="${MODEL_PATH}" \
    trainer.experiment_name=qwen3_4b_math_grpo
EasyR1-new/examples/reward_function/__pycache__/bio.cpython-310.pyc ADDED
Binary file (4.5 kB). View file
 
EasyR1-new/examples/reward_function/bio.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+
4
+ from itertools import islice, zip_longest
5
+ from typing import Callable, Dict, List, Optional, Tuple, TypedDict
6
+ import json
7
+
8
def repeatness_reward(s: str):
    """Score how free of repeated substrings ``s`` is (higher = less repetition).

    A suffix array of ``s`` is built by prefix doubling, the longest-common-
    prefix (LCP) lengths of lexicographically adjacent suffixes are summed,
    and the result is ``1 - 2 * sum(LCP) / (n * (n + 1))`` with ``n = len(s)``.

    Args:
        s: Text to analyse; characters are compared by code point.

    Returns:
        ``0.0`` when ``s`` has at most one character, otherwise the score
        described above (``1.0`` when no two suffixes share a prefix).
    """

    def ranks(values):
        # Dense rank of every element: equal elements share a rank.
        index = {v: i for i, v in enumerate(sorted(set(values)))}
        return [index[v] for v in values]

    def suffix_array(codes):
        # Prefix doubling: after each round ``rank`` orders suffixes by their
        # first 2*k symbols, with "past the end" (-1) sorting first.
        size = len(codes)
        rank = ranks(codes)
        k = 1
        # Run until k >= size so that every suffix has a distinct rank.
        # Stopping at k >= size - 1 can leave the two longest suffixes tied
        # (e.g. "aa"), which makes ``rank`` a non-permutation.
        while k < size:
            rank = ranks(list(zip_longest(rank, islice(rank, k, None), fillvalue=-1)))
            k <<= 1
        sa = [0] * size
        for start, r in enumerate(rank):
            sa[r] = start
        return rank, sa

    def lcp(arr, suffix_arr, rank):
        # LCP between each suffix and its successor in sorted order; the
        # carried-over match length ``k`` drops by at most one per step.
        size = len(arr)
        out = [0] * size
        k = 0
        for i in range(size):
            if rank[i] == size - 1:
                # Last suffix in sorted order has no successor.
                k = 0
                continue

            j = suffix_arr[rank[i] + 1]
            while i + k < size and j + k < size and arr[i + k] == arr[j + k]:
                k += 1

            out[rank[i]] = k
            if k > 0:
                k -= 1

        return out

    arr = [ord(ch) for ch in s]
    n = len(arr)
    if n <= 1:
        return 0.0
    rank, sa = suffix_array(arr)
    cnt = sum(lcp(arr, sa, rank))

    return 1 - cnt * 2 / (n * (n + 1))
49
+
50
+ import re
51
+
52
def format_reward(predict_str: str) -> float:
    """Format reward: 1.0 only for ``<think>...</think><answer>D</answer>``.

    After stripping surrounding whitespace the whole string must consist of
    exactly one ``<think>`` block followed (optionally after whitespace) by one
    ``<answer>`` block holding a single digit 0-9. Nothing may precede,
    follow, or sit between the two blocks.

    Args:
        predict_str: Raw model output.

    Returns:
        1.0 if the output matches the required layout, otherwise 0.0.
    """
    # The reasoning body may not contain another <think> or </think> tag.
    # With a plain ``.*?`` under DOTALL + fullmatch, the body could stretch
    # across extra ``</think><answer>..</answer><think>`` groups, so outputs
    # with several think/answer blocks would wrongly pass.
    pattern = r'<think>(?:(?!</?think>).)*</think>\s*<answer>\s*([0-9])\s*</answer>'
    return 1.0 if re.fullmatch(pattern, predict_str.strip(), re.DOTALL) else 0.0
60
+
61
def acc_reward(predict_str: str, ground_truth: str) -> float:
    """Accuracy reward: 1.0 when the answered digit equals ``ground_truth``.

    The first ``<answer>D</answer>`` occurrence (single digit, optional
    surrounding whitespace) is extracted from ``predict_str`` and compared
    numerically with ``ground_truth``.

    Args:
        predict_str: Raw model output.
        ground_truth: Expected label. A string such as ``"4"`` is parsed as an
            integer; non-string values (e.g. an ``int`` label) are compared
            as they are.

    Returns:
        1.0 on a match; 0.0 if no answer tag is found, the ground truth
        string is not an integer, or the values differ.
    """
    match = re.search(r'<answer>\s*([0-9])\s*</answer>', predict_str)
    if not match:
        return 0.0
    answer_value = int(match.group(1))

    if isinstance(ground_truth, str):
        # An int never equals a str in Python, so a string label such as "4"
        # has to be parsed before comparing or the reward is always 0.
        try:
            expected = int(ground_truth.strip())
        except ValueError:
            return 0.0
    else:
        expected = ground_truth

    return 1.0 if answer_value == expected else 0.0
86
+
87
+ # def compute_score( solution_str: str, ground_truth: str, extra_info):
88
+ # """
89
+ # 综合评分函数
90
+ # """
91
def compute_score(predicts: List[str], ground_truths: List[str], format_weight: float = 0.1) -> List[Dict[str, float]]:
    """Score a batch of responses and dump the results to a JSONL file.

    For every (prediction, ground truth) pair three component scores are
    computed: ``format_reward``, ``acc_reward`` and ``repeatness_reward`` (the
    latter on the text inside the first ``<think>`` block, or on ``""`` when
    there is none). ``overall`` is the unweighted sum of the three.

    Args:
        predicts: Model responses.
        ground_truths: Expected answers, paired positionally with ``predicts``
            (extra items on the longer list are ignored by ``zip``).
        format_weight: Accepted for signature compatibility; it is not used
            in the score.

    Returns:
        One dict per pair with keys ``overall``, ``format``, ``accuracy`` and
        ``repeat``.
    """
    import warnings

    scores = []
    for solution_str, ground_truth in zip(predicts, ground_truths):
        format_score = format_reward(solution_str)
        acc_score = acc_reward(solution_str, ground_truth)

        # Only the reasoning text is checked for repetition.
        think_match = re.search(r'<think>(.*?)</think>', solution_str, re.DOTALL)
        think_str = think_match.group(1).strip() if think_match else ""
        repeat_score = repeatness_reward(think_str)

        scores.append(
            {
                "overall": format_score + acc_score + repeat_score,
                "format": format_score,
                "accuracy": acc_score,
                "repeat": repeat_score,
            }
        )

    # Debug dump of this batch. The file is opened in "w" mode, so each call
    # replaces the previous batch's dump.
    # NOTE(review): the path is machine-specific; a failure to write must not
    # abort reward computation, so it is reported as a warning instead.
    save_path = "/nas/shared/kilab/wangyujia/check_rl/result-06170934.jsonl"
    try:
        with open(save_path, "w", encoding="utf-8") as f:
            for solution_str, ground_truth, score in zip(predicts, ground_truths, scores):
                record = {"solution_str": solution_str, "ground_truth": ground_truth, **score}
                f.write(json.dumps(record, ensure_ascii=False) + "\n")
    except OSError as exc:
        warnings.warn(f"could not write reward dump to {save_path}: {exc}")

    return scores
134
+
135
+
136
+ # print(format_reward("<think>Step-by-step logic</think> <answer> 5 </answer>"))
137
+ # print(format_reward("<think>Something\nacross lines</think>\n<answer> 0 </answer>"))
138
+
139
+ # print(format_reward("No tags here"))
140
+ # print(format_reward("<think>OK</think><answer>12</answer>")) # 多位数字
141
+ # print(format_reward("<think>OK</think><answer>A</answer>")) # 字母不允许
142
+ # print(format_reward("<think>Yes</think><answer> </answer>")) # 空的答案
143
+ # print(format_reward("<think>OK</think><answer>3</answer>extra")) # 多余内容
144
+ # print(format_reward("<answer>3</answer><think>Reasoning</think>")) # 标签顺序错误
145
+
146
+ # print(acc_reward("<think>Step-by-step logic</think> <answer> 5 </answer>",'5'))
147
+ # print(acc_reward("<think>Something\nacross lines</think>\n<answer> 0 </answer>",'1'))
148
+
149
+
150
+ # str_="<think>\nThe protein name is P32783, the protein amino acid sequence is MSTKPEKPIWMSQEDYDRQYGSITGDESSTVSKKDSKVTANAPGDGNGSLPVLQSSSILTSKVSDLPIEAESGFKIQKRRHERYDQEERLRKQRAQKLREEQLKRHEIEMTANRSINVDQIVREHYNERTIIANRAKRNLSPIIKLRNFNNAIKYMLIDKYTKPGDVVLELGCGKGGDLRKYGAAGISQFIGIDISNASIQEAHKRYRSMRNLDYQVVLITGDCFGESLGVAVEPFPDCRFPCDIVSTQFCLHYAFETEEKARRALLNVAKSLKIGGHFFGTIPDSEFIRYKLNKFPKEVEKPSWGNSIYKVTFENNSYQKNDYEFTSPYGQMYTYWLEDAIDNVPEYVVPFETLRSLADEYGLELVSQMPFNKFFVQEIPKWIERFSPKMREGLQRSDGRYGVEGDEKEAASYFYTMFAFRKVKQYIEPESVKPN, the protein localization prediction for P32783 is Cell.membrane,M, so the location label is 4. Therefore, option 4 is the correct answer.\n</think>\n<answer>\n4\n</answer>"
151
+ # print(format_reward(str_))
152
+
153
+
154
+
155
def check_rewards(jsonl_path: str) -> List[Dict[str, float]]:
    """Recompute and print the reward components for a dumped JSONL file.

    Each non-blank line must be a JSON object with ``solution_str`` and
    ``ground_truth`` keys. The format, accuracy and repeat scores are
    recomputed, printed as indented JSON, and collected.

    Args:
        jsonl_path: Path of the JSONL file to read (UTF-8).

    Returns:
        One dict per record with keys ``format``, ``accuracy``, ``repeat``
        and ``overall`` (the sum of the other three).
    """
    results = []
    with open(jsonl_path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            data = json.loads(line)
            solution_str = data["solution_str"]
            ground_truth = data["ground_truth"]

            # Recompute the three component scores.
            format_score = format_reward(solution_str)
            acc_score = acc_reward(solution_str, ground_truth)
            think_match = re.search(r'<think>(.*?)</think>', solution_str, re.DOTALL)
            think_str = think_match.group(1).strip() if think_match else ""
            repeat_score = repeatness_reward(think_str)

            result = {
                "format": format_score,
                "accuracy": acc_score,
                "repeat": repeat_score,
                "overall": format_score + acc_score + repeat_score,
            }
            # Collect the record so the declared return value is populated.
            results.append(result)

            print(json.dumps(result, indent=2, ensure_ascii=False))

    return results
182
+
183
if __name__ == "__main__":
    # Manual debugging entry point. Guarded so that importing this module as
    # a reward function does not read a machine-specific file at import time.
    check_rewards("/nas/shared/kilab/wangyujia/check_rl/check.jsonl")
EasyR1-new/examples/reward_function/dapo.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+ from typing import Any, Dict, List
17
+
18
+
19
+ # Constants for normalization
20
# (before, after) pairs applied in order with plain str.replace (not regex)
# by normalize_final_answer. Order matters: e.g. "an " is handled before "a ",
# and spaces are stripped before the "\\text{and}" rules run.
SUBSTITUTIONS = [
    ("an ", ""),
    ("a ", ""),
    (".$", "$"),
    ("\\$", ""),
    (r"\ ", ""),
    (" ", ""),
    ("mbox", "text"),
    (",\\text{and}", ","),
    ("\\text{and}", ","),
    ("\\text{m}", "\\text{}"),
]

# Literal substrings deleted from the answer (str.replace with "") by
# normalize_final_answer after SUBSTITUTIONS: unit words, filler nouns and
# LaTeX spacing/decoration fragments.
# NOTE(review): "\\text{\ns}" and "\\text{\n}" contain a real newline
# character (the "\n" escape is not doubled) — confirm this is intended.
REMOVED_EXPRESSIONS = [
    "square",
    "ways",
    "integers",
    "dollars",
    "mph",
    "inches",
    "hours",
    "km",
    "units",
    "\\ldots",
    "sue",
    "points",
    "feet",
    "minutes",
    "digits",
    "cents",
    "degrees",
    "cm",
    "gm",
    "pounds",
    "meters",
    "meals",
    "edges",
    "students",
    "childrentickets",
    "multiples",
    "\\text{s}",
    "\\text{.}",
    "\\text{\ns}",
    "\\text{}^2",
    "\\text{}^3",
    "\\text{\n}",
    "\\text{}",
    r"\mathrm{th}",
    r"^\circ",
    r"^{\circ}",
    r"\;",
    r",\!",
    "{,}",
    '"',
    "\\dots",
]
76
+
77
+
78
def normalize_final_answer(final_answer: str) -> str:
    """Canonicalise a final answer string so two answers can be compared.

    Steps, in order: keep the text after the last ``=``; apply the literal
    ``SUBSTITUTIONS``; delete every ``REMOVED_EXPRESSIONS`` entry; unwrap
    ``$...$``, ``\\text``, ``\\textbf``, ``\\overline`` and ``\\boxed``; expand
    shorthand ``\\frac``/``\\sqrt`` arguments into braces; drop ``$``; and
    strip thousands separators when the result is a plain integer.

    Args:
        final_answer: The answer string to normalize.

    Returns:
        The normalized answer with surrounding whitespace removed.
    """
    # Only the right-hand side of the last "=" is kept.
    text = final_answer.split("=")[-1]

    for old, new in SUBSTITUTIONS:
        text = text.replace(old, new)
    for fragment in REMOVED_EXPRESSIONS:
        text = text.replace(fragment, "")

    # Regex rewrites, applied strictly in this order:
    #   1-5: pull out the math between $...$ and unwrap LaTeX wrappers
    #   6:   \fracab -> \frac{a}{b}   (\fracabc -> \frac{a}{b}c)
    #   7:   \sqrta  -> \sqrt{a}      (\sqrtab  -> \sqrt{a}b)
    rewrite_rules = (
        (r"(.*?)(\$)(.*?)(\$)(.*)", "$\\3$"),
        (r"(\\text\{)(.*?)(\})", "\\2"),
        (r"(\\textbf\{)(.*?)(\})", "\\2"),
        (r"(\\overline\{)(.*?)(\})", "\\2"),
        (r"(\\boxed\{)(.*)(\})", "\\2"),
        (r"(frac)([^{])(.)", "frac{\\2}{\\3}"),
        (r"(sqrt)([^{])", "sqrt{\\2}"),
    )
    for regex, replacement in rewrite_rules:
        text = re.sub(regex, replacement, text)
    text = text.replace("$", "")

    # "1,234" -> "1234", but only when nothing except digits remains.
    without_commas = text.replace(",", "")
    if without_commas.isdigit():
        text = without_commas

    return text.strip()
117
+
118
+
119
def accuracy_reward(response: str, ground_truth: str) -> float:
    """Return 1.0 if the last ``Answer: ...`` line matches the ground truth, else -1.0.

    The answer is the rest of the line after the last case-insensitive
    ``Answer:`` marker; when no marker exists the placeholder ``[INVALID]`` is
    used. Both sides are compared after ``normalize_final_answer``.
    """
    candidates = re.findall(r"(?i)Answer\s*:\s*([^\n]+)", response)
    answer = candidates[-1] if candidates else "[INVALID]"
    is_correct = normalize_final_answer(answer) == normalize_final_answer(ground_truth)
    return 1.0 if is_correct else -1.0
126
+
127
+
128
def soft_overlong_punishment(response_length: int, max_response_length: int, overlong_buffer_length: int):
    """Length penalty in ``[-1.0, 0.0]`` that ramps up inside the overlong buffer.

    Responses no longer than ``max_response_length - overlong_buffer_length``
    get 0.0; responses longer than ``max_response_length`` get -1.0; in between
    the penalty falls linearly from 0 towards -1.
    """
    soft_limit = max_response_length - overlong_buffer_length
    if response_length <= soft_limit:
        return 0.0
    if response_length > max_response_length:
        return -1.0
    # Inside the buffer: negative fraction of the buffer already consumed.
    return (soft_limit - response_length) / overlong_buffer_length
136
+
137
+
138
def compute_score(
    reward_inputs: List[Dict[str, Any]],
    max_response_length: int,
    overlong_buffer_length: int,
    overlong_penalty_factor: float,
) -> List[Dict[str, float]]:
    """Score a batch of responses with accuracy plus a soft length penalty.

    Args:
        reward_inputs: List of dicts; each must provide ``response``,
            ``ground_truth`` and ``response_length``.
        max_response_length: Hard length limit passed to the overlong penalty.
        overlong_buffer_length: Width of the soft-penalty window below the limit.
        overlong_penalty_factor: Multiplier applied to the overlong penalty.

    Returns:
        One dict per input with keys ``overall``, ``accuracy``, ``overlong``
        and ``accuracy_normalized`` (accuracy rescaled from [-1, 1] to [0, 1]).

    Raises:
        ValueError: If ``reward_inputs`` is not a list.
    """
    if not isinstance(reward_inputs, list):
        raise ValueError("Please use `reward_type=batch` for dapo reward function.")

    def score_one(item: Dict[str, Any]) -> Dict[str, float]:
        # Only the tail of the response is searched for the answer line.
        tail = item["response"][-300:]  # The longest answer in MATH-500 has 159 characters
        accuracy = accuracy_reward(tail, item["ground_truth"])
        overlong = soft_overlong_punishment(
            item["response_length"], max_response_length, overlong_buffer_length
        )
        return {
            "overall": accuracy + overlong * overlong_penalty_factor,
            "accuracy": accuracy,
            "overlong": overlong,
            "accuracy_normalized": 0.5 * (accuracy + 1.0),
        }

    return [score_one(item) for item in reward_inputs]
EasyR1-new/examples/reward_function/math.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+ from typing import Any, Dict, List
17
+
18
+ from mathruler.grader import extract_boxed_content, grade_answer
19
+
20
+
21
def format_reward(response: str) -> float:
    """Return 1.0 when the whole response is ``<think>...</think>`` followed by a ``\\boxed{...}``.

    Arbitrary text (including newlines) may appear between the think block and
    the box, and after the box; nothing may precede ``<think>``.
    """
    matched = re.fullmatch(r"<think>.*</think>.*\\boxed\{.*\}.*", response, flags=re.DOTALL)
    return 0.0 if matched is None else 1.0
25
+
26
+
27
def accuracy_reward(response: str, ground_truth: str) -> float:
    """Return 1.0 if the boxed content of ``response`` is graded equal to ``ground_truth``, else 0.0."""
    boxed = extract_boxed_content(response)
    if grade_answer(boxed, ground_truth):
        return 1.0
    return 0.0
30
+
31
+
32
def compute_score(reward_inputs: List[Dict[str, Any]], format_weight: float = 0.1) -> List[Dict[str, float]]:
    """Score a batch of responses as a weighted mix of accuracy and format.

    Args:
        reward_inputs: List of dicts; each must provide ``response`` and
            ``ground_truth``.
        format_weight: Weight of the format score; accuracy gets
            ``1 - format_weight``.

    Returns:
        One dict per input with keys ``overall``, ``format`` and ``accuracy``.

    Raises:
        ValueError: If ``reward_inputs`` is not a list.
    """
    if not isinstance(reward_inputs, list):
        raise ValueError("Please use `reward_type=batch` for math reward function.")

    accuracy_weight = 1 - format_weight
    results = []
    for item in reward_inputs:
        # Remove whitespace around "<", ">" and "/" before matching tags.
        cleaned = re.sub(r"\s*(<|>|/)\s*", r"\1", item["response"])  # handle qwen2.5vl-32b format
        fmt = format_reward(cleaned)
        acc = accuracy_reward(cleaned, item["ground_truth"])
        results.append(
            {
                "overall": accuracy_weight * acc + format_weight * fmt,
                "format": fmt,
                "accuracy": acc,
            }
        )

    return results
EasyR1-new/examples/reward_function/r1v.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+ from typing import Any, Dict
17
+
18
+ from mathruler.grader import grade_answer
19
+
20
+
21
def format_reward(response: str) -> float:
    """Return 1.0 when the whole response is ``<think>...</think>`` then ``<answer>...</answer>``.

    Only whitespace may separate the two blocks, and nothing may come before
    ``<think>`` or after ``</answer>``.
    """
    matched = re.fullmatch(r"<think>.*?</think>\s*<answer>.*?</answer>", response, flags=re.DOTALL)
    return 0.0 if matched is None else 1.0
25
+
26
+
27
def accuracy_reward(response: str, ground_truth: str) -> float:
    """Return 1.0 if the answer in ``response`` is graded equal to ``ground_truth``.

    The answer is the stripped text inside the first ``<answer>...</answer>``
    pair; if no such pair exists the whole stripped response is graded.

    Args:
        response: Raw model output.
        ground_truth: Expected answer; surrounding whitespace is ignored.

    Returns:
        1.0 when ``grade_answer`` accepts the answer, otherwise 0.0. Any
        exception raised while extracting or grading is treated as a miss.
    """
    try:
        # re.DOTALL lets the answer span several lines, matching the layout
        # that format_reward in this file accepts (it also uses re.DOTALL).
        content_match = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
        given_answer = content_match.group(1).strip() if content_match else response.strip()
        if grade_answer(given_answer, ground_truth.strip()):
            return 1.0

    except Exception:
        # Best-effort grading: a malformed answer scores 0 instead of
        # aborting the reward computation.
        pass

    return 0.0
38
+
39
+
40
def compute_score(reward_input: Dict[str, Any], format_weight: float = 0.5) -> Dict[str, float]:
    """Score a single response as a weighted mix of accuracy and format.

    Args:
        reward_input: Dict providing ``response`` and ``ground_truth``.
        format_weight: Weight of the format score; accuracy gets
            ``1 - format_weight``.

    Returns:
        Dict with keys ``overall``, ``format`` and ``accuracy``.

    Raises:
        ValueError: If ``reward_input`` is not a dict.
    """
    if not isinstance(reward_input, dict):
        raise ValueError("Please use `reward_type=sequential` for r1v reward function.")

    response = reward_input["response"]
    fmt = format_reward(response)
    acc = accuracy_reward(response, reward_input["ground_truth"])
    overall = (1 - format_weight) * acc + format_weight * fmt
    return {"overall": overall, "format": fmt, "accuracy": acc}
EasyR1-new/examples/runtime_env.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ working_dir: ./
2
+ excludes: ["/.git/"]
3
+ env_vars:
4
+ TOKENIZERS_PARALLELISM: "true"
5
+ NCCL_DEBUG: "WARN"
6
+ VLLM_LOGGING_LEVEL: "WARN"
7
+ TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
8
+ PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False"
9
+ PYTHONUNBUFFERED: "1"
EasyR1-new/examples/wandb/debug-internal.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-07-21T14:07:35.211628547+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-07-21T14:07:50.875611638+08:00","level":"INFO","msg":"stream: created new stream","id":"a9qblh0u"}
3
+ {"time":"2025-07-21T14:07:50.876588753+08:00","level":"INFO","msg":"stream: started","id":"a9qblh0u"}
4
+ {"time":"2025-07-21T14:07:50.87663237+08:00","level":"INFO","msg":"sender: started","stream_id":"a9qblh0u"}
5
+ {"time":"2025-07-21T14:07:50.876605114+08:00","level":"INFO","msg":"handler: started","stream_id":"a9qblh0u"}
6
+ {"time":"2025-07-21T14:07:50.87665507+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"a9qblh0u"}
7
+ {"time":"2025-07-21T14:08:05.783504415+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":7.434542791},{"desc":"uploading data","runtime_seconds":0.571568597}],"total_operations":2}}
8
+ {"time":"2025-07-21T14:08:31.955353631+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-07-21T14:08:56.48244624+08:00","level":"INFO","msg":"stream: closing","id":"a9qblh0u"}
10
+ {"time":"2025-07-21T14:08:56.48558812+08:00","level":"INFO","msg":"handler: closed","stream_id":"a9qblh0u"}
11
+ {"time":"2025-07-21T14:08:56.485598269+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"a9qblh0u"}
12
+ {"time":"2025-07-21T14:08:56.485607803+08:00","level":"INFO","msg":"sender: closed","stream_id":"a9qblh0u"}
13
+ {"time":"2025-07-21T14:08:56.50017009+08:00","level":"INFO","msg":"stream: closed","id":"a9qblh0u"}
EasyR1-new/examples/wandb/debug.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Configure stats pid to 317976
3
+ 2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings
5
+ 2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug.log
7
+ 2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug-internal.log
8
+ 2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:init():830] calling init triggers
9
+ 2025-07-21 14:07:34,953 INFO MainThread:317976 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 
'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 
'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}}
11
+ 2025-07-21 14:07:34,953 INFO MainThread:317976 [wandb_init.py:init():871] starting backend
12
+ 2025-07-21 14:07:35,172 INFO MainThread:317976 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-07-21 14:07:35,174 INFO MainThread:317976 [wandb_init.py:init():882] backend started and connected
14
+ 2025-07-21 14:07:35,186 INFO MainThread:317976 [wandb_init.py:init():953] updated telemetry
15
+ 2025-07-21 14:07:35,302 INFO MainThread:317976 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-07-21 14:07:58,269 INFO MainThread:317976 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-07-21 14:07:58,556 INFO MainThread:317976 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-07-21 14:07:58,556 INFO MainThread:317976 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-07-21 14:07:58,562 INFO MainThread:317976 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-07-21 14:07:58,562 INFO MainThread:317976 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-07-21 14:07:58,574 INFO MainThread:317976 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-07-21 14:08:04,748 INFO MainThread:317976 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/a9qblh0u
23
+ 2025-07-21 14:08:04,755 INFO MainThread:317976 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
24
+ 2025-07-21 14:08:04,767 INFO MainThread:317976 [wandb_run.py:_restore():2405] restore
25
+ 2025-07-21 14:08:04,771 INFO MainThread:317976 [wandb_run.py:_restore():2411] restore done
26
+ 2025-07-21 14:08:56,463 INFO MainThread:317976 [wandb_run.py:_footer_history_summary_info():3903] rendering history
27
+ 2025-07-21 14:08:56,469 INFO MainThread:317976 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
28
+ 2025-07-21 14:08:56,469 INFO MainThread:317976 [wandb_run.py:_footer_sync_info():3864] logging synced files
EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/output.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.6.45", "pid": 7695, "uuid": "4931688589ea40edb6b0579192261e95", "closed": false}
2
+ Start validation...
EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/requirements.txt ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setproctitle==1.2.2
2
+ colorama==0.4.6
3
+ psutil==7.0.0
4
+ numpy==2.2.6
5
+ pylatexenc==2.10
6
+ webdataset==1.0.2
7
+ email_validator==2.2.0
8
+ confection==0.1.5
9
+ text-unidecode==1.3
10
+ python-dotenv==1.1.1
11
+ starlette==0.47.1
12
+ pyasn1==0.6.1
13
+ contexttimer==0.3.3
14
+ requests==2.32.4
15
+ omegaconf==2.3.0
16
+ tzdata==2025.2
17
+ yarl==1.20.1
18
+ nvidia-cuda-nvrtc-cu12==12.4.127
19
+ decord==0.6.0
20
+ nvidia-cublas-cu12==12.4.5.8
21
+ proto-plus==1.26.1
22
+ opentelemetry-semantic-conventions-ai==0.4.11
23
+ scipy==1.15.3
24
+ googleapis-common-protos==1.70.0
25
+ nvidia-cufile-cu12==1.11.1.6
26
+ parso==0.8.4
27
+ opentelemetry-exporter-otlp-proto-http==1.26.0
28
+ vllm==0.8.5.post1
29
+ sniffio==1.3.1
30
+ python-dateutil==2.9.0.post0
31
+ openai==1.90.0
32
+ absl-py==2.3.1
33
+ Deprecated==1.2.18
34
+ cupy-cuda12x==13.5.1
35
+ setuptools==78.1.1
36
+ peft==0.16.0
37
+ rignore==0.6.2
38
+ joblib==1.5.1
39
+ platformdirs==4.3.8
40
+ regex==2024.11.6
41
+ datasets==4.0.0
42
+ preshed==3.0.10
43
+ aiohappyeyeballs==2.6.1
44
+ uvloop==0.21.0
45
+ sentry-sdk==2.32.0
46
+ virtualenv==20.31.2
47
+ lazy_loader==0.4
48
+ rich==14.0.0
49
+ pycocotools==2.0.10
50
+ timm==0.4.12
51
+ rich-toolkit==0.14.8
52
+ fastapi-cli==0.0.8
53
+ antlr4-python3-runtime==4.9.3
54
+ salesforce-lavis==1.0.2
55
+ Pygments==2.19.2
56
+ gitdb==4.0.12
57
+ six==1.17.0
58
+ verl==0.3.2.dev0
59
+ smmap==5.0.2
60
+ fastapi-cloud-cli==0.1.4
61
+ opencensus==0.11.4
62
+ annotated-types==0.7.0
63
+ xxhash==3.5.0
64
+ frozenlist==1.7.0
65
+ pyzmq==27.0.0
66
+ Jinja2==3.1.6
67
+ ptyprocess==0.7.0
68
+ interegular==0.3.3
69
+ opentelemetry-semantic-conventions==0.47b0
70
+ jiter==0.10.0
71
+ idna==3.10
72
+ typing_extensions==4.14.1
73
+ nvidia-cusolver-cu12==11.6.1.9
74
+ propcache==0.3.2
75
+ nest-asyncio==1.6.0
76
+ pillow==11.3.0
77
+ tenacity==9.1.2
78
+ sentencepiece==0.2.0
79
+ portalocker==3.2.0
80
+ matplotlib-inline==0.1.7
81
+ pandas==2.3.1
82
+ compressed-tensors==0.9.3
83
+ typing-inspection==0.4.1
84
+ nltk==3.9.1
85
+ opencv-python-headless==4.12.0.88
86
+ dnspython==2.7.0
87
+ tokenizers==0.21.2
88
+ wheel==0.45.1
89
+ python-multipart==0.0.20
90
+ catalogue==2.0.10
91
+ smart_open==7.3.0.post1
92
+ multidict==6.6.3
93
+ xgrammar==0.1.18
94
+ aiosignal==1.4.0
95
+ pybase64==1.4.1
96
+ blake3==1.0.5
97
+ certifi==2025.7.14
98
+ torchdata==0.11.0
99
+ qwen-vl-utils==0.0.11
100
+ nvidia-nvjitlink-cu12==12.4.127
101
+ urllib3==2.5.0
102
+ aiohttp-cors==0.8.1
103
+ outlines_core==0.1.26
104
+ pydantic-extra-types==2.10.5
105
+ filelock==3.18.0
106
+ airportsdata==20250706
107
+ ipython==8.37.0
108
+ pydantic==2.11.7
109
+ cloudpickle==3.1.1
110
+ torchaudio==2.6.0
111
+ tiktoken==0.9.0
112
+ pexpect==4.9.0
113
+ flash-attn==2.7.1.post1
114
+ nvidia-nvtx-cu12==12.4.127
115
+ bleach==6.2.0
116
+ watchfiles==1.1.0
117
+ uvicorn==0.35.0
118
+ numba==0.61.2
119
+ tornado==6.5.1
120
+ networkx==3.4.2
121
+ sympy==1.13.1
122
+ watchdog==6.0.0
123
+ kaggle==1.7.4.5
124
+ pyarrow==20.0.0
125
+ accelerate==1.8.1
126
+ mpmath==1.3.0
127
+ lightning-utilities==0.14.3
128
+ codetiming==1.4.0
129
+ ftfy==6.3.1
130
+ triton==3.2.0
131
+ referencing==0.36.2
132
+ dill==0.3.8
133
+ language_data==1.3.0
134
+ python-magic==0.4.27
135
+ wasabi==1.1.3
136
+ pyvers==0.1.0
137
+ murmurhash==1.0.13
138
+ mathruler==0.1.0
139
+ jsonschema-specifications==2025.4.1
140
+ blinker==1.9.0
141
+ imageio==2.37.0
142
+ pycocoevalcap==1.2
143
+ python-json-logger==3.3.0
144
+ nvidia-cuda-cupti-cu12==12.4.127
145
+ fairscale==0.4.4
146
+ httptools==0.6.4
147
+ identify==2.6.12
148
+ streamlit==1.46.1
149
+ mdurl==0.1.2
150
+ decorator==5.2.1
151
+ h11==0.16.0
152
+ distlib==0.3.9
153
+ webencodings==0.5.1
154
+ transformers==4.52.4
155
+ srsly==2.5.1
156
+ fsspec==2025.3.0
157
+ diskcache==5.6.3
158
+ click==8.2.1
159
+ blis==1.3.0
160
+ colorful==0.5.7
161
+ websockets==15.0.1
162
+ liger_kernel==0.6.0
163
+ lark==1.2.2
164
+ cymem==2.0.11
165
+ anyio==4.9.0
166
+ opentelemetry-exporter-otlp-proto-grpc==1.26.0
167
+ fastapi==0.116.1
168
+ tensordict==0.9.1
169
+ pre_commit==4.2.0
170
+ wrapt==1.17.2
171
+ opentelemetry-api==1.26.0
172
+ nvidia-curand-cu12==10.3.5.147
173
+ spacy==3.8.7
174
+ narwhals==1.47.0
175
+ exceptiongroup==1.3.0
176
+ braceexpand==0.1.7
177
+ rouge_score==0.1.2
178
+ msgpack==1.1.1
179
+ async-timeout==5.0.1
180
+ protobuf==4.25.8
181
+ huggingface-hub==0.33.4
182
+ wandb==0.21.0
183
+ httpx==0.28.1
184
+ mistral_common==1.8.0
185
+ gguf==0.17.1
186
+ opentelemetry-proto==1.26.0
187
+ nvidia-nccl-cu12==2.21.5
188
+ wcwidth==0.2.13
189
+ nvidia-cusparselt-cu12==0.6.2
190
+ scikit-image==0.25.2
191
+ cfgv==3.4.0
192
+ markdown-it-py==3.0.0
193
+ packaging==25.0
194
+ charset-normalizer==3.4.2
195
+ executing==2.2.0
196
+ py-spy==0.4.0
197
+ pure_eval==0.2.3
198
+ safetensors==0.5.3
199
+ pyasn1_modules==0.4.2
200
+ jsonschema==4.24.0
201
+ spacy-legacy==3.0.12
202
+ astor==0.8.1
203
+ shellingham==1.5.4
204
+ langcodes==3.5.0
205
+ pytz==2025.2
206
+ distro==1.9.0
207
+ google-api-core==2.25.1
208
+ rsa==4.9.1
209
+ multiprocess==0.70.16
210
+ iopath==0.1.10
211
+ weasel==0.4.1
212
+ tifffile==2025.5.10
213
+ nodeenv==1.9.1
214
+ opentelemetry-exporter-prometheus==0.56b0
215
+ einops==0.8.1
216
+ lm-format-enforcer==0.10.11
217
+ pydantic_core==2.33.2
218
+ hf-xet==1.1.5
219
+ opentelemetry-sdk==1.26.0
220
+ ninja==1.11.1.4
221
+ altair==5.5.0
222
+ ray==2.47.1
223
+ depyf==0.18.0
224
+ attrs==25.3.0
225
+ tqdm==4.67.1
226
+ xformers==0.0.29.post2
227
+ pydeck==0.9.1
228
+ stack-data==0.6.3
229
+ prometheus-fastapi-instrumentator==7.1.0
230
+ grpcio==1.73.1
231
+ torch==2.6.0
232
+ plotly==6.2.0
233
+ nvidia-cudnn-cu12==9.1.0.70
234
+ python-slugify==8.0.4
235
+ opencensus-context==0.1.3
236
+ importlib_metadata==8.0.0
237
+ orjson==3.10.18
238
+ prompt_toolkit==3.0.51
239
+ psutil==7.0.0
240
+ opendatasets==0.1.22
241
+ asttokens==3.0.0
242
+ pycountry==24.6.1
243
+ partial-json-parser==0.2.1.1.post6
244
+ zipp==3.23.0
245
+ pip==25.1
246
+ MarkupSafe==3.0.2
247
+ opentelemetry-exporter-otlp-proto-common==1.26.0
248
+ llvmlite==0.44.0
249
+ nvidia-cufft-cu12==11.2.1.3
250
+ GitPython==3.1.44
251
+ fastrlock==0.8.3
252
+ PyYAML==6.0.2
253
+ opentelemetry-exporter-otlp==1.26.0
254
+ typer==0.16.0
255
+ cloudpathlib==0.21.1
256
+ toml==0.10.2
257
+ pytorch-lightning==2.5.2
258
+ marisa-trie==1.2.1
259
+ msgspec==0.19.0
260
+ llguidance==0.7.30
261
+ google-auth==2.40.3
262
+ traitlets==5.14.3
263
+ rpds-py==0.26.0
264
+ cachetools==5.5.2
265
+ spacy-loggers==1.0.5
266
+ nvidia-cuda-runtime-cu12==12.4.127
267
+ aiohttp==3.12.14
268
+ torchvision==0.21.0
269
+ av==15.0.0
270
+ torchmetrics==1.7.4
271
+ nvidia-cusparse-cu12==12.3.1.170
272
+ outlines==0.1.11
273
+ jedi==0.19.2
274
+ thinc==8.3.6
275
+ prometheus_client==0.22.1
276
+ httpcore==1.0.9
277
+ py-cpuinfo==9.0.0
278
+ modelscope==1.28.0
279
+ verl==0.3.2.dev0
280
+ jaraco.functools==4.0.1
281
+ inflect==7.3.1
282
+ jaraco.collections==5.1.0
283
+ packaging==24.2
284
+ wheel==0.45.1
285
+ tomli==2.0.1
286
+ platformdirs==4.2.2
287
+ typing_extensions==4.12.2
288
+ more-itertools==10.3.0
289
+ autocommand==2.2.2
290
+ jaraco.text==3.12.1
291
+ importlib_metadata==8.0.0
292
+ jaraco.context==5.3.0
293
+ zipp==3.19.2
294
+ backports.tarfile==1.2.0
295
+ typeguard==4.3.0
EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/wandb-metadata.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-07-16T13:03:06.627811Z",
5
+ "args": [
6
+ "--node-ip-address=10.1.6.45",
7
+ "--node-manager-port=42325",
8
+ "--object-store-name=/tmp/ray/session_2025-07-16_20-51-10_730275_5196/sockets/plasma_store",
9
+ "--raylet-name=/tmp/ray/session_2025-07-16_20-51-10_730275_5196/sockets/raylet",
10
+ "--redis-address=None",
11
+ "--metrics-agent-port=54069",
12
+ "--logging-rotate-bytes=536870912",
13
+ "--logging-rotate-backup-count=5",
14
+ "--runtime-env-agent-port=57480",
15
+ "--gcs-address=10.1.6.45:54882",
16
+ "--session-name=session_2025-07-16_20-51-10_730275_5196",
17
+ "--temp-dir=/tmp/ray",
18
+ "--webui=127.0.0.1:8265",
19
+ "--cluster-id=a69f29ea92b56cbc2f572353862768b5a0832495b7a590f4f273963a",
20
+ "--startup-token=28",
21
+ "--worker-launch-time-ms=1752670273261",
22
+ "--node-id=e54e37f4f5b34463471871dbe5c90937958f768732bc6e9579a13842",
23
+ "--runtime-env-hash=-115784934",
24
+ "--enable-resource-isolation=false"
25
+ ],
26
+ "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py",
27
+ "git": {
28
+ "remote": "https://github.com/PorUna-byte/PAR.git",
29
+ "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db"
30
+ },
31
+ "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples",
32
+ "host": "dsw-297442-5bd684fbff-4l96r",
33
+ "executable": "/root/miniconda3/envs/easyr1-new/bin/python3",
34
+ "cpu_count": 28,
35
+ "cpu_count_logical": 28,
36
+ "gpu": "NVIDIA A800-SXM4-80GB",
37
+ "gpu_count": 4,
38
+ "disk": {
39
+ "/": {
40
+ "total": "1623302262784",
41
+ "used": "1165746176"
42
+ }
43
+ },
44
+ "memory": {
45
+ "total": "549755813888"
46
+ },
47
+ "gpu_nvidia": [
48
+ {
49
+ "name": "NVIDIA A800-SXM4-80GB",
50
+ "architecture": "Ampere",
51
+ "uuid": "GPU-c783413d-e4e1-22c5-7c48-9296c28b08a0"
52
+ },
53
+ {
54
+ "name": "NVIDIA A800-SXM4-80GB",
55
+ "architecture": "Ampere",
56
+ "uuid": "GPU-0ad82850-a679-fa6b-9200-a26edb1bb8a4"
57
+ },
58
+ {
59
+ "name": "NVIDIA A800-SXM4-80GB",
60
+ "architecture": "Ampere",
61
+ "uuid": "GPU-e73b7d7b-4455-62ee-ec7e-a2eb1d845e07"
62
+ },
63
+ {
64
+ "name": "NVIDIA A800-SXM4-80GB",
65
+ "architecture": "Ampere",
66
+ "uuid": "GPU-71ee45de-57b2-ac7c-13c1-08a1f197eb20"
67
+ }
68
+ ],
69
+ "cudaVersion": "12.1",
70
+ "writerId": "t6v0x6ljtdqkxmc6nxsvdn00ede7tanp"
71
+ }
EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-07-16T21:03:07.422600635+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-07-16T21:03:27.915788626+08:00","level":"INFO","msg":"stream: created new stream","id":"lkflebyj"}
3
+ {"time":"2025-07-16T21:03:27.937736115+08:00","level":"INFO","msg":"stream: started","id":"lkflebyj"}
4
+ {"time":"2025-07-16T21:03:27.937745307+08:00","level":"INFO","msg":"handler: started","stream_id":"lkflebyj"}
5
+ {"time":"2025-07-16T21:03:27.937759674+08:00","level":"INFO","msg":"sender: started","stream_id":"lkflebyj"}
6
+ {"time":"2025-07-16T21:03:27.937780163+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"lkflebyj"}
EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-16 21:03:07,126 INFO MainThread:7695 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-07-16 21:03:07,126 INFO MainThread:7695 [wandb_setup.py:_flush():80] Configure stats pid to 7695
3
+ 2025-07-16 21:03:07,126 INFO MainThread:7695 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-16 21:03:07,126 INFO MainThread:7695 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings
5
+ 2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log
7
+ 2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log
8
+ 2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:init():830] calling init triggers
9
+ 2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 
'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 
'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 4, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}}
11
+ 2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:init():871] starting backend
12
+ 2025-07-16 21:03:07,349 INFO MainThread:7695 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-07-16 21:03:07,374 INFO MainThread:7695 [wandb_init.py:init():882] backend started and connected
14
+ 2025-07-16 21:03:07,388 INFO MainThread:7695 [wandb_init.py:init():953] updated telemetry
15
+ 2025-07-16 21:03:08,265 INFO MainThread:7695 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-07-16 21:03:32,572 INFO MainThread:7695 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-07-16 21:03:32,900 INFO MainThread:7695 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-07-16 21:03:32,901 INFO MainThread:7695 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-07-16 21:03:32,907 INFO MainThread:7695 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-07-16 21:03:32,912 INFO MainThread:7695 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-07-16 21:03:32,949 INFO MainThread:7695 [wandb_init.py:init():1075] run started, returning control to user process
EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/run-lkflebyj.wandb ADDED
File without changes
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ repix6q725hnzsubljgya3pkb0pg0b9q:
6
+ args:
7
+ - --node-ip-address=10.1.5.237
8
+ - --node-manager-port=37853
9
+ - --object-store-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/plasma_store
10
+ - --raylet-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/raylet
11
+ - --redis-address=None
12
+ - --metrics-agent-port=43790
13
+ - --logging-rotate-bytes=536870912
14
+ - --logging-rotate-backup-count=5
15
+ - --runtime-env-agent-port=63904
16
+ - --gcs-address=10.1.5.237:56758
17
+ - --session-name=session_2025-07-18_15-56-28_336135_54391
18
+ - --temp-dir=/tmp/ray
19
+ - --webui=127.0.0.1:8265
20
+ - --cluster-id=cc22236bcaa2a9ab2bdb3c76723ef15af4933b041414da957aa668b9
21
+ - --startup-token=64
22
+ - --worker-launch-time-ms=1752825390762
23
+ - --node-id=0967616139eea74249995a5549bf4039d244c259acd06a5a8fe0b7aa
24
+ - --runtime-env-hash=-115784934
25
+ - --enable-resource-isolation=false
26
+ cpu_count: 64
27
+ cpu_count_logical: 64
28
+ cudaVersion: "12.1"
29
+ disk:
30
+ /:
31
+ total: "1623302262784"
32
+ used: "1224904704"
33
+ email: gia0603yucca@gmail.com
34
+ executable: /root/miniconda3/envs/easyr1-new/bin/python3
35
+ git:
36
+ commit: b8caf406aa1699c788f0ca6e44a1769452c317db
37
+ remote: https://github.com/PorUna-byte/PAR.git
38
+ gpu: NVIDIA A800-SXM4-80GB
39
+ gpu_count: 8
40
+ gpu_nvidia:
41
+ - architecture: Ampere
42
+ name: NVIDIA A800-SXM4-80GB
43
+ uuid: GPU-f7e858cd-ae03-031d-b834-86bf87923211
44
+ - architecture: Ampere
45
+ name: NVIDIA A800-SXM4-80GB
46
+ uuid: GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0
47
+ - architecture: Ampere
48
+ name: NVIDIA A800-SXM4-80GB
49
+ uuid: GPU-1bba2921-208c-d0ad-1a05-25fc85d62630
50
+ - architecture: Ampere
51
+ name: NVIDIA A800-SXM4-80GB
52
+ uuid: GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71
53
+ - architecture: Ampere
54
+ name: NVIDIA A800-SXM4-80GB
55
+ uuid: GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b
56
+ - architecture: Ampere
57
+ name: NVIDIA A800-SXM4-80GB
58
+ uuid: GPU-becb8d59-2ab7-b50d-5770-183c6478747a
59
+ - architecture: Ampere
60
+ name: NVIDIA A800-SXM4-80GB
61
+ uuid: GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40
62
+ - architecture: Ampere
63
+ name: NVIDIA A800-SXM4-80GB
64
+ uuid: GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655
65
+ host: dsw-266702-dc4b748ff-f7c66
66
+ memory:
67
+ total: "549755813888"
68
+ os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35
69
+ program: /root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py
70
+ python: CPython 3.10.0
71
+ root: /nas/shared/kilab/wangyujia/EasyR1-new/examples
72
+ startedAt: "2025-07-18T08:00:33.186442Z"
73
+ writerId: repix6q725hnzsubljgya3pkb0pg0b9q
74
+ m: []
75
+ python_version: 3.10.0
76
+ t:
77
+ "1":
78
+ - 1
79
+ - 9
80
+ - 11
81
+ - 30
82
+ - 33
83
+ - 41
84
+ - 49
85
+ - 51
86
+ - 63
87
+ - 71
88
+ - 95
89
+ - 98
90
+ - 103
91
+ - 105
92
+ "2":
93
+ - 1
94
+ - 9
95
+ - 11
96
+ - 30
97
+ - 33
98
+ - 41
99
+ - 49
100
+ - 51
101
+ - 63
102
+ - 71
103
+ - 95
104
+ - 98
105
+ - 103
106
+ - 105
107
+ "3":
108
+ - 2
109
+ - 13
110
+ - 16
111
+ "4": 3.10.0
112
+ "5": 0.21.0
113
+ "6": 4.52.4
114
+ "12": 0.21.0
115
+ "13": linux-x86_64
116
+ algorithm:
117
+ value:
118
+ adv_estimator: grpo
119
+ disable_kl: false
120
+ filter_high: 0.99
121
+ filter_key: overall
122
+ filter_low: 0.01
123
+ gamma: 1
124
+ kl_coef: 0.01
125
+ kl_horizon: 10000
126
+ kl_penalty: low_var_kl
127
+ kl_target: 0.1
128
+ kl_type: fixed
129
+ lam: 1
130
+ online_filtering: false
131
+ use_kl_loss: true
132
+ data:
133
+ value:
134
+ answer_key: answer
135
+ filter_overlong_prompts: true
136
+ filter_overlong_prompts_workers: 16
137
+ format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja
138
+ image_dir: null
139
+ image_key: images
140
+ max_pixels: 4194304
141
+ max_prompt_length: 4096
142
+ max_response_length: 16384
143
+ min_pixels: 262144
144
+ mini_rollout_batch_size: null
145
+ override_chat_template: null
146
+ prompt_key: question
147
+ protein_key: protein
148
+ rollout_batch_size: 128
149
+ seed: 1
150
+ shuffle: true
151
+ train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl
152
+ val_batch_size: 256
153
+ val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl
154
+ video_fps: 2
155
+ video_key: videos
156
+ trainer:
157
+ value:
158
+ critic_warmup: 0
159
+ experiment_name: qwen2.5_7b_bio_06182042
160
+ load_checkpoint_path: null
161
+ logger:
162
+ - console
163
+ - wandb
164
+ max_steps: null
165
+ max_try_make_batch: 20
166
+ n_gpus_per_node: 8
167
+ nnodes: 1
168
+ project_name: easy_r1
169
+ save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042
170
+ save_freq: 5
171
+ save_limit: 3
172
+ save_model_only: false
173
+ total_epochs: 1
174
+ val_before_train: true
175
+ val_freq: 5
176
+ val_generations_to_log: 3
177
+ val_only: false
178
+ worker:
179
+ value:
180
+ actor:
181
+ clip_ratio_dual: 3
182
+ clip_ratio_high: 0.3
183
+ clip_ratio_low: 0.2
184
+ disable_kl: false
185
+ fsdp:
186
+ enable_cpu_offload: false
187
+ enable_full_shard: true
188
+ enable_rank0_init: true
189
+ fsdp_size: -1
190
+ mp_buffer_dtype: fp32
191
+ mp_param_dtype: bf16
192
+ mp_reduce_dtype: fp32
193
+ torch_dtype: null
194
+ use_orig_params: false
195
+ global_batch_size: 64
196
+ global_batch_size_per_device: -1
197
+ kl_coef: 0.01
198
+ kl_penalty: low_var_kl
199
+ loss_avg_mode: token
200
+ max_grad_norm: 1
201
+ micro_batch_size_per_device_for_experience: 16
202
+ micro_batch_size_per_device_for_update: 2
203
+ model:
204
+ enable_gradient_checkpointing: true
205
+ freeze_vision_tower: false
206
+ model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model
207
+ tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model
208
+ trust_remote_code: false
209
+ offload:
210
+ offload_optimizer: true
211
+ offload_params: true
212
+ optim:
213
+ betas:
214
+ - 0.9
215
+ - 0.999
216
+ lr: 1e-06
217
+ lr_warmup_ratio: 0
218
+ lr_warmup_steps: null
219
+ min_lr_ratio: null
220
+ strategy: adamw
221
+ training_steps: 72
222
+ warmup_style: constant
223
+ weight_decay: 0.01
224
+ padding_free: true
225
+ ppo_epochs: 1
226
+ strategy: fsdp
227
+ ulysses_size: 1
228
+ use_kl_loss: true
229
+ use_torch_compile: true
230
+ critic:
231
+ cliprange_value: 0.5
232
+ fsdp:
233
+ enable_cpu_offload: false
234
+ enable_full_shard: true
235
+ enable_rank0_init: true
236
+ fsdp_size: -1
237
+ mp_buffer_dtype: fp32
238
+ mp_param_dtype: bf16
239
+ mp_reduce_dtype: fp32
240
+ torch_dtype: null
241
+ use_orig_params: false
242
+ global_batch_size: 256
243
+ global_batch_size_per_device: -1
244
+ loss_avg_mode: token
245
+ max_grad_norm: 1
246
+ micro_batch_size_per_device_for_experience: 16
247
+ micro_batch_size_per_device_for_update: 4
248
+ model:
249
+ enable_gradient_checkpointing: true
250
+ freeze_vision_tower: false
251
+ model_path: null
252
+ tokenizer_path: null
253
+ trust_remote_code: true
254
+ offload:
255
+ offload_optimizer: false
256
+ offload_params: false
257
+ optim:
258
+ betas:
259
+ - 0.9
260
+ - 0.999
261
+ lr: 1e-06
262
+ lr_warmup_ratio: 0
263
+ lr_warmup_steps: null
264
+ min_lr_ratio: null
265
+ strategy: adamw
266
+ training_steps: 72
267
+ warmup_style: constant
268
+ weight_decay: 0.01
269
+ padding_free: false
270
+ ppo_epochs: 1
271
+ strategy: fsdp
272
+ ulysses_size: 1
273
+ hybrid_engine: true
274
+ ref:
275
+ fsdp:
276
+ enable_cpu_offload: true
277
+ enable_full_shard: true
278
+ enable_rank0_init: true
279
+ fsdp_size: -1
280
+ mp_buffer_dtype: fp32
281
+ mp_param_dtype: bf16
282
+ mp_reduce_dtype: fp32
283
+ torch_dtype: null
284
+ use_orig_params: false
285
+ micro_batch_size_per_device_for_experience: 16
286
+ offload:
287
+ offload_optimizer: false
288
+ offload_params: false
289
+ padding_free: true
290
+ strategy: fsdp
291
+ ulysses_size: 1
292
+ use_torch_compile: true
293
+ reward:
294
+ num_cpus: 1
295
+ reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py
296
+ reward_function_name: main
297
+ reward_type: batch
298
+ skip_special_tokens: true
299
+ rollout:
300
+ disable_log_stats: true
301
+ disable_tqdm: false
302
+ dtype: bf16
303
+ enable_chunked_prefill: false
304
+ enforce_eager: false
305
+ gpu_memory_utilization: 0.6
306
+ ignore_eos: false
307
+ limit_images: 0
308
+ max_model_len: null
309
+ max_num_batched_tokens: 24576
310
+ "n": 5
311
+ name: vllm
312
+ prompt_length: 4096
313
+ response_length: 16384
314
+ seed: 1
315
+ temperature: 1
316
+ tensor_parallel_size: 1
317
+ top_k: -1
318
+ top_p: 0.99
319
+ trust_remote_code: false
320
+ val_override_config:
321
+ "n": 1
322
+ temperature: 0.5
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/output.log ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 59301, "uuid": "79b41be0b4cb4caea00399d5e67f3adb", "closed": false}
2
+ Start validation...
3
+ Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61215, ip=10.1.5.237, actor_id=8dbb70fdf561d45e1bb95fbd01000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7fc005e599c0>)
4
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
5
+ return getattr(self.worker_dict[key], name)(*args, **kwargs)
6
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
7
+ return func(*args, **kwargs)
8
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
9
+ self.rollout_sharding_manager.load_vllm_and_sync_weights()
10
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
11
+ if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
12
+ AttributeError: 'str' object has no attribute 'wake_up'
13
+ Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61214, ip=10.1.5.237, actor_id=12428909aea9647197558b3701000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7f02884019c0>)
14
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
15
+ return getattr(self.worker_dict[key], name)(*args, **kwargs)
16
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
17
+ return func(*args, **kwargs)
18
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
19
+ self.rollout_sharding_manager.load_vllm_and_sync_weights()
20
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
21
+ if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
22
+ AttributeError: 'str' object has no attribute 'wake_up'
23
+ Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61213, ip=10.1.5.237, actor_id=17a3ff05d33225db9d5f3d2001000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7fc706441930>)
24
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
25
+ return getattr(self.worker_dict[key], name)(*args, **kwargs)
26
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
27
+ return func(*args, **kwargs)
28
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
29
+ self.rollout_sharding_manager.load_vllm_and_sync_weights()
30
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
31
+ if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
32
+ AttributeError: 'str' object has no attribute 'wake_up'
33
+ Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61212, ip=10.1.5.237, actor_id=8038d6b87c20ea82378ff46b01000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7edfcc7299c0>)
34
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
35
+ return getattr(self.worker_dict[key], name)(*args, **kwargs)
36
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
37
+ return func(*args, **kwargs)
38
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
39
+ self.rollout_sharding_manager.load_vllm_and_sync_weights()
40
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
41
+ if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
42
+ AttributeError: 'str' object has no attribute 'wake_up'
43
+ Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61211, ip=10.1.5.237, actor_id=1e5423d0856a1d601b82502801000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7f26f00119f0>)
44
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
45
+ return getattr(self.worker_dict[key], name)(*args, **kwargs)
46
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
47
+ return func(*args, **kwargs)
48
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
49
+ self.rollout_sharding_manager.load_vllm_and_sync_weights()
50
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
51
+ if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
52
+ AttributeError: 'str' object has no attribute 'wake_up'
53
+ Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61209, ip=10.1.5.237, actor_id=061e0c8de42fd2b69b89561501000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7fba20e05a20>)
54
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
55
+ return getattr(self.worker_dict[key], name)(*args, **kwargs)
56
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
57
+ return func(*args, **kwargs)
58
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
59
+ self.rollout_sharding_manager.load_vllm_and_sync_weights()
60
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
61
+ if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
62
+ AttributeError: 'str' object has no attribute 'wake_up'
63
+ Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=60985, ip=10.1.5.237, actor_id=8073bd5c566ab2faaa122c0e01000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7f37c0e19780>)
64
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
65
+ return getattr(self.worker_dict[key], name)(*args, **kwargs)
66
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
67
+ return func(*args, **kwargs)
68
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
69
+ self.rollout_sharding_manager.load_vllm_and_sync_weights()
70
+ File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
71
+ if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
72
+ AttributeError: 'str' object has no attribute 'wake_up'
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/requirements.txt ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setproctitle==1.2.2
2
+ colorama==0.4.6
3
+ psutil==7.0.0
4
+ attrs==25.3.0
5
+ tqdm==4.67.1
6
+ langcodes==3.5.0
7
+ nvidia-cublas-cu12==12.4.5.8
8
+ airportsdata==20250706
9
+ absl-py==2.3.1
10
+ hf-xet==1.1.5
11
+ opentelemetry-exporter-otlp-proto-http==1.26.0
12
+ interegular==0.3.3
13
+ tifffile==2025.5.10
14
+ nvidia-cufile-cu12==1.11.1.6
15
+ nltk==3.9.1
16
+ tokenizers==0.21.2
17
+ salesforce-lavis==1.0.2
18
+ tzdata==2025.2
19
+ prometheus_client==0.22.1
20
+ google-auth==2.40.3
21
+ ipython==8.37.0
22
+ pydantic==2.11.7
23
+ mathruler==0.1.0
24
+ six==1.17.0
25
+ python-dateutil==2.9.0.post0
26
+ requests==2.32.4
27
+ mistral_common==1.8.0
28
+ huggingface-hub==0.33.4
29
+ preshed==3.0.10
30
+ torchmetrics==1.7.4
31
+ blinker==1.9.0
32
+ nvidia-cusparse-cu12==12.3.1.170
33
+ rich-toolkit==0.14.8
34
+ pytz==2025.2
35
+ pandas==2.3.1
36
+ packaging==25.0
37
+ async-timeout==5.0.1
38
+ diskcache==5.6.3
39
+ google-api-core==2.25.1
40
+ parso==0.8.4
41
+ joblib==1.5.1
42
+ pycountry==24.6.1
43
+ triton==3.2.0
44
+ pybase64==1.4.1
45
+ marisa-trie==1.2.1
46
+ plotly==6.2.0
47
+ wandb==0.21.0
48
+ PyYAML==6.0.2
49
+ regex==2024.11.6
50
+ idna==3.10
51
+ numba==0.61.2
52
+ nvidia-curand-cu12==10.3.5.147
53
+ uvicorn==0.35.0
54
+ srsly==2.5.1
55
+ confection==0.1.5
56
+ opentelemetry-semantic-conventions-ai==0.4.11
57
+ typing-inspection==0.4.1
58
+ opencv-python-headless==4.12.0.88
59
+ pyasn1==0.6.1
60
+ av==15.0.0
61
+ xgrammar==0.1.18
62
+ distlib==0.3.9
63
+ datasets==4.0.0
64
+ networkx==3.4.2
65
+ prometheus-fastapi-instrumentator==7.1.0
66
+ lightning-utilities==0.14.3
67
+ executing==2.2.0
68
+ pycocoevalcap==1.2
69
+ h11==0.16.0
70
+ certifi==2025.7.14
71
+ sniffio==1.3.1
72
+ wheel==0.45.1
73
+ transformers==4.52.4
74
+ wrapt==1.17.2
75
+ jsonschema-specifications==2025.4.1
76
+ mpmath==1.3.0
77
+ msgspec==0.19.0
78
+ py-cpuinfo==9.0.0
79
+ contexttimer==0.3.3
80
+ watchdog==6.0.0
81
+ pexpect==4.9.0
82
+ webencodings==0.5.1
83
+ verl==0.3.2.dev0
84
+ webdataset==1.0.2
85
+ httpcore==1.0.9
86
+ opentelemetry-exporter-otlp==1.26.0
87
+ lm-format-enforcer==0.10.11
88
+ googleapis-common-protos==1.70.0
89
+ pyzmq==27.0.0
90
+ fsspec==2025.3.0
91
+ grpcio==1.73.1
92
+ cymem==2.0.11
93
+ timm==0.4.12
94
+ zipp==3.23.0
95
+ llguidance==0.7.30
96
+ opencensus-context==0.1.3
97
+ omegaconf==2.3.0
98
+ python-json-logger==3.3.0
99
+ opentelemetry-exporter-otlp-proto-common==1.26.0
100
+ watchfiles==1.1.0
101
+ nvidia-nvjitlink-cu12==12.4.127
102
+ peft==0.16.0
103
+ sentry-sdk==2.32.0
104
+ rpds-py==0.26.0
105
+ email_validator==2.2.0
106
+ nodeenv==1.9.1
107
+ distro==1.9.0
108
+ jiter==0.10.0
109
+ compressed-tensors==0.9.3
110
+ annotated-types==0.7.0
111
+ matplotlib-inline==0.1.7
112
+ rich==14.0.0
113
+ GitPython==3.1.44
114
+ lazy_loader==0.4
115
+ fastapi-cloud-cli==0.1.4
116
+ cupy-cuda12x==13.5.1
117
+ prompt_toolkit==3.0.51
118
+ gguf==0.17.1
119
+ blis==1.3.0
120
+ thinc==8.3.6
121
+ cloudpickle==3.1.1
122
+ multidict==6.6.3
123
+ nvidia-nvtx-cu12==12.4.127
124
+ flash-attn==2.7.1.post1
125
+ pyasn1_modules==0.4.2
126
+ rsa==4.9.1
127
+ weasel==0.4.1
128
+ uvloop==0.21.0
129
+ click==8.2.1
130
+ numpy==2.2.6
131
+ torchdata==0.11.0
132
+ pylatexenc==2.10
133
+ cachetools==5.5.2
134
+ Jinja2==3.1.6
135
+ typer==0.16.0
136
+ nvidia-cudnn-cu12==9.1.0.70
137
+ fastapi-cli==0.0.8
138
+ xxhash==3.5.0
139
+ tornado==6.5.1
140
+ scipy==1.15.3
141
+ rouge_score==0.1.2
142
+ cloudpathlib==0.21.1
143
+ streamlit==1.46.1
144
+ jedi==0.19.2
145
+ referencing==0.36.2
146
+ accelerate==1.8.1
147
+ decord==0.6.0
148
+ setuptools==78.1.1
149
+ mdurl==0.1.2
150
+ vllm==0.8.5.post1
151
+ identify==2.6.12
152
+ python-slugify==8.0.4
153
+ dnspython==2.7.0
154
+ dill==0.3.8
155
+ opentelemetry-proto==1.26.0
156
+ orjson==3.10.18
157
+ msgpack==1.1.1
158
+ aiohttp==3.12.14
159
+ aiosignal==1.4.0
160
+ typing_extensions==4.14.1
161
+ tiktoken==0.9.0
162
+ catalogue==2.0.10
163
+ platformdirs==4.3.8
164
+ narwhals==1.47.0
165
+ antlr4-python3-runtime==4.9.3
166
+ pydantic-extra-types==2.10.5
167
+ nvidia-cusolver-cu12==11.6.1.9
168
+ kaggle==1.7.4.5
169
+ propcache==0.3.2
170
+ urllib3==2.5.0
171
+ opentelemetry-exporter-otlp-proto-grpc==1.26.0
172
+ pydeck==0.9.1
173
+ nvidia-cufft-cu12==11.2.1.3
174
+ pyarrow==20.0.0
175
+ nvidia-nccl-cu12==2.21.5
176
+ httptools==0.6.4
177
+ qwen-vl-utils==0.0.11
178
+ markdown-it-py==3.0.0
179
+ gitdb==4.0.12
180
+ altair==5.5.0
181
+ torchvision==0.21.0
182
+ python-magic==0.4.27
183
+ iopath==0.1.10
184
+ ray==2.47.1
185
+ blake3==1.0.5
186
+ pillow==11.3.0
187
+ python-dotenv==1.1.1
188
+ torchaudio==2.6.0
189
+ partial-json-parser==0.2.1.1.post6
190
+ httpx==0.28.1
191
+ torch==2.6.0
192
+ anyio==4.9.0
193
+ fairscale==0.4.4
194
+ traitlets==5.14.3
195
+ pure_eval==0.2.3
196
+ sympy==1.13.1
197
+ nvidia-cusparselt-cu12==0.6.2
198
+ jsonschema==4.24.0
199
+ imageio==2.37.0
200
+ opencensus==0.11.4
201
+ stack-data==0.6.3
202
+ shellingham==1.5.4
203
+ tensordict==0.9.1
204
+ nvidia-cuda-runtime-cu12==12.4.127
205
+ nest-asyncio==1.6.0
206
+ einops==0.8.1
207
+ lark==1.2.2
208
+ tenacity==9.1.2
209
+ virtualenv==20.31.2
210
+ ptyprocess==0.7.0
211
+ outlines==0.1.11
212
+ depyf==0.18.0
213
+ starlette==0.47.1
214
+ cfgv==3.4.0
215
+ pre_commit==4.2.0
216
+ language_data==1.3.0
217
+ pip==25.1
218
+ Pygments==2.19.2
219
+ nvidia-cuda-cupti-cu12==12.4.127
220
+ protobuf==4.25.8
221
+ safetensors==0.5.3
222
+ text-unidecode==1.3
223
+ wcwidth==0.2.13
224
+ charset-normalizer==3.4.2
225
+ aiohappyeyeballs==2.6.1
226
+ outlines_core==0.1.26
227
+ fastrlock==0.8.3
228
+ asttokens==3.0.0
229
+ psutil==7.0.0
230
+ smmap==5.0.2
231
+ exceptiongroup==1.3.0
232
+ murmurhash==1.0.13
233
+ pytorch-lightning==2.5.2
234
+ filelock==3.18.0
235
+ astor==0.8.1
236
+ py-spy==0.4.0
237
+ pydantic_core==2.33.2
238
+ colorful==0.5.7
239
+ fastapi==0.116.1
240
+ opentelemetry-api==1.26.0
241
+ openai==1.90.0
242
+ ninja==1.11.1.4
243
+ opentelemetry-semantic-conventions==0.47b0
244
+ spacy-legacy==3.0.12
245
+ opendatasets==0.1.22
246
+ Deprecated==1.2.18
247
+ proto-plus==1.26.1
248
+ rignore==0.6.2
249
+ aiohttp-cors==0.8.1
250
+ liger_kernel==0.6.0
251
+ opentelemetry-exporter-prometheus==0.56b0
252
+ python-multipart==0.0.20
253
+ multiprocess==0.70.16
254
+ opentelemetry-sdk==1.26.0
255
+ decorator==5.2.1
256
+ xformers==0.0.29.post2
257
+ spacy==3.8.7
258
+ pyvers==0.1.0
259
+ pycocotools==2.0.10
260
+ websockets==15.0.1
261
+ wasabi==1.1.3
262
+ frozenlist==1.7.0
263
+ codetiming==1.4.0
264
+ sentencepiece==0.2.0
265
+ toml==0.10.2
266
+ scikit-image==0.25.2
267
+ ftfy==6.3.1
268
+ bleach==6.2.0
269
+ yarl==1.20.1
270
+ nvidia-cuda-nvrtc-cu12==12.4.127
271
+ importlib_metadata==8.0.0
272
+ spacy-loggers==1.0.5
273
+ smart_open==7.3.0.post1
274
+ portalocker==3.2.0
275
+ llvmlite==0.44.0
276
+ MarkupSafe==3.0.2
277
+ braceexpand==0.1.7
278
+ modelscope==1.28.0
279
+ verl==0.3.2.dev0
280
+ jaraco.context==5.3.0
281
+ more-itertools==10.3.0
282
+ jaraco.functools==4.0.1
283
+ jaraco.text==3.12.1
284
+ platformdirs==4.2.2
285
+ packaging==24.2
286
+ wheel==0.45.1
287
+ zipp==3.19.2
288
+ inflect==7.3.1
289
+ autocommand==2.2.2
290
+ typeguard==4.3.0
291
+ jaraco.collections==5.1.0
292
+ backports.tarfile==1.2.0
293
+ tomli==2.0.1
294
+ importlib_metadata==8.0.0
295
+ typing_extensions==4.12.2
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-metadata.json ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-07-18T08:00:33.186442Z",
5
+ "args": [
6
+ "--node-ip-address=10.1.5.237",
7
+ "--node-manager-port=37853",
8
+ "--object-store-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/plasma_store",
9
+ "--raylet-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/raylet",
10
+ "--redis-address=None",
11
+ "--metrics-agent-port=43790",
12
+ "--logging-rotate-bytes=536870912",
13
+ "--logging-rotate-backup-count=5",
14
+ "--runtime-env-agent-port=63904",
15
+ "--gcs-address=10.1.5.237:56758",
16
+ "--session-name=session_2025-07-18_15-56-28_336135_54391",
17
+ "--temp-dir=/tmp/ray",
18
+ "--webui=127.0.0.1:8265",
19
+ "--cluster-id=cc22236bcaa2a9ab2bdb3c76723ef15af4933b041414da957aa668b9",
20
+ "--startup-token=64",
21
+ "--worker-launch-time-ms=1752825390762",
22
+ "--node-id=0967616139eea74249995a5549bf4039d244c259acd06a5a8fe0b7aa",
23
+ "--runtime-env-hash=-115784934",
24
+ "--enable-resource-isolation=false"
25
+ ],
26
+ "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py",
27
+ "git": {
28
+ "remote": "https://github.com/PorUna-byte/PAR.git",
29
+ "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db"
30
+ },
31
+ "email": "gia0603yucca@gmail.com",
32
+ "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples",
33
+ "host": "dsw-266702-dc4b748ff-f7c66",
34
+ "executable": "/root/miniconda3/envs/easyr1-new/bin/python3",
35
+ "cpu_count": 64,
36
+ "cpu_count_logical": 64,
37
+ "gpu": "NVIDIA A800-SXM4-80GB",
38
+ "gpu_count": 8,
39
+ "disk": {
40
+ "/": {
41
+ "total": "1623302262784",
42
+ "used": "1224904704"
43
+ }
44
+ },
45
+ "memory": {
46
+ "total": "549755813888"
47
+ },
48
+ "gpu_nvidia": [
49
+ {
50
+ "name": "NVIDIA A800-SXM4-80GB",
51
+ "architecture": "Ampere",
52
+ "uuid": "GPU-f7e858cd-ae03-031d-b834-86bf87923211"
53
+ },
54
+ {
55
+ "name": "NVIDIA A800-SXM4-80GB",
56
+ "architecture": "Ampere",
57
+ "uuid": "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0"
58
+ },
59
+ {
60
+ "name": "NVIDIA A800-SXM4-80GB",
61
+ "architecture": "Ampere",
62
+ "uuid": "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630"
63
+ },
64
+ {
65
+ "name": "NVIDIA A800-SXM4-80GB",
66
+ "architecture": "Ampere",
67
+ "uuid": "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71"
68
+ },
69
+ {
70
+ "name": "NVIDIA A800-SXM4-80GB",
71
+ "architecture": "Ampere",
72
+ "uuid": "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b"
73
+ },
74
+ {
75
+ "name": "NVIDIA A800-SXM4-80GB",
76
+ "architecture": "Ampere",
77
+ "uuid": "GPU-becb8d59-2ab7-b50d-5770-183c6478747a"
78
+ },
79
+ {
80
+ "name": "NVIDIA A800-SXM4-80GB",
81
+ "architecture": "Ampere",
82
+ "uuid": "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40"
83
+ },
84
+ {
85
+ "name": "NVIDIA A800-SXM4-80GB",
86
+ "architecture": "Ampere",
87
+ "uuid": "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655"
88
+ }
89
+ ],
90
+ "cudaVersion": "12.1",
91
+ "writerId": "repix6q725hnzsubljgya3pkb0pg0b9q"
92
+ }
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":1},"_runtime":1}
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-07-18T16:00:33.944898175+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-07-18T16:01:04.056910886+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
3
+ {"time":"2025-07-18T16:01:14.919464259+08:00","level":"INFO","msg":"stream: created new stream","id":"nji9xqxs"}
4
+ {"time":"2025-07-18T16:01:14.926346872+08:00","level":"INFO","msg":"sender: started","stream_id":"nji9xqxs"}
5
+ {"time":"2025-07-18T16:01:14.926359513+08:00","level":"INFO","msg":"stream: started","id":"nji9xqxs"}
6
+ {"time":"2025-07-18T16:01:14.926369749+08:00","level":"INFO","msg":"handler: started","stream_id":"nji9xqxs"}
7
+ {"time":"2025-07-18T16:01:14.926391685+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"nji9xqxs"}
8
+ {"time":"2025-07-18T16:01:44.221082826+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading data","runtime_seconds":2.593669605},{"desc":"updating run metadata","runtime_seconds":2.593493161},{"desc":"uploading wandb-metadata.json","runtime_seconds":1.024626407}],"total_operations":3}}
9
+ {"time":"2025-07-18T16:01:58.697029208+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/nji9xqxs/file_stream\": unexpected EOF"}
10
+ {"time":"2025-07-18T16:02:17.601004486+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
11
+ {"time":"2025-07-18T16:02:38.850804129+08:00","level":"INFO","msg":"stream: closing","id":"nji9xqxs"}
12
+ {"time":"2025-07-18T16:02:38.850824284+08:00","level":"INFO","msg":"handler: closed","stream_id":"nji9xqxs"}
13
+ {"time":"2025-07-18T16:02:38.850832353+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"nji9xqxs"}
14
+ {"time":"2025-07-18T16:02:38.850837848+08:00","level":"INFO","msg":"sender: closed","stream_id":"nji9xqxs"}
15
+ {"time":"2025-07-18T16:02:38.858004163+08:00","level":"INFO","msg":"stream: closed","id":"nji9xqxs"}
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-18 16:00:33,700 INFO MainThread:59301 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-07-18 16:00:33,700 INFO MainThread:59301 [wandb_setup.py:_flush():80] Configure stats pid to 59301
3
+ 2025-07-18 16:00:33,700 INFO MainThread:59301 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings
5
+ 2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log
7
+ 2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log
8
+ 2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:init():830] calling init triggers
9
+ 2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 
'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 
'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}}
11
+ 2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:init():871] starting backend
12
+ 2025-07-18 16:00:33,911 INFO MainThread:59301 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-07-18 16:00:33,914 INFO MainThread:59301 [wandb_init.py:init():882] backend started and connected
14
+ 2025-07-18 16:00:33,934 INFO MainThread:59301 [wandb_init.py:init():953] updated telemetry
15
+ 2025-07-18 16:00:34,824 INFO MainThread:59301 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-07-18 16:01:41,621 INFO MainThread:59301 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-07-18 16:01:41,877 INFO MainThread:59301 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-07-18 16:01:41,877 INFO MainThread:59301 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-07-18 16:01:41,899 INFO MainThread:59301 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-07-18 16:01:41,899 INFO MainThread:59301 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-07-18 16:01:41,927 INFO MainThread:59301 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-07-18 16:01:43,199 INFO MainThread:59301 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/nji9xqxs
23
+ 2025-07-18 16:01:43,199 INFO MainThread:59301 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
24
+ 2025-07-18 16:01:43,207 INFO MainThread:59301 [wandb_run.py:_restore():2405] restore
25
+ 2025-07-18 16:01:43,211 INFO MainThread:59301 [wandb_run.py:_restore():2411] restore done
26
+ 2025-07-18 16:02:38,840 INFO MainThread:59301 [wandb_run.py:_footer_history_summary_info():3903] rendering history
27
+ 2025-07-18 16:02:38,841 INFO MainThread:59301 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
28
+ 2025-07-18 16:02:38,841 INFO MainThread:59301 [wandb_run.py:_footer_sync_info():3864] logging synced files
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/run-nji9xqxs.wandb ADDED
Binary file (15.9 kB). View file
 
EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/output.log ADDED
File without changes
EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/requirements.txt ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setproctitle==1.2.2
2
+ colorama==0.4.6
3
+ psutil==7.0.0
4
+ attrs==25.3.0
5
+ tqdm==4.67.1
6
+ langcodes==3.5.0
7
+ nvidia-cublas-cu12==12.4.5.8
8
+ airportsdata==20250706
9
+ absl-py==2.3.1
10
+ hf-xet==1.1.5
11
+ opentelemetry-exporter-otlp-proto-http==1.26.0
12
+ interegular==0.3.3
13
+ tifffile==2025.5.10
14
+ nvidia-cufile-cu12==1.11.1.6
15
+ nltk==3.9.1
16
+ tokenizers==0.21.2
17
+ salesforce-lavis==1.0.2
18
+ tzdata==2025.2
19
+ prometheus_client==0.22.1
20
+ google-auth==2.40.3
21
+ ipython==8.37.0
22
+ pydantic==2.11.7
23
+ mathruler==0.1.0
24
+ six==1.17.0
25
+ python-dateutil==2.9.0.post0
26
+ requests==2.32.4
27
+ mistral_common==1.8.0
28
+ huggingface-hub==0.33.4
29
+ preshed==3.0.10
30
+ torchmetrics==1.7.4
31
+ blinker==1.9.0
32
+ nvidia-cusparse-cu12==12.3.1.170
33
+ rich-toolkit==0.14.8
34
+ pytz==2025.2
35
+ pandas==2.3.1
36
+ packaging==25.0
37
+ async-timeout==5.0.1
38
+ diskcache==5.6.3
39
+ google-api-core==2.25.1
40
+ parso==0.8.4
41
+ joblib==1.5.1
42
+ pycountry==24.6.1
43
+ triton==3.2.0
44
+ pybase64==1.4.1
45
+ marisa-trie==1.2.1
46
+ plotly==6.2.0
47
+ wandb==0.21.0
48
+ PyYAML==6.0.2
49
+ regex==2024.11.6
50
+ idna==3.10
51
+ numba==0.61.2
52
+ nvidia-curand-cu12==10.3.5.147
53
+ uvicorn==0.35.0
54
+ srsly==2.5.1
55
+ confection==0.1.5
56
+ opentelemetry-semantic-conventions-ai==0.4.11
57
+ typing-inspection==0.4.1
58
+ opencv-python-headless==4.12.0.88
59
+ pyasn1==0.6.1
60
+ av==15.0.0
61
+ xgrammar==0.1.18
62
+ distlib==0.3.9
63
+ datasets==4.0.0
64
+ networkx==3.4.2
65
+ prometheus-fastapi-instrumentator==7.1.0
66
+ lightning-utilities==0.14.3
67
+ executing==2.2.0
68
+ pycocoevalcap==1.2
69
+ h11==0.16.0
70
+ certifi==2025.7.14
71
+ sniffio==1.3.1
72
+ wheel==0.45.1
73
+ transformers==4.52.4
74
+ wrapt==1.17.2
75
+ jsonschema-specifications==2025.4.1
76
+ mpmath==1.3.0
77
+ msgspec==0.19.0
78
+ py-cpuinfo==9.0.0
79
+ contexttimer==0.3.3
80
+ watchdog==6.0.0
81
+ pexpect==4.9.0
82
+ webencodings==0.5.1
83
+ verl==0.3.2.dev0
84
+ webdataset==1.0.2
85
+ httpcore==1.0.9
86
+ opentelemetry-exporter-otlp==1.26.0
87
+ lm-format-enforcer==0.10.11
88
+ googleapis-common-protos==1.70.0
89
+ pyzmq==27.0.0
90
+ fsspec==2025.3.0
91
+ grpcio==1.73.1
92
+ cymem==2.0.11
93
+ timm==0.4.12
94
+ zipp==3.23.0
95
+ llguidance==0.7.30
96
+ opencensus-context==0.1.3
97
+ omegaconf==2.3.0
98
+ python-json-logger==3.3.0
99
+ opentelemetry-exporter-otlp-proto-common==1.26.0
100
+ watchfiles==1.1.0
101
+ nvidia-nvjitlink-cu12==12.4.127
102
+ peft==0.16.0
103
+ sentry-sdk==2.32.0
104
+ rpds-py==0.26.0
105
+ email_validator==2.2.0
106
+ nodeenv==1.9.1
107
+ distro==1.9.0
108
+ jiter==0.10.0
109
+ compressed-tensors==0.9.3
110
+ annotated-types==0.7.0
111
+ matplotlib-inline==0.1.7
112
+ rich==14.0.0
113
+ GitPython==3.1.44
114
+ lazy_loader==0.4
115
+ fastapi-cloud-cli==0.1.4
116
+ cupy-cuda12x==13.5.1
117
+ prompt_toolkit==3.0.51
118
+ gguf==0.17.1
119
+ blis==1.3.0
120
+ thinc==8.3.6
121
+ cloudpickle==3.1.1
122
+ multidict==6.6.3
123
+ nvidia-nvtx-cu12==12.4.127
124
+ flash-attn==2.7.1.post1
125
+ pyasn1_modules==0.4.2
126
+ rsa==4.9.1
127
+ weasel==0.4.1
128
+ uvloop==0.21.0
129
+ click==8.2.1
130
+ numpy==2.2.6
131
+ torchdata==0.11.0
132
+ pylatexenc==2.10
133
+ cachetools==5.5.2
134
+ Jinja2==3.1.6
135
+ typer==0.16.0
136
+ nvidia-cudnn-cu12==9.1.0.70
137
+ fastapi-cli==0.0.8
138
+ xxhash==3.5.0
139
+ tornado==6.5.1
140
+ scipy==1.15.3
141
+ rouge_score==0.1.2
142
+ cloudpathlib==0.21.1
143
+ streamlit==1.46.1
144
+ jedi==0.19.2
145
+ referencing==0.36.2
146
+ accelerate==1.8.1
147
+ decord==0.6.0
148
+ setuptools==78.1.1
149
+ mdurl==0.1.2
150
+ vllm==0.8.5.post1
151
+ identify==2.6.12
152
+ python-slugify==8.0.4
153
+ dnspython==2.7.0
154
+ dill==0.3.8
155
+ opentelemetry-proto==1.26.0
156
+ orjson==3.10.18
157
+ msgpack==1.1.1
158
+ aiohttp==3.12.14
159
+ aiosignal==1.4.0
160
+ typing_extensions==4.14.1
161
+ tiktoken==0.9.0
162
+ catalogue==2.0.10
163
+ platformdirs==4.3.8
164
+ narwhals==1.47.0
165
+ antlr4-python3-runtime==4.9.3
166
+ pydantic-extra-types==2.10.5
167
+ nvidia-cusolver-cu12==11.6.1.9
168
+ kaggle==1.7.4.5
169
+ propcache==0.3.2
170
+ urllib3==2.5.0
171
+ opentelemetry-exporter-otlp-proto-grpc==1.26.0
172
+ pydeck==0.9.1
173
+ nvidia-cufft-cu12==11.2.1.3
174
+ pyarrow==20.0.0
175
+ nvidia-nccl-cu12==2.21.5
176
+ httptools==0.6.4
177
+ qwen-vl-utils==0.0.11
178
+ markdown-it-py==3.0.0
179
+ gitdb==4.0.12
180
+ altair==5.5.0
181
+ torchvision==0.21.0
182
+ python-magic==0.4.27
183
+ iopath==0.1.10
184
+ ray==2.47.1
185
+ blake3==1.0.5
186
+ pillow==11.3.0
187
+ python-dotenv==1.1.1
188
+ torchaudio==2.6.0
189
+ partial-json-parser==0.2.1.1.post6
190
+ httpx==0.28.1
191
+ torch==2.6.0
192
+ anyio==4.9.0
193
+ fairscale==0.4.4
194
+ traitlets==5.14.3
195
+ pure_eval==0.2.3
196
+ sympy==1.13.1
197
+ nvidia-cusparselt-cu12==0.6.2
198
+ jsonschema==4.24.0
199
+ imageio==2.37.0
200
+ opencensus==0.11.4
201
+ stack-data==0.6.3
202
+ shellingham==1.5.4
203
+ tensordict==0.9.1
204
+ nvidia-cuda-runtime-cu12==12.4.127
205
+ nest-asyncio==1.6.0
206
+ einops==0.8.1
207
+ lark==1.2.2
208
+ tenacity==9.1.2
209
+ virtualenv==20.31.2
210
+ ptyprocess==0.7.0
211
+ outlines==0.1.11
212
+ depyf==0.18.0
213
+ starlette==0.47.1
214
+ cfgv==3.4.0
215
+ pre_commit==4.2.0
216
+ language_data==1.3.0
217
+ pip==25.1
218
+ Pygments==2.19.2
219
+ nvidia-cuda-cupti-cu12==12.4.127
220
+ protobuf==4.25.8
221
+ safetensors==0.5.3
222
+ text-unidecode==1.3
223
+ wcwidth==0.2.13
224
+ charset-normalizer==3.4.2
225
+ aiohappyeyeballs==2.6.1
226
+ outlines_core==0.1.26
227
+ fastrlock==0.8.3
228
+ asttokens==3.0.0
229
+ psutil==7.0.0
230
+ smmap==5.0.2
231
+ exceptiongroup==1.3.0
232
+ murmurhash==1.0.13
233
+ pytorch-lightning==2.5.2
234
+ filelock==3.18.0
235
+ astor==0.8.1
236
+ py-spy==0.4.0
237
+ pydantic_core==2.33.2
238
+ colorful==0.5.7
239
+ fastapi==0.116.1
240
+ opentelemetry-api==1.26.0
241
+ openai==1.90.0
242
+ ninja==1.11.1.4
243
+ opentelemetry-semantic-conventions==0.47b0
244
+ spacy-legacy==3.0.12
245
+ opendatasets==0.1.22
246
+ Deprecated==1.2.18
247
+ proto-plus==1.26.1
248
+ rignore==0.6.2
249
+ aiohttp-cors==0.8.1
250
+ liger_kernel==0.6.0
251
+ opentelemetry-exporter-prometheus==0.56b0
252
+ python-multipart==0.0.20
253
+ multiprocess==0.70.16
254
+ opentelemetry-sdk==1.26.0
255
+ decorator==5.2.1
256
+ xformers==0.0.29.post2
257
+ spacy==3.8.7
258
+ pyvers==0.1.0
259
+ pycocotools==2.0.10
260
+ websockets==15.0.1
261
+ wasabi==1.1.3
262
+ frozenlist==1.7.0
263
+ codetiming==1.4.0
264
+ sentencepiece==0.2.0
265
+ toml==0.10.2
266
+ scikit-image==0.25.2
267
+ ftfy==6.3.1
268
+ bleach==6.2.0
269
+ yarl==1.20.1
270
+ nvidia-cuda-nvrtc-cu12==12.4.127
271
+ importlib_metadata==8.0.0
272
+ spacy-loggers==1.0.5
273
+ smart_open==7.3.0.post1
274
+ portalocker==3.2.0
275
+ llvmlite==0.44.0
276
+ MarkupSafe==3.0.2
277
+ braceexpand==0.1.7
278
+ modelscope==1.28.0
279
+ verl==0.3.2.dev0
280
+ jaraco.context==5.3.0
281
+ more-itertools==10.3.0
282
+ jaraco.functools==4.0.1
283
+ jaraco.text==3.12.1
284
+ platformdirs==4.2.2
285
+ packaging==24.2
286
+ wheel==0.45.1
287
+ zipp==3.19.2
288
+ inflect==7.3.1
289
+ autocommand==2.2.2
290
+ typeguard==4.3.0
291
+ jaraco.collections==5.1.0
292
+ backports.tarfile==1.2.0
293
+ tomli==2.0.1
294
+ importlib_metadata==8.0.0
295
+ typing_extensions==4.12.2
EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/wandb-metadata.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-07-18T10:01:27.794840Z",
5
+ "args": [
6
+ "--node-ip-address=10.1.5.237",
7
+ "--node-manager-port=34033",
8
+ "--object-store-name=/tmp/ray/session_2025-07-18_17-59-46_929054_90432/sockets/plasma_store",
9
+ "--raylet-name=/tmp/ray/session_2025-07-18_17-59-46_929054_90432/sockets/raylet",
10
+ "--redis-address=None",
11
+ "--metrics-agent-port=52220",
12
+ "--logging-rotate-bytes=536870912",
13
+ "--logging-rotate-backup-count=5",
14
+ "--runtime-env-agent-port=58307",
15
+ "--gcs-address=10.1.5.237:63437",
16
+ "--session-name=session_2025-07-18_17-59-46_929054_90432",
17
+ "--temp-dir=/tmp/ray",
18
+ "--webui=127.0.0.1:8265",
19
+ "--cluster-id=2320bfb132f181fae6a438fbb8ba4302101825636e86b29ea49d2a26",
20
+ "--startup-token=64",
21
+ "--worker-launch-time-ms=1752832790343",
22
+ "--node-id=d351a5bfa85748ebf678bc24e7adda6ad59e09972b13108dbb01547f",
23
+ "--runtime-env-hash=-115784934",
24
+ "--enable-resource-isolation=false"
25
+ ],
26
+ "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py",
27
+ "git": {
28
+ "remote": "https://github.com/PorUna-byte/PAR.git",
29
+ "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db"
30
+ },
31
+ "email": "gia0603yucca@gmail.com",
32
+ "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples",
33
+ "host": "dsw-266702-dc4b748ff-f7c66",
34
+ "executable": "/root/miniconda3/envs/easyr1-new/bin/python3",
35
+ "writerId": "qpm36h9mjv3m2bmimjfqh0pw0u9a4282"
36
+ }
EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-07-18T18:01:28.970283308+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-07-18T18:01:36.389685825+08:00","level":"INFO","msg":"stream: created new stream","id":"wmarwr6l"}
3
+ {"time":"2025-07-18T18:01:36.413332423+08:00","level":"INFO","msg":"handler: started","stream_id":"wmarwr6l"}
4
+ {"time":"2025-07-18T18:01:36.413371741+08:00","level":"INFO","msg":"stream: started","id":"wmarwr6l"}
5
+ {"time":"2025-07-18T18:01:36.413392401+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"wmarwr6l"}
6
+ {"time":"2025-07-18T18:01:36.413389742+08:00","level":"INFO","msg":"sender: started","stream_id":"wmarwr6l"}
EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] Configure stats pid to 95226
3
+ 2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings
5
+ 2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log
7
+ 2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log
8
+ 2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:init():830] calling init triggers
9
+ 2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 
'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 
'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}}
11
+ 2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:init():871] starting backend
12
+ 2025-07-18 18:01:28,902 INFO MainThread:95226 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-07-18 18:01:28,904 INFO MainThread:95226 [wandb_init.py:init():882] backend started and connected
14
+ 2025-07-18 18:01:28,909 INFO MainThread:95226 [wandb_init.py:init():953] updated telemetry
15
+ 2025-07-18 18:01:29,464 INFO MainThread:95226 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-07-18 18:01:40,777 INFO MainThread:95226 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-07-18 18:01:41,069 INFO MainThread:95226 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-07-18 18:01:41,069 INFO MainThread:95226 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-07-18 18:01:41,079 INFO MainThread:95226 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-07-18 18:01:41,079 INFO MainThread:95226 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-07-18 18:01:41,125 INFO MainThread:95226 [wandb_init.py:init():1075] run started, returning control to user process
EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/run-wmarwr6l.wandb ADDED
File without changes
EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/output.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 104882, "uuid": "0f066e81b2fc4d09a338174f40c2e400", "closed": false}
2
+ Start validation...
EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/requirements.txt ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setproctitle==1.2.2
2
+ colorama==0.4.6
3
+ psutil==7.0.0
4
+ attrs==25.3.0
5
+ tqdm==4.67.1
6
+ langcodes==3.5.0
7
+ nvidia-cublas-cu12==12.4.5.8
8
+ airportsdata==20250706
9
+ absl-py==2.3.1
10
+ hf-xet==1.1.5
11
+ opentelemetry-exporter-otlp-proto-http==1.26.0
12
+ interegular==0.3.3
13
+ tifffile==2025.5.10
14
+ nvidia-cufile-cu12==1.11.1.6
15
+ nltk==3.9.1
16
+ tokenizers==0.21.2
17
+ salesforce-lavis==1.0.2
18
+ tzdata==2025.2
19
+ prometheus_client==0.22.1
20
+ google-auth==2.40.3
21
+ ipython==8.37.0
22
+ pydantic==2.11.7
23
+ mathruler==0.1.0
24
+ six==1.17.0
25
+ python-dateutil==2.9.0.post0
26
+ requests==2.32.4
27
+ mistral_common==1.8.0
28
+ huggingface-hub==0.33.4
29
+ preshed==3.0.10
30
+ torchmetrics==1.7.4
31
+ blinker==1.9.0
32
+ nvidia-cusparse-cu12==12.3.1.170
33
+ rich-toolkit==0.14.8
34
+ pytz==2025.2
35
+ pandas==2.3.1
36
+ packaging==25.0
37
+ async-timeout==5.0.1
38
+ diskcache==5.6.3
39
+ google-api-core==2.25.1
40
+ parso==0.8.4
41
+ joblib==1.5.1
42
+ pycountry==24.6.1
43
+ triton==3.2.0
44
+ pybase64==1.4.1
45
+ marisa-trie==1.2.1
46
+ plotly==6.2.0
47
+ wandb==0.21.0
48
+ PyYAML==6.0.2
49
+ regex==2024.11.6
50
+ idna==3.10
51
+ numba==0.61.2
52
+ nvidia-curand-cu12==10.3.5.147
53
+ uvicorn==0.35.0
54
+ srsly==2.5.1
55
+ confection==0.1.5
56
+ opentelemetry-semantic-conventions-ai==0.4.11
57
+ typing-inspection==0.4.1
58
+ opencv-python-headless==4.12.0.88
59
+ pyasn1==0.6.1
60
+ av==15.0.0
61
+ xgrammar==0.1.18
62
+ distlib==0.3.9
63
+ datasets==4.0.0
64
+ networkx==3.4.2
65
+ prometheus-fastapi-instrumentator==7.1.0
66
+ lightning-utilities==0.14.3
67
+ executing==2.2.0
68
+ pycocoevalcap==1.2
69
+ h11==0.16.0
70
+ certifi==2025.7.14
71
+ sniffio==1.3.1
72
+ wheel==0.45.1
73
+ transformers==4.52.4
74
+ wrapt==1.17.2
75
+ jsonschema-specifications==2025.4.1
76
+ mpmath==1.3.0
77
+ msgspec==0.19.0
78
+ py-cpuinfo==9.0.0
79
+ contexttimer==0.3.3
80
+ watchdog==6.0.0
81
+ pexpect==4.9.0
82
+ webencodings==0.5.1
83
+ verl==0.3.2.dev0
84
+ webdataset==1.0.2
85
+ httpcore==1.0.9
86
+ opentelemetry-exporter-otlp==1.26.0
87
+ lm-format-enforcer==0.10.11
88
+ googleapis-common-protos==1.70.0
89
+ pyzmq==27.0.0
90
+ fsspec==2025.3.0
91
+ grpcio==1.73.1
92
+ cymem==2.0.11
93
+ timm==0.4.12
94
+ zipp==3.23.0
95
+ llguidance==0.7.30
96
+ opencensus-context==0.1.3
97
+ omegaconf==2.3.0
98
+ python-json-logger==3.3.0
99
+ opentelemetry-exporter-otlp-proto-common==1.26.0
100
+ watchfiles==1.1.0
101
+ nvidia-nvjitlink-cu12==12.4.127
102
+ peft==0.16.0
103
+ sentry-sdk==2.32.0
104
+ rpds-py==0.26.0
105
+ email_validator==2.2.0
106
+ nodeenv==1.9.1
107
+ distro==1.9.0
108
+ jiter==0.10.0
109
+ compressed-tensors==0.9.3
110
+ annotated-types==0.7.0
111
+ matplotlib-inline==0.1.7
112
+ rich==14.0.0
113
+ GitPython==3.1.44
114
+ lazy_loader==0.4
115
+ fastapi-cloud-cli==0.1.4
116
+ cupy-cuda12x==13.5.1
117
+ prompt_toolkit==3.0.51
118
+ gguf==0.17.1
119
+ blis==1.3.0
120
+ thinc==8.3.6
121
+ cloudpickle==3.1.1
122
+ multidict==6.6.3
123
+ nvidia-nvtx-cu12==12.4.127
124
+ flash-attn==2.7.1.post1
125
+ pyasn1_modules==0.4.2
126
+ rsa==4.9.1
127
+ weasel==0.4.1
128
+ uvloop==0.21.0
129
+ click==8.2.1
130
+ numpy==2.2.6
131
+ torchdata==0.11.0
132
+ pylatexenc==2.10
133
+ cachetools==5.5.2
134
+ Jinja2==3.1.6
135
+ typer==0.16.0
136
+ nvidia-cudnn-cu12==9.1.0.70
137
+ fastapi-cli==0.0.8
138
+ xxhash==3.5.0
139
+ tornado==6.5.1
140
+ scipy==1.15.3
141
+ rouge_score==0.1.2
142
+ cloudpathlib==0.21.1
143
+ streamlit==1.46.1
144
+ jedi==0.19.2
145
+ referencing==0.36.2
146
+ accelerate==1.8.1
147
+ decord==0.6.0
148
+ setuptools==78.1.1
149
+ mdurl==0.1.2
150
+ vllm==0.8.5.post1
151
+ identify==2.6.12
152
+ python-slugify==8.0.4
153
+ dnspython==2.7.0
154
+ dill==0.3.8
155
+ opentelemetry-proto==1.26.0
156
+ orjson==3.10.18
157
+ msgpack==1.1.1
158
+ aiohttp==3.12.14
159
+ aiosignal==1.4.0
160
+ typing_extensions==4.14.1
161
+ tiktoken==0.9.0
162
+ catalogue==2.0.10
163
+ platformdirs==4.3.8
164
+ narwhals==1.47.0
165
+ antlr4-python3-runtime==4.9.3
166
+ pydantic-extra-types==2.10.5
167
+ nvidia-cusolver-cu12==11.6.1.9
168
+ kaggle==1.7.4.5
169
+ propcache==0.3.2
170
+ urllib3==2.5.0
171
+ opentelemetry-exporter-otlp-proto-grpc==1.26.0
172
+ pydeck==0.9.1
173
+ nvidia-cufft-cu12==11.2.1.3
174
+ pyarrow==20.0.0
175
+ nvidia-nccl-cu12==2.21.5
176
+ httptools==0.6.4
177
+ qwen-vl-utils==0.0.11
178
+ markdown-it-py==3.0.0
179
+ gitdb==4.0.12
180
+ altair==5.5.0
181
+ torchvision==0.21.0
182
+ python-magic==0.4.27
183
+ iopath==0.1.10
184
+ ray==2.47.1
185
+ blake3==1.0.5
186
+ pillow==11.3.0
187
+ python-dotenv==1.1.1
188
+ torchaudio==2.6.0
189
+ partial-json-parser==0.2.1.1.post6
190
+ httpx==0.28.1
191
+ torch==2.6.0
192
+ anyio==4.9.0
193
+ fairscale==0.4.4
194
+ traitlets==5.14.3
195
+ pure_eval==0.2.3
196
+ sympy==1.13.1
197
+ nvidia-cusparselt-cu12==0.6.2
198
+ jsonschema==4.24.0
199
+ imageio==2.37.0
200
+ opencensus==0.11.4
201
+ stack-data==0.6.3
202
+ shellingham==1.5.4
203
+ tensordict==0.9.1
204
+ nvidia-cuda-runtime-cu12==12.4.127
205
+ nest-asyncio==1.6.0
206
+ einops==0.8.1
207
+ lark==1.2.2
208
+ tenacity==9.1.2
209
+ virtualenv==20.31.2
210
+ ptyprocess==0.7.0
211
+ outlines==0.1.11
212
+ depyf==0.18.0
213
+ starlette==0.47.1
214
+ cfgv==3.4.0
215
+ pre_commit==4.2.0
216
+ language_data==1.3.0
217
+ pip==25.1
218
+ Pygments==2.19.2
219
+ nvidia-cuda-cupti-cu12==12.4.127
220
+ protobuf==4.25.8
221
+ safetensors==0.5.3
222
+ text-unidecode==1.3
223
+ wcwidth==0.2.13
224
+ charset-normalizer==3.4.2
225
+ aiohappyeyeballs==2.6.1
226
+ outlines_core==0.1.26
227
+ fastrlock==0.8.3
228
+ asttokens==3.0.0
229
+ psutil==7.0.0
230
+ smmap==5.0.2
231
+ exceptiongroup==1.3.0
232
+ murmurhash==1.0.13
233
+ pytorch-lightning==2.5.2
234
+ filelock==3.18.0
235
+ astor==0.8.1
236
+ py-spy==0.4.0
237
+ pydantic_core==2.33.2
238
+ colorful==0.5.7
239
+ fastapi==0.116.1
240
+ opentelemetry-api==1.26.0
241
+ openai==1.90.0
242
+ ninja==1.11.1.4
243
+ opentelemetry-semantic-conventions==0.47b0
244
+ spacy-legacy==3.0.12
245
+ opendatasets==0.1.22
246
+ Deprecated==1.2.18
247
+ proto-plus==1.26.1
248
+ rignore==0.6.2
249
+ aiohttp-cors==0.8.1
250
+ liger_kernel==0.6.0
251
+ opentelemetry-exporter-prometheus==0.56b0
252
+ python-multipart==0.0.20
253
+ multiprocess==0.70.16
254
+ opentelemetry-sdk==1.26.0
255
+ decorator==5.2.1
256
+ xformers==0.0.29.post2
257
+ spacy==3.8.7
258
+ pyvers==0.1.0
259
+ pycocotools==2.0.10
260
+ websockets==15.0.1
261
+ wasabi==1.1.3
262
+ frozenlist==1.7.0
263
+ codetiming==1.4.0
264
+ sentencepiece==0.2.0
265
+ toml==0.10.2
266
+ scikit-image==0.25.2
267
+ ftfy==6.3.1
268
+ bleach==6.2.0
269
+ yarl==1.20.1
270
+ nvidia-cuda-nvrtc-cu12==12.4.127
271
+ importlib_metadata==8.0.0
272
+ spacy-loggers==1.0.5
273
+ smart_open==7.3.0.post1
274
+ portalocker==3.2.0
275
+ llvmlite==0.44.0
276
+ MarkupSafe==3.0.2
277
+ braceexpand==0.1.7
278
+ modelscope==1.28.0
279
+ verl==0.3.2.dev0
280
+ jaraco.context==5.3.0
281
+ more-itertools==10.3.0
282
+ jaraco.functools==4.0.1
283
+ jaraco.text==3.12.1
284
+ platformdirs==4.2.2
285
+ packaging==24.2
286
+ wheel==0.45.1
287
+ zipp==3.19.2
288
+ inflect==7.3.1
289
+ autocommand==2.2.2
290
+ typeguard==4.3.0
291
+ jaraco.collections==5.1.0
292
+ backports.tarfile==1.2.0
293
+ tomli==2.0.1
294
+ importlib_metadata==8.0.0
295
+ typing_extensions==4.12.2
EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/wandb-metadata.json ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-07-18T10:10:22.154415Z",
5
+ "args": [
6
+ "--node-ip-address=10.1.5.237",
7
+ "--node-manager-port=45779",
8
+ "--object-store-name=/tmp/ray/session_2025-07-18_18-08-41_995857_100101/sockets/plasma_store",
9
+ "--raylet-name=/tmp/ray/session_2025-07-18_18-08-41_995857_100101/sockets/raylet",
10
+ "--redis-address=None",
11
+ "--metrics-agent-port=60724",
12
+ "--logging-rotate-bytes=536870912",
13
+ "--logging-rotate-backup-count=5",
14
+ "--runtime-env-agent-port=59748",
15
+ "--gcs-address=10.1.5.237:65420",
16
+ "--session-name=session_2025-07-18_18-08-41_995857_100101",
17
+ "--temp-dir=/tmp/ray",
18
+ "--webui=127.0.0.1:8265",
19
+ "--cluster-id=d5af14f82d6884b3972b319ba9c8871ee6d621d0b395536182e77073",
20
+ "--startup-token=64",
21
+ "--worker-launch-time-ms=1752833324419",
22
+ "--node-id=affe3b277e8d66adb6a1a72266e2e5ce24fa5e48471c99f30a7a9bdf",
23
+ "--runtime-env-hash=-115784934",
24
+ "--enable-resource-isolation=false"
25
+ ],
26
+ "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py",
27
+ "git": {
28
+ "remote": "https://github.com/PorUna-byte/PAR.git",
29
+ "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db"
30
+ },
31
+ "email": "gia0603yucca@gmail.com",
32
+ "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples",
33
+ "host": "dsw-266702-dc4b748ff-f7c66",
34
+ "executable": "/root/miniconda3/envs/easyr1-new/bin/python3",
35
+ "cpu_count": 64,
36
+ "cpu_count_logical": 64,
37
+ "gpu": "NVIDIA A800-SXM4-80GB",
38
+ "gpu_count": 8,
39
+ "disk": {
40
+ "/": {
41
+ "total": "1623302262784",
42
+ "used": "1225166848"
43
+ }
44
+ },
45
+ "memory": {
46
+ "total": "549755813888"
47
+ },
48
+ "gpu_nvidia": [
49
+ {
50
+ "name": "NVIDIA A800-SXM4-80GB",
51
+ "architecture": "Ampere",
52
+ "uuid": "GPU-f7e858cd-ae03-031d-b834-86bf87923211"
53
+ },
54
+ {
55
+ "name": "NVIDIA A800-SXM4-80GB",
56
+ "architecture": "Ampere",
57
+ "uuid": "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0"
58
+ },
59
+ {
60
+ "name": "NVIDIA A800-SXM4-80GB",
61
+ "architecture": "Ampere",
62
+ "uuid": "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630"
63
+ },
64
+ {
65
+ "name": "NVIDIA A800-SXM4-80GB",
66
+ "architecture": "Ampere",
67
+ "uuid": "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71"
68
+ },
69
+ {
70
+ "name": "NVIDIA A800-SXM4-80GB",
71
+ "architecture": "Ampere",
72
+ "uuid": "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b"
73
+ },
74
+ {
75
+ "name": "NVIDIA A800-SXM4-80GB",
76
+ "architecture": "Ampere",
77
+ "uuid": "GPU-becb8d59-2ab7-b50d-5770-183c6478747a"
78
+ },
79
+ {
80
+ "name": "NVIDIA A800-SXM4-80GB",
81
+ "architecture": "Ampere",
82
+ "uuid": "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40"
83
+ },
84
+ {
85
+ "name": "NVIDIA A800-SXM4-80GB",
86
+ "architecture": "Ampere",
87
+ "uuid": "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655"
88
+ }
89
+ ],
90
+ "cudaVersion": "12.1",
91
+ "writerId": "71sc2v9oxtkr7yiqxoaago0bipl2xjby"
92
+ }
EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-07-18T18:10:23.417471358+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-07-18T18:10:25.159462304+08:00","level":"INFO","msg":"stream: created new stream","id":"zkytrm61"}
3
+ {"time":"2025-07-18T18:10:25.159507377+08:00","level":"INFO","msg":"stream: started","id":"zkytrm61"}
4
+ {"time":"2025-07-18T18:10:25.159528642+08:00","level":"INFO","msg":"handler: started","stream_id":"zkytrm61"}
5
+ {"time":"2025-07-18T18:10:25.15958268+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"zkytrm61"}
6
+ {"time":"2025-07-18T18:10:25.159587635+08:00","level":"INFO","msg":"sender: started","stream_id":"zkytrm61"}
EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-18 18:10:23,122 INFO MainThread:104882 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_setup.py:_flush():80] Configure stats pid to 104882
3
+ 2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings
5
+ 2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log
7
+ 2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log
8
+ 2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_init.py:init():830] calling init triggers
9
+ 2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 
'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 
'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}}
11
+ 2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_init.py:init():871] starting backend
12
+ 2025-07-18 18:10:23,360 INFO MainThread:104882 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-07-18 18:10:23,362 INFO MainThread:104882 [wandb_init.py:init():882] backend started and connected
14
+ 2025-07-18 18:10:23,390 INFO MainThread:104882 [wandb_init.py:init():953] updated telemetry
15
+ 2025-07-18 18:10:23,939 INFO MainThread:104882 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-07-18 18:10:26,092 INFO MainThread:104882 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-07-18 18:10:26,299 INFO MainThread:104882 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-07-18 18:10:26,299 INFO MainThread:104882 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-07-18 18:10:26,308 INFO MainThread:104882 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-07-18 18:10:26,319 INFO MainThread:104882 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-07-18 18:10:26,355 INFO MainThread:104882 [wandb_init.py:init():1075] run started, returning control to user process
EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/run-zkytrm61.wandb ADDED
File without changes
EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/output.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 160623, "uuid": "34b2c74ee0024065b840369ef674694c", "closed": false}
2
+ Start validation...
EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/requirements.txt ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setproctitle==1.2.2
2
+ colorama==0.4.6
3
+ psutil==7.0.0
4
+ attrs==25.3.0
5
+ tqdm==4.67.1
6
+ langcodes==3.5.0
7
+ nvidia-cublas-cu12==12.4.5.8
8
+ airportsdata==20250706
9
+ absl-py==2.3.1
10
+ hf-xet==1.1.5
11
+ opentelemetry-exporter-otlp-proto-http==1.26.0
12
+ interegular==0.3.3
13
+ tifffile==2025.5.10
14
+ nvidia-cufile-cu12==1.11.1.6
15
+ nltk==3.9.1
16
+ tokenizers==0.21.2
17
+ salesforce-lavis==1.0.2
18
+ tzdata==2025.2
19
+ prometheus_client==0.22.1
20
+ google-auth==2.40.3
21
+ ipython==8.37.0
22
+ pydantic==2.11.7
23
+ mathruler==0.1.0
24
+ six==1.17.0
25
+ python-dateutil==2.9.0.post0
26
+ requests==2.32.4
27
+ mistral_common==1.8.0
28
+ huggingface-hub==0.33.4
29
+ preshed==3.0.10
30
+ torchmetrics==1.7.4
31
+ blinker==1.9.0
32
+ nvidia-cusparse-cu12==12.3.1.170
33
+ rich-toolkit==0.14.8
34
+ pytz==2025.2
35
+ pandas==2.3.1
36
+ packaging==25.0
37
+ async-timeout==5.0.1
38
+ diskcache==5.6.3
39
+ google-api-core==2.25.1
40
+ parso==0.8.4
41
+ joblib==1.5.1
42
+ pycountry==24.6.1
43
+ triton==3.2.0
44
+ pybase64==1.4.1
45
+ marisa-trie==1.2.1
46
+ plotly==6.2.0
47
+ wandb==0.21.0
48
+ PyYAML==6.0.2
49
+ regex==2024.11.6
50
+ idna==3.10
51
+ numba==0.61.2
52
+ nvidia-curand-cu12==10.3.5.147
53
+ uvicorn==0.35.0
54
+ srsly==2.5.1
55
+ confection==0.1.5
56
+ opentelemetry-semantic-conventions-ai==0.4.11
57
+ typing-inspection==0.4.1
58
+ opencv-python-headless==4.12.0.88
59
+ pyasn1==0.6.1
60
+ av==15.0.0
61
+ xgrammar==0.1.18
62
+ distlib==0.3.9
63
+ datasets==4.0.0
64
+ networkx==3.4.2
65
+ prometheus-fastapi-instrumentator==7.1.0
66
+ lightning-utilities==0.14.3
67
+ executing==2.2.0
68
+ pycocoevalcap==1.2
69
+ h11==0.16.0
70
+ certifi==2025.7.14
71
+ sniffio==1.3.1
72
+ wheel==0.45.1
73
+ transformers==4.52.4
74
+ wrapt==1.17.2
75
+ jsonschema-specifications==2025.4.1
76
+ mpmath==1.3.0
77
+ msgspec==0.19.0
78
+ py-cpuinfo==9.0.0
79
+ contexttimer==0.3.3
80
+ watchdog==6.0.0
81
+ pexpect==4.9.0
82
+ webencodings==0.5.1
83
+ verl==0.3.2.dev0
84
+ webdataset==1.0.2
85
+ httpcore==1.0.9
86
+ opentelemetry-exporter-otlp==1.26.0
87
+ lm-format-enforcer==0.10.11
88
+ googleapis-common-protos==1.70.0
89
+ pyzmq==27.0.0
90
+ fsspec==2025.3.0
91
+ grpcio==1.73.1
92
+ cymem==2.0.11
93
+ timm==0.4.12
94
+ zipp==3.23.0
95
+ llguidance==0.7.30
96
+ opencensus-context==0.1.3
97
+ omegaconf==2.3.0
98
+ python-json-logger==3.3.0
99
+ opentelemetry-exporter-otlp-proto-common==1.26.0
100
+ watchfiles==1.1.0
101
+ nvidia-nvjitlink-cu12==12.4.127
102
+ peft==0.16.0
103
+ sentry-sdk==2.32.0
104
+ rpds-py==0.26.0
105
+ email_validator==2.2.0
106
+ nodeenv==1.9.1
107
+ distro==1.9.0
108
+ jiter==0.10.0
109
+ compressed-tensors==0.9.3
110
+ annotated-types==0.7.0
111
+ matplotlib-inline==0.1.7
112
+ rich==14.0.0
113
+ GitPython==3.1.44
114
+ lazy_loader==0.4
115
+ fastapi-cloud-cli==0.1.4
116
+ cupy-cuda12x==13.5.1
117
+ prompt_toolkit==3.0.51
118
+ gguf==0.17.1
119
+ blis==1.3.0
120
+ thinc==8.3.6
121
+ cloudpickle==3.1.1
122
+ multidict==6.6.3
123
+ nvidia-nvtx-cu12==12.4.127
124
+ flash-attn==2.7.1.post1
125
+ pyasn1_modules==0.4.2
126
+ rsa==4.9.1
127
+ weasel==0.4.1
128
+ uvloop==0.21.0
129
+ click==8.2.1
130
+ numpy==2.2.6
131
+ torchdata==0.11.0
132
+ pylatexenc==2.10
133
+ cachetools==5.5.2
134
+ Jinja2==3.1.6
135
+ typer==0.16.0
136
+ nvidia-cudnn-cu12==9.1.0.70
137
+ fastapi-cli==0.0.8
138
+ xxhash==3.5.0
139
+ tornado==6.5.1
140
+ scipy==1.15.3
141
+ rouge_score==0.1.2
142
+ cloudpathlib==0.21.1
143
+ streamlit==1.46.1
144
+ jedi==0.19.2
145
+ referencing==0.36.2
146
+ accelerate==1.8.1
147
+ decord==0.6.0
148
+ setuptools==78.1.1
149
+ mdurl==0.1.2
150
+ vllm==0.8.5.post1
151
+ identify==2.6.12
152
+ python-slugify==8.0.4
153
+ dnspython==2.7.0
154
+ dill==0.3.8
155
+ opentelemetry-proto==1.26.0
156
+ orjson==3.10.18
157
+ msgpack==1.1.1
158
+ aiohttp==3.12.14
159
+ aiosignal==1.4.0
160
+ typing_extensions==4.14.1
161
+ tiktoken==0.9.0
162
+ catalogue==2.0.10
163
+ platformdirs==4.3.8
164
+ narwhals==1.47.0
165
+ antlr4-python3-runtime==4.9.3
166
+ pydantic-extra-types==2.10.5
167
+ nvidia-cusolver-cu12==11.6.1.9
168
+ kaggle==1.7.4.5
169
+ propcache==0.3.2
170
+ urllib3==2.5.0
171
+ opentelemetry-exporter-otlp-proto-grpc==1.26.0
172
+ pydeck==0.9.1
173
+ nvidia-cufft-cu12==11.2.1.3
174
+ pyarrow==20.0.0
175
+ nvidia-nccl-cu12==2.21.5
176
+ httptools==0.6.4
177
+ qwen-vl-utils==0.0.11
178
+ markdown-it-py==3.0.0
179
+ gitdb==4.0.12
180
+ altair==5.5.0
181
+ torchvision==0.21.0
182
+ python-magic==0.4.27
183
+ iopath==0.1.10
184
+ ray==2.47.1
185
+ blake3==1.0.5
186
+ pillow==11.3.0
187
+ python-dotenv==1.1.1
188
+ torchaudio==2.6.0
189
+ partial-json-parser==0.2.1.1.post6
190
+ httpx==0.28.1
191
+ torch==2.6.0
192
+ anyio==4.9.0
193
+ fairscale==0.4.4
194
+ traitlets==5.14.3
195
+ pure_eval==0.2.3
196
+ sympy==1.13.1
197
+ nvidia-cusparselt-cu12==0.6.2
198
+ jsonschema==4.24.0
199
+ imageio==2.37.0
200
+ opencensus==0.11.4
201
+ stack-data==0.6.3
202
+ shellingham==1.5.4
203
+ tensordict==0.9.1
204
+ nvidia-cuda-runtime-cu12==12.4.127
205
+ nest-asyncio==1.6.0
206
+ einops==0.8.1
207
+ lark==1.2.2
208
+ tenacity==9.1.2
209
+ virtualenv==20.31.2
210
+ ptyprocess==0.7.0
211
+ outlines==0.1.11
212
+ depyf==0.18.0
213
+ starlette==0.47.1
214
+ cfgv==3.4.0
215
+ pre_commit==4.2.0
216
+ language_data==1.3.0
217
+ pip==25.1
218
+ Pygments==2.19.2
219
+ nvidia-cuda-cupti-cu12==12.4.127
220
+ protobuf==4.25.8
221
+ safetensors==0.5.3
222
+ text-unidecode==1.3
223
+ wcwidth==0.2.13
224
+ charset-normalizer==3.4.2
225
+ aiohappyeyeballs==2.6.1
226
+ outlines_core==0.1.26
227
+ fastrlock==0.8.3
228
+ asttokens==3.0.0
229
+ psutil==7.0.0
230
+ smmap==5.0.2
231
+ exceptiongroup==1.3.0
232
+ murmurhash==1.0.13
233
+ pytorch-lightning==2.5.2
234
+ filelock==3.18.0
235
+ astor==0.8.1
236
+ py-spy==0.4.0
237
+ pydantic_core==2.33.2
238
+ colorful==0.5.7
239
+ fastapi==0.116.1
240
+ opentelemetry-api==1.26.0
241
+ openai==1.90.0
242
+ ninja==1.11.1.4
243
+ opentelemetry-semantic-conventions==0.47b0
244
+ spacy-legacy==3.0.12
245
+ opendatasets==0.1.22
246
+ Deprecated==1.2.18
247
+ proto-plus==1.26.1
248
+ rignore==0.6.2
249
+ aiohttp-cors==0.8.1
250
+ liger_kernel==0.6.0
251
+ opentelemetry-exporter-prometheus==0.56b0
252
+ python-multipart==0.0.20
253
+ multiprocess==0.70.16
254
+ opentelemetry-sdk==1.26.0
255
+ decorator==5.2.1
256
+ xformers==0.0.29.post2
257
+ spacy==3.8.7
258
+ pyvers==0.1.0
259
+ pycocotools==2.0.10
260
+ websockets==15.0.1
261
+ wasabi==1.1.3
262
+ frozenlist==1.7.0
263
+ codetiming==1.4.0
264
+ sentencepiece==0.2.0
265
+ toml==0.10.2
266
+ scikit-image==0.25.2
267
+ ftfy==6.3.1
268
+ bleach==6.2.0
269
+ yarl==1.20.1
270
+ nvidia-cuda-nvrtc-cu12==12.4.127
271
+ importlib_metadata==8.0.0
272
+ spacy-loggers==1.0.5
273
+ smart_open==7.3.0.post1
274
+ portalocker==3.2.0
275
+ llvmlite==0.44.0
276
+ MarkupSafe==3.0.2
277
+ braceexpand==0.1.7
278
+ modelscope==1.28.0
279
+ verl==0.3.2.dev0
280
+ jaraco.context==5.3.0
281
+ more-itertools==10.3.0
282
+ jaraco.functools==4.0.1
283
+ jaraco.text==3.12.1
284
+ platformdirs==4.2.2
285
+ packaging==24.2
286
+ wheel==0.45.1
287
+ zipp==3.19.2
288
+ inflect==7.3.1
289
+ autocommand==2.2.2
290
+ typeguard==4.3.0
291
+ jaraco.collections==5.1.0
292
+ backports.tarfile==1.2.0
293
+ tomli==2.0.1
294
+ importlib_metadata==8.0.0
295
+ typing_extensions==4.12.2
EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/wandb-metadata.json ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-07-18T12:19:06.753628Z",
5
+ "args": [
6
+ "--node-ip-address=10.1.5.237",
7
+ "--node-manager-port=37651",
8
+ "--object-store-name=/tmp/ray/session_2025-07-18_20-17-27_987959_155806/sockets/plasma_store",
9
+ "--raylet-name=/tmp/ray/session_2025-07-18_20-17-27_987959_155806/sockets/raylet",
10
+ "--redis-address=None",
11
+ "--metrics-agent-port=46087",
12
+ "--logging-rotate-bytes=536870912",
13
+ "--logging-rotate-backup-count=5",
14
+ "--runtime-env-agent-port=64279",
15
+ "--gcs-address=10.1.5.237:55485",
16
+ "--session-name=session_2025-07-18_20-17-27_987959_155806",
17
+ "--temp-dir=/tmp/ray",
18
+ "--webui=127.0.0.1:8265",
19
+ "--cluster-id=498a1e71e98cee5fa6c370066e878218480d78d02b0b0a20550a5571",
20
+ "--startup-token=64",
21
+ "--worker-launch-time-ms=1752841050410",
22
+ "--node-id=18fda1330b00f9c3f38fdc4c1387555fb29a9f963f649897c6fc1dc5",
23
+ "--runtime-env-hash=-115784934",
24
+ "--enable-resource-isolation=false"
25
+ ],
26
+ "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py",
27
+ "git": {
28
+ "remote": "https://github.com/PorUna-byte/PAR.git",
29
+ "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db"
30
+ },
31
+ "email": "gia0603yucca@gmail.com",
32
+ "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples",
33
+ "host": "dsw-266702-dc4b748ff-f7c66",
34
+ "executable": "/root/miniconda3/envs/easyr1-new/bin/python3",
35
+ "cpu_count": 64,
36
+ "cpu_count_logical": 64,
37
+ "gpu": "NVIDIA A800-SXM4-80GB",
38
+ "gpu_count": 8,
39
+ "disk": {
40
+ "/": {
41
+ "total": "1623302262784",
42
+ "used": "1225195520"
43
+ }
44
+ },
45
+ "memory": {
46
+ "total": "549755813888"
47
+ },
48
+ "gpu_nvidia": [
49
+ {
50
+ "name": "NVIDIA A800-SXM4-80GB",
51
+ "architecture": "Ampere",
52
+ "uuid": "GPU-f7e858cd-ae03-031d-b834-86bf87923211"
53
+ },
54
+ {
55
+ "name": "NVIDIA A800-SXM4-80GB",
56
+ "architecture": "Ampere",
57
+ "uuid": "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0"
58
+ },
59
+ {
60
+ "name": "NVIDIA A800-SXM4-80GB",
61
+ "architecture": "Ampere",
62
+ "uuid": "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630"
63
+ },
64
+ {
65
+ "name": "NVIDIA A800-SXM4-80GB",
66
+ "architecture": "Ampere",
67
+ "uuid": "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71"
68
+ },
69
+ {
70
+ "name": "NVIDIA A800-SXM4-80GB",
71
+ "architecture": "Ampere",
72
+ "uuid": "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b"
73
+ },
74
+ {
75
+ "name": "NVIDIA A800-SXM4-80GB",
76
+ "architecture": "Ampere",
77
+ "uuid": "GPU-becb8d59-2ab7-b50d-5770-183c6478747a"
78
+ },
79
+ {
80
+ "name": "NVIDIA A800-SXM4-80GB",
81
+ "architecture": "Ampere",
82
+ "uuid": "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40"
83
+ },
84
+ {
85
+ "name": "NVIDIA A800-SXM4-80GB",
86
+ "architecture": "Ampere",
87
+ "uuid": "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655"
88
+ }
89
+ ],
90
+ "cudaVersion": "12.1",
91
+ "writerId": "99k3ygrrojzudcyj8lzv8s7kibi19jo9"
92
+ }
EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-07-18T20:19:07.57787547+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-07-18T20:19:13.028328674+08:00","level":"INFO","msg":"stream: created new stream","id":"eo9xzqez"}
3
+ {"time":"2025-07-18T20:19:13.038047308+08:00","level":"INFO","msg":"stream: started","id":"eo9xzqez"}
4
+ {"time":"2025-07-18T20:19:13.038097996+08:00","level":"INFO","msg":"handler: started","stream_id":"eo9xzqez"}
5
+ {"time":"2025-07-18T20:19:13.038104971+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"eo9xzqez"}
6
+ {"time":"2025-07-18T20:19:13.038125386+08:00","level":"INFO","msg":"sender: started","stream_id":"eo9xzqez"}
EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Configure stats pid to 160623
3
+ 2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings
5
+ 2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log
7
+ 2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log
8
+ 2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_init.py:init():830] calling init triggers
9
+ 2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 
'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 
'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}}
11
+ 2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_init.py:init():871] starting backend
12
+ 2025-07-18 20:19:07,479 INFO MainThread:160623 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-07-18 20:19:07,481 INFO MainThread:160623 [wandb_init.py:init():882] backend started and connected
14
+ 2025-07-18 20:19:07,510 INFO MainThread:160623 [wandb_init.py:init():953] updated telemetry
15
+ 2025-07-18 20:19:08,011 INFO MainThread:160623 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-07-18 20:19:17,965 INFO MainThread:160623 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-07-18 20:19:18,182 INFO MainThread:160623 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-07-18 20:19:18,183 INFO MainThread:160623 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-07-18 20:19:18,213 INFO MainThread:160623 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-07-18 20:19:18,213 INFO MainThread:160623 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-07-18 20:19:18,223 INFO MainThread:160623 [wandb_init.py:init():1075] run started, returning control to user process
EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/run-eo9xzqez.wandb ADDED
File without changes
EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/output.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 182641, "uuid": "99853167d0014a0cbe06d35970a786c8", "closed": false}
2
+ Start validation...