#!/bin/bash
# EasyR1-new/examples/baselines/qwen2_5_vl_3b_clevr.sh
# Baseline: train Qwen2.5-VL-3B on the CLEVR count-70k dataset with the r1v
# (format + accuracy) reward function, 2 GPUs.

set -x

export PYTHONUNBUFFERED=1

MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct  # replace it with your local file path

python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=BUAADreamer/clevr_count_70k@train \
    data.val_files=BUAADreamer/clevr_count_70k@test \
    data.format_prompt=./examples/format_prompt/r1v.jinja \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.reward.reward_type=sequential \
    worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \
    trainer.experiment_name=qwen2_5_vl_3b_clevr \
    trainer.n_gpus_per_node=2

# --- EasyR1-new/examples/baselines/qwen2_5_vl_3b_geoqa8k.sh ---
#!/bin/bash
# Baseline: same recipe on the GEOQA-8K (R1V) geometry dataset, 8 GPUs.

set -x

export PYTHONUNBUFFERED=1

MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct  # replace it with your local file path

python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=leonardPKU/GEOQA_8K_R1V@train \
    data.val_files=leonardPKU/GEOQA_8K_R1V@test \
    data.format_prompt=./examples/format_prompt/r1v.jinja \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.reward.reward_type=sequential \
    worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \
    trainer.experiment_name=qwen2_5_vl_3b_geoqa8k \
    trainer.n_gpus_per_node=8
--- EasyR1-new/examples/format_prompt/bio_format.jinja ---
{{ content | trim }} You must first reason through the question step by step, as if you're thinking aloud. Enclose your full reasoning process within <think> </think> tags. After your reasoning, output only the number corresponding to the final answer choice inside <answer> </answer> tags. For example: <think> reasoning process </think> <answer> result number </answer>

--- EasyR1-new/examples/format_prompt/dapo.jinja ---
Solve the following math problem step by step. The last line of your response should be of the form Answer: $Answer (without quotes) where $Answer is the answer to the problem.\n\n{{ content | trim }}\n\nRemember to put your answer on its own line after "Answer:".

--- EasyR1-new/examples/format_prompt/math.jinja ---
{{ content | trim }} You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}.
--- EasyR1-new/examples/format_prompt/r1v.jinja ---
{{ content | trim }} A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>

--- EasyR1-new/examples/qwen2_5_vl_7b_multi_image.sh (header; remainder continues below) ---
#!/bin/bash
# REMINDER: this script uses test data split and should ONLY be used for debugging. DO NOT use for training.
+ +set -x + +export PYTHONUNBUFFERED=1 + +MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path + +python3 -m verl.trainer.main \ + config=examples/config.yaml \ + data.train_files=hiyouga/journeybench-multi-image-vqa@train \ + data.val_files=hiyouga/journeybench-multi-image-vqa@test \ + data.rollout_batch_size=256 \ + worker.actor.model.model_path=${MODEL_PATH} \ + worker.rollout.limit_images=2 \ + trainer.experiment_name=qwen2_5_vl_7b_multi_image \ + trainer.n_gpus_per_node=8 diff --git a/EasyR1-new/examples/qwen3_14b_dapo17k_dapo.sh b/EasyR1-new/examples/qwen3_14b_dapo17k_dapo.sh new file mode 100644 index 0000000000000000000000000000000000000000..18cc7b3408f22c12fb4193b2da3ed957394ca34c --- /dev/null +++ b/EasyR1-new/examples/qwen3_14b_dapo17k_dapo.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +set -x + +export PYTHONUNBUFFERED=1 + +MODEL_PATH=Qwen/Qwen3-14B-Base # replace it with your local file path + +python3 -m verl.trainer.main \ + config=examples/config.yaml \ + data.train_files=Saigyouji-Yuyuko1000/dapo17k@train \ + data.val_files=Saigyouji-Yuyuko1000/dapo17k@test \ + data.format_prompt=./examples/format_prompt/dapo.jinja \ + data.max_prompt_length=2048 \ + data.max_response_length=20480 \ + data.rollout_batch_size=512 \ + data.mini_rollout_batch_size=256 \ + worker.actor.micro_batch_size_per_device_for_update=1 \ + worker.actor.micro_batch_size_per_device_for_experience=8 \ + worker.actor.model.model_path=${MODEL_PATH} \ + worker.actor.fsdp.torch_dtype=bf16 \ + worker.actor.optim.strategy=adamw_bf16 \ + worker.actor.optim.weight_decay=0.1 \ + worker.actor.optim.lr_warmup_steps=10 \ + worker.actor.global_batch_size=32 \ + worker.actor.clip_ratio_low=0.2 \ + worker.actor.clip_ratio_high=0.28 \ + worker.actor.clip_ratio_dual=10.0 \ + worker.rollout.n=16 \ + worker.rollout.max_num_batched_tokens=22528 \ + worker.rollout.val_override_config='{"n":16,"temperature":1.0,"top_p":0.7}' \ + worker.rollout.gpu_memory_utilization=0.8 \ + 
worker.reward.reward_function=./examples/reward_function/dapo.py:compute_score \ + worker.reward.reward_function_kwargs='{"max_response_length":20480,"overlong_buffer_length":4096,"overlong_penalty_factor":1.0}' \ + algorithm.disable_kl=True \ + algorithm.online_filtering=True \ + algorithm.filter_key=accuracy_normalized \ + algorithm.filter_low=0.01 \ + algorithm.filter_high=0.99 \ + trainer.total_epochs=10 \ + trainer.max_try_make_batch=10 \ + trainer.experiment_name=qwen3_14b_dapo17k_dapo \ + trainer.n_gpus_per_node=8 diff --git a/EasyR1-new/examples/qwen3_4b_math_grpo.sh b/EasyR1-new/examples/qwen3_4b_math_grpo.sh new file mode 100644 index 0000000000000000000000000000000000000000..32bbaac99327c298aa2aaf13e5e5f2f61b3e2d45 --- /dev/null +++ b/EasyR1-new/examples/qwen3_4b_math_grpo.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -x + +export PYTHONUNBUFFERED=1 + +MODEL_PATH=Qwen/Qwen3-4B # replace it with your local file path + +python3 -m verl.trainer.main \ + config=examples/config.yaml \ + data.max_response_length=4096 \ + worker.actor.model.model_path=${MODEL_PATH} \ + trainer.experiment_name=qwen3_4b_math_grpo diff --git a/EasyR1-new/examples/reward_function/__pycache__/bio.cpython-310.pyc b/EasyR1-new/examples/reward_function/__pycache__/bio.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f1d41a12dfdb23d100c6e1605e07cfd55ebaa183 Binary files /dev/null and b/EasyR1-new/examples/reward_function/__pycache__/bio.cpython-310.pyc differ diff --git a/EasyR1-new/examples/reward_function/bio.py b/EasyR1-new/examples/reward_function/bio.py new file mode 100644 index 0000000000000000000000000000000000000000..3eed5582876e57d1ec0d0f0d092284e8a8841eaa --- /dev/null +++ b/EasyR1-new/examples/reward_function/bio.py @@ -0,0 +1,183 @@ + + + +from itertools import islice, zip_longest +from typing import Callable, Dict, List, Optional, Tuple, TypedDict +import json + +def repeatness_reward(s: str): + def ranks(l): + index = {v: i for i, v in 
# EasyR1-new/examples/reward_function/bio.py
#
# Reward functions for the protein-localization multiple-choice task:
# a format reward (<think>/<answer> layout), an accuracy reward (single-digit
# answer vs. ground truth) and an anti-repetition reward over the <think> text.
#
# NOTE(review): the literal <think>/<answer> tags had been stripped from the
# extracted copy of the regexes below; they are restored here to match the
# bio_format.jinja prompt ("<think> reasoning </think> <answer> number </answer>").

import json
import re
from itertools import islice, zip_longest
from typing import Any, Dict, List, Optional


def repeatness_reward(s: str) -> float:
    """Score how repetitive *s* is via the sum of adjacent suffix LCPs.

    Returns 1.0 for a string with no internal repetition, smaller (possibly
    negative) values the more the string repeats itself; 0 for len(s) <= 1.
    """

    def ranks(seq):
        # Map each value to its rank among the distinct values of seq.
        index = {v: i for i, v in enumerate(sorted(set(seq)))}
        return [index[v] for v in seq]

    def suffix_array(arr):
        # Prefix-doubling suffix array construction; returns (rank, sa).
        line = ranks(arr)
        n, k, ans, sa = len(arr), 1, line, [0] * len(arr)
        # NOTE(review): the textbook loop condition is `k < n`; kept as-is to
        # preserve the original behavior — confirm before changing.
        while k < n - 1:
            line = ranks(list(zip_longest(line, islice(line, k, None), fillvalue=-1)))
            ans, k = line, k << 1
        for i, r in enumerate(ans):
            sa[r] = i
        return ans, sa

    def lcp(arr, suffix_arr, inv_suff):
        # Kasai's algorithm: LCP of each suffix with its successor in sorted order.
        n, ans, k = len(arr), [0] * len(arr), 0
        for i in range(n):
            if inv_suff[i] == n - 1:
                k = 0
                continue
            j = suffix_arr[inv_suff[i] + 1]
            while i + k < n and j + k < n and arr[i + k] == arr[j + k]:
                k += 1
            ans[inv_suff[i]] = k
            if k > 0:
                k -= 1
        return ans

    arr = [ord(ch) for ch in s]
    n = len(arr)
    if n <= 1:
        return 0
    inv, sa = suffix_array(arr)
    cnt = sum(lcp(arr, sa, inv))
    return 1 - cnt * 2 / (n * (n + 1))


def format_reward(predict_str: str) -> float:
    """Strict format reward.

    The (stripped) response must be exactly ``<think>...</think>`` followed by
    ``<answer>D</answer>`` with a single digit D and nothing else.
    """
    pattern = r"^<think>.*?</think>\s*<answer>\s*([0-9])\s*</answer>$"
    return 1.0 if re.fullmatch(pattern, predict_str.strip(), re.DOTALL) else 0.0


def acc_reward(predict_str: str, ground_truth: Any) -> float:
    """Accuracy reward: 1.0 when the digit inside <answer></answer> equals the
    ground truth, else 0.0.

    The ground truth may arrive as an int or a numeric string (the original
    compared ``int(answer) == ground_truth``, which was always False for
    string ground truths); both are now compared as ints.
    """
    match = re.search(r"<answer>\s*([0-9])\s*</answer>", predict_str)
    if not match:
        return 0.0
    answer = int(match.group(1))
    try:
        return 1.0 if answer == int(str(ground_truth).strip()) else 0.0
    except (TypeError, ValueError):
        return 0.0


def compute_score(
    predicts: List[str],
    ground_truths: List[str],
    format_weight: float = 0.1,
    save_path: Optional[str] = None,
) -> List[Dict[str, float]]:
    """Batch reward: per sample returns format/accuracy/repetition scores plus
    their plain sum as "overall".

    Args:
        predicts: model responses.
        ground_truths: expected answer digits (int or str).
        format_weight: accepted for interface compatibility but unused —
            "overall" is the unweighted sum, matching the original behavior.
        save_path: optional JSONL dump path for per-sample results. The
            original wrote unconditionally to a hard-coded /nas/... path,
            which crashed on machines without that mount and overwrote the
            previous dump on every call; writing is now opt-in.
    """
    scores: List[Dict[str, float]] = []
    records: List[Dict[str, Any]] = []
    for predict, ground_truth in zip(predicts, ground_truths):
        format_score = format_reward(predict)
        acc_score = acc_reward(predict, ground_truth)

        # The anti-repetition reward is computed on the <think> block only.
        think_match = re.search(r"<think>(.*?)</think>", predict, re.DOTALL)
        think_str = think_match.group(1).strip() if think_match else ""
        repeat_score = repeatness_reward(think_str)

        score = {
            "overall": format_score + acc_score + repeat_score,
            "format": format_score,
            "accuracy": acc_score,
            "repeat": repeat_score,
        }
        scores.append(score)
        records.append({"solution_str": predict, "ground_truth": ground_truth, **score})

    if save_path:
        with open(save_path, "w", encoding="utf-8") as f:
            for record in records:
                f.write(json.dumps(record, ensure_ascii=False) + "\n")

    return scores


def check_rewards(jsonl_path: str) -> List[Dict[str, float]]:
    """Recompute and print the three rewards for every record of a JSONL dump
    produced by :func:`compute_score`; returns the list of score dicts."""
    results: List[Dict[str, float]] = []
    with open(jsonl_path, "r", encoding="utf-8") as f:
        for line in f:
            data = json.loads(line)
            solution_str = data["solution_str"]
            ground_truth = data["ground_truth"]

            format_score = format_reward(solution_str)
            acc_score = acc_reward(solution_str, ground_truth)
            think_match = re.search(r"<think>(.*?)</think>", solution_str, re.DOTALL)
            think_str = think_match.group(1).strip() if think_match else ""
            repeat_score = repeatness_reward(think_str)

            result = {
                "format": format_score,
                "accuracy": acc_score,
                "repeat": repeat_score,
                "overall": format_score + acc_score + repeat_score,
            }
            results.append(result)
            print(json.dumps(result, indent=2, ensure_ascii=False))
    return results


if __name__ == "__main__":
    # Previously this call ran unconditionally at import time (against a
    # hard-coded NAS path), breaking `import bio` on any other machine.
    check_rewards("/nas/shared/kilab/wangyujia/check_rl/check.jsonl")
# EasyR1-new/examples/reward_function/dapo.py
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from typing import Any, Dict, List


# Ordered textual substitutions applied while normalizing a final answer.
SUBSTITUTIONS = [
    ("an ", ""),
    ("a ", ""),
    (".$", "$"),
    ("\\$", ""),
    (r"\ ", ""),
    (" ", ""),
    ("mbox", "text"),
    (",\\text{and}", ","),
    ("\\text{and}", ","),
    ("\\text{m}", "\\text{}"),
]

# Unit / filler expressions stripped outright from candidate answers.
REMOVED_EXPRESSIONS = [
    "square",
    "ways",
    "integers",
    "dollars",
    "mph",
    "inches",
    "hours",
    "km",
    "units",
    "\\ldots",
    "sue",
    "points",
    "feet",
    "minutes",
    "digits",
    "cents",
    "degrees",
    "cm",
    "gm",
    "pounds",
    "meters",
    "meals",
    "edges",
    "students",
    "childrentickets",
    "multiples",
    "\\text{s}",
    "\\text{.}",
    "\\text{\ns}",
    "\\text{}^2",
    "\\text{}^3",
    "\\text{\n}",
    "\\text{}",
    r"\mathrm{th}",
    r"^\circ",
    r"^{\circ}",
    r"\;",
    r",\!",
    "{,}",
    '"',
    "\\dots",
]


def normalize_final_answer(final_answer: str) -> str:
    """Normalize a final answer to a quantitative reasoning question.

    Args:
        final_answer: The answer string to normalize

    Returns:
        Normalized answer string
    """
    # Keep only the right-hand side of any equation.
    final_answer = final_answer.split("=")[-1]

    # Ordered substitutions first, then outright removals.
    for old, new in SUBSTITUTIONS:
        final_answer = final_answer.replace(old, new)
    for junk in REMOVED_EXPRESSIONS:
        final_answer = final_answer.replace(junk, "")

    # Keep the first $...$ math span, then unwrap common TeX wrappers.
    final_answer = re.sub(r"(.*?)(\$)(.*?)(\$)(.*)", "$\\3$", final_answer)
    for wrapper in (r"\\text\{", r"\\textbf\{", r"\\overline\{"):
        final_answer = re.sub("(" + wrapper + r")(.*?)(\})", "\\2", final_answer)
    final_answer = re.sub(r"(\\boxed\{)(.*)(\})", "\\2", final_answer)

    # Normalize shorthand TeX:
    #   \fracab  -> \frac{a}{b}     \fracabc -> \frac{a}{b}c
    #   \sqrta   -> \sqrt{a}        \sqrtab  -> \sqrt{a}b
    final_answer = re.sub(r"(frac)([^{])(.)", "frac{\\2}{\\3}", final_answer)
    final_answer = re.sub(r"(sqrt)([^{])", "sqrt{\\2}", final_answer)
    final_answer = final_answer.replace("$", "")

    # Drop thousands separators from plain integers.
    if final_answer.replace(",", "").isdigit():
        final_answer = final_answer.replace(",", "")

    return final_answer.strip()


def accuracy_reward(response: str, ground_truth: str) -> float:
    """Signed accuracy: +1 when the last "Answer: ..." line matches the ground
    truth after normalization, otherwise -1."""
    matches = re.findall(r"(?i)Answer\s*:\s*([^\n]+)", response)
    candidate = matches[-1] if matches else "[INVALID]"
    return 1.0 if normalize_final_answer(candidate) == normalize_final_answer(ground_truth) else -1.0


def soft_overlong_punishment(response_length: int, max_response_length: int, overlong_buffer_length: int):
    """Piecewise-linear length penalty: 0 up to the soft limit, ramping
    linearly to -1 across the overlong buffer, capped at -1 beyond it."""
    soft_limit = max_response_length - overlong_buffer_length
    if response_length <= soft_limit:
        return 0.0
    if response_length > max_response_length:
        return -1.0
    return (soft_limit - response_length) / overlong_buffer_length


def compute_score(
    reward_inputs: List[Dict[str, Any]],
    max_response_length: int,
    overlong_buffer_length: int,
    overlong_penalty_factor: float,
) -> List[Dict[str, float]]:
    """Batch DAPO reward: signed accuracy plus a scaled soft length penalty.

    Raises:
        ValueError: when called with a single dict (use ``reward_type=batch``).
    """
    if not isinstance(reward_inputs, list):
        raise ValueError("Please use `reward_type=batch` for dapo reward function.")

    scores = []
    for sample in reward_inputs:
        # Only the tail can hold the final answer (longest MATH-500 answer: 159 chars).
        tail = sample["response"][-300:]
        accuracy_score = accuracy_reward(tail, sample["ground_truth"])
        overlong_score = soft_overlong_punishment(
            sample["response_length"], max_response_length, overlong_buffer_length
        )
        scores.append(
            {
                "overall": accuracy_score + overlong_score * overlong_penalty_factor,
                "accuracy": accuracy_score,
                "overlong": overlong_score,
                "accuracy_normalized": 0.5 * (accuracy_score + 1.0),
            }
        )

    return scores
# EasyR1-new/examples/reward_function/math.py
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from typing import Any, Dict, List

from mathruler.grader import extract_boxed_content, grade_answer


def format_reward(response: str) -> float:
    """Return 1.0 iff the whole response is a <think>...</think> block with a
    \\boxed{...} final answer somewhere after it, else 0.0.

    NOTE(review): the literal <think>/</think> tags had been stripped from the
    extracted copy of this pattern (leaving ``.*.*\\boxed``); restored here to
    match the math.jinja format prompt.
    """
    pattern = re.compile(r"<think>.*</think>.*\\boxed\{.*\}.*", re.DOTALL)
    format_match = re.fullmatch(pattern, response)
    return 1.0 if format_match else 0.0


def accuracy_reward(response: str, ground_truth: str) -> float:
    """Return 1.0 when the \\boxed{} content matches the ground truth according
    to mathruler's grader, else 0.0."""
    answer = extract_boxed_content(response)
    return 1.0 if grade_answer(answer, ground_truth) else 0.0


def compute_score(reward_inputs: List[Dict[str, Any]], format_weight: float = 0.1) -> List[Dict[str, float]]:
    """Batch reward: per-sample weighted mix of accuracy (1 - format_weight)
    and format (format_weight).

    Raises:
        ValueError: when called with a single dict (use ``reward_type=batch``).
    """
    if not isinstance(reward_inputs, list):
        raise ValueError("Please use `reward_type=batch` for math reward function.")

    scores = []
    for reward_input in reward_inputs:
        # Collapse whitespace around angle brackets and slashes, e.g.
        # "< think >" -> "<think>" (handles qwen2.5vl-32b spacing).
        response = re.sub(r"\s*(<|>|/)\s*", r"\1", reward_input["response"])
        format_score = format_reward(response)
        accuracy_score = accuracy_reward(response, reward_input["ground_truth"])
        scores.append(
            {
                "overall": (1 - format_weight) * accuracy_score + format_weight * format_score,
                "format": format_score,
                "accuracy": accuracy_score,
            }
        )

    return scores
# EasyR1-new/examples/reward_function/r1v.py
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from typing import Any, Dict

from mathruler.grader import grade_answer


def format_reward(response: str) -> float:
    """Return 1.0 iff the whole response is ``<think>...</think>`` followed by
    ``<answer>...</answer>``, else 0.0.

    NOTE(review): the literal tags had been stripped from the extracted copy of
    this regex (leaving ``.*?\\s*.*?``); restored to match the r1v.jinja prompt.
    """
    pattern = re.compile(r"<think>.*?</think>\s*<answer>.*?</answer>", re.DOTALL)
    format_match = re.fullmatch(pattern, response)
    return 1.0 if format_match else 0.0


def accuracy_reward(response: str, ground_truth: str) -> float:
    """Grade the <answer> block (or the whole response when the tags are
    missing) against the ground truth; any grader error scores 0.0."""
    try:
        content_match = re.search(r"<answer>(.*?)</answer>", response)
        given_answer = content_match.group(1).strip() if content_match else response.strip()
        if grade_answer(given_answer, ground_truth.strip()):
            return 1.0
    except Exception:  # grader may raise on malformed TeX; treat as incorrect
        pass
    return 0.0


def compute_score(reward_input: Dict[str, Any], format_weight: float = 0.5) -> Dict[str, float]:
    """Per-sample reward: weighted mix of accuracy and format scores.

    Raises:
        ValueError: when called with a batch list (use ``reward_type=sequential``).
    """
    if not isinstance(reward_input, dict):
        raise ValueError("Please use `reward_type=sequential` for r1v reward function.")

    format_score = format_reward(reward_input["response"])
    accuracy_score = accuracy_reward(reward_input["response"], reward_input["ground_truth"])
    return {
        "overall": (1 - format_weight) * accuracy_score + format_weight * format_score,
        "format": format_score,
        "accuracy": accuracy_score,
    }


# ---------------------------------------------------------------------------
# NOTE(review): the extracted source interleaved the following unrelated file
# (EasyR1-new/examples/runtime_env.yaml) into this span; preserved verbatim so
# no content is lost:
#
#   working_dir: ./
#   excludes: ["/.git/"]
#   env_vars:
#     TOKENIZERS_PARALLELISM: "true"
#     NCCL_DEBUG: "WARN"
#     VLLM_LOGGING_LEVEL: "WARN"
#     TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
#     PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False"
#     PYTHONUNBUFFERED: "1"
b/EasyR1-new/examples/wandb/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2025-07-21T14:07:35.211628547+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-21T14:07:50.875611638+08:00","level":"INFO","msg":"stream: created new stream","id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.876588753+08:00","level":"INFO","msg":"stream: started","id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.87663237+08:00","level":"INFO","msg":"sender: started","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.876605114+08:00","level":"INFO","msg":"handler: started","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.87665507+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:05.783504415+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":7.434542791},{"desc":"uploading data","runtime_seconds":0.571568597}],"total_operations":2}} +{"time":"2025-07-21T14:08:31.955353631+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-21T14:08:56.48244624+08:00","level":"INFO","msg":"stream: closing","id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.48558812+08:00","level":"INFO","msg":"handler: closed","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.485598269+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.485607803+08:00","level":"INFO","msg":"sender: closed","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.50017009+08:00","level":"INFO","msg":"stream: closed","id":"a9qblh0u"} diff --git a/EasyR1-new/examples/wandb/debug.log b/EasyR1-new/examples/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..b7e2572e2a286ff92c36c4fc2635c9b518e94415 --- /dev/null +++ b/EasyR1-new/examples/wandb/debug.log @@ -0,0 +1,28 @@ +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Current 
SDK version is 0.21.0 +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Configure stats pid to 317976 +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug.log +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug-internal.log +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:init():830] calling init triggers +2025-07-21 14:07:34,953 INFO MainThread:317976 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 
'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 
'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 
'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-21 14:07:34,953 INFO MainThread:317976 [wandb_init.py:init():871] starting backend +2025-07-21 14:07:35,172 INFO MainThread:317976 [wandb_init.py:init():874] sending inform_init request +2025-07-21 14:07:35,174 INFO MainThread:317976 [wandb_init.py:init():882] backend started and connected +2025-07-21 14:07:35,186 INFO MainThread:317976 [wandb_init.py:init():953] updated telemetry +2025-07-21 14:07:35,302 INFO MainThread:317976 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-21 14:07:58,269 INFO MainThread:317976 [wandb_init.py:init():1029] starting run threads in backend +2025-07-21 14:07:58,556 INFO MainThread:317976 [wandb_run.py:_console_start():2458] atexit reg +2025-07-21 14:07:58,556 INFO MainThread:317976 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-21 14:07:58,562 INFO MainThread:317976 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-21 14:07:58,562 INFO MainThread:317976 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-21 14:07:58,574 INFO MainThread:317976 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-21 14:08:04,748 INFO MainThread:317976 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/a9qblh0u +2025-07-21 14:08:04,755 INFO MainThread:317976 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-21 14:08:04,767 INFO MainThread:317976 [wandb_run.py:_restore():2405] restore +2025-07-21 14:08:04,771 INFO MainThread:317976 [wandb_run.py:_restore():2411] restore done +2025-07-21 14:08:56,463 INFO MainThread:317976 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-21 14:08:56,469 INFO MainThread:317976 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-21 14:08:56,469 INFO MainThread:317976 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/output.log b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..087604f53da01ea95a5c98279640a5d4fbdfc220 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.6.45", "pid": 7695, "uuid": "4931688589ea40edb6b0579192261e95", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e5a308ce5af6b0415ec3555d11191377649cd8d --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +numpy==2.2.6 +pylatexenc==2.10 +webdataset==1.0.2 +email_validator==2.2.0 +confection==0.1.5 +text-unidecode==1.3 +python-dotenv==1.1.1 +starlette==0.47.1 +pyasn1==0.6.1 +contexttimer==0.3.3 +requests==2.32.4 +omegaconf==2.3.0 +tzdata==2025.2 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +decord==0.6.0 +nvidia-cublas-cu12==12.4.5.8 +proto-plus==1.26.1 +opentelemetry-semantic-conventions-ai==0.4.11 +scipy==1.15.3 +googleapis-common-protos==1.70.0 +nvidia-cufile-cu12==1.11.1.6 +parso==0.8.4 +opentelemetry-exporter-otlp-proto-http==1.26.0 +vllm==0.8.5.post1 +sniffio==1.3.1 +python-dateutil==2.9.0.post0 +openai==1.90.0 +absl-py==2.3.1 +Deprecated==1.2.18 +cupy-cuda12x==13.5.1 +setuptools==78.1.1 +peft==0.16.0 +rignore==0.6.2 +joblib==1.5.1 +platformdirs==4.3.8 +regex==2024.11.6 +datasets==4.0.0 +preshed==3.0.10 +aiohappyeyeballs==2.6.1 +uvloop==0.21.0 +sentry-sdk==2.32.0 +virtualenv==20.31.2 +lazy_loader==0.4 +rich==14.0.0 +pycocotools==2.0.10 +timm==0.4.12 +rich-toolkit==0.14.8 +fastapi-cli==0.0.8 +antlr4-python3-runtime==4.9.3 +salesforce-lavis==1.0.2 +Pygments==2.19.2 +gitdb==4.0.12 +six==1.17.0 +verl==0.3.2.dev0 +smmap==5.0.2 +fastapi-cloud-cli==0.1.4 +opencensus==0.11.4 +annotated-types==0.7.0 +xxhash==3.5.0 +frozenlist==1.7.0 +pyzmq==27.0.0 +Jinja2==3.1.6 +ptyprocess==0.7.0 +interegular==0.3.3 +opentelemetry-semantic-conventions==0.47b0 +jiter==0.10.0 +idna==3.10 +typing_extensions==4.14.1 +nvidia-cusolver-cu12==11.6.1.9 +propcache==0.3.2 +nest-asyncio==1.6.0 +pillow==11.3.0 +tenacity==9.1.2 +sentencepiece==0.2.0 
+portalocker==3.2.0 +matplotlib-inline==0.1.7 +pandas==2.3.1 +compressed-tensors==0.9.3 +typing-inspection==0.4.1 +nltk==3.9.1 +opencv-python-headless==4.12.0.88 +dnspython==2.7.0 +tokenizers==0.21.2 +wheel==0.45.1 +python-multipart==0.0.20 +catalogue==2.0.10 +smart_open==7.3.0.post1 +multidict==6.6.3 +xgrammar==0.1.18 +aiosignal==1.4.0 +pybase64==1.4.1 +blake3==1.0.5 +certifi==2025.7.14 +torchdata==0.11.0 +qwen-vl-utils==0.0.11 +nvidia-nvjitlink-cu12==12.4.127 +urllib3==2.5.0 +aiohttp-cors==0.8.1 +outlines_core==0.1.26 +pydantic-extra-types==2.10.5 +filelock==3.18.0 +airportsdata==20250706 +ipython==8.37.0 +pydantic==2.11.7 +cloudpickle==3.1.1 +torchaudio==2.6.0 +tiktoken==0.9.0 +pexpect==4.9.0 +flash-attn==2.7.1.post1 +nvidia-nvtx-cu12==12.4.127 +bleach==6.2.0 +watchfiles==1.1.0 +uvicorn==0.35.0 +numba==0.61.2 +tornado==6.5.1 +networkx==3.4.2 +sympy==1.13.1 +watchdog==6.0.0 +kaggle==1.7.4.5 +pyarrow==20.0.0 +accelerate==1.8.1 +mpmath==1.3.0 +lightning-utilities==0.14.3 +codetiming==1.4.0 +ftfy==6.3.1 +triton==3.2.0 +referencing==0.36.2 +dill==0.3.8 +language_data==1.3.0 +python-magic==0.4.27 +wasabi==1.1.3 +pyvers==0.1.0 +murmurhash==1.0.13 +mathruler==0.1.0 +jsonschema-specifications==2025.4.1 +blinker==1.9.0 +imageio==2.37.0 +pycocoevalcap==1.2 +python-json-logger==3.3.0 +nvidia-cuda-cupti-cu12==12.4.127 +fairscale==0.4.4 +httptools==0.6.4 +identify==2.6.12 +streamlit==1.46.1 +mdurl==0.1.2 +decorator==5.2.1 +h11==0.16.0 +distlib==0.3.9 +webencodings==0.5.1 +transformers==4.52.4 +srsly==2.5.1 +fsspec==2025.3.0 +diskcache==5.6.3 +click==8.2.1 +blis==1.3.0 +colorful==0.5.7 +websockets==15.0.1 +liger_kernel==0.6.0 +lark==1.2.2 +cymem==2.0.11 +anyio==4.9.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +fastapi==0.116.1 +tensordict==0.9.1 +pre_commit==4.2.0 +wrapt==1.17.2 +opentelemetry-api==1.26.0 +nvidia-curand-cu12==10.3.5.147 +spacy==3.8.7 +narwhals==1.47.0 +exceptiongroup==1.3.0 +braceexpand==0.1.7 +rouge_score==0.1.2 +msgpack==1.1.1 +async-timeout==5.0.1 
+protobuf==4.25.8 +huggingface-hub==0.33.4 +wandb==0.21.0 +httpx==0.28.1 +mistral_common==1.8.0 +gguf==0.17.1 +opentelemetry-proto==1.26.0 +nvidia-nccl-cu12==2.21.5 +wcwidth==0.2.13 +nvidia-cusparselt-cu12==0.6.2 +scikit-image==0.25.2 +cfgv==3.4.0 +markdown-it-py==3.0.0 +packaging==25.0 +charset-normalizer==3.4.2 +executing==2.2.0 +py-spy==0.4.0 +pure_eval==0.2.3 +safetensors==0.5.3 +pyasn1_modules==0.4.2 +jsonschema==4.24.0 +spacy-legacy==3.0.12 +astor==0.8.1 +shellingham==1.5.4 +langcodes==3.5.0 +pytz==2025.2 +distro==1.9.0 +google-api-core==2.25.1 +rsa==4.9.1 +multiprocess==0.70.16 +iopath==0.1.10 +weasel==0.4.1 +tifffile==2025.5.10 +nodeenv==1.9.1 +opentelemetry-exporter-prometheus==0.56b0 +einops==0.8.1 +lm-format-enforcer==0.10.11 +pydantic_core==2.33.2 +hf-xet==1.1.5 +opentelemetry-sdk==1.26.0 +ninja==1.11.1.4 +altair==5.5.0 +ray==2.47.1 +depyf==0.18.0 +attrs==25.3.0 +tqdm==4.67.1 +xformers==0.0.29.post2 +pydeck==0.9.1 +stack-data==0.6.3 +prometheus-fastapi-instrumentator==7.1.0 +grpcio==1.73.1 +torch==2.6.0 +plotly==6.2.0 +nvidia-cudnn-cu12==9.1.0.70 +python-slugify==8.0.4 +opencensus-context==0.1.3 +importlib_metadata==8.0.0 +orjson==3.10.18 +prompt_toolkit==3.0.51 +psutil==7.0.0 +opendatasets==0.1.22 +asttokens==3.0.0 +pycountry==24.6.1 +partial-json-parser==0.2.1.1.post6 +zipp==3.23.0 +pip==25.1 +MarkupSafe==3.0.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +llvmlite==0.44.0 +nvidia-cufft-cu12==11.2.1.3 +GitPython==3.1.44 +fastrlock==0.8.3 +PyYAML==6.0.2 +opentelemetry-exporter-otlp==1.26.0 +typer==0.16.0 +cloudpathlib==0.21.1 +toml==0.10.2 +pytorch-lightning==2.5.2 +marisa-trie==1.2.1 +msgspec==0.19.0 +llguidance==0.7.30 +google-auth==2.40.3 +traitlets==5.14.3 +rpds-py==0.26.0 +cachetools==5.5.2 +spacy-loggers==1.0.5 +nvidia-cuda-runtime-cu12==12.4.127 +aiohttp==3.12.14 +torchvision==0.21.0 +av==15.0.0 +torchmetrics==1.7.4 +nvidia-cusparse-cu12==12.3.1.170 +outlines==0.1.11 +jedi==0.19.2 +thinc==8.3.6 +prometheus_client==0.22.1 +httpcore==1.0.9 
+py-cpuinfo==9.0.0 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.functools==4.0.1 +inflect==7.3.1 +jaraco.collections==5.1.0 +packaging==24.2 +wheel==0.45.1 +tomli==2.0.1 +platformdirs==4.2.2 +typing_extensions==4.12.2 +more-itertools==10.3.0 +autocommand==2.2.2 +jaraco.text==3.12.1 +importlib_metadata==8.0.0 +jaraco.context==5.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typeguard==4.3.0 diff --git a/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..b6c612f8abb67917fe37f8c955472ad4e9343c7d --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/wandb-metadata.json @@ -0,0 +1,71 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-16T13:03:06.627811Z", + "args": [ + "--node-ip-address=10.1.6.45", + "--node-manager-port=42325", + "--object-store-name=/tmp/ray/session_2025-07-16_20-51-10_730275_5196/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-16_20-51-10_730275_5196/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=54069", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=57480", + "--gcs-address=10.1.6.45:54882", + "--session-name=session_2025-07-16_20-51-10_730275_5196", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=a69f29ea92b56cbc2f572353862768b5a0832495b7a590f4f273963a", + "--startup-token=28", + "--worker-launch-time-ms=1752670273261", + "--node-id=e54e37f4f5b34463471871dbe5c90937958f768732bc6e9579a13842", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + 
"commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-297442-5bd684fbff-4l96r", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 28, + "cpu_count_logical": 28, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 4, + "disk": { + "/": { + "total": "1623302262784", + "used": "1165746176" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-c783413d-e4e1-22c5-7c48-9296c28b08a0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0ad82850-a679-fa6b-9200-a26edb1bb8a4" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-e73b7d7b-4455-62ee-ec7e-a2eb1d845e07" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-71ee45de-57b2-ac7c-13c1-08a1f197eb20" + } + ], + "cudaVersion": "12.1", + "writerId": "t6v0x6ljtdqkxmc6nxsvdn00ede7tanp" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c196cb934788278b679f8f697906da20df22a455 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-16T21:03:07.422600635+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-16T21:03:27.915788626+08:00","level":"INFO","msg":"stream: created new stream","id":"lkflebyj"} +{"time":"2025-07-16T21:03:27.937736115+08:00","level":"INFO","msg":"stream: started","id":"lkflebyj"} +{"time":"2025-07-16T21:03:27.937745307+08:00","level":"INFO","msg":"handler: started","stream_id":"lkflebyj"} +{"time":"2025-07-16T21:03:27.937759674+08:00","level":"INFO","msg":"sender: 
started","stream_id":"lkflebyj"} +{"time":"2025-07-16T21:03:27.937780163+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"lkflebyj"} diff --git a/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..ec471f7540ec7b50d3da94e38aa1cf19d0bfbb44 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-16 21:03:07,126 INFO MainThread:7695 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-16 21:03:07,126 INFO MainThread:7695 [wandb_setup.py:_flush():80] Configure stats pid to 7695 +2025-07-16 21:03:07,126 INFO MainThread:7695 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-16 21:03:07,126 INFO MainThread:7695 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log +2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log +2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:init():830] calling init triggers +2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 
'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 
'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 
'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 4, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:init():871] starting backend +2025-07-16 21:03:07,349 INFO MainThread:7695 [wandb_init.py:init():874] sending inform_init request +2025-07-16 21:03:07,374 INFO MainThread:7695 [wandb_init.py:init():882] backend started and connected +2025-07-16 21:03:07,388 INFO MainThread:7695 [wandb_init.py:init():953] updated telemetry +2025-07-16 21:03:08,265 INFO MainThread:7695 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-16 21:03:32,572 INFO MainThread:7695 [wandb_init.py:init():1029] starting run threads in backend +2025-07-16 21:03:32,900 INFO MainThread:7695 [wandb_run.py:_console_start():2458] atexit reg +2025-07-16 21:03:32,901 INFO MainThread:7695 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-16 21:03:32,907 INFO MainThread:7695 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-16 21:03:32,912 INFO MainThread:7695 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-16 21:03:32,949 INFO MainThread:7695 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/run-lkflebyj.wandb b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/run-lkflebyj.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/config.yaml b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6924da0d3a04a67e9fc8a9bae1d778f49dc90a7c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/config.yaml @@ -0,0 +1,322 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + repix6q725hnzsubljgya3pkb0pg0b9q: + args: + - --node-ip-address=10.1.5.237 + - --node-manager-port=37853 + - --object-store-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/raylet + - --redis-address=None + - --metrics-agent-port=43790 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=63904 + - --gcs-address=10.1.5.237:56758 + - --session-name=session_2025-07-18_15-56-28_336135_54391 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - --cluster-id=cc22236bcaa2a9ab2bdb3c76723ef15af4933b041414da957aa668b9 + - --startup-token=64 + - --worker-launch-time-ms=1752825390762 + - --node-id=0967616139eea74249995a5549bf4039d244c259acd06a5a8fe0b7aa + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + cpu_count: 64 + cpu_count_logical: 64 + cudaVersion: "12.1" + disk: + /: + total: "1623302262784" + used: "1224904704" + email: gia0603yucca@gmail.com + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: 
https://github.com/PorUna-byte/PAR.git + gpu: NVIDIA A800-SXM4-80GB + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-f7e858cd-ae03-031d-b834-86bf87923211 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-1bba2921-208c-d0ad-1a05-25fc85d62630 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-becb8d59-2ab7-b50d-5770-183c6478747a + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655 + host: dsw-266702-dc4b748ff-f7c66 + memory: + total: "549755813888" + os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + program: /root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-18T08:00:33.186442Z" + writerId: repix6q725hnzsubljgya3pkb0pg0b9q + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + 
value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + 
offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 72 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 72 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + 
dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/output.log b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..2ab5c2a41cd559e0abe85962768c97759069aa61 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/output.log @@ -0,0 +1,72 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 59301, "uuid": "79b41be0b4cb4caea00399d5e67f3adb", "closed": false} +Start validation... 
+Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61215, ip=10.1.5.237, actor_id=8dbb70fdf561d45e1bb95fbd01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61214, ip=10.1.5.237, actor_id=12428909aea9647197558b3701000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): 
ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61213, ip=10.1.5.237, actor_id=17a3ff05d33225db9d5f3d2001000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61212, ip=10.1.5.237, actor_id=8038d6b87c20ea82378ff46b01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61211, ip=10.1.5.237, 
actor_id=1e5423d0856a1d601b82502801000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61209, ip=10.1.5.237, actor_id=061e0c8de42fd2b69b89561501000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=60985, ip=10.1.5.237, actor_id=8073bd5c566ab2faaa122c0e01000000, repr=) + File 
"/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7396100b45ae50733f37383324036e5ff4cdf5aa --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +attrs==25.3.0 +tqdm==4.67.1 +langcodes==3.5.0 +nvidia-cublas-cu12==12.4.5.8 +airportsdata==20250706 +absl-py==2.3.1 +hf-xet==1.1.5 +opentelemetry-exporter-otlp-proto-http==1.26.0 +interegular==0.3.3 +tifffile==2025.5.10 +nvidia-cufile-cu12==1.11.1.6 +nltk==3.9.1 +tokenizers==0.21.2 +salesforce-lavis==1.0.2 +tzdata==2025.2 +prometheus_client==0.22.1 +google-auth==2.40.3 +ipython==8.37.0 +pydantic==2.11.7 +mathruler==0.1.0 +six==1.17.0 +python-dateutil==2.9.0.post0 +requests==2.32.4 +mistral_common==1.8.0 +huggingface-hub==0.33.4 +preshed==3.0.10 +torchmetrics==1.7.4 +blinker==1.9.0 +nvidia-cusparse-cu12==12.3.1.170 +rich-toolkit==0.14.8 +pytz==2025.2 +pandas==2.3.1 +packaging==25.0 +async-timeout==5.0.1 +diskcache==5.6.3 +google-api-core==2.25.1 +parso==0.8.4 +joblib==1.5.1 +pycountry==24.6.1 
+triton==3.2.0 +pybase64==1.4.1 +marisa-trie==1.2.1 +plotly==6.2.0 +wandb==0.21.0 +PyYAML==6.0.2 +regex==2024.11.6 +idna==3.10 +numba==0.61.2 +nvidia-curand-cu12==10.3.5.147 +uvicorn==0.35.0 +srsly==2.5.1 +confection==0.1.5 +opentelemetry-semantic-conventions-ai==0.4.11 +typing-inspection==0.4.1 +opencv-python-headless==4.12.0.88 +pyasn1==0.6.1 +av==15.0.0 +xgrammar==0.1.18 +distlib==0.3.9 +datasets==4.0.0 +networkx==3.4.2 +prometheus-fastapi-instrumentator==7.1.0 +lightning-utilities==0.14.3 +executing==2.2.0 +pycocoevalcap==1.2 +h11==0.16.0 +certifi==2025.7.14 +sniffio==1.3.1 +wheel==0.45.1 +transformers==4.52.4 +wrapt==1.17.2 +jsonschema-specifications==2025.4.1 +mpmath==1.3.0 +msgspec==0.19.0 +py-cpuinfo==9.0.0 +contexttimer==0.3.3 +watchdog==6.0.0 +pexpect==4.9.0 +webencodings==0.5.1 +verl==0.3.2.dev0 +webdataset==1.0.2 +httpcore==1.0.9 +opentelemetry-exporter-otlp==1.26.0 +lm-format-enforcer==0.10.11 +googleapis-common-protos==1.70.0 +pyzmq==27.0.0 +fsspec==2025.3.0 +grpcio==1.73.1 +cymem==2.0.11 +timm==0.4.12 +zipp==3.23.0 +llguidance==0.7.30 +opencensus-context==0.1.3 +omegaconf==2.3.0 +python-json-logger==3.3.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +watchfiles==1.1.0 +nvidia-nvjitlink-cu12==12.4.127 +peft==0.16.0 +sentry-sdk==2.32.0 +rpds-py==0.26.0 +email_validator==2.2.0 +nodeenv==1.9.1 +distro==1.9.0 +jiter==0.10.0 +compressed-tensors==0.9.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.7 +rich==14.0.0 +GitPython==3.1.44 +lazy_loader==0.4 +fastapi-cloud-cli==0.1.4 +cupy-cuda12x==13.5.1 +prompt_toolkit==3.0.51 +gguf==0.17.1 +blis==1.3.0 +thinc==8.3.6 +cloudpickle==3.1.1 +multidict==6.6.3 +nvidia-nvtx-cu12==12.4.127 +flash-attn==2.7.1.post1 +pyasn1_modules==0.4.2 +rsa==4.9.1 +weasel==0.4.1 +uvloop==0.21.0 +click==8.2.1 +numpy==2.2.6 +torchdata==0.11.0 +pylatexenc==2.10 +cachetools==5.5.2 +Jinja2==3.1.6 +typer==0.16.0 +nvidia-cudnn-cu12==9.1.0.70 +fastapi-cli==0.0.8 +xxhash==3.5.0 +tornado==6.5.1 +scipy==1.15.3 +rouge_score==0.1.2 
+cloudpathlib==0.21.1 +streamlit==1.46.1 +jedi==0.19.2 +referencing==0.36.2 +accelerate==1.8.1 +decord==0.6.0 +setuptools==78.1.1 +mdurl==0.1.2 +vllm==0.8.5.post1 +identify==2.6.12 +python-slugify==8.0.4 +dnspython==2.7.0 +dill==0.3.8 +opentelemetry-proto==1.26.0 +orjson==3.10.18 +msgpack==1.1.1 +aiohttp==3.12.14 +aiosignal==1.4.0 +typing_extensions==4.14.1 +tiktoken==0.9.0 +catalogue==2.0.10 +platformdirs==4.3.8 +narwhals==1.47.0 +antlr4-python3-runtime==4.9.3 +pydantic-extra-types==2.10.5 +nvidia-cusolver-cu12==11.6.1.9 +kaggle==1.7.4.5 +propcache==0.3.2 +urllib3==2.5.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +pydeck==0.9.1 +nvidia-cufft-cu12==11.2.1.3 +pyarrow==20.0.0 +nvidia-nccl-cu12==2.21.5 +httptools==0.6.4 +qwen-vl-utils==0.0.11 +markdown-it-py==3.0.0 +gitdb==4.0.12 +altair==5.5.0 +torchvision==0.21.0 +python-magic==0.4.27 +iopath==0.1.10 +ray==2.47.1 +blake3==1.0.5 +pillow==11.3.0 +python-dotenv==1.1.1 +torchaudio==2.6.0 +partial-json-parser==0.2.1.1.post6 +httpx==0.28.1 +torch==2.6.0 +anyio==4.9.0 +fairscale==0.4.4 +traitlets==5.14.3 +pure_eval==0.2.3 +sympy==1.13.1 +nvidia-cusparselt-cu12==0.6.2 +jsonschema==4.24.0 +imageio==2.37.0 +opencensus==0.11.4 +stack-data==0.6.3 +shellingham==1.5.4 +tensordict==0.9.1 +nvidia-cuda-runtime-cu12==12.4.127 +nest-asyncio==1.6.0 +einops==0.8.1 +lark==1.2.2 +tenacity==9.1.2 +virtualenv==20.31.2 +ptyprocess==0.7.0 +outlines==0.1.11 +depyf==0.18.0 +starlette==0.47.1 +cfgv==3.4.0 +pre_commit==4.2.0 +language_data==1.3.0 +pip==25.1 +Pygments==2.19.2 +nvidia-cuda-cupti-cu12==12.4.127 +protobuf==4.25.8 +safetensors==0.5.3 +text-unidecode==1.3 +wcwidth==0.2.13 +charset-normalizer==3.4.2 +aiohappyeyeballs==2.6.1 +outlines_core==0.1.26 +fastrlock==0.8.3 +asttokens==3.0.0 +psutil==7.0.0 +smmap==5.0.2 +exceptiongroup==1.3.0 +murmurhash==1.0.13 +pytorch-lightning==2.5.2 +filelock==3.18.0 +astor==0.8.1 +py-spy==0.4.0 +pydantic_core==2.33.2 +colorful==0.5.7 +fastapi==0.116.1 +opentelemetry-api==1.26.0 +openai==1.90.0 
+ninja==1.11.1.4 +opentelemetry-semantic-conventions==0.47b0 +spacy-legacy==3.0.12 +opendatasets==0.1.22 +Deprecated==1.2.18 +proto-plus==1.26.1 +rignore==0.6.2 +aiohttp-cors==0.8.1 +liger_kernel==0.6.0 +opentelemetry-exporter-prometheus==0.56b0 +python-multipart==0.0.20 +multiprocess==0.70.16 +opentelemetry-sdk==1.26.0 +decorator==5.2.1 +xformers==0.0.29.post2 +spacy==3.8.7 +pyvers==0.1.0 +pycocotools==2.0.10 +websockets==15.0.1 +wasabi==1.1.3 +frozenlist==1.7.0 +codetiming==1.4.0 +sentencepiece==0.2.0 +toml==0.10.2 +scikit-image==0.25.2 +ftfy==6.3.1 +bleach==6.2.0 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +importlib_metadata==8.0.0 +spacy-loggers==1.0.5 +smart_open==7.3.0.post1 +portalocker==3.2.0 +llvmlite==0.44.0 +MarkupSafe==3.0.2 +braceexpand==0.1.7 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.context==5.3.0 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +platformdirs==4.2.2 +packaging==24.2 +wheel==0.45.1 +zipp==3.19.2 +inflect==7.3.1 +autocommand==2.2.2 +typeguard==4.3.0 +jaraco.collections==5.1.0 +backports.tarfile==1.2.0 +tomli==2.0.1 +importlib_metadata==8.0.0 +typing_extensions==4.12.2 diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..e1c61cb0c0f4f69f957b59a8dc070f8bac3b8f0e --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-18T08:00:33.186442Z", + "args": [ + "--node-ip-address=10.1.5.237", + "--node-manager-port=37853", + "--object-store-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/raylet", + "--redis-address=None", + 
"--metrics-agent-port=43790", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=63904", + "--gcs-address=10.1.5.237:56758", + "--session-name=session_2025-07-18_15-56-28_336135_54391", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=cc22236bcaa2a9ab2bdb3c76723ef15af4933b041414da957aa668b9", + "--startup-token=64", + "--worker-launch-time-ms=1752825390762", + "--node-id=0967616139eea74249995a5549bf4039d244c259acd06a5a8fe0b7aa", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-dc4b748ff-f7c66", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1224904704" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-f7e858cd-ae03-031d-b834-86bf87923211" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": 
"GPU-becb8d59-2ab7-b50d-5770-183c6478747a" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655" + } + ], + "cudaVersion": "12.1", + "writerId": "repix6q725hnzsubljgya3pkb0pg0b9q" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-summary.json b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..7b4fdaac0ef2f0f7b27bbdd0252e8c2048547735 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":1},"_runtime":1} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..956b73d1226f03b272a8c74f0a7aed2e82e7f79a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log @@ -0,0 +1,15 @@ +{"time":"2025-07-18T16:00:33.944898175+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-18T16:01:04.056910886+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"} +{"time":"2025-07-18T16:01:14.919464259+08:00","level":"INFO","msg":"stream: created new stream","id":"nji9xqxs"} +{"time":"2025-07-18T16:01:14.926346872+08:00","level":"INFO","msg":"sender: started","stream_id":"nji9xqxs"} +{"time":"2025-07-18T16:01:14.926359513+08:00","level":"INFO","msg":"stream: started","id":"nji9xqxs"} 
+{"time":"2025-07-18T16:01:14.926369749+08:00","level":"INFO","msg":"handler: started","stream_id":"nji9xqxs"} +{"time":"2025-07-18T16:01:14.926391685+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"nji9xqxs"} +{"time":"2025-07-18T16:01:44.221082826+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading data","runtime_seconds":2.593669605},{"desc":"updating run metadata","runtime_seconds":2.593493161},{"desc":"uploading wandb-metadata.json","runtime_seconds":1.024626407}],"total_operations":3}} +{"time":"2025-07-18T16:01:58.697029208+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/nji9xqxs/file_stream\": unexpected EOF"} +{"time":"2025-07-18T16:02:17.601004486+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-18T16:02:38.850804129+08:00","level":"INFO","msg":"stream: closing","id":"nji9xqxs"} +{"time":"2025-07-18T16:02:38.850824284+08:00","level":"INFO","msg":"handler: closed","stream_id":"nji9xqxs"} +{"time":"2025-07-18T16:02:38.850832353+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"nji9xqxs"} +{"time":"2025-07-18T16:02:38.850837848+08:00","level":"INFO","msg":"sender: closed","stream_id":"nji9xqxs"} +{"time":"2025-07-18T16:02:38.858004163+08:00","level":"INFO","msg":"stream: closed","id":"nji9xqxs"} diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..708bfef6b860b451c60bbbf5e58fd562f8786837 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-18 16:00:33,700 INFO MainThread:59301 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-18 16:00:33,700 INFO MainThread:59301 [wandb_setup.py:_flush():80] Configure stats pid to 59301 
+2025-07-18 16:00:33,700 INFO MainThread:59301 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:init():830] calling init triggers +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 
'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 
'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 
'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:init():871] starting backend +2025-07-18 16:00:33,911 INFO MainThread:59301 [wandb_init.py:init():874] sending inform_init request +2025-07-18 16:00:33,914 INFO MainThread:59301 [wandb_init.py:init():882] backend started and connected +2025-07-18 16:00:33,934 INFO MainThread:59301 [wandb_init.py:init():953] updated telemetry +2025-07-18 16:00:34,824 INFO MainThread:59301 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-18 16:01:41,621 INFO MainThread:59301 [wandb_init.py:init():1029] starting run threads in backend +2025-07-18 16:01:41,877 INFO MainThread:59301 [wandb_run.py:_console_start():2458] atexit reg +2025-07-18 16:01:41,877 INFO MainThread:59301 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-18 16:01:41,899 INFO MainThread:59301 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-18 16:01:41,899 INFO MainThread:59301 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-18 16:01:41,927 INFO MainThread:59301 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-18 16:01:43,199 INFO MainThread:59301 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/nji9xqxs +2025-07-18 16:01:43,199 INFO MainThread:59301 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-18 16:01:43,207 INFO MainThread:59301 [wandb_run.py:_restore():2405] restore +2025-07-18 16:01:43,211 INFO MainThread:59301 [wandb_run.py:_restore():2411] restore done +2025-07-18 16:02:38,840 INFO MainThread:59301 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-18 16:02:38,841 INFO MainThread:59301 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-18 16:02:38,841 INFO MainThread:59301 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/run-nji9xqxs.wandb b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/run-nji9xqxs.wandb new file mode 100644 index 0000000000000000000000000000000000000000..9342d4a19119a3bb703fe1b02151727e1bfbcafb Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/run-nji9xqxs.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/output.log b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7396100b45ae50733f37383324036e5ff4cdf5aa --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +attrs==25.3.0 +tqdm==4.67.1 
+langcodes==3.5.0 +nvidia-cublas-cu12==12.4.5.8 +airportsdata==20250706 +absl-py==2.3.1 +hf-xet==1.1.5 +opentelemetry-exporter-otlp-proto-http==1.26.0 +interegular==0.3.3 +tifffile==2025.5.10 +nvidia-cufile-cu12==1.11.1.6 +nltk==3.9.1 +tokenizers==0.21.2 +salesforce-lavis==1.0.2 +tzdata==2025.2 +prometheus_client==0.22.1 +google-auth==2.40.3 +ipython==8.37.0 +pydantic==2.11.7 +mathruler==0.1.0 +six==1.17.0 +python-dateutil==2.9.0.post0 +requests==2.32.4 +mistral_common==1.8.0 +huggingface-hub==0.33.4 +preshed==3.0.10 +torchmetrics==1.7.4 +blinker==1.9.0 +nvidia-cusparse-cu12==12.3.1.170 +rich-toolkit==0.14.8 +pytz==2025.2 +pandas==2.3.1 +packaging==25.0 +async-timeout==5.0.1 +diskcache==5.6.3 +google-api-core==2.25.1 +parso==0.8.4 +joblib==1.5.1 +pycountry==24.6.1 +triton==3.2.0 +pybase64==1.4.1 +marisa-trie==1.2.1 +plotly==6.2.0 +wandb==0.21.0 +PyYAML==6.0.2 +regex==2024.11.6 +idna==3.10 +numba==0.61.2 +nvidia-curand-cu12==10.3.5.147 +uvicorn==0.35.0 +srsly==2.5.1 +confection==0.1.5 +opentelemetry-semantic-conventions-ai==0.4.11 +typing-inspection==0.4.1 +opencv-python-headless==4.12.0.88 +pyasn1==0.6.1 +av==15.0.0 +xgrammar==0.1.18 +distlib==0.3.9 +datasets==4.0.0 +networkx==3.4.2 +prometheus-fastapi-instrumentator==7.1.0 +lightning-utilities==0.14.3 +executing==2.2.0 +pycocoevalcap==1.2 +h11==0.16.0 +certifi==2025.7.14 +sniffio==1.3.1 +wheel==0.45.1 +transformers==4.52.4 +wrapt==1.17.2 +jsonschema-specifications==2025.4.1 +mpmath==1.3.0 +msgspec==0.19.0 +py-cpuinfo==9.0.0 +contexttimer==0.3.3 +watchdog==6.0.0 +pexpect==4.9.0 +webencodings==0.5.1 +verl==0.3.2.dev0 +webdataset==1.0.2 +httpcore==1.0.9 +opentelemetry-exporter-otlp==1.26.0 +lm-format-enforcer==0.10.11 +googleapis-common-protos==1.70.0 +pyzmq==27.0.0 +fsspec==2025.3.0 +grpcio==1.73.1 +cymem==2.0.11 +timm==0.4.12 +zipp==3.23.0 +llguidance==0.7.30 +opencensus-context==0.1.3 +omegaconf==2.3.0 +python-json-logger==3.3.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +watchfiles==1.1.0 
+nvidia-nvjitlink-cu12==12.4.127 +peft==0.16.0 +sentry-sdk==2.32.0 +rpds-py==0.26.0 +email_validator==2.2.0 +nodeenv==1.9.1 +distro==1.9.0 +jiter==0.10.0 +compressed-tensors==0.9.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.7 +rich==14.0.0 +GitPython==3.1.44 +lazy_loader==0.4 +fastapi-cloud-cli==0.1.4 +cupy-cuda12x==13.5.1 +prompt_toolkit==3.0.51 +gguf==0.17.1 +blis==1.3.0 +thinc==8.3.6 +cloudpickle==3.1.1 +multidict==6.6.3 +nvidia-nvtx-cu12==12.4.127 +flash-attn==2.7.1.post1 +pyasn1_modules==0.4.2 +rsa==4.9.1 +weasel==0.4.1 +uvloop==0.21.0 +click==8.2.1 +numpy==2.2.6 +torchdata==0.11.0 +pylatexenc==2.10 +cachetools==5.5.2 +Jinja2==3.1.6 +typer==0.16.0 +nvidia-cudnn-cu12==9.1.0.70 +fastapi-cli==0.0.8 +xxhash==3.5.0 +tornado==6.5.1 +scipy==1.15.3 +rouge_score==0.1.2 +cloudpathlib==0.21.1 +streamlit==1.46.1 +jedi==0.19.2 +referencing==0.36.2 +accelerate==1.8.1 +decord==0.6.0 +setuptools==78.1.1 +mdurl==0.1.2 +vllm==0.8.5.post1 +identify==2.6.12 +python-slugify==8.0.4 +dnspython==2.7.0 +dill==0.3.8 +opentelemetry-proto==1.26.0 +orjson==3.10.18 +msgpack==1.1.1 +aiohttp==3.12.14 +aiosignal==1.4.0 +typing_extensions==4.14.1 +tiktoken==0.9.0 +catalogue==2.0.10 +platformdirs==4.3.8 +narwhals==1.47.0 +antlr4-python3-runtime==4.9.3 +pydantic-extra-types==2.10.5 +nvidia-cusolver-cu12==11.6.1.9 +kaggle==1.7.4.5 +propcache==0.3.2 +urllib3==2.5.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +pydeck==0.9.1 +nvidia-cufft-cu12==11.2.1.3 +pyarrow==20.0.0 +nvidia-nccl-cu12==2.21.5 +httptools==0.6.4 +qwen-vl-utils==0.0.11 +markdown-it-py==3.0.0 +gitdb==4.0.12 +altair==5.5.0 +torchvision==0.21.0 +python-magic==0.4.27 +iopath==0.1.10 +ray==2.47.1 +blake3==1.0.5 +pillow==11.3.0 +python-dotenv==1.1.1 +torchaudio==2.6.0 +partial-json-parser==0.2.1.1.post6 +httpx==0.28.1 +torch==2.6.0 +anyio==4.9.0 +fairscale==0.4.4 +traitlets==5.14.3 +pure_eval==0.2.3 +sympy==1.13.1 +nvidia-cusparselt-cu12==0.6.2 +jsonschema==4.24.0 +imageio==2.37.0 +opencensus==0.11.4 +stack-data==0.6.3 
+shellingham==1.5.4 +tensordict==0.9.1 +nvidia-cuda-runtime-cu12==12.4.127 +nest-asyncio==1.6.0 +einops==0.8.1 +lark==1.2.2 +tenacity==9.1.2 +virtualenv==20.31.2 +ptyprocess==0.7.0 +outlines==0.1.11 +depyf==0.18.0 +starlette==0.47.1 +cfgv==3.4.0 +pre_commit==4.2.0 +language_data==1.3.0 +pip==25.1 +Pygments==2.19.2 +nvidia-cuda-cupti-cu12==12.4.127 +protobuf==4.25.8 +safetensors==0.5.3 +text-unidecode==1.3 +wcwidth==0.2.13 +charset-normalizer==3.4.2 +aiohappyeyeballs==2.6.1 +outlines_core==0.1.26 +fastrlock==0.8.3 +asttokens==3.0.0 +psutil==7.0.0 +smmap==5.0.2 +exceptiongroup==1.3.0 +murmurhash==1.0.13 +pytorch-lightning==2.5.2 +filelock==3.18.0 +astor==0.8.1 +py-spy==0.4.0 +pydantic_core==2.33.2 +colorful==0.5.7 +fastapi==0.116.1 +opentelemetry-api==1.26.0 +openai==1.90.0 +ninja==1.11.1.4 +opentelemetry-semantic-conventions==0.47b0 +spacy-legacy==3.0.12 +opendatasets==0.1.22 +Deprecated==1.2.18 +proto-plus==1.26.1 +rignore==0.6.2 +aiohttp-cors==0.8.1 +liger_kernel==0.6.0 +opentelemetry-exporter-prometheus==0.56b0 +python-multipart==0.0.20 +multiprocess==0.70.16 +opentelemetry-sdk==1.26.0 +decorator==5.2.1 +xformers==0.0.29.post2 +spacy==3.8.7 +pyvers==0.1.0 +pycocotools==2.0.10 +websockets==15.0.1 +wasabi==1.1.3 +frozenlist==1.7.0 +codetiming==1.4.0 +sentencepiece==0.2.0 +toml==0.10.2 +scikit-image==0.25.2 +ftfy==6.3.1 +bleach==6.2.0 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +importlib_metadata==8.0.0 +spacy-loggers==1.0.5 +smart_open==7.3.0.post1 +portalocker==3.2.0 +llvmlite==0.44.0 +MarkupSafe==3.0.2 +braceexpand==0.1.7 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.context==5.3.0 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +platformdirs==4.2.2 +packaging==24.2 +wheel==0.45.1 +zipp==3.19.2 +inflect==7.3.1 +autocommand==2.2.2 +typeguard==4.3.0 +jaraco.collections==5.1.0 +backports.tarfile==1.2.0 +tomli==2.0.1 +importlib_metadata==8.0.0 +typing_extensions==4.12.2 diff --git 
a/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..819b843e2e94d09f81c1a276811070dc974cdefc --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/wandb-metadata.json @@ -0,0 +1,36 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-18T10:01:27.794840Z", + "args": [ + "--node-ip-address=10.1.5.237", + "--node-manager-port=34033", + "--object-store-name=/tmp/ray/session_2025-07-18_17-59-46_929054_90432/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-18_17-59-46_929054_90432/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=52220", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=58307", + "--gcs-address=10.1.5.237:63437", + "--session-name=session_2025-07-18_17-59-46_929054_90432", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=2320bfb132f181fae6a438fbb8ba4302101825636e86b29ea49d2a26", + "--startup-token=64", + "--worker-launch-time-ms=1752832790343", + "--node-id=d351a5bfa85748ebf678bc24e7adda6ad59e09972b13108dbb01547f", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-dc4b748ff-f7c66", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "writerId": "qpm36h9mjv3m2bmimjfqh0pw0u9a4282" +} \ No newline at end of file diff --git 
a/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..ad8148fc0a694f50660e3c3d777201b9e2ea368c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-18T18:01:28.970283308+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-18T18:01:36.389685825+08:00","level":"INFO","msg":"stream: created new stream","id":"wmarwr6l"} +{"time":"2025-07-18T18:01:36.413332423+08:00","level":"INFO","msg":"handler: started","stream_id":"wmarwr6l"} +{"time":"2025-07-18T18:01:36.413371741+08:00","level":"INFO","msg":"stream: started","id":"wmarwr6l"} +{"time":"2025-07-18T18:01:36.413392401+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"wmarwr6l"} +{"time":"2025-07-18T18:01:36.413389742+08:00","level":"INFO","msg":"sender: started","stream_id":"wmarwr6l"} diff --git a/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..6263673936833403ea2ce0f1ff8f970f7be677d6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] Configure stats pid to 95226 +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] 
Loading settings from environment variables +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:init():830] calling init triggers +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 
'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 
'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:init():871] starting backend +2025-07-18 18:01:28,902 INFO MainThread:95226 [wandb_init.py:init():874] sending inform_init request +2025-07-18 18:01:28,904 INFO 
MainThread:95226 [wandb_init.py:init():882] backend started and connected +2025-07-18 18:01:28,909 INFO MainThread:95226 [wandb_init.py:init():953] updated telemetry +2025-07-18 18:01:29,464 INFO MainThread:95226 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-18 18:01:40,777 INFO MainThread:95226 [wandb_init.py:init():1029] starting run threads in backend +2025-07-18 18:01:41,069 INFO MainThread:95226 [wandb_run.py:_console_start():2458] atexit reg +2025-07-18 18:01:41,069 INFO MainThread:95226 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-18 18:01:41,079 INFO MainThread:95226 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-18 18:01:41,079 INFO MainThread:95226 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-18 18:01:41,125 INFO MainThread:95226 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/run-wmarwr6l.wandb b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/run-wmarwr6l.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/output.log b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..65c4fac1e916abf486a3044e9f28ae1f90c7e133 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 104882, "uuid": "0f066e81b2fc4d09a338174f40c2e400", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7396100b45ae50733f37383324036e5ff4cdf5aa --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +attrs==25.3.0 +tqdm==4.67.1 +langcodes==3.5.0 +nvidia-cublas-cu12==12.4.5.8 +airportsdata==20250706 +absl-py==2.3.1 +hf-xet==1.1.5 +opentelemetry-exporter-otlp-proto-http==1.26.0 +interegular==0.3.3 +tifffile==2025.5.10 +nvidia-cufile-cu12==1.11.1.6 +nltk==3.9.1 +tokenizers==0.21.2 +salesforce-lavis==1.0.2 +tzdata==2025.2 +prometheus_client==0.22.1 +google-auth==2.40.3 +ipython==8.37.0 +pydantic==2.11.7 +mathruler==0.1.0 +six==1.17.0 +python-dateutil==2.9.0.post0 +requests==2.32.4 +mistral_common==1.8.0 +huggingface-hub==0.33.4 +preshed==3.0.10 +torchmetrics==1.7.4 +blinker==1.9.0 +nvidia-cusparse-cu12==12.3.1.170 +rich-toolkit==0.14.8 +pytz==2025.2 +pandas==2.3.1 +packaging==25.0 +async-timeout==5.0.1 +diskcache==5.6.3 +google-api-core==2.25.1 +parso==0.8.4 +joblib==1.5.1 +pycountry==24.6.1 +triton==3.2.0 +pybase64==1.4.1 +marisa-trie==1.2.1 +plotly==6.2.0 +wandb==0.21.0 +PyYAML==6.0.2 +regex==2024.11.6 +idna==3.10 +numba==0.61.2 +nvidia-curand-cu12==10.3.5.147 +uvicorn==0.35.0 +srsly==2.5.1 +confection==0.1.5 +opentelemetry-semantic-conventions-ai==0.4.11 +typing-inspection==0.4.1 +opencv-python-headless==4.12.0.88 +pyasn1==0.6.1 +av==15.0.0 +xgrammar==0.1.18 +distlib==0.3.9 +datasets==4.0.0 +networkx==3.4.2 +prometheus-fastapi-instrumentator==7.1.0 +lightning-utilities==0.14.3 +executing==2.2.0 +pycocoevalcap==1.2 +h11==0.16.0 +certifi==2025.7.14 +sniffio==1.3.1 +wheel==0.45.1 +transformers==4.52.4 +wrapt==1.17.2 +jsonschema-specifications==2025.4.1 +mpmath==1.3.0 +msgspec==0.19.0 +py-cpuinfo==9.0.0 +contexttimer==0.3.3 
+watchdog==6.0.0 +pexpect==4.9.0 +webencodings==0.5.1 +verl==0.3.2.dev0 +webdataset==1.0.2 +httpcore==1.0.9 +opentelemetry-exporter-otlp==1.26.0 +lm-format-enforcer==0.10.11 +googleapis-common-protos==1.70.0 +pyzmq==27.0.0 +fsspec==2025.3.0 +grpcio==1.73.1 +cymem==2.0.11 +timm==0.4.12 +zipp==3.23.0 +llguidance==0.7.30 +opencensus-context==0.1.3 +omegaconf==2.3.0 +python-json-logger==3.3.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +watchfiles==1.1.0 +nvidia-nvjitlink-cu12==12.4.127 +peft==0.16.0 +sentry-sdk==2.32.0 +rpds-py==0.26.0 +email_validator==2.2.0 +nodeenv==1.9.1 +distro==1.9.0 +jiter==0.10.0 +compressed-tensors==0.9.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.7 +rich==14.0.0 +GitPython==3.1.44 +lazy_loader==0.4 +fastapi-cloud-cli==0.1.4 +cupy-cuda12x==13.5.1 +prompt_toolkit==3.0.51 +gguf==0.17.1 +blis==1.3.0 +thinc==8.3.6 +cloudpickle==3.1.1 +multidict==6.6.3 +nvidia-nvtx-cu12==12.4.127 +flash-attn==2.7.1.post1 +pyasn1_modules==0.4.2 +rsa==4.9.1 +weasel==0.4.1 +uvloop==0.21.0 +click==8.2.1 +numpy==2.2.6 +torchdata==0.11.0 +pylatexenc==2.10 +cachetools==5.5.2 +Jinja2==3.1.6 +typer==0.16.0 +nvidia-cudnn-cu12==9.1.0.70 +fastapi-cli==0.0.8 +xxhash==3.5.0 +tornado==6.5.1 +scipy==1.15.3 +rouge_score==0.1.2 +cloudpathlib==0.21.1 +streamlit==1.46.1 +jedi==0.19.2 +referencing==0.36.2 +accelerate==1.8.1 +decord==0.6.0 +setuptools==78.1.1 +mdurl==0.1.2 +vllm==0.8.5.post1 +identify==2.6.12 +python-slugify==8.0.4 +dnspython==2.7.0 +dill==0.3.8 +opentelemetry-proto==1.26.0 +orjson==3.10.18 +msgpack==1.1.1 +aiohttp==3.12.14 +aiosignal==1.4.0 +typing_extensions==4.14.1 +tiktoken==0.9.0 +catalogue==2.0.10 +platformdirs==4.3.8 +narwhals==1.47.0 +antlr4-python3-runtime==4.9.3 +pydantic-extra-types==2.10.5 +nvidia-cusolver-cu12==11.6.1.9 +kaggle==1.7.4.5 +propcache==0.3.2 +urllib3==2.5.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +pydeck==0.9.1 +nvidia-cufft-cu12==11.2.1.3 +pyarrow==20.0.0 +nvidia-nccl-cu12==2.21.5 +httptools==0.6.4 +qwen-vl-utils==0.0.11 
+markdown-it-py==3.0.0 +gitdb==4.0.12 +altair==5.5.0 +torchvision==0.21.0 +python-magic==0.4.27 +iopath==0.1.10 +ray==2.47.1 +blake3==1.0.5 +pillow==11.3.0 +python-dotenv==1.1.1 +torchaudio==2.6.0 +partial-json-parser==0.2.1.1.post6 +httpx==0.28.1 +torch==2.6.0 +anyio==4.9.0 +fairscale==0.4.4 +traitlets==5.14.3 +pure_eval==0.2.3 +sympy==1.13.1 +nvidia-cusparselt-cu12==0.6.2 +jsonschema==4.24.0 +imageio==2.37.0 +opencensus==0.11.4 +stack-data==0.6.3 +shellingham==1.5.4 +tensordict==0.9.1 +nvidia-cuda-runtime-cu12==12.4.127 +nest-asyncio==1.6.0 +einops==0.8.1 +lark==1.2.2 +tenacity==9.1.2 +virtualenv==20.31.2 +ptyprocess==0.7.0 +outlines==0.1.11 +depyf==0.18.0 +starlette==0.47.1 +cfgv==3.4.0 +pre_commit==4.2.0 +language_data==1.3.0 +pip==25.1 +Pygments==2.19.2 +nvidia-cuda-cupti-cu12==12.4.127 +protobuf==4.25.8 +safetensors==0.5.3 +text-unidecode==1.3 +wcwidth==0.2.13 +charset-normalizer==3.4.2 +aiohappyeyeballs==2.6.1 +outlines_core==0.1.26 +fastrlock==0.8.3 +asttokens==3.0.0 +psutil==7.0.0 +smmap==5.0.2 +exceptiongroup==1.3.0 +murmurhash==1.0.13 +pytorch-lightning==2.5.2 +filelock==3.18.0 +astor==0.8.1 +py-spy==0.4.0 +pydantic_core==2.33.2 +colorful==0.5.7 +fastapi==0.116.1 +opentelemetry-api==1.26.0 +openai==1.90.0 +ninja==1.11.1.4 +opentelemetry-semantic-conventions==0.47b0 +spacy-legacy==3.0.12 +opendatasets==0.1.22 +Deprecated==1.2.18 +proto-plus==1.26.1 +rignore==0.6.2 +aiohttp-cors==0.8.1 +liger_kernel==0.6.0 +opentelemetry-exporter-prometheus==0.56b0 +python-multipart==0.0.20 +multiprocess==0.70.16 +opentelemetry-sdk==1.26.0 +decorator==5.2.1 +xformers==0.0.29.post2 +spacy==3.8.7 +pyvers==0.1.0 +pycocotools==2.0.10 +websockets==15.0.1 +wasabi==1.1.3 +frozenlist==1.7.0 +codetiming==1.4.0 +sentencepiece==0.2.0 +toml==0.10.2 +scikit-image==0.25.2 +ftfy==6.3.1 +bleach==6.2.0 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +importlib_metadata==8.0.0 +spacy-loggers==1.0.5 +smart_open==7.3.0.post1 +portalocker==3.2.0 +llvmlite==0.44.0 +MarkupSafe==3.0.2 
+braceexpand==0.1.7 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.context==5.3.0 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +platformdirs==4.2.2 +packaging==24.2 +wheel==0.45.1 +zipp==3.19.2 +inflect==7.3.1 +autocommand==2.2.2 +typeguard==4.3.0 +jaraco.collections==5.1.0 +backports.tarfile==1.2.0 +tomli==2.0.1 +importlib_metadata==8.0.0 +typing_extensions==4.12.2 diff --git a/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8aebeb6b614972282a34be8538e064ae8f0091b6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-18T10:10:22.154415Z", + "args": [ + "--node-ip-address=10.1.5.237", + "--node-manager-port=45779", + "--object-store-name=/tmp/ray/session_2025-07-18_18-08-41_995857_100101/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-18_18-08-41_995857_100101/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=60724", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=59748", + "--gcs-address=10.1.5.237:65420", + "--session-name=session_2025-07-18_18-08-41_995857_100101", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=d5af14f82d6884b3972b319ba9c8871ee6d621d0b395536182e77073", + "--startup-token=64", + "--worker-launch-time-ms=1752833324419", + "--node-id=affe3b277e8d66adb6a1a72266e2e5ce24fa5e48471c99f30a7a9bdf", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": 
"https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-dc4b748ff-f7c66", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1225166848" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-f7e858cd-ae03-031d-b834-86bf87923211" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-becb8d59-2ab7-b50d-5770-183c6478747a" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655" + } + ], + "cudaVersion": "12.1", + "writerId": "71sc2v9oxtkr7yiqxoaago0bipl2xjby" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..3759dd46e765b82983092a6a8eead014df4ca555 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-18T18:10:23.417471358+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-18T18:10:25.159462304+08:00","level":"INFO","msg":"stream: created new stream","id":"zkytrm61"} +{"time":"2025-07-18T18:10:25.159507377+08:00","level":"INFO","msg":"stream: started","id":"zkytrm61"} +{"time":"2025-07-18T18:10:25.159528642+08:00","level":"INFO","msg":"handler: started","stream_id":"zkytrm61"} +{"time":"2025-07-18T18:10:25.15958268+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"zkytrm61"} +{"time":"2025-07-18T18:10:25.159587635+08:00","level":"INFO","msg":"sender: started","stream_id":"zkytrm61"} diff --git a/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e2c9a5eb31c68fab90edb95515f84c8067d3be14 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-18 18:10:23,122 INFO MainThread:104882 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_setup.py:_flush():80] Configure stats pid to 104882 +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log +2025-07-18 18:10:23,123 INFO 
MainThread:104882 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_init.py:init():830] calling init triggers +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 
'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_init.py:init():871] starting backend +2025-07-18 18:10:23,360 INFO MainThread:104882 [wandb_init.py:init():874] sending inform_init request +2025-07-18 18:10:23,362 INFO MainThread:104882 [wandb_init.py:init():882] backend started and connected +2025-07-18 18:10:23,390 INFO MainThread:104882 [wandb_init.py:init():953] updated telemetry +2025-07-18 18:10:23,939 INFO MainThread:104882 [wandb_init.py:init():977] communicating run to backend with 
90.0 second timeout +2025-07-18 18:10:26,092 INFO MainThread:104882 [wandb_init.py:init():1029] starting run threads in backend +2025-07-18 18:10:26,299 INFO MainThread:104882 [wandb_run.py:_console_start():2458] atexit reg +2025-07-18 18:10:26,299 INFO MainThread:104882 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-18 18:10:26,308 INFO MainThread:104882 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-18 18:10:26,319 INFO MainThread:104882 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-18 18:10:26,355 INFO MainThread:104882 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/run-zkytrm61.wandb b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/run-zkytrm61.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/output.log b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..52511b6259efbc2a768bddaffcb1451523307a9e --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 160623, "uuid": "34b2c74ee0024065b840369ef674694c", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7396100b45ae50733f37383324036e5ff4cdf5aa --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +attrs==25.3.0 +tqdm==4.67.1 +langcodes==3.5.0 +nvidia-cublas-cu12==12.4.5.8 +airportsdata==20250706 +absl-py==2.3.1 +hf-xet==1.1.5 +opentelemetry-exporter-otlp-proto-http==1.26.0 +interegular==0.3.3 +tifffile==2025.5.10 +nvidia-cufile-cu12==1.11.1.6 +nltk==3.9.1 +tokenizers==0.21.2 +salesforce-lavis==1.0.2 +tzdata==2025.2 +prometheus_client==0.22.1 +google-auth==2.40.3 +ipython==8.37.0 +pydantic==2.11.7 +mathruler==0.1.0 +six==1.17.0 +python-dateutil==2.9.0.post0 +requests==2.32.4 +mistral_common==1.8.0 +huggingface-hub==0.33.4 +preshed==3.0.10 +torchmetrics==1.7.4 +blinker==1.9.0 +nvidia-cusparse-cu12==12.3.1.170 +rich-toolkit==0.14.8 +pytz==2025.2 +pandas==2.3.1 +packaging==25.0 +async-timeout==5.0.1 +diskcache==5.6.3 +google-api-core==2.25.1 +parso==0.8.4 +joblib==1.5.1 +pycountry==24.6.1 +triton==3.2.0 +pybase64==1.4.1 +marisa-trie==1.2.1 +plotly==6.2.0 +wandb==0.21.0 +PyYAML==6.0.2 +regex==2024.11.6 +idna==3.10 +numba==0.61.2 +nvidia-curand-cu12==10.3.5.147 +uvicorn==0.35.0 +srsly==2.5.1 +confection==0.1.5 +opentelemetry-semantic-conventions-ai==0.4.11 +typing-inspection==0.4.1 +opencv-python-headless==4.12.0.88 +pyasn1==0.6.1 +av==15.0.0 +xgrammar==0.1.18 +distlib==0.3.9 +datasets==4.0.0 +networkx==3.4.2 +prometheus-fastapi-instrumentator==7.1.0 +lightning-utilities==0.14.3 +executing==2.2.0 +pycocoevalcap==1.2 +h11==0.16.0 +certifi==2025.7.14 +sniffio==1.3.1 +wheel==0.45.1 +transformers==4.52.4 +wrapt==1.17.2 +jsonschema-specifications==2025.4.1 +mpmath==1.3.0 +msgspec==0.19.0 +py-cpuinfo==9.0.0 +contexttimer==0.3.3 
+watchdog==6.0.0 +pexpect==4.9.0 +webencodings==0.5.1 +verl==0.3.2.dev0 +webdataset==1.0.2 +httpcore==1.0.9 +opentelemetry-exporter-otlp==1.26.0 +lm-format-enforcer==0.10.11 +googleapis-common-protos==1.70.0 +pyzmq==27.0.0 +fsspec==2025.3.0 +grpcio==1.73.1 +cymem==2.0.11 +timm==0.4.12 +zipp==3.23.0 +llguidance==0.7.30 +opencensus-context==0.1.3 +omegaconf==2.3.0 +python-json-logger==3.3.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +watchfiles==1.1.0 +nvidia-nvjitlink-cu12==12.4.127 +peft==0.16.0 +sentry-sdk==2.32.0 +rpds-py==0.26.0 +email_validator==2.2.0 +nodeenv==1.9.1 +distro==1.9.0 +jiter==0.10.0 +compressed-tensors==0.9.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.7 +rich==14.0.0 +GitPython==3.1.44 +lazy_loader==0.4 +fastapi-cloud-cli==0.1.4 +cupy-cuda12x==13.5.1 +prompt_toolkit==3.0.51 +gguf==0.17.1 +blis==1.3.0 +thinc==8.3.6 +cloudpickle==3.1.1 +multidict==6.6.3 +nvidia-nvtx-cu12==12.4.127 +flash-attn==2.7.1.post1 +pyasn1_modules==0.4.2 +rsa==4.9.1 +weasel==0.4.1 +uvloop==0.21.0 +click==8.2.1 +numpy==2.2.6 +torchdata==0.11.0 +pylatexenc==2.10 +cachetools==5.5.2 +Jinja2==3.1.6 +typer==0.16.0 +nvidia-cudnn-cu12==9.1.0.70 +fastapi-cli==0.0.8 +xxhash==3.5.0 +tornado==6.5.1 +scipy==1.15.3 +rouge_score==0.1.2 +cloudpathlib==0.21.1 +streamlit==1.46.1 +jedi==0.19.2 +referencing==0.36.2 +accelerate==1.8.1 +decord==0.6.0 +setuptools==78.1.1 +mdurl==0.1.2 +vllm==0.8.5.post1 +identify==2.6.12 +python-slugify==8.0.4 +dnspython==2.7.0 +dill==0.3.8 +opentelemetry-proto==1.26.0 +orjson==3.10.18 +msgpack==1.1.1 +aiohttp==3.12.14 +aiosignal==1.4.0 +typing_extensions==4.14.1 +tiktoken==0.9.0 +catalogue==2.0.10 +platformdirs==4.3.8 +narwhals==1.47.0 +antlr4-python3-runtime==4.9.3 +pydantic-extra-types==2.10.5 +nvidia-cusolver-cu12==11.6.1.9 +kaggle==1.7.4.5 +propcache==0.3.2 +urllib3==2.5.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +pydeck==0.9.1 +nvidia-cufft-cu12==11.2.1.3 +pyarrow==20.0.0 +nvidia-nccl-cu12==2.21.5 +httptools==0.6.4 +qwen-vl-utils==0.0.11 
+markdown-it-py==3.0.0 +gitdb==4.0.12 +altair==5.5.0 +torchvision==0.21.0 +python-magic==0.4.27 +iopath==0.1.10 +ray==2.47.1 +blake3==1.0.5 +pillow==11.3.0 +python-dotenv==1.1.1 +torchaudio==2.6.0 +partial-json-parser==0.2.1.1.post6 +httpx==0.28.1 +torch==2.6.0 +anyio==4.9.0 +fairscale==0.4.4 +traitlets==5.14.3 +pure_eval==0.2.3 +sympy==1.13.1 +nvidia-cusparselt-cu12==0.6.2 +jsonschema==4.24.0 +imageio==2.37.0 +opencensus==0.11.4 +stack-data==0.6.3 +shellingham==1.5.4 +tensordict==0.9.1 +nvidia-cuda-runtime-cu12==12.4.127 +nest-asyncio==1.6.0 +einops==0.8.1 +lark==1.2.2 +tenacity==9.1.2 +virtualenv==20.31.2 +ptyprocess==0.7.0 +outlines==0.1.11 +depyf==0.18.0 +starlette==0.47.1 +cfgv==3.4.0 +pre_commit==4.2.0 +language_data==1.3.0 +pip==25.1 +Pygments==2.19.2 +nvidia-cuda-cupti-cu12==12.4.127 +protobuf==4.25.8 +safetensors==0.5.3 +text-unidecode==1.3 +wcwidth==0.2.13 +charset-normalizer==3.4.2 +aiohappyeyeballs==2.6.1 +outlines_core==0.1.26 +fastrlock==0.8.3 +asttokens==3.0.0 +psutil==7.0.0 +smmap==5.0.2 +exceptiongroup==1.3.0 +murmurhash==1.0.13 +pytorch-lightning==2.5.2 +filelock==3.18.0 +astor==0.8.1 +py-spy==0.4.0 +pydantic_core==2.33.2 +colorful==0.5.7 +fastapi==0.116.1 +opentelemetry-api==1.26.0 +openai==1.90.0 +ninja==1.11.1.4 +opentelemetry-semantic-conventions==0.47b0 +spacy-legacy==3.0.12 +opendatasets==0.1.22 +Deprecated==1.2.18 +proto-plus==1.26.1 +rignore==0.6.2 +aiohttp-cors==0.8.1 +liger_kernel==0.6.0 +opentelemetry-exporter-prometheus==0.56b0 +python-multipart==0.0.20 +multiprocess==0.70.16 +opentelemetry-sdk==1.26.0 +decorator==5.2.1 +xformers==0.0.29.post2 +spacy==3.8.7 +pyvers==0.1.0 +pycocotools==2.0.10 +websockets==15.0.1 +wasabi==1.1.3 +frozenlist==1.7.0 +codetiming==1.4.0 +sentencepiece==0.2.0 +toml==0.10.2 +scikit-image==0.25.2 +ftfy==6.3.1 +bleach==6.2.0 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +importlib_metadata==8.0.0 +spacy-loggers==1.0.5 +smart_open==7.3.0.post1 +portalocker==3.2.0 +llvmlite==0.44.0 +MarkupSafe==3.0.2 
+braceexpand==0.1.7 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.context==5.3.0 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +platformdirs==4.2.2 +packaging==24.2 +wheel==0.45.1 +zipp==3.19.2 +inflect==7.3.1 +autocommand==2.2.2 +typeguard==4.3.0 +jaraco.collections==5.1.0 +backports.tarfile==1.2.0 +tomli==2.0.1 +importlib_metadata==8.0.0 +typing_extensions==4.12.2 diff --git a/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..eafc69925e2c6abda0f83fb49745b65c3ce12896 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-18T12:19:06.753628Z", + "args": [ + "--node-ip-address=10.1.5.237", + "--node-manager-port=37651", + "--object-store-name=/tmp/ray/session_2025-07-18_20-17-27_987959_155806/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-18_20-17-27_987959_155806/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=46087", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=64279", + "--gcs-address=10.1.5.237:55485", + "--session-name=session_2025-07-18_20-17-27_987959_155806", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=498a1e71e98cee5fa6c370066e878218480d78d02b0b0a20550a5571", + "--startup-token=64", + "--worker-launch-time-ms=1752841050410", + "--node-id=18fda1330b00f9c3f38fdc4c1387555fb29a9f963f649897c6fc1dc5", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": 
"https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-dc4b748ff-f7c66", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1225195520" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-f7e858cd-ae03-031d-b834-86bf87923211" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-becb8d59-2ab7-b50d-5770-183c6478747a" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655" + } + ], + "cudaVersion": "12.1", + "writerId": "99k3ygrrojzudcyj8lzv8s7kibi19jo9" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..b77c67f346efaab72f54cdb99ba00a4b329d2146 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-18T20:19:07.57787547+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-18T20:19:13.028328674+08:00","level":"INFO","msg":"stream: created new stream","id":"eo9xzqez"} +{"time":"2025-07-18T20:19:13.038047308+08:00","level":"INFO","msg":"stream: started","id":"eo9xzqez"} +{"time":"2025-07-18T20:19:13.038097996+08:00","level":"INFO","msg":"handler: started","stream_id":"eo9xzqez"} +{"time":"2025-07-18T20:19:13.038104971+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"eo9xzqez"} +{"time":"2025-07-18T20:19:13.038125386+08:00","level":"INFO","msg":"sender: started","stream_id":"eo9xzqez"} diff --git a/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..79278d88118191164b274eafaa2e8feb9b88f9cf --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Configure stats pid to 160623 +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log +2025-07-18 20:19:07,268 INFO 
MainThread:160623 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_init.py:init():830] calling init triggers +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 
'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_init.py:init():871] starting backend +2025-07-18 20:19:07,479 INFO MainThread:160623 [wandb_init.py:init():874] sending inform_init request +2025-07-18 20:19:07,481 INFO MainThread:160623 [wandb_init.py:init():882] backend started and connected +2025-07-18 20:19:07,510 INFO MainThread:160623 [wandb_init.py:init():953] updated telemetry +2025-07-18 20:19:08,011 INFO MainThread:160623 [wandb_init.py:init():977] communicating run to backend with 
90.0 second timeout +2025-07-18 20:19:17,965 INFO MainThread:160623 [wandb_init.py:init():1029] starting run threads in backend +2025-07-18 20:19:18,182 INFO MainThread:160623 [wandb_run.py:_console_start():2458] atexit reg +2025-07-18 20:19:18,183 INFO MainThread:160623 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-18 20:19:18,213 INFO MainThread:160623 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-18 20:19:18,213 INFO MainThread:160623 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-18 20:19:18,223 INFO MainThread:160623 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/run-eo9xzqez.wandb b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/run-eo9xzqez.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/output.log b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e67e6518bd3a49ed7d59e2b81420b42f4b3dbf1f --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 182641, "uuid": "99853167d0014a0cbe06d35970a786c8", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7396100b45ae50733f37383324036e5ff4cdf5aa --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +attrs==25.3.0 +tqdm==4.67.1 +langcodes==3.5.0 +nvidia-cublas-cu12==12.4.5.8 +airportsdata==20250706 +absl-py==2.3.1 +hf-xet==1.1.5 +opentelemetry-exporter-otlp-proto-http==1.26.0 +interegular==0.3.3 +tifffile==2025.5.10 +nvidia-cufile-cu12==1.11.1.6 +nltk==3.9.1 +tokenizers==0.21.2 +salesforce-lavis==1.0.2 +tzdata==2025.2 +prometheus_client==0.22.1 +google-auth==2.40.3 +ipython==8.37.0 +pydantic==2.11.7 +mathruler==0.1.0 +six==1.17.0 +python-dateutil==2.9.0.post0 +requests==2.32.4 +mistral_common==1.8.0 +huggingface-hub==0.33.4 +preshed==3.0.10 +torchmetrics==1.7.4 +blinker==1.9.0 +nvidia-cusparse-cu12==12.3.1.170 +rich-toolkit==0.14.8 +pytz==2025.2 +pandas==2.3.1 +packaging==25.0 +async-timeout==5.0.1 +diskcache==5.6.3 +google-api-core==2.25.1 +parso==0.8.4 +joblib==1.5.1 +pycountry==24.6.1 +triton==3.2.0 +pybase64==1.4.1 +marisa-trie==1.2.1 +plotly==6.2.0 +wandb==0.21.0 +PyYAML==6.0.2 +regex==2024.11.6 +idna==3.10 +numba==0.61.2 +nvidia-curand-cu12==10.3.5.147 +uvicorn==0.35.0 +srsly==2.5.1 +confection==0.1.5 +opentelemetry-semantic-conventions-ai==0.4.11 +typing-inspection==0.4.1 +opencv-python-headless==4.12.0.88 +pyasn1==0.6.1 +av==15.0.0 +xgrammar==0.1.18 +distlib==0.3.9 +datasets==4.0.0 +networkx==3.4.2 +prometheus-fastapi-instrumentator==7.1.0 +lightning-utilities==0.14.3 +executing==2.2.0 +pycocoevalcap==1.2 +h11==0.16.0 +certifi==2025.7.14 +sniffio==1.3.1 +wheel==0.45.1 +transformers==4.52.4 +wrapt==1.17.2 +jsonschema-specifications==2025.4.1 +mpmath==1.3.0 +msgspec==0.19.0 +py-cpuinfo==9.0.0 +contexttimer==0.3.3 
+watchdog==6.0.0 +pexpect==4.9.0 +webencodings==0.5.1 +verl==0.3.2.dev0 +webdataset==1.0.2 +httpcore==1.0.9 +opentelemetry-exporter-otlp==1.26.0 +lm-format-enforcer==0.10.11 +googleapis-common-protos==1.70.0 +pyzmq==27.0.0 +fsspec==2025.3.0 +grpcio==1.73.1 +cymem==2.0.11 +timm==0.4.12 +zipp==3.23.0 +llguidance==0.7.30 +opencensus-context==0.1.3 +omegaconf==2.3.0 +python-json-logger==3.3.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +watchfiles==1.1.0 +nvidia-nvjitlink-cu12==12.4.127 +peft==0.16.0 +sentry-sdk==2.32.0 +rpds-py==0.26.0 +email_validator==2.2.0 +nodeenv==1.9.1 +distro==1.9.0 +jiter==0.10.0 +compressed-tensors==0.9.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.7 +rich==14.0.0 +GitPython==3.1.44 +lazy_loader==0.4 +fastapi-cloud-cli==0.1.4 +cupy-cuda12x==13.5.1 +prompt_toolkit==3.0.51 +gguf==0.17.1 +blis==1.3.0 +thinc==8.3.6 +cloudpickle==3.1.1 +multidict==6.6.3 +nvidia-nvtx-cu12==12.4.127 +flash-attn==2.7.1.post1 +pyasn1_modules==0.4.2 +rsa==4.9.1 +weasel==0.4.1 +uvloop==0.21.0 +click==8.2.1 +numpy==2.2.6 +torchdata==0.11.0 +pylatexenc==2.10 +cachetools==5.5.2 +Jinja2==3.1.6 +typer==0.16.0 +nvidia-cudnn-cu12==9.1.0.70 +fastapi-cli==0.0.8 +xxhash==3.5.0 +tornado==6.5.1 +scipy==1.15.3 +rouge_score==0.1.2 +cloudpathlib==0.21.1 +streamlit==1.46.1 +jedi==0.19.2 +referencing==0.36.2 +accelerate==1.8.1 +decord==0.6.0 +setuptools==78.1.1 +mdurl==0.1.2 +vllm==0.8.5.post1 +identify==2.6.12 +python-slugify==8.0.4 +dnspython==2.7.0 +dill==0.3.8 +opentelemetry-proto==1.26.0 +orjson==3.10.18 +msgpack==1.1.1 +aiohttp==3.12.14 +aiosignal==1.4.0 +typing_extensions==4.14.1 +tiktoken==0.9.0 +catalogue==2.0.10 +platformdirs==4.3.8 +narwhals==1.47.0 +antlr4-python3-runtime==4.9.3 +pydantic-extra-types==2.10.5 +nvidia-cusolver-cu12==11.6.1.9 +kaggle==1.7.4.5 +propcache==0.3.2 +urllib3==2.5.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +pydeck==0.9.1 +nvidia-cufft-cu12==11.2.1.3 +pyarrow==20.0.0 +nvidia-nccl-cu12==2.21.5 +httptools==0.6.4 +qwen-vl-utils==0.0.11 
+markdown-it-py==3.0.0 +gitdb==4.0.12 +altair==5.5.0 +torchvision==0.21.0 +python-magic==0.4.27 +iopath==0.1.10 +ray==2.47.1 +blake3==1.0.5 +pillow==11.3.0 +python-dotenv==1.1.1 +torchaudio==2.6.0 +partial-json-parser==0.2.1.1.post6 +httpx==0.28.1 +torch==2.6.0 +anyio==4.9.0 +fairscale==0.4.4 +traitlets==5.14.3 +pure_eval==0.2.3 +sympy==1.13.1 +nvidia-cusparselt-cu12==0.6.2 +jsonschema==4.24.0 +imageio==2.37.0 +opencensus==0.11.4 +stack-data==0.6.3 +shellingham==1.5.4 +tensordict==0.9.1 +nvidia-cuda-runtime-cu12==12.4.127 +nest-asyncio==1.6.0 +einops==0.8.1 +lark==1.2.2 +tenacity==9.1.2 +virtualenv==20.31.2 +ptyprocess==0.7.0 +outlines==0.1.11 +depyf==0.18.0 +starlette==0.47.1 +cfgv==3.4.0 +pre_commit==4.2.0 +language_data==1.3.0 +pip==25.1 +Pygments==2.19.2 +nvidia-cuda-cupti-cu12==12.4.127 +protobuf==4.25.8 +safetensors==0.5.3 +text-unidecode==1.3 +wcwidth==0.2.13 +charset-normalizer==3.4.2 +aiohappyeyeballs==2.6.1 +outlines_core==0.1.26 +fastrlock==0.8.3 +asttokens==3.0.0 +psutil==7.0.0 +smmap==5.0.2 +exceptiongroup==1.3.0 +murmurhash==1.0.13 +pytorch-lightning==2.5.2 +filelock==3.18.0 +astor==0.8.1 +py-spy==0.4.0 +pydantic_core==2.33.2 +colorful==0.5.7 +fastapi==0.116.1 +opentelemetry-api==1.26.0 +openai==1.90.0 +ninja==1.11.1.4 +opentelemetry-semantic-conventions==0.47b0 +spacy-legacy==3.0.12 +opendatasets==0.1.22 +Deprecated==1.2.18 +proto-plus==1.26.1 +rignore==0.6.2 +aiohttp-cors==0.8.1 +liger_kernel==0.6.0 +opentelemetry-exporter-prometheus==0.56b0 +python-multipart==0.0.20 +multiprocess==0.70.16 +opentelemetry-sdk==1.26.0 +decorator==5.2.1 +xformers==0.0.29.post2 +spacy==3.8.7 +pyvers==0.1.0 +pycocotools==2.0.10 +websockets==15.0.1 +wasabi==1.1.3 +frozenlist==1.7.0 +codetiming==1.4.0 +sentencepiece==0.2.0 +toml==0.10.2 +scikit-image==0.25.2 +ftfy==6.3.1 +bleach==6.2.0 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +importlib_metadata==8.0.0 +spacy-loggers==1.0.5 +smart_open==7.3.0.post1 +portalocker==3.2.0 +llvmlite==0.44.0 +MarkupSafe==3.0.2 
+braceexpand==0.1.7 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.context==5.3.0 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +platformdirs==4.2.2 +packaging==24.2 +wheel==0.45.1 +zipp==3.19.2 +inflect==7.3.1 +autocommand==2.2.2 +typeguard==4.3.0 +jaraco.collections==5.1.0 +backports.tarfile==1.2.0 +tomli==2.0.1 +importlib_metadata==8.0.0 +typing_extensions==4.12.2 diff --git a/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..1711474cf776654595bb7ef0ee5572ce3818846c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-18T13:25:11.393703Z", + "args": [ + "--node-ip-address=10.1.5.237", + "--node-manager-port=46647", + "--object-store-name=/tmp/ray/session_2025-07-18_21-23-26_144453_177856/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-18_21-23-26_144453_177856/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=39471", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=50428", + "--gcs-address=10.1.5.237:57263", + "--session-name=session_2025-07-18_21-23-26_144453_177856", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=0c4472bb4b52db8edcea777b259959e49dd785e1e795a3856c465fef", + "--startup-token=64", + "--worker-launch-time-ms=1752845008645", + "--node-id=a12d55c028304a40ba5aecfb7278b8e5f70b228834872967378029cf", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": 
"https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-dc4b748ff-f7c66", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1225252864" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-f7e858cd-ae03-031d-b834-86bf87923211" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-becb8d59-2ab7-b50d-5770-183c6478747a" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655" + } + ], + "cudaVersion": "12.1", + "writerId": "dbvg4pjdg1e4bz0ifs06d8zkh1pcyxf0" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..a8da9fe90f0c93dfaeb6a5801a162fcb8751c39a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug-internal.log 
@@ -0,0 +1,13 @@ +{"time":"2025-07-18T21:25:12.469804386+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-18T21:25:22.33136325+08:00","level":"INFO","msg":"stream: created new stream","id":"egfsoxro"} +{"time":"2025-07-18T21:25:22.34344642+08:00","level":"INFO","msg":"handler: started","stream_id":"egfsoxro"} +{"time":"2025-07-18T21:25:22.343473598+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"egfsoxro"} +{"time":"2025-07-18T21:25:22.343477736+08:00","level":"INFO","msg":"stream: started","id":"egfsoxro"} +{"time":"2025-07-18T21:25:22.343503001+08:00","level":"INFO","msg":"sender: started","stream_id":"egfsoxro"} +{"time":"2025-07-18T21:29:24.147448876+08:00","level":"ERROR","msg":"sender: sendStopStatus: failed to get run stopped status: context deadline exceeded (Client.Timeout or context cancellation while reading body)"} +{"time":"2025-07-18T21:34:51.606197319+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/egfsoxro/file_stream\": read tcp 10.1.5.237:54740->172.67.193.61:443: read: connection timed out"} +{"time":"2025-07-18T21:35:11.662284272+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/egfsoxro/file_stream\": unexpected EOF"} +{"time":"2025-07-18T21:38:58.390140338+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/egfsoxro/file_stream\": read tcp 10.1.5.237:39892->172.67.193.61:443: read: connection timed out"} +{"time":"2025-07-18T21:42:37.527116841+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/egfsoxro/file_stream\": read tcp 10.1.5.237:52510->104.21.20.172:443: read: connection timed out"} +{"time":"2025-07-18T21:45:57.207400741+08:00","level":"INFO","msg":"api: retrying error","error":"Post 
\"https://api.bandw.top/files/gia0603yucca/easy_r1/egfsoxro/file_stream\": read tcp 10.1.5.237:51260->104.21.20.172:443: read: connection reset by peer"} +{"time":"2025-07-18T21:48:37.975131005+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/egfsoxro/file_stream\": read tcp 10.1.5.237:35598->172.67.193.61:443: read: connection timed out"} diff --git a/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug.log b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e9c8978cee26d7b44df09eb2dfcd76e08a9b009d --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_setup.py:_flush():80] Configure stats pid to 182641 +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug.log +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug-internal.log +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_init.py:init():830] calling init triggers +2025-07-18 21:25:12,185 INFO MainThread:182641 
[wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': 
True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': 
False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_init.py:init():871] starting backend +2025-07-18 21:25:12,397 INFO MainThread:182641 [wandb_init.py:init():874] sending inform_init request +2025-07-18 21:25:12,399 INFO MainThread:182641 [wandb_init.py:init():882] backend started and connected +2025-07-18 21:25:12,404 INFO MainThread:182641 [wandb_init.py:init():953] updated telemetry +2025-07-18 21:25:12,959 INFO MainThread:182641 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-18 21:25:23,805 INFO MainThread:182641 [wandb_init.py:init():1029] starting run threads in backend +2025-07-18 21:25:24,145 INFO MainThread:182641 [wandb_run.py:_console_start():2458] atexit reg +2025-07-18 21:25:24,145 INFO MainThread:182641 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-18 21:25:24,151 INFO 
MainThread:182641 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-18 21:25:24,160 INFO MainThread:182641 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-18 21:25:24,180 INFO MainThread:182641 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/output.log b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..c501f4ec246cbbe687fd1c5d625de55776715882 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 209844, "uuid": "231f6197dcf24423b2307815cebae57f", "closed": false} +Start validation... diff --git a/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7396100b45ae50733f37383324036e5ff4cdf5aa --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +attrs==25.3.0 +tqdm==4.67.1 +langcodes==3.5.0 +nvidia-cublas-cu12==12.4.5.8 +airportsdata==20250706 +absl-py==2.3.1 +hf-xet==1.1.5 +opentelemetry-exporter-otlp-proto-http==1.26.0 +interegular==0.3.3 +tifffile==2025.5.10 +nvidia-cufile-cu12==1.11.1.6 +nltk==3.9.1 +tokenizers==0.21.2 +salesforce-lavis==1.0.2 +tzdata==2025.2 +prometheus_client==0.22.1 +google-auth==2.40.3 +ipython==8.37.0 +pydantic==2.11.7 +mathruler==0.1.0 +six==1.17.0 +python-dateutil==2.9.0.post0 +requests==2.32.4 +mistral_common==1.8.0 +huggingface-hub==0.33.4 +preshed==3.0.10 +torchmetrics==1.7.4 +blinker==1.9.0 +nvidia-cusparse-cu12==12.3.1.170 
+rich-toolkit==0.14.8 +pytz==2025.2 +pandas==2.3.1 +packaging==25.0 +async-timeout==5.0.1 +diskcache==5.6.3 +google-api-core==2.25.1 +parso==0.8.4 +joblib==1.5.1 +pycountry==24.6.1 +triton==3.2.0 +pybase64==1.4.1 +marisa-trie==1.2.1 +plotly==6.2.0 +wandb==0.21.0 +PyYAML==6.0.2 +regex==2024.11.6 +idna==3.10 +numba==0.61.2 +nvidia-curand-cu12==10.3.5.147 +uvicorn==0.35.0 +srsly==2.5.1 +confection==0.1.5 +opentelemetry-semantic-conventions-ai==0.4.11 +typing-inspection==0.4.1 +opencv-python-headless==4.12.0.88 +pyasn1==0.6.1 +av==15.0.0 +xgrammar==0.1.18 +distlib==0.3.9 +datasets==4.0.0 +networkx==3.4.2 +prometheus-fastapi-instrumentator==7.1.0 +lightning-utilities==0.14.3 +executing==2.2.0 +pycocoevalcap==1.2 +h11==0.16.0 +certifi==2025.7.14 +sniffio==1.3.1 +wheel==0.45.1 +transformers==4.52.4 +wrapt==1.17.2 +jsonschema-specifications==2025.4.1 +mpmath==1.3.0 +msgspec==0.19.0 +py-cpuinfo==9.0.0 +contexttimer==0.3.3 +watchdog==6.0.0 +pexpect==4.9.0 +webencodings==0.5.1 +verl==0.3.2.dev0 +webdataset==1.0.2 +httpcore==1.0.9 +opentelemetry-exporter-otlp==1.26.0 +lm-format-enforcer==0.10.11 +googleapis-common-protos==1.70.0 +pyzmq==27.0.0 +fsspec==2025.3.0 +grpcio==1.73.1 +cymem==2.0.11 +timm==0.4.12 +zipp==3.23.0 +llguidance==0.7.30 +opencensus-context==0.1.3 +omegaconf==2.3.0 +python-json-logger==3.3.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +watchfiles==1.1.0 +nvidia-nvjitlink-cu12==12.4.127 +peft==0.16.0 +sentry-sdk==2.32.0 +rpds-py==0.26.0 +email_validator==2.2.0 +nodeenv==1.9.1 +distro==1.9.0 +jiter==0.10.0 +compressed-tensors==0.9.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.7 +rich==14.0.0 +GitPython==3.1.44 +lazy_loader==0.4 +fastapi-cloud-cli==0.1.4 +cupy-cuda12x==13.5.1 +prompt_toolkit==3.0.51 +gguf==0.17.1 +blis==1.3.0 +thinc==8.3.6 +cloudpickle==3.1.1 +multidict==6.6.3 +nvidia-nvtx-cu12==12.4.127 +flash-attn==2.7.1.post1 +pyasn1_modules==0.4.2 +rsa==4.9.1 +weasel==0.4.1 +uvloop==0.21.0 +click==8.2.1 +numpy==2.2.6 +torchdata==0.11.0 
+pylatexenc==2.10 +cachetools==5.5.2 +Jinja2==3.1.6 +typer==0.16.0 +nvidia-cudnn-cu12==9.1.0.70 +fastapi-cli==0.0.8 +xxhash==3.5.0 +tornado==6.5.1 +scipy==1.15.3 +rouge_score==0.1.2 +cloudpathlib==0.21.1 +streamlit==1.46.1 +jedi==0.19.2 +referencing==0.36.2 +accelerate==1.8.1 +decord==0.6.0 +setuptools==78.1.1 +mdurl==0.1.2 +vllm==0.8.5.post1 +identify==2.6.12 +python-slugify==8.0.4 +dnspython==2.7.0 +dill==0.3.8 +opentelemetry-proto==1.26.0 +orjson==3.10.18 +msgpack==1.1.1 +aiohttp==3.12.14 +aiosignal==1.4.0 +typing_extensions==4.14.1 +tiktoken==0.9.0 +catalogue==2.0.10 +platformdirs==4.3.8 +narwhals==1.47.0 +antlr4-python3-runtime==4.9.3 +pydantic-extra-types==2.10.5 +nvidia-cusolver-cu12==11.6.1.9 +kaggle==1.7.4.5 +propcache==0.3.2 +urllib3==2.5.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +pydeck==0.9.1 +nvidia-cufft-cu12==11.2.1.3 +pyarrow==20.0.0 +nvidia-nccl-cu12==2.21.5 +httptools==0.6.4 +qwen-vl-utils==0.0.11 +markdown-it-py==3.0.0 +gitdb==4.0.12 +altair==5.5.0 +torchvision==0.21.0 +python-magic==0.4.27 +iopath==0.1.10 +ray==2.47.1 +blake3==1.0.5 +pillow==11.3.0 +python-dotenv==1.1.1 +torchaudio==2.6.0 +partial-json-parser==0.2.1.1.post6 +httpx==0.28.1 +torch==2.6.0 +anyio==4.9.0 +fairscale==0.4.4 +traitlets==5.14.3 +pure_eval==0.2.3 +sympy==1.13.1 +nvidia-cusparselt-cu12==0.6.2 +jsonschema==4.24.0 +imageio==2.37.0 +opencensus==0.11.4 +stack-data==0.6.3 +shellingham==1.5.4 +tensordict==0.9.1 +nvidia-cuda-runtime-cu12==12.4.127 +nest-asyncio==1.6.0 +einops==0.8.1 +lark==1.2.2 +tenacity==9.1.2 +virtualenv==20.31.2 +ptyprocess==0.7.0 +outlines==0.1.11 +depyf==0.18.0 +starlette==0.47.1 +cfgv==3.4.0 +pre_commit==4.2.0 +language_data==1.3.0 +pip==25.1 +Pygments==2.19.2 +nvidia-cuda-cupti-cu12==12.4.127 +protobuf==4.25.8 +safetensors==0.5.3 +text-unidecode==1.3 +wcwidth==0.2.13 +charset-normalizer==3.4.2 +aiohappyeyeballs==2.6.1 +outlines_core==0.1.26 +fastrlock==0.8.3 +asttokens==3.0.0 +psutil==7.0.0 +smmap==5.0.2 +exceptiongroup==1.3.0 +murmurhash==1.0.13 
+pytorch-lightning==2.5.2 +filelock==3.18.0 +astor==0.8.1 +py-spy==0.4.0 +pydantic_core==2.33.2 +colorful==0.5.7 +fastapi==0.116.1 +opentelemetry-api==1.26.0 +openai==1.90.0 +ninja==1.11.1.4 +opentelemetry-semantic-conventions==0.47b0 +spacy-legacy==3.0.12 +opendatasets==0.1.22 +Deprecated==1.2.18 +proto-plus==1.26.1 +rignore==0.6.2 +aiohttp-cors==0.8.1 +liger_kernel==0.6.0 +opentelemetry-exporter-prometheus==0.56b0 +python-multipart==0.0.20 +multiprocess==0.70.16 +opentelemetry-sdk==1.26.0 +decorator==5.2.1 +xformers==0.0.29.post2 +spacy==3.8.7 +pyvers==0.1.0 +pycocotools==2.0.10 +websockets==15.0.1 +wasabi==1.1.3 +frozenlist==1.7.0 +codetiming==1.4.0 +sentencepiece==0.2.0 +toml==0.10.2 +scikit-image==0.25.2 +ftfy==6.3.1 +bleach==6.2.0 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +importlib_metadata==8.0.0 +spacy-loggers==1.0.5 +smart_open==7.3.0.post1 +portalocker==3.2.0 +llvmlite==0.44.0 +MarkupSafe==3.0.2 +braceexpand==0.1.7 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.context==5.3.0 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +platformdirs==4.2.2 +packaging==24.2 +wheel==0.45.1 +zipp==3.19.2 +inflect==7.3.1 +autocommand==2.2.2 +typeguard==4.3.0 +jaraco.collections==5.1.0 +backports.tarfile==1.2.0 +tomli==2.0.1 +importlib_metadata==8.0.0 +typing_extensions==4.12.2 diff --git a/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4a24bcc304aea41008f5b0e3ccb15f1e76d8fb87 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-18T14:41:31.644159Z", + "args": [ + "--node-ip-address=10.1.5.237", + "--node-manager-port=45369", + 
"--object-store-name=/tmp/ray/session_2025-07-18_22-39-43_301828_205004/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-18_22-39-43_301828_205004/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=43565", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=58432", + "--gcs-address=10.1.5.237:40516", + "--session-name=session_2025-07-18_22-39-43_301828_205004", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=eac498e1f2975b1ecde2971d163e13136b422b2bc16efcb3d04e18cc", + "--startup-token=64", + "--worker-launch-time-ms=1752849585965", + "--node-id=f63242f63dfbc38276020b5aea2cd0938dd3a768eff5ad09f4f8182d", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-dc4b748ff-f7c66", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1225330688" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-f7e858cd-ae03-031d-b834-86bf87923211" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": 
"Ampere", + "uuid": "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-becb8d59-2ab7-b50d-5770-183c6478747a" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655" + } + ], + "cudaVersion": "12.1", + "writerId": "99rgf27lonrq0txkxxch5fdtjzzzjfev" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c3c0b3d98e53764df145d7228b6942fa3b3f5167 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug-internal.log @@ -0,0 +1,15 @@ +{"time":"2025-07-18T22:41:33.020430983+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-18T22:41:52.945982131+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": unexpected EOF"} +{"time":"2025-07-18T22:42:00.753263753+08:00","level":"INFO","msg":"stream: created new stream","id":"vx2sr49f"} +{"time":"2025-07-18T22:42:00.754755493+08:00","level":"INFO","msg":"stream: started","id":"vx2sr49f"} +{"time":"2025-07-18T22:42:00.754778938+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"vx2sr49f"} +{"time":"2025-07-18T22:42:00.754802329+08:00","level":"INFO","msg":"sender: started","stream_id":"vx2sr49f"} +{"time":"2025-07-18T22:42:00.754811646+08:00","level":"INFO","msg":"handler: started","stream_id":"vx2sr49f"} +{"time":"2025-07-18T22:42:49.744113716+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"} 
+{"time":"2025-07-18T22:42:54.672269282+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"} +{"time":"2025-07-18T22:43:02.89295424+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"} +{"time":"2025-07-18T22:43:26.945471906+08:00","level":"ERROR","msg":"runupserter: failed to upload changes","error":"net/http: request canceled (Client.Timeout or context cancellation while reading body)"} +{"time":"2025-07-18T22:43:35.280027865+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"} +{"time":"2025-07-18T22:44:24.629460284+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/vx2sr49f/file_stream\": unexpected EOF"} +{"time":"2025-07-18T22:47:34.871114852+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/vx2sr49f/file_stream\": read tcp 10.1.5.237:33020->104.21.20.172:443: read: connection timed out"} +{"time":"2025-07-18T22:51:11.217832907+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/vx2sr49f/file_stream\": read tcp 10.1.5.237:42534->104.21.20.172:443: read: connection reset by peer"} diff --git a/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug.log b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..31cf2bf9502034e28128343e8f489c68957f46cd --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-18 22:41:32,687 INFO MainThread:209844 
[wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_setup.py:_flush():80] Configure stats pid to 209844 +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug.log +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug-internal.log +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_init.py:init():830] calling init triggers +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 
'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': 
{'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 
'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_init.py:init():871] starting backend +2025-07-18 22:41:32,934 INFO MainThread:209844 [wandb_init.py:init():874] sending inform_init request +2025-07-18 22:41:32,936 INFO MainThread:209844 [wandb_init.py:init():882] backend started and connected +2025-07-18 22:41:32,946 INFO MainThread:209844 [wandb_init.py:init():953] updated telemetry +2025-07-18 22:41:33,334 INFO MainThread:209844 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-18 22:42:02,587 INFO MainThread:209844 [wandb_init.py:init():1029] starting run threads in backend +2025-07-18 22:42:02,890 INFO MainThread:209844 [wandb_run.py:_console_start():2458] atexit reg +2025-07-18 22:42:02,890 INFO MainThread:209844 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-18 22:42:02,904 INFO MainThread:209844 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-18 22:42:02,904 INFO MainThread:209844 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-18 22:42:02,920 INFO MainThread:209844 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/run-vx2sr49f.wandb b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/run-vx2sr49f.wandb new file mode 100644 index 0000000000000000000000000000000000000000..1f7bd3c7bac016b4e7dd14eb2a326e7a305a20a8 Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/run-vx2sr49f.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/output.log b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..35a09afc3d473818c42b25134633f01ca5a529c6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 44339, "uuid": "e52fee178cde4f6c8786c5a75d6c593c", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..181d58e96b9d2b6453a6ad03567951f226d615e2 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T05:42:34.299479Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=37845", + "--object-store-name=/tmp/ray/session_2025-07-20_13-33-41_705569_39575/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_13-33-41_705569_39575/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=63873", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=64414", + "--gcs-address=10.1.4.164:53875", + "--session-name=session_2025-07-20_13-33-41_705569_39575", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=fbdc233d73dca0f6543c269a751d673a935fab0cb81fb078e395b1a3", + "--startup-token=64", + "--worker-launch-time-ms=1752989625411", + "--node-id=e86bda52ca2366292b136e105f4196a896ce4cd7c5afef70608609d8", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178062848" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "tqus8osn3l5740extl676gu4rhqn0zv8" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..bd9edaf1f409a7cddb83ee82a2933b2a9c443b31 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug-internal.log @@ -0,0 +1,16 @@ +{"time":"2025-07-20T13:42:34.643621776+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T13:42:43.882833108+08:00","level":"INFO","msg":"stream: created new stream","id":"au69cs3i"} +{"time":"2025-07-20T13:42:43.883617829+08:00","level":"INFO","msg":"stream: started","id":"au69cs3i"} +{"time":"2025-07-20T13:42:43.88365162+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"au69cs3i"} +{"time":"2025-07-20T13:42:43.883669273+08:00","level":"INFO","msg":"handler: started","stream_id":"au69cs3i"} +{"time":"2025-07-20T13:42:43.883668078+08:00","level":"INFO","msg":"sender: started","stream_id":"au69cs3i"} +{"time":"2025-07-20T13:43:21.282576909+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:47278->172.67.193.61:443: read: connection reset by peer"} +{"time":"2025-07-20T13:43:53.49841014+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": unexpected EOF"} +{"time":"2025-07-20T13:51:01.891406742+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:42702->104.21.20.172:443: read: connection timed out"} +{"time":"2025-07-20T13:51:28.722334221+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:52726->104.21.20.172:443: read: connection reset by peer"} +{"time":"2025-07-20T13:54:04.997626954+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:50096->172.67.193.61:443: read: connection reset by peer"} 
+{"time":"2025-07-20T13:56:40.143377068+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:44282->104.21.20.172:443: read: connection reset by peer"} +{"time":"2025-07-20T13:59:34.91644692+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:59608->172.67.193.61:443: read: connection reset by peer"} +{"time":"2025-07-20T14:00:22.63835448+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:55470->172.67.193.61:443: read: connection reset by peer"} +{"time":"2025-07-20T14:00:46.542087968+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": unexpected EOF"} +{"time":"2025-07-20T14:04:17.265732964+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:34306->172.67.193.61:443: read: connection reset by peer"} diff --git a/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..ab6cade8b17f6584386b8dd6b5aa561ed4fffb0b --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_setup.py:_flush():80] Configure stats pid to 44339 +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 13:42:34,421 INFO MainThread:44339 
[wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug.log +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug-internal.log +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_init.py:init():830] calling init triggers +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': 
True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': 
{'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 13:42:34,421 INFO 
MainThread:44339 [wandb_init.py:init():871] starting backend +2025-07-20 13:42:34,632 INFO MainThread:44339 [wandb_init.py:init():874] sending inform_init request +2025-07-20 13:42:34,634 INFO MainThread:44339 [wandb_init.py:init():882] backend started and connected +2025-07-20 13:42:34,640 INFO MainThread:44339 [wandb_init.py:init():953] updated telemetry +2025-07-20 13:42:35,099 INFO MainThread:44339 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 13:42:45,262 INFO MainThread:44339 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 13:42:45,460 INFO MainThread:44339 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 13:42:45,460 INFO MainThread:44339 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 13:42:45,465 INFO MainThread:44339 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 13:42:45,465 INFO MainThread:44339 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 13:42:45,469 INFO MainThread:44339 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/output.log b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9c84dc83be5fcb6408dd406a6d5a609d1646e048 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 75477, "uuid": "340f2435954d4933a68a16b643df4460", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..c105f1c7be5e456b70a89c3a1823cbe87f4b1dc5 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T07:25:17.141598Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=35231", + "--object-store-name=/tmp/ray/session_2025-07-20_15-23-31_151146_70713/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_15-23-31_151146_70713/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=63830", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=63651", + "--gcs-address=10.1.4.164:52207", + "--session-name=session_2025-07-20_15-23-31_151146_70713", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=994c498dad4446fd86e2ea1ff3b1252af0b70673317e94a345017756", + "--startup-token=64", + "--worker-launch-time-ms=1752996213728", + "--node-id=0d8570de666f6f5e4f59f594c737fc4943e302fa432c09bf39023de5", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178181632" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "u3qpbij4lvd82az03j7kug0ih15hauaq" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..5715afb018fa64214a85c17f918366becec1a412 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug-internal.log @@ -0,0 +1,7 @@ +{"time":"2025-07-20T15:25:17.492981977+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T15:25:36.925600892+08:00","level":"INFO","msg":"stream: created new stream","id":"v0wv109i"} +{"time":"2025-07-20T15:25:36.926584351+08:00","level":"INFO","msg":"stream: started","id":"v0wv109i"} +{"time":"2025-07-20T15:25:36.926596894+08:00","level":"INFO","msg":"handler: started","stream_id":"v0wv109i"} +{"time":"2025-07-20T15:25:36.92661766+08:00","level":"INFO","msg":"sender: started","stream_id":"v0wv109i"} +{"time":"2025-07-20T15:25:36.926629865+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"v0wv109i"} +{"time":"2025-07-20T15:26:51.299029849+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/v0wv109i/file_stream\": unexpected EOF"} diff --git a/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e32ebd60b4da0ae148eba45ff73101207abceaa0 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_setup.py:_flush():80] Configure stats pid to 75477 +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 15:25:17,267 INFO MainThread:75477 
[wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug.log +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug-internal.log +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_init.py:init():830] calling init triggers +2025-07-20 15:25:17,268 INFO MainThread:75477 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 
'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': 
False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 15:25:17,268 INFO MainThread:75477 [wandb_init.py:init():871] starting backend +2025-07-20 15:25:17,481 INFO MainThread:75477 [wandb_init.py:init():874] sending inform_init request +2025-07-20 15:25:17,483 INFO MainThread:75477 [wandb_init.py:init():882] backend started and connected +2025-07-20 15:25:17,489 
INFO MainThread:75477 [wandb_init.py:init():953] updated telemetry +2025-07-20 15:25:17,533 INFO MainThread:75477 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 15:25:40,441 INFO MainThread:75477 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 15:25:40,619 INFO MainThread:75477 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 15:25:40,619 INFO MainThread:75477 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 15:25:40,623 INFO MainThread:75477 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 15:25:40,623 INFO MainThread:75477 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 15:25:40,625 INFO MainThread:75477 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/run-v0wv109i.wandb b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/run-v0wv109i.wandb new file mode 100644 index 0000000000000000000000000000000000000000..9c728dd12c06af24f0c56c0d65424aec920113fd Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/run-v0wv109i.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/output.log b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..58025e3a3c5ba8f38dea717573709acf35011ecb --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 86084, "uuid": "58b35defd6e645dfb338e85b828c3067", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..2afdc450dadcec889bf1e2c4b49516a49c3e841b --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T07:34:23.233545Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=43923", + "--object-store-name=/tmp/ray/session_2025-07-20_15-32-39_601252_81328/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_15-32-39_601252_81328/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=58095", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=64817", + "--gcs-address=10.1.4.164:49625", + "--session-name=session_2025-07-20_15-32-39_601252_81328", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=ef081ee94c630b8c4c1c59e7beefec090a5fbe229f6a981570b02fa1", + "--startup-token=64", + "--worker-launch-time-ms=1752996762184", + "--node-id=1f2c877a478b841eac2c1a33494d6be302c9c3ab6e684222e4733ce4", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178259456" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "9pihay9bn00sjm1rx40glf0dzx7vamvs" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..8515c754550201dd97c36861f88f482c70ab76ab --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T15:34:23.549904932+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T15:34:34.888814067+08:00","level":"INFO","msg":"stream: created new stream","id":"bw8ozibk"} +{"time":"2025-07-20T15:34:34.889759219+08:00","level":"INFO","msg":"stream: started","id":"bw8ozibk"} +{"time":"2025-07-20T15:34:34.88981801+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"bw8ozibk"} +{"time":"2025-07-20T15:34:34.889843085+08:00","level":"INFO","msg":"handler: started","stream_id":"bw8ozibk"} +{"time":"2025-07-20T15:34:34.890312328+08:00","level":"INFO","msg":"sender: started","stream_id":"bw8ozibk"} diff --git a/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..1fc3e91020ff09d3d5c1b21d3393db30bb98ff71 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 15:34:23,331 INFO MainThread:86084 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 15:34:23,331 INFO MainThread:86084 [wandb_setup.py:_flush():80] Configure stats pid to 86084 +2025-07-20 15:34:23,331 INFO MainThread:86084 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 15:34:23,331 INFO MainThread:86084 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 15:34:23,331 INFO MainThread:86084 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 15:34:23,331 INFO MainThread:86084 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug.log +2025-07-20 15:34:23,332 INFO 
MainThread:86084 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug-internal.log +2025-07-20 15:34:23,332 INFO MainThread:86084 [wandb_init.py:init():830] calling init triggers +2025-07-20 15:34:23,332 INFO MainThread:86084 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 
'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 15:34:23,332 INFO MainThread:86084 [wandb_init.py:init():871] starting backend +2025-07-20 15:34:23,538 INFO MainThread:86084 [wandb_init.py:init():874] sending inform_init request +2025-07-20 15:34:23,543 INFO MainThread:86084 [wandb_init.py:init():882] backend started and connected +2025-07-20 15:34:23,545 INFO MainThread:86084 [wandb_init.py:init():953] updated telemetry +2025-07-20 15:34:23,578 INFO MainThread:86084 [wandb_init.py:init():977] communicating run to backend with 90.0 
second timeout +2025-07-20 15:34:41,563 INFO MainThread:86084 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 15:34:41,729 INFO MainThread:86084 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 15:34:41,729 INFO MainThread:86084 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 15:34:41,733 INFO MainThread:86084 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 15:34:41,733 INFO MainThread:86084 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 15:34:41,735 INFO MainThread:86084 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/run-bw8ozibk.wandb b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/run-bw8ozibk.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/output.log b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..53b5730d918480aff4872ea44eb5bb40b5ae563a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 94972, "uuid": "a17f4aa9176b432b9cba40a20a79ec8a", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..87355bc51d31077b24b98d20e126984fc824511c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T07:40:30.414188Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=40967", + "--object-store-name=/tmp/ray/session_2025-07-20_15-38-46_389693_90215/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_15-38-46_389693_90215/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=56230", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=59794", + "--gcs-address=10.1.4.164:64428", + "--session-name=session_2025-07-20_15-38-46_389693_90215", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=ca5b30865afbad8796c50b46b9afd7529648e977b687eff906df06a2", + "--startup-token=64", + "--worker-launch-time-ms=1752997128928", + "--node-id=02e766ac939324b04dc391b619b0ea864767e322c5ef00f62c05a76b", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178312704" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "00b180ndhib4fms8g4grnfq7yctsuqqs" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..2c75b6bf1370cfe12ffc6e38d2fa44b25d670d70 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug-internal.log @@ -0,0 +1,9 @@ +{"time":"2025-07-20T15:40:30.858787419+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T15:40:46.029828269+08:00","level":"INFO","msg":"stream: created new stream","id":"m94nrzgn"} +{"time":"2025-07-20T15:40:46.03096628+08:00","level":"INFO","msg":"stream: started","id":"m94nrzgn"} +{"time":"2025-07-20T15:40:46.030999671+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"m94nrzgn"} +{"time":"2025-07-20T15:40:46.031014389+08:00","level":"INFO","msg":"sender: started","stream_id":"m94nrzgn"} +{"time":"2025-07-20T15:40:46.031045708+08:00","level":"INFO","msg":"handler: started","stream_id":"m94nrzgn"} +{"time":"2025-07-20T15:42:18.760260271+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"} +{"time":"2025-07-20T15:42:51.067292854+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"} +{"time":"2025-07-20T15:43:25.184851272+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"} diff --git a/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..85b84a8d04a5d88e753d5929fe5c76aa19a3ac7a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_setup.py:_flush():80] Configure stats pid to 94972 +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 15:40:30,628 
INFO MainThread:94972 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug.log +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug-internal.log +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_init.py:init():830] calling init triggers +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 
'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': 
{'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} 
+2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_init.py:init():871] starting backend +2025-07-20 15:40:30,837 INFO MainThread:94972 [wandb_init.py:init():874] sending inform_init request +2025-07-20 15:40:30,839 INFO MainThread:94972 [wandb_init.py:init():882] backend started and connected +2025-07-20 15:40:30,853 INFO MainThread:94972 [wandb_init.py:init():953] updated telemetry +2025-07-20 15:40:30,989 INFO MainThread:94972 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 15:40:48,575 INFO MainThread:94972 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 15:40:48,758 INFO MainThread:94972 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 15:40:48,759 INFO MainThread:94972 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 15:40:48,763 INFO MainThread:94972 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 15:40:48,763 INFO MainThread:94972 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-20 15:40:48,767 INFO MainThread:94972 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/run-m94nrzgn.wandb b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/run-m94nrzgn.wandb new file mode 100644 index 0000000000000000000000000000000000000000..13927cb7290bba6efd0223cc4e1946519db67b29 Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/run-m94nrzgn.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/output.log b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d26c8aef3ac6d4e30953659fd6c83fab98e8f8f6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 104213, "uuid": "4d1d82b5395a476bb6b006bf8181afe5", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8931917e110019fd754a86848462f2ba4e154653 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T07:46:06.281691Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=40431", + "--object-store-name=/tmp/ray/session_2025-07-20_15-44-22_687829_99456/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_15-44-22_687829_99456/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=58574", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=50001", + "--gcs-address=10.1.4.164:64219", + "--session-name=session_2025-07-20_15-44-22_687829_99456", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=76295dbf7deef67486e1e6b5c342710db221379deebbaa7e797cdf45", + "--startup-token=64", + "--worker-launch-time-ms=1752997465206", + "--node-id=d537fcc56a2bc0badeca70f7b961324d2fdd437e239db617fa81da42", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178333184" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "qyz6wumcffvf7izbshj2cparvip2q70z" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..1669df511ef4d5f92fbab8a5b14226d3c6c997aa --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T15:46:06.581647226+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T15:46:11.117406425+08:00","level":"INFO","msg":"stream: created new stream","id":"b40ine7y"} +{"time":"2025-07-20T15:46:11.11812207+08:00","level":"INFO","msg":"stream: started","id":"b40ine7y"} +{"time":"2025-07-20T15:46:11.118152722+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"b40ine7y"} +{"time":"2025-07-20T15:46:11.118167955+08:00","level":"INFO","msg":"handler: started","stream_id":"b40ine7y"} +{"time":"2025-07-20T15:46:11.118202843+08:00","level":"INFO","msg":"sender: started","stream_id":"b40ine7y"} diff --git a/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..3ee1f69351a4048ae60a66c003497aa90f8c31da --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_setup.py:_flush():80] Configure stats pid to 104213 +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug.log +2025-07-20 15:46:06,364 INFO 
MainThread:104213 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug-internal.log +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_init.py:init():830] calling init triggers +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 
'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_init.py:init():871] starting backend +2025-07-20 15:46:06,572 INFO MainThread:104213 [wandb_init.py:init():874] sending inform_init request +2025-07-20 15:46:06,573 INFO MainThread:104213 [wandb_init.py:init():882] backend started and connected +2025-07-20 15:46:06,578 INFO MainThread:104213 [wandb_init.py:init():953] updated telemetry +2025-07-20 15:46:06,609 INFO MainThread:104213 [wandb_init.py:init():977] communicating run to backend with 
90.0 second timeout +2025-07-20 15:46:13,184 INFO MainThread:104213 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 15:46:13,368 INFO MainThread:104213 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 15:46:13,368 INFO MainThread:104213 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 15:46:13,371 INFO MainThread:104213 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 15:46:13,371 INFO MainThread:104213 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 15:46:13,373 INFO MainThread:104213 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/run-b40ine7y.wandb b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/run-b40ine7y.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/output.log b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..c7f0ffdc083aad03eb7de1d197b1e9bf13782bb6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 113131, "uuid": "5e376187861c45a490c4be994ce9436d", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..bdbdb4d68b86d6cff6e24e7f3d93a9ddaafdd62c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T07:52:10.419577Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=39519", + "--object-store-name=/tmp/ray/session_2025-07-20_15-50-27_532689_108378/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_15-50-27_532689_108378/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=49859", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=55665", + "--gcs-address=10.1.4.164:53198", + "--session-name=session_2025-07-20_15-50-27_532689_108378", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=0645951f301c3c6261e21e7675494d0f66f53c7471d7ae62b1bfef71", + "--startup-token=64", + "--worker-launch-time-ms=1752997830261", + "--node-id=e12fae92f6ce3564c51cacc15e7a9d534fa0201dde3c0167c6930682", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178353664" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "7lqwpfhgfxthb7o9vn643sdvq899ymp6" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..fd4fcc824f7b4b3e2c10ffdd8d5e20c51ed5d80b --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug-internal.log @@ -0,0 +1,8 @@ +{"time":"2025-07-20T15:52:10.714178248+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T15:52:40.821708731+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)"} +{"time":"2025-07-20T15:52:56.997058402+08:00","level":"INFO","msg":"stream: created new stream","id":"dm3qeysw"} +{"time":"2025-07-20T15:52:56.997814652+08:00","level":"INFO","msg":"stream: started","id":"dm3qeysw"} +{"time":"2025-07-20T15:52:56.997837116+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"dm3qeysw"} +{"time":"2025-07-20T15:52:56.997856956+08:00","level":"INFO","msg":"sender: started","stream_id":"dm3qeysw"} +{"time":"2025-07-20T15:52:56.997885717+08:00","level":"INFO","msg":"handler: started","stream_id":"dm3qeysw"} +{"time":"2025-07-20T15:58:24.003309141+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/dm3qeysw/file_stream\": read tcp 10.1.4.164:41206->172.67.193.61:443: read: connection timed out"} diff --git a/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e900a277f657cee9e69343e39551e800a4e3338f --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_setup.py:_flush():80] Configure stats pid to 113131 +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 15:52:10,498 
INFO MainThread:113131 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug.log +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug-internal.log +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_init.py:init():830] calling init triggers +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 
'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': 
{'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} 
+2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_init.py:init():871] starting backend +2025-07-20 15:52:10,705 INFO MainThread:113131 [wandb_init.py:init():874] sending inform_init request +2025-07-20 15:52:10,707 INFO MainThread:113131 [wandb_init.py:init():882] backend started and connected +2025-07-20 15:52:10,710 INFO MainThread:113131 [wandb_init.py:init():953] updated telemetry +2025-07-20 15:52:10,741 INFO MainThread:113131 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 15:53:00,791 INFO MainThread:113131 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 15:53:00,983 INFO MainThread:113131 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 15:53:00,983 INFO MainThread:113131 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 15:53:00,987 INFO MainThread:113131 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 15:53:00,987 INFO MainThread:113131 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-20 15:53:00,993 INFO MainThread:113131 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/run-dm3qeysw.wandb b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/run-dm3qeysw.wandb new file mode 100644 index 0000000000000000000000000000000000000000..3c1e47133c6a38adc3b17429053042b940e40fdd Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/run-dm3qeysw.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/output.log b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..b2c680d12edde77af722c62529d379986abfb732 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 151637, "uuid": "68188b32248f482d861ba5b2d8e18d46", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..59fda2af506801cb6cf9178edb33bd421e73dcd1 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T09:49:59.866734Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=36215", + "--object-store-name=/tmp/ray/session_2025-07-20_17-48-21_273251_146876/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_17-48-21_273251_146876/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=64359", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=59077", + "--gcs-address=10.1.4.164:54394", + "--session-name=session_2025-07-20_17-48-21_273251_146876", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=36b9d744a71ae29c87604d3621452693c69c1a8bc6bd98b6b84851b7", + "--startup-token=64", + "--worker-launch-time-ms=1753004903788", + "--node-id=c275be38bd3059a0f90ff0010a5a1c29e1de2704c979f89ff841713a", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178603520" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "7dtryqcd9js64s789ba7bibty7wfdyw3" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..96402a170fa118b6a127924738df09f239f1d16c --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T17:50:00.335547599+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T17:50:04.993221807+08:00","level":"INFO","msg":"stream: created new stream","id":"sl6d9zx4"} +{"time":"2025-07-20T17:50:04.998155148+08:00","level":"INFO","msg":"stream: started","id":"sl6d9zx4"} +{"time":"2025-07-20T17:50:04.998174332+08:00","level":"INFO","msg":"handler: started","stream_id":"sl6d9zx4"} +{"time":"2025-07-20T17:50:04.998227883+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"sl6d9zx4"} +{"time":"2025-07-20T17:50:04.998235218+08:00","level":"INFO","msg":"sender: started","stream_id":"sl6d9zx4"} diff --git a/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..4f3c5dfe168d38274f6db0db0d44caf37e1f7fa5 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 17:50:00,093 INFO MainThread:151637 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 17:50:00,093 INFO MainThread:151637 [wandb_setup.py:_flush():80] Configure stats pid to 151637 +2025-07-20 17:50:00,093 INFO MainThread:151637 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 17:50:00,093 INFO MainThread:151637 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 17:50:00,093 INFO MainThread:151637 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 17:50:00,094 INFO MainThread:151637 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug.log +2025-07-20 17:50:00,094 
INFO MainThread:151637 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug-internal.log +2025-07-20 17:50:00,094 INFO MainThread:151637 [wandb_init.py:init():830] calling init triggers +2025-07-20 17:50:00,094 INFO MainThread:151637 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': 
None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 17:50:00,094 INFO MainThread:151637 [wandb_init.py:init():871] starting backend +2025-07-20 17:50:00,302 INFO MainThread:151637 [wandb_init.py:init():874] sending inform_init request +2025-07-20 17:50:00,304 INFO MainThread:151637 [wandb_init.py:init():882] backend started and connected +2025-07-20 17:50:00,311 INFO MainThread:151637 [wandb_init.py:init():953] updated telemetry +2025-07-20 17:50:00,394 INFO MainThread:151637 [wandb_init.py:init():977] communicating run to backend with 
90.0 second timeout +2025-07-20 17:50:06,920 INFO MainThread:151637 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 17:50:07,098 INFO MainThread:151637 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 17:50:07,098 INFO MainThread:151637 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 17:50:07,102 INFO MainThread:151637 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 17:50:07,102 INFO MainThread:151637 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 17:50:07,106 INFO MainThread:151637 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/run-sl6d9zx4.wandb b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/run-sl6d9zx4.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/output.log b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..3ff37b56d793f70783e6b035485a9140a3041271 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 160093, "uuid": "1e1a1ad89e384a9e982686b42345ad53", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..63cf50ee5c2cc5d556d9c3359bb56324bd4d43a6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T09:54:52.474738Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=32789", + "--object-store-name=/tmp/ray/session_2025-07-20_17-53-07_376621_155325/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_17-53-07_376621_155325/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=58284", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=36321", + "--gcs-address=10.1.4.164:59631", + "--session-name=session_2025-07-20_17-53-07_376621_155325", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=dbea8f527f9086b064609c5db5aed7aea24688b11023eee9c47c3537", + "--startup-token=64", + "--worker-launch-time-ms=1753005189877", + "--node-id=7c8c28532c4a6185133a03080497308e2d83d07893be32110c9f7adb", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178615808" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "hnz4fmdpp9zbobdudzhrp438wu35xx1q" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..dcbff365ec41d12cd4edf278ab15daba9e9746f2 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug-internal.log @@ -0,0 +1,7 @@ +{"time":"2025-07-20T17:54:52.871556297+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T17:55:14.368699132+08:00","level":"INFO","msg":"stream: created new stream","id":"u3x8mk80"} +{"time":"2025-07-20T17:55:14.369557756+08:00","level":"INFO","msg":"stream: started","id":"u3x8mk80"} +{"time":"2025-07-20T17:55:14.369581991+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"u3x8mk80"} +{"time":"2025-07-20T17:55:14.369590948+08:00","level":"INFO","msg":"sender: started","stream_id":"u3x8mk80"} +{"time":"2025-07-20T17:55:14.369632862+08:00","level":"INFO","msg":"handler: started","stream_id":"u3x8mk80"} +{"time":"2025-07-20T17:55:39.968202666+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/u3x8mk80/file_stream\": unexpected EOF"} diff --git a/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..76200ae1c4fed5f840c44ccf7c91d06fc37a685b --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 17:54:52,631 INFO MainThread:160093 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_setup.py:_flush():80] Configure stats pid to 160093 +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 17:54:52,632 INFO 
MainThread:160093 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug.log +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug-internal.log +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_init.py:init():830] calling init triggers +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 
'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': 
False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_init.py:init():871] starting backend +2025-07-20 17:54:52,845 INFO MainThread:160093 [wandb_init.py:init():874] sending inform_init request +2025-07-20 17:54:52,846 INFO MainThread:160093 [wandb_init.py:init():882] backend started and 
connected +2025-07-20 17:54:52,856 INFO MainThread:160093 [wandb_init.py:init():953] updated telemetry +2025-07-20 17:54:52,931 INFO MainThread:160093 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 17:55:16,136 INFO MainThread:160093 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 17:55:16,309 INFO MainThread:160093 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 17:55:16,310 INFO MainThread:160093 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 17:55:16,368 INFO MainThread:160093 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 17:55:16,377 INFO MainThread:160093 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 17:55:16,381 INFO MainThread:160093 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/run-u3x8mk80.wandb b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/run-u3x8mk80.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/output.log b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..123ab12e0b9dd7525db98404c151e33db9aa43ff --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 169088, "uuid": "ab4049b6358b42718ec3f59ff589dde1", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5c2fcaebb44b064a31ae45e94c760ef691a3ebf5 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T10:01:35.121306Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=41203", + "--object-store-name=/tmp/ray/session_2025-07-20_17-59-50_542822_164318/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_17-59-50_542822_164318/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=61732", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=53515", + "--gcs-address=10.1.4.164:57575", + "--session-name=session_2025-07-20_17-59-50_542822_164318", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=c3d4af086c5d1d1af37df7b0ae84b5563d0811c6b79f2452a68d02fb", + "--startup-token=64", + "--worker-launch-time-ms=1753005592994", + "--node-id=89f3cd99cf596d5dba4bc8d8f07acb16cf7c97aeb23632bbb2b637f1", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178652672" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "fa8gi1wr876xad5m76att7408yfvwm84" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..d3f629b61c773a20c0e243b9f8e3bc10b388ae37 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug-internal.log @@ -0,0 +1,8 @@ +{"time":"2025-07-20T18:01:35.480814623+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T18:02:05.589240176+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"} +{"time":"2025-07-20T18:02:13.132238778+08:00","level":"INFO","msg":"stream: created new stream","id":"m27ujde6"} +{"time":"2025-07-20T18:02:13.133859178+08:00","level":"INFO","msg":"handler: started","stream_id":"m27ujde6"} +{"time":"2025-07-20T18:02:13.134071767+08:00","level":"INFO","msg":"stream: started","id":"m27ujde6"} +{"time":"2025-07-20T18:02:13.134098714+08:00","level":"INFO","msg":"sender: started","stream_id":"m27ujde6"} +{"time":"2025-07-20T18:02:13.134089006+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"m27ujde6"} +{"time":"2025-07-20T18:08:58.115407689+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/m27ujde6/file_stream\": read tcp 10.1.4.164:48990->172.67.193.61:443: read: connection timed out"} diff --git a/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e02b522c5ef29ff8c18fbb45f45e2324d1486386 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_setup.py:_flush():80] Configure stats pid to 169088 +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_setup.py:_flush():80] Loading settings from 
/nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug.log +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug-internal.log +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_init.py:init():830] calling init triggers +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': 
True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': 
True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 18:01:35,259 INFO MainThread:169088 [wandb_init.py:init():871] starting 
backend +2025-07-20 18:01:35,468 INFO MainThread:169088 [wandb_init.py:init():874] sending inform_init request +2025-07-20 18:01:35,470 INFO MainThread:169088 [wandb_init.py:init():882] backend started and connected +2025-07-20 18:01:35,475 INFO MainThread:169088 [wandb_init.py:init():953] updated telemetry +2025-07-20 18:01:35,528 INFO MainThread:169088 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 18:02:16,165 INFO MainThread:169088 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 18:02:16,369 INFO MainThread:169088 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 18:02:16,369 INFO MainThread:169088 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 18:02:16,373 INFO MainThread:169088 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 18:02:16,373 INFO MainThread:169088 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 18:02:16,376 INFO MainThread:169088 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/run-m27ujde6.wandb b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/run-m27ujde6.wandb new file mode 100644 index 0000000000000000000000000000000000000000..827a007cf3ac5a9a3ccbf066a7d52b6a49f801cc Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/run-m27ujde6.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/output.log b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..cdb5545831b8fb1c8c626d8d0857a3e5e10d2ea2 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 179677, "uuid": 
"ed6b32be02f24016a6d7ea58bd716567", "closed": false} +Start validation... diff --git a/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 
+pip==25.1 +annotated-types==0.7.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 
+opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 +fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 
+mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 +requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..05bf02ba431928cb3d2d81014ed15de3d114f7f7 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T10:12:46.823746Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=46025", + "--object-store-name=/tmp/ray/session_2025-07-20_18-09-53_125496_174925/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_18-09-53_125496_174925/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=41545", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=39916", + "--gcs-address=10.1.4.164:60493", + "--session-name=session_2025-07-20_18-09-53_125496_174925", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=2b81a5b7050804e85740b145be89d8f47b8b557be40acba82e5226d9", + "--startup-token=64", + "--worker-launch-time-ms=1753006195919", + "--node-id=812b9ba1b07bc192ee99fedd03ebc5d4bcab96a2124b55aca302465b", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": 
"/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179250688" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "jca15im09rvgmqcdbyu8ijz6wttj46fx" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug-internal.log new file mode 100644 index 
0000000000000000000000000000000000000000..2d2cec0c5dd5de8d44b8f9cf6aed426d6deeaae1 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T18:12:47.187575955+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T18:12:50.614283993+08:00","level":"INFO","msg":"stream: created new stream","id":"89mv3lt4"} +{"time":"2025-07-20T18:12:50.615230535+08:00","level":"INFO","msg":"stream: started","id":"89mv3lt4"} +{"time":"2025-07-20T18:12:50.615255588+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"89mv3lt4"} +{"time":"2025-07-20T18:12:50.615279379+08:00","level":"INFO","msg":"sender: started","stream_id":"89mv3lt4"} +{"time":"2025-07-20T18:12:50.615332536+08:00","level":"INFO","msg":"handler: started","stream_id":"89mv3lt4"} diff --git a/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..8b7506e1aeea69eace0c6173664d7162688ba3e4 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_setup.py:_flush():80] Configure stats pid to 179677 +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_init.py:setup_run_log_directory():703] Logging user logs to 
/nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug.log +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug-internal.log +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_init.py:init():830] calling init triggers +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 
0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 
1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_init.py:init():871] starting backend +2025-07-20 18:12:47,172 INFO MainThread:179677 [wandb_init.py:init():874] sending inform_init request +2025-07-20 18:12:47,173 INFO MainThread:179677 [wandb_init.py:init():882] backend started and connected +2025-07-20 18:12:47,176 INFO MainThread:179677 [wandb_init.py:init():953] updated telemetry +2025-07-20 18:12:47,295 
INFO MainThread:179677 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 18:12:51,947 INFO MainThread:179677 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 18:12:52,164 INFO MainThread:179677 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 18:12:52,164 INFO MainThread:179677 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 18:12:52,165 INFO MainThread:179677 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 18:12:52,165 INFO MainThread:179677 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 18:12:52,167 INFO MainThread:179677 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/run-89mv3lt4.wandb b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/run-89mv3lt4.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/output.log b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..8e478900dbdb07580c457d558258752ecbf79130 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 193580, "uuid": "0c2f3bce130f48f0a7237c58a684eeb5", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..aab8b81730a8803066b8c3a1a5e8e85dc21f271c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T10:38:04.454833Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=33775", + "--object-store-name=/tmp/ray/session_2025-07-20_18-36-18_073010_188810/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_18-36-18_073010_188810/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=45288", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=56348", + "--gcs-address=10.1.4.164:64298", + "--session-name=session_2025-07-20_18-36-18_073010_188810", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=653b197ce741c102801a48d4315a065ffeb33d7d9d7a1fb24af03e3f", + "--startup-token=64", + "--worker-launch-time-ms=1753007780685", + "--node-id=f18941412ee5f4fe4a1ee5dc9df15d495c6cf10e7fd83f7a16ec63a8", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179459584" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "hprpvicj1ru54tiz927ygpyb2z630bnt" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..63029ba30bc2636bc2541ee269129d546cd10373 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T18:38:05.03357076+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T18:38:10.341722047+08:00","level":"INFO","msg":"stream: created new stream","id":"82cvf14y"} +{"time":"2025-07-20T18:38:10.343036806+08:00","level":"INFO","msg":"stream: started","id":"82cvf14y"} +{"time":"2025-07-20T18:38:10.343072051+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"82cvf14y"} +{"time":"2025-07-20T18:38:10.343124908+08:00","level":"INFO","msg":"handler: started","stream_id":"82cvf14y"} +{"time":"2025-07-20T18:38:10.343080828+08:00","level":"INFO","msg":"sender: started","stream_id":"82cvf14y"} diff --git a/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..dbd328c518d9107b764f20dd3ddca1aa007be074 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 18:38:04,777 INFO MainThread:193580 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 18:38:04,777 INFO MainThread:193580 [wandb_setup.py:_flush():80] Configure stats pid to 193580 +2025-07-20 18:38:04,777 INFO MainThread:193580 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 18:38:04,777 INFO MainThread:193580 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 18:38:04,777 INFO MainThread:193580 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 18:38:04,777 INFO MainThread:193580 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug.log +2025-07-20 18:38:04,778 INFO 
MainThread:193580 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug-internal.log +2025-07-20 18:38:04,778 INFO MainThread:193580 [wandb_init.py:init():830] calling init triggers +2025-07-20 18:38:04,778 INFO MainThread:193580 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 
'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 18:38:04,778 INFO MainThread:193580 [wandb_init.py:init():871] starting backend +2025-07-20 18:38:05,019 INFO MainThread:193580 [wandb_init.py:init():874] sending inform_init request +2025-07-20 18:38:05,021 INFO MainThread:193580 [wandb_init.py:init():882] backend started and connected +2025-07-20 18:38:05,028 INFO MainThread:193580 [wandb_init.py:init():953] updated telemetry +2025-07-20 18:38:05,097 INFO MainThread:193580 [wandb_init.py:init():977] communicating run to backend with 
90.0 second timeout +2025-07-20 18:38:11,543 INFO MainThread:193580 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 18:38:11,737 INFO MainThread:193580 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 18:38:11,737 INFO MainThread:193580 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 18:38:11,744 INFO MainThread:193580 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 18:38:11,744 INFO MainThread:193580 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 18:38:11,764 INFO MainThread:193580 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/run-82cvf14y.wandb b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/run-82cvf14y.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/output.log b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7b0d3f160e4c6794d0d5c3531b987a2a80b75559 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 202910, "uuid": "1a0be4df81354e5b8a89bcc67273cb08", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..2785db21b6c10750a7a09a2c849618c0855ad025 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T10:45:55.035753Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=38911", + "--object-store-name=/tmp/ray/session_2025-07-20_18-44-08_264554_198143/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_18-44-08_264554_198143/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=58446", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=59793", + "--gcs-address=10.1.4.164:54451", + "--session-name=session_2025-07-20_18-44-08_264554_198143", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=a38c77a85680040c660a3b5d4ab803d8aaa44e70746e99af3f111616", + "--startup-token=64", + "--worker-launch-time-ms=1753008251912", + "--node-id=7ad278f1bf59137a7347996ddac8430d32e9591d2f83756c40b53bdf", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179512832" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "2qdyd3o7cilgxi2vgxni4m8uxslv4r1d" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..ca59475e3f60f69e98062d738e73f4e7dfa64a9b --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T18:45:55.347049864+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T18:45:58.087436155+08:00","level":"INFO","msg":"stream: created new stream","id":"htrmvlj8"} +{"time":"2025-07-20T18:45:58.08754831+08:00","level":"INFO","msg":"stream: started","id":"htrmvlj8"} +{"time":"2025-07-20T18:45:58.087578033+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"htrmvlj8"} +{"time":"2025-07-20T18:45:58.087603416+08:00","level":"INFO","msg":"handler: started","stream_id":"htrmvlj8"} +{"time":"2025-07-20T18:45:58.087668036+08:00","level":"INFO","msg":"sender: started","stream_id":"htrmvlj8"} diff --git a/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..1176a052552495e1e4e93c8575123b6c59e3faca --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 18:45:55,129 INFO MainThread:202910 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_setup.py:_flush():80] Configure stats pid to 202910 +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug.log +2025-07-20 18:45:55,130 INFO 
MainThread:202910 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug-internal.log +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_init.py:init():830] calling init triggers +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 
'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_init.py:init():871] starting backend +2025-07-20 18:45:55,337 INFO MainThread:202910 [wandb_init.py:init():874] sending inform_init request +2025-07-20 18:45:55,339 INFO MainThread:202910 [wandb_init.py:init():882] backend started and connected +2025-07-20 18:45:55,343 INFO MainThread:202910 [wandb_init.py:init():953] updated telemetry +2025-07-20 18:45:55,379 INFO MainThread:202910 [wandb_init.py:init():977] communicating run to backend with 
90.0 second timeout +2025-07-20 18:45:59,062 INFO MainThread:202910 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 18:45:59,234 INFO MainThread:202910 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 18:45:59,234 INFO MainThread:202910 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 18:45:59,238 INFO MainThread:202910 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 18:45:59,238 INFO MainThread:202910 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 18:45:59,241 INFO MainThread:202910 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/run-htrmvlj8.wandb b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/run-htrmvlj8.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/config.yaml b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9cb8f96cf110a214774352fcec46a88f1b722fd8 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/config.yaml @@ -0,0 +1,322 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + nmbvcuty6clhkr83vjwadihsu9kpdhoi: + args: + - --node-ip-address=10.1.4.164 + - --node-manager-port=40847 + - --object-store-name=/tmp/ray/session_2025-07-20_18-50-33_612671_207255/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2025-07-20_18-50-33_612671_207255/sockets/raylet + - --redis-address=None + - --metrics-agent-port=56349 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=59408 + - --gcs-address=10.1.4.164:61296 + - --session-name=session_2025-07-20_18-50-33_612671_207255 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - 
--cluster-id=5f593853245e93a932b0d21951d11425b24c26d22610a6a16cb50798 + - --startup-token=64 + - --worker-launch-time-ms=1753008637130 + - --node-id=f402a8cae52cba65b7db537226d3a02bb2c3b13e17c85bb8ba53fb2d + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + cpu_count: 64 + cpu_count_logical: 64 + cudaVersion: "12.1" + disk: + /: + total: "1623302262784" + used: "1179701248" + email: gia0603yucca@gmail.com + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: https://github.com/PorUna-byte/PAR.git + gpu: NVIDIA A800-SXM4-80GB + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-dd05c780-0a19-17fd-c584-d09f4318f680 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-2782427d-b86d-e7c7-247f-edefe39eec71 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-52755b3d-d761-c869-0d08-300873ba5f02 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-5927b565-1506-34c6-eb8d-d657ed5f3558 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-90465488-5319-9508-9e8a-b3b12918be35 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-855bcb80-e48f-cdd2-f62d-35138b69089a + host: dsw-266702-557cd69888-g24kv + memory: + total: "549755813888" + os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + program: /root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-20T10:52:18.641147Z" + writerId: nmbvcuty6clhkr83vjwadihsu9kpdhoi + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 
+ - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + 
mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + 
mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/output.log b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..80a724dfe36a4e3971e5a46e9778c19c0f6d3c1a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 212029, "uuid": "4f888a0cb3b2443cadcfcfb0f31adc07", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f3214ee37d6934631517bd6e45ec299a6ed93f5d --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T10:52:18.641147Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=40847", + "--object-store-name=/tmp/ray/session_2025-07-20_18-50-33_612671_207255/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_18-50-33_612671_207255/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=56349", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=59408", + "--gcs-address=10.1.4.164:61296", + "--session-name=session_2025-07-20_18-50-33_612671_207255", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=5f593853245e93a932b0d21951d11425b24c26d22610a6a16cb50798", + "--startup-token=64", + "--worker-launch-time-ms=1753008637130", + "--node-id=f402a8cae52cba65b7db537226d3a02bb2c3b13e17c85bb8ba53fb2d", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179701248" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "nmbvcuty6clhkr83vjwadihsu9kpdhoi" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/wandb-summary.json b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..97d29b9d12b49eb80fef4dfe6cc237926a61eadd --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":86},"_runtime":86} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..267e360925561c316cc43ab6fa7009424ffaca81 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2025-07-20T18:52:19.072240465+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T18:52:21.880833557+08:00","level":"INFO","msg":"stream: created new stream","id":"hiby21ed"} +{"time":"2025-07-20T18:52:21.880900115+08:00","level":"INFO","msg":"stream: started","id":"hiby21ed"} +{"time":"2025-07-20T18:52:21.8809482+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"hiby21ed"} +{"time":"2025-07-20T18:52:21.880962341+08:00","level":"INFO","msg":"handler: started","stream_id":"hiby21ed"} +{"time":"2025-07-20T18:52:21.881001525+08:00","level":"INFO","msg":"sender: started","stream_id":"hiby21ed"} +{"time":"2025-07-20T18:53:51.736418092+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":1.018644786}],"total_operations":1}} +{"time":"2025-07-20T18:53:55.214428055+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-20T18:54:06.246943554+08:00","level":"INFO","msg":"stream: closing","id":"hiby21ed"} +{"time":"2025-07-20T18:54:06.247836119+08:00","level":"INFO","msg":"handler: closed","stream_id":"hiby21ed"} +{"time":"2025-07-20T18:54:06.247850793+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"hiby21ed"} +{"time":"2025-07-20T18:54:06.24786321+08:00","level":"INFO","msg":"sender: closed","stream_id":"hiby21ed"} 
+{"time":"2025-07-20T18:54:06.255074667+08:00","level":"INFO","msg":"stream: closed","id":"hiby21ed"} diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e4ca65344d880c9f07206e4af4eb0752336deaa9 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_setup.py:_flush():80] Configure stats pid to 212029 +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug.log +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug-internal.log +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_init.py:init():830] calling init triggers +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 
'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 
'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 
'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_init.py:init():871] starting backend +2025-07-20 18:52:19,060 INFO MainThread:212029 [wandb_init.py:init():874] sending inform_init request +2025-07-20 18:52:19,061 INFO MainThread:212029 [wandb_init.py:init():882] backend started and connected +2025-07-20 18:52:19,068 INFO MainThread:212029 [wandb_init.py:init():953] updated telemetry +2025-07-20 18:52:19,145 INFO MainThread:212029 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 18:52:24,026 INFO MainThread:212029 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 18:52:24,193 INFO MainThread:212029 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 18:52:24,194 INFO MainThread:212029 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 18:52:24,198 INFO MainThread:212029 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 18:52:24,198 INFO MainThread:212029 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-20 18:52:24,200 INFO MainThread:212029 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-20 18:53:50,710 INFO MainThread:212029 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/hiby21ed +2025-07-20 18:53:50,717 INFO MainThread:212029 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-20 18:53:50,734 INFO MainThread:212029 [wandb_run.py:_restore():2405] restore +2025-07-20 18:53:50,734 INFO MainThread:212029 [wandb_run.py:_restore():2411] restore done +2025-07-20 18:54:06,232 INFO MainThread:212029 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-20 18:54:06,233 INFO MainThread:212029 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-20 18:54:06,239 INFO MainThread:212029 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/run-hiby21ed.wandb b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/run-hiby21ed.wandb new file mode 100644 index 0000000000000000000000000000000000000000..3110c1fc373205f16bd5e7932384f1ba368c2fa9 Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/run-hiby21ed.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/config.yaml b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14ee11a0cb292dad8d69ff327102b77cba2ec272 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/config.yaml @@ -0,0 +1,322 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + t6vyfkz6zns915ezpi7icvj23e02z4q0: + args: + - --node-ip-address=10.1.4.164 + - --node-manager-port=38935 + - --object-store-name=/tmp/ray/session_2025-07-20_19-06-49_559805_218438/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2025-07-20_19-06-49_559805_218438/sockets/raylet + - --redis-address=None + - 
--metrics-agent-port=44894 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=41153 + - --gcs-address=10.1.4.164:48225 + - --session-name=session_2025-07-20_19-06-49_559805_218438 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - --cluster-id=cf15bbb8c672ce3f59302373bb0e3c555a73db416b5edbf38e5c645f + - --startup-token=64 + - --worker-launch-time-ms=1753009612034 + - --node-id=c982adee0b3c4e6317899ac6dd65aa17e75b59bb373b3d128c71698a + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + cpu_count: 64 + cpu_count_logical: 64 + cudaVersion: "12.1" + disk: + /: + total: "1623302262784" + used: "1179754496" + email: gia0603yucca@gmail.com + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: https://github.com/PorUna-byte/PAR.git + gpu: NVIDIA A800-SXM4-80GB + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-dd05c780-0a19-17fd-c584-d09f4318f680 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-2782427d-b86d-e7c7-247f-edefe39eec71 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-52755b3d-d761-c869-0d08-300873ba5f02 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-5927b565-1506-34c6-eb8d-d657ed5f3558 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-90465488-5319-9508-9e8a-b3b12918be35 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-855bcb80-e48f-cdd2-f62d-35138b69089a + host: dsw-266702-557cd69888-g24kv + memory: + total: "549755813888" + os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + program: 
/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-20T11:08:29.360965Z" + writerId: t6vyfkz6zns915ezpi7icvj23e02z4q0 + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + 
total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + 
lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/output.log b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..0c4d99df5fdcafde7bb5b46a0a00f3a82b5b8160 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 223200, "uuid": "ef2384a07bb14e09b722b1dd2e623ad1", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..1505160ce482dc9c6444d5deb1fd3dd7a7b326b6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T11:08:29.360965Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=38935", + "--object-store-name=/tmp/ray/session_2025-07-20_19-06-49_559805_218438/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_19-06-49_559805_218438/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=44894", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=41153", + "--gcs-address=10.1.4.164:48225", + "--session-name=session_2025-07-20_19-06-49_559805_218438", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=cf15bbb8c672ce3f59302373bb0e3c555a73db416b5edbf38e5c645f", + "--startup-token=64", + "--worker-launch-time-ms=1753009612034", + "--node-id=c982adee0b3c4e6317899ac6dd65aa17e75b59bb373b3d128c71698a", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179754496" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "t6vyfkz6zns915ezpi7icvj23e02z4q0" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/wandb-summary.json b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..97d29b9d12b49eb80fef4dfe6cc237926a61eadd --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":86},"_runtime":86} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..eea55cfe26731ac5b22b4d89ce313358f3d83848 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2025-07-20T19:08:29.724190471+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T19:08:32.806897652+08:00","level":"INFO","msg":"stream: created new stream","id":"rjowgpxz"} +{"time":"2025-07-20T19:08:32.816321489+08:00","level":"INFO","msg":"sender: started","stream_id":"rjowgpxz"} +{"time":"2025-07-20T19:08:32.816336671+08:00","level":"INFO","msg":"stream: started","id":"rjowgpxz"} +{"time":"2025-07-20T19:08:32.816359861+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"rjowgpxz"} +{"time":"2025-07-20T19:08:32.816322496+08:00","level":"INFO","msg":"handler: started","stream_id":"rjowgpxz"} +{"time":"2025-07-20T19:10:01.737560141+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading wandb-metadata.json","runtime_seconds":86.389395114},{"desc":"uploading requirements.txt","runtime_seconds":86.151408678}],"total_operations":2}} +{"time":"2025-07-20T19:10:45.930239641+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-20T19:10:52.409320897+08:00","level":"INFO","msg":"stream: closing","id":"rjowgpxz"} +{"time":"2025-07-20T19:10:52.409346873+08:00","level":"INFO","msg":"handler: closed","stream_id":"rjowgpxz"} +{"time":"2025-07-20T19:10:52.409355912+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"rjowgpxz"} 
+{"time":"2025-07-20T19:10:52.409369828+08:00","level":"INFO","msg":"sender: closed","stream_id":"rjowgpxz"} +{"time":"2025-07-20T19:10:52.417363691+08:00","level":"INFO","msg":"stream: closed","id":"rjowgpxz"} diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..900deb9f09c558d90f460fe18314e902afb8c503 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_setup.py:_flush():80] Configure stats pid to 223200 +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug.log +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug-internal.log +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_init.py:init():830] calling init triggers +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': 
'/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 
'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': 
False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_init.py:init():871] starting backend +2025-07-20 19:08:29,711 INFO MainThread:223200 [wandb_init.py:init():874] sending inform_init request +2025-07-20 19:08:29,713 INFO MainThread:223200 [wandb_init.py:init():882] backend started and connected +2025-07-20 19:08:29,719 INFO MainThread:223200 [wandb_init.py:init():953] updated telemetry +2025-07-20 19:08:29,812 INFO MainThread:223200 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 19:08:34,079 INFO MainThread:223200 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 19:08:34,261 INFO MainThread:223200 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 19:08:34,261 INFO MainThread:223200 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 19:08:34,302 INFO MainThread:223200 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 19:08:34,302 INFO MainThread:223200 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-20 19:08:34,306 INFO MainThread:223200 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-20 19:10:00,647 INFO MainThread:223200 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/rjowgpxz +2025-07-20 19:10:00,659 INFO MainThread:223200 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-20 19:10:00,735 INFO MainThread:223200 [wandb_run.py:_restore():2405] restore +2025-07-20 19:10:00,735 INFO MainThread:223200 [wandb_run.py:_restore():2411] restore done +2025-07-20 19:10:52,381 INFO MainThread:223200 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-20 19:10:52,383 INFO MainThread:223200 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-20 19:10:52,383 INFO MainThread:223200 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/run-rjowgpxz.wandb b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/run-rjowgpxz.wandb new file mode 100644 index 0000000000000000000000000000000000000000..cecbffade790ebac3e17e1d9f0bba9ee401f80b5 Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/run-rjowgpxz.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/config.yaml b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e14feab9091f8abf1d289eb42a01549d5d0779b --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/config.yaml @@ -0,0 +1,322 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + bmyd1r1vwx9pfo07osl5tt1bsnz5nor2: + args: + - --node-ip-address=10.1.4.164 + - --node-manager-port=37981 + - --object-store-name=/tmp/ray/session_2025-07-20_19-12-49_683982_227299/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2025-07-20_19-12-49_683982_227299/sockets/raylet + - --redis-address=None + - 
--metrics-agent-port=60823 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=60965 + - --gcs-address=10.1.4.164:60942 + - --session-name=session_2025-07-20_19-12-49_683982_227299 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - --cluster-id=edbd7ce8cd4b448a3a0fe384bbef5900718e1aba5abd1864af5145b8 + - --startup-token=64 + - --worker-launch-time-ms=1753009973102 + - --node-id=c7c2fc8754cd7e338d6c3cc50f0b96b730630420a2b67812b4dbe2f2 + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + cpu_count: 64 + cpu_count_logical: 64 + cudaVersion: "12.1" + disk: + /: + total: "1623302262784" + used: "1179848704" + email: gia0603yucca@gmail.com + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: https://github.com/PorUna-byte/PAR.git + gpu: NVIDIA A800-SXM4-80GB + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-dd05c780-0a19-17fd-c584-d09f4318f680 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-2782427d-b86d-e7c7-247f-edefe39eec71 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-52755b3d-d761-c869-0d08-300873ba5f02 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-5927b565-1506-34c6-eb8d-d657ed5f3558 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-90465488-5319-9508-9e8a-b3b12918be35 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-855bcb80-e48f-cdd2-f62d-35138b69089a + host: dsw-266702-557cd69888-g24kv + memory: + total: "549755813888" + os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + program: 
/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-20T11:14:42.544523Z" + writerId: bmyd1r1vwx9pfo07osl5tt1bsnz5nor2 + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + 
total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + 
lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/output.log b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..05ac641fade83f7651fb358da40f4042cd81f9a1 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/output.log @@ -0,0 +1,48 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 232058, "uuid": "2b6c8b0cab6d4bbb95f53b8629aa9dcb", "closed": false} +Start validation... 
+key +prot_embeds +value +tensor([[[ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + ..., + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031]], + + [[-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + ..., + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597]], + + [[ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + ..., + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240]], + + [[-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + ..., + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227]], + + [[ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + ..., + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073]]]) +key +prompt_input_ids +value +[tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 
151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]])] diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 
+confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 
+langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 +fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 
+lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 +requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..538e7e992c65cd5c7c2435544be5a9f46773e169 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T11:14:42.544523Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=37981", + "--object-store-name=/tmp/ray/session_2025-07-20_19-12-49_683982_227299/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_19-12-49_683982_227299/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=60823", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=60965", + "--gcs-address=10.1.4.164:60942", + "--session-name=session_2025-07-20_19-12-49_683982_227299", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=edbd7ce8cd4b448a3a0fe384bbef5900718e1aba5abd1864af5145b8", + "--startup-token=64", + "--worker-launch-time-ms=1753009973102", + "--node-id=c7c2fc8754cd7e338d6c3cc50f0b96b730630420a2b67812b4dbe2f2", + "--runtime-env-hash=-115784934", + 
"--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179848704" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "bmyd1r1vwx9pfo07osl5tt1bsnz5nor2" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/wandb-summary.json 
b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..bf55868a8b18bdd0746b03671b2168432b325fcb --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":28,"_wandb":{"runtime":28}} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..2d1149c77a97ba2b49de7b4387c09c046fef4cb8 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2025-07-20T19:14:42.896313208+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T19:14:45.107007409+08:00","level":"INFO","msg":"stream: created new stream","id":"18c4gjei"} +{"time":"2025-07-20T19:14:45.107045881+08:00","level":"INFO","msg":"stream: started","id":"18c4gjei"} +{"time":"2025-07-20T19:14:45.107089127+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"18c4gjei"} +{"time":"2025-07-20T19:14:45.107117209+08:00","level":"INFO","msg":"sender: started","stream_id":"18c4gjei"} +{"time":"2025-07-20T19:14:45.107141507+08:00","level":"INFO","msg":"handler: started","stream_id":"18c4gjei"} +{"time":"2025-07-20T19:15:15.899922253+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":1.0589074}],"total_operations":1}} +{"time":"2025-07-20T19:15:23.137185078+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-20T19:15:47.140064574+08:00","level":"INFO","msg":"stream: closing","id":"18c4gjei"} +{"time":"2025-07-20T19:15:47.141573102+08:00","level":"INFO","msg":"handler: closed","stream_id":"18c4gjei"} 
+{"time":"2025-07-20T19:15:47.141588801+08:00","level":"INFO","msg":"sender: closed","stream_id":"18c4gjei"} +{"time":"2025-07-20T19:15:47.14158413+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"18c4gjei"} +{"time":"2025-07-20T19:15:47.147232995+08:00","level":"INFO","msg":"stream: closed","id":"18c4gjei"} diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..f4c271aaad7e334e366ed34c0cd9747be734dbed --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_setup.py:_flush():80] Configure stats pid to 232058 +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug.log +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug-internal.log +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_init.py:init():830] calling init triggers +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': 
{'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 
'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 
'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_init.py:init():871] starting backend +2025-07-20 19:14:42,866 INFO MainThread:232058 [wandb_init.py:init():874] sending inform_init request +2025-07-20 19:14:42,868 INFO MainThread:232058 [wandb_init.py:init():882] backend started and connected +2025-07-20 19:14:42,880 INFO MainThread:232058 [wandb_init.py:init():953] updated telemetry +2025-07-20 19:14:42,981 INFO MainThread:232058 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 19:14:46,052 INFO MainThread:232058 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 19:14:46,234 INFO MainThread:232058 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 19:14:46,234 INFO MainThread:232058 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 19:14:46,238 INFO MainThread:232058 [wandb_run.py:_redirect():2375] Wrapping output streams. 
+2025-07-20 19:14:46,238 INFO MainThread:232058 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 19:14:46,241 INFO MainThread:232058 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-20 19:15:14,838 INFO MainThread:232058 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/18c4gjei +2025-07-20 19:15:14,840 INFO MainThread:232058 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-20 19:15:14,883 INFO MainThread:232058 [wandb_run.py:_restore():2405] restore +2025-07-20 19:15:14,887 INFO MainThread:232058 [wandb_run.py:_restore():2411] restore done +2025-07-20 19:15:47,120 INFO MainThread:232058 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-20 19:15:47,127 INFO MainThread:232058 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-20 19:15:47,127 INFO MainThread:232058 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/run-18c4gjei.wandb b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/run-18c4gjei.wandb new file mode 100644 index 0000000000000000000000000000000000000000..43209156f6235dd1820d96556ecbab66ee59fc31 Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/run-18c4gjei.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/config.yaml b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..512a1fcb46c50f5d1f858e93a0d972d52773bcdc --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/config.yaml @@ -0,0 +1,321 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + aen8lqfqe6nmonwi9mg6k364ewm1zbk1: + args: + - --node-ip-address=10.1.4.164 + - --node-manager-port=39481 + - --object-store-name=/tmp/ray/session_2025-07-20_19-23-46_177404_237363/sockets/plasma_store + - 
--raylet-name=/tmp/ray/session_2025-07-20_19-23-46_177404_237363/sockets/raylet + - --redis-address=None + - --metrics-agent-port=50645 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=61124 + - --gcs-address=10.1.4.164:47108 + - --session-name=session_2025-07-20_19-23-46_177404_237363 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - --cluster-id=c47a61f296c7eb4c70ae9057de30f64c14ef34b9d67ed1e6c9f9e1ab + - --startup-token=64 + - --worker-launch-time-ms=1753010628799 + - --node-id=f70ded91aa9dcf3abddbd985069477193b47e040a95f0010c3430c70 + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + cpu_count: 64 + cpu_count_logical: 64 + cudaVersion: "12.1" + disk: + /: + total: "1623302262784" + used: "1179885568" + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: https://github.com/PorUna-byte/PAR.git + gpu: NVIDIA A800-SXM4-80GB + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-dd05c780-0a19-17fd-c584-d09f4318f680 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-2782427d-b86d-e7c7-247f-edefe39eec71 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-52755b3d-d761-c869-0d08-300873ba5f02 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-5927b565-1506-34c6-eb8d-d657ed5f3558 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-90465488-5319-9508-9e8a-b3b12918be35 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-855bcb80-e48f-cdd2-f62d-35138b69089a + host: dsw-266702-557cd69888-g24kv + memory: + total: "549755813888" + os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + 
program: /root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-20T11:25:31.090058Z" + writerId: aen8lqfqe6nmonwi9mg6k364ewm1zbk1 + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + 
total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + 
lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/output.log b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d0a19aa21317f2d31bcd06b975e6bd93f7b139b3 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/output.log @@ -0,0 +1,51 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 242135, "uuid": "dccebe4a06fd42b7b04143ecb47f7f86", "closed": false} +Start validation... 
+{'prot_embeds': tensor([[[ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + ..., + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031]], + + [[-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + ..., + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597]], + + [[ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + ..., + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240]], + + [[-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + ..., + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227]], + + [[ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + ..., + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073]]]), 'prompt_input_ids': [tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), 
tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]])], 'input_ids': [tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]])], 'raw_prompt_ids': array([list([11190, 311, 279, 12833, 1995, 3897, 3685, 323, 279, 12833, 829, 1207, 24, 56, 19, 34, 17, 11, 7023, 279, 1429, 4363, 1186, 5873, 1276, 52304, 504, 279, 2701, 2606, 510, 3798, 25, 220, 15, 13, 330, 45, 22147, 355, 11, 547, 1, 715, 220, 16, 13, 330, 56715, 98605, 10530, 11, 328, 1, 2303, 220, 17, 13, 330, 840, 376, 64341, 11, 328, 1, 2303, 220, 18, 13, 330, 54370, 46417, 81, 290, 11, 547, 1, 2303, 220, 19, 13, 330, 3599, 38554, 11, 386, 1, 2303, 220, 20, 13, 330, 3727, 55078, 10530, 292, 2112, 292, 16496, 11, 386, 1, 2303, 220, 21, 13, 330, 2120, 559, 307, 11, 328, 1, 2303, 220, 22, 13, 330, 38, 337, 8212, 40605, 11, 386, 1, 2303, 220, 220, 23, 13, 330, 43, 1047, 31454, 27233, 580, 84, 1263, 11, 386, 1, 2303, 24, 13, 330, 47, 2328, 7191, 635, 11, 547, 698]), + list([11190, 311, 279, 12833, 1995, 3897, 3685, 323, 279, 12833, 829, 1207, 24, 38, 3390, 23, 11, 7023, 279, 1429, 4363, 1186, 5873, 1276, 52304, 504, 279, 2701, 2606, 510, 3798, 25, 220, 15, 13, 330, 45, 22147, 355, 11, 547, 1, 715, 220, 16, 13, 330, 56715, 98605, 10530, 11, 328, 1, 2303, 220, 17, 13, 330, 840, 376, 64341, 11, 328, 1, 2303, 220, 18, 13, 330, 54370, 46417, 81, 290, 11, 547, 1, 2303, 220, 19, 13, 330, 3599, 38554, 11, 386, 1, 2303, 220, 20, 13, 330, 3727, 55078, 10530, 292, 2112, 292, 16496, 11, 386, 1, 2303, 220, 21, 13, 330, 2120, 559, 307, 11, 328, 1, 2303, 220, 22, 13, 330, 38, 337, 8212, 40605, 11, 386, 1, 2303, 220, 220, 23, 13, 330, 43, 1047, 31454, 27233, 580, 84, 1263, 11, 386, 
1, 2303, 24, 13, 330, 47, 2328, 7191, 635, 11, 547, 698]), + list([11190, 311, 279, 12833, 1995, 3897, 3685, 323, 279, 12833, 829, 1207, 24, 17, 18, 55, 16, 11, 7023, 279, 1429, 4363, 1186, 5873, 1276, 52304, 504, 279, 2701, 2606, 510, 3798, 25, 220, 15, 13, 330, 45, 22147, 355, 11, 547, 1, 715, 220, 16, 13, 330, 56715, 98605, 10530, 11, 328, 1, 2303, 220, 17, 13, 330, 840, 376, 64341, 11, 328, 1, 2303, 220, 18, 13, 330, 54370, 46417, 81, 290, 11, 547, 1, 2303, 220, 19, 13, 330, 3599, 38554, 11, 386, 1, 2303, 220, 20, 13, 330, 3727, 55078, 10530, 292, 2112, 292, 16496, 11, 386, 1, 2303, 220, 21, 13, 330, 2120, 559, 307, 11, 328, 1, 2303, 220, 22, 13, 330, 38, 337, 8212, 40605, 11, 386, 1, 2303, 220, 220, 23, 13, 330, 43, 1047, 31454, 27233, 580, 84, 1263, 11, 386, 1, 2303, 24, 13, 330, 47, 2328, 7191, 635, 11, 547, 698]), + list([11190, 311, 279, 12833, 1995, 3897, 3685, 323, 279, 12833, 829, 422, 17, 42, 21, 37, 16, 11, 7023, 279, 1429, 4363, 1186, 5873, 1276, 52304, 504, 279, 2701, 2606, 510, 3798, 25, 220, 15, 13, 330, 45, 22147, 355, 11, 547, 1, 715, 220, 16, 13, 330, 56715, 98605, 10530, 11, 328, 1, 2303, 220, 17, 13, 330, 840, 376, 64341, 11, 328, 1, 2303, 220, 18, 13, 330, 54370, 46417, 81, 290, 11, 547, 1, 2303, 220, 19, 13, 330, 3599, 38554, 11, 386, 1, 2303, 220, 20, 13, 330, 3727, 55078, 10530, 292, 2112, 292, 16496, 11, 386, 1, 2303, 220, 21, 13, 330, 2120, 559, 307, 11, 328, 1, 2303, 220, 22, 13, 330, 38, 337, 8212, 40605, 11, 386, 1, 2303, 220, 220, 23, 13, 330, 43, 1047, 31454, 27233, 580, 84, 1263, 11, 386, 1, 2303, 24, 13, 330, 47, 2328, 7191, 635, 11, 547, 698]), + list([11190, 311, 279, 12833, 1995, 3897, 3685, 323, 279, 12833, 829, 1207, 24, 53, 12457, 15, 11, 7023, 279, 1429, 4363, 1186, 5873, 1276, 52304, 504, 279, 2701, 2606, 510, 3798, 25, 220, 15, 13, 330, 45, 22147, 355, 11, 547, 1, 715, 220, 16, 13, 330, 56715, 98605, 10530, 11, 328, 1, 2303, 220, 17, 13, 330, 840, 376, 64341, 11, 328, 1, 2303, 220, 18, 13, 330, 54370, 46417, 81, 290, 11, 
547, 1, 2303, 220, 19, 13, 330, 3599, 38554, 11, 386, 1, 2303, 220, 20, 13, 330, 3727, 55078, 10530, 292, 2112, 292, 16496, 11, 386, 1, 2303, 220, 21, 13, 330, 2120, 559, 307, 11, 328, 1, 2303, 220, 22, 13, 330, 38, 337, 8212, 40605, 11, 386, 1, 2303, 220, 220, 23, 13, 330, 43, 1047, 31454, 27233, 580, 84, 1263, 11, 386, 1, 2303, 24, 13, 330, 47, 2328, 7191, 635, 11, 547, 698])], + dtype=object), 'ground_truth': array(['4', '4', '4', '4', '4'], dtype=object), 'protein_sequence': array(['MATPSAAFEALMNGVTSWDVPEDAVPCELLLIGEASFPVMVNDMGQVLIAASSYGRGRLVVVSHEDYLVEAQLTPFLLNAVGWLCSSPGAPIGVHPSLAPLAKILEGSGVDAKVEPEVKDSLGVYCIDAYNETMTEKLVKFMKCGGGLLIGGQAWDWANQGEDERVLFTFPGNLVTSVAGIYFTDNKGDTSFFKVSKKMPKIPVLVSCEDDLSDDREELLHGISELDISNSDCFPSQLLVHGALAFPLGLDSYHGCVIAAARYGRGRVVVTGHKVLFTVGKLGPFLLNAVRWLDGGRRGKVVVQTELRTLSGLLAVGGIDTSIEPNLTSDASVYCFEPVSEVGVKELQEFVAEGGGLFVGAQAWWWAFKNPGVSPLARFPGNLLLNPFGISITSQSLNPGPFRTPKAGIRTYHFRSTLAEFQVIMGRKRGNVEKGWLAKLGPDGAAFLQIPAEEIPAYMSVHRLLRKLLSRYRLPVATRENPVINDCCRGAMLSLATGLAHSGSDLSLLVPEIEDMYSSPYLRPSESPITVEVNCTNPGTRYCWMSTGLYIPGRQIIEVSLPEAAASADLKIQIGCHTDDLTRASKLFRGPLVINRCCLDKPTKSITCLWGGLLYIIVPQNSKLGSVPVTVKGAVHAPYYKLGETTLEEWKRRIQENPGPWGELATDNIILTVPTANLRTLENPEPLLRLWDEVMQAVARLGAEPFPLRLPQRIVADVQISVGWMHAGYPIMCHLESVQELINEKLIRTKGLWGPVHELGRNQQRQEWEFPPHTTEATCNLWCVYVHETVLGIPRSRANIALWPPVREKRVRIYLSKGPNVKNWNAWTALETYLQLQEAFGWEPFIRLFTEYRNQTNLPTENVDKMNLWVKMFSHQVQKNLAPFFEAWAWPIQKEVATSLAYLPEWKENIMKLYLLTQMPH', + 'MWPLVVVVLLGSAYCGSAQLIFNITKSVEFTVCNTTVTIPCFVNNMEAKNISELYVKWKFKGKDIFIFDGAQHISKPSEAFPSSKISPSELLHGIASLKMDKRDAVIGNYTCEVTELSREGETIIELKRRFVSWFSPNENILIVIFPILAILLFWGQFGILTLKYKSSYTKEKTIFLLVAGLMLTIIVIVGAILFIPGEYSTKNACGLGLIVIPTAILILLQYCVFMMALGMSSFTIAILILQVLGHVLSVVGLSLCVSECTPVHGPLLISGLGIIALAELLGLVYMKCVASDHKTIQPPRNN', + 
'MRLLPLLVGFSTLLNCSYTQNCSKTTCLPNAKCEVHNGVEACFCSQGYSGNGVTICEDIDECSESSVCGDHAVCENVNGGFSCFCREGYQTATGKSQFTPNDGSYCQDIDECSESSVCGDHAVCENVNGGFSCFCREGYQTATGKSQFTPNDGSYCQESMNSNCHLEHACIAANINKTLKRIGPITEQTTLLQEIYRNSEAELSLMDIVTYIEILTESSSLLGHPNSTTSYKDAHFNSTLTEFGETINNFVERSTHKMWDQLPTNHRRLHLTKLMHTAELVTLQIAQNTQKNSQFDMNSTDLALKVFAFDSTHMKHAHPHMNVDGGYVKISPRRKAAHGTTGNVVVAFLCYKSIGPLLSSSDNFLLDTQNDNSEGKEKVISSVISASISSNPPTLYELEKITFTLSHVKLSDKHRTQCAFWNYSVDAMNNGSWSTEGCELTHSNDTHTSCRCSHLTHFAILMSSTSSIGIKDYNILTRITQLGIIISLICLAICIFTFWFFSEIQSTRTTIHKNLCCSLFLAELVFLIGININTNKLVCSIIAGLLHYFFLAAFAWMCIEGIHLYLIVVGVIYNKGFLHKNFYIFGYLSPAVVVGFSASLGYRYYGTTKVCWLSTENNFIWSFIGPACLIILVNLLAFGVIIYKVFRHTAGLKPEVSCYENIRSCARGALALLFLLGTTWIFGVLHVVHASVVTAYLFTVSNAFQGMFIFLFLCVLSRKIQEEYYRLFKNVPCCFGCLR', + 'MGFGWQGSVSIAFTALAFVVMAADWVGPDVTFTVLLAFLTAFDGQIVTVAKAAAGYGNTGLLTVIFLYWVAEGITQTGGLELIMNFVLGRSRSVHWALARSMFPVMCLSAFLNNTPCVTFMIPILISWGRRCGVPIKKLLIPLSYASVLGGTCTSIGTSTNLVIVGLQDARYTKAKQLDQAKFQIFDIAPYGVPYALWGFVFILLTQAFLLPGNSSRYAKDLLIAVRVLPSSSVAKKKLKDSGLLQQSGFSVSGIYRDGKYLSKPDPNWVLEPNDILYAAGEFDVVEFVGEEFGLGLVNADAETSAERPFTTGEESVFTPTGGAPYQKLVQATIAPTSDLIGRTVREVSWQGRFGLIPVAIQRGNGREDGRLNDVVLAAGDVLILDTTPFYDEEREDSKNNFAGKVRAVKDGAAKEFVVGVKVKKSSEVVNKTVSAAGLRGIPGLFVLSVDRADGSSVEASDYLYKIQPDDTIWIATDIGAVGFLAKFPGLELVQQEQVDKTGTSILYRHLVQAAVSHKGPIVGKTVRDVRFRTLYNAAVVAVHREGARVPLKVQDIVLQGGDVLLISCHTNWADEHRHDKSFVLLQPVPDSSPPKRSRMVIGVLLATGMVLTQIVGGLKSREYIHLWPAAVLTSALMLLTGCMNADQARKAIYWDVYLTIAAAFGVSAALEGTGVAASFANGIISIGKNLHSDGAALIAIYIATAMLSELLTNNAAGAIMYPIAAIAGDALKISPKETSVAIMLGASAGFINPFSYQCNLMVYAAGNYSVREFAIIGAPFQIWLMIVAGFILCYMKEWHQVWIVSWICTAGIVLLPALYFLLPTKVQLRIDAFFDRVAQTLNPKLIIERRNSIRRQASRTGSDGTGSSDSPRALGVPKVITA', + 
'MVAQEQLVLLLMLLAGCRGGANAILDPGWVIPSKVEQLIGGDFNLSCTLNEDYFNGKSAEDCPVEKLYFTGGGRVYRDSKHIRILNNTTILFSDTNAVEQENDYHCMCDEYVINKSKVYVGTRPLLVRDFNCLDYDFQFMVCNFTQPPNTVITKYNISYNTNNDWRYSNTLDCNFDSAPVVTCNLTDDNYKRFSETFYFRLSISNALGHETQPITINHFERLVPARPGQNLTLLNRTESSVCLSWEMPRRSNYNRGLVWQVRVTPQNFEPITRPSWRNHTLTIKDTLCLTELPFAGYNYTLRVRVRANQNNTLWSEPMIYAFATAPAPPRRPPRVTYGSFYVYSSEKAMRFYWEPLEEHELNGPDFRYSISEYRINGTAVDPGLIKVESNSAMIDHWSMSAVHHFLIRSSNSQGLSVNATPMTIGPISNRDFKVREPRNIRSVYHPTNKSYTLSWDPPSDQRELQNYTVFWCVPKPGLQSECEGSIRFAEVASGLHHFTTSPDQLLTLHMAVSANYQSHNTGLHWAICSSDKKDDLAKMEPSIDVATSTSLTVSWSERVCAVILAGYNLTYCQRSAGRPDNCTTVTIDRYTNKHVIQNLVPYTDYSVKMLMYSDSRVSKYSDELVNRTGEAAPSQPRELQLIRVTSDSVELAWKPPLLANGVVRAYEGTFRSLHDNVTDTFRVSASADELVNNEKPITYRLGNLTAFTKYEISVRARTVYPSEPSNVILFSTAIGVPSPPQLYVINNPDQSSRLDWEPPRTPAGRIDFYEISLRDNNASCLTSTILPGRNLSYVMATPRCTSHNPFQLAVRAINVEQHPQLNGADAAEGAVLLMSTNGKGCEARTDALGEEERLQFEAYAANMTAYRLYRSDWGIYGFICTPDTHSVKAMYQTIEVTVAILVLGVIFYLVYKKYRKMSDIGLVLPQGIMETMKKPIDMGGLGLGLGPDSSVSGGIVCTRVDDSPPYTPQDLPHDFSSCGSESSKLLLRTASSSGGGGCVDRDGYDDNHETGPISAVGPPTSYLAMRHGLLVQNDRERERERDREQEREREQQQQQRESEMDREQSCTNGYIKPTQMKSWGGNGPSDNDHTFSVPSTAMTAPMSQPLSQIPLSGYVPVPIPQSRFNPAPVQPFGSPAVPSAATAAAASTFFPPAHLLNMDNYVQASDLHKLKPLVAAPLSQTGGPAFAGSSPATSPPLQLPPVHAASPAAATPKMADIGYTTMEQLQLTGLIKPPLAATVGSPTHAAGGAPGGGNQHSRLQPQINGYVTPQDLNAMAHNRHVL'], + dtype=object)} diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 
+markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 
+numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 +fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 
+compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 +requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/wandb-metadata.json new file mode 100644 index 
0000000000000000000000000000000000000000..9c5883d6f0a3aedd20b77cf4ce6c1d7e4d7a9648 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T11:25:31.090058Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=39481", + "--object-store-name=/tmp/ray/session_2025-07-20_19-23-46_177404_237363/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_19-23-46_177404_237363/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=50645", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=61124", + "--gcs-address=10.1.4.164:47108", + "--session-name=session_2025-07-20_19-23-46_177404_237363", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=c47a61f296c7eb4c70ae9057de30f64c14ef34b9d67ed1e6c9f9e1ab", + "--startup-token=64", + "--worker-launch-time-ms=1753010628799", + "--node-id=f70ded91aa9dcf3abddbd985069477193b47e040a95f0010c3430c70", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179885568" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + 
}, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "aen8lqfqe6nmonwi9mg6k364ewm1zbk1" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/wandb-summary.json b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..334abaad9a4c576b8b414ca3a7804f2fbc807661 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":28},"_runtime":28} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..026e8cf157c7da6bacb3f33b46bb622a12ace9a4 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug-internal.log @@ -0,0 +1,15 @@ +{"time":"2025-07-20T19:25:31.457576073+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} 
+{"time":"2025-07-20T19:25:35.573678908+08:00","level":"INFO","msg":"stream: created new stream","id":"zygsruir"} +{"time":"2025-07-20T19:25:35.574999882+08:00","level":"INFO","msg":"stream: started","id":"zygsruir"} +{"time":"2025-07-20T19:25:35.575019345+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"zygsruir"} +{"time":"2025-07-20T19:25:35.575032079+08:00","level":"INFO","msg":"handler: started","stream_id":"zygsruir"} +{"time":"2025-07-20T19:25:35.575046919+08:00","level":"INFO","msg":"sender: started","stream_id":"zygsruir"} +{"time":"2025-07-20T19:26:06.941634052+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading wandb-metadata.json","runtime_seconds":27.851439057},{"desc":"uploading requirements.txt","runtime_seconds":27.601123972},{"desc":"updating run metadata","runtime_seconds":1.002174891}],"total_operations":3}} +{"time":"2025-07-20T19:27:06.990529067+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading wandb-metadata.json","runtime_seconds":87.900327481},{"desc":"uploading requirements.txt","runtime_seconds":87.650013021},{"desc":"uploading output.log","runtime_seconds":59.29233501},{"desc":"uploading wandb-summary.json","runtime_seconds":59.292321132},{"desc":"uploading config.yaml","runtime_seconds":57.41576814}],"total_operations":5}} +{"time":"2025-07-20T19:28:07.03660474+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading wandb-metadata.json","runtime_seconds":147.946406593},{"desc":"uploading requirements.txt","runtime_seconds":147.696092307},{"desc":"uploading output.log","runtime_seconds":119.338414484},{"desc":"uploading wandb-summary.json","runtime_seconds":119.338400774},{"desc":"uploading config.yaml","runtime_seconds":117.461848187}],"total_operations":5}} +{"time":"2025-07-20T19:28:18.110317576+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} 
+{"time":"2025-07-20T19:28:26.613849672+08:00","level":"INFO","msg":"stream: closing","id":"zygsruir"} +{"time":"2025-07-20T19:28:26.613879904+08:00","level":"INFO","msg":"handler: closed","stream_id":"zygsruir"} +{"time":"2025-07-20T19:28:26.613889947+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"zygsruir"} +{"time":"2025-07-20T19:28:26.61390552+08:00","level":"INFO","msg":"sender: closed","stream_id":"zygsruir"} +{"time":"2025-07-20T19:28:26.619789342+08:00","level":"INFO","msg":"stream: closed","id":"zygsruir"} diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..6ebd24ff68141c0f7b1e8626f8b3f7a92f8fad87 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_setup.py:_flush():80] Configure stats pid to 242135 +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug.log +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug-internal.log +2025-07-20 19:25:31,219 
INFO MainThread:242135 [wandb_init.py:init():830] calling init triggers +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 
'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 
'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 19:25:31,232 INFO MainThread:242135 [wandb_init.py:init():871] starting backend +2025-07-20 19:25:31,444 INFO MainThread:242135 [wandb_init.py:init():874] sending inform_init request +2025-07-20 19:25:31,446 INFO MainThread:242135 [wandb_init.py:init():882] backend started and connected +2025-07-20 19:25:31,451 INFO MainThread:242135 [wandb_init.py:init():953] updated telemetry +2025-07-20 19:25:31,492 INFO MainThread:242135 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 19:25:37,433 INFO MainThread:242135 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 19:25:37,647 INFO MainThread:242135 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 19:25:37,647 INFO 
MainThread:242135 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 19:25:37,652 INFO MainThread:242135 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 19:25:37,652 INFO MainThread:242135 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 19:25:37,655 INFO MainThread:242135 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-20 19:26:05,937 INFO MainThread:242135 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/zygsruir +2025-07-20 19:26:05,939 INFO MainThread:242135 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-20 19:26:05,939 INFO MainThread:242135 [wandb_run.py:_restore():2405] restore +2025-07-20 19:26:05,939 INFO MainThread:242135 [wandb_run.py:_restore():2411] restore done +2025-07-20 19:28:26,559 INFO MainThread:242135 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-20 19:28:26,580 INFO MainThread:242135 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-20 19:28:26,600 INFO MainThread:242135 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/run-zygsruir.wandb b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/run-zygsruir.wandb new file mode 100644 index 0000000000000000000000000000000000000000..1c10e0cb085ee7a2463d0ac5397ce7139bf082cf Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/run-zygsruir.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/output.log b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..fee82decdb3d2d155a5bd27c3020a01a06910ea0 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", 
"total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 258417, "uuid": "eea63d59625341e689a596fa0b39bb32", "closed": false} +Start validation... diff --git a/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 
+xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 
+aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 +fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 
+safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 +requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..357fe4d9723bf82d2dadc05c36a49e89f85c3713 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T12:04:22.761771Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=39065", + "--object-store-name=/tmp/ray/session_2025-07-20_20-02-37_338127_253666/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_20-02-37_338127_253666/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=51673", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=61422", + "--gcs-address=10.1.4.164:60743", + "--session-name=session_2025-07-20_20-02-37_338127_253666", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=b4b34c9cedc29492f8fa882ae8351e7ee3416f09f147c7e74caf17c3", + "--startup-token=64", + "--worker-launch-time-ms=1753012959888", + "--node-id=51eea3898a82302f4cb8f9a222fdcca1b829b0df7dd10244eed979fe", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": 
"/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179971584" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "61kjyeeak3xa3675zcay0f3n8u00jg65" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug-internal.log new file mode 100644 index 
0000000000000000000000000000000000000000..5ce12fb741fa20d3c2a9c92f5b00dc8f204498fd --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T20:04:23.360241675+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T20:04:27.983686669+08:00","level":"INFO","msg":"stream: created new stream","id":"m2esqgth"} +{"time":"2025-07-20T20:04:27.984452648+08:00","level":"INFO","msg":"stream: started","id":"m2esqgth"} +{"time":"2025-07-20T20:04:27.98445835+08:00","level":"INFO","msg":"handler: started","stream_id":"m2esqgth"} +{"time":"2025-07-20T20:04:27.98449331+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"m2esqgth"} +{"time":"2025-07-20T20:04:27.984472178+08:00","level":"INFO","msg":"sender: started","stream_id":"m2esqgth"} diff --git a/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..4820c2e5eb196c71b32cecc700c86e3667344782 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_setup.py:_flush():80] Configure stats pid to 258417 +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_init.py:setup_run_log_directory():703] Logging user logs to 
/nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug.log +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug-internal.log +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_init.py:init():830] calling init triggers +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 
0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 
1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 20:04:23,138 INFO MainThread:258417 [wandb_init.py:init():871] starting backend +2025-07-20 20:04:23,345 INFO MainThread:258417 [wandb_init.py:init():874] sending inform_init request +2025-07-20 20:04:23,347 INFO MainThread:258417 [wandb_init.py:init():882] backend started and connected +2025-07-20 20:04:23,353 INFO MainThread:258417 [wandb_init.py:init():953] updated telemetry +2025-07-20 20:04:23,405 
INFO MainThread:258417 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 20:04:29,804 INFO MainThread:258417 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 20:04:29,998 INFO MainThread:258417 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 20:04:29,998 INFO MainThread:258417 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 20:04:30,002 INFO MainThread:258417 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 20:04:30,002 INFO MainThread:258417 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 20:04:30,005 INFO MainThread:258417 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/run-m2esqgth.wandb b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/run-m2esqgth.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/config.yaml b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c4bc878537dc6fc2dc7dff776dd4d748ea52668 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/config.yaml @@ -0,0 +1,321 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + xtan9jnlk3anxkvk8lrtde0kzlter6ht: + args: + - --node-ip-address=10.1.4.164 + - --node-manager-port=36211 + - --object-store-name=/tmp/ray/session_2025-07-20_20-06-04_261605_261953/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2025-07-20_20-06-04_261605_261953/sockets/raylet + - --redis-address=None + - --metrics-agent-port=55320 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=40764 + - --gcs-address=10.1.4.164:53846 + - --session-name=session_2025-07-20_20-06-04_261605_261953 + - 
--temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - --cluster-id=0063e3b7b4568489d096fdc242c3518b906b31d74ced6b05fe470650 + - --startup-token=64 + - --worker-launch-time-ms=1753013166837 + - --node-id=fdba09a4dfb48b5d51465042822e51b6b219b770c4e580a471120e79 + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + cpu_count: 64 + cpu_count_logical: 64 + cudaVersion: "12.1" + disk: + /: + total: "1623302262784" + used: "1180028928" + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: https://github.com/PorUna-byte/PAR.git + gpu: NVIDIA A800-SXM4-80GB + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-dd05c780-0a19-17fd-c584-d09f4318f680 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-2782427d-b86d-e7c7-247f-edefe39eec71 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-52755b3d-d761-c869-0d08-300873ba5f02 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-5927b565-1506-34c6-eb8d-d657ed5f3558 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-90465488-5319-9508-9e8a-b3b12918be35 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-855bcb80-e48f-cdd2-f62d-35138b69089a + host: dsw-266702-557cd69888-g24kv + memory: + total: "549755813888" + os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + program: /root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-20T12:07:50.034461Z" + writerId: xtan9jnlk3anxkvk8lrtde0kzlter6ht + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + 
- 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + 
mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + 
mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/output.log b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..cad8f45f4328a4f84e7d55842a40398c72b9c663 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/output.log @@ -0,0 +1,230 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 266721, "uuid": "6d31a99c30c448c08468eabd5882cd00", "closed": false} +Start validation... 
+key +prot_embeds +value +tensor([[[ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + ..., + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031]]]) +key +prompt_input_ids +value +[[[11190 311 279 ... 151665 151665 151665]]] +key +input_ids +value +[[[11190 311 279 ... 151665 151665 151665]]] +key +raw_prompt_ids +value +[[11190 311 279 12833 1995 3897 3685 323 279 12833 829 1207 24 56 19 34 + 17 11 7023 279 1429 4363 1186 5873 1276 52304 504 279 2701 2606 510 + 3798 25 220 15 13 330 45 22147 355 11 547 1 715 220 16 13 330 56715 + 98605 10530 11 328 1 2303 220 17 13 330 840 376 64341 11 328 1 2303 220 + 18 13 330 54370 46417 81 290 11 547 1 2303 220 19 13 330 3599 38554 11 + 386 1 2303 220 20 13 330 3727 55078 10530 292 2112 292 16496 11 386 1 + 2303 220 21 13 330 2120 559 307 11 328 1 2303 220 22 13 330 38 337 8212 + 40605 11 386 1 2303 220 220 23 13 330 43 1047 31454 27233 580 84 1263 + 11 386 1 2303 24 13 330 47 2328 7191 635 11 547 698]] +key +ground_truth +value +['4'] +key +protein_sequence +value 
+['MATPSAAFEALMNGVTSWDVPEDAVPCELLLIGEASFPVMVNDMGQVLIAASSYGRGRLVVVSHEDYLVEAQLTPFLLNAVGWLCSSPGAPIGVHPSLAPLAKILEGSGVDAKVEPEVKDSLGVYCIDAYNETMTEKLVKFMKCGGGLLIGGQAWDWANQGEDERVLFTFPGNLVTSVAGIYFTDNKGDTSFFKVSKKMPKIPVLVSCEDDLSDDREELLHGISELDISNSDCFPSQLLVHGALAFPLGLDSYHGCVIAAARYGRGRVVVTGHKVLFTVGKLGPFLLNAVRWLDGGRRGKVVVQTELRTLSGLLAVGGIDTSIEPNLTSDASVYCFEPVSEVGVKELQEFVAEGGGLFVGAQAWWWAFKNPGVSPLARFPGNLLLNPFGISITSQSLNPGPFRTPKAGIRTYHFRSTLAEFQVIMGRKRGNVEKGWLAKLGPDGAAFLQIPAEEIPAYMSVHRLLRKLLSRYRLPVATRENPVINDCCRGAMLSLATGLAHSGSDLSLLVPEIEDMYSSPYLRPSESPITVEVNCTNPGTRYCWMSTGLYIPGRQIIEVSLPEAAASADLKIQIGCHTDDLTRASKLFRGPLVINRCCLDKPTKSITCLWGGLLYIIVPQNSKLGSVPVTVKGAVHAPYYKLGETTLEEWKRRIQENPGPWGELATDNIILTVPTANLRTLENPEPLLRLWDEVMQAVARLGAEPFPLRLPQRIVADVQISVGWMHAGYPIMCHLESVQELINEKLIRTKGLWGPVHELGRNQQRQEWEFPPHTTEATCNLWCVYVHETVLGIPRSRANIALWPPVREKRVRIYLSKGPNVKNWNAWTALETYLQLQEAFGWEPFIRLFTEYRNQTNLPTENVDKMNLWVKMFSHQVQKNLAPFFEAWAWPIQKEVATSLAYLPEWKENIMKLYLLTQMPH'] +开始valid generate_sequence +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268621, ip=10.1.4.164, actor_id=ce52cf1bf65fb687f4ef2e8501000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268621, ip=10.1.4.164, actor_id=ce52cf1bf65fb687f4ef2e8501000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + 
return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in _default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268620, ip=10.1.4.164, actor_id=f7a12dbb859a77a4660ab73b01000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268620, ip=10.1.4.164, actor_id=f7a12dbb859a77a4660ab73b01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = 
self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in _default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268619, ip=10.1.4.164, actor_id=b9a2160653e6d12bcb36fdfb01000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268619, ip=10.1.4.164, actor_id=b9a2160653e6d12bcb36fdfb01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File 
"/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in _default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268618, ip=10.1.4.164, actor_id=446eaa8d63c457018134bc9c01000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268618, ip=10.1.4.164, actor_id=446eaa8d63c457018134bc9c01000000, 
repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in _default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268617, ip=10.1.4.164, actor_id=efd1feff531e367223ca45bf01000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of 
the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268617, ip=10.1.4.164, actor_id=efd1feff531e367223ca45bf01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in _default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): 
ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268616, ip=10.1.4.164, actor_id=ec16be3ae05a2ccd6645853a01000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268616, ip=10.1.4.164, actor_id=ec16be3ae05a2ccd6645853a01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in 
_default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268397, ip=10.1.4.164, actor_id=f5dc0479983652f5dfca6aad01000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268397, ip=10.1.4.164, actor_id=f5dc0479983652f5dfca6aad01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File 
"/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in _default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 
+watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 
+opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 +fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 
+nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 +requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f8884381d325d2f793e08f2cd106cc32b6a6564d --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T12:07:50.034461Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=36211", + "--object-store-name=/tmp/ray/session_2025-07-20_20-06-04_261605_261953/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_20-06-04_261605_261953/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=55320", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=40764", + "--gcs-address=10.1.4.164:53846", + "--session-name=session_2025-07-20_20-06-04_261605_261953", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=0063e3b7b4568489d096fdc242c3518b906b31d74ced6b05fe470650", + 
"--startup-token=64", + "--worker-launch-time-ms=1753013166837", + "--node-id=fdba09a4dfb48b5d51465042822e51b6b219b770c4e580a471120e79", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1180028928" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "xtan9jnlk3anxkvk8lrtde0kzlter6ht" +} \ No newline at end of file diff --git 
a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/wandb-summary.json b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..533452d4a934da3482e9f08995d671c42966eba9 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":6,"_wandb":{"runtime":6}} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..89585658bb17ee57991500fce03ed1101a676252 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2025-07-20T20:07:50.38283684+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T20:07:53.483644117+08:00","level":"INFO","msg":"stream: created new stream","id":"qx2pyd9p"} +{"time":"2025-07-20T20:07:53.484513955+08:00","level":"INFO","msg":"stream: started","id":"qx2pyd9p"} +{"time":"2025-07-20T20:07:53.484532882+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"qx2pyd9p"} +{"time":"2025-07-20T20:07:53.484557873+08:00","level":"INFO","msg":"sender: started","stream_id":"qx2pyd9p"} +{"time":"2025-07-20T20:07:53.484540258+08:00","level":"INFO","msg":"handler: started","stream_id":"qx2pyd9p"} +{"time":"2025-07-20T20:08:02.135520705+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":7.373944436}],"total_operations":1}} +{"time":"2025-07-20T20:08:13.947103141+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-20T20:08:26.949791701+08:00","level":"INFO","msg":"stream: closing","id":"qx2pyd9p"} 
+{"time":"2025-07-20T20:08:26.950524242+08:00","level":"INFO","msg":"handler: closed","stream_id":"qx2pyd9p"} +{"time":"2025-07-20T20:08:26.950531995+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"qx2pyd9p"} +{"time":"2025-07-20T20:08:26.950552576+08:00","level":"INFO","msg":"sender: closed","stream_id":"qx2pyd9p"} +{"time":"2025-07-20T20:08:26.956579798+08:00","level":"INFO","msg":"stream: closed","id":"qx2pyd9p"} diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..03d1a7f54049deb83caba3d0ce58aaf8462cf976 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_setup.py:_flush():80] Configure stats pid to 266721 +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug.log +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug-internal.log +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_init.py:init():830] calling init triggers +2025-07-20 20:07:50,155 INFO 
MainThread:266721 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 
'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 
'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_init.py:init():871] starting backend +2025-07-20 20:07:50,362 INFO MainThread:266721 [wandb_init.py:init():874] sending inform_init request +2025-07-20 20:07:50,364 INFO MainThread:266721 [wandb_init.py:init():882] backend started and connected +2025-07-20 20:07:50,371 INFO MainThread:266721 [wandb_init.py:init():953] updated telemetry +2025-07-20 20:07:50,424 INFO MainThread:266721 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 20:07:54,757 INFO MainThread:266721 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 20:07:55,002 INFO MainThread:266721 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 20:07:55,002 INFO MainThread:266721 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 20:07:55,009 INFO 
MainThread:266721 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 20:07:55,009 INFO MainThread:266721 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 20:07:55,014 INFO MainThread:266721 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-20 20:08:01,111 INFO MainThread:266721 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/qx2pyd9p +2025-07-20 20:08:01,112 INFO MainThread:266721 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-20 20:08:01,117 INFO MainThread:266721 [wandb_run.py:_restore():2405] restore +2025-07-20 20:08:01,120 INFO MainThread:266721 [wandb_run.py:_restore():2411] restore done +2025-07-20 20:08:26,943 INFO MainThread:266721 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-20 20:08:26,944 INFO MainThread:266721 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-20 20:08:26,945 INFO MainThread:266721 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/run-qx2pyd9p.wandb b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/run-qx2pyd9p.wandb new file mode 100644 index 0000000000000000000000000000000000000000..a67ee390595a2327a9e23a96f01a3bafcf2d3b63 Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/run-qx2pyd9p.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/files/output.log b/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..5df308d8b383a43baf17da17990166de83b5ca30 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug-internal.log @@ -0,0 +1,7 @@ +{"time":"2025-07-21T14:03:03.306005681+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-21T14:03:16.726235454+08:00","level":"INFO","msg":"stream: created new stream","id":"esnp51q2"} +{"time":"2025-07-21T14:03:16.72791216+08:00","level":"INFO","msg":"stream: started","id":"esnp51q2"} +{"time":"2025-07-21T14:03:16.727930603+08:00","level":"INFO","msg":"handler: started","stream_id":"esnp51q2"} +{"time":"2025-07-21T14:03:16.727955266+08:00","level":"INFO","msg":"sender: started","stream_id":"esnp51q2"} +{"time":"2025-07-21T14:03:16.727988136+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"esnp51q2"} +{"time":"2025-07-21T14:03:46.759865901+08:00","level":"ERROR","msg":"runupserter: failed to init run","error":"context deadline exceeded (Client.Timeout or context cancellation while reading body)"} diff --git a/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug.log b/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..865615cccfa51ae2499481321bbd83f562a738df --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug.log @@ -0,0 +1,15 @@ +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_setup.py:_flush():80] Configure stats pid to 309550 +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-21 14:03:03,077 INFO 
MainThread:309550 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug.log +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug-internal.log +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_init.py:init():830] calling init triggers +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 
'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': 
False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_init.py:init():871] starting backend +2025-07-21 14:03:03,285 INFO MainThread:309550 [wandb_init.py:init():874] sending inform_init request +2025-07-21 14:03:03,286 INFO MainThread:309550 [wandb_init.py:init():882] backend started and connected +2025-07-21 
14:03:03,302 INFO MainThread:309550 [wandb_init.py:init():953] updated telemetry +2025-07-21 14:03:03,345 INFO MainThread:309550 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout diff --git a/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/run-esnp51q2.wandb b/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/run-esnp51q2.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/config.yaml b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c296dc14c1b357fe68bd9e6fe8b886b72a844a7c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/config.yaml @@ -0,0 +1,285 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + 8rebgclfceg9loyocndmo2x990qn9an8: + args: + - --node-ip-address=10.1.4.164 + - --node-manager-port=39807 + - --object-store-name=/tmp/ray/session_2025-07-21_14-05-48_702174_313219/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2025-07-21_14-05-48_702174_313219/sockets/raylet + - --redis-address=None + - --metrics-agent-port=59329 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=57728 + - --gcs-address=10.1.4.164:57619 + - --session-name=session_2025-07-21_14-05-48_702174_313219 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - --cluster-id=5e1b6583b033a5ac3a5096c47aedf2ae03a43832b5f14da9ae247fc3 + - --startup-token=64 + - --worker-launch-time-ms=1753077952181 + - --node-id=455c3d0267181f710150724f5570e592401a2c2957f60bcaf24ff8ae + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: https://github.com/PorUna-byte/PAR.git + host: dsw-266702-557cd69888-g24kv + os: 
Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + program: /root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-21T06:07:34.664044Z" + writerId: 8rebgclfceg9loyocndmo2x990qn9an8 + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: 
/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: 
false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/output.log b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..ea9d75e411eaa15741df68ff8613074c5a3303b0 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/output.log @@ -0,0 +1,77 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 317976, "uuid": "a2969af0271e4dd2a54b4cda655313f9", "closed": 
false} +Start validation... +key +prot_embeds +value +tensor([[[ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + ..., + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031]]]) +key +prompt_input_ids +value +[[[11190 311 279 ... 151665 151665 151665]]] +key +input_ids +value +[[[11190 311 279 ... 151665 151665 151665]]] +key +raw_prompt_ids +value +[[11190 311 279 12833 1995 3897 3685 323 279 12833 829 1207 24 56 19 34 + 17 11 7023 279 1429 4363 1186 5873 1276 52304 504 279 2701 2606 510 + 3798 25 220 15 13 330 45 22147 355 11 547 1 715 220 16 13 330 56715 + 98605 10530 11 328 1 2303 220 17 13 330 840 376 64341 11 328 1 2303 220 + 18 13 330 54370 46417 81 290 11 547 1 2303 220 19 13 330 3599 38554 11 + 386 1 2303 220 20 13 330 3727 55078 10530 292 2112 292 16496 11 386 1 + 2303 220 21 13 330 2120 559 307 11 328 1 2303 220 22 13 330 38 337 8212 + 40605 11 386 1 2303 220 220 23 13 330 43 1047 31454 27233 580 84 1263 + 11 386 1 2303 24 13 330 47 2328 7191 635 11 547 698]] +key +ground_truth +value +['4'] +key +multi_modal_data +value 
+['MATPSAAFEALMNGVTSWDVPEDAVPCELLLIGEASFPVMVNDMGQVLIAASSYGRGRLVVVSHEDYLVEAQLTPFLLNAVGWLCSSPGAPIGVHPSLAPLAKILEGSGVDAKVEPEVKDSLGVYCIDAYNETMTEKLVKFMKCGGGLLIGGQAWDWANQGEDERVLFTFPGNLVTSVAGIYFTDNKGDTSFFKVSKKMPKIPVLVSCEDDLSDDREELLHGISELDISNSDCFPSQLLVHGALAFPLGLDSYHGCVIAAARYGRGRVVVTGHKVLFTVGKLGPFLLNAVRWLDGGRRGKVVVQTELRTLSGLLAVGGIDTSIEPNLTSDASVYCFEPVSEVGVKELQEFVAEGGGLFVGAQAWWWAFKNPGVSPLARFPGNLLLNPFGISITSQSLNPGPFRTPKAGIRTYHFRSTLAEFQVIMGRKRGNVEKGWLAKLGPDGAAFLQIPAEEIPAYMSVHRLLRKLLSRYRLPVATRENPVINDCCRGAMLSLATGLAHSGSDLSLLVPEIEDMYSSPYLRPSESPITVEVNCTNPGTRYCWMSTGLYIPGRQIIEVSLPEAAASADLKIQIGCHTDDLTRASKLFRGPLVINRCCLDKPTKSITCLWGGLLYIIVPQNSKLGSVPVTVKGAVHAPYYKLGETTLEEWKRRIQENPGPWGELATDNIILTVPTANLRTLENPEPLLRLWDEVMQAVARLGAEPFPLRLPQRIVADVQISVGWMHAGYPIMCHLESVQELINEKLIRTKGLWGPVHELGRNQQRQEWEFPPHTTEATCNLWCVYVHETVLGIPRSRANIALWPPVREKRVRIYLSKGPNVKNWNAWTALETYLQLQEAFGWEPFIRLFTEYRNQTNLPTENVDKMNLWVKMFSHQVQKNLAPFFEAWAWPIQKEVATSLAYLPEWKENIMKLYLLTQMPH'] +开始valid generate_sequence +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=319880, ip=10.1.4.164, actor_id=98a9fd8ac8699eddde886a1b01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 193, in generate_sequences + prompt_input_ids = non_tensor_batch.pop["prompt_input_ids"] +TypeError: 
'builtin_function_or_method' object is not subscriptable +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=319879, ip=10.1.4.164, actor_id=10eea48f2ea264c060e0257601000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 193, in generate_sequences + prompt_input_ids = non_tensor_batch.pop["prompt_input_ids"] +TypeError: 'builtin_function_or_method' object is not subscriptable +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=319878, ip=10.1.4.164, actor_id=e85a62ca0dbb69fe1e14622801000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File 
"/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 193, in generate_sequences + prompt_input_ids = non_tensor_batch.pop["prompt_input_ids"] +TypeError: 'builtin_function_or_method' object is not subscriptable diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 
+async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 
+mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 +fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 
+sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 +requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0d66826f53b406c11b3f42776924d35d1a24d020 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/wandb-metadata.json @@ -0,0 +1,35 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-21T06:07:34.664044Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=39807", + "--object-store-name=/tmp/ray/session_2025-07-21_14-05-48_702174_313219/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-21_14-05-48_702174_313219/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=59329", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=57728", + "--gcs-address=10.1.4.164:57619", + "--session-name=session_2025-07-21_14-05-48_702174_313219", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=5e1b6583b033a5ac3a5096c47aedf2ae03a43832b5f14da9ae247fc3", + "--startup-token=64", + "--worker-launch-time-ms=1753077952181", + 
"--node-id=455c3d0267181f710150724f5570e592401a2c2957f60bcaf24ff8ae", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "writerId": "8rebgclfceg9loyocndmo2x990qn9an8" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/wandb-summary.json b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..533452d4a934da3482e9f08995d671c42966eba9 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":6,"_wandb":{"runtime":6}} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..3a020c591187aae5f1f529cb23d2665fe481c73e --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2025-07-21T14:07:35.211628547+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-21T14:07:50.875611638+08:00","level":"INFO","msg":"stream: created new stream","id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.876588753+08:00","level":"INFO","msg":"stream: started","id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.87663237+08:00","level":"INFO","msg":"sender: started","stream_id":"a9qblh0u"} 
+{"time":"2025-07-21T14:07:50.876605114+08:00","level":"INFO","msg":"handler: started","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.87665507+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:05.783504415+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":7.434542791},{"desc":"uploading data","runtime_seconds":0.571568597}],"total_operations":2}} +{"time":"2025-07-21T14:08:31.955353631+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-21T14:08:56.48244624+08:00","level":"INFO","msg":"stream: closing","id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.48558812+08:00","level":"INFO","msg":"handler: closed","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.485598269+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.485607803+08:00","level":"INFO","msg":"sender: closed","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.50017009+08:00","level":"INFO","msg":"stream: closed","id":"a9qblh0u"} diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug.log b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..b7e2572e2a286ff92c36c4fc2635c9b518e94415 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Configure stats pid to 317976 +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from 
/nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug.log +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug-internal.log +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:init():830] calling init triggers +2025-07-21 14:07:34,953 INFO MainThread:317976 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': 
'/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 
'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-21 14:07:34,953 INFO MainThread:317976 [wandb_init.py:init():871] starting backend +2025-07-21 14:07:35,172 
INFO MainThread:317976 [wandb_init.py:init():874] sending inform_init request +2025-07-21 14:07:35,174 INFO MainThread:317976 [wandb_init.py:init():882] backend started and connected +2025-07-21 14:07:35,186 INFO MainThread:317976 [wandb_init.py:init():953] updated telemetry +2025-07-21 14:07:35,302 INFO MainThread:317976 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-21 14:07:58,269 INFO MainThread:317976 [wandb_init.py:init():1029] starting run threads in backend +2025-07-21 14:07:58,556 INFO MainThread:317976 [wandb_run.py:_console_start():2458] atexit reg +2025-07-21 14:07:58,556 INFO MainThread:317976 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-21 14:07:58,562 INFO MainThread:317976 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-21 14:07:58,562 INFO MainThread:317976 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-21 14:07:58,574 INFO MainThread:317976 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-21 14:08:04,748 INFO MainThread:317976 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/a9qblh0u +2025-07-21 14:08:04,755 INFO MainThread:317976 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-21 14:08:04,767 INFO MainThread:317976 [wandb_run.py:_restore():2405] restore +2025-07-21 14:08:04,771 INFO MainThread:317976 [wandb_run.py:_restore():2411] restore done +2025-07-21 14:08:56,463 INFO MainThread:317976 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-21 14:08:56,469 INFO MainThread:317976 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-21 14:08:56,469 INFO MainThread:317976 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/run-a9qblh0u.wandb b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/run-a9qblh0u.wandb new file mode 100644 index 
0000000000000000000000000000000000000000..437f9686a6e8622c2dec80595104988edc47111b Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/run-a9qblh0u.wandb differ diff --git a/EasyR1-new/scripts/model_merger.py b/EasyR1-new/scripts/model_merger.py new file mode 100644 index 0000000000000000000000000000000000000000..4f4dd3daee1728f35e9fb0aaea94a04b0591a02c --- /dev/null +++ b/EasyR1-new/scripts/model_merger.py @@ -0,0 +1,187 @@ +# Copyright 2024 Bytedance Ltd. and/or its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os +import re +from concurrent.futures import ThreadPoolExecutor +from typing import Dict, List, Tuple + +import numpy as np +import torch +from torch.distributed._tensor import DTensor, Placement, Shard +from transformers import ( + AutoConfig, + AutoModelForCausalLM, + AutoModelForTokenClassification, + AutoModelForVision2Seq, + PretrainedConfig, + PreTrainedModel, +) + + +def merge_by_placement(tensors: List[torch.Tensor], placement: Placement): + if placement.is_replicate(): + return tensors[0] + elif placement.is_partial(): + raise NotImplementedError("Partial placement is not supported yet") + elif placement.is_shard(): + return torch.cat(tensors, dim=placement.dim).contiguous() + else: + raise ValueError(f"Unsupported placement: {placement}") + + +def upload_model_to_huggingface(local_path: str, remote_path: str): + # Push to hugging face + from huggingface_hub import HfApi + + api = HfApi() + api.create_repo(repo_id=remote_path, private=False, exist_ok=True) + api.upload_folder(repo_id=remote_path, folder_path=local_path, repo_type="model") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--local_dir", required=True, type=str, help="The path for your saved model") + parser.add_argument("--hf_upload_path", default=False, type=str, help="The path of the huggingface repo to upload") + args = parser.parse_args() + local_dir: str = args.local_dir + + assert not local_dir.endswith("huggingface"), "The local_dir should not end with huggingface." + + # copy rank zero to find the shape of (dp, fsdp) + rank = 0 + world_size = 0 + for filename in os.listdir(local_dir): + match = re.match(r"model_world_size_(\d+)_rank_0\.pt", filename) + if match: + world_size = match.group(1) + break + + assert world_size, "No model file with the proper format." 
+ + rank0_weight_path = os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt") + state_dict = torch.load(rank0_weight_path, map_location="cpu", weights_only=False) + pivot_key = sorted(state_dict.keys())[0] + weight = state_dict[pivot_key] + if isinstance(weight, DTensor): + # get sharding info + device_mesh = weight.device_mesh + mesh = device_mesh.mesh + mesh_dim_names = device_mesh.mesh_dim_names + else: + # for non-DTensor + mesh = np.array([int(world_size)], dtype=np.int64) + mesh_dim_names = ("fsdp",) + + print(f"Got device mesh {mesh}, mesh_dim_names {mesh_dim_names}") + + assert mesh_dim_names in (("fsdp",), ("ddp", "fsdp")), f"Unsupported mesh_dim_names {mesh_dim_names}." + + if "tp" in mesh_dim_names: + # fsdp * tp + total_shards = mesh.shape[-1] * mesh.shape[-2] + mesh_shape = (mesh.shape[-2], mesh.shape[-1]) + else: + # fsdp + total_shards = mesh.shape[-1] + mesh_shape = (mesh.shape[-1],) + + print(f"Processing {total_shards} model shards in total.") + model_state_dict_lst = [] + model_state_dict_lst.append(state_dict) + model_state_dict_lst.extend([""] * (total_shards - 1)) + + def process_one_shard(rank, model_state_dict_lst): + model_path = os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt") + state_dict = torch.load(model_path, map_location="cpu", weights_only=False) + model_state_dict_lst[rank] = state_dict + return state_dict + + with ThreadPoolExecutor(max_workers=min(32, os.cpu_count())) as executor: + for rank in range(1, total_shards): + executor.submit(process_one_shard, rank, model_state_dict_lst) + + state_dict: Dict[str, List[torch.Tensor]] = {} + param_placements: Dict[str, List[Placement]] = {} + keys = set(model_state_dict_lst[0].keys()) + for key in keys: + state_dict[key] = [] + for model_state_dict in model_state_dict_lst: + try: + tensor = model_state_dict.pop(key) + except Exception: + print(f"Cannot find key {key} in rank {rank}.") + + if isinstance(tensor, DTensor): + 
state_dict[key].append(tensor._local_tensor.bfloat16()) + placements = tuple(tensor.placements) + # replicated placement at ddp dimension can be discarded + if mesh_dim_names[0] == "ddp": + placements = placements[1:] + + if key not in param_placements: + param_placements[key] = placements + else: + assert param_placements[key] == placements + else: + state_dict[key].append(tensor.bfloat16()) + + del model_state_dict_lst + + for key in sorted(state_dict): + if not isinstance(state_dict[key], list): + print(f"No need to merge key {key}") + continue + + if key in param_placements: + # merge shards + placements: Tuple[Shard] = param_placements[key] + if len(mesh_shape) == 1: + # 1-D list, FSDP without TP + assert len(placements) == 1 + shards = state_dict[key] + state_dict[key] = merge_by_placement(shards, placements[0]) + else: + # 2-D list, FSDP + TP + raise NotImplementedError("FSDP + TP is not supported yet.") + else: + state_dict[key] = torch.cat(state_dict[key], dim=0) + + print("Merge completed.") + hf_path = os.path.join(local_dir, "huggingface") + config: PretrainedConfig = AutoConfig.from_pretrained(hf_path) + architectures: List[str] = getattr(config, "architectures", ["Unknown"]) + + if "ForTokenClassification" in architectures[0]: + AutoClass = AutoModelForTokenClassification + elif "ForCausalLM" in architectures[0]: + AutoClass = AutoModelForCausalLM + elif "ForConditionalGeneration" in architectures[0]: + AutoClass = AutoModelForVision2Seq + else: + raise NotImplementedError(f"Unknown architecture {architectures}.") + + with torch.device("meta"): + model: PreTrainedModel = AutoClass.from_config(config, torch_dtype=torch.bfloat16) + + assert isinstance(model, PreTrainedModel) + model.to_empty(device="cpu") + + print(f"Saving model to {hf_path}...") + model.save_pretrained(hf_path, state_dict=state_dict) + del state_dict, model + + if args.hf_upload_path: + upload_model_to_huggingface(hf_path, args.hf_upload_path) diff --git 
a/EasyR1-new/tests/check_license.py b/EasyR1-new/tests/check_license.py new file mode 100644 index 0000000000000000000000000000000000000000..4196c7a6a64a6b662bbde8f8a2550fbc111772cf --- /dev/null +++ b/EasyR1-new/tests/check_license.py @@ -0,0 +1,39 @@ +# Copyright 2024 Bytedance Ltd. and/or its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +from pathlib import Path + + +KEYWORDS = ("Copyright", "2024", "Bytedance") + + +def main(): + path_list: list[Path] = [] + for check_dir in sys.argv[1:]: + path_list.extend(Path(check_dir).glob("**/*.py")) + + for path in path_list: + with open(path.absolute(), encoding="utf-8") as f: + file_content = f.read().strip().split("\n") + license = "\n".join(file_content[:5]) + if not license: + continue + + print(f"Check license: {path}") + assert all(keyword in license for keyword in KEYWORDS), f"File {path} does not contain license." + + +if __name__ == "__main__": + main() diff --git a/EasyR1-new/tests/test_dataproto.py b/EasyR1-new/tests/test_dataproto.py new file mode 100644 index 0000000000000000000000000000000000000000..187346a922988c4b0dd3f13ac0603fc6aeef707c --- /dev/null +++ b/EasyR1-new/tests/test_dataproto.py @@ -0,0 +1,183 @@ +# Copyright 2024 Bytedance Ltd. and/or its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import os +from typing import Any, Dict, List, Optional + +import numpy as np +import pytest +import torch + +from verl.protocol import DataProto, pad_dataproto_to_divisor, unpad_dataproto + + +def _get_data_proto( + tensors: Optional[Dict[str, List[Any]]] = None, + non_tensors: Optional[Dict[str, List[Any]]] = None, + meta_info: Optional[Dict[str, Any]] = None, +) -> DataProto: + if tensors is None and non_tensors is None: + tensors = {"obs": [1, 2, 3, 4, 5, 6]} + non_tensors = {"labels": ["a", "b", "c", "d", "e", "f"]} + + if tensors is not None: + tensors = {k: torch.tensor(v) if not isinstance(v, torch.Tensor) else v for k, v in tensors.items()} + + if non_tensors is not None: + non_tensors = { + k: np.array(v, dtype=object) if not isinstance(v, np.ndarray) else v for k, v in non_tensors.items() + } + + meta_info = meta_info or {"info": "test_info"} + return DataProto.from_dict(tensors=tensors, non_tensors=non_tensors, meta_info=meta_info) + + +def _assert_equal(data1: DataProto, data2: Optional[DataProto] = None): + data2 = data2 or _get_data_proto() + if data1.batch is not None: + assert data1.batch.keys() == data2.batch.keys() + for key in data1.batch.keys(): + assert torch.all(data1.batch[key] == data2.batch[key]) + else: + assert data2.batch is None + + if data1.non_tensor_batch is not None: + assert data1.non_tensor_batch.keys() == data2.non_tensor_batch.keys() + for key in data1.non_tensor_batch.keys(): + assert np.all(data1.non_tensor_batch[key] == data2.non_tensor_batch[key]) + else: + assert data2.non_tensor_batch is None + + assert 
data1.meta_info == data2.meta_info + + +def test_tensor_dict_constructor(): + obs = torch.randn(100, 10) + act = torch.randn(100, 10, 3) + data = DataProto.from_dict(tensors={"obs": obs, "act": act}) + assert len(data) == 100 + + with pytest.raises(AssertionError): + data = DataProto.from_dict(tensors={"obs": obs, "act": act}, num_batch_dims=2) + + with pytest.raises(AssertionError): + data = DataProto.from_dict(tensors={"obs": obs, "act": act}, num_batch_dims=3) + + labels = np.array(["a", "b", "c"], dtype=object) + data = DataProto.from_dict(non_tensors={"labels": labels}) + assert len(data) == 3 + + +def test_getitem(): + data = _get_data_proto() + assert data[0].batch["obs"] == torch.tensor(1) + assert data[0].non_tensor_batch["labels"] == "a" + _assert_equal(data[1:3], _get_data_proto({"obs": [2, 3]}, {"labels": ["b", "c"]})) + _assert_equal(data[[0, 2]], _get_data_proto({"obs": [1, 3]}, {"labels": ["a", "c"]})) + _assert_equal(data[torch.tensor([1])], _get_data_proto({"obs": [2]}, {"labels": ["b"]})) + + +def test_select_pop(): + obs = torch.randn(100, 10) + act = torch.randn(100, 3) + dataset = _get_data_proto(tensors={"obs": obs, "act": act}, meta_info={"p": 1, "q": 2}) + selected_dataset = dataset.select(batch_keys=["obs"], meta_info_keys=["p"]) + + assert selected_dataset.batch.keys() == {"obs"} + assert selected_dataset.meta_info.keys() == {"p"} + assert dataset.batch.keys() == {"obs", "act"} + assert dataset.meta_info.keys() == {"p", "q"} + + popped_dataset = dataset.pop(batch_keys=["obs"], meta_info_keys=["p"]) + assert popped_dataset.batch.keys() == {"obs"} + assert popped_dataset.meta_info.keys() == {"p"} + assert dataset.batch.keys() == {"act"} + assert dataset.meta_info.keys() == {"q"} + + +def test_chunk_concat_split(): + data = _get_data_proto() + with pytest.raises(AssertionError): + data.chunk(5) + + chunked_data = data.chunk(2) + + assert len(chunked_data) == 2 + expected_data = _get_data_proto({"obs": [1, 2, 3]}, {"labels": ["a", "b", "c"]}) 
+ _assert_equal(chunked_data[0], expected_data) + + concat_data = DataProto.concat(chunked_data) + _assert_equal(concat_data, data) + + splitted_data = data.split(2) + assert len(splitted_data) == 3 + expected_data = _get_data_proto({"obs": [1, 2]}, {"labels": ["a", "b"]}) + _assert_equal(splitted_data[0], expected_data) + + +def test_reorder(): + data = _get_data_proto() + data.reorder(torch.tensor([3, 4, 2, 0, 1, 5])) + expected_data = _get_data_proto({"obs": [4, 5, 3, 1, 2, 6]}, {"labels": ["d", "e", "c", "a", "b", "f"]}) + _assert_equal(data, expected_data) + + +@pytest.mark.parametrize("interleave", [True, False]) +def test_repeat(interleave: bool): + data = _get_data_proto({"obs": [1, 2]}, {"labels": ["a", "b"]}) + repeated_data = data.repeat(repeat_times=2, interleave=interleave) + expected_tensors = {"obs": [1, 1, 2, 2] if interleave else [1, 2, 1, 2]} + expected_non_tensors = {"labels": ["a", "a", "b", "b"] if interleave else ["a", "b", "a", "b"]} + _assert_equal(repeated_data, _get_data_proto(expected_tensors, expected_non_tensors)) + + +@pytest.mark.parametrize("size_divisor", [2, 3]) +def test_dataproto_pad_unpad(size_divisor: int): + data = _get_data_proto({"obs": [1, 2, 3]}, {"labels": ["a", "b", "c"]}) + # test size_divisor=2 + padded_data, pad_size = pad_dataproto_to_divisor(data, size_divisor=size_divisor) + unpadded_data = unpad_dataproto(padded_data, pad_size=pad_size) + + if size_divisor == 2: + assert pad_size == 1 + expected_tensors = {"obs": [1, 2, 3, 1]} + expected_non_tensors = {"labels": ["a", "b", "c", "a"]} + expected_data = _get_data_proto(expected_tensors, expected_non_tensors) + else: + assert pad_size == 0 + expected_data = data + + _assert_equal(padded_data, expected_data) + _assert_equal(unpadded_data, data) + + +def test_data_proto_save_load(): + data = _get_data_proto() + data.save_to_disk("test_data.pt") + loaded_data = DataProto.load_from_disk("test_data.pt") + os.remove("test_data.pt") + _assert_equal(data, loaded_data) + + 
+def test_union_tensor_dict(): + obs = torch.randn(100, 10) + data1 = _get_data_proto({"obs": obs, "act": torch.randn(100, 3)}) + data2 = _get_data_proto({"obs": obs, "rew": torch.randn(100)}) + data1.union(data2) + + data1 = _get_data_proto({"obs": obs, "act": torch.randn(100, 3)}) + data2 = _get_data_proto({"obs": obs + 1, "rew": torch.randn(100)}) + with pytest.raises(ValueError): + data1.union(data2) diff --git a/EasyR1-new/tests/test_dataset.py b/EasyR1-new/tests/test_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..514b862744aad2b85b1dddc622c22689306acadb --- /dev/null +++ b/EasyR1-new/tests/test_dataset.py @@ -0,0 +1,60 @@ +# Copyright 2024 Bytedance Ltd. and/or its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from PIL.Image import Image + +from verl.utils.dataset import RLHFDataset +from verl.utils.tokenizer import get_processor, get_tokenizer + + +def test_image_dataset(): + tokenizer = get_tokenizer("Qwen/Qwen2.5-VL-7B-Instruct", use_fast=True) + processor = get_processor("Qwen/Qwen2.5-VL-7B-Instruct", use_fast=True) + dataset = RLHFDataset( + data_path="hiyouga/geometry3k@test", + tokenizer=tokenizer, + processor=processor, + prompt_key="problem", + answer_key="answer", + image_key="images", + max_prompt_length=16, + truncation="right", + filter_overlong_prompts=False, + ) + token_ids = [151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 151652, 151655] + assert set(dataset[0].keys()) == { + "problem", + "ground_truth", + "input_ids", + "attention_mask", + "position_ids", + "raw_prompt_ids", + "multi_modal_data", + } + assert dataset[0]["problem"] == ( + "Chords $\\overline{A C}$ and $\\overline{D F}$ are equidistant from the center. " + "If the radius of $\\odot G$ is 26 find $A C$" + ) + assert dataset[0]["ground_truth"] == "48" + assert torch.all(dataset[0]["input_ids"] == torch.tensor(token_ids)) + assert torch.all(dataset[0]["attention_mask"] == torch.ones(16)) + assert torch.all(dataset[0]["position_ids"] == torch.arange(16).unsqueeze(0).expand(3, -1)) + assert list(dataset[0]["position_ids"].size()) == [3, 16] # avoid fake positive caused by broadcasting + assert dataset[0]["raw_prompt_ids"] == token_ids + assert isinstance(dataset[0]["multi_modal_data"]["images"][0], Image) + + +if __name__ == "__main__": + test_image_dataset() diff --git a/EasyR1-new/verl/ProtT3/blip2.py b/EasyR1-new/verl/ProtT3/blip2.py new file mode 100644 index 0000000000000000000000000000000000000000..40c386877ebfe65229e7550b3f6a92b3df05d867 --- /dev/null +++ b/EasyR1-new/verl/ProtT3/blip2.py @@ -0,0 +1,126 @@ +""" + Copyright (c) 2023, salesforce.com, inc. + All rights reserved. 
+ SPDX-License-Identifier: BSD-3-Clause + For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause +""" +import torch +import torch.nn as nn + +from lavis.models.base_model import BaseModel +from lavis.models.blip2_models.Qformer import BertConfig, BertLMHeadModel +from transformers import BertTokenizer, BitsAndBytesConfig +from transformers import EsmTokenizer, EsmModel +import os +from pathlib import Path # 添加到文件顶部 + + +def get_gpu_memory(device=0): + # t = torch.cuda.get_device_properties(device).total_memory + # r = torch.cuda.memory_reserved(device) + # a = torch.cuda.memory_allocated(device) + # f = r-a # free inside reserved + free, total = torch.cuda.mem_get_info(device) + free = free / (1024 ** 3) + total = total / (1024 ** 3) + return free, total-free, total + + +class Blip2Base(BaseModel): + # @classmethod + # def init_tokenizer(cls): + # tokenizer = BertTokenizer.from_pretrained('./bert_pretrained/') + # tokenizer.add_special_tokens({"bos_token": "[DEC]"}) + # return tokenizer + + @classmethod + def init_Qformer(cls, model_name, num_query_token, plm_width, cross_attention_freq=2): + # assert model_name == 'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract' + # print("bert load microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext") + + print(f"Loading Qformer from: {model_name}") + + # 修改2:添加本地路径检查逻辑 + if not model_name.startswith('microsoft/') and Path(model_name).exists(): + print("Loading from local path...") + else: + print("Loading from Hugging Face Hub...") + + encoder_config = BertConfig.from_pretrained(model_name) + encoder_config.encoder_width = plm_width + # insert cross-attention layer every other block + encoder_config.add_cross_attention = True + encoder_config.cross_attention_freq = cross_attention_freq + encoder_config.query_length = num_query_token + + Qformer = BertLMHeadModel.from_pretrained(model_name, config=encoder_config) + query_tokens = nn.Parameter( + torch.zeros(1, 
num_query_token, encoder_config.hidden_size) + ) + query_tokens.data.normal_(mean=0.0, std=encoder_config.initializer_range) + + tokenizer = BertTokenizer.from_pretrained(model_name) + tokenizer.add_special_tokens({"bos_token": "[DEC]"}) + return tokenizer, Qformer, query_tokens + + + def init_protein_encoder(self, plm_name, load_4bit=False): + # assert plm_name.startswith('facebook/esm2') + # plm_tokenizer = EsmTokenizer.from_pretrained(plm_name) + # 检查是否为本地路径(判断是否存在文件夹或文件) + if os.path.isdir(plm_name) or os.path.exists(os.path.join(plm_name, "config.json")): + print(f"Loading local PLM from {plm_name}") + plm_tokenizer = EsmTokenizer.from_pretrained(plm_name) + else: + # 保留远程加载逻辑(可选) + print(f"Loading remote PLM from {plm_name}") + plm_tokenizer = EsmTokenizer.from_pretrained(plm_name) + + if not load_4bit: + plm = EsmModel.from_pretrained(plm_name, add_pooling_layer=False, torch_dtype=torch.bfloat16) + else: + quant_config = BitsAndBytesConfig( + load_in_4bit=True, + load_in_8bit=False, + llm_int8_threshold=6.0, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=torch.bfloat16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type='nf4', + ) + ## give a device map that assign all layers to device 0 + outputs = get_gpu_memory(6) + used_memory = outputs[1] + if used_memory > 1: + device_map = {"": 7} + else: + device_map = {"": 6} + plm = EsmModel.from_pretrained( + plm_name, + add_pooling_layer=False, + quantization_config=quant_config, + load_in_4bit=True, + load_in_8bit=False, + device_map=device_map, + torch_dtype=torch.bfloat16, + ) + + plm.num_features = plm.config.hidden_size + ln_layer = nn.LayerNorm(plm.num_features) + return plm_tokenizer, plm, ln_layer + + +def disabled_train(self, mode=True): + """Overwrite model.train with this function to make sure train/eval mode + does not change anymore.""" + return self + + +# class LayerNorm(nn.LayerNorm): +# """Subclass torch's LayerNorm to handle fp16.""" + +# def forward(self, x: torch.Tensor): +# 
orig_type = x.dtype +# ret = super().forward(x.type(torch.float32)) +# return ret.type(orig_type) + diff --git a/EasyR1-new/verl/ProtT3/blip2_opt.py b/EasyR1-new/verl/ProtT3/blip2_opt.py new file mode 100644 index 0000000000000000000000000000000000000000..aff4b8c549d37d8e853c9689268ec06a2a206ea9 --- /dev/null +++ b/EasyR1-new/verl/ProtT3/blip2_opt.py @@ -0,0 +1,450 @@ +""" + Copyright (c) 2023, salesforce.com, inc. + All rights reserved. + SPDX-License-Identifier: BSD-3-Clause + For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause +""" +import logging +import torch +import torch.nn as nn +from torch.cuda.amp import autocast as autocast +# from peft import get_peft_config, get_peft_model, get_peft_model_state_dict, LoraConfig, TaskType, PeftModel +from lavis.models.blip2_models.blip2 import disabled_train +from .blip2 import Blip2Base +from transformers import AutoTokenizer +from transformers import OPTForCausalLM +from transformers import AutoTokenizer, AutoModelForCausalLM +# from opendelta import LoraModel + +from transformers import BertTokenizer, BitsAndBytesConfig +from .help_funcs import hf_enable_gradient_checkpointing +import json +# from accelerate import Accelerator +# import torch.distributed as dist + +# from peft.tuners.lora import LoraLayer +# from peft import ( +# prepare_model_for_kbit_training, +# LoraConfig as PeftLoraConfig, +# get_peft_model, +# PeftModel +# ) + +# from opendelta.delta_configs + +opt_model_list = [ + "facebook/galactica-125m", + "facebook/galactica-1.3b", + "facebook/galactica-6.7b", + "facebook/galactica-30b", +] + +def get_gpu_memory(device=0): + # t = torch.cuda.get_device_properties(device).total_memory + # r = torch.cuda.memory_reserved(device) + # a = torch.cuda.memory_allocated(device) + # f = r-a # free inside reserved + free, total = torch.cuda.mem_get_info(device) + free = free / (1024 ** 3) + total = total / (1024 ** 3) + return free, total-free, total + +def 
mask_by_len(input, lens, fill_value=0): + ''' + input: shape = [N, D] + lens: shape = [N] + ''' + mask = torch.arange(input.shape[1], device=input.device).reshape(1, -1) + mask = mask < lens.reshape(-1, 1) + input[mask] = fill_value + return input + + + +class Blip2OPT(Blip2Base): + """ + BLIP2 first-stage model with Q-former and ViT. + Supported model types: + - pretrained: pretrained model with vit-g + - pretrain_vitL: pretrained model with vit-large + - coco: fintuned model on coco + Usage: + >>> from lavis.models import load_model + >>> model = load_model("blip2", "pretrain") + """ + def __init__( + self, + bert_name, + num_query_token=32, + cross_attention_freq=2, + plm_model="facebook/esm2_t30_150M_UR50D", + plm_tune='freeze', + llm_name="facebook/galactica-1.3b", + llm_tune='freeze', + peft_dir='', + args=None, + ): + super().__init__() + #self.args = args + #self.enbale_gradient_checkpointing = args.enbale_gradient_checkpointing + + self.plm_tokenizer, self.plm, self.ln_layer = self.init_protein_encoder(plm_model) + self.plm_tune = plm_tune + # if plm_tune == 'freeze': + # for name, param in self.plm.named_parameters(): + # param.requires_grad = False + # self.plm = self.plm.eval() + # self.plm.train = disabled_train + # logging.info("freeze plm encoder") + # elif plm_tune == 'lora': + # lora_config = DeltaLoraConfig(args.lora_r, + # args.lora_alpha, + # args.lora_dropout, + # modified_modules=["query", "value"]) + # self.delta = LoraModel.from_config(lora_config, self.plm) + # self.delta.freeze_module(set_state_dict=False) + # self.delta.log() + # else: + # raise NotImplementedError() + + self.num_query_token = num_query_token + self.qformer_tokenizer, self.Qformer, self.query_tokens = self.init_Qformer(bert_name, num_query_token, self.plm.num_features, cross_attention_freq) + ### remove the unused parameters + self.Qformer.cls = None + self.Qformer.bert.embeddings.word_embeddings = None + self.Qformer.bert.embeddings.position_embeddings = None + for layer 
in self.Qformer.bert.encoder.layer: + layer.output = None + layer.intermediate = None + + ## initialize llm model + # self.init_distributed() + self.llm_model, self.llm_tokenizer = self.load_llm(llm_name) + + #self.llm_model, self.llm_tokenizer = self.load_model_on_single_gpu(llm_name) + self.eos_token_id = self.llm_tokenizer.eos_token_id + self.pad_token_id = self.llm_tokenizer.pad_token_id + + # if llm_tune == 'freeze': + # for name, param in self.llm_model.named_parameters(): + # param.requires_grad = False + # elif llm_tune == 'full': + # for name, param in self.llm_model.named_parameters(): + # param.requires_grad = True + # elif llm_tune == 'lora': + # lora_config = DeltaLoraConfig(args.lora_r, + # args.lora_alpha, + # args.lora_dropout,) + # self.delta = LoraModel.from_config(lora_config, self.llm_model) + # self.delta.freeze_module(set_state_dict=False) + # self.delta.log() + # elif llm_tune == 'mid_lora': + # lora_config = DeltaLoraConfig(args.lora_r, args.lora_alpha, args.lora_dropout, modified_modules=["q_proj", "v_proj", 'k_proj', "out_proj", "fc1", "fc2"]) + # self.delta = LoraModel.from_config(lora_config, self.llm_model) + # self.delta.freeze_module(set_state_dict=False) + # self.delta.log() + # elif llm_tune == 'peft_lora': + # config = PeftLoraConfig( + # r=args.lora_r, + # lora_alpha=args.lora_alpha, + # # target_modules=modules, + # lora_dropout=args.lora_dropout, + # bias="none", + # task_type="CAUSAL_LM", + # ) + # self.llm_model = get_peft_model(self.llm_model, config) + # for name, module in self.llm_model.named_modules(): + # if isinstance(module, LoraLayer): + # if True: + # module = module.to(torch.bfloat16) + # if 'norm' in name: + # module = module.to(torch.float32) + # if 'lm_head' in name or 'embed_tokens' in name: + # if hasattr(module, 'weight'): + # if True and module.weight.dtype == torch.float32: + # module = module.to(torch.bfloat16) + # else: + # raise NotImplementedError() + + ## fixme: this is different from the original BLIP2 
+ # self.eos_token_id = self.llm_tokenizer( + # "\n", add_special_tokens=False + # ).input_ids[0] + self.opt_proj = nn.Linear(self.Qformer.config.hidden_size, self.llm_model.config.hidden_size) + + def load_llm(self, llm_model, load_4bit=False, enable_gradient_checkpointing=True): + llm_tokenizer = AutoTokenizer.from_pretrained(llm_model, use_fast=False, padding_side='right') + llm_tokenizer.add_special_tokens({'pad_token': ''}) + + special_tokens_dict = {'additional_special_tokens': ['', '']} + llm_tokenizer.add_special_tokens(special_tokens_dict) + + llm_model = AutoModelForCausalLM.from_pretrained(llm_model, torch_dtype=torch.bfloat16) + llm_model.resize_token_embeddings(len(llm_tokenizer)) ## this will cause bug when + + return llm_model, llm_tokenizer + + + # def forward(self, batch): + # prot_batch, text_batch = batch + # prot_embeds = self.plm(**prot_batch, return_dict=True) + # prot_embeds = prot_embeds.last_hidden_state + # if self.plm_tune == 'freeze': + # prot_embeds = prot_embeds.detach() + # prot_embeds = self.ln_layer(prot_embeds) + # device = prot_embeds.device + # query_tokens = self.query_tokens.expand(prot_embeds.shape[0], -1, -1) + # query_output = self.Qformer.bert( + # query_embeds=query_tokens, + # encoder_hidden_states=prot_embeds, + # encoder_attention_mask=prot_batch.attention_mask, + # return_dict=True, + # ) + # prot_tokens = self.opt_proj(query_output.last_hidden_state) + # prot_mask = torch.ones(prot_tokens.shape[:2], dtype=text_batch.attention_mask.dtype, device=device) + # prot_empty_targets = torch.ones(prot_tokens.shape[:2], dtype=torch.long, device=device).fill_(-100) + + # targets = text_batch.input_ids.masked_fill(text_batch.input_ids == self.llm_tokenizer.pad_token_id, -100) + # targets = targets.masked_fill(text_batch.token_type_ids == 0, -100) + # targets = torch.cat([prot_empty_targets, targets], dim=1) + + # inputs_embeds = self.llm_model.get_input_embeddings()(text_batch.input_ids) + # inputs_embeds = 
torch.cat((prot_tokens, inputs_embeds), dim=1) + # attention_mask = torch.cat([prot_mask, text_batch.attention_mask], dim=1) + + # outputs = self.llm_model( + # inputs_embeds=inputs_embeds, + # attention_mask=attention_mask, + # return_dict=True, + # labels=targets, + # ) + # loss = outputs.loss + # return loss + + def forward(self, batch): + prot_batch, prompt_batch, text_dict = batch + text_seqs = text_dict['targets'] + + prot_embeds = self.plm(**prot_batch, return_dict=True) + prot_embeds = prot_embeds.last_hidden_state + if self.plm_tune == 'freeze': + prot_embeds = prot_embeds.detach() + prot_embeds = self.ln_layer(prot_embeds) + device = prot_embeds.device + query_tokens = self.query_tokens.expand(prot_embeds.shape[0], -1, -1) + query_output = self.Qformer.bert( + query_embeds=query_tokens, + encoder_hidden_states=prot_embeds, + encoder_attention_mask=prot_batch.attention_mask, + return_dict=True, + ) + prot_tokens = self.opt_proj(query_output.last_hidden_state) + prot_mask = torch.ones(prot_tokens.shape[:2], dtype=torch.long, device=device) + + # === Step 3: 编码 prompt 输入 === + prompt_embeds = self.llm_model.get_input_embeddings()(prompt_batch.input_ids) # [B, L_prompt, D_llm] + prompt_mask = prompt_batch['attention_mask'] + + + text_batch = self.llm_tokenizer( + list(text_seqs), + padding='longest', + truncation=True, + max_length=1024, + return_tensors='pt' + ).to(device) + target_embeds = self.llm_model.get_input_embeddings()(text_batch['input_ids']) # [B, T, D] + target_mask = text_batch['attention_mask'] + targets = text_batch['input_ids'].masked_fill(text_batch['input_ids'] == self.llm_tokenizer.pad_token_id, -100) + + inputs_embeds = torch.cat([prot_tokens, prompt_embeds, target_embeds], dim=1) + attention_mask = torch.cat([prot_mask, prompt_mask, target_mask], dim=1) + + # === Step 7: 构造 labels,只监督 target 部分 === + prot_label_pad = torch.full(prot_tokens.shape[:2], -100, dtype=torch.long, device=device) + prompt_label_pad = 
torch.full(prompt_mask.shape, -100, dtype=torch.long, device=device) + labels = torch.cat([prot_label_pad, prompt_label_pad, targets], dim=1) + + # === Step 8: 送入 LLM === + outputs = self.llm_model( + inputs_embeds=inputs_embeds, + attention_mask=attention_mask, + labels=labels, + return_dict=True, + ) + loss = outputs.loss + # prot_mask = torch.ones(prot_tokens.shape[:2], dtype=text_batch.attention_mask.dtype, device=device) + # prot_empty_targets = torch.ones(prot_tokens.shape[:2], dtype=torch.long, device=device).fill_(-100) + # empty_targets = torch.ones(prompt_batch.attention_mask.shape, dtype=torch.long, device=device).fill_(-100) + # targets = text_batch.input_ids.masked_fill(text_batch.input_ids == self.llm_tokenizer.pad_token_id, -100) + # targets = torch.cat([prot_empty_targets, empty_targets, targets], dim=1) + + # prompt_embeds = self.llm_model.get_input_embeddings()(prompt_batch.input_ids) + # inputs_embeds = self.llm_model.get_input_embeddings()(text_batch.input_ids) + # inputs_embeds = torch.cat((prot_tokens, prompt_embeds, inputs_embeds), dim=1) + # attention_mask = torch.cat([prot_mask, prompt_batch.attention_mask, text_batch.attention_mask], dim=1) + + # outputs = self.llm_model( + # inputs_embeds=inputs_embeds, + # attention_mask=attention_mask, + # return_dict=True, + # labels=targets, + # ) + # loss = outputs.loss + return loss + + # def forwardv2(self, batch): + # prot_batch, prompt_batch, text_batch = batch + # prot_embeds = self.plm(**prot_batch, return_dict=True) + # prot_embeds = prot_embeds.last_hidden_state + # if self.plm_tune == 'freeze': + # prot_embeds = prot_embeds.detach() + # prot_embeds = self.ln_layer(prot_embeds) + # device = prot_embeds.device + # query_tokens = self.query_tokens.expand(prot_embeds.shape[0], -1, -1) + # query_output = self.Qformer.bert( + # query_embeds=query_tokens, + # encoder_hidden_states=prot_embeds, + # encoder_attention_mask=prot_batch.attention_mask, + # return_dict=True, + # ) + # prot_tokens = 
self.opt_proj(query_output.last_hidden_state) + # prot_mask = torch.ones(prot_tokens.shape[:2], dtype=text_batch.attention_mask.dtype, device=device) + # targets = text_batch.input_ids.masked_fill(text_batch.input_ids == self.llm_tokenizer.pad_token_id, -100) + + # ### forward prefix + # prompt_embeds = self.llm_model.get_input_embeddings()(prompt_batch.input_ids) + # prefix_embeds = torch.cat([prot_tokens, prompt_embeds], dim=1) + # prefix_mask = torch.cat([prot_mask, prompt_batch.attention_mask], dim=1) + # prefix_output = self.llm_model.model( + # inputs_embeds=prefix_embeds, + # attention_mask=prefix_mask, + # use_cache=True, + # return_dict=True, + # ) + + # ## forward decoding + # if False: + # attention_mask = torch.cat([prot_mask, prompt_batch.attention_mask, text_batch.attention_mask], dim=1) + # else: + # attention_mask = text_batch.attention_mask + # print(prefix_output.past_key_values) + # outputs = self.llm_model( + # input_ids=text_batch.input_ids, + # attention_mask=attention_mask, + # past_key_values=prefix_output.past_key_values, + # return_dict=True, + # labels=targets, + # ) + # loss = outputs.loss + # return loss + + @torch.no_grad() + def generate( + self, + samples, + do_sample=False, + num_beams=5, + max_length=128, + min_length=1, + top_p=0.9, + repetition_penalty=1.0, + length_penalty=1.0, + num_captions=1, + temperature=1, + ): + """ + Args: + samples (dict): A dictionary containing the following keys: + - image (torch.Tensor): A tensor of shape (batch_size, 3, H, W) + num_beams (int): Number of beams for beam search. 1 means no beam search. + max_length (int): The maximum length of the sequence to be generated. + min_length (int): The minimum length of the sequence to be generated. + top_p (float): The cumulative probability for nucleus sampling. + repetition_penalty (float): The parameter for repetition penalty. 1.0 means no penalty. + num_captions (int): Number of captions to be generated for each image. 
+ Returns: + captions (list): A list of strings of length batch_size * num_captions. + """ + prot_batch = samples['prot_batch'] + prompt_batch = samples['prompt_batch'] + + # with self.maybe_autocast(): + prot_embeds = self.plm(**prot_batch, return_dict=True) + prot_embeds = self.ln_layer(prot_embeds.last_hidden_state) + + query_tokens = self.query_tokens.expand(prot_embeds.shape[0], -1, -1) + query_output = self.Qformer.bert( + query_embeds=query_tokens, + encoder_hidden_states=prot_embeds, + encoder_attention_mask=prot_batch['attention_mask'], + return_dict=True, + ) + prot_tokens = self.opt_proj(query_output.last_hidden_state) + + + + # prompt_batch = samples['prompt_batch'] + prompt_input_ids = prompt_batch['input_ids'] # shape: [B, L] + # for i, ids in enumerate(prompt_input_ids): + # print(f"Prompt {i} token length: {len(ids)}") + decoded_texts = [self.llm_tokenizer.decode(ids, skip_special_tokens=True) for ids in prompt_input_ids] + #print(decoded_texts) + save_path = "decoded_prompts.json" + + # 将 list 写入 JSON 文件 + with open(save_path, 'w', encoding='utf-8') as f: + json.dump(decoded_texts, f, ensure_ascii=False, indent=4) + + prompt_attention_mask = prompt_batch['attention_mask'] + prompt_embeds = self.llm_model.model.embed_tokens(prompt_input_ids) + + + + # device = prompt_input_ids.device + # prot_token_id = self.llm_tokenizer.convert_tokens_to_ids("") + # text_token_id = self.llm_tokenizer.convert_tokens_to_ids("") + # text_token_embed = self.llm_model.model.embed_tokens(torch.tensor([[text_token_id]], device=device)).expand(prompt_embeds.shape[0], -1, -1) + # prot_token_embed = self.llm_model.model.embed_tokens(torch.tensor([[prot_token_id]], device=device)).expand(prompt_embeds.shape[0], -1, -1) + + # inputs_embeds = torch.cat([text_token_embed, prompt_embeds, prot_token_embed, prot_tokens], dim=1) + + # B = prompt_input_ids.shape[0] + # special_attention = torch.ones((B, 1), dtype=prompt_attention_mask.dtype, device=prompt_attention_mask.device) + # 
prot_attention_mask = torch.ones(prot_tokens.shape[:2], dtype=prompt_attention_mask.dtype, device=prompt_attention_mask.device) + # attention_mask = torch.cat([ + # special_attention, # [TEXT] + # prompt_attention_mask, # prompt + # special_attention, # [PROT] + # prot_attention_mask # protein + # ], dim=1) + # inputs_embeds = torch.cat((prot_tokens, prompt_embeds), dim=1) + inputs_embeds = torch.cat((prompt_embeds, prot_tokens), dim=1) + + prot_attention_mask = torch.ones(prot_tokens.shape[:2], dtype=prompt_attention_mask.dtype, device=prompt_attention_mask.device) + #attention_mask = torch.cat([prot_attention_mask, prompt_attention_mask], dim=1) + attention_mask = torch.cat([ prompt_attention_mask,prot_attention_mask], dim=1) + + + + outputs = self.llm_model.generate( + inputs_embeds=prompt_embeds, + attention_mask=prompt_attention_mask, + do_sample=do_sample, + top_p=top_p, + temperature=temperature, + num_beams=num_beams, + max_new_tokens=max_length, + min_length=min_length, + # pad_token_id=self.pad_token_id, + eos_token_id=self.eos_token_id, + repetition_penalty=repetition_penalty, + length_penalty=length_penalty, + num_return_sequences=num_captions, + use_cache=True, + cache_implementation="hybrid" + ) + output_text = self.llm_tokenizer.batch_decode(outputs, skip_special_tokens=True) + output_text = [text.strip() for text in output_text] + # print(output_text) + return output_text diff --git a/EasyR1-new/verl/ProtT3/blip2_stage2.py b/EasyR1-new/verl/ProtT3/blip2_stage2.py new file mode 100644 index 0000000000000000000000000000000000000000..6ec563453278fa3220290b4f3283f6d21ae6ca35 --- /dev/null +++ b/EasyR1-new/verl/ProtT3/blip2_stage2.py @@ -0,0 +1,344 @@ +import os +import torch +from .blip2_opt import Blip2OPT +import pytorch_lightning as pl +from torch import optim +from lavis.common.optims import LinearWarmupCosineLRScheduler, LinearWarmupStepLRScheduler +import json +import torch.distributed as dist +#from peft import LoraConfig, TaskType +from typing 
def get_module_state_dict(state_dict, module_name):
    """Extract the sub-state-dict stored under ``module_name``.

    Keys like ``"<module_name>.<sub>"`` are returned with the prefix stripped.
    If an entry's key equals ``module_name`` exactly, that single value is
    returned directly.

    BUG FIX: previously matched with ``key.startswith(module_name)``, so e.g.
    ``module_name='encoder'`` wrongly captured keys of ``encoder2.*``. Now the
    prefix must be followed by a '.' (or match exactly).
    """
    prefix = module_name + '.'
    module_state_dict = {}
    for key, value in state_dict.items():
        if key == module_name:
            return value
        if key.startswith(prefix):
            module_state_dict[key[len(prefix):]] = value
    return module_state_dict


class Blip2Stage2(pl.LightningModule):
    """Lightning wrapper around Blip2OPT for stage-2 (LLM-conditioned) training."""

    def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
        """Drop frozen (non-trainable) weights from the checkpoint to save space."""
        to_be_removed = []
        for key, value in checkpoint['state_dict'].items():
            try:
                if not self.get_parameter(key).requires_grad:
                    to_be_removed.append(key)
            except AttributeError:
                # Keys that are not parameters (e.g. buffers) are dropped too.
                to_be_removed.append(key)
        for key in to_be_removed:
            checkpoint['state_dict'].pop(key)

    def __init__(self, args):
        super().__init__()
        if isinstance(args, dict):
            args = AttrDict(**args)

        self.args = args
        # BUG FIX: a trailing comma previously made this a 1-tuple ``(10,)``,
        # which breaks later ``epoch % self.caption_eval_epoch`` arithmetic.
        self.caption_eval_epoch = 10  # args.other.caption_eval_epoch
        self.do_sample = False
        self.num_beams = 5  # args.OPT.num_beams
        self.max_inference_len = 128
        self.min_inference_len = 1
        self.llm_tune = 'freeze'
        self.enable_flash = False

        # NOTE(review): hard-coded local paths; should come from ``args``.
        self.blip2 = Blip2OPT(
            "/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
            8,  # args.Bert.num_query_token
            2,  # args.Bert.cross_attention_freq
            "/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",  # args.plm_model.plm_model
            'freeze',  # args.plm_model.plm_tune
            "/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged",  # args.OPT.llm_name
            'freeze',  # args.OPT.llm_tune
            "",  # args.OPT.peft_dir
            args,
        )
ckpt['state_dict'] +# state_dict = {k.split('blip2qformer.')[1]:v for k, v in state_dict.items()} +# self.blip2.load_state_dict(state_dict, strict=False) +# return self + +# def configure_optimizers(self): +# self.trainer.fit_loop.setup_data() +# warmup_steps = min(len(self.trainer.train_dataloader), self.args.warmup_steps) +# optimizer = optim.AdamW(self.parameters(), lr=self.args.init_lr, weight_decay=self.args.weight_decay) +# if self.args.scheduler == 'linear_warmup_cosine_lr': +# self.scheduler = LinearWarmupCosineLRScheduler(optimizer, self.args.max_epochs, self.args.min_lr, self.args.init_lr, warmup_steps, self.args.warmup_lr) +# elif self.args.scheduler == 'linear_warmup_step_lr': +# self.scheduler = LinearWarmupStepLRScheduler(optimizer, self.args.max_epochs, self.args.min_lr, self.args.init_lr, self.args.lr_decay_rate, self.args.warmup_lr, warmup_steps) +# elif self.args.scheduler == 'None': +# self.scheduler = None +# else: +# raise NotImplementedError() +# return optimizer + +# def save_predictions(self, predictions, targets, q_types=None, log_prefix=''): +# assert len(predictions) == len(targets) +# if log_prefix: +# name = f'{log_prefix}_predictions.txt' +# else: +# name = 'predictions.txt' +# with open(os.path.join(self.logger.log_dir, name), 'w', encoding='utf8') as f: +# if q_types is not None: +# for p, t, q in zip(predictions, targets, q_types): +# line = {'prediction': p, 'target': t, 'q_type': q} +# f.write(json.dumps(line, ensure_ascii=True) + '\n') +# else: +# for p, t in zip(predictions, targets): +# line = {'prediction': p, 'target': t} +# f.write(json.dumps(line, ensure_ascii=True) + '\n') + +# def on_validation_epoch_start(self) -> None: +# if self.enable_flash: +# replace_opt_attn_with_original_attn() +# self.saved_dict_list = [] +# self.prediction_list0 = [] +# self.target_list0 = [] +# self.prediction_list1 = [] +# self.target_list1 = [] + +# @torch.no_grad() +# def validation_step(self, batch, batch_idx, dataloader_idx=0): +# 
prot_batch, prompt_batch, target_dict = batch +# if (dataloader_idx % 2) == 0: +# # text_batch = batch[-1] +# # batch_size = text_batch.input_ids.shape[0] +# batch_size = len(target_dict['targets']) # ✅ 正确获取batch大小 +# loss = self.blip2(batch) +# ###============== Overall Loss ===================### +# self.log(f"dataloader{dataloader_idx}/val loss", float(loss), batch_size=batch_size, sync_dist=True) +# elif (dataloader_idx % 2) == 1: +# if (self.current_epoch+1) % self.caption_eval_epoch != 0: +# return +# # prot_batch, prompt_batch, target_dict = batch +# ###============== Captioning Results ===================### +# samples = {'prot_batch': prot_batch, 'prompt_batch': prompt_batch} +# predictions = self.blip2.generate( +# samples, +# do_sample=self.do_sample, +# num_beams=self.num_beams, +# max_length=self.max_inference_len, +# min_length=self.min_inference_len +# ) +# target_dict['predictions'] = predictions +# self.saved_dict_list.append(target_dict) + +# def gather_dict_results(self, dict_list): +# list_of_dict_list = [None for _ in range(self.trainer.world_size)] +# dist.all_gather_object(list_of_dict_list, dict_list) +# dict_list = [i for ii in list_of_dict_list for i in ii] ## dict list, each dict has values that are lists of predictions, etc. +# keys = dict_list[0].keys() +# gathered_dict = {} # each value is a list of predictions, etc. 
+# for key in keys: +# gathered_dict[key] = [i for d in dict_list for i in d[key]] +# dict_list = [] +# for i in range(len(gathered_dict['predictions'])): +# d = {k:gathered_dict[k][i] for k in keys} +# dict_list.append(d) +# return dict_list + +# def save_results(self, dict_list, log_prefix=""): +# ## save the results +# if log_prefix: +# name = f'{log_prefix}_predictions.txt' +# else: +# name = 'predictions.txt' +# with open(name, 'w', encoding='utf8') as f: +# for d in dict_list: +# f.write(json.dumps(d, ensure_ascii=True) + '\n') + +# def on_validation_epoch_end(self): +# if self.enable_flash: +# replace_opt_attn_with_flash_attn() +# if (self.current_epoch+1) % self.caption_eval_epoch != 0: +# return +# result_list = self.gather_dict_results(self.saved_dict_list) +# ## empty cache +# self.saved_dict_list = [] + +# if self.global_rank == 0: +# self.save_results(result_list, 'dataset0') +# all_predictions = [i['predictions'] for i in result_list] +# all_targets = [i['targets'] for i in result_list] + +# log_prefix = 'dataset0' ## fixme: this is just a placeholder +# if 'q_types' in result_list[0]: +# ## evaluate protein qa +# pass +# else: +# ## evaluate captioning +# bleu2, bleu4, rouge_1, rouge_2, rouge_l, meteor_score = \ +# caption_evaluate(all_predictions, all_targets, self.blip2.llm_tokenizer, self.max_inference_len) +# acc = evaluate_exact_match(all_predictions, all_targets) +# self.log(f"{log_prefix}/acc", acc, sync_dist=False) +# self.log(f"{log_prefix}/bleu2", bleu2, sync_dist=False) +# self.log(f"{log_prefix}/bleu4", bleu4, sync_dist=False) +# self.log(f"{log_prefix}/rouge_1", rouge_1, sync_dist=False) +# self.log(f"{log_prefix}/rouge_2", rouge_2, sync_dist=False) +# self.log(f"{log_prefix}/rouge_l", rouge_l, sync_dist=False) +# self.log(f"{log_prefix}/meteor_score", meteor_score, sync_dist=False) + +# @torch.no_grad() +# def validation_step_old(self, batch, batch_idx, dataloader_idx=0): +# if (dataloader_idx % 2) == 0: +# text_batch = batch[-1] +# 
batch_size = text_batch.input_ids.shape[0] +# loss = self.blip2(batch) +# ###============== Overall Loss ===================### +# self.log(f"dataloader{dataloader_idx}/val loss", float(loss), batch_size=batch_size, sync_dist=True) +# elif (dataloader_idx % 2) == 1: +# if (self.current_epoch+1) % self.caption_eval_epoch != 0: +# return +# prot_batch, prompt_batch, target_dict = batch +# ###============== Captioning Results ===================### +# samples = {'prot_batch': prot_batch, 'prompt_batch': prompt_batch} +# predictions = self.blip2.generate( +# samples, +# do_sample=self.do_sample, +# num_beams=self.num_beams, +# max_length=self.max_inference_len, +# min_length=self.min_inference_len +# ) +# if dataloader_idx // 2 == 0: +# self.prediction_list0.append(predictions) +# self.target_list0.append(target_dict) +# elif dataloader_idx // 2 == 1: +# self.prediction_list1.append(predictions) +# self.target_list1.append(target_dict) +# else: +# raise NotImplementedError +# else: +# raise NotImplementedError + +# def on_validation_epoch_end_old(self): +# if self.enable_flash: +# replace_opt_attn_with_flash_attn() +# if (self.current_epoch+1) % self.caption_eval_epoch != 0: +# return +# predictions0 = [i for ii in self.prediction_list0 for i in ii] +# targets0 = [i for ii in self.target_list0 for i in ii['answers']] +# if 'q_types' in self.target_list0[0]: +# q_types0 = [i for ii in self.target_list0 for i in ii['q_types']] +# self.reduce_and_evaluate_qa(predictions0, targets0, q_types0, 'dataset0') +# else: +# self.reduce_and_evaluate_captioning(predictions0, targets0, 'dataset0') + +# if len(self.prediction_list1) > 0: +# predictions1 = [i for ii in self.prediction_list1 for i in ii] +# targets1 = [i for ii in self.target_list1 for i in ii] +# self.reduce_and_evaluate_captioning(predictions1, targets1, 'dataset1') + +# def reduce_and_evaluate_qa(self, predictions, targets, q_types, log_prefix=""): +# all_predictions = [None for _ in range(self.trainer.world_size)] 
+# all_targets = [None for _ in range(self.trainer.world_size)] +# all_q_types = [None for _ in range(self.trainer.world_size)] +# dist.all_gather_object(all_predictions, predictions) +# dist.all_gather_object(all_targets, targets) +# dist.all_gather_object(all_q_types, q_types) +# if self.global_rank == 0: +# all_predictions = [i for ii in all_predictions for i in ii] +# all_targets = [i for ii in all_targets for i in ii] +# all_q_types = [i for ii in all_q_types for i in ii] +# self.save_predictions(all_predictions, all_targets, all_q_types, log_prefix=log_prefix) + +# def reduce_and_evaluate_captioning(self, predictions, targets, log_prefix=""): +# all_predictions = [None for _ in range(self.trainer.world_size)] +# all_targets = [None for _ in range(self.trainer.world_size)] +# dist.all_gather_object(all_predictions, predictions) +# dist.all_gather_object(all_targets, targets) +# if self.global_rank == 0: +# all_predictions = [i for ii in all_predictions for i in ii] +# all_targets = [i for ii in all_targets for i in ii] +# self.save_predictions(all_predictions, all_targets, log_prefix) +# ## fixme: I am not sure if the max length is the same as previous experiments +# bleu2, bleu4, rouge_1, rouge_2, rouge_l, meteor_score = \ +# caption_evaluate(all_predictions, all_targets, self.blip2.llm_tokenizer, self.max_inference_len) +# acc = evaluate_exact_match(all_predictions, all_targets) +# self.log(f"{log_prefix}/acc", acc, sync_dist=False) +# self.log(f"{log_prefix}/bleu2", bleu2, sync_dist=False) +# self.log(f"{log_prefix}/bleu4", bleu4, sync_dist=False) +# self.log(f"{log_prefix}/rouge_1", rouge_1, sync_dist=False) +# self.log(f"{log_prefix}/rouge_2", rouge_2, sync_dist=False) +# self.log(f"{log_prefix}/rouge_l", rouge_l, sync_dist=False) +# self.log(f"{log_prefix}/meteor_score", meteor_score, sync_dist=False) + +# def training_step(self, batch, batch_idx): +# if self.scheduler: +# self.scheduler.step(self.trainer.current_epoch, self.trainer.global_step) + +# 
#batch_size = batch[-1].input_ids.size(0) +# batch_size = len(batch[-1]['targets']) +# ###============== Overall Loss ===================### +# loss = self.blip2(batch) +# self.log("loss", float(loss), batch_size=batch_size, sync_dist=True) +# self.log("lr", self.trainer.optimizers[0].param_groups[0]['lr'], batch_size=batch_size, sync_dist=True) +# return loss + +# @staticmethod +# def add_model_specific_args(parent_parser): +# parser = parent_parser.add_argument_group("ProtBlip2") +# # train mode +# parser.add_argument('--save_every_n_epochs', type=int, default=0) + +# # Bert +# parser.add_argument('--bert_name', type=str, default='microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract') +# parser.add_argument('--cross_attention_freq', type=int, default=2) +# parser.add_argument('--num_query_token', type=int, default=8) +# # OPT +# parser.add_argument('--llm_name', type=str, default="facebook/galactica-1.3b") +# parser.add_argument('--num_beams', type=int, default=5) +# parser.add_argument('--do_sample', action='store_true', default=False) +# parser.add_argument('--max_inference_len', type=int, default=128) +# parser.add_argument('--min_inference_len', type=int, default=1) +# parser.add_argument('--llm_tune', type=str, default='freeze') +# parser.add_argument('--peft_config', type=str, default='') +# parser.add_argument('--peft_dir', type=str, default='') + +# ## plm model +# parser.add_argument('--plm_model', type=str, default='facebook/esm2_t30_150M_UR50D') +# parser.add_argument('--plm_tune', type=str, default='freeze') + +# ## lora config +# parser.add_argument('--lora_r', type=int, default=8) +# parser.add_argument('--lora_alpha', type=int, default=16) +# parser.add_argument('--lora_dropout', type=int, default=0.1) +# parser.add_argument('--enbale_gradient_checkpointing', action='store_true', default=False) + +# # optimization +# parser.add_argument('--weight_decay', type=float, default=0.05, help='optimizer weight decay') +# parser.add_argument('--init_lr', 
def caption_evaluate(predictions, targets, tokenizer, text_trunc_length):
    """Compute BLEU-2/4, ROUGE-1/2/L and METEOR for generated captions.

    Args:
        predictions: list of generated strings.
        targets: list of reference strings (same length as predictions).
        tokenizer: HF tokenizer used only for word-piece tokenization.
        text_trunc_length: max token length for truncation.

    Returns:
        (bleu2, bleu4, rouge_1, rouge_2, rouge_l, meteor) — all scaled to 0-100.
    """
    targets = [t.strip() for t in targets]
    meteor_scores = []
    references = []
    hypotheses = []
    for gt, out in tqdm(zip(targets, predictions)):
        gt_tokens = tokenizer.tokenize(gt, truncation=True, max_length=text_trunc_length,
                                       padding='max_length')
        # Strip pad/special tokens; the '' filter was added for galactica.
        gt_tokens = list(filter(('').__ne__, gt_tokens))
        gt_tokens = list(filter(('[PAD]').__ne__, gt_tokens))
        gt_tokens = list(filter(('[CLS]').__ne__, gt_tokens))
        gt_tokens = list(filter(('[SEP]').__ne__, gt_tokens))

        out_tokens = tokenizer.tokenize(out, truncation=True, max_length=text_trunc_length,
                                        padding='max_length')
        out_tokens = list(filter(('').__ne__, out_tokens))
        # BUG FIX: this line previously re-filtered gt_tokens (copy-paste error);
        # '[PAD]' must be removed from out_tokens as well.
        out_tokens = list(filter(('[PAD]').__ne__, out_tokens))
        out_tokens = list(filter(('[CLS]').__ne__, out_tokens))
        out_tokens = list(filter(('[SEP]').__ne__, out_tokens))

        references.append([gt_tokens])
        hypotheses.append(out_tokens)

        mscore = meteor_score([gt_tokens], out_tokens)
        meteor_scores.append(mscore)

    bleu2 = corpus_bleu(references, hypotheses, weights=(.5, .5)) * 100
    bleu4 = corpus_bleu(references, hypotheses, weights=(.25, .25, .25, .25)) * 100
    print('BLEU-2 score:', bleu2)
    print('BLEU-4 score:', bleu4)
    _meteor_score = np.mean(meteor_scores) * 100
    print('Average Meteor score:', _meteor_score)

    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'])
    rouge_scores = []
    for gt, out in tqdm(zip(targets, predictions)):
        rouge_scores.append(scorer.score(out, gt))

    print('ROUGE score:')
    rouge_1 = np.mean([rs['rouge1'].fmeasure for rs in rouge_scores]) * 100
    rouge_2 = np.mean([rs['rouge2'].fmeasure for rs in rouge_scores]) * 100
    rouge_l = np.mean([rs['rougeL'].fmeasure for rs in rouge_scores]) * 100
    print('rouge1:', rouge_1)
    print('rouge2:', rouge_2)
    print('rougeL:', rouge_l)
    return bleu2, bleu4, rouge_1, rouge_2, rouge_l, _meteor_score


class AttrDict(dict):
    """Dict whose keys are also accessible as attributes (d.x == d['x'])."""

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self


def pad_and_concat(tensor_list, fill_value=0):
    """Concat along dim 0, padding dim 1 to the longest length.

    tensor_list: list of tensors shaped [B_i, N_i] or [B_i, N_i, D] (same D).
    Returns one tensor [sum(B_i), max(N_i)(, D)] padded with ``fill_value``.
    """
    device = tensor_list[0].device
    dtype = tensor_list[0].dtype
    max_dim1 = max(t.shape[1] for t in tensor_list)
    sum_dim0 = sum(t.shape[0] for t in tensor_list)
    if len(tensor_list[0].shape) == 3:
        out = torch.full((sum_dim0, max_dim1, tensor_list[0].shape[-1]), fill_value=fill_value, device=device, dtype=dtype)
        i = 0
        for t in tensor_list:
            out[i:i + t.shape[0], :t.shape[1]] = t
            i += t.shape[0]
        return out
    elif len(tensor_list[0].shape) == 2:
        out = torch.full((sum_dim0, max_dim1), fill_value=fill_value, device=device, dtype=dtype)
        i = 0
        for t in tensor_list:
            out[i:i + t.shape[0], :t.shape[1]] = t
            i += t.shape[0]
        return out
    raise NotImplementedError()


def hf_enable_gradient_checkpointing(hf_model):
    """Enable gradient checkpointing on a HF model, making inputs require grad
    so checkpointed segments still receive gradients when embeddings are frozen."""
    if hasattr(hf_model, "enable_input_require_grads"):
        hf_model.enable_input_require_grads()
    else:
        def make_inputs_require_grad(module, input, output):
            output.requires_grad_(True)

        hf_model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)

    # enable gradient checkpointing for memory efficiency
    hf_model.gradient_checkpointing_enable()
    return hf_model
_expand_mask = AttentionMaskConverter._expand_mask


def _prepare_decoder_attention_mask_original(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
    """Build the combined causal + padding attention mask for the OPT decoder.

    Returns a [bsz, 1, tgt_seq_len, src_seq_len] additive mask, or None when
    no masking is needed (single-token input with no padding mask).
    """
    # create causal mask
    # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
    combined_attention_mask = None
    if input_shape[-1] > 1:
        combined_attention_mask = _make_causal_mask(
            input_shape,
            inputs_embeds.dtype,
            device=inputs_embeds.device,
            past_key_values_length=past_key_values_length,
        )

    if attention_mask is not None:
        # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
        expanded_attn_mask = _expand_mask(attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]).to(
            inputs_embeds.device
        )
        # Additive combination: padding mask plus causal mask.
        combined_attention_mask = (
            expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask + combined_attention_mask
        )

    return combined_attention_mask


def forward_original(
    self,
    hidden_states: torch.Tensor,
    position_ids=None,
    key_value_states: Optional[torch.Tensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    attention_mask: Optional[torch.Tensor] = None,
    layer_head_mask: Optional[torch.Tensor] = None,
    output_attentions: bool = False,
    cache_position=None,  # accepted for compatibility with newer transformers call sites
    **kwargs  # keep to absorb future keyword arguments
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
    """Input shape: Batch x Time x Channel

    Eager (non-flash) OPT attention forward, used to restore original behavior
    after flash attention has been monkey-patched in.
    """
    # if key_value_states are provided this layer is used as a cross-attention layer
    # for the decoder
    is_cross_attention = key_value_states is not None

    bsz, tgt_len, _ = hidden_states.size()

    # get query proj (queries are pre-scaled by self.scaling here)
    query_states = self.q_proj(hidden_states) * self.scaling
    # get key, value proj
    if is_cross_attention and past_key_value is not None:
        # reuse k,v, cross_attentions
        key_states = past_key_value[0]
        value_states = past_key_value[1]
    elif is_cross_attention:
        # cross_attentions
        key_states = self._shape(self.k_proj(key_value_states), -1, bsz)
        value_states = self._shape(self.v_proj(key_value_states), -1, bsz)
    elif past_key_value is not None:
        # reuse k, v, self_attention
        key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
        value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
        key_states = torch.cat([past_key_value[0], key_states], dim=2)
        value_states = torch.cat([past_key_value[1], value_states], dim=2)
    else:
        # self_attention
        key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
        value_states = self._shape(self.v_proj(hidden_states), -1, bsz)

    # Always return the (possibly concatenated) k/v cache; the original
    # upstream code gated this on self.is_decoder.
    past_key_value = (key_states, value_states)

    proj_shape = (bsz * self.num_heads, -1, self.head_dim)
    query_states = self._shape(query_states, tgt_len, bsz).view(*proj_shape)
    key_states = key_states.view(*proj_shape)
    value_states = value_states.view(*proj_shape)

    src_len = key_states.size(1)
    attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))

    if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
        raise ValueError(
            f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is"
            f" {attn_weights.size()}"
        )

    if attention_mask is not None:
        if attention_mask.size() != (bsz, 1, tgt_len, src_len):
            raise ValueError(
                f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {attention_mask.size()}"
            )
        attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attention_mask
        # Clamp fully-masked rows to the dtype minimum to avoid -inf softmax.
        attn_weights = torch.max(
            attn_weights, torch.tensor(torch.finfo(attn_weights.dtype).min, device=attn_weights.device)
        )
        attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

    # upcast to fp32 if the weights are in fp16. Please see https://github.com/huggingface/transformers/pull/17437
    if attn_weights.dtype == torch.float16:
        attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(torch.float16)
    else:
        attn_weights = nn.functional.softmax(attn_weights, dim=-1)

    if layer_head_mask is not None:
        if layer_head_mask.size() != (self.num_heads,):
            raise ValueError(
                f"Head mask for a single layer should be of size {(self.num_heads,)}, but is"
                f" {layer_head_mask.size()}"
            )
        attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
        attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

    if output_attentions:
        # this operation is a bit awkward, but it's required to
        # make sure that attn_weights keeps its gradient.
        # In order to do so, attn_weights have to be reshaped
        # twice and have to be reused in the following
        attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
        attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)
    else:
        attn_weights_reshaped = None

    attn_probs = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training)

    attn_output = torch.bmm(attn_probs, value_states)

    if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):
        raise ValueError(
            f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is"
            f" {attn_output.size()}"
        )

    attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim)
    attn_output = attn_output.transpose(1, 2)

    # Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be
    # partitioned aross GPUs when using tensor-parallelism.
    attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim)

    attn_output = self.out_proj(attn_output)

    return attn_output, attn_weights_reshaped, past_key_value


def forward(
    self,
    hidden_states: torch.Tensor,
    key_value_states: Optional[torch.Tensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    attention_mask: Optional[torch.Tensor] = None,
    layer_head_mask: Optional[torch.Tensor] = None,
    output_attentions: bool = False,
    position_ids=None,
    cache_position=None,  # accepted for compatibility with newer transformers call sites
    **kwargs  # keep to absorb future keyword arguments
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
    """Input shape: Batch x Time x Channel

    Flash-attention replacement for OPT self-attention. Supports only
    self-attention without cached key/values and without attention weights
    output (enforced by the asserts below).
    """
    # if key_value_states are provided this layer is used as a cross-attention layer
    # for the decoder
    is_cross_attention = key_value_states is not None
    assert not is_cross_attention, "Cross attention is not supported for flash attention"
    assert past_key_value is None, "past_key_value is not None is not supported for flash attention"
    assert not output_attentions, "output_attentions is not supported for flash attention"

    bsz, tgt_len, _ = hidden_states.size()

    # get query proj (pre-scaled, hence softmax_scale=1 in the flash call below)
    query_states = self.q_proj(hidden_states) * self.scaling
    # get key, value proj

    # NOTE(review): past_key_value is asserted None above, so this branch is
    # dead code; kept for parity with the original implementation.
    if past_key_value is not None:
        # reuse k, v, self_attention
        key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
        value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
        key_states = torch.cat([past_key_value[0], key_states], dim=2)
        value_states = torch.cat([past_key_value[1], value_states], dim=2)
    else:
        # self_attention
        key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
        value_states = self._shape(self.v_proj(hidden_states), -1, bsz)

    # Always return the k/v cache (original upstream gated on self.is_decoder).
    past_key_value = (key_states, value_states)

    proj_shape = (bsz * self.num_heads, -1, self.head_dim)
    query_states = self._shape(query_states, tgt_len, bsz).view(*proj_shape)
    key_states = key_states.view(*proj_shape)
    value_states = value_states.view(*proj_shape)

    ## for flash attention
    flash_shape = (bsz, self.num_heads, tgt_len, self.head_dim)
    query_states = query_states.view(*flash_shape)
    key_states = key_states.view(*flash_shape)
    value_states = value_states.view(*flash_shape)
    qkv = torch.stack([query_states, key_states, value_states], dim=2)  # shape = [bsz, num_heads, 3, tgt_len, head_dim]
    qkv = qkv.transpose(1, 3)  # [bsz, tgt_len, 3, num_heads, head_dim]

    key_padding_mask = attention_mask

    assert key_padding_mask is not None
    # Unpad the packed qkv to variable-length form expected by flash-attn.
    x = rearrange(qkv, "b s three h d -> b s (three h d)")
    x_unpad, indices, cu_seqlens, max_s = unpad_input(x, key_padding_mask)
    x_unpad = rearrange(x_unpad, 'nnz (three h d) -> nnz three h d', three=3, h=self.num_heads)
    output_unpad = flash_attn_varlen_qkvpacked_func(
        x_unpad, cu_seqlens, max_s, self.dropout if self.training else 0.0,
        softmax_scale=1, causal=True, return_attn_probs=False
    )

    # Re-pad to [bsz, tgt_len, num_heads, head_dim].
    output = rearrange(pad_input(rearrange(output_unpad, 'nnz h d -> nnz (h d)'),
                                 indices, bsz, tgt_len),
                       'b s (h d) -> b s h d', h=self.num_heads)

    attn_output = self.out_proj(rearrange(output, "b s h d -> b s (h d)"))
    return attn_output, None, past_key_value
# Disable the transformation of the attention mask in the decoder, as flash
# attention requires the attention mask to be the same as the key_padding_mask.
def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
    # Return the [bsz, seq_len] mask unchanged instead of expanding it to 4-D.
    return attention_mask


def replace_opt_attn_with_flash_attn():
    """Monkey-patch OPT attention with the flash-attention implementation.

    Emits a warning (but still patches) on pre-Ampere GPUs, where the backward
    pass with head_dim > 64 is unsupported.
    """
    cuda_major, cuda_minor = torch.cuda.get_device_capability()
    if cuda_major < 8:
        logging.warning(
            "Flash attention is only supported on A100 or H100 GPU during training due to head dim > 64 backward."
            "ref: https://github.com/HazyResearch/flash-attention/issues/190#issuecomment-1523359593"
        )
    transformers.models.opt.modeling_opt.OPTDecoder._prepare_decoder_attention_mask = _prepare_decoder_attention_mask
    transformers.models.opt.modeling_opt.OPTAttention.forward = forward


def replace_opt_attn_with_original_attn():
    """Restore the stock OPT attention implementation saved at import time."""
    transformers.models.opt.modeling_opt.OPTDecoder._prepare_decoder_attention_mask = _prepare_decoder_attention_mask_original
    transformers.models.opt.modeling_opt.OPTAttention.forward = forward_original


if __name__ == "__main__":
    # Equivalence test between forward_original and the flash-attention forward.
    import math

    import torch.nn as nn

    class FakeNN(nn.Module):
        """Minimal stand-in exposing the attributes OPTAttention.forward reads."""

        def __init__(self):
            super().__init__()
            self.scaling = 1 / math.sqrt(2048)
            # Identity projections keep the comparison independent of random weights
            # (the dead Linear-projection branch was removed).
            self.q_proj = nn.Identity()
            self.k_proj = nn.Identity()
            self.v_proj = nn.Identity()
            self.out_proj = nn.Identity()

            self.is_decoder = True
            self.num_heads = 2
            self.head_dim = 128
            self.embed_dim = 256
            self.dropout = 0

        def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
            # create causal mask: [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
            combined_attention_mask = None
            if input_shape[-1] > 1:
                combined_attention_mask = _make_causal_mask(
                    input_shape,
                    inputs_embeds.dtype,
                    device=inputs_embeds.device,
                    past_key_values_length=past_key_values_length,
                )

            if attention_mask is not None:
                # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
                expanded_attn_mask = _expand_mask(attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]).to(
                    inputs_embeds.device
                )
                combined_attention_mask = (
                    expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask + combined_attention_mask
                )

            return combined_attention_mask

        def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
            return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()

    fakenn = FakeNN().to(torch.bfloat16).to("cuda:0")

    t_len = 3
    fake_input = torch.randn(2, t_len, fakenn.embed_dim).to(torch.bfloat16).to("cuda:0")
    # Random boolean padding mask (the unused fixed-length branch and the
    # immediately-overwritten fake_lens assignment were removed).
    fake_mask = torch.randint(0, t_len, (2, t_len)).bool().to("cuda:0")

    fake_mask2 = fakenn._prepare_decoder_attention_mask(fake_mask, (2, t_len), fake_input, 0)
    attn_output0, _, _ = forward_original(fakenn, fake_input, None, None, fake_mask2, None, False)
    attn_output1, _, _ = forward(fakenn, fake_input, None, None, fake_mask, None, False)  # shape = [2, 3, 256]
    # Zero out padded positions: the flash path leaves them zeroed, the dense path does not.
    attn_output0 = attn_output0 * fake_mask.unsqueeze(-1)

    print(torch.isclose(attn_output0, attn_output1).all())
    print(attn_output0.shape, attn_output1.shape)
    difference = (attn_output0 - attn_output1).abs()
    print(difference)
    print(difference.sum())
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from .utils.py_functional import is_package_available


# Import the hub patch eagerly so the optional dependency fails fast below.
if is_package_available("modelscope"):
    from modelscope.utils.hf_util import patch_hub  # type: ignore


__version__ = "0.3.2.dev0"


if os.getenv("USE_MODELSCOPE_HUB", "0").lower() in ["true", "y", "1"]:
    # Patch hub to download models from modelscope to speed up.
    if not is_package_available("modelscope"):
        raise ImportError("You are using the modelscope hub, please install modelscope by `pip install modelscope`.")

    patch_hub()


# --- verl/protocol.py ---
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Implement base data transfer protocol between any two functions, modules.
We can subclass Protocol to define more detailed batch info with specific keys
"""

import copy
import io
import pickle
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import ray
import torch
from numpy.typing import NDArray
from tensordict import TensorDict
from torch.distributed import ProcessGroup
from torch.utils.data import DataLoader

from .utils.py_functional import union_two_dict


try:
    import tensordict

    tensordict.set_lazy_legacy(False).set()
except Exception:
    pass


__all__ = ["DataProto", "union_tensor_dict"]


def pad_dataproto_to_divisor(data: "DataProto", size_divisor: int) -> Tuple["DataProto", int]:
    """Pad a DataProto so its length is divisible by ``size_divisor``.

    Args:
        data (DataProto): the unpadded DataProto
        size_divisor (int): size divisor

    Returns:
        Tuple of the padded DataProto and the number of padding rows appended.
    """
    assert isinstance(data, DataProto), "data must be a DataProto"
    remainder = len(data) % size_divisor
    if remainder == 0:
        return data, 0

    pad_size = size_divisor - remainder
    padding_protos = []
    still_needed = pad_size
    # Repeat leading slices of `data` until enough padding rows are collected;
    # this also covers pad_size larger than len(data).
    while still_needed > 0:
        take_size = min(still_needed, len(data))
        padding_protos.append(data[:take_size])
        still_needed -= take_size

    return DataProto.concat([data] + padding_protos), pad_size
def unpad_dataproto(data: "DataProto", pad_size: int) -> "DataProto":
    """Strip the trailing ``pad_size`` rows appended by ``pad_dataproto_to_divisor``."""
    if pad_size != 0:
        data = data[:-pad_size]

    return data


def union_tensor_dict(tensor_dict1: TensorDict, tensor_dict2: TensorDict) -> TensorDict:
    """Union two tensordicts in place (into tensor_dict1).

    Raises ValueError if the batch sizes differ or a shared key holds unequal tensors.
    """
    if tensor_dict1.batch_size != tensor_dict2.batch_size:
        raise ValueError(
            f"Two tensor dict must have identical batch size. Got {tensor_dict1.batch_size} and {tensor_dict2.batch_size}"
        )

    for key in tensor_dict2.keys():
        if key in tensor_dict1 and not torch.equal(tensor_dict1[key], tensor_dict2[key]):
            raise ValueError(f"Key already exists: {key}.")

        tensor_dict1[key] = tensor_dict2[key]

    return tensor_dict1


def union_numpy_dict(tensor_dict1: Dict[str, NDArray], tensor_dict2: Dict[str, NDArray]) -> Dict[str, NDArray]:
    """Union two dicts of ndarrays in place (into tensor_dict1); shared keys must be element-wise equal."""
    for key in tensor_dict2.keys():
        if key in tensor_dict1:
            assert isinstance(tensor_dict2[key], np.ndarray)
            assert isinstance(tensor_dict1[key], np.ndarray)
            if not np.all(tensor_dict1[key] == tensor_dict2[key]):
                raise ValueError(f"Key already exists: {key}.")

        tensor_dict1[key] = tensor_dict2[key]

    return tensor_dict1


def batch_collate(features: List[Dict[str, Any]]) -> Dict[str, List[Any]]:
    """Transpose a list of per-row dicts into a dict of per-key lists."""
    if len(features) == 0:
        return {}

    batch_features = defaultdict(list)
    for feature in features:
        for key, value in feature.items():
            batch_features[key].append(value)

    return batch_features


def fold_batch_dim(data: "DataProto", new_batch_size: int):
    """
    Fold a batch dim from [bsz, xxx] into [new_bsz, bsz // new_bsz, xxx]
    """
    batch_size = data.batch.batch_size[0]

    assert batch_size % new_batch_size == 0

    tensor: TensorDict = data.batch
    non_tensor = data.non_tensor_batch

    tensor = tensor.view(new_batch_size, -1)
    tensor.auto_batch_size_(batch_dims=1)

    for key, value in non_tensor.items():
        # Pass the shape positionally: the `newshape` keyword is deprecated and
        # removed in NumPy 2.x in favor of `shape`.
        non_tensor[key] = np.reshape(value, (new_batch_size, -1, *value.shape[1:]))

    return DataProto(batch=tensor, non_tensor_batch=non_tensor, meta_info=data.meta_info)


def collate_fn(data_items: list["DataProtoItem"]):
    """Stack DataProtoItem rows back into a DataProto (used by DataProto.make_iterator)."""
    batch = []
    non_tensor_batch = []
    for data in data_items:
        batch.append(data.batch)
        non_tensor_batch.append(data.non_tensor_batch)

    batch = torch.stack(batch).contiguous()
    non_tensor_batch = batch_collate(non_tensor_batch)
    non_tensor_batch = {key: np.array(value, dtype=object) for key, value in non_tensor_batch.items()}
    return DataProto(batch=batch, non_tensor_batch=non_tensor_batch)


@dataclass
class DataProtoItem:
    # Single-row view of a DataProto: per-row tensors, per-row ndarrays, shared meta info.
    batch: Optional[TensorDict] = None
    non_tensor_batch: Dict[str, NDArray] = field(default_factory=dict)
    meta_info: Dict[str, Any] = field(default_factory=dict)


@dataclass
class DataProto:
    """
    A DataProto is a data structure that aims to provide a standard protocol for data exchange between functions.
    It contains a batch (TensorDict) and a meta_info (Dict). The batch is a TensorDict https://pytorch.org/tensordict/.
    TensorDict allows you to manipulate a dictionary of Tensors like a single Tensor. Ideally, the tensors with the
    same batch size should be put inside batch.
    """
+ """ + + batch: Optional[TensorDict] = None + non_tensor_batch: Dict[str, NDArray] = field(default_factory=dict) + meta_info: Dict[str, Any] = field(default_factory=dict) + + def __post_init__(self): + self.check_consistency() # perform necessary checking + + def __len__(self) -> int: + if self.batch is not None: + return self.batch.batch_size[0] + elif self.non_tensor_batch is not None and len(self.non_tensor_batch) > 0: + pivot_key = list(self.non_tensor_batch.keys())[0] + return self.non_tensor_batch[pivot_key].shape[0] + else: + return 0 + + def __getitem__( + self, item: Union[int, slice, List[int], np.ndarray, torch.Tensor] + ) -> Union["DataProto", "DataProtoItem"]: + if isinstance(item, slice): + return self.slice_select(item.start, item.stop, item.step) + + if isinstance(item, (list, np.ndarray, torch.Tensor)): + return self.index_select(item) + + tensor_data = self.batch[item] if self.batch is not None else None + non_tensor_data = {key: value[item] for key, value in self.non_tensor_batch.items()} + return DataProtoItem(batch=tensor_data, non_tensor_batch=non_tensor_data, meta_info=self.meta_info) + + def __getstate__(self) -> Tuple[bytes, Dict[str, NDArray], Dict[str, Any]]: + buffer = io.BytesIO() + if self.batch is not None: + self.batch: TensorDict = self.batch.contiguous() + self.batch: TensorDict = self.batch.consolidate() + + torch.save(self.batch, buffer) + buffer_bytes = buffer.getvalue() + return buffer_bytes, self.non_tensor_batch, self.meta_info + + def __setstate__(self, data: Tuple[bytes, Dict[str, NDArray], Dict[str, Any]]) -> None: + batch_deserialized_bytes, non_tensor_batch, meta_info = data + batch_deserialized = io.BytesIO(batch_deserialized_bytes) + batch = torch.load(batch_deserialized, weights_only=False, map_location="cpu") + self.batch = batch + self.non_tensor_batch = non_tensor_batch + self.meta_info = meta_info + + def save_to_disk(self, filepath: str) -> None: + with open(filepath, "wb") as f: + pickle.dump(self, f) + + 
@staticmethod + def load_from_disk(filepath: str) -> "DataProto": + with open(filepath, "rb") as f: + data = pickle.load(f) + return data + + def print_size(self, prefix: str = "") -> None: + size_of_tensordict = 0 + if self.batch is not None: + for tensor in self.batch.values(): + if isinstance(tensor, torch.Tensor): + size_of_tensordict += tensor.element_size() * tensor.numel() + + size_of_numpy_array = 0 + for value in self.non_tensor_batch.values(): + size_of_numpy_array += value.nbytes + + size_of_numpy_array /= 1024**3 + size_of_tensordict /= 1024**3 + + message = f"Size of tensordict: {size_of_tensordict} GB, size of non_tensor_batch: {size_of_numpy_array} GB." + print({prefix}, {message}) + + def check_consistency(self): + """Check the consistency of the DataProto. Mainly for batch and non_tensor_batch + We expose this function as a public one so that user can call themselves directly + """ + if self.batch is not None: + assert len(self.batch.batch_size) == 1, "only support num_batch_dims=1" + + if self.batch is not None and len(self.non_tensor_batch) != 0: + # TODO: we can actually lift this restriction if needed + assert len(self.batch.batch_size) == 1, "only support num_batch_dims=1 when non_tensor_batch is not empty." + + batch_size = self.batch.batch_size[0] + for key, value in self.non_tensor_batch.items(): + assert len(value) == batch_size, f"key {key} length {len(value)} is not equal to bsz {batch_size}." 
+ + @classmethod + def from_single_dict( + cls, + data: Dict[str, Union[torch.Tensor, NDArray]], + meta_info: Optional[Dict[str, Any]] = None, + ) -> "DataProto": + tensors, non_tensors = {}, {} + for key, value in data.items(): + print("key") + print(key) + print("value") + print(value) + if isinstance(value, torch.Tensor): + tensors[key] = value + elif isinstance(value, np.ndarray): + non_tensors[key] = value + else: + raise ValueError(f"Unsupported type in data {type(value)}") + + return DataProto.from_dict(tensors=tensors, non_tensors=non_tensors, meta_info=meta_info) + + @classmethod + def from_dict( + cls, + tensors: Optional[Dict[str, torch.Tensor]] = None, + non_tensors: Optional[Dict[str, NDArray]] = None, + meta_info: Optional[Dict[str, Any]] = None, + num_batch_dims: int = 1, + ) -> "DataProto": + """Create a DataProto from a dict of tensors. This assumes that + 1. All the tensor in tensors have the same dim0 + 2. Only dim0 is the batch dim + """ + assert num_batch_dims > 0, "num_batch_dims must be greater than zero" + if non_tensors is not None: + assert num_batch_dims == 1, "only support num_batch_dims=1 when non_tensors is not None." + + tensors = tensors or {} + non_tensors = non_tensors or {} + meta_info = meta_info or {} + assert isinstance(tensors, dict) and isinstance(non_tensors, dict) and isinstance(meta_info, dict) + + # get and check batch size + batch_size = None + pivot_key = None + for key, tensor in tensors.items(): + if batch_size is None: + batch_size = tensor.shape[:num_batch_dims] + pivot_key = key + else: + current_batch = tensor.shape[:num_batch_dims] + assert batch_size == current_batch, ( + f"Not all the tensor in tensors have the same batch size with batch_dims={num_batch_dims}. 
" + f"Got {pivot_key} has {batch_size}, {key} has {current_batch}" + ) + + for key, value in non_tensors.items(): + if not isinstance(value, np.ndarray) or value.dtype != np.dtype(object): + non_tensors[key] = np.array(value, dtype=object) + + tensor_dict = TensorDict(source=tensors, batch_size=batch_size) if tensors else None + return cls(batch=tensor_dict, non_tensor_batch=non_tensors, meta_info=meta_info) + + def to(self, device: torch.device) -> "DataProto": + """move the batch to device + + Args: + device (torch.device, str): torch device + + Returns: + DataProto: the current DataProto + + """ + if self.batch is not None: + self.batch = self.batch.to(device) + + return self + + def select( + self, + batch_keys: Optional[List[str]] = None, + non_tensor_batch_keys: Optional[List[str]] = None, + meta_info_keys: Optional[List[str]] = None, + deepcopy: bool = False, + ) -> "DataProto": + """Select a subset of the DataProto via batch_keys and meta_info_keys + + Args: + batch_keys (list, optional): a list of strings indicating the keys in batch to select + meta_info_keys (list, optional): a list of keys indicating the meta info to select + + Returns: + DataProto: the DataProto with the selected batch_keys and meta_info_keys + """ + # TODO (zhangchi.usc1992) whether to copy + if batch_keys is not None: + batch_keys = tuple(filter(lambda k: k in self.batch, batch_keys)) + sub_batch = self.batch.select(*batch_keys) + else: + sub_batch = self.batch + + if non_tensor_batch_keys is not None: + # we must convert it to tuple to avoid the missing elements + non_tensor_batch_keys = tuple(filter(lambda k: k in self.non_tensor_batch, non_tensor_batch_keys)) + non_tensor_batch = {k: v for k, v in self.non_tensor_batch.items() if k in non_tensor_batch_keys} + else: + non_tensor_batch = self.non_tensor_batch + + if deepcopy: + non_tensor_batch = copy.deepcopy(non_tensor_batch) + + if meta_info_keys is not None: + meta_info_keys = tuple(filter(lambda k: k in self.meta_info, 
meta_info_keys)) + sub_meta_info = {k: v for k, v in self.meta_info.items() if k in meta_info_keys} + else: + sub_meta_info = self.meta_info + + if deepcopy: + sub_meta_info = copy.deepcopy(sub_meta_info) + + return DataProto(batch=sub_batch, non_tensor_batch=non_tensor_batch, meta_info=sub_meta_info) + + def index_select(self, index: Union[List[int], NDArray, torch.Tensor]) -> "DataProto": + """Select a subset of the DataProto via index. + + Args: + index (list, ndarray, torch.Tensor): a list of indices to select. + + Returns: + DataProto: the DataProto containing the selected indices. + """ + if isinstance(index, list): + index = np.array(index, dtype=bool if isinstance(index[0], bool) else np.int32) + elif isinstance(index, torch.Tensor): + index = index.detach().cpu().numpy() + + tensor_data = self.batch[index] if self.batch is not None else None + non_tensor_data = {key: value[index] for key, value in self.non_tensor_batch.items()} + return DataProto(batch=tensor_data, non_tensor_batch=non_tensor_data, meta_info=self.meta_info) + + def slice_select( + self, start: Optional[int] = None, end: Optional[int] = None, step: Optional[int] = None + ) -> "DataProto": + """Select a subset of the DataProto via slice. + + Args: + start (int, optional): the start index of the slice. + end (int, optional): the end index of the slice. + step (int, optional): the step of the slice. + + Returns: + DataProto: the DataProto containing the selected slice. 
+ """ + index = slice(start, end, step) + tensor_data = self.batch[index] if self.batch is not None else None + non_tensor_data = {key: value[index] for key, value in self.non_tensor_batch.items()} + return DataProto(batch=tensor_data, non_tensor_batch=non_tensor_data, meta_info=self.meta_info) + + def pop( + self, + batch_keys: Optional[List[str]] = None, + non_tensor_batch_keys: Optional[List[str]] = None, + meta_info_keys: Optional[List[str]] = None, + ) -> "DataProto": + """Pop a subset of the DataProto via `batch_keys` and `meta_info_keys` + + Args: + batch_keys (list, optional): a list of strings indicating the keys in batch to pop + meta_info_keys (list, optional): a list of keys indicating the meta info to pop + + Returns: + DataProto: the DataProto with the poped batch_keys and meta_info_keys + """ + assert batch_keys is not None + non_tensor_batch_keys = non_tensor_batch_keys or [] + meta_info_keys = meta_info_keys or [] + + tensors = {} + for key in filter(lambda k: k in self.batch, batch_keys): + tensors[key] = self.batch.pop(key) + + non_tensors = {} + for key in filter(lambda k: k in self.non_tensor_batch, non_tensor_batch_keys): + non_tensors[key] = self.non_tensor_batch.pop(key) + + meta_info = {} + for key in filter(lambda k: k in self.meta_info, meta_info_keys): + meta_info[key] = self.meta_info.pop(key) + + return DataProto.from_dict(tensors=tensors, non_tensors=non_tensors, meta_info=meta_info) + + def rename( + self, old_keys: Optional[Union[str, List[str]]] = None, new_keys: Optional[Union[str, List[str]]] = None + ) -> "DataProto": + """ + Note that this function only rename the key in the batch + """ + + def validate_input(keys): + if keys is not None: + if isinstance(keys, str): + keys = [keys] + elif isinstance(keys, list): + pass + else: + raise TypeError(f"keys must be a list or a string, but got {type(keys)}") + return keys + + old_keys = validate_input(old_keys) + new_keys = validate_input(new_keys) + + if len(new_keys) != 
len(old_keys): + raise ValueError( + f"new_keys and old_keys must have the same length, but got {len(new_keys)} and {len(old_keys)}" + ) + + self.batch.rename_key_(tuple(old_keys), tuple(new_keys)) + + return self + + def union(self, other: "DataProto") -> "DataProto": + """Union with another DataProto. Union batch and meta_info separately. + Throw an error if + - there are conflict keys in batch and they are not equal + - the batch size of two data batch is not the same + - there are conflict keys in meta_info and they are not the same. + + Args: + other (DataProto): another DataProto to union + + Returns: + DataProto: the DataProto after union + """ + self.batch = union_tensor_dict(self.batch, other.batch) + self.non_tensor_batch = union_numpy_dict(self.non_tensor_batch, other.non_tensor_batch) + self.meta_info = union_two_dict(self.meta_info, other.meta_info) + return self + + def make_iterator( + self, mini_batch_size: int, epochs: int, seed: int = None, dataloader_kwargs: Dict[str, Any] = None + ): + """Make an iterator from the DataProto. This is built upon that TensorDict can be used as a normal Pytorch + dataset. See https://pytorch.org/tensordict/tutorials/data_fashion for more details. + + Args: + mini_batch_size (int): mini-batch size when iterating the dataset. We require that + ``batch.batch_size[0] % mini_batch_size == 0`` + epochs (int): number of epochs when iterating the dataset. + dataloader_kwargs: internally, it returns a DataLoader over the batch. + The dataloader_kwargs is the kwargs passed to the DataLoader + + Returns: + Iterator: an iterator that yields a mini-batch data at a time. 
The total number of iteration steps is + ``self.batch.batch_size * epochs // mini_batch_size`` + """ + assert self.batch.batch_size[0] % mini_batch_size == 0, f"{self.batch.batch_size[0]} % {mini_batch_size} != 0" + if seed is not None: + generator = torch.Generator() + generator.manual_seed(seed) + else: + generator = None + + dataloader_kwargs = dataloader_kwargs or {} + assert isinstance(dataloader_kwargs, dict) + train_dataloader = DataLoader( + dataset=self, + batch_size=mini_batch_size, + collate_fn=collate_fn, + generator=generator, + **dataloader_kwargs, + ) + + def get_data(): + for _ in range(epochs): + for data in train_dataloader: + setattr(data, "meta_info", self.meta_info) + yield data + + return iter(get_data()) + + def chunk(self, chunks: int) -> List["DataProto"]: + """Split the batch among dim=0 into chunks. The meta_info is passed to each DataProto after split. + + Args: + chunks (int): the number of chunks to split on dim=0 + + Returns: + List[DataProto]: a list of DataProto after splitting + """ + assert len(self) % chunks == 0, ( + f"only support equal chunk. Got size of DataProto {len(self)} and chunk {chunks}." + ) + if self.batch is not None: + batch_lst = self.batch.chunk(chunks=chunks, dim=0) + else: + batch_lst = [None for _ in range(chunks)] + + non_tensor_batch_lst = [{} for _ in range(chunks)] + for key, value in self.non_tensor_batch.items(): + non_tensor_lst = np.array_split(value, chunks) + for i in range(chunks): + non_tensor_batch_lst[i][key] = non_tensor_lst[i] + + return [ + DataProto(batch=batch_lst[i], non_tensor_batch=non_tensor_batch_lst[i], meta_info=self.meta_info) + for i in range(chunks) + ] + + def split(self, split_size: int) -> List["DataProto"]: + """Split the batch among dim=0 into chunks. The meta_info is passed to each DataProto after split. 
+ + Args: + split_size (int): the size of each split + + Returns: + List[DataProto]: a list of DataProto after splitting + """ + chunks = len(self) // split_size + return self.chunk(chunks) + + @staticmethod + def concat(data: List["DataProto"]) -> "DataProto": + """Concat a list of DataProto. The batch is concatenated among dim=0. + The meta_info is assumed to be identical and will use the first one. + + Args: + data (List[DataProto]): list of DataProto + + Returns: + DataProto: concatenated DataProto + """ + batch_lst = [batch.batch for batch in data] + new_batch = torch.cat(batch_lst, dim=0) if batch_lst[0] is not None else None + non_tensor_batch = batch_collate([d.non_tensor_batch for d in data]) + for key, value in non_tensor_batch.items(): + non_tensor_batch[key] = np.concatenate(value, axis=0) + + return DataProto(batch=new_batch, non_tensor_batch=non_tensor_batch, meta_info=data[0].meta_info) + + def reorder(self, indices: torch.Tensor) -> None: + """ + Note that this operation is in-place + """ + indices_np = indices.detach().numpy() + self.batch = self.batch[indices] + self.non_tensor_batch = {key: value[indices_np] for key, value in self.non_tensor_batch.items()} + + def repeat(self, repeat_times: int, interleave: bool = True) -> "DataProto": + """ + Repeat the batch data a specified number of times. + + Args: + repeat_times (int): Number of times to repeat the data. + interleave (bool): Whether to interleave the repeated data. + + Returns: + DataProto: A new DataProto with repeated data. 
+ """ + if self.batch is not None: + if interleave: # interleave the data + repeated_tensors = { + key: tensor.repeat_interleave(repeat_times, dim=0) for key, tensor in self.batch.items() + } + else: # stack the data + repeated_tensors = { + key: tensor.unsqueeze(0).expand(repeat_times, *tensor.shape).reshape(-1, *tensor.shape[1:]) + for key, tensor in self.batch.items() + } + + repeated_batch = TensorDict( + source=repeated_tensors, + batch_size=(self.batch.batch_size[0] * repeat_times,), + ) + else: + repeated_batch = None + + repeated_non_tensor_batch = {} + for key, value in self.non_tensor_batch.items(): + if interleave: + repeated_non_tensor_batch[key] = np.repeat(value, repeat_times, axis=0) + else: + repeated_non_tensor_batch[key] = np.tile(value, (repeat_times,) + (1,) * (value.ndim - 1)) + + return DataProto( + batch=repeated_batch, + non_tensor_batch=repeated_non_tensor_batch, + meta_info=self.meta_info, + ) + + +@dataclass +class DataProtoFuture: + """ + DataProtoFuture aims to eliminate actual data fetching on driver. By doing so, the driver doesn't have to wait + for data so that asynchronous execution becomes possible. + DataProtoFuture contains a list of futures from another WorkerGroup of size world_size. + - collect_fn is a Callable that reduces the list of futures to a DataProto + - dispatch_fn is a Callable that partitions the DataProto into a list of DataProto of size world_size and then select + + Potential issue: we can optimize dispatch_fn(collect_fn) such that only needed data is fetched on destination + - DataProtoFuture only supports directly passing from the output of a method to another input. You can't perform any + operation on the DataProtoFuture in driver. 
+ """ + + collect_fn: Callable + futures: List[ray.ObjectRef] + dispatch_fn: Callable = None + + @staticmethod + def concat(data: List[ray.ObjectRef]) -> "DataProtoFuture": + output = DataProtoFuture(collect_fn=DataProto.concat, futures=data) + return output + + def chunk(self, chunks: int) -> List["DataProtoFuture"]: + from functools import partial + + arg_future_lst = [] + for i in range(chunks): + # note that we can't directly pass i and chunks + def dispatch_fn(x, i, chunks): + return x.chunk(chunks=chunks)[i] + + arg_future = DataProtoFuture( + collect_fn=self.collect_fn, dispatch_fn=partial(dispatch_fn, i=i, chunks=chunks), futures=self.futures + ) + arg_future_lst.append(arg_future) + return arg_future_lst + + def get(self): + outputs = ray.get(self.futures) # dp_size + for output in outputs: + assert isinstance(output, DataProto) + + outputs = self.collect_fn(outputs) # select dp, concat + if self.dispatch_fn is not None: + outputs = self.dispatch_fn(outputs) # split in batch dim, select using dp + + return outputs + + +def allgather_dict_tensors( + tensors: Union[Dict[str, torch.Tensor], TensorDict], size: int, group: ProcessGroup, dim: int = 0 +) -> Union[Dict[str, torch.Tensor], TensorDict]: + """ + TODO: optimize this. 
+ - We can use async ops + - We can use only one allgather + """ + if isinstance(tensors, TensorDict): + is_tensor_dict = True + tensors_as_dict = tensors.to_dict() + else: + tensors_as_dict = tensors + is_tensor_dict = False + + output = {} + sorted_keys = sorted(tensors_as_dict.keys()) + for key in sorted_keys: + value = tensors_as_dict[key] + output[key] = [torch.empty_like(value) for _ in range(size)] + torch.distributed.all_gather(output[key], value, group=group, async_op=False) + output[key] = torch.cat(output[key], dim=dim) + + if is_tensor_dict: + output = TensorDict(source=output, batch_size=tensors.batch_size[0] * size) + + return output + + +def all_gather_data_proto(data: DataProto, size: int, group: ProcessGroup) -> None: + # Note that this is an inplace operator just like torch.distributed.all_gather + prev_device = data.batch.device + data.batch = data.batch.cuda(device=torch.cuda.current_device()) + data.batch = allgather_dict_tensors(data.batch.contiguous(), size=size, group=group, dim=0) + data.batch = data.batch.to(prev_device) + # all gather non_tensor_batch + all_non_tensor_batch = [None for _ in range(size)] + torch.distributed.all_gather_object(all_non_tensor_batch, data.non_tensor_batch, group=group) + data.non_tensor_batch = {k: np.concatenate([d[k] for d in all_non_tensor_batch]) for k in data.non_tensor_batch}