yuccaaa committed on Sep 3, 2025

Commit

9440cb3

verified ·

1 Parent(s): c633a73

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

EasyR1-new/examples/baselines/qwen2_5_vl_3b_clevr.sh +19 -0
EasyR1-new/examples/baselines/qwen2_5_vl_3b_geoqa8k.sh +19 -0
EasyR1-new/examples/format_prompt/bio_format.jinja +2 -0
EasyR1-new/examples/format_prompt/dapo.jinja +1 -0
EasyR1-new/examples/format_prompt/math.jinja +1 -0
EasyR1-new/examples/format_prompt/r1v.jinja +1 -0
EasyR1-new/examples/qwen2_5_vl_7b_multi_image.sh +18 -0
EasyR1-new/examples/qwen3_14b_dapo17k_dapo.sh +43 -0
EasyR1-new/examples/qwen3_4b_math_grpo.sh +13 -0
EasyR1-new/examples/reward_function/__pycache__/bio.cpython-310.pyc +0 -0
EasyR1-new/examples/reward_function/bio.py +183 -0
EasyR1-new/examples/reward_function/dapo.py +163 -0
EasyR1-new/examples/reward_function/math.py +49 -0
EasyR1-new/examples/reward_function/r1v.py +50 -0
EasyR1-new/examples/runtime_env.yaml +9 -0
EasyR1-new/examples/wandb/debug-internal.log +13 -0
EasyR1-new/examples/wandb/debug.log +28 -0
EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/output.log +2 -0
EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/requirements.txt +295 -0
EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/wandb-metadata.json +71 -0
EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log +6 -0
EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log +21 -0
EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/run-lkflebyj.wandb +0 -0
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/config.yaml +322 -0
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/output.log +72 -0
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/requirements.txt +295 -0
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-metadata.json +92 -0
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-summary.json +1 -0
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log +15 -0
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log +28 -0
EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/run-nji9xqxs.wandb +0 -0
EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/output.log +0 -0
EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/requirements.txt +295 -0
EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/wandb-metadata.json +36 -0
EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log +6 -0
EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log +21 -0
EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/run-wmarwr6l.wandb +0 -0
EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/output.log +2 -0
EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/requirements.txt +295 -0
EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/wandb-metadata.json +92 -0
EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log +6 -0
EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log +21 -0
EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/run-zkytrm61.wandb +0 -0
EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/output.log +2 -0
EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/requirements.txt +295 -0
EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/wandb-metadata.json +92 -0
EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log +6 -0
EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log +21 -0
EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/run-eo9xzqez.wandb +0 -0
EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/output.log +2 -0

EasyR1-new/examples/baselines/qwen2_5_vl_3b_clevr.sh ADDED Viewed

	@@ -0,0 +1,19 @@

#!/bin/bash
# Launch `python3 -m verl.trainer.main` for Qwen2.5-VL-3B-Instruct on the
# BUAADreamer/clevr_count_70k dataset, using the r1v prompt template and the
# r1v reward function.
#
# All file arguments are relative paths, so run this from the directory that
# contains `examples/`.
#
# Environment:
#   MODEL_PATH  optional; set it to a local checkpoint directory to use that
#               instead of the default identifier below.

set -x

export PYTHONUNBUFFERED=1

# Keep a caller-supplied MODEL_PATH; otherwise fall back to the default.
MODEL_PATH="${MODEL_PATH:-Qwen/Qwen2.5-VL-3B-Instruct}"

# MODEL_PATH is quoted so a local path containing whitespace stays one argument.
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=BUAADreamer/clevr_count_70k@train \
    data.val_files=BUAADreamer/clevr_count_70k@test \
    data.format_prompt=./examples/format_prompt/r1v.jinja \
    worker.actor.model.model_path="${MODEL_PATH}" \
    worker.rollout.tensor_parallel_size=1 \
    worker.reward.reward_type=sequential \
    worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \
    trainer.experiment_name=qwen2_5_vl_3b_clevr \
    trainer.n_gpus_per_node=2

EasyR1-new/examples/baselines/qwen2_5_vl_3b_geoqa8k.sh ADDED Viewed

	@@ -0,0 +1,19 @@

#!/bin/bash
# Launch `python3 -m verl.trainer.main` for Qwen2.5-VL-3B-Instruct on the
# leonardPKU/GEOQA_8K_R1V dataset, using the r1v prompt template and the r1v
# reward function.
#
# All file arguments are relative paths, so run this from the directory that
# contains `examples/`.
#
# Environment:
#   MODEL_PATH  optional; set it to a local checkpoint directory to use that
#               instead of the default identifier below.

set -x

export PYTHONUNBUFFERED=1

# Keep a caller-supplied MODEL_PATH; otherwise fall back to the default.
MODEL_PATH="${MODEL_PATH:-Qwen/Qwen2.5-VL-3B-Instruct}"

# MODEL_PATH is quoted so a local path containing whitespace stays one argument.
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=leonardPKU/GEOQA_8K_R1V@train \
    data.val_files=leonardPKU/GEOQA_8K_R1V@test \
    data.format_prompt=./examples/format_prompt/r1v.jinja \
    worker.actor.model.model_path="${MODEL_PATH}" \
    worker.rollout.tensor_parallel_size=1 \
    worker.reward.reward_type=sequential \
    worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \
    trainer.experiment_name=qwen2_5_vl_3b_geoqa8k \
    trainer.n_gpus_per_node=8

EasyR1-new/examples/format_prompt/bio_format.jinja ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ {{ content | trim }} You must first reason through the question step by step, as if you're thinking aloud. Enclose your full reasoning process within <think> </think> tags. After your reasoning, output only the number corresponding to the final answer choice inside <answer> </answer> tags.For example:<think> reasoning process </think> <answer>result number</answer>
2	+

EasyR1-new/examples/format_prompt/dapo.jinja ADDED Viewed

	@@ -0,0 +1 @@


1	+ Solve the following math problem step by step. The last line of your response should be of the form Answer: $Answer (without quotes) where $Answer is the answer to the problem.\n\n{{ content | trim }}\n\nRemember to put your answer on its own line after "Answer:".

EasyR1-new/examples/format_prompt/math.jinja ADDED Viewed

	@@ -0,0 +1 @@


1	+ {{ content | trim }} You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}.

EasyR1-new/examples/format_prompt/r1v.jinja ADDED Viewed

	@@ -0,0 +1 @@

+ {{ content | trim }} A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>

EasyR1-new/examples/qwen2_5_vl_7b_multi_image.sh ADDED Viewed

	@@ -0,0 +1,18 @@

#!/bin/bash
# REMINDER: this script uses test data split and should ONLY be used for debugging. DO NOT use for training.
#
# Launch `python3 -m verl.trainer.main` for Qwen2.5-VL-7B-Instruct on the
# hiyouga/journeybench-multi-image-vqa dataset with up to two images per
# sample (worker.rollout.limit_images=2).
#
# All file arguments are relative paths, so run this from the directory that
# contains `examples/`.
#
# Environment:
#   MODEL_PATH  optional; set it to a local checkpoint directory to use that
#               instead of the default identifier below.

set -x

export PYTHONUNBUFFERED=1

# Keep a caller-supplied MODEL_PATH; otherwise fall back to the default.
MODEL_PATH="${MODEL_PATH:-Qwen/Qwen2.5-VL-7B-Instruct}"

# MODEL_PATH is quoted so a local path containing whitespace stays one argument.
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=hiyouga/journeybench-multi-image-vqa@train \
    data.val_files=hiyouga/journeybench-multi-image-vqa@test \
    data.rollout_batch_size=256 \
    worker.actor.model.model_path="${MODEL_PATH}" \
    worker.rollout.limit_images=2 \
    trainer.experiment_name=qwen2_5_vl_7b_multi_image \
    trainer.n_gpus_per_node=8

EasyR1-new/examples/qwen3_14b_dapo17k_dapo.sh ADDED Viewed

	@@ -0,0 +1,43 @@

#!/bin/bash
# Launch `python3 -m verl.trainer.main` for Qwen3-14B-Base on the
# Saigyouji-Yuyuko1000/dapo17k dataset, using the dapo prompt template and
# the dapo reward function.
#
# max_response_length (20480) is passed both to the data config and to the
# reward function kwargs; keep the two values in sync when editing.
#
# All file arguments are relative paths, so run this from the directory that
# contains `examples/`.
#
# Environment:
#   MODEL_PATH  optional; set it to a local checkpoint directory to use that
#               instead of the default identifier below.

set -x

export PYTHONUNBUFFERED=1

# Keep a caller-supplied MODEL_PATH; otherwise fall back to the default.
MODEL_PATH="${MODEL_PATH:-Qwen/Qwen3-14B-Base}"

# MODEL_PATH is quoted so a local path containing whitespace stays one
# argument. The JSON-valued options stay single-quoted so the shell passes
# them through verbatim.
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=Saigyouji-Yuyuko1000/dapo17k@train \
    data.val_files=Saigyouji-Yuyuko1000/dapo17k@test \
    data.format_prompt=./examples/format_prompt/dapo.jinja \
    data.max_prompt_length=2048 \
    data.max_response_length=20480 \
    data.rollout_batch_size=512 \
    data.mini_rollout_batch_size=256 \
    worker.actor.micro_batch_size_per_device_for_update=1 \
    worker.actor.micro_batch_size_per_device_for_experience=8 \
    worker.actor.model.model_path="${MODEL_PATH}" \
    worker.actor.fsdp.torch_dtype=bf16 \
    worker.actor.optim.strategy=adamw_bf16 \
    worker.actor.optim.weight_decay=0.1 \
    worker.actor.optim.lr_warmup_steps=10 \
    worker.actor.global_batch_size=32 \
    worker.actor.clip_ratio_low=0.2 \
    worker.actor.clip_ratio_high=0.28 \
    worker.actor.clip_ratio_dual=10.0 \
    worker.rollout.n=16 \
    worker.rollout.max_num_batched_tokens=22528 \
    worker.rollout.val_override_config='{"n":16,"temperature":1.0,"top_p":0.7}' \
    worker.rollout.gpu_memory_utilization=0.8 \
    worker.reward.reward_function=./examples/reward_function/dapo.py:compute_score \
    worker.reward.reward_function_kwargs='{"max_response_length":20480,"overlong_buffer_length":4096,"overlong_penalty_factor":1.0}' \
    algorithm.disable_kl=True \
    algorithm.online_filtering=True \
    algorithm.filter_key=accuracy_normalized \
    algorithm.filter_low=0.01 \
    algorithm.filter_high=0.99 \
    trainer.total_epochs=10 \
    trainer.max_try_make_batch=10 \
    trainer.experiment_name=qwen3_14b_dapo17k_dapo \
    trainer.n_gpus_per_node=8

EasyR1-new/examples/qwen3_4b_math_grpo.sh ADDED Viewed

	@@ -0,0 +1,13 @@

#!/bin/bash
# Launch `python3 -m verl.trainer.main` for Qwen3-4B with the settings from
# examples/config.yaml, changing only the maximum response length, the model
# path and the experiment name.
#
# The config path is relative, so run this from the directory that contains
# `examples/`.
#
# Environment:
#   MODEL_PATH  optional; set it to a local checkpoint directory to use that
#               instead of the default identifier below.

set -x

export PYTHONUNBUFFERED=1

# Keep a caller-supplied MODEL_PATH; otherwise fall back to the default.
MODEL_PATH="${MODEL_PATH:-Qwen/Qwen3-4B}"

# MODEL_PATH is quoted so a local path containing whitespace stays one argument.
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.max_response_length=4096 \
    worker.actor.model.model_path="${MODEL_PATH}" \
    trainer.experiment_name=qwen3_4b_math_grpo

EasyR1-new/examples/reward_function/__pycache__/bio.cpython-310.pyc ADDED Viewed

Binary file (4.5 kB). View file

EasyR1-new/examples/reward_function/bio.py ADDED Viewed

	@@ -0,0 +1,183 @@

+from itertools import islice, zip_longest
+from typing import Callable, Dict, List, Optional, Tuple, TypedDict
+import json
def repeatness_reward(s: str):
    """Score how little of ``s`` is repeated text.

    The string is turned into code points, a suffix ordering is built by
    rank doubling, and the common-prefix lengths of neighbouring suffixes in
    that ordering are summed.  The result is
    ``1 - 2 * total / (n * (n + 1))`` where ``n = len(s)``.

    Args:
        s: Text to analyse.

    Returns:
        The integer ``0`` when ``s`` has at most one character, otherwise a
        float as described above (``1.0`` when no neighbouring suffixes share
        a prefix).
    """

    def ranks(values):
        # Map each value to its position among the sorted distinct values.
        order = {value: position for position, value in enumerate(sorted(set(values)))}
        return [order[value] for value in values]

    def suffixArray(seq):
        # Rank doubling: each round ranks positions by (rank here, rank `step` ahead).
        current = ranks(seq)
        length = len(seq)
        positions = [0] * length
        step = 1
        while step < length - 1:
            paired = zip_longest(current, islice(current, step, None), fillvalue=-1)
            current = ranks(list(paired))
            step <<= 1
        for start, rank in enumerate(current):
            positions[rank] = start
        return current, positions

    def lcp(arr, suffixArr, inv_suff):
        # Common-prefix length between each suffix and the next one in the ordering.
        size = len(arr)
        heights = [0] * size
        matched = 0
        for start in range(size):
            rank = inv_suff[start]
            if rank == size - 1:
                # Last suffix in the ordering has no successor to compare with.
                matched = 0
                continue
            other = suffixArr[rank + 1]
            while (
                start + matched < size
                and other + matched < size
                and arr[start + matched] == arr[other + matched]
            ):
                matched += 1
            heights[rank] = matched
            if matched > 0:
                # Carry over all but one matched character to the next start.
                matched -= 1
        return heights

    codes = [ord(ch) for ch in s]
    total = len(codes)
    if total <= 1:
        return 0
    inverse, order = suffixArray(codes)
    shared = sum(lcp(codes, order, inverse))
    return 1 - shared * 2 / (total * (total + 1))
+import re
def format_reward(predict_str: str) -> float:
    """Format reward: 1.0 only for a strictly well-formed response.

    After stripping surrounding whitespace the whole response must be
    ``<think>...</think><answer>D</answer>`` where ``D`` is a single digit.
    Only whitespace may appear between ``</think>`` and ``<answer>`` and
    around the digit; nothing may follow ``</answer>``.

    The think body is matched with a negative lookahead so it ends at the
    FIRST ``</think>``.  A plain lazy ``.*?`` under ``re.DOTALL`` could
    backtrack past that tag, letting responses with stray text or a second
    think/answer pair still score 1.0.

    Args:
        predict_str: Raw model response.

    Returns:
        1.0 if the response matches the format, otherwise 0.0.
    """
    pattern = r'<think>(?:(?!</think>).)*</think>\s*<answer>\s*([0-9])\s*</answer>'
    return 1.0 if re.fullmatch(pattern, predict_str.strip(), re.DOTALL) else 0.0
def acc_reward(predict_str: str, ground_truth) -> float:
    """Accuracy reward: 1.0 when the answered digit equals the label.

    The first ``<answer>D</answer>`` in the response (``D`` a single digit,
    optional surrounding whitespace) is compared numerically with
    ``ground_truth``.

    Args:
        predict_str: Raw model response.
        ground_truth: Expected label.  May be an ``int`` or a string holding
            an integer (e.g. ``"4"`` or ``" 4 "``).  The previous
            implementation compared ``int(answer)`` directly against the
            label, which is always False for string labels.

    Returns:
        1.0 on a match; 0.0 when there is no answer tag, the label is a
        string that is not an integer, or the values differ.
    """
    match = re.search(r'<answer>\s*([0-9])\s*</answer>', predict_str)
    if not match:
        return 0.0
    predicted = int(match.group(1))

    if isinstance(ground_truth, str):
        try:
            expected = int(ground_truth.strip())
        except ValueError:
            # A non-numeric label can never equal a digit answer.
            return 0.0
    else:
        expected = ground_truth

    return 1.0 if predicted == expected else 0.0
+# def compute_score( solution_str: str, ground_truth: str, extra_info):
+#     """
+#     综合评分函数
+#     """
def compute_score(
    predicts: List[str],
    ground_truths: List[str],
    format_weight: float = 0.1,
    save_path: Optional[str] = "/nas/shared/kilab/wangyujia/check_rl/result-06170934.jsonl",
) -> List[Dict[str, float]]:
    """Score a batch of responses and optionally dump the details as JSONL.

    For every (response, label) pair three component scores are computed:
    ``format_reward``, ``acc_reward`` and ``repeatness_reward`` applied to the
    stripped text inside the first ``<think>...</think>`` (empty string when
    there is no think block).  ``overall`` is their unweighted sum.

    Args:
        predicts: Model responses.
        ground_truths: Labels, paired positionally with ``predicts``; extra
            items on the longer list are ignored (``zip`` semantics).
        format_weight: Accepted for signature compatibility; it is NOT used
            in the score.
        save_path: File that receives one JSON object per pair (response,
            label and the four scores).  The file is overwritten on every
            call.  Pass ``None`` or ``""`` to disable the dump.

    Returns:
        One dict per pair with keys ``overall``, ``format``, ``accuracy`` and
        ``repeat``.
    """
    import warnings

    scores = []
    records = []
    for solution_str, ground_truth in zip(predicts, ground_truths):
        format_score = format_reward(solution_str)
        acc_score = acc_reward(solution_str, ground_truth)

        # Repetition is measured on the reasoning text only.
        think_match = re.search(r'<think>(.*?)</think>', solution_str, re.DOTALL)
        think_str = think_match.group(1).strip() if think_match else ""
        repeat_score = repeatness_reward(think_str)

        score = {
            "overall": format_score + acc_score + repeat_score,
            "format": format_score,
            "accuracy": acc_score,
            "repeat": repeat_score,
        }
        scores.append(score)
        records.append({"solution_str": solution_str, "ground_truth": ground_truth, **score})

    if save_path:
        # The dump is a debugging aid: an unreachable path (e.g. the default
        # NAS location on another machine) must not abort reward computation.
        try:
            with open(save_path, "w", encoding="utf-8") as f:
                for record in records:
                    f.write(json.dumps(record, ensure_ascii=False) + "\n")
        except OSError as exc:
            warnings.warn(f"compute_score: could not write {save_path!r}: {exc}")

    return scores
+# print(format_reward("<think>Step-by-step logic</think>   <answer> 5 </answer>"))
+# print(format_reward("<think>Something\nacross lines</think>\n<answer> 0 </answer>"))
+# print(format_reward("No tags here"))
+# print(format_reward("<think>OK</think><answer>12</answer>"))  # 多位数字
+# print(format_reward("<think>OK</think><answer>A</answer>"))   # 字母不允许
+# print(format_reward("<think>Yes</think><answer> </answer>"))  # 空的答案
+# print(format_reward("<think>OK</think><answer>3</answer>extra"))  # 多余内容
+# print(format_reward("<answer>3</answer><think>Reasoning</think>"))  # 标签顺序错误
+# print(acc_reward("<think>Step-by-step logic</think>   <answer> 5 </answer>",'5'))
+# print(acc_reward("<think>Something\nacross lines</think>\n<answer> 0 </answer>",'1'))
+# str_="<think>\nThe protein name is P32783, the protein amino acid sequence is MSTKPEKPIWMSQEDYDRQYGSITGDESSTVSKKDSKVTANAPGDGNGSLPVLQSSSILTSKVSDLPIEAESGFKIQKRRHERYDQEERLRKQRAQKLREEQLKRHEIEMTANRSINVDQIVREHYNERTIIANRAKRNLSPIIKLRNFNNAIKYMLIDKYTKPGDVVLELGCGKGGDLRKYGAAGISQFIGIDISNASIQEAHKRYRSMRNLDYQVVLITGDCFGESLGVAVEPFPDCRFPCDIVSTQFCLHYAFETEEKARRALLNVAKSLKIGGHFFGTIPDSEFIRYKLNKFPKEVEKPSWGNSIYKVTFENNSYQKNDYEFTSPYGQMYTYWLEDAIDNVPEYVVPFETLRSLADEYGLELVSQMPFNKFFVQEIPKWIERFSPKMREGLQRSDGRYGVEGDEKEAASYFYTMFAFRKVKQYIEPESVKPN, the protein localization prediction for P32783 is Cell.membrane,M, so the location label is 4. Therefore, option 4 is the correct answer.\n</think>\n<answer>\n4\n</answer>"
+# print(format_reward(str_))
def check_rewards(jsonl_path: str) -> List[Dict[str, float]]:
    """Recompute and print the reward components for a saved JSONL dump.

    Each non-blank line must be a JSON object with ``solution_str`` and
    ``ground_truth`` keys.  For every record the format, accuracy and
    repetition scores are recomputed, printed as indented JSON, and collected.

    Args:
        jsonl_path: Path of the JSONL file to read.

    Returns:
        One dict per record with keys ``format``, ``accuracy``, ``repeat`` and
        ``overall`` (the sum of the other three).
    """
    results = []
    with open(jsonl_path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines such as a trailing newline.
                continue
            data = json.loads(line)
            solution_str = data["solution_str"]
            ground_truth = data["ground_truth"]

            # Recompute the three component scores from scratch.
            format_score = format_reward(solution_str)
            acc_score = acc_reward(solution_str, ground_truth)
            think_match = re.search(r'<think>(.*?)</think>', solution_str, re.DOTALL)
            think_str = think_match.group(1).strip() if think_match else ""
            repeat_score = repeatness_reward(think_str)

            result = {
                "format": format_score,
                "accuracy": acc_score,
                "repeat": repeat_score,
                "overall": format_score + acc_score + repeat_score,
            }
            results.append(result)
            print(json.dumps(result, indent=2, ensure_ascii=False))
    return results


# Run the check only when this file is executed directly, so that importing
# the module for its reward functions does not read the hard-coded file.
if __name__ == "__main__":
    check_rewards("/nas/shared/kilab/wangyujia/check_rl/check.jsonl")

EasyR1-new/examples/reward_function/dapo.py ADDED Viewed

	@@ -0,0 +1,163 @@

+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+from typing import Any, Dict, List
# Normalization tables used by normalize_final_answer.
# Both are applied in list order with plain str.replace, so order matters.

# (before, after) literal replacements.
SUBSTITUTIONS = [
    ("an ", ""), ("a ", ""),
    (".$", "$"), ("\\$", ""),
    (r"\ ", ""), (" ", ""),
    ("mbox", "text"),
    (",\\text{and}", ","), ("\\text{and}", ","),
    ("\\text{m}", "\\text{}"),
]

# Literal fragments deleted from the answer.
REMOVED_EXPRESSIONS = [
    # plain words
    "square", "ways", "integers", "dollars", "mph", "inches", "hours", "km", "units",
    "\\ldots",
    "sue", "points", "feet", "minutes", "digits", "cents", "degrees", "cm", "gm",
    "pounds", "meters", "meals", "edges", "students", "childrentickets", "multiples",
    # \text{...} fragments
    "\\text{s}", "\\text{.}", "\\text{\ns}", "\\text{}^2", "\\text{}^3", "\\text{\n}", "\\text{}",
    # TeX markup and punctuation
    r"\mathrm{th}", r"^\circ", r"^{\circ}", r"\;", r",\!", "{,}", '"',
    "\\dots",
]
def normalize_final_answer(final_answer: str) -> str:
    """Reduce a final-answer string to a canonical form for comparison.

    Steps, in order: keep only the text after the last ``=``; apply the
    literal ``SUBSTITUTIONS``; delete every ``REMOVED_EXPRESSIONS`` fragment;
    run the regex rewrites below; drop ``$``; and, if the result is digits
    once commas are removed, drop the commas.

    Args:
        final_answer: The answer string to normalize.

    Returns:
        The normalized answer with surrounding whitespace stripped.
    """
    text = final_answer.split("=")[-1]

    for before, after in SUBSTITUTIONS:
        text = text.replace(before, after)
    for fragment in REMOVED_EXPRESSIONS:
        text = text.replace(fragment, "")

    regex_rewrites = (
        # Keep only the first $...$ span when one is present.
        (r"(.*?)(\$)(.*?)(\$)(.*)", "$\\3$"),
        # Unwrap \text{}, \textbf{}, \overline{} and \boxed{}.
        (r"(\\text\{)(.*?)(\})", "\\2"),
        (r"(\\textbf\{)(.*?)(\})", "\\2"),
        (r"(\\overline\{)(.*?)(\})", "\\2"),
        (r"(\\boxed\{)(.*)(\})", "\\2"),
        # Expand shorthand TeX: \fracab -> \frac{a}{b}, \sqrta -> \sqrt{a}.
        (r"(frac)([^{])(.)", "frac{\\2}{\\3}"),
        (r"(sqrt)([^{])", "sqrt{\\2}"),
    )
    for pattern, replacement in regex_rewrites:
        text = re.sub(pattern, replacement, text)

    text = text.replace("$", "")

    # Thousands separators are removed only from purely numeric answers.
    without_commas = text.replace(",", "")
    if without_commas.isdigit():
        text = without_commas

    return text.strip()
def accuracy_reward(response: str, ground_truth: str) -> float:
    """Return 1.0 if the last ``Answer: ...`` line matches the label, else -1.0.

    The text after the last case-insensitive ``Answer:`` (up to the end of
    that line) is normalized and compared with the normalized label.  When no
    such line exists the placeholder ``"[INVALID]"`` is compared instead.
    """
    found = re.findall(r"(?i)Answer\s*:\s*([^\n]+)", response)
    candidate = found[-1] if found else "[INVALID]"
    is_correct = normalize_final_answer(candidate) == normalize_final_answer(ground_truth)
    return 1.0 if is_correct else -1.0
def soft_overlong_punishment(response_length: int, max_response_length: int, overlong_buffer_length: int):
    """Length penalty that ramps linearly over the final buffer.

    Returns 0.0 up to ``max_response_length - overlong_buffer_length``, then
    falls linearly to -1.0 at ``max_response_length``; anything longer than
    ``max_response_length`` gets -1.0.
    """
    soft_limit = max_response_length - overlong_buffer_length
    if response_length <= soft_limit:
        return 0.0
    if response_length > max_response_length:
        return -1.0
    return (soft_limit - response_length) / overlong_buffer_length
def compute_score(
    reward_inputs: List[Dict[str, Any]],
    max_response_length: int,
    overlong_buffer_length: int,
    overlong_penalty_factor: float,
) -> List[Dict[str, float]]:
    """Score a batch of responses on accuracy plus an overlong penalty.

    Args:
        reward_inputs: List of dicts; each is read for ``response``,
            ``ground_truth`` and ``response_length``.
        max_response_length: Length beyond which the penalty is -1.0.
        overlong_buffer_length: Width of the linear penalty ramp.
        overlong_penalty_factor: Multiplier applied to the penalty in
            ``overall``.

    Returns:
        One dict per input with keys ``overall``, ``accuracy``, ``overlong``
        and ``accuracy_normalized`` (accuracy mapped from [-1, 1] to [0, 1]).

    Raises:
        ValueError: If ``reward_inputs`` is not a list.
    """
    if not isinstance(reward_inputs, list):
        raise ValueError("Please use `reward_type=batch` for dapo reward function.")

    def _score_one(item: Dict[str, Any]) -> Dict[str, float]:
        # Only the tail is searched for the answer line.
        tail = item["response"][-300:]  # The longest answer in MATH-500 has 159 characters
        accuracy = accuracy_reward(tail, item["ground_truth"])
        overlong = soft_overlong_punishment(
            item["response_length"], max_response_length, overlong_buffer_length
        )
        return {
            "overall": accuracy + overlong * overlong_penalty_factor,
            "accuracy": accuracy,
            "overlong": overlong,
            "accuracy_normalized": 0.5 * (accuracy + 1.0),
        }

    return [_score_one(item) for item in reward_inputs]

EasyR1-new/examples/reward_function/math.py ADDED Viewed

	@@ -0,0 +1,49 @@

+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+from typing import Any, Dict, List
+from mathruler.grader import extract_boxed_content, grade_answer
def format_reward(response: str) -> float:
    """Return 1.0 if the whole response is ``<think>...</think>`` followed
    somewhere later by ``\\boxed{...}``, otherwise 0.0.

    Matching is done with ``re.DOTALL`` so every part may span lines; the
    response must start with ``<think>``.
    """
    matched = re.fullmatch(r"<think>.*</think>.*\\boxed\{.*\}.*", response, flags=re.DOTALL)
    return 0.0 if matched is None else 1.0
def accuracy_reward(response: str, ground_truth: str) -> float:
    """Return 1.0 if the boxed content of ``response`` is graded as matching
    ``ground_truth`` by ``grade_answer``, otherwise 0.0."""
    boxed = extract_boxed_content(response)
    if grade_answer(boxed, ground_truth):
        return 1.0
    return 0.0
def compute_score(reward_inputs: List[Dict[str, Any]], format_weight: float = 0.1) -> List[Dict[str, float]]:
    """Score a batch of responses on format and accuracy.

    Args:
        reward_inputs: List of dicts; each is read for ``response`` and
            ``ground_truth``.
        format_weight: Share of ``overall`` given to the format score; the
            remainder goes to accuracy.

    Returns:
        One dict per input with keys ``overall``, ``format`` and ``accuracy``.

    Raises:
        ValueError: If ``reward_inputs`` is not a list.
    """
    if not isinstance(reward_inputs, list):
        raise ValueError("Please use `reward_type=batch` for math reward function.")

    def _score_one(item: Dict[str, Any]) -> Dict[str, float]:
        # Remove whitespace around '<', '>' and '/' before matching tags.
        cleaned = re.sub(r"\s*(<|>|/)\s*", r"\1", item["response"])  # handle qwen2.5vl-32b format
        fmt = format_reward(cleaned)
        acc = accuracy_reward(cleaned, item["ground_truth"])
        return {
            "overall": (1 - format_weight) * acc + format_weight * fmt,
            "format": fmt,
            "accuracy": acc,
        }

    return [_score_one(item) for item in reward_inputs]

EasyR1-new/examples/reward_function/r1v.py ADDED Viewed

	@@ -0,0 +1,50 @@

+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+from typing import Any, Dict
+from mathruler.grader import grade_answer
def format_reward(response: str) -> float:
    """Return 1.0 if the whole response is ``<think>...</think>`` followed by
    ``<answer>...</answer>`` with only whitespace between them, else 0.0.

    Matching is done with ``re.DOTALL`` so both bodies may span lines.
    """
    matched = re.fullmatch(r"<think>.*?</think>\s*<answer>.*?</answer>", response, flags=re.DOTALL)
    return 0.0 if matched is None else 1.0
def accuracy_reward(response: str, ground_truth: str) -> float:
    """Return 1.0 if the answer in ``response`` is graded as correct, else 0.0.

    The answer is the stripped text of the first ``<answer>...</answer>``
    block; when there is no such block the whole stripped response is graded.
    The block is matched with ``re.DOTALL`` so that answers spanning several
    lines (which ``format_reward`` accepts) are extracted instead of falling
    back to grading the entire response.

    Any exception raised while extracting or grading is treated as an
    incorrect answer (deliberate best-effort: one bad sample must not stop
    scoring).
    """
    try:
        content_match = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
        given_answer = content_match.group(1).strip() if content_match else response.strip()
        if grade_answer(given_answer, ground_truth.strip()):
            return 1.0
    except Exception:
        # Best-effort by design: fall through to the 0.0 result.
        pass

    return 0.0
def compute_score(reward_input: Dict[str, Any], format_weight: float = 0.5) -> Dict[str, float]:
    """Score a single response on format and accuracy.

    Args:
        reward_input: Dict read for ``response`` and ``ground_truth``.
        format_weight: Share of ``overall`` given to the format score; the
            remainder goes to accuracy.

    Returns:
        Dict with keys ``overall``, ``format`` and ``accuracy``.

    Raises:
        ValueError: If ``reward_input`` is not a dict.
    """
    if not isinstance(reward_input, dict):
        raise ValueError("Please use `reward_type=sequential` for r1v reward function.")

    response = reward_input["response"]
    fmt = format_reward(response)
    acc = accuracy_reward(response, reward_input["ground_truth"])
    overall = (1 - format_weight) * acc + format_weight * fmt
    return {"overall": overall, "format": fmt, "accuracy": acc}

EasyR1-new/examples/runtime_env.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

# Working directory and the paths excluded from it.
working_dir: ./
excludes:
  - "/.git/"

# Environment variables; every value is given as a string.
env_vars:
  TOKENIZERS_PARALLELISM: "true"
  NCCL_DEBUG: "WARN"
  VLLM_LOGGING_LEVEL: "WARN"
  TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
  PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False"
  PYTHONUNBUFFERED: "1"

EasyR1-new/examples/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,13 @@

+{"time":"2025-07-21T14:07:35.211628547+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-07-21T14:07:50.875611638+08:00","level":"INFO","msg":"stream: created new stream","id":"a9qblh0u"}
+{"time":"2025-07-21T14:07:50.876588753+08:00","level":"INFO","msg":"stream: started","id":"a9qblh0u"}
+{"time":"2025-07-21T14:07:50.87663237+08:00","level":"INFO","msg":"sender: started","stream_id":"a9qblh0u"}
+{"time":"2025-07-21T14:07:50.876605114+08:00","level":"INFO","msg":"handler: started","stream_id":"a9qblh0u"}
+{"time":"2025-07-21T14:07:50.87665507+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"a9qblh0u"}
+{"time":"2025-07-21T14:08:05.783504415+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":7.434542791},{"desc":"uploading data","runtime_seconds":0.571568597}],"total_operations":2}}
+{"time":"2025-07-21T14:08:31.955353631+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-07-21T14:08:56.48244624+08:00","level":"INFO","msg":"stream: closing","id":"a9qblh0u"}
+{"time":"2025-07-21T14:08:56.48558812+08:00","level":"INFO","msg":"handler: closed","stream_id":"a9qblh0u"}
+{"time":"2025-07-21T14:08:56.485598269+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"a9qblh0u"}
+{"time":"2025-07-21T14:08:56.485607803+08:00","level":"INFO","msg":"sender: closed","stream_id":"a9qblh0u"}
+{"time":"2025-07-21T14:08:56.50017009+08:00","level":"INFO","msg":"stream: closed","id":"a9qblh0u"}

EasyR1-new/examples/wandb/debug.log ADDED Viewed

	@@ -0,0 +1,28 @@

+2025-07-21 14:07:34,952 INFO    MainThread:317976 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-07-21 14:07:34,952 INFO    MainThread:317976 [wandb_setup.py:_flush():80] Configure stats pid to 317976
+2025-07-21 14:07:34,952 INFO    MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-07-21 14:07:34,952 INFO    MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings
+2025-07-21 14:07:34,952 INFO    MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-07-21 14:07:34,952 INFO    MainThread:317976 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug.log
+2025-07-21 14:07:34,952 INFO    MainThread:317976 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug-internal.log
+2025-07-21 14:07:34,952 INFO    MainThread:317976 [wandb_init.py:init():830] calling init triggers
+2025-07-21 14:07:34,953 INFO    MainThread:317976 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 
'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 
'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}}
+2025-07-21 14:07:34,953 INFO    MainThread:317976 [wandb_init.py:init():871] starting backend
+2025-07-21 14:07:35,172 INFO    MainThread:317976 [wandb_init.py:init():874] sending inform_init request
+2025-07-21 14:07:35,174 INFO    MainThread:317976 [wandb_init.py:init():882] backend started and connected
+2025-07-21 14:07:35,186 INFO    MainThread:317976 [wandb_init.py:init():953] updated telemetry
+2025-07-21 14:07:35,302 INFO    MainThread:317976 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-07-21 14:07:58,269 INFO    MainThread:317976 [wandb_init.py:init():1029] starting run threads in backend
+2025-07-21 14:07:58,556 INFO    MainThread:317976 [wandb_run.py:_console_start():2458] atexit reg
+2025-07-21 14:07:58,556 INFO    MainThread:317976 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-07-21 14:07:58,562 INFO    MainThread:317976 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-07-21 14:07:58,562 INFO    MainThread:317976 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-07-21 14:07:58,574 INFO    MainThread:317976 [wandb_init.py:init():1075] run started, returning control to user process
+2025-07-21 14:08:04,748 INFO    MainThread:317976 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/a9qblh0u
+2025-07-21 14:08:04,755 INFO    MainThread:317976 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
+2025-07-21 14:08:04,767 INFO    MainThread:317976 [wandb_run.py:_restore():2405] restore
+2025-07-21 14:08:04,771 INFO    MainThread:317976 [wandb_run.py:_restore():2411] restore done
+2025-07-21 14:08:56,463 INFO    MainThread:317976 [wandb_run.py:_footer_history_summary_info():3903] rendering history
+2025-07-21 14:08:56,469 INFO    MainThread:317976 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
+2025-07-21 14:08:56,469 INFO    MainThread:317976 [wandb_run.py:_footer_sync_info():3864] logging synced files

EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/output.log ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ {"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.6.45", "pid": 7695, "uuid": "4931688589ea40edb6b0579192261e95", "closed": false}
2	+ Start validation...

EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,295 @@

+setproctitle==1.2.2
+colorama==0.4.6
+psutil==7.0.0
+numpy==2.2.6
+pylatexenc==2.10
+webdataset==1.0.2
+email_validator==2.2.0
+confection==0.1.5
+text-unidecode==1.3
+python-dotenv==1.1.1
+starlette==0.47.1
+pyasn1==0.6.1
+contexttimer==0.3.3
+requests==2.32.4
+omegaconf==2.3.0
+tzdata==2025.2
+yarl==1.20.1
+nvidia-cuda-nvrtc-cu12==12.4.127
+decord==0.6.0
+nvidia-cublas-cu12==12.4.5.8
+proto-plus==1.26.1
+opentelemetry-semantic-conventions-ai==0.4.11
+scipy==1.15.3
+googleapis-common-protos==1.70.0
+nvidia-cufile-cu12==1.11.1.6
+parso==0.8.4
+opentelemetry-exporter-otlp-proto-http==1.26.0
+vllm==0.8.5.post1
+sniffio==1.3.1
+python-dateutil==2.9.0.post0
+openai==1.90.0
+absl-py==2.3.1
+Deprecated==1.2.18
+cupy-cuda12x==13.5.1
+setuptools==78.1.1
+peft==0.16.0
+rignore==0.6.2
+joblib==1.5.1
+platformdirs==4.3.8
+regex==2024.11.6
+datasets==4.0.0
+preshed==3.0.10
+aiohappyeyeballs==2.6.1
+uvloop==0.21.0
+sentry-sdk==2.32.0
+virtualenv==20.31.2
+lazy_loader==0.4
+rich==14.0.0
+pycocotools==2.0.10
+timm==0.4.12
+rich-toolkit==0.14.8
+fastapi-cli==0.0.8
+antlr4-python3-runtime==4.9.3
+salesforce-lavis==1.0.2
+Pygments==2.19.2
+gitdb==4.0.12
+six==1.17.0
+verl==0.3.2.dev0
+smmap==5.0.2
+fastapi-cloud-cli==0.1.4
+opencensus==0.11.4
+annotated-types==0.7.0
+xxhash==3.5.0
+frozenlist==1.7.0
+pyzmq==27.0.0
+Jinja2==3.1.6
+ptyprocess==0.7.0
+interegular==0.3.3
+opentelemetry-semantic-conventions==0.47b0
+jiter==0.10.0
+idna==3.10
+typing_extensions==4.14.1
+nvidia-cusolver-cu12==11.6.1.9
+propcache==0.3.2
+nest-asyncio==1.6.0
+pillow==11.3.0
+tenacity==9.1.2
+sentencepiece==0.2.0
+portalocker==3.2.0
+matplotlib-inline==0.1.7
+pandas==2.3.1
+compressed-tensors==0.9.3
+typing-inspection==0.4.1
+nltk==3.9.1
+opencv-python-headless==4.12.0.88
+dnspython==2.7.0
+tokenizers==0.21.2
+wheel==0.45.1
+python-multipart==0.0.20
+catalogue==2.0.10
+smart_open==7.3.0.post1
+multidict==6.6.3
+xgrammar==0.1.18
+aiosignal==1.4.0
+pybase64==1.4.1
+blake3==1.0.5
+certifi==2025.7.14
+torchdata==0.11.0
+qwen-vl-utils==0.0.11
+nvidia-nvjitlink-cu12==12.4.127
+urllib3==2.5.0
+aiohttp-cors==0.8.1
+outlines_core==0.1.26
+pydantic-extra-types==2.10.5
+filelock==3.18.0
+airportsdata==20250706
+ipython==8.37.0
+pydantic==2.11.7
+cloudpickle==3.1.1
+torchaudio==2.6.0
+tiktoken==0.9.0
+pexpect==4.9.0
+flash-attn==2.7.1.post1
+nvidia-nvtx-cu12==12.4.127
+bleach==6.2.0
+watchfiles==1.1.0
+uvicorn==0.35.0
+numba==0.61.2
+tornado==6.5.1
+networkx==3.4.2
+sympy==1.13.1
+watchdog==6.0.0
+kaggle==1.7.4.5
+pyarrow==20.0.0
+accelerate==1.8.1
+mpmath==1.3.0
+lightning-utilities==0.14.3
+codetiming==1.4.0
+ftfy==6.3.1
+triton==3.2.0
+referencing==0.36.2
+dill==0.3.8
+language_data==1.3.0
+python-magic==0.4.27
+wasabi==1.1.3
+pyvers==0.1.0
+murmurhash==1.0.13
+mathruler==0.1.0
+jsonschema-specifications==2025.4.1
+blinker==1.9.0
+imageio==2.37.0
+pycocoevalcap==1.2
+python-json-logger==3.3.0
+nvidia-cuda-cupti-cu12==12.4.127
+fairscale==0.4.4
+httptools==0.6.4
+identify==2.6.12
+streamlit==1.46.1
+mdurl==0.1.2
+decorator==5.2.1
+h11==0.16.0
+distlib==0.3.9
+webencodings==0.5.1
+transformers==4.52.4
+srsly==2.5.1
+fsspec==2025.3.0
+diskcache==5.6.3
+click==8.2.1
+blis==1.3.0
+colorful==0.5.7
+websockets==15.0.1
+liger_kernel==0.6.0
+lark==1.2.2
+cymem==2.0.11
+anyio==4.9.0
+opentelemetry-exporter-otlp-proto-grpc==1.26.0
+fastapi==0.116.1
+tensordict==0.9.1
+pre_commit==4.2.0
+wrapt==1.17.2
+opentelemetry-api==1.26.0
+nvidia-curand-cu12==10.3.5.147
+spacy==3.8.7
+narwhals==1.47.0
+exceptiongroup==1.3.0
+braceexpand==0.1.7
+rouge_score==0.1.2
+msgpack==1.1.1
+async-timeout==5.0.1
+protobuf==4.25.8
+huggingface-hub==0.33.4
+wandb==0.21.0
+httpx==0.28.1
+mistral_common==1.8.0
+gguf==0.17.1
+opentelemetry-proto==1.26.0
+nvidia-nccl-cu12==2.21.5
+wcwidth==0.2.13
+nvidia-cusparselt-cu12==0.6.2
+scikit-image==0.25.2
+cfgv==3.4.0
+markdown-it-py==3.0.0
+packaging==25.0
+charset-normalizer==3.4.2
+executing==2.2.0
+py-spy==0.4.0
+pure_eval==0.2.3
+safetensors==0.5.3
+pyasn1_modules==0.4.2
+jsonschema==4.24.0
+spacy-legacy==3.0.12
+astor==0.8.1
+shellingham==1.5.4
+langcodes==3.5.0
+pytz==2025.2
+distro==1.9.0
+google-api-core==2.25.1
+rsa==4.9.1
+multiprocess==0.70.16
+iopath==0.1.10
+weasel==0.4.1
+tifffile==2025.5.10
+nodeenv==1.9.1
+opentelemetry-exporter-prometheus==0.56b0
+einops==0.8.1
+lm-format-enforcer==0.10.11
+pydantic_core==2.33.2
+hf-xet==1.1.5
+opentelemetry-sdk==1.26.0
+ninja==1.11.1.4
+altair==5.5.0
+ray==2.47.1
+depyf==0.18.0
+attrs==25.3.0
+tqdm==4.67.1
+xformers==0.0.29.post2
+pydeck==0.9.1
+stack-data==0.6.3
+prometheus-fastapi-instrumentator==7.1.0
+grpcio==1.73.1
+torch==2.6.0
+plotly==6.2.0
+nvidia-cudnn-cu12==9.1.0.70
+python-slugify==8.0.4
+opencensus-context==0.1.3
+importlib_metadata==8.0.0
+orjson==3.10.18
+prompt_toolkit==3.0.51
+psutil==7.0.0
+opendatasets==0.1.22
+asttokens==3.0.0
+pycountry==24.6.1
+partial-json-parser==0.2.1.1.post6
+zipp==3.23.0
+pip==25.1
+MarkupSafe==3.0.2
+opentelemetry-exporter-otlp-proto-common==1.26.0
+llvmlite==0.44.0
+nvidia-cufft-cu12==11.2.1.3
+GitPython==3.1.44
+fastrlock==0.8.3
+PyYAML==6.0.2
+opentelemetry-exporter-otlp==1.26.0
+typer==0.16.0
+cloudpathlib==0.21.1
+toml==0.10.2
+pytorch-lightning==2.5.2
+marisa-trie==1.2.1
+msgspec==0.19.0
+llguidance==0.7.30
+google-auth==2.40.3
+traitlets==5.14.3
+rpds-py==0.26.0
+cachetools==5.5.2
+spacy-loggers==1.0.5
+nvidia-cuda-runtime-cu12==12.4.127
+aiohttp==3.12.14
+torchvision==0.21.0
+av==15.0.0
+torchmetrics==1.7.4
+nvidia-cusparse-cu12==12.3.1.170
+outlines==0.1.11
+jedi==0.19.2
+thinc==8.3.6
+prometheus_client==0.22.1
+httpcore==1.0.9
+py-cpuinfo==9.0.0
+modelscope==1.28.0
+verl==0.3.2.dev0
+jaraco.functools==4.0.1
+inflect==7.3.1
+jaraco.collections==5.1.0
+packaging==24.2
+wheel==0.45.1
+tomli==2.0.1
+platformdirs==4.2.2
+typing_extensions==4.12.2
+more-itertools==10.3.0
+autocommand==2.2.2
+jaraco.text==3.12.1
+importlib_metadata==8.0.0
+jaraco.context==5.3.0
+zipp==3.19.2
+backports.tarfile==1.2.0
+typeguard==4.3.0

EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "os":  "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.0",
+  "startedAt":  "2025-07-16T13:03:06.627811Z",
+  "args":  [
+    "--node-ip-address=10.1.6.45",
+    "--node-manager-port=42325",
+    "--object-store-name=/tmp/ray/session_2025-07-16_20-51-10_730275_5196/sockets/plasma_store",
+    "--raylet-name=/tmp/ray/session_2025-07-16_20-51-10_730275_5196/sockets/raylet",
+    "--redis-address=None",
+    "--metrics-agent-port=54069",
+    "--logging-rotate-bytes=536870912",
+    "--logging-rotate-backup-count=5",
+    "--runtime-env-agent-port=57480",
+    "--gcs-address=10.1.6.45:54882",
+    "--session-name=session_2025-07-16_20-51-10_730275_5196",
+    "--temp-dir=/tmp/ray",
+    "--webui=127.0.0.1:8265",
+    "--cluster-id=a69f29ea92b56cbc2f572353862768b5a0832495b7a590f4f273963a",
+    "--startup-token=28",
+    "--worker-launch-time-ms=1752670273261",
+    "--node-id=e54e37f4f5b34463471871dbe5c90937958f768732bc6e9579a13842",
+    "--runtime-env-hash=-115784934",
+    "--enable-resource-isolation=false"
+  ],
+  "program":  "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py",
+  "git":  {
+    "remote":  "https://github.com/PorUna-byte/PAR.git",
+    "commit":  "b8caf406aa1699c788f0ca6e44a1769452c317db"
+  },
+  "root":  "/nas/shared/kilab/wangyujia/EasyR1-new/examples",
+  "host":  "dsw-297442-5bd684fbff-4l96r",
+  "executable":  "/root/miniconda3/envs/easyr1-new/bin/python3",
+  "cpu_count":  28,
+  "cpu_count_logical":  28,
+  "gpu":  "NVIDIA A800-SXM4-80GB",
+  "gpu_count":  4,
+  "disk":  {
+    "/":  {
+      "total":  "1623302262784",
+      "used":  "1165746176"
+    }
+  },
+  "memory":  {
+    "total":  "549755813888"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-c783413d-e4e1-22c5-7c48-9296c28b08a0"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-0ad82850-a679-fa6b-9200-a26edb1bb8a4"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-e73b7d7b-4455-62ee-ec7e-a2eb1d845e07"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-71ee45de-57b2-ac7c-13c1-08a1f197eb20"
+    }
+  ],
+  "cudaVersion":  "12.1",
+  "writerId":  "t6v0x6ljtdqkxmc6nxsvdn00ede7tanp"
+}

EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-07-16T21:03:07.422600635+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-07-16T21:03:27.915788626+08:00","level":"INFO","msg":"stream: created new stream","id":"lkflebyj"}
+{"time":"2025-07-16T21:03:27.937736115+08:00","level":"INFO","msg":"stream: started","id":"lkflebyj"}
+{"time":"2025-07-16T21:03:27.937745307+08:00","level":"INFO","msg":"handler: started","stream_id":"lkflebyj"}
+{"time":"2025-07-16T21:03:27.937759674+08:00","level":"INFO","msg":"sender: started","stream_id":"lkflebyj"}
+{"time":"2025-07-16T21:03:27.937780163+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"lkflebyj"}

EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log ADDED Viewed

	@@ -0,0 +1,21 @@

+2025-07-16 21:03:07,126 INFO    MainThread:7695 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-07-16 21:03:07,126 INFO    MainThread:7695 [wandb_setup.py:_flush():80] Configure stats pid to 7695
+2025-07-16 21:03:07,126 INFO    MainThread:7695 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-07-16 21:03:07,126 INFO    MainThread:7695 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings
+2025-07-16 21:03:07,127 INFO    MainThread:7695 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-07-16 21:03:07,127 INFO    MainThread:7695 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log
+2025-07-16 21:03:07,127 INFO    MainThread:7695 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log
+2025-07-16 21:03:07,127 INFO    MainThread:7695 [wandb_init.py:init():830] calling init triggers
+2025-07-16 21:03:07,127 INFO    MainThread:7695 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 
'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 
'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 4, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}}
+2025-07-16 21:03:07,127 INFO    MainThread:7695 [wandb_init.py:init():871] starting backend
+2025-07-16 21:03:07,349 INFO    MainThread:7695 [wandb_init.py:init():874] sending inform_init request
+2025-07-16 21:03:07,374 INFO    MainThread:7695 [wandb_init.py:init():882] backend started and connected
+2025-07-16 21:03:07,388 INFO    MainThread:7695 [wandb_init.py:init():953] updated telemetry
+2025-07-16 21:03:08,265 INFO    MainThread:7695 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-07-16 21:03:32,572 INFO    MainThread:7695 [wandb_init.py:init():1029] starting run threads in backend
+2025-07-16 21:03:32,900 INFO    MainThread:7695 [wandb_run.py:_console_start():2458] atexit reg
+2025-07-16 21:03:32,901 INFO    MainThread:7695 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-07-16 21:03:32,907 INFO    MainThread:7695 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-07-16 21:03:32,912 INFO    MainThread:7695 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-07-16 21:03:32,949 INFO    MainThread:7695 [wandb_init.py:init():1075] run started, returning control to user process

EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/run-lkflebyj.wandb ADDED Viewed

File without changes

EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+_wandb:
+    value:
+        cli_version: 0.21.0
+        e:
+            repix6q725hnzsubljgya3pkb0pg0b9q:
+                args:
+                    - --node-ip-address=10.1.5.237
+                    - --node-manager-port=37853
+                    - --object-store-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/plasma_store
+                    - --raylet-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/raylet
+                    - --redis-address=None
+                    - --metrics-agent-port=43790
+                    - --logging-rotate-bytes=536870912
+                    - --logging-rotate-backup-count=5
+                    - --runtime-env-agent-port=63904
+                    - --gcs-address=10.1.5.237:56758
+                    - --session-name=session_2025-07-18_15-56-28_336135_54391
+                    - --temp-dir=/tmp/ray
+                    - --webui=127.0.0.1:8265
+                    - --cluster-id=cc22236bcaa2a9ab2bdb3c76723ef15af4933b041414da957aa668b9
+                    - --startup-token=64
+                    - --worker-launch-time-ms=1752825390762
+                    - --node-id=0967616139eea74249995a5549bf4039d244c259acd06a5a8fe0b7aa
+                    - --runtime-env-hash=-115784934
+                    - --enable-resource-isolation=false
+                cpu_count: 64
+                cpu_count_logical: 64
+                cudaVersion: "12.1"
+                disk:
+                    /:
+                        total: "1623302262784"
+                        used: "1224904704"
+                email: gia0603yucca@gmail.com
+                executable: /root/miniconda3/envs/easyr1-new/bin/python3
+                git:
+                    commit: b8caf406aa1699c788f0ca6e44a1769452c317db
+                    remote: https://github.com/PorUna-byte/PAR.git
+                gpu: NVIDIA A800-SXM4-80GB
+                gpu_count: 8
+                gpu_nvidia:
+                    - architecture: Ampere
+                      name: NVIDIA A800-SXM4-80GB
+                      uuid: GPU-f7e858cd-ae03-031d-b834-86bf87923211
+                    - architecture: Ampere
+                      name: NVIDIA A800-SXM4-80GB
+                      uuid: GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0
+                    - architecture: Ampere
+                      name: NVIDIA A800-SXM4-80GB
+                      uuid: GPU-1bba2921-208c-d0ad-1a05-25fc85d62630
+                    - architecture: Ampere
+                      name: NVIDIA A800-SXM4-80GB
+                      uuid: GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71
+                    - architecture: Ampere
+                      name: NVIDIA A800-SXM4-80GB
+                      uuid: GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b
+                    - architecture: Ampere
+                      name: NVIDIA A800-SXM4-80GB
+                      uuid: GPU-becb8d59-2ab7-b50d-5770-183c6478747a
+                    - architecture: Ampere
+                      name: NVIDIA A800-SXM4-80GB
+                      uuid: GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40
+                    - architecture: Ampere
+                      name: NVIDIA A800-SXM4-80GB
+                      uuid: GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655
+                host: dsw-266702-dc4b748ff-f7c66
+                memory:
+                    total: "549755813888"
+                os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35
+                program: /root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py
+                python: CPython 3.10.0
+                root: /nas/shared/kilab/wangyujia/EasyR1-new/examples
+                startedAt: "2025-07-18T08:00:33.186442Z"
+                writerId: repix6q725hnzsubljgya3pkb0pg0b9q
+        m: []
+        python_version: 3.10.0
+        t:
+            "1":
+                - 1
+                - 9
+                - 11
+                - 30
+                - 33
+                - 41
+                - 49
+                - 51
+                - 63
+                - 71
+                - 95
+                - 98
+                - 103
+                - 105
+            "2":
+                - 1
+                - 9
+                - 11
+                - 30
+                - 33
+                - 41
+                - 49
+                - 51
+                - 63
+                - 71
+                - 95
+                - 98
+                - 103
+                - 105
+            "3":
+                - 2
+                - 13
+                - 16
+            "4": 3.10.0
+            "5": 0.21.0
+            "6": 4.52.4
+            "12": 0.21.0
+            "13": linux-x86_64
+algorithm:
+    value:
+        adv_estimator: grpo
+        disable_kl: false
+        filter_high: 0.99
+        filter_key: overall
+        filter_low: 0.01
+        gamma: 1
+        kl_coef: 0.01
+        kl_horizon: 10000
+        kl_penalty: low_var_kl
+        kl_target: 0.1
+        kl_type: fixed
+        lam: 1
+        online_filtering: false
+        use_kl_loss: true
+data:
+    value:
+        answer_key: answer
+        filter_overlong_prompts: true
+        filter_overlong_prompts_workers: 16
+        format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja
+        image_dir: null
+        image_key: images
+        max_pixels: 4194304
+        max_prompt_length: 4096
+        max_response_length: 16384
+        min_pixels: 262144
+        mini_rollout_batch_size: null
+        override_chat_template: null
+        prompt_key: question
+        protein_key: protein
+        rollout_batch_size: 128
+        seed: 1
+        shuffle: true
+        train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl
+        val_batch_size: 256
+        val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl
+        video_fps: 2
+        video_key: videos
+trainer:
+    value:
+        critic_warmup: 0
+        experiment_name: qwen2.5_7b_bio_06182042
+        load_checkpoint_path: null
+        logger:
+            - console
+            - wandb
+        max_steps: null
+        max_try_make_batch: 20
+        n_gpus_per_node: 8
+        nnodes: 1
+        project_name: easy_r1
+        save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042
+        save_freq: 5
+        save_limit: 3
+        save_model_only: false
+        total_epochs: 1
+        val_before_train: true
+        val_freq: 5
+        val_generations_to_log: 3
+        val_only: false
+worker:
+    value:
+        actor:
+            clip_ratio_dual: 3
+            clip_ratio_high: 0.3
+            clip_ratio_low: 0.2
+            disable_kl: false
+            fsdp:
+                enable_cpu_offload: false
+                enable_full_shard: true
+                enable_rank0_init: true
+                fsdp_size: -1
+                mp_buffer_dtype: fp32
+                mp_param_dtype: bf16
+                mp_reduce_dtype: fp32
+                torch_dtype: null
+                use_orig_params: false
+            global_batch_size: 64
+            global_batch_size_per_device: -1
+            kl_coef: 0.01
+            kl_penalty: low_var_kl
+            loss_avg_mode: token
+            max_grad_norm: 1
+            micro_batch_size_per_device_for_experience: 16
+            micro_batch_size_per_device_for_update: 2
+            model:
+                enable_gradient_checkpointing: true
+                freeze_vision_tower: false
+                model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model
+                tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model
+                trust_remote_code: false
+            offload:
+                offload_optimizer: true
+                offload_params: true
+            optim:
+                betas:
+                    - 0.9
+                    - 0.999
+                lr: 1e-06
+                lr_warmup_ratio: 0
+                lr_warmup_steps: null
+                min_lr_ratio: null
+                strategy: adamw
+                training_steps: 72
+                warmup_style: constant
+                weight_decay: 0.01
+            padding_free: true
+            ppo_epochs: 1
+            strategy: fsdp
+            ulysses_size: 1
+            use_kl_loss: true
+            use_torch_compile: true
+        critic:
+            cliprange_value: 0.5
+            fsdp:
+                enable_cpu_offload: false
+                enable_full_shard: true
+                enable_rank0_init: true
+                fsdp_size: -1
+                mp_buffer_dtype: fp32
+                mp_param_dtype: bf16
+                mp_reduce_dtype: fp32
+                torch_dtype: null
+                use_orig_params: false
+            global_batch_size: 256
+            global_batch_size_per_device: -1
+            loss_avg_mode: token
+            max_grad_norm: 1
+            micro_batch_size_per_device_for_experience: 16
+            micro_batch_size_per_device_for_update: 4
+            model:
+                enable_gradient_checkpointing: true
+                freeze_vision_tower: false
+                model_path: null
+                tokenizer_path: null
+                trust_remote_code: true
+            offload:
+                offload_optimizer: false
+                offload_params: false
+            optim:
+                betas:
+                    - 0.9
+                    - 0.999
+                lr: 1e-06
+                lr_warmup_ratio: 0
+                lr_warmup_steps: null
+                min_lr_ratio: null
+                strategy: adamw
+                training_steps: 72
+                warmup_style: constant
+                weight_decay: 0.01
+            padding_free: false
+            ppo_epochs: 1
+            strategy: fsdp
+            ulysses_size: 1
+        hybrid_engine: true
+        ref:
+            fsdp:
+                enable_cpu_offload: true
+                enable_full_shard: true
+                enable_rank0_init: true
+                fsdp_size: -1
+                mp_buffer_dtype: fp32
+                mp_param_dtype: bf16
+                mp_reduce_dtype: fp32
+                torch_dtype: null
+                use_orig_params: false
+            micro_batch_size_per_device_for_experience: 16
+            offload:
+                offload_optimizer: false
+                offload_params: false
+            padding_free: true
+            strategy: fsdp
+            ulysses_size: 1
+            use_torch_compile: true
+        reward:
+            num_cpus: 1
+            reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py
+            reward_function_name: main
+            reward_type: batch
+            skip_special_tokens: true
+        rollout:
+            disable_log_stats: true
+            disable_tqdm: false
+            dtype: bf16
+            enable_chunked_prefill: false
+            enforce_eager: false
+            gpu_memory_utilization: 0.6
+            ignore_eos: false
+            limit_images: 0
+            max_model_len: null
+            max_num_batched_tokens: 24576
+            "n": 5
+            name: vllm
+            prompt_length: 4096
+            response_length: 16384
+            seed: 1
+            temperature: 1
+            tensor_parallel_size: 1
+            top_k: -1
+            top_p: 0.99
+            trust_remote_code: false
+            val_override_config:
+                "n": 1
+                temperature: 0.5

EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/output.log ADDED Viewed

	@@ -0,0 +1,72 @@

+{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 59301, "uuid": "79b41be0b4cb4caea00399d5e67f3adb", "closed": false}
+Start validation...
+Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::WorkerDict.actor_rollout_ref_prepare_rollout_engine()[39m (pid=61215, ip=10.1.5.237, actor_id=8dbb70fdf561d45e1bb95fbd01000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7fc005e599c0>)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
+    return getattr(self.worker_dict[key], name)(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
+    return func(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
+    self.rollout_sharding_manager.load_vllm_and_sync_weights()
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
+    if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
+AttributeError: 'str' object has no attribute 'wake_up'
+Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::WorkerDict.actor_rollout_ref_prepare_rollout_engine()[39m (pid=61214, ip=10.1.5.237, actor_id=12428909aea9647197558b3701000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7f02884019c0>)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
+    return getattr(self.worker_dict[key], name)(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
+    return func(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
+    self.rollout_sharding_manager.load_vllm_and_sync_weights()
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
+    if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
+AttributeError: 'str' object has no attribute 'wake_up'
+Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::WorkerDict.actor_rollout_ref_prepare_rollout_engine()[39m (pid=61213, ip=10.1.5.237, actor_id=17a3ff05d33225db9d5f3d2001000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7fc706441930>)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
+    return getattr(self.worker_dict[key], name)(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
+    return func(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
+    self.rollout_sharding_manager.load_vllm_and_sync_weights()
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
+    if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
+AttributeError: 'str' object has no attribute 'wake_up'
+Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::WorkerDict.actor_rollout_ref_prepare_rollout_engine()[39m (pid=61212, ip=10.1.5.237, actor_id=8038d6b87c20ea82378ff46b01000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7edfcc7299c0>)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
+    return getattr(self.worker_dict[key], name)(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
+    return func(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
+    self.rollout_sharding_manager.load_vllm_and_sync_weights()
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
+    if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
+AttributeError: 'str' object has no attribute 'wake_up'
+Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::WorkerDict.actor_rollout_ref_prepare_rollout_engine()[39m (pid=61211, ip=10.1.5.237, actor_id=1e5423d0856a1d601b82502801000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7f26f00119f0>)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
+    return getattr(self.worker_dict[key], name)(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
+    return func(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
+    self.rollout_sharding_manager.load_vllm_and_sync_weights()
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
+    if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
+AttributeError: 'str' object has no attribute 'wake_up'
+Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::WorkerDict.actor_rollout_ref_prepare_rollout_engine()[39m (pid=61209, ip=10.1.5.237, actor_id=061e0c8de42fd2b69b89561501000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7fba20e05a20>)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
+    return getattr(self.worker_dict[key], name)(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
+    return func(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
+    self.rollout_sharding_manager.load_vllm_and_sync_weights()
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
+    if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
+AttributeError: 'str' object has no attribute 'wake_up'
+Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::WorkerDict.actor_rollout_ref_prepare_rollout_engine()[39m (pid=60985, ip=10.1.5.237, actor_id=8073bd5c566ab2faaa122c0e01000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7f37c0e19780>)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func
+    return getattr(self.worker_dict[key], name)(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner
+    return func(*args, **kwargs)
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine
+    self.rollout_sharding_manager.load_vllm_and_sync_weights()
+  File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights
+    if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
+AttributeError: 'str' object has no attribute 'wake_up'

EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,295 @@

+setproctitle==1.2.2
+colorama==0.4.6
+psutil==7.0.0
+attrs==25.3.0
+tqdm==4.67.1
+langcodes==3.5.0
+nvidia-cublas-cu12==12.4.5.8
+airportsdata==20250706
+absl-py==2.3.1
+hf-xet==1.1.5
+opentelemetry-exporter-otlp-proto-http==1.26.0
+interegular==0.3.3
+tifffile==2025.5.10
+nvidia-cufile-cu12==1.11.1.6
+nltk==3.9.1
+tokenizers==0.21.2
+salesforce-lavis==1.0.2
+tzdata==2025.2
+prometheus_client==0.22.1
+google-auth==2.40.3
+ipython==8.37.0
+pydantic==2.11.7
+mathruler==0.1.0
+six==1.17.0
+python-dateutil==2.9.0.post0
+requests==2.32.4
+mistral_common==1.8.0
+huggingface-hub==0.33.4
+preshed==3.0.10
+torchmetrics==1.7.4
+blinker==1.9.0
+nvidia-cusparse-cu12==12.3.1.170
+rich-toolkit==0.14.8
+pytz==2025.2
+pandas==2.3.1
+packaging==25.0
+async-timeout==5.0.1
+diskcache==5.6.3
+google-api-core==2.25.1
+parso==0.8.4
+joblib==1.5.1
+pycountry==24.6.1
+triton==3.2.0
+pybase64==1.4.1
+marisa-trie==1.2.1
+plotly==6.2.0
+wandb==0.21.0
+PyYAML==6.0.2
+regex==2024.11.6
+idna==3.10
+numba==0.61.2
+nvidia-curand-cu12==10.3.5.147
+uvicorn==0.35.0
+srsly==2.5.1
+confection==0.1.5
+opentelemetry-semantic-conventions-ai==0.4.11
+typing-inspection==0.4.1
+opencv-python-headless==4.12.0.88
+pyasn1==0.6.1
+av==15.0.0
+xgrammar==0.1.18
+distlib==0.3.9
+datasets==4.0.0
+networkx==3.4.2
+prometheus-fastapi-instrumentator==7.1.0
+lightning-utilities==0.14.3
+executing==2.2.0
+pycocoevalcap==1.2
+h11==0.16.0
+certifi==2025.7.14
+sniffio==1.3.1
+wheel==0.45.1
+transformers==4.52.4
+wrapt==1.17.2
+jsonschema-specifications==2025.4.1
+mpmath==1.3.0
+msgspec==0.19.0
+py-cpuinfo==9.0.0
+contexttimer==0.3.3
+watchdog==6.0.0
+pexpect==4.9.0
+webencodings==0.5.1
+verl==0.3.2.dev0
+webdataset==1.0.2
+httpcore==1.0.9
+opentelemetry-exporter-otlp==1.26.0
+lm-format-enforcer==0.10.11
+googleapis-common-protos==1.70.0
+pyzmq==27.0.0
+fsspec==2025.3.0
+grpcio==1.73.1
+cymem==2.0.11
+timm==0.4.12
+zipp==3.23.0
+llguidance==0.7.30
+opencensus-context==0.1.3
+omegaconf==2.3.0
+python-json-logger==3.3.0
+opentelemetry-exporter-otlp-proto-common==1.26.0
+watchfiles==1.1.0
+nvidia-nvjitlink-cu12==12.4.127
+peft==0.16.0
+sentry-sdk==2.32.0
+rpds-py==0.26.0
+email_validator==2.2.0
+nodeenv==1.9.1
+distro==1.9.0
+jiter==0.10.0
+compressed-tensors==0.9.3
+annotated-types==0.7.0
+matplotlib-inline==0.1.7
+rich==14.0.0
+GitPython==3.1.44
+lazy_loader==0.4
+fastapi-cloud-cli==0.1.4
+cupy-cuda12x==13.5.1
+prompt_toolkit==3.0.51
+gguf==0.17.1
+blis==1.3.0
+thinc==8.3.6
+cloudpickle==3.1.1
+multidict==6.6.3
+nvidia-nvtx-cu12==12.4.127
+flash-attn==2.7.1.post1
+pyasn1_modules==0.4.2
+rsa==4.9.1
+weasel==0.4.1
+uvloop==0.21.0
+click==8.2.1
+numpy==2.2.6
+torchdata==0.11.0
+pylatexenc==2.10
+cachetools==5.5.2
+Jinja2==3.1.6
+typer==0.16.0
+nvidia-cudnn-cu12==9.1.0.70
+fastapi-cli==0.0.8
+xxhash==3.5.0
+tornado==6.5.1
+scipy==1.15.3
+rouge_score==0.1.2
+cloudpathlib==0.21.1
+streamlit==1.46.1
+jedi==0.19.2
+referencing==0.36.2
+accelerate==1.8.1
+decord==0.6.0
+setuptools==78.1.1
+mdurl==0.1.2
+vllm==0.8.5.post1
+identify==2.6.12
+python-slugify==8.0.4
+dnspython==2.7.0
+dill==0.3.8
+opentelemetry-proto==1.26.0
+orjson==3.10.18
+msgpack==1.1.1
+aiohttp==3.12.14
+aiosignal==1.4.0
+typing_extensions==4.14.1
+tiktoken==0.9.0
+catalogue==2.0.10
+platformdirs==4.3.8
+narwhals==1.47.0
+antlr4-python3-runtime==4.9.3
+pydantic-extra-types==2.10.5
+nvidia-cusolver-cu12==11.6.1.9
+kaggle==1.7.4.5
+propcache==0.3.2
+urllib3==2.5.0
+opentelemetry-exporter-otlp-proto-grpc==1.26.0
+pydeck==0.9.1
+nvidia-cufft-cu12==11.2.1.3
+pyarrow==20.0.0
+nvidia-nccl-cu12==2.21.5
+httptools==0.6.4
+qwen-vl-utils==0.0.11
+markdown-it-py==3.0.0
+gitdb==4.0.12
+altair==5.5.0
+torchvision==0.21.0
+python-magic==0.4.27
+iopath==0.1.10
+ray==2.47.1
+blake3==1.0.5
+pillow==11.3.0
+python-dotenv==1.1.1
+torchaudio==2.6.0
+partial-json-parser==0.2.1.1.post6
+httpx==0.28.1
+torch==2.6.0
+anyio==4.9.0
+fairscale==0.4.4
+traitlets==5.14.3
+pure_eval==0.2.3
+sympy==1.13.1
+nvidia-cusparselt-cu12==0.6.2
+jsonschema==4.24.0
+imageio==2.37.0
+opencensus==0.11.4
+stack-data==0.6.3
+shellingham==1.5.4
+tensordict==0.9.1
+nvidia-cuda-runtime-cu12==12.4.127
+nest-asyncio==1.6.0
+einops==0.8.1
+lark==1.2.2
+tenacity==9.1.2
+virtualenv==20.31.2
+ptyprocess==0.7.0
+outlines==0.1.11
+depyf==0.18.0
+starlette==0.47.1
+cfgv==3.4.0
+pre_commit==4.2.0
+language_data==1.3.0
+pip==25.1
+Pygments==2.19.2
+nvidia-cuda-cupti-cu12==12.4.127
+protobuf==4.25.8
+safetensors==0.5.3
+text-unidecode==1.3
+wcwidth==0.2.13
+charset-normalizer==3.4.2
+aiohappyeyeballs==2.6.1
+outlines_core==0.1.26
+fastrlock==0.8.3
+asttokens==3.0.0
+psutil==7.0.0
+smmap==5.0.2
+exceptiongroup==1.3.0
+murmurhash==1.0.13
+pytorch-lightning==2.5.2
+filelock==3.18.0
+astor==0.8.1
+py-spy==0.4.0
+pydantic_core==2.33.2
+colorful==0.5.7
+fastapi==0.116.1
+opentelemetry-api==1.26.0
+openai==1.90.0
+ninja==1.11.1.4
+opentelemetry-semantic-conventions==0.47b0
+spacy-legacy==3.0.12
+opendatasets==0.1.22
+Deprecated==1.2.18
+proto-plus==1.26.1
+rignore==0.6.2
+aiohttp-cors==0.8.1
+liger_kernel==0.6.0
+opentelemetry-exporter-prometheus==0.56b0
+python-multipart==0.0.20
+multiprocess==0.70.16
+opentelemetry-sdk==1.26.0
+decorator==5.2.1
+xformers==0.0.29.post2
+spacy==3.8.7
+pyvers==0.1.0
+pycocotools==2.0.10
+websockets==15.0.1
+wasabi==1.1.3
+frozenlist==1.7.0
+codetiming==1.4.0
+sentencepiece==0.2.0
+toml==0.10.2
+scikit-image==0.25.2
+ftfy==6.3.1
+bleach==6.2.0
+yarl==1.20.1
+nvidia-cuda-nvrtc-cu12==12.4.127
+importlib_metadata==8.0.0
+spacy-loggers==1.0.5
+smart_open==7.3.0.post1
+portalocker==3.2.0
+llvmlite==0.44.0
+MarkupSafe==3.0.2
+braceexpand==0.1.7
+modelscope==1.28.0
+verl==0.3.2.dev0
+jaraco.context==5.3.0
+more-itertools==10.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+platformdirs==4.2.2
+packaging==24.2
+wheel==0.45.1
+zipp==3.19.2
+inflect==7.3.1
+autocommand==2.2.2
+typeguard==4.3.0
+jaraco.collections==5.1.0
+backports.tarfile==1.2.0
+tomli==2.0.1
+importlib_metadata==8.0.0
+typing_extensions==4.12.2

EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,92 @@

+{
+  "os":  "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.0",
+  "startedAt":  "2025-07-18T08:00:33.186442Z",
+  "args":  [
+    "--node-ip-address=10.1.5.237",
+    "--node-manager-port=37853",
+    "--object-store-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/plasma_store",
+    "--raylet-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/raylet",
+    "--redis-address=None",
+    "--metrics-agent-port=43790",
+    "--logging-rotate-bytes=536870912",
+    "--logging-rotate-backup-count=5",
+    "--runtime-env-agent-port=63904",
+    "--gcs-address=10.1.5.237:56758",
+    "--session-name=session_2025-07-18_15-56-28_336135_54391",
+    "--temp-dir=/tmp/ray",
+    "--webui=127.0.0.1:8265",
+    "--cluster-id=cc22236bcaa2a9ab2bdb3c76723ef15af4933b041414da957aa668b9",
+    "--startup-token=64",
+    "--worker-launch-time-ms=1752825390762",
+    "--node-id=0967616139eea74249995a5549bf4039d244c259acd06a5a8fe0b7aa",
+    "--runtime-env-hash=-115784934",
+    "--enable-resource-isolation=false"
+  ],
+  "program":  "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py",
+  "git":  {
+    "remote":  "https://github.com/PorUna-byte/PAR.git",
+    "commit":  "b8caf406aa1699c788f0ca6e44a1769452c317db"
+  },
+  "email":  "gia0603yucca@gmail.com",
+  "root":  "/nas/shared/kilab/wangyujia/EasyR1-new/examples",
+  "host":  "dsw-266702-dc4b748ff-f7c66",
+  "executable":  "/root/miniconda3/envs/easyr1-new/bin/python3",
+  "cpu_count":  64,
+  "cpu_count_logical":  64,
+  "gpu":  "NVIDIA A800-SXM4-80GB",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "1623302262784",
+      "used":  "1224904704"
+    }
+  },
+  "memory":  {
+    "total":  "549755813888"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-f7e858cd-ae03-031d-b834-86bf87923211"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-becb8d59-2ab7-b50d-5770-183c6478747a"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655"
+    }
+  ],
+  "cudaVersion":  "12.1",
+  "writerId":  "repix6q725hnzsubljgya3pkb0pg0b9q"
+}

EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_wandb":{"runtime":1},"_runtime":1}

EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,15 @@

+{"time":"2025-07-18T16:00:33.944898175+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-07-18T16:01:04.056910886+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-07-18T16:01:14.919464259+08:00","level":"INFO","msg":"stream: created new stream","id":"nji9xqxs"}
+{"time":"2025-07-18T16:01:14.926346872+08:00","level":"INFO","msg":"sender: started","stream_id":"nji9xqxs"}
+{"time":"2025-07-18T16:01:14.926359513+08:00","level":"INFO","msg":"stream: started","id":"nji9xqxs"}
+{"time":"2025-07-18T16:01:14.926369749+08:00","level":"INFO","msg":"handler: started","stream_id":"nji9xqxs"}
+{"time":"2025-07-18T16:01:14.926391685+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"nji9xqxs"}
+{"time":"2025-07-18T16:01:44.221082826+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading data","runtime_seconds":2.593669605},{"desc":"updating run metadata","runtime_seconds":2.593493161},{"desc":"uploading wandb-metadata.json","runtime_seconds":1.024626407}],"total_operations":3}}
+{"time":"2025-07-18T16:01:58.697029208+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/nji9xqxs/file_stream\": unexpected EOF"}
+{"time":"2025-07-18T16:02:17.601004486+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-07-18T16:02:38.850804129+08:00","level":"INFO","msg":"stream: closing","id":"nji9xqxs"}
+{"time":"2025-07-18T16:02:38.850824284+08:00","level":"INFO","msg":"handler: closed","stream_id":"nji9xqxs"}
+{"time":"2025-07-18T16:02:38.850832353+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"nji9xqxs"}
+{"time":"2025-07-18T16:02:38.850837848+08:00","level":"INFO","msg":"sender: closed","stream_id":"nji9xqxs"}
+{"time":"2025-07-18T16:02:38.858004163+08:00","level":"INFO","msg":"stream: closed","id":"nji9xqxs"}

EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log ADDED Viewed

	@@ -0,0 +1,28 @@

+2025-07-18 16:00:33,700 INFO    MainThread:59301 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-07-18 16:00:33,700 INFO    MainThread:59301 [wandb_setup.py:_flush():80] Configure stats pid to 59301
+2025-07-18 16:00:33,700 INFO    MainThread:59301 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-07-18 16:00:33,701 INFO    MainThread:59301 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings
+2025-07-18 16:00:33,701 INFO    MainThread:59301 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-07-18 16:00:33,701 INFO    MainThread:59301 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log
+2025-07-18 16:00:33,701 INFO    MainThread:59301 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log
+2025-07-18 16:00:33,701 INFO    MainThread:59301 [wandb_init.py:init():830] calling init triggers
+2025-07-18 16:00:33,701 INFO    MainThread:59301 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 
'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 
'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}}
+2025-07-18 16:00:33,701 INFO    MainThread:59301 [wandb_init.py:init():871] starting backend
+2025-07-18 16:00:33,911 INFO    MainThread:59301 [wandb_init.py:init():874] sending inform_init request
+2025-07-18 16:00:33,914 INFO    MainThread:59301 [wandb_init.py:init():882] backend started and connected
+2025-07-18 16:00:33,934 INFO    MainThread:59301 [wandb_init.py:init():953] updated telemetry
+2025-07-18 16:00:34,824 INFO    MainThread:59301 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-07-18 16:01:41,621 INFO    MainThread:59301 [wandb_init.py:init():1029] starting run threads in backend
+2025-07-18 16:01:41,877 INFO    MainThread:59301 [wandb_run.py:_console_start():2458] atexit reg
+2025-07-18 16:01:41,877 INFO    MainThread:59301 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-07-18 16:01:41,899 INFO    MainThread:59301 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-07-18 16:01:41,899 INFO    MainThread:59301 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-07-18 16:01:41,927 INFO    MainThread:59301 [wandb_init.py:init():1075] run started, returning control to user process
+2025-07-18 16:01:43,199 INFO    MainThread:59301 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/nji9xqxs
+2025-07-18 16:01:43,199 INFO    MainThread:59301 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
+2025-07-18 16:01:43,207 INFO    MainThread:59301 [wandb_run.py:_restore():2405] restore
+2025-07-18 16:01:43,211 INFO    MainThread:59301 [wandb_run.py:_restore():2411] restore done
+2025-07-18 16:02:38,840 INFO    MainThread:59301 [wandb_run.py:_footer_history_summary_info():3903] rendering history
+2025-07-18 16:02:38,841 INFO    MainThread:59301 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
+2025-07-18 16:02:38,841 INFO    MainThread:59301 [wandb_run.py:_footer_sync_info():3864] logging synced files

EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/run-nji9xqxs.wandb ADDED Viewed

Binary file (15.9 kB). View file

EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/output.log ADDED Viewed

File without changes

EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,295 @@

+setproctitle==1.2.2
+colorama==0.4.6
+psutil==7.0.0
+attrs==25.3.0
+tqdm==4.67.1
+langcodes==3.5.0
+nvidia-cublas-cu12==12.4.5.8
+airportsdata==20250706
+absl-py==2.3.1
+hf-xet==1.1.5
+opentelemetry-exporter-otlp-proto-http==1.26.0
+interegular==0.3.3
+tifffile==2025.5.10
+nvidia-cufile-cu12==1.11.1.6
+nltk==3.9.1
+tokenizers==0.21.2
+salesforce-lavis==1.0.2
+tzdata==2025.2
+prometheus_client==0.22.1
+google-auth==2.40.3
+ipython==8.37.0
+pydantic==2.11.7
+mathruler==0.1.0
+six==1.17.0
+python-dateutil==2.9.0.post0
+requests==2.32.4
+mistral_common==1.8.0
+huggingface-hub==0.33.4
+preshed==3.0.10
+torchmetrics==1.7.4
+blinker==1.9.0
+nvidia-cusparse-cu12==12.3.1.170
+rich-toolkit==0.14.8
+pytz==2025.2
+pandas==2.3.1
+packaging==25.0
+async-timeout==5.0.1
+diskcache==5.6.3
+google-api-core==2.25.1
+parso==0.8.4
+joblib==1.5.1
+pycountry==24.6.1
+triton==3.2.0
+pybase64==1.4.1
+marisa-trie==1.2.1
+plotly==6.2.0
+wandb==0.21.0
+PyYAML==6.0.2
+regex==2024.11.6
+idna==3.10
+numba==0.61.2
+nvidia-curand-cu12==10.3.5.147
+uvicorn==0.35.0
+srsly==2.5.1
+confection==0.1.5
+opentelemetry-semantic-conventions-ai==0.4.11
+typing-inspection==0.4.1
+opencv-python-headless==4.12.0.88
+pyasn1==0.6.1
+av==15.0.0
+xgrammar==0.1.18
+distlib==0.3.9
+datasets==4.0.0
+networkx==3.4.2
+prometheus-fastapi-instrumentator==7.1.0
+lightning-utilities==0.14.3
+executing==2.2.0
+pycocoevalcap==1.2
+h11==0.16.0
+certifi==2025.7.14
+sniffio==1.3.1
+wheel==0.45.1
+transformers==4.52.4
+wrapt==1.17.2
+jsonschema-specifications==2025.4.1
+mpmath==1.3.0
+msgspec==0.19.0
+py-cpuinfo==9.0.0
+contexttimer==0.3.3
+watchdog==6.0.0
+pexpect==4.9.0
+webencodings==0.5.1
+verl==0.3.2.dev0
+webdataset==1.0.2
+httpcore==1.0.9
+opentelemetry-exporter-otlp==1.26.0
+lm-format-enforcer==0.10.11
+googleapis-common-protos==1.70.0
+pyzmq==27.0.0
+fsspec==2025.3.0
+grpcio==1.73.1
+cymem==2.0.11
+timm==0.4.12
+zipp==3.23.0
+llguidance==0.7.30
+opencensus-context==0.1.3
+omegaconf==2.3.0
+python-json-logger==3.3.0
+opentelemetry-exporter-otlp-proto-common==1.26.0
+watchfiles==1.1.0
+nvidia-nvjitlink-cu12==12.4.127
+peft==0.16.0
+sentry-sdk==2.32.0
+rpds-py==0.26.0
+email_validator==2.2.0
+nodeenv==1.9.1
+distro==1.9.0
+jiter==0.10.0
+compressed-tensors==0.9.3
+annotated-types==0.7.0
+matplotlib-inline==0.1.7
+rich==14.0.0
+GitPython==3.1.44
+lazy_loader==0.4
+fastapi-cloud-cli==0.1.4
+cupy-cuda12x==13.5.1
+prompt_toolkit==3.0.51
+gguf==0.17.1
+blis==1.3.0
+thinc==8.3.6
+cloudpickle==3.1.1
+multidict==6.6.3
+nvidia-nvtx-cu12==12.4.127
+flash-attn==2.7.1.post1
+pyasn1_modules==0.4.2
+rsa==4.9.1
+weasel==0.4.1
+uvloop==0.21.0
+click==8.2.1
+numpy==2.2.6
+torchdata==0.11.0
+pylatexenc==2.10
+cachetools==5.5.2
+Jinja2==3.1.6
+typer==0.16.0
+nvidia-cudnn-cu12==9.1.0.70
+fastapi-cli==0.0.8
+xxhash==3.5.0
+tornado==6.5.1
+scipy==1.15.3
+rouge_score==0.1.2
+cloudpathlib==0.21.1
+streamlit==1.46.1
+jedi==0.19.2
+referencing==0.36.2
+accelerate==1.8.1
+decord==0.6.0
+setuptools==78.1.1
+mdurl==0.1.2
+vllm==0.8.5.post1
+identify==2.6.12
+python-slugify==8.0.4
+dnspython==2.7.0
+dill==0.3.8
+opentelemetry-proto==1.26.0
+orjson==3.10.18
+msgpack==1.1.1
+aiohttp==3.12.14
+aiosignal==1.4.0
+typing_extensions==4.14.1
+tiktoken==0.9.0
+catalogue==2.0.10
+platformdirs==4.3.8
+narwhals==1.47.0
+antlr4-python3-runtime==4.9.3
+pydantic-extra-types==2.10.5
+nvidia-cusolver-cu12==11.6.1.9
+kaggle==1.7.4.5
+propcache==0.3.2
+urllib3==2.5.0
+opentelemetry-exporter-otlp-proto-grpc==1.26.0
+pydeck==0.9.1
+nvidia-cufft-cu12==11.2.1.3
+pyarrow==20.0.0
+nvidia-nccl-cu12==2.21.5
+httptools==0.6.4
+qwen-vl-utils==0.0.11
+markdown-it-py==3.0.0
+gitdb==4.0.12
+altair==5.5.0
+torchvision==0.21.0
+python-magic==0.4.27
+iopath==0.1.10
+ray==2.47.1
+blake3==1.0.5
+pillow==11.3.0
+python-dotenv==1.1.1
+torchaudio==2.6.0
+partial-json-parser==0.2.1.1.post6
+httpx==0.28.1
+torch==2.6.0
+anyio==4.9.0
+fairscale==0.4.4
+traitlets==5.14.3
+pure_eval==0.2.3
+sympy==1.13.1
+nvidia-cusparselt-cu12==0.6.2
+jsonschema==4.24.0
+imageio==2.37.0
+opencensus==0.11.4
+stack-data==0.6.3
+shellingham==1.5.4
+tensordict==0.9.1
+nvidia-cuda-runtime-cu12==12.4.127
+nest-asyncio==1.6.0
+einops==0.8.1
+lark==1.2.2
+tenacity==9.1.2
+virtualenv==20.31.2
+ptyprocess==0.7.0
+outlines==0.1.11
+depyf==0.18.0
+starlette==0.47.1
+cfgv==3.4.0
+pre_commit==4.2.0
+language_data==1.3.0
+pip==25.1
+Pygments==2.19.2
+nvidia-cuda-cupti-cu12==12.4.127
+protobuf==4.25.8
+safetensors==0.5.3
+text-unidecode==1.3
+wcwidth==0.2.13
+charset-normalizer==3.4.2
+aiohappyeyeballs==2.6.1
+outlines_core==0.1.26
+fastrlock==0.8.3
+asttokens==3.0.0
+psutil==7.0.0
+smmap==5.0.2
+exceptiongroup==1.3.0
+murmurhash==1.0.13
+pytorch-lightning==2.5.2
+filelock==3.18.0
+astor==0.8.1
+py-spy==0.4.0
+pydantic_core==2.33.2
+colorful==0.5.7
+fastapi==0.116.1
+opentelemetry-api==1.26.0
+openai==1.90.0
+ninja==1.11.1.4
+opentelemetry-semantic-conventions==0.47b0
+spacy-legacy==3.0.12
+opendatasets==0.1.22
+Deprecated==1.2.18
+proto-plus==1.26.1
+rignore==0.6.2
+aiohttp-cors==0.8.1
+liger_kernel==0.6.0
+opentelemetry-exporter-prometheus==0.56b0
+python-multipart==0.0.20
+multiprocess==0.70.16
+opentelemetry-sdk==1.26.0
+decorator==5.2.1
+xformers==0.0.29.post2
+spacy==3.8.7
+pyvers==0.1.0
+pycocotools==2.0.10
+websockets==15.0.1
+wasabi==1.1.3
+frozenlist==1.7.0
+codetiming==1.4.0
+sentencepiece==0.2.0
+toml==0.10.2
+scikit-image==0.25.2
+ftfy==6.3.1
+bleach==6.2.0
+yarl==1.20.1
+nvidia-cuda-nvrtc-cu12==12.4.127
+importlib_metadata==8.0.0
+spacy-loggers==1.0.5
+smart_open==7.3.0.post1
+portalocker==3.2.0
+llvmlite==0.44.0
+MarkupSafe==3.0.2
+braceexpand==0.1.7
+modelscope==1.28.0
+verl==0.3.2.dev0
+jaraco.context==5.3.0
+more-itertools==10.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+platformdirs==4.2.2
+packaging==24.2
+wheel==0.45.1
+zipp==3.19.2
+inflect==7.3.1
+autocommand==2.2.2
+typeguard==4.3.0
+jaraco.collections==5.1.0
+backports.tarfile==1.2.0
+tomli==2.0.1
+importlib_metadata==8.0.0
+typing_extensions==4.12.2

EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "os":  "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.0",
+  "startedAt":  "2025-07-18T10:01:27.794840Z",
+  "args":  [
+    "--node-ip-address=10.1.5.237",
+    "--node-manager-port=34033",
+    "--object-store-name=/tmp/ray/session_2025-07-18_17-59-46_929054_90432/sockets/plasma_store",
+    "--raylet-name=/tmp/ray/session_2025-07-18_17-59-46_929054_90432/sockets/raylet",
+    "--redis-address=None",
+    "--metrics-agent-port=52220",
+    "--logging-rotate-bytes=536870912",
+    "--logging-rotate-backup-count=5",
+    "--runtime-env-agent-port=58307",
+    "--gcs-address=10.1.5.237:63437",
+    "--session-name=session_2025-07-18_17-59-46_929054_90432",
+    "--temp-dir=/tmp/ray",
+    "--webui=127.0.0.1:8265",
+    "--cluster-id=2320bfb132f181fae6a438fbb8ba4302101825636e86b29ea49d2a26",
+    "--startup-token=64",
+    "--worker-launch-time-ms=1752832790343",
+    "--node-id=d351a5bfa85748ebf678bc24e7adda6ad59e09972b13108dbb01547f",
+    "--runtime-env-hash=-115784934",
+    "--enable-resource-isolation=false"
+  ],
+  "program":  "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py",
+  "git":  {
+    "remote":  "https://github.com/PorUna-byte/PAR.git",
+    "commit":  "b8caf406aa1699c788f0ca6e44a1769452c317db"
+  },
+  "email":  "gia0603yucca@gmail.com",
+  "root":  "/nas/shared/kilab/wangyujia/EasyR1-new/examples",
+  "host":  "dsw-266702-dc4b748ff-f7c66",
+  "executable":  "/root/miniconda3/envs/easyr1-new/bin/python3",
+  "writerId":  "qpm36h9mjv3m2bmimjfqh0pw0u9a4282"
+}

EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-07-18T18:01:28.970283308+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-07-18T18:01:36.389685825+08:00","level":"INFO","msg":"stream: created new stream","id":"wmarwr6l"}
+{"time":"2025-07-18T18:01:36.413332423+08:00","level":"INFO","msg":"handler: started","stream_id":"wmarwr6l"}
+{"time":"2025-07-18T18:01:36.413371741+08:00","level":"INFO","msg":"stream: started","id":"wmarwr6l"}
+{"time":"2025-07-18T18:01:36.413392401+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"wmarwr6l"}
+{"time":"2025-07-18T18:01:36.413389742+08:00","level":"INFO","msg":"sender: started","stream_id":"wmarwr6l"}

EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log ADDED Viewed

	@@ -0,0 +1,21 @@

+2025-07-18 18:01:28,690 INFO    MainThread:95226 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-07-18 18:01:28,690 INFO    MainThread:95226 [wandb_setup.py:_flush():80] Configure stats pid to 95226
+2025-07-18 18:01:28,690 INFO    MainThread:95226 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-07-18 18:01:28,690 INFO    MainThread:95226 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings
+2025-07-18 18:01:28,690 INFO    MainThread:95226 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-07-18 18:01:28,690 INFO    MainThread:95226 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log
+2025-07-18 18:01:28,690 INFO    MainThread:95226 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log
+2025-07-18 18:01:28,690 INFO    MainThread:95226 [wandb_init.py:init():830] calling init triggers
+2025-07-18 18:01:28,690 INFO    MainThread:95226 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 
'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 
'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}}
+2025-07-18 18:01:28,690 INFO    MainThread:95226 [wandb_init.py:init():871] starting backend
+2025-07-18 18:01:28,902 INFO    MainThread:95226 [wandb_init.py:init():874] sending inform_init request
+2025-07-18 18:01:28,904 INFO    MainThread:95226 [wandb_init.py:init():882] backend started and connected
+2025-07-18 18:01:28,909 INFO    MainThread:95226 [wandb_init.py:init():953] updated telemetry
+2025-07-18 18:01:29,464 INFO    MainThread:95226 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-07-18 18:01:40,777 INFO    MainThread:95226 [wandb_init.py:init():1029] starting run threads in backend
+2025-07-18 18:01:41,069 INFO    MainThread:95226 [wandb_run.py:_console_start():2458] atexit reg
+2025-07-18 18:01:41,069 INFO    MainThread:95226 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-07-18 18:01:41,079 INFO    MainThread:95226 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-07-18 18:01:41,079 INFO    MainThread:95226 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-07-18 18:01:41,125 INFO    MainThread:95226 [wandb_init.py:init():1075] run started, returning control to user process

EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/run-wmarwr6l.wandb ADDED Viewed

File without changes

EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/output.log ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ {"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 104882, "uuid": "0f066e81b2fc4d09a338174f40c2e400", "closed": false}
2	+ Start validation...

EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,295 @@

+setproctitle==1.2.2
+colorama==0.4.6
+psutil==7.0.0
+attrs==25.3.0
+tqdm==4.67.1
+langcodes==3.5.0
+nvidia-cublas-cu12==12.4.5.8
+airportsdata==20250706
+absl-py==2.3.1
+hf-xet==1.1.5
+opentelemetry-exporter-otlp-proto-http==1.26.0
+interegular==0.3.3
+tifffile==2025.5.10
+nvidia-cufile-cu12==1.11.1.6
+nltk==3.9.1
+tokenizers==0.21.2
+salesforce-lavis==1.0.2
+tzdata==2025.2
+prometheus_client==0.22.1
+google-auth==2.40.3
+ipython==8.37.0
+pydantic==2.11.7
+mathruler==0.1.0
+six==1.17.0
+python-dateutil==2.9.0.post0
+requests==2.32.4
+mistral_common==1.8.0
+huggingface-hub==0.33.4
+preshed==3.0.10
+torchmetrics==1.7.4
+blinker==1.9.0
+nvidia-cusparse-cu12==12.3.1.170
+rich-toolkit==0.14.8
+pytz==2025.2
+pandas==2.3.1
+packaging==25.0
+async-timeout==5.0.1
+diskcache==5.6.3
+google-api-core==2.25.1
+parso==0.8.4
+joblib==1.5.1
+pycountry==24.6.1
+triton==3.2.0
+pybase64==1.4.1
+marisa-trie==1.2.1
+plotly==6.2.0
+wandb==0.21.0
+PyYAML==6.0.2
+regex==2024.11.6
+idna==3.10
+numba==0.61.2
+nvidia-curand-cu12==10.3.5.147
+uvicorn==0.35.0
+srsly==2.5.1
+confection==0.1.5
+opentelemetry-semantic-conventions-ai==0.4.11
+typing-inspection==0.4.1
+opencv-python-headless==4.12.0.88
+pyasn1==0.6.1
+av==15.0.0
+xgrammar==0.1.18
+distlib==0.3.9
+datasets==4.0.0
+networkx==3.4.2
+prometheus-fastapi-instrumentator==7.1.0
+lightning-utilities==0.14.3
+executing==2.2.0
+pycocoevalcap==1.2
+h11==0.16.0
+certifi==2025.7.14
+sniffio==1.3.1
+wheel==0.45.1
+transformers==4.52.4
+wrapt==1.17.2
+jsonschema-specifications==2025.4.1
+mpmath==1.3.0
+msgspec==0.19.0
+py-cpuinfo==9.0.0
+contexttimer==0.3.3
+watchdog==6.0.0
+pexpect==4.9.0
+webencodings==0.5.1
+verl==0.3.2.dev0
+webdataset==1.0.2
+httpcore==1.0.9
+opentelemetry-exporter-otlp==1.26.0
+lm-format-enforcer==0.10.11
+googleapis-common-protos==1.70.0
+pyzmq==27.0.0
+fsspec==2025.3.0
+grpcio==1.73.1
+cymem==2.0.11
+timm==0.4.12
+zipp==3.23.0
+llguidance==0.7.30
+opencensus-context==0.1.3
+omegaconf==2.3.0
+python-json-logger==3.3.0
+opentelemetry-exporter-otlp-proto-common==1.26.0
+watchfiles==1.1.0
+nvidia-nvjitlink-cu12==12.4.127
+peft==0.16.0
+sentry-sdk==2.32.0
+rpds-py==0.26.0
+email_validator==2.2.0
+nodeenv==1.9.1
+distro==1.9.0
+jiter==0.10.0
+compressed-tensors==0.9.3
+annotated-types==0.7.0
+matplotlib-inline==0.1.7
+rich==14.0.0
+GitPython==3.1.44
+lazy_loader==0.4
+fastapi-cloud-cli==0.1.4
+cupy-cuda12x==13.5.1
+prompt_toolkit==3.0.51
+gguf==0.17.1
+blis==1.3.0
+thinc==8.3.6
+cloudpickle==3.1.1
+multidict==6.6.3
+nvidia-nvtx-cu12==12.4.127
+flash-attn==2.7.1.post1
+pyasn1_modules==0.4.2
+rsa==4.9.1
+weasel==0.4.1
+uvloop==0.21.0
+click==8.2.1
+numpy==2.2.6
+torchdata==0.11.0
+pylatexenc==2.10
+cachetools==5.5.2
+Jinja2==3.1.6
+typer==0.16.0
+nvidia-cudnn-cu12==9.1.0.70
+fastapi-cli==0.0.8
+xxhash==3.5.0
+tornado==6.5.1
+scipy==1.15.3
+rouge_score==0.1.2
+cloudpathlib==0.21.1
+streamlit==1.46.1
+jedi==0.19.2
+referencing==0.36.2
+accelerate==1.8.1
+decord==0.6.0
+setuptools==78.1.1
+mdurl==0.1.2
+vllm==0.8.5.post1
+identify==2.6.12
+python-slugify==8.0.4
+dnspython==2.7.0
+dill==0.3.8
+opentelemetry-proto==1.26.0
+orjson==3.10.18
+msgpack==1.1.1
+aiohttp==3.12.14
+aiosignal==1.4.0
+typing_extensions==4.14.1
+tiktoken==0.9.0
+catalogue==2.0.10
+platformdirs==4.3.8
+narwhals==1.47.0
+antlr4-python3-runtime==4.9.3
+pydantic-extra-types==2.10.5
+nvidia-cusolver-cu12==11.6.1.9
+kaggle==1.7.4.5
+propcache==0.3.2
+urllib3==2.5.0
+opentelemetry-exporter-otlp-proto-grpc==1.26.0
+pydeck==0.9.1
+nvidia-cufft-cu12==11.2.1.3
+pyarrow==20.0.0
+nvidia-nccl-cu12==2.21.5
+httptools==0.6.4
+qwen-vl-utils==0.0.11
+markdown-it-py==3.0.0
+gitdb==4.0.12
+altair==5.5.0
+torchvision==0.21.0
+python-magic==0.4.27
+iopath==0.1.10
+ray==2.47.1
+blake3==1.0.5
+pillow==11.3.0
+python-dotenv==1.1.1
+torchaudio==2.6.0
+partial-json-parser==0.2.1.1.post6
+httpx==0.28.1
+torch==2.6.0
+anyio==4.9.0
+fairscale==0.4.4
+traitlets==5.14.3
+pure_eval==0.2.3
+sympy==1.13.1
+nvidia-cusparselt-cu12==0.6.2
+jsonschema==4.24.0
+imageio==2.37.0
+opencensus==0.11.4
+stack-data==0.6.3
+shellingham==1.5.4
+tensordict==0.9.1
+nvidia-cuda-runtime-cu12==12.4.127
+nest-asyncio==1.6.0
+einops==0.8.1
+lark==1.2.2
+tenacity==9.1.2
+virtualenv==20.31.2
+ptyprocess==0.7.0
+outlines==0.1.11
+depyf==0.18.0
+starlette==0.47.1
+cfgv==3.4.0
+pre_commit==4.2.0
+language_data==1.3.0
+pip==25.1
+Pygments==2.19.2
+nvidia-cuda-cupti-cu12==12.4.127
+protobuf==4.25.8
+safetensors==0.5.3
+text-unidecode==1.3
+wcwidth==0.2.13
+charset-normalizer==3.4.2
+aiohappyeyeballs==2.6.1
+outlines_core==0.1.26
+fastrlock==0.8.3
+asttokens==3.0.0
+psutil==7.0.0
+smmap==5.0.2
+exceptiongroup==1.3.0
+murmurhash==1.0.13
+pytorch-lightning==2.5.2
+filelock==3.18.0
+astor==0.8.1
+py-spy==0.4.0
+pydantic_core==2.33.2
+colorful==0.5.7
+fastapi==0.116.1
+opentelemetry-api==1.26.0
+openai==1.90.0
+ninja==1.11.1.4
+opentelemetry-semantic-conventions==0.47b0
+spacy-legacy==3.0.12
+opendatasets==0.1.22
+Deprecated==1.2.18
+proto-plus==1.26.1
+rignore==0.6.2
+aiohttp-cors==0.8.1
+liger_kernel==0.6.0
+opentelemetry-exporter-prometheus==0.56b0
+python-multipart==0.0.20
+multiprocess==0.70.16
+opentelemetry-sdk==1.26.0
+decorator==5.2.1
+xformers==0.0.29.post2
+spacy==3.8.7
+pyvers==0.1.0
+pycocotools==2.0.10
+websockets==15.0.1
+wasabi==1.1.3
+frozenlist==1.7.0
+codetiming==1.4.0
+sentencepiece==0.2.0
+toml==0.10.2
+scikit-image==0.25.2
+ftfy==6.3.1
+bleach==6.2.0
+yarl==1.20.1
+nvidia-cuda-nvrtc-cu12==12.4.127
+importlib_metadata==8.0.0
+spacy-loggers==1.0.5
+smart_open==7.3.0.post1
+portalocker==3.2.0
+llvmlite==0.44.0
+MarkupSafe==3.0.2
+braceexpand==0.1.7
+modelscope==1.28.0
+verl==0.3.2.dev0
+jaraco.context==5.3.0
+more-itertools==10.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+platformdirs==4.2.2
+packaging==24.2
+wheel==0.45.1
+zipp==3.19.2
+inflect==7.3.1
+autocommand==2.2.2
+typeguard==4.3.0
+jaraco.collections==5.1.0
+backports.tarfile==1.2.0
+tomli==2.0.1
+importlib_metadata==8.0.0
+typing_extensions==4.12.2

EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,92 @@

+{
+  "os":  "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.0",
+  "startedAt":  "2025-07-18T10:10:22.154415Z",
+  "args":  [
+    "--node-ip-address=10.1.5.237",
+    "--node-manager-port=45779",
+    "--object-store-name=/tmp/ray/session_2025-07-18_18-08-41_995857_100101/sockets/plasma_store",
+    "--raylet-name=/tmp/ray/session_2025-07-18_18-08-41_995857_100101/sockets/raylet",
+    "--redis-address=None",
+    "--metrics-agent-port=60724",
+    "--logging-rotate-bytes=536870912",
+    "--logging-rotate-backup-count=5",
+    "--runtime-env-agent-port=59748",
+    "--gcs-address=10.1.5.237:65420",
+    "--session-name=session_2025-07-18_18-08-41_995857_100101",
+    "--temp-dir=/tmp/ray",
+    "--webui=127.0.0.1:8265",
+    "--cluster-id=d5af14f82d6884b3972b319ba9c8871ee6d621d0b395536182e77073",
+    "--startup-token=64",
+    "--worker-launch-time-ms=1752833324419",
+    "--node-id=affe3b277e8d66adb6a1a72266e2e5ce24fa5e48471c99f30a7a9bdf",
+    "--runtime-env-hash=-115784934",
+    "--enable-resource-isolation=false"
+  ],
+  "program":  "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py",
+  "git":  {
+    "remote":  "https://github.com/PorUna-byte/PAR.git",
+    "commit":  "b8caf406aa1699c788f0ca6e44a1769452c317db"
+  },
+  "email":  "gia0603yucca@gmail.com",
+  "root":  "/nas/shared/kilab/wangyujia/EasyR1-new/examples",
+  "host":  "dsw-266702-dc4b748ff-f7c66",
+  "executable":  "/root/miniconda3/envs/easyr1-new/bin/python3",
+  "cpu_count":  64,
+  "cpu_count_logical":  64,
+  "gpu":  "NVIDIA A800-SXM4-80GB",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "1623302262784",
+      "used":  "1225166848"
+    }
+  },
+  "memory":  {
+    "total":  "549755813888"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-f7e858cd-ae03-031d-b834-86bf87923211"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-becb8d59-2ab7-b50d-5770-183c6478747a"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655"
+    }
+  ],
+  "cudaVersion":  "12.1",
+  "writerId":  "71sc2v9oxtkr7yiqxoaago0bipl2xjby"
+}

EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-07-18T18:10:23.417471358+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-07-18T18:10:25.159462304+08:00","level":"INFO","msg":"stream: created new stream","id":"zkytrm61"}
+{"time":"2025-07-18T18:10:25.159507377+08:00","level":"INFO","msg":"stream: started","id":"zkytrm61"}
+{"time":"2025-07-18T18:10:25.159528642+08:00","level":"INFO","msg":"handler: started","stream_id":"zkytrm61"}
+{"time":"2025-07-18T18:10:25.15958268+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"zkytrm61"}
+{"time":"2025-07-18T18:10:25.159587635+08:00","level":"INFO","msg":"sender: started","stream_id":"zkytrm61"}

EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log ADDED Viewed

	@@ -0,0 +1,21 @@

+2025-07-18 18:10:23,122 INFO    MainThread:104882 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-07-18 18:10:23,123 INFO    MainThread:104882 [wandb_setup.py:_flush():80] Configure stats pid to 104882
+2025-07-18 18:10:23,123 INFO    MainThread:104882 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-07-18 18:10:23,123 INFO    MainThread:104882 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings
+2025-07-18 18:10:23,123 INFO    MainThread:104882 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-07-18 18:10:23,123 INFO    MainThread:104882 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log
+2025-07-18 18:10:23,123 INFO    MainThread:104882 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log
+2025-07-18 18:10:23,123 INFO    MainThread:104882 [wandb_init.py:init():830] calling init triggers
+2025-07-18 18:10:23,123 INFO    MainThread:104882 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 
'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 
'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}}
+2025-07-18 18:10:23,123 INFO    MainThread:104882 [wandb_init.py:init():871] starting backend
+2025-07-18 18:10:23,360 INFO    MainThread:104882 [wandb_init.py:init():874] sending inform_init request
+2025-07-18 18:10:23,362 INFO    MainThread:104882 [wandb_init.py:init():882] backend started and connected
+2025-07-18 18:10:23,390 INFO    MainThread:104882 [wandb_init.py:init():953] updated telemetry
+2025-07-18 18:10:23,939 INFO    MainThread:104882 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-07-18 18:10:26,092 INFO    MainThread:104882 [wandb_init.py:init():1029] starting run threads in backend
+2025-07-18 18:10:26,299 INFO    MainThread:104882 [wandb_run.py:_console_start():2458] atexit reg
+2025-07-18 18:10:26,299 INFO    MainThread:104882 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-07-18 18:10:26,308 INFO    MainThread:104882 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-07-18 18:10:26,319 INFO    MainThread:104882 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-07-18 18:10:26,355 INFO    MainThread:104882 [wandb_init.py:init():1075] run started, returning control to user process

EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/run-zkytrm61.wandb ADDED Viewed

File without changes

EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/output.log ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ {"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 160623, "uuid": "34b2c74ee0024065b840369ef674694c", "closed": false}
2	+ Start validation...

EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,295 @@

+setproctitle==1.2.2
+colorama==0.4.6
+psutil==7.0.0
+attrs==25.3.0
+tqdm==4.67.1
+langcodes==3.5.0
+nvidia-cublas-cu12==12.4.5.8
+airportsdata==20250706
+absl-py==2.3.1
+hf-xet==1.1.5
+opentelemetry-exporter-otlp-proto-http==1.26.0
+interegular==0.3.3
+tifffile==2025.5.10
+nvidia-cufile-cu12==1.11.1.6
+nltk==3.9.1
+tokenizers==0.21.2
+salesforce-lavis==1.0.2
+tzdata==2025.2
+prometheus_client==0.22.1
+google-auth==2.40.3
+ipython==8.37.0
+pydantic==2.11.7
+mathruler==0.1.0
+six==1.17.0
+python-dateutil==2.9.0.post0
+requests==2.32.4
+mistral_common==1.8.0
+huggingface-hub==0.33.4
+preshed==3.0.10
+torchmetrics==1.7.4
+blinker==1.9.0
+nvidia-cusparse-cu12==12.3.1.170
+rich-toolkit==0.14.8
+pytz==2025.2
+pandas==2.3.1
+packaging==25.0
+async-timeout==5.0.1
+diskcache==5.6.3
+google-api-core==2.25.1
+parso==0.8.4
+joblib==1.5.1
+pycountry==24.6.1
+triton==3.2.0
+pybase64==1.4.1
+marisa-trie==1.2.1
+plotly==6.2.0
+wandb==0.21.0
+PyYAML==6.0.2
+regex==2024.11.6
+idna==3.10
+numba==0.61.2
+nvidia-curand-cu12==10.3.5.147
+uvicorn==0.35.0
+srsly==2.5.1
+confection==0.1.5
+opentelemetry-semantic-conventions-ai==0.4.11
+typing-inspection==0.4.1
+opencv-python-headless==4.12.0.88
+pyasn1==0.6.1
+av==15.0.0
+xgrammar==0.1.18
+distlib==0.3.9
+datasets==4.0.0
+networkx==3.4.2
+prometheus-fastapi-instrumentator==7.1.0
+lightning-utilities==0.14.3
+executing==2.2.0
+pycocoevalcap==1.2
+h11==0.16.0
+certifi==2025.7.14
+sniffio==1.3.1
+wheel==0.45.1
+transformers==4.52.4
+wrapt==1.17.2
+jsonschema-specifications==2025.4.1
+mpmath==1.3.0
+msgspec==0.19.0
+py-cpuinfo==9.0.0
+contexttimer==0.3.3
+watchdog==6.0.0
+pexpect==4.9.0
+webencodings==0.5.1
+verl==0.3.2.dev0
+webdataset==1.0.2
+httpcore==1.0.9
+opentelemetry-exporter-otlp==1.26.0
+lm-format-enforcer==0.10.11
+googleapis-common-protos==1.70.0
+pyzmq==27.0.0
+fsspec==2025.3.0
+grpcio==1.73.1
+cymem==2.0.11
+timm==0.4.12
+zipp==3.23.0
+llguidance==0.7.30
+opencensus-context==0.1.3
+omegaconf==2.3.0
+python-json-logger==3.3.0
+opentelemetry-exporter-otlp-proto-common==1.26.0
+watchfiles==1.1.0
+nvidia-nvjitlink-cu12==12.4.127
+peft==0.16.0
+sentry-sdk==2.32.0
+rpds-py==0.26.0
+email_validator==2.2.0
+nodeenv==1.9.1
+distro==1.9.0
+jiter==0.10.0
+compressed-tensors==0.9.3
+annotated-types==0.7.0
+matplotlib-inline==0.1.7
+rich==14.0.0
+GitPython==3.1.44
+lazy_loader==0.4
+fastapi-cloud-cli==0.1.4
+cupy-cuda12x==13.5.1
+prompt_toolkit==3.0.51
+gguf==0.17.1
+blis==1.3.0
+thinc==8.3.6
+cloudpickle==3.1.1
+multidict==6.6.3
+nvidia-nvtx-cu12==12.4.127
+flash-attn==2.7.1.post1
+pyasn1_modules==0.4.2
+rsa==4.9.1
+weasel==0.4.1
+uvloop==0.21.0
+click==8.2.1
+numpy==2.2.6
+torchdata==0.11.0
+pylatexenc==2.10
+cachetools==5.5.2
+Jinja2==3.1.6
+typer==0.16.0
+nvidia-cudnn-cu12==9.1.0.70
+fastapi-cli==0.0.8
+xxhash==3.5.0
+tornado==6.5.1
+scipy==1.15.3
+rouge_score==0.1.2
+cloudpathlib==0.21.1
+streamlit==1.46.1
+jedi==0.19.2
+referencing==0.36.2
+accelerate==1.8.1
+decord==0.6.0
+setuptools==78.1.1
+mdurl==0.1.2
+vllm==0.8.5.post1
+identify==2.6.12
+python-slugify==8.0.4
+dnspython==2.7.0
+dill==0.3.8
+opentelemetry-proto==1.26.0
+orjson==3.10.18
+msgpack==1.1.1
+aiohttp==3.12.14
+aiosignal==1.4.0
+typing_extensions==4.14.1
+tiktoken==0.9.0
+catalogue==2.0.10
+platformdirs==4.3.8
+narwhals==1.47.0
+antlr4-python3-runtime==4.9.3
+pydantic-extra-types==2.10.5
+nvidia-cusolver-cu12==11.6.1.9
+kaggle==1.7.4.5
+propcache==0.3.2
+urllib3==2.5.0
+opentelemetry-exporter-otlp-proto-grpc==1.26.0
+pydeck==0.9.1
+nvidia-cufft-cu12==11.2.1.3
+pyarrow==20.0.0
+nvidia-nccl-cu12==2.21.5
+httptools==0.6.4
+qwen-vl-utils==0.0.11
+markdown-it-py==3.0.0
+gitdb==4.0.12
+altair==5.5.0
+torchvision==0.21.0
+python-magic==0.4.27
+iopath==0.1.10
+ray==2.47.1
+blake3==1.0.5
+pillow==11.3.0
+python-dotenv==1.1.1
+torchaudio==2.6.0
+partial-json-parser==0.2.1.1.post6
+httpx==0.28.1
+torch==2.6.0
+anyio==4.9.0
+fairscale==0.4.4
+traitlets==5.14.3
+pure_eval==0.2.3
+sympy==1.13.1
+nvidia-cusparselt-cu12==0.6.2
+jsonschema==4.24.0
+imageio==2.37.0
+opencensus==0.11.4
+stack-data==0.6.3
+shellingham==1.5.4
+tensordict==0.9.1
+nvidia-cuda-runtime-cu12==12.4.127
+nest-asyncio==1.6.0
+einops==0.8.1
+lark==1.2.2
+tenacity==9.1.2
+virtualenv==20.31.2
+ptyprocess==0.7.0
+outlines==0.1.11
+depyf==0.18.0
+starlette==0.47.1
+cfgv==3.4.0
+pre_commit==4.2.0
+language_data==1.3.0
+pip==25.1
+Pygments==2.19.2
+nvidia-cuda-cupti-cu12==12.4.127
+protobuf==4.25.8
+safetensors==0.5.3
+text-unidecode==1.3
+wcwidth==0.2.13
+charset-normalizer==3.4.2
+aiohappyeyeballs==2.6.1
+outlines_core==0.1.26
+fastrlock==0.8.3
+asttokens==3.0.0
+psutil==7.0.0
+smmap==5.0.2
+exceptiongroup==1.3.0
+murmurhash==1.0.13
+pytorch-lightning==2.5.2
+filelock==3.18.0
+astor==0.8.1
+py-spy==0.4.0
+pydantic_core==2.33.2
+colorful==0.5.7
+fastapi==0.116.1
+opentelemetry-api==1.26.0
+openai==1.90.0
+ninja==1.11.1.4
+opentelemetry-semantic-conventions==0.47b0
+spacy-legacy==3.0.12
+opendatasets==0.1.22
+Deprecated==1.2.18
+proto-plus==1.26.1
+rignore==0.6.2
+aiohttp-cors==0.8.1
+liger_kernel==0.6.0
+opentelemetry-exporter-prometheus==0.56b0
+python-multipart==0.0.20
+multiprocess==0.70.16
+opentelemetry-sdk==1.26.0
+decorator==5.2.1
+xformers==0.0.29.post2
+spacy==3.8.7
+pyvers==0.1.0
+pycocotools==2.0.10
+websockets==15.0.1
+wasabi==1.1.3
+frozenlist==1.7.0
+codetiming==1.4.0
+sentencepiece==0.2.0
+toml==0.10.2
+scikit-image==0.25.2
+ftfy==6.3.1
+bleach==6.2.0
+yarl==1.20.1
+nvidia-cuda-nvrtc-cu12==12.4.127
+importlib_metadata==8.0.0
+spacy-loggers==1.0.5
+smart_open==7.3.0.post1
+portalocker==3.2.0
+llvmlite==0.44.0
+MarkupSafe==3.0.2
+braceexpand==0.1.7
+modelscope==1.28.0
+verl==0.3.2.dev0
+jaraco.context==5.3.0
+more-itertools==10.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+platformdirs==4.2.2
+packaging==24.2
+wheel==0.45.1
+zipp==3.19.2
+inflect==7.3.1
+autocommand==2.2.2
+typeguard==4.3.0
+jaraco.collections==5.1.0
+backports.tarfile==1.2.0
+tomli==2.0.1
+importlib_metadata==8.0.0
+typing_extensions==4.12.2

EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,92 @@

+{
+  "os":  "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.0",
+  "startedAt":  "2025-07-18T12:19:06.753628Z",
+  "args":  [
+    "--node-ip-address=10.1.5.237",
+    "--node-manager-port=37651",
+    "--object-store-name=/tmp/ray/session_2025-07-18_20-17-27_987959_155806/sockets/plasma_store",
+    "--raylet-name=/tmp/ray/session_2025-07-18_20-17-27_987959_155806/sockets/raylet",
+    "--redis-address=None",
+    "--metrics-agent-port=46087",
+    "--logging-rotate-bytes=536870912",
+    "--logging-rotate-backup-count=5",
+    "--runtime-env-agent-port=64279",
+    "--gcs-address=10.1.5.237:55485",
+    "--session-name=session_2025-07-18_20-17-27_987959_155806",
+    "--temp-dir=/tmp/ray",
+    "--webui=127.0.0.1:8265",
+    "--cluster-id=498a1e71e98cee5fa6c370066e878218480d78d02b0b0a20550a5571",
+    "--startup-token=64",
+    "--worker-launch-time-ms=1752841050410",
+    "--node-id=18fda1330b00f9c3f38fdc4c1387555fb29a9f963f649897c6fc1dc5",
+    "--runtime-env-hash=-115784934",
+    "--enable-resource-isolation=false"
+  ],
+  "program":  "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py",
+  "git":  {
+    "remote":  "https://github.com/PorUna-byte/PAR.git",
+    "commit":  "b8caf406aa1699c788f0ca6e44a1769452c317db"
+  },
+  "email":  "gia0603yucca@gmail.com",
+  "root":  "/nas/shared/kilab/wangyujia/EasyR1-new/examples",
+  "host":  "dsw-266702-dc4b748ff-f7c66",
+  "executable":  "/root/miniconda3/envs/easyr1-new/bin/python3",
+  "cpu_count":  64,
+  "cpu_count_logical":  64,
+  "gpu":  "NVIDIA A800-SXM4-80GB",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "1623302262784",
+      "used":  "1225195520"
+    }
+  },
+  "memory":  {
+    "total":  "549755813888"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-f7e858cd-ae03-031d-b834-86bf87923211"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-becb8d59-2ab7-b50d-5770-183c6478747a"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40"
+    },
+    {
+      "name":  "NVIDIA A800-SXM4-80GB",
+      "architecture":  "Ampere",
+      "uuid":  "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655"
+    }
+  ],
+  "cudaVersion":  "12.1",
+  "writerId":  "99k3ygrrojzudcyj8lzv8s7kibi19jo9"
+}

EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-07-18T20:19:07.57787547+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-07-18T20:19:13.028328674+08:00","level":"INFO","msg":"stream: created new stream","id":"eo9xzqez"}
+{"time":"2025-07-18T20:19:13.038047308+08:00","level":"INFO","msg":"stream: started","id":"eo9xzqez"}
+{"time":"2025-07-18T20:19:13.038097996+08:00","level":"INFO","msg":"handler: started","stream_id":"eo9xzqez"}
+{"time":"2025-07-18T20:19:13.038104971+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"eo9xzqez"}
+{"time":"2025-07-18T20:19:13.038125386+08:00","level":"INFO","msg":"sender: started","stream_id":"eo9xzqez"}

EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log ADDED Viewed

	@@ -0,0 +1,21 @@

+2025-07-18 20:19:07,268 INFO    MainThread:160623 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-07-18 20:19:07,268 INFO    MainThread:160623 [wandb_setup.py:_flush():80] Configure stats pid to 160623
+2025-07-18 20:19:07,268 INFO    MainThread:160623 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-07-18 20:19:07,268 INFO    MainThread:160623 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings
+2025-07-18 20:19:07,268 INFO    MainThread:160623 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-07-18 20:19:07,268 INFO    MainThread:160623 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log
+2025-07-18 20:19:07,268 INFO    MainThread:160623 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log
+2025-07-18 20:19:07,268 INFO    MainThread:160623 [wandb_init.py:init():830] calling init triggers
+2025-07-18 20:19:07,268 INFO    MainThread:160623 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 
'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 
'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}}
+2025-07-18 20:19:07,268 INFO    MainThread:160623 [wandb_init.py:init():871] starting backend
+2025-07-18 20:19:07,479 INFO    MainThread:160623 [wandb_init.py:init():874] sending inform_init request
+2025-07-18 20:19:07,481 INFO    MainThread:160623 [wandb_init.py:init():882] backend started and connected
+2025-07-18 20:19:07,510 INFO    MainThread:160623 [wandb_init.py:init():953] updated telemetry
+2025-07-18 20:19:08,011 INFO    MainThread:160623 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-07-18 20:19:17,965 INFO    MainThread:160623 [wandb_init.py:init():1029] starting run threads in backend
+2025-07-18 20:19:18,182 INFO    MainThread:160623 [wandb_run.py:_console_start():2458] atexit reg
+2025-07-18 20:19:18,183 INFO    MainThread:160623 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-07-18 20:19:18,213 INFO    MainThread:160623 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-07-18 20:19:18,213 INFO    MainThread:160623 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-07-18 20:19:18,223 INFO    MainThread:160623 [wandb_init.py:init():1075] run started, returning control to user process

EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/run-eo9xzqez.wandb ADDED Viewed

File without changes

EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/output.log ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ {"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 182641, "uuid": "99853167d0014a0cbe06d35970a786c8", "closed": false}
2	+ Start validation...