#!/bin/bash
# EasyR1-new/examples/baselines/qwen2_5_vl_3b_clevr.sh
# Baseline: train Qwen2.5-VL-3B on the CLEVR count-70k dataset with the r1v
# (format + accuracy) reward function, 2 GPUs.

set -x

export PYTHONUNBUFFERED=1

MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct  # replace it with your local file path

python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=BUAADreamer/clevr_count_70k@train \
    data.val_files=BUAADreamer/clevr_count_70k@test \
    data.format_prompt=./examples/format_prompt/r1v.jinja \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.reward.reward_type=sequential \
    worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \
    trainer.experiment_name=qwen2_5_vl_3b_clevr \
    trainer.n_gpus_per_node=2

# --- EasyR1-new/examples/baselines/qwen2_5_vl_3b_geoqa8k.sh ---
#!/bin/bash
# Baseline: same recipe on the GEOQA-8K (R1V) geometry dataset, 8 GPUs.

set -x

export PYTHONUNBUFFERED=1

MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct  # replace it with your local file path

python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=leonardPKU/GEOQA_8K_R1V@train \
    data.val_files=leonardPKU/GEOQA_8K_R1V@test \
    data.format_prompt=./examples/format_prompt/r1v.jinja \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.reward.reward_type=sequential \
    worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \
    trainer.experiment_name=qwen2_5_vl_3b_geoqa8k \
    trainer.n_gpus_per_node=8
--- EasyR1-new/examples/format_prompt/bio_format.jinja ---
{{ content | trim }} You must first reason through the question step by step, as if you're thinking aloud. Enclose your full reasoning process within <think> </think> tags. After your reasoning, output only the number corresponding to the final answer choice inside <answer> </answer> tags. For example: <think> reasoning process </think> <answer> result number </answer>

--- EasyR1-new/examples/format_prompt/dapo.jinja ---
Solve the following math problem step by step. The last line of your response should be of the form Answer: $Answer (without quotes) where $Answer is the answer to the problem.\n\n{{ content | trim }}\n\nRemember to put your answer on its own line after "Answer:".

--- EasyR1-new/examples/format_prompt/math.jinja ---
{{ content | trim }} You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}.
--- EasyR1-new/examples/format_prompt/r1v.jinja ---
{{ content | trim }} A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>

--- EasyR1-new/examples/qwen2_5_vl_7b_multi_image.sh (header; remainder continues below) ---
#!/bin/bash
# REMINDER: this script uses test data split and should ONLY be used for debugging. DO NOT use for training.
+ +set -x + +export PYTHONUNBUFFERED=1 + +MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path + +python3 -m verl.trainer.main \ + config=examples/config.yaml \ + data.train_files=hiyouga/journeybench-multi-image-vqa@train \ + data.val_files=hiyouga/journeybench-multi-image-vqa@test \ + data.rollout_batch_size=256 \ + worker.actor.model.model_path=${MODEL_PATH} \ + worker.rollout.limit_images=2 \ + trainer.experiment_name=qwen2_5_vl_7b_multi_image \ + trainer.n_gpus_per_node=8 diff --git a/EasyR1-new/examples/qwen3_14b_dapo17k_dapo.sh b/EasyR1-new/examples/qwen3_14b_dapo17k_dapo.sh new file mode 100644 index 0000000000000000000000000000000000000000..18cc7b3408f22c12fb4193b2da3ed957394ca34c --- /dev/null +++ b/EasyR1-new/examples/qwen3_14b_dapo17k_dapo.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +set -x + +export PYTHONUNBUFFERED=1 + +MODEL_PATH=Qwen/Qwen3-14B-Base # replace it with your local file path + +python3 -m verl.trainer.main \ + config=examples/config.yaml \ + data.train_files=Saigyouji-Yuyuko1000/dapo17k@train \ + data.val_files=Saigyouji-Yuyuko1000/dapo17k@test \ + data.format_prompt=./examples/format_prompt/dapo.jinja \ + data.max_prompt_length=2048 \ + data.max_response_length=20480 \ + data.rollout_batch_size=512 \ + data.mini_rollout_batch_size=256 \ + worker.actor.micro_batch_size_per_device_for_update=1 \ + worker.actor.micro_batch_size_per_device_for_experience=8 \ + worker.actor.model.model_path=${MODEL_PATH} \ + worker.actor.fsdp.torch_dtype=bf16 \ + worker.actor.optim.strategy=adamw_bf16 \ + worker.actor.optim.weight_decay=0.1 \ + worker.actor.optim.lr_warmup_steps=10 \ + worker.actor.global_batch_size=32 \ + worker.actor.clip_ratio_low=0.2 \ + worker.actor.clip_ratio_high=0.28 \ + worker.actor.clip_ratio_dual=10.0 \ + worker.rollout.n=16 \ + worker.rollout.max_num_batched_tokens=22528 \ + worker.rollout.val_override_config='{"n":16,"temperature":1.0,"top_p":0.7}' \ + worker.rollout.gpu_memory_utilization=0.8 \ + 
worker.reward.reward_function=./examples/reward_function/dapo.py:compute_score \ + worker.reward.reward_function_kwargs='{"max_response_length":20480,"overlong_buffer_length":4096,"overlong_penalty_factor":1.0}' \ + algorithm.disable_kl=True \ + algorithm.online_filtering=True \ + algorithm.filter_key=accuracy_normalized \ + algorithm.filter_low=0.01 \ + algorithm.filter_high=0.99 \ + trainer.total_epochs=10 \ + trainer.max_try_make_batch=10 \ + trainer.experiment_name=qwen3_14b_dapo17k_dapo \ + trainer.n_gpus_per_node=8 diff --git a/EasyR1-new/examples/qwen3_4b_math_grpo.sh b/EasyR1-new/examples/qwen3_4b_math_grpo.sh new file mode 100644 index 0000000000000000000000000000000000000000..32bbaac99327c298aa2aaf13e5e5f2f61b3e2d45 --- /dev/null +++ b/EasyR1-new/examples/qwen3_4b_math_grpo.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -x + +export PYTHONUNBUFFERED=1 + +MODEL_PATH=Qwen/Qwen3-4B # replace it with your local file path + +python3 -m verl.trainer.main \ + config=examples/config.yaml \ + data.max_response_length=4096 \ + worker.actor.model.model_path=${MODEL_PATH} \ + trainer.experiment_name=qwen3_4b_math_grpo diff --git a/EasyR1-new/examples/reward_function/__pycache__/bio.cpython-310.pyc b/EasyR1-new/examples/reward_function/__pycache__/bio.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f1d41a12dfdb23d100c6e1605e07cfd55ebaa183 Binary files /dev/null and b/EasyR1-new/examples/reward_function/__pycache__/bio.cpython-310.pyc differ diff --git a/EasyR1-new/examples/reward_function/bio.py b/EasyR1-new/examples/reward_function/bio.py new file mode 100644 index 0000000000000000000000000000000000000000..3eed5582876e57d1ec0d0f0d092284e8a8841eaa --- /dev/null +++ b/EasyR1-new/examples/reward_function/bio.py @@ -0,0 +1,183 @@ + + + +from itertools import islice, zip_longest +from typing import Callable, Dict, List, Optional, Tuple, TypedDict +import json + +def repeatness_reward(s: str): + def ranks(l): + index = {v: i for i, v in 
# EasyR1-new/examples/reward_function/bio.py
#
# Reward functions for the protein-localization multiple-choice task:
# a format reward (<think>/<answer> layout), an accuracy reward (single-digit
# answer vs. ground truth) and an anti-repetition reward over the <think> text.
#
# NOTE(review): the literal <think>/<answer> tags had been stripped from the
# extracted copy of the regexes below; they are restored here to match the
# bio_format.jinja prompt ("<think> reasoning </think> <answer> number </answer>").

import json
import re
from itertools import islice, zip_longest
from typing import Any, Dict, List, Optional


def repeatness_reward(s: str) -> float:
    """Score how repetitive *s* is via the sum of adjacent suffix LCPs.

    Returns 1.0 for a string with no internal repetition, smaller (possibly
    negative) values the more the string repeats itself; 0 for len(s) <= 1.
    """

    def ranks(seq):
        # Map each value to its rank among the distinct values of seq.
        index = {v: i for i, v in enumerate(sorted(set(seq)))}
        return [index[v] for v in seq]

    def suffix_array(arr):
        # Prefix-doubling suffix array construction; returns (rank, sa).
        line = ranks(arr)
        n, k, ans, sa = len(arr), 1, line, [0] * len(arr)
        # NOTE(review): the textbook loop condition is `k < n`; kept as-is to
        # preserve the original behavior — confirm before changing.
        while k < n - 1:
            line = ranks(list(zip_longest(line, islice(line, k, None), fillvalue=-1)))
            ans, k = line, k << 1
        for i, r in enumerate(ans):
            sa[r] = i
        return ans, sa

    def lcp(arr, suffix_arr, inv_suff):
        # Kasai's algorithm: LCP of each suffix with its successor in sorted order.
        n, ans, k = len(arr), [0] * len(arr), 0
        for i in range(n):
            if inv_suff[i] == n - 1:
                k = 0
                continue
            j = suffix_arr[inv_suff[i] + 1]
            while i + k < n and j + k < n and arr[i + k] == arr[j + k]:
                k += 1
            ans[inv_suff[i]] = k
            if k > 0:
                k -= 1
        return ans

    arr = [ord(ch) for ch in s]
    n = len(arr)
    if n <= 1:
        return 0
    inv, sa = suffix_array(arr)
    cnt = sum(lcp(arr, sa, inv))
    return 1 - cnt * 2 / (n * (n + 1))


def format_reward(predict_str: str) -> float:
    """Strict format reward.

    The (stripped) response must be exactly ``<think>...</think>`` followed by
    ``<answer>D</answer>`` with a single digit D and nothing else.
    """
    pattern = r"^<think>.*?</think>\s*<answer>\s*([0-9])\s*</answer>$"
    return 1.0 if re.fullmatch(pattern, predict_str.strip(), re.DOTALL) else 0.0


def acc_reward(predict_str: str, ground_truth: Any) -> float:
    """Accuracy reward: 1.0 when the digit inside <answer></answer> equals the
    ground truth, else 0.0.

    The ground truth may arrive as an int or a numeric string (the original
    compared ``int(answer) == ground_truth``, which was always False for
    string ground truths); both are now compared as ints.
    """
    match = re.search(r"<answer>\s*([0-9])\s*</answer>", predict_str)
    if not match:
        return 0.0
    answer = int(match.group(1))
    try:
        return 1.0 if answer == int(str(ground_truth).strip()) else 0.0
    except (TypeError, ValueError):
        return 0.0


def compute_score(
    predicts: List[str],
    ground_truths: List[str],
    format_weight: float = 0.1,
    save_path: Optional[str] = None,
) -> List[Dict[str, float]]:
    """Batch reward: per sample returns format/accuracy/repetition scores plus
    their plain sum as "overall".

    Args:
        predicts: model responses.
        ground_truths: expected answer digits (int or str).
        format_weight: accepted for interface compatibility but unused —
            "overall" is the unweighted sum, matching the original behavior.
        save_path: optional JSONL dump path for per-sample results. The
            original wrote unconditionally to a hard-coded /nas/... path,
            which crashed on machines without that mount and overwrote the
            previous dump on every call; writing is now opt-in.
    """
    scores: List[Dict[str, float]] = []
    records: List[Dict[str, Any]] = []
    for predict, ground_truth in zip(predicts, ground_truths):
        format_score = format_reward(predict)
        acc_score = acc_reward(predict, ground_truth)

        # The anti-repetition reward is computed on the <think> block only.
        think_match = re.search(r"<think>(.*?)</think>", predict, re.DOTALL)
        think_str = think_match.group(1).strip() if think_match else ""
        repeat_score = repeatness_reward(think_str)

        score = {
            "overall": format_score + acc_score + repeat_score,
            "format": format_score,
            "accuracy": acc_score,
            "repeat": repeat_score,
        }
        scores.append(score)
        records.append({"solution_str": predict, "ground_truth": ground_truth, **score})

    if save_path:
        with open(save_path, "w", encoding="utf-8") as f:
            for record in records:
                f.write(json.dumps(record, ensure_ascii=False) + "\n")

    return scores


def check_rewards(jsonl_path: str) -> List[Dict[str, float]]:
    """Recompute and print the three rewards for every record of a JSONL dump
    produced by :func:`compute_score`; returns the list of score dicts."""
    results: List[Dict[str, float]] = []
    with open(jsonl_path, "r", encoding="utf-8") as f:
        for line in f:
            data = json.loads(line)
            solution_str = data["solution_str"]
            ground_truth = data["ground_truth"]

            format_score = format_reward(solution_str)
            acc_score = acc_reward(solution_str, ground_truth)
            think_match = re.search(r"<think>(.*?)</think>", solution_str, re.DOTALL)
            think_str = think_match.group(1).strip() if think_match else ""
            repeat_score = repeatness_reward(think_str)

            result = {
                "format": format_score,
                "accuracy": acc_score,
                "repeat": repeat_score,
                "overall": format_score + acc_score + repeat_score,
            }
            results.append(result)
            print(json.dumps(result, indent=2, ensure_ascii=False))
    return results


if __name__ == "__main__":
    # Previously this call ran unconditionally at import time (against a
    # hard-coded NAS path), breaking `import bio` on any other machine.
    check_rewards("/nas/shared/kilab/wangyujia/check_rl/check.jsonl")
# EasyR1-new/examples/reward_function/dapo.py
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from typing import Any, Dict, List


# Ordered textual substitutions applied while normalizing a final answer.
SUBSTITUTIONS = [
    ("an ", ""),
    ("a ", ""),
    (".$", "$"),
    ("\\$", ""),
    (r"\ ", ""),
    (" ", ""),
    ("mbox", "text"),
    (",\\text{and}", ","),
    ("\\text{and}", ","),
    ("\\text{m}", "\\text{}"),
]

# Unit / filler expressions stripped outright from candidate answers.
REMOVED_EXPRESSIONS = [
    "square",
    "ways",
    "integers",
    "dollars",
    "mph",
    "inches",
    "hours",
    "km",
    "units",
    "\\ldots",
    "sue",
    "points",
    "feet",
    "minutes",
    "digits",
    "cents",
    "degrees",
    "cm",
    "gm",
    "pounds",
    "meters",
    "meals",
    "edges",
    "students",
    "childrentickets",
    "multiples",
    "\\text{s}",
    "\\text{.}",
    "\\text{\ns}",
    "\\text{}^2",
    "\\text{}^3",
    "\\text{\n}",
    "\\text{}",
    r"\mathrm{th}",
    r"^\circ",
    r"^{\circ}",
    r"\;",
    r",\!",
    "{,}",
    '"',
    "\\dots",
]


def normalize_final_answer(final_answer: str) -> str:
    """Normalize a final answer to a quantitative reasoning question.

    Args:
        final_answer: The answer string to normalize

    Returns:
        Normalized answer string
    """
    # Keep only the right-hand side of any equation.
    final_answer = final_answer.split("=")[-1]

    # Ordered substitutions first, then outright removals.
    for old, new in SUBSTITUTIONS:
        final_answer = final_answer.replace(old, new)
    for junk in REMOVED_EXPRESSIONS:
        final_answer = final_answer.replace(junk, "")

    # Keep the first $...$ math span, then unwrap common TeX wrappers.
    final_answer = re.sub(r"(.*?)(\$)(.*?)(\$)(.*)", "$\\3$", final_answer)
    for wrapper in (r"\\text\{", r"\\textbf\{", r"\\overline\{"):
        final_answer = re.sub("(" + wrapper + r")(.*?)(\})", "\\2", final_answer)
    final_answer = re.sub(r"(\\boxed\{)(.*)(\})", "\\2", final_answer)

    # Normalize shorthand TeX:
    #   \fracab  -> \frac{a}{b}     \fracabc -> \frac{a}{b}c
    #   \sqrta   -> \sqrt{a}        \sqrtab  -> \sqrt{a}b
    final_answer = re.sub(r"(frac)([^{])(.)", "frac{\\2}{\\3}", final_answer)
    final_answer = re.sub(r"(sqrt)([^{])", "sqrt{\\2}", final_answer)
    final_answer = final_answer.replace("$", "")

    # Drop thousands separators from plain integers.
    if final_answer.replace(",", "").isdigit():
        final_answer = final_answer.replace(",", "")

    return final_answer.strip()


def accuracy_reward(response: str, ground_truth: str) -> float:
    """Signed accuracy: +1 when the last "Answer: ..." line matches the ground
    truth after normalization, otherwise -1."""
    matches = re.findall(r"(?i)Answer\s*:\s*([^\n]+)", response)
    candidate = matches[-1] if matches else "[INVALID]"
    return 1.0 if normalize_final_answer(candidate) == normalize_final_answer(ground_truth) else -1.0


def soft_overlong_punishment(response_length: int, max_response_length: int, overlong_buffer_length: int):
    """Piecewise-linear length penalty: 0 up to the soft limit, ramping
    linearly to -1 across the overlong buffer, capped at -1 beyond it."""
    soft_limit = max_response_length - overlong_buffer_length
    if response_length <= soft_limit:
        return 0.0
    if response_length > max_response_length:
        return -1.0
    return (soft_limit - response_length) / overlong_buffer_length


def compute_score(
    reward_inputs: List[Dict[str, Any]],
    max_response_length: int,
    overlong_buffer_length: int,
    overlong_penalty_factor: float,
) -> List[Dict[str, float]]:
    """Batch DAPO reward: signed accuracy plus a scaled soft length penalty.

    Raises:
        ValueError: when called with a single dict (use ``reward_type=batch``).
    """
    if not isinstance(reward_inputs, list):
        raise ValueError("Please use `reward_type=batch` for dapo reward function.")

    scores = []
    for sample in reward_inputs:
        # Only the tail can hold the final answer (longest MATH-500 answer: 159 chars).
        tail = sample["response"][-300:]
        accuracy_score = accuracy_reward(tail, sample["ground_truth"])
        overlong_score = soft_overlong_punishment(
            sample["response_length"], max_response_length, overlong_buffer_length
        )
        scores.append(
            {
                "overall": accuracy_score + overlong_score * overlong_penalty_factor,
                "accuracy": accuracy_score,
                "overlong": overlong_score,
                "accuracy_normalized": 0.5 * (accuracy_score + 1.0),
            }
        )

    return scores
# EasyR1-new/examples/reward_function/math.py
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from typing import Any, Dict, List

from mathruler.grader import extract_boxed_content, grade_answer


def format_reward(response: str) -> float:
    """Return 1.0 iff the whole response is a <think>...</think> block with a
    \\boxed{...} final answer somewhere after it, else 0.0.

    NOTE(review): the literal <think>/</think> tags had been stripped from the
    extracted copy of this pattern (leaving ``.*.*\\boxed``); restored here to
    match the math.jinja format prompt.
    """
    pattern = re.compile(r"<think>.*</think>.*\\boxed\{.*\}.*", re.DOTALL)
    format_match = re.fullmatch(pattern, response)
    return 1.0 if format_match else 0.0


def accuracy_reward(response: str, ground_truth: str) -> float:
    """Return 1.0 when the \\boxed{} content matches the ground truth according
    to mathruler's grader, else 0.0."""
    answer = extract_boxed_content(response)
    return 1.0 if grade_answer(answer, ground_truth) else 0.0


def compute_score(reward_inputs: List[Dict[str, Any]], format_weight: float = 0.1) -> List[Dict[str, float]]:
    """Batch reward: per-sample weighted mix of accuracy (1 - format_weight)
    and format (format_weight).

    Raises:
        ValueError: when called with a single dict (use ``reward_type=batch``).
    """
    if not isinstance(reward_inputs, list):
        raise ValueError("Please use `reward_type=batch` for math reward function.")

    scores = []
    for reward_input in reward_inputs:
        # Collapse whitespace around angle brackets and slashes, e.g.
        # "< think >" -> "<think>" (handles qwen2.5vl-32b spacing).
        response = re.sub(r"\s*(<|>|/)\s*", r"\1", reward_input["response"])
        format_score = format_reward(response)
        accuracy_score = accuracy_reward(response, reward_input["ground_truth"])
        scores.append(
            {
                "overall": (1 - format_weight) * accuracy_score + format_weight * format_score,
                "format": format_score,
                "accuracy": accuracy_score,
            }
        )

    return scores
# EasyR1-new/examples/reward_function/r1v.py
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from typing import Any, Dict

from mathruler.grader import grade_answer


def format_reward(response: str) -> float:
    """Return 1.0 iff the whole response is ``<think>...</think>`` followed by
    ``<answer>...</answer>``, else 0.0.

    NOTE(review): the literal tags had been stripped from the extracted copy of
    this regex (leaving ``.*?\\s*.*?``); restored to match the r1v.jinja prompt.
    """
    pattern = re.compile(r"<think>.*?</think>\s*<answer>.*?</answer>", re.DOTALL)
    format_match = re.fullmatch(pattern, response)
    return 1.0 if format_match else 0.0


def accuracy_reward(response: str, ground_truth: str) -> float:
    """Grade the <answer> block (or the whole response when the tags are
    missing) against the ground truth; any grader error scores 0.0."""
    try:
        content_match = re.search(r"<answer>(.*?)</answer>", response)
        given_answer = content_match.group(1).strip() if content_match else response.strip()
        if grade_answer(given_answer, ground_truth.strip()):
            return 1.0
    except Exception:  # grader may raise on malformed TeX; treat as incorrect
        pass
    return 0.0


def compute_score(reward_input: Dict[str, Any], format_weight: float = 0.5) -> Dict[str, float]:
    """Per-sample reward: weighted mix of accuracy and format scores.

    Raises:
        ValueError: when called with a batch list (use ``reward_type=sequential``).
    """
    if not isinstance(reward_input, dict):
        raise ValueError("Please use `reward_type=sequential` for r1v reward function.")

    format_score = format_reward(reward_input["response"])
    accuracy_score = accuracy_reward(reward_input["response"], reward_input["ground_truth"])
    return {
        "overall": (1 - format_weight) * accuracy_score + format_weight * format_score,
        "format": format_score,
        "accuracy": accuracy_score,
    }


# ---------------------------------------------------------------------------
# NOTE(review): the extracted source interleaved the following unrelated file
# (EasyR1-new/examples/runtime_env.yaml) into this span; preserved verbatim so
# no content is lost:
#
#   working_dir: ./
#   excludes: ["/.git/"]
#   env_vars:
#     TOKENIZERS_PARALLELISM: "true"
#     NCCL_DEBUG: "WARN"
#     VLLM_LOGGING_LEVEL: "WARN"
#     TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
#     PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False"
#     PYTHONUNBUFFERED: "1"
b/EasyR1-new/examples/wandb/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2025-07-21T14:07:35.211628547+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-21T14:07:50.875611638+08:00","level":"INFO","msg":"stream: created new stream","id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.876588753+08:00","level":"INFO","msg":"stream: started","id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.87663237+08:00","level":"INFO","msg":"sender: started","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.876605114+08:00","level":"INFO","msg":"handler: started","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.87665507+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:05.783504415+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":7.434542791},{"desc":"uploading data","runtime_seconds":0.571568597}],"total_operations":2}} +{"time":"2025-07-21T14:08:31.955353631+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-21T14:08:56.48244624+08:00","level":"INFO","msg":"stream: closing","id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.48558812+08:00","level":"INFO","msg":"handler: closed","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.485598269+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.485607803+08:00","level":"INFO","msg":"sender: closed","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.50017009+08:00","level":"INFO","msg":"stream: closed","id":"a9qblh0u"} diff --git a/EasyR1-new/examples/wandb/debug.log b/EasyR1-new/examples/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..b7e2572e2a286ff92c36c4fc2635c9b518e94415 --- /dev/null +++ b/EasyR1-new/examples/wandb/debug.log @@ -0,0 +1,28 @@ +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Current 
SDK version is 0.21.0 +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Configure stats pid to 317976 +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug.log +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug-internal.log +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:init():830] calling init triggers +2025-07-21 14:07:34,953 INFO MainThread:317976 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 
'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 
'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 
'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-21 14:07:34,953 INFO MainThread:317976 [wandb_init.py:init():871] starting backend +2025-07-21 14:07:35,172 INFO MainThread:317976 [wandb_init.py:init():874] sending inform_init request +2025-07-21 14:07:35,174 INFO MainThread:317976 [wandb_init.py:init():882] backend started and connected +2025-07-21 14:07:35,186 INFO MainThread:317976 [wandb_init.py:init():953] updated telemetry +2025-07-21 14:07:35,302 INFO MainThread:317976 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-21 14:07:58,269 INFO MainThread:317976 [wandb_init.py:init():1029] starting run threads in backend +2025-07-21 14:07:58,556 INFO MainThread:317976 [wandb_run.py:_console_start():2458] atexit reg +2025-07-21 14:07:58,556 INFO MainThread:317976 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-21 14:07:58,562 INFO MainThread:317976 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-21 14:07:58,562 INFO MainThread:317976 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-21 14:07:58,574 INFO MainThread:317976 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-21 14:08:04,748 INFO MainThread:317976 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/a9qblh0u +2025-07-21 14:08:04,755 INFO MainThread:317976 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-21 14:08:04,767 INFO MainThread:317976 [wandb_run.py:_restore():2405] restore +2025-07-21 14:08:04,771 INFO MainThread:317976 [wandb_run.py:_restore():2411] restore done +2025-07-21 14:08:56,463 INFO MainThread:317976 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-21 14:08:56,469 INFO MainThread:317976 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-21 14:08:56,469 INFO MainThread:317976 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/output.log b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..087604f53da01ea95a5c98279640a5d4fbdfc220 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.6.45", "pid": 7695, "uuid": "4931688589ea40edb6b0579192261e95", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e5a308ce5af6b0415ec3555d11191377649cd8d --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +numpy==2.2.6 +pylatexenc==2.10 +webdataset==1.0.2 +email_validator==2.2.0 +confection==0.1.5 +text-unidecode==1.3 +python-dotenv==1.1.1 +starlette==0.47.1 +pyasn1==0.6.1 +contexttimer==0.3.3 +requests==2.32.4 +omegaconf==2.3.0 +tzdata==2025.2 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +decord==0.6.0 +nvidia-cublas-cu12==12.4.5.8 +proto-plus==1.26.1 +opentelemetry-semantic-conventions-ai==0.4.11 +scipy==1.15.3 +googleapis-common-protos==1.70.0 +nvidia-cufile-cu12==1.11.1.6 +parso==0.8.4 +opentelemetry-exporter-otlp-proto-http==1.26.0 +vllm==0.8.5.post1 +sniffio==1.3.1 +python-dateutil==2.9.0.post0 +openai==1.90.0 +absl-py==2.3.1 +Deprecated==1.2.18 +cupy-cuda12x==13.5.1 +setuptools==78.1.1 +peft==0.16.0 +rignore==0.6.2 +joblib==1.5.1 +platformdirs==4.3.8 +regex==2024.11.6 +datasets==4.0.0 +preshed==3.0.10 +aiohappyeyeballs==2.6.1 +uvloop==0.21.0 +sentry-sdk==2.32.0 +virtualenv==20.31.2 +lazy_loader==0.4 +rich==14.0.0 +pycocotools==2.0.10 +timm==0.4.12 +rich-toolkit==0.14.8 +fastapi-cli==0.0.8 +antlr4-python3-runtime==4.9.3 +salesforce-lavis==1.0.2 +Pygments==2.19.2 +gitdb==4.0.12 +six==1.17.0 +verl==0.3.2.dev0 +smmap==5.0.2 +fastapi-cloud-cli==0.1.4 +opencensus==0.11.4 +annotated-types==0.7.0 +xxhash==3.5.0 +frozenlist==1.7.0 +pyzmq==27.0.0 +Jinja2==3.1.6 +ptyprocess==0.7.0 +interegular==0.3.3 +opentelemetry-semantic-conventions==0.47b0 +jiter==0.10.0 +idna==3.10 +typing_extensions==4.14.1 +nvidia-cusolver-cu12==11.6.1.9 +propcache==0.3.2 +nest-asyncio==1.6.0 +pillow==11.3.0 +tenacity==9.1.2 +sentencepiece==0.2.0 
+portalocker==3.2.0 +matplotlib-inline==0.1.7 +pandas==2.3.1 +compressed-tensors==0.9.3 +typing-inspection==0.4.1 +nltk==3.9.1 +opencv-python-headless==4.12.0.88 +dnspython==2.7.0 +tokenizers==0.21.2 +wheel==0.45.1 +python-multipart==0.0.20 +catalogue==2.0.10 +smart_open==7.3.0.post1 +multidict==6.6.3 +xgrammar==0.1.18 +aiosignal==1.4.0 +pybase64==1.4.1 +blake3==1.0.5 +certifi==2025.7.14 +torchdata==0.11.0 +qwen-vl-utils==0.0.11 +nvidia-nvjitlink-cu12==12.4.127 +urllib3==2.5.0 +aiohttp-cors==0.8.1 +outlines_core==0.1.26 +pydantic-extra-types==2.10.5 +filelock==3.18.0 +airportsdata==20250706 +ipython==8.37.0 +pydantic==2.11.7 +cloudpickle==3.1.1 +torchaudio==2.6.0 +tiktoken==0.9.0 +pexpect==4.9.0 +flash-attn==2.7.1.post1 +nvidia-nvtx-cu12==12.4.127 +bleach==6.2.0 +watchfiles==1.1.0 +uvicorn==0.35.0 +numba==0.61.2 +tornado==6.5.1 +networkx==3.4.2 +sympy==1.13.1 +watchdog==6.0.0 +kaggle==1.7.4.5 +pyarrow==20.0.0 +accelerate==1.8.1 +mpmath==1.3.0 +lightning-utilities==0.14.3 +codetiming==1.4.0 +ftfy==6.3.1 +triton==3.2.0 +referencing==0.36.2 +dill==0.3.8 +language_data==1.3.0 +python-magic==0.4.27 +wasabi==1.1.3 +pyvers==0.1.0 +murmurhash==1.0.13 +mathruler==0.1.0 +jsonschema-specifications==2025.4.1 +blinker==1.9.0 +imageio==2.37.0 +pycocoevalcap==1.2 +python-json-logger==3.3.0 +nvidia-cuda-cupti-cu12==12.4.127 +fairscale==0.4.4 +httptools==0.6.4 +identify==2.6.12 +streamlit==1.46.1 +mdurl==0.1.2 +decorator==5.2.1 +h11==0.16.0 +distlib==0.3.9 +webencodings==0.5.1 +transformers==4.52.4 +srsly==2.5.1 +fsspec==2025.3.0 +diskcache==5.6.3 +click==8.2.1 +blis==1.3.0 +colorful==0.5.7 +websockets==15.0.1 +liger_kernel==0.6.0 +lark==1.2.2 +cymem==2.0.11 +anyio==4.9.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +fastapi==0.116.1 +tensordict==0.9.1 +pre_commit==4.2.0 +wrapt==1.17.2 +opentelemetry-api==1.26.0 +nvidia-curand-cu12==10.3.5.147 +spacy==3.8.7 +narwhals==1.47.0 +exceptiongroup==1.3.0 +braceexpand==0.1.7 +rouge_score==0.1.2 +msgpack==1.1.1 +async-timeout==5.0.1 
+protobuf==4.25.8 +huggingface-hub==0.33.4 +wandb==0.21.0 +httpx==0.28.1 +mistral_common==1.8.0 +gguf==0.17.1 +opentelemetry-proto==1.26.0 +nvidia-nccl-cu12==2.21.5 +wcwidth==0.2.13 +nvidia-cusparselt-cu12==0.6.2 +scikit-image==0.25.2 +cfgv==3.4.0 +markdown-it-py==3.0.0 +packaging==25.0 +charset-normalizer==3.4.2 +executing==2.2.0 +py-spy==0.4.0 +pure_eval==0.2.3 +safetensors==0.5.3 +pyasn1_modules==0.4.2 +jsonschema==4.24.0 +spacy-legacy==3.0.12 +astor==0.8.1 +shellingham==1.5.4 +langcodes==3.5.0 +pytz==2025.2 +distro==1.9.0 +google-api-core==2.25.1 +rsa==4.9.1 +multiprocess==0.70.16 +iopath==0.1.10 +weasel==0.4.1 +tifffile==2025.5.10 +nodeenv==1.9.1 +opentelemetry-exporter-prometheus==0.56b0 +einops==0.8.1 +lm-format-enforcer==0.10.11 +pydantic_core==2.33.2 +hf-xet==1.1.5 +opentelemetry-sdk==1.26.0 +ninja==1.11.1.4 +altair==5.5.0 +ray==2.47.1 +depyf==0.18.0 +attrs==25.3.0 +tqdm==4.67.1 +xformers==0.0.29.post2 +pydeck==0.9.1 +stack-data==0.6.3 +prometheus-fastapi-instrumentator==7.1.0 +grpcio==1.73.1 +torch==2.6.0 +plotly==6.2.0 +nvidia-cudnn-cu12==9.1.0.70 +python-slugify==8.0.4 +opencensus-context==0.1.3 +importlib_metadata==8.0.0 +orjson==3.10.18 +prompt_toolkit==3.0.51 +psutil==7.0.0 +opendatasets==0.1.22 +asttokens==3.0.0 +pycountry==24.6.1 +partial-json-parser==0.2.1.1.post6 +zipp==3.23.0 +pip==25.1 +MarkupSafe==3.0.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +llvmlite==0.44.0 +nvidia-cufft-cu12==11.2.1.3 +GitPython==3.1.44 +fastrlock==0.8.3 +PyYAML==6.0.2 +opentelemetry-exporter-otlp==1.26.0 +typer==0.16.0 +cloudpathlib==0.21.1 +toml==0.10.2 +pytorch-lightning==2.5.2 +marisa-trie==1.2.1 +msgspec==0.19.0 +llguidance==0.7.30 +google-auth==2.40.3 +traitlets==5.14.3 +rpds-py==0.26.0 +cachetools==5.5.2 +spacy-loggers==1.0.5 +nvidia-cuda-runtime-cu12==12.4.127 +aiohttp==3.12.14 +torchvision==0.21.0 +av==15.0.0 +torchmetrics==1.7.4 +nvidia-cusparse-cu12==12.3.1.170 +outlines==0.1.11 +jedi==0.19.2 +thinc==8.3.6 +prometheus_client==0.22.1 +httpcore==1.0.9 
+py-cpuinfo==9.0.0 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.functools==4.0.1 +inflect==7.3.1 +jaraco.collections==5.1.0 +packaging==24.2 +wheel==0.45.1 +tomli==2.0.1 +platformdirs==4.2.2 +typing_extensions==4.12.2 +more-itertools==10.3.0 +autocommand==2.2.2 +jaraco.text==3.12.1 +importlib_metadata==8.0.0 +jaraco.context==5.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typeguard==4.3.0 diff --git a/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..b6c612f8abb67917fe37f8c955472ad4e9343c7d --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/files/wandb-metadata.json @@ -0,0 +1,71 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-16T13:03:06.627811Z", + "args": [ + "--node-ip-address=10.1.6.45", + "--node-manager-port=42325", + "--object-store-name=/tmp/ray/session_2025-07-16_20-51-10_730275_5196/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-16_20-51-10_730275_5196/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=54069", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=57480", + "--gcs-address=10.1.6.45:54882", + "--session-name=session_2025-07-16_20-51-10_730275_5196", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=a69f29ea92b56cbc2f572353862768b5a0832495b7a590f4f273963a", + "--startup-token=28", + "--worker-launch-time-ms=1752670273261", + "--node-id=e54e37f4f5b34463471871dbe5c90937958f768732bc6e9579a13842", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + 
"commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-297442-5bd684fbff-4l96r", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 28, + "cpu_count_logical": 28, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 4, + "disk": { + "/": { + "total": "1623302262784", + "used": "1165746176" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-c783413d-e4e1-22c5-7c48-9296c28b08a0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0ad82850-a679-fa6b-9200-a26edb1bb8a4" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-e73b7d7b-4455-62ee-ec7e-a2eb1d845e07" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-71ee45de-57b2-ac7c-13c1-08a1f197eb20" + } + ], + "cudaVersion": "12.1", + "writerId": "t6v0x6ljtdqkxmc6nxsvdn00ede7tanp" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c196cb934788278b679f8f697906da20df22a455 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-16T21:03:07.422600635+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-16T21:03:27.915788626+08:00","level":"INFO","msg":"stream: created new stream","id":"lkflebyj"} +{"time":"2025-07-16T21:03:27.937736115+08:00","level":"INFO","msg":"stream: started","id":"lkflebyj"} +{"time":"2025-07-16T21:03:27.937745307+08:00","level":"INFO","msg":"handler: started","stream_id":"lkflebyj"} +{"time":"2025-07-16T21:03:27.937759674+08:00","level":"INFO","msg":"sender: 
started","stream_id":"lkflebyj"} +{"time":"2025-07-16T21:03:27.937780163+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"lkflebyj"} diff --git a/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..ec471f7540ec7b50d3da94e38aa1cf19d0bfbb44 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-16 21:03:07,126 INFO MainThread:7695 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-16 21:03:07,126 INFO MainThread:7695 [wandb_setup.py:_flush():80] Configure stats pid to 7695 +2025-07-16 21:03:07,126 INFO MainThread:7695 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-16 21:03:07,126 INFO MainThread:7695 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug.log +2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/logs/debug-internal.log +2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:init():830] calling init triggers +2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 
'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 
'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 
'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 4, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-16 21:03:07,127 INFO MainThread:7695 [wandb_init.py:init():871] starting backend +2025-07-16 21:03:07,349 INFO MainThread:7695 [wandb_init.py:init():874] sending inform_init request +2025-07-16 21:03:07,374 INFO MainThread:7695 [wandb_init.py:init():882] backend started and connected +2025-07-16 21:03:07,388 INFO MainThread:7695 [wandb_init.py:init():953] updated telemetry +2025-07-16 21:03:08,265 INFO MainThread:7695 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-16 21:03:32,572 INFO MainThread:7695 [wandb_init.py:init():1029] starting run threads in backend +2025-07-16 21:03:32,900 INFO MainThread:7695 [wandb_run.py:_console_start():2458] atexit reg +2025-07-16 21:03:32,901 INFO MainThread:7695 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-16 21:03:32,907 INFO MainThread:7695 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-16 21:03:32,912 INFO MainThread:7695 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-16 21:03:32,949 INFO MainThread:7695 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/run-lkflebyj.wandb b/EasyR1-new/examples/wandb/run-20250716_210306-lkflebyj/run-lkflebyj.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/config.yaml b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6924da0d3a04a67e9fc8a9bae1d778f49dc90a7c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/config.yaml @@ -0,0 +1,322 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + repix6q725hnzsubljgya3pkb0pg0b9q: + args: + - --node-ip-address=10.1.5.237 + - --node-manager-port=37853 + - --object-store-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/raylet + - --redis-address=None + - --metrics-agent-port=43790 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=63904 + - --gcs-address=10.1.5.237:56758 + - --session-name=session_2025-07-18_15-56-28_336135_54391 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - --cluster-id=cc22236bcaa2a9ab2bdb3c76723ef15af4933b041414da957aa668b9 + - --startup-token=64 + - --worker-launch-time-ms=1752825390762 + - --node-id=0967616139eea74249995a5549bf4039d244c259acd06a5a8fe0b7aa + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + cpu_count: 64 + cpu_count_logical: 64 + cudaVersion: "12.1" + disk: + /: + total: "1623302262784" + used: "1224904704" + email: gia0603yucca@gmail.com + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: 
https://github.com/PorUna-byte/PAR.git + gpu: NVIDIA A800-SXM4-80GB + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-f7e858cd-ae03-031d-b834-86bf87923211 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-1bba2921-208c-d0ad-1a05-25fc85d62630 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-becb8d59-2ab7-b50d-5770-183c6478747a + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655 + host: dsw-266702-dc4b748ff-f7c66 + memory: + total: "549755813888" + os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + program: /root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-18T08:00:33.186442Z" + writerId: repix6q725hnzsubljgya3pkb0pg0b9q + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + 
value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + 
offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 72 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 72 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + 
dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/output.log b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..2ab5c2a41cd559e0abe85962768c97759069aa61 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/output.log @@ -0,0 +1,72 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 59301, "uuid": "79b41be0b4cb4caea00399d5e67f3adb", "closed": false} +Start validation... 
+Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61215, ip=10.1.5.237, actor_id=8dbb70fdf561d45e1bb95fbd01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61214, ip=10.1.5.237, actor_id=12428909aea9647197558b3701000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): 
ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61213, ip=10.1.5.237, actor_id=17a3ff05d33225db9d5f3d2001000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61212, ip=10.1.5.237, actor_id=8038d6b87c20ea82378ff46b01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61211, ip=10.1.5.237, 
actor_id=1e5423d0856a1d601b82502801000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=61209, ip=10.1.5.237, actor_id=061e0c8de42fd2b69b89561501000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_prepare_rollout_engine() (pid=60985, ip=10.1.5.237, actor_id=8073bd5c566ab2faaa122c0e01000000, repr=) + File 
"/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 544, in prepare_rollout_engine + self.rollout_sharding_manager.load_vllm_and_sync_weights() + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/sharding_manager/fsdp_vllm.py", line 124, in load_vllm_and_sync_weights + if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: +AttributeError: 'str' object has no attribute 'wake_up' diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7396100b45ae50733f37383324036e5ff4cdf5aa --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +attrs==25.3.0 +tqdm==4.67.1 +langcodes==3.5.0 +nvidia-cublas-cu12==12.4.5.8 +airportsdata==20250706 +absl-py==2.3.1 +hf-xet==1.1.5 +opentelemetry-exporter-otlp-proto-http==1.26.0 +interegular==0.3.3 +tifffile==2025.5.10 +nvidia-cufile-cu12==1.11.1.6 +nltk==3.9.1 +tokenizers==0.21.2 +salesforce-lavis==1.0.2 +tzdata==2025.2 +prometheus_client==0.22.1 +google-auth==2.40.3 +ipython==8.37.0 +pydantic==2.11.7 +mathruler==0.1.0 +six==1.17.0 +python-dateutil==2.9.0.post0 +requests==2.32.4 +mistral_common==1.8.0 +huggingface-hub==0.33.4 +preshed==3.0.10 +torchmetrics==1.7.4 +blinker==1.9.0 +nvidia-cusparse-cu12==12.3.1.170 +rich-toolkit==0.14.8 +pytz==2025.2 +pandas==2.3.1 +packaging==25.0 +async-timeout==5.0.1 +diskcache==5.6.3 +google-api-core==2.25.1 +parso==0.8.4 +joblib==1.5.1 +pycountry==24.6.1 
+triton==3.2.0 +pybase64==1.4.1 +marisa-trie==1.2.1 +plotly==6.2.0 +wandb==0.21.0 +PyYAML==6.0.2 +regex==2024.11.6 +idna==3.10 +numba==0.61.2 +nvidia-curand-cu12==10.3.5.147 +uvicorn==0.35.0 +srsly==2.5.1 +confection==0.1.5 +opentelemetry-semantic-conventions-ai==0.4.11 +typing-inspection==0.4.1 +opencv-python-headless==4.12.0.88 +pyasn1==0.6.1 +av==15.0.0 +xgrammar==0.1.18 +distlib==0.3.9 +datasets==4.0.0 +networkx==3.4.2 +prometheus-fastapi-instrumentator==7.1.0 +lightning-utilities==0.14.3 +executing==2.2.0 +pycocoevalcap==1.2 +h11==0.16.0 +certifi==2025.7.14 +sniffio==1.3.1 +wheel==0.45.1 +transformers==4.52.4 +wrapt==1.17.2 +jsonschema-specifications==2025.4.1 +mpmath==1.3.0 +msgspec==0.19.0 +py-cpuinfo==9.0.0 +contexttimer==0.3.3 +watchdog==6.0.0 +pexpect==4.9.0 +webencodings==0.5.1 +verl==0.3.2.dev0 +webdataset==1.0.2 +httpcore==1.0.9 +opentelemetry-exporter-otlp==1.26.0 +lm-format-enforcer==0.10.11 +googleapis-common-protos==1.70.0 +pyzmq==27.0.0 +fsspec==2025.3.0 +grpcio==1.73.1 +cymem==2.0.11 +timm==0.4.12 +zipp==3.23.0 +llguidance==0.7.30 +opencensus-context==0.1.3 +omegaconf==2.3.0 +python-json-logger==3.3.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +watchfiles==1.1.0 +nvidia-nvjitlink-cu12==12.4.127 +peft==0.16.0 +sentry-sdk==2.32.0 +rpds-py==0.26.0 +email_validator==2.2.0 +nodeenv==1.9.1 +distro==1.9.0 +jiter==0.10.0 +compressed-tensors==0.9.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.7 +rich==14.0.0 +GitPython==3.1.44 +lazy_loader==0.4 +fastapi-cloud-cli==0.1.4 +cupy-cuda12x==13.5.1 +prompt_toolkit==3.0.51 +gguf==0.17.1 +blis==1.3.0 +thinc==8.3.6 +cloudpickle==3.1.1 +multidict==6.6.3 +nvidia-nvtx-cu12==12.4.127 +flash-attn==2.7.1.post1 +pyasn1_modules==0.4.2 +rsa==4.9.1 +weasel==0.4.1 +uvloop==0.21.0 +click==8.2.1 +numpy==2.2.6 +torchdata==0.11.0 +pylatexenc==2.10 +cachetools==5.5.2 +Jinja2==3.1.6 +typer==0.16.0 +nvidia-cudnn-cu12==9.1.0.70 +fastapi-cli==0.0.8 +xxhash==3.5.0 +tornado==6.5.1 +scipy==1.15.3 +rouge_score==0.1.2 
+cloudpathlib==0.21.1 +streamlit==1.46.1 +jedi==0.19.2 +referencing==0.36.2 +accelerate==1.8.1 +decord==0.6.0 +setuptools==78.1.1 +mdurl==0.1.2 +vllm==0.8.5.post1 +identify==2.6.12 +python-slugify==8.0.4 +dnspython==2.7.0 +dill==0.3.8 +opentelemetry-proto==1.26.0 +orjson==3.10.18 +msgpack==1.1.1 +aiohttp==3.12.14 +aiosignal==1.4.0 +typing_extensions==4.14.1 +tiktoken==0.9.0 +catalogue==2.0.10 +platformdirs==4.3.8 +narwhals==1.47.0 +antlr4-python3-runtime==4.9.3 +pydantic-extra-types==2.10.5 +nvidia-cusolver-cu12==11.6.1.9 +kaggle==1.7.4.5 +propcache==0.3.2 +urllib3==2.5.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +pydeck==0.9.1 +nvidia-cufft-cu12==11.2.1.3 +pyarrow==20.0.0 +nvidia-nccl-cu12==2.21.5 +httptools==0.6.4 +qwen-vl-utils==0.0.11 +markdown-it-py==3.0.0 +gitdb==4.0.12 +altair==5.5.0 +torchvision==0.21.0 +python-magic==0.4.27 +iopath==0.1.10 +ray==2.47.1 +blake3==1.0.5 +pillow==11.3.0 +python-dotenv==1.1.1 +torchaudio==2.6.0 +partial-json-parser==0.2.1.1.post6 +httpx==0.28.1 +torch==2.6.0 +anyio==4.9.0 +fairscale==0.4.4 +traitlets==5.14.3 +pure_eval==0.2.3 +sympy==1.13.1 +nvidia-cusparselt-cu12==0.6.2 +jsonschema==4.24.0 +imageio==2.37.0 +opencensus==0.11.4 +stack-data==0.6.3 +shellingham==1.5.4 +tensordict==0.9.1 +nvidia-cuda-runtime-cu12==12.4.127 +nest-asyncio==1.6.0 +einops==0.8.1 +lark==1.2.2 +tenacity==9.1.2 +virtualenv==20.31.2 +ptyprocess==0.7.0 +outlines==0.1.11 +depyf==0.18.0 +starlette==0.47.1 +cfgv==3.4.0 +pre_commit==4.2.0 +language_data==1.3.0 +pip==25.1 +Pygments==2.19.2 +nvidia-cuda-cupti-cu12==12.4.127 +protobuf==4.25.8 +safetensors==0.5.3 +text-unidecode==1.3 +wcwidth==0.2.13 +charset-normalizer==3.4.2 +aiohappyeyeballs==2.6.1 +outlines_core==0.1.26 +fastrlock==0.8.3 +asttokens==3.0.0 +psutil==7.0.0 +smmap==5.0.2 +exceptiongroup==1.3.0 +murmurhash==1.0.13 +pytorch-lightning==2.5.2 +filelock==3.18.0 +astor==0.8.1 +py-spy==0.4.0 +pydantic_core==2.33.2 +colorful==0.5.7 +fastapi==0.116.1 +opentelemetry-api==1.26.0 +openai==1.90.0 
+ninja==1.11.1.4 +opentelemetry-semantic-conventions==0.47b0 +spacy-legacy==3.0.12 +opendatasets==0.1.22 +Deprecated==1.2.18 +proto-plus==1.26.1 +rignore==0.6.2 +aiohttp-cors==0.8.1 +liger_kernel==0.6.0 +opentelemetry-exporter-prometheus==0.56b0 +python-multipart==0.0.20 +multiprocess==0.70.16 +opentelemetry-sdk==1.26.0 +decorator==5.2.1 +xformers==0.0.29.post2 +spacy==3.8.7 +pyvers==0.1.0 +pycocotools==2.0.10 +websockets==15.0.1 +wasabi==1.1.3 +frozenlist==1.7.0 +codetiming==1.4.0 +sentencepiece==0.2.0 +toml==0.10.2 +scikit-image==0.25.2 +ftfy==6.3.1 +bleach==6.2.0 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +importlib_metadata==8.0.0 +spacy-loggers==1.0.5 +smart_open==7.3.0.post1 +portalocker==3.2.0 +llvmlite==0.44.0 +MarkupSafe==3.0.2 +braceexpand==0.1.7 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.context==5.3.0 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +platformdirs==4.2.2 +packaging==24.2 +wheel==0.45.1 +zipp==3.19.2 +inflect==7.3.1 +autocommand==2.2.2 +typeguard==4.3.0 +jaraco.collections==5.1.0 +backports.tarfile==1.2.0 +tomli==2.0.1 +importlib_metadata==8.0.0 +typing_extensions==4.12.2 diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..e1c61cb0c0f4f69f957b59a8dc070f8bac3b8f0e --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-18T08:00:33.186442Z", + "args": [ + "--node-ip-address=10.1.5.237", + "--node-manager-port=37853", + "--object-store-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-18_15-56-28_336135_54391/sockets/raylet", + "--redis-address=None", + 
"--metrics-agent-port=43790", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=63904", + "--gcs-address=10.1.5.237:56758", + "--session-name=session_2025-07-18_15-56-28_336135_54391", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=cc22236bcaa2a9ab2bdb3c76723ef15af4933b041414da957aa668b9", + "--startup-token=64", + "--worker-launch-time-ms=1752825390762", + "--node-id=0967616139eea74249995a5549bf4039d244c259acd06a5a8fe0b7aa", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-dc4b748ff-f7c66", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1224904704" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-f7e858cd-ae03-031d-b834-86bf87923211" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": 
"GPU-becb8d59-2ab7-b50d-5770-183c6478747a" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655" + } + ], + "cudaVersion": "12.1", + "writerId": "repix6q725hnzsubljgya3pkb0pg0b9q" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-summary.json b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..7b4fdaac0ef2f0f7b27bbdd0252e8c2048547735 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":1},"_runtime":1} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..956b73d1226f03b272a8c74f0a7aed2e82e7f79a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log @@ -0,0 +1,15 @@ +{"time":"2025-07-18T16:00:33.944898175+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-18T16:01:04.056910886+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"} +{"time":"2025-07-18T16:01:14.919464259+08:00","level":"INFO","msg":"stream: created new stream","id":"nji9xqxs"} +{"time":"2025-07-18T16:01:14.926346872+08:00","level":"INFO","msg":"sender: started","stream_id":"nji9xqxs"} +{"time":"2025-07-18T16:01:14.926359513+08:00","level":"INFO","msg":"stream: started","id":"nji9xqxs"} 
+{"time":"2025-07-18T16:01:14.926369749+08:00","level":"INFO","msg":"handler: started","stream_id":"nji9xqxs"} +{"time":"2025-07-18T16:01:14.926391685+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"nji9xqxs"} +{"time":"2025-07-18T16:01:44.221082826+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading data","runtime_seconds":2.593669605},{"desc":"updating run metadata","runtime_seconds":2.593493161},{"desc":"uploading wandb-metadata.json","runtime_seconds":1.024626407}],"total_operations":3}} +{"time":"2025-07-18T16:01:58.697029208+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/nji9xqxs/file_stream\": unexpected EOF"} +{"time":"2025-07-18T16:02:17.601004486+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-18T16:02:38.850804129+08:00","level":"INFO","msg":"stream: closing","id":"nji9xqxs"} +{"time":"2025-07-18T16:02:38.850824284+08:00","level":"INFO","msg":"handler: closed","stream_id":"nji9xqxs"} +{"time":"2025-07-18T16:02:38.850832353+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"nji9xqxs"} +{"time":"2025-07-18T16:02:38.850837848+08:00","level":"INFO","msg":"sender: closed","stream_id":"nji9xqxs"} +{"time":"2025-07-18T16:02:38.858004163+08:00","level":"INFO","msg":"stream: closed","id":"nji9xqxs"} diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..708bfef6b860b451c60bbbf5e58fd562f8786837 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-18 16:00:33,700 INFO MainThread:59301 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-18 16:00:33,700 INFO MainThread:59301 [wandb_setup.py:_flush():80] Configure stats pid to 59301 
+2025-07-18 16:00:33,700 INFO MainThread:59301 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug.log +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/logs/debug-internal.log +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:init():830] calling init triggers +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 
'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 
'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 
'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-18 16:00:33,701 INFO MainThread:59301 [wandb_init.py:init():871] starting backend +2025-07-18 16:00:33,911 INFO MainThread:59301 [wandb_init.py:init():874] sending inform_init request +2025-07-18 16:00:33,914 INFO MainThread:59301 [wandb_init.py:init():882] backend started and connected +2025-07-18 16:00:33,934 INFO MainThread:59301 [wandb_init.py:init():953] updated telemetry +2025-07-18 16:00:34,824 INFO MainThread:59301 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-18 16:01:41,621 INFO MainThread:59301 [wandb_init.py:init():1029] starting run threads in backend +2025-07-18 16:01:41,877 INFO MainThread:59301 [wandb_run.py:_console_start():2458] atexit reg +2025-07-18 16:01:41,877 INFO MainThread:59301 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-18 16:01:41,899 INFO MainThread:59301 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-18 16:01:41,899 INFO MainThread:59301 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-18 16:01:41,927 INFO MainThread:59301 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-18 16:01:43,199 INFO MainThread:59301 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/nji9xqxs +2025-07-18 16:01:43,199 INFO MainThread:59301 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-18 16:01:43,207 INFO MainThread:59301 [wandb_run.py:_restore():2405] restore +2025-07-18 16:01:43,211 INFO MainThread:59301 [wandb_run.py:_restore():2411] restore done +2025-07-18 16:02:38,840 INFO MainThread:59301 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-18 16:02:38,841 INFO MainThread:59301 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-18 16:02:38,841 INFO MainThread:59301 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/run-nji9xqxs.wandb b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/run-nji9xqxs.wandb new file mode 100644 index 0000000000000000000000000000000000000000..9342d4a19119a3bb703fe1b02151727e1bfbcafb Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250718_160033-nji9xqxs/run-nji9xqxs.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/output.log b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7396100b45ae50733f37383324036e5ff4cdf5aa --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +attrs==25.3.0 +tqdm==4.67.1 
+langcodes==3.5.0 +nvidia-cublas-cu12==12.4.5.8 +airportsdata==20250706 +absl-py==2.3.1 +hf-xet==1.1.5 +opentelemetry-exporter-otlp-proto-http==1.26.0 +interegular==0.3.3 +tifffile==2025.5.10 +nvidia-cufile-cu12==1.11.1.6 +nltk==3.9.1 +tokenizers==0.21.2 +salesforce-lavis==1.0.2 +tzdata==2025.2 +prometheus_client==0.22.1 +google-auth==2.40.3 +ipython==8.37.0 +pydantic==2.11.7 +mathruler==0.1.0 +six==1.17.0 +python-dateutil==2.9.0.post0 +requests==2.32.4 +mistral_common==1.8.0 +huggingface-hub==0.33.4 +preshed==3.0.10 +torchmetrics==1.7.4 +blinker==1.9.0 +nvidia-cusparse-cu12==12.3.1.170 +rich-toolkit==0.14.8 +pytz==2025.2 +pandas==2.3.1 +packaging==25.0 +async-timeout==5.0.1 +diskcache==5.6.3 +google-api-core==2.25.1 +parso==0.8.4 +joblib==1.5.1 +pycountry==24.6.1 +triton==3.2.0 +pybase64==1.4.1 +marisa-trie==1.2.1 +plotly==6.2.0 +wandb==0.21.0 +PyYAML==6.0.2 +regex==2024.11.6 +idna==3.10 +numba==0.61.2 +nvidia-curand-cu12==10.3.5.147 +uvicorn==0.35.0 +srsly==2.5.1 +confection==0.1.5 +opentelemetry-semantic-conventions-ai==0.4.11 +typing-inspection==0.4.1 +opencv-python-headless==4.12.0.88 +pyasn1==0.6.1 +av==15.0.0 +xgrammar==0.1.18 +distlib==0.3.9 +datasets==4.0.0 +networkx==3.4.2 +prometheus-fastapi-instrumentator==7.1.0 +lightning-utilities==0.14.3 +executing==2.2.0 +pycocoevalcap==1.2 +h11==0.16.0 +certifi==2025.7.14 +sniffio==1.3.1 +wheel==0.45.1 +transformers==4.52.4 +wrapt==1.17.2 +jsonschema-specifications==2025.4.1 +mpmath==1.3.0 +msgspec==0.19.0 +py-cpuinfo==9.0.0 +contexttimer==0.3.3 +watchdog==6.0.0 +pexpect==4.9.0 +webencodings==0.5.1 +verl==0.3.2.dev0 +webdataset==1.0.2 +httpcore==1.0.9 +opentelemetry-exporter-otlp==1.26.0 +lm-format-enforcer==0.10.11 +googleapis-common-protos==1.70.0 +pyzmq==27.0.0 +fsspec==2025.3.0 +grpcio==1.73.1 +cymem==2.0.11 +timm==0.4.12 +zipp==3.23.0 +llguidance==0.7.30 +opencensus-context==0.1.3 +omegaconf==2.3.0 +python-json-logger==3.3.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +watchfiles==1.1.0 
+nvidia-nvjitlink-cu12==12.4.127 +peft==0.16.0 +sentry-sdk==2.32.0 +rpds-py==0.26.0 +email_validator==2.2.0 +nodeenv==1.9.1 +distro==1.9.0 +jiter==0.10.0 +compressed-tensors==0.9.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.7 +rich==14.0.0 +GitPython==3.1.44 +lazy_loader==0.4 +fastapi-cloud-cli==0.1.4 +cupy-cuda12x==13.5.1 +prompt_toolkit==3.0.51 +gguf==0.17.1 +blis==1.3.0 +thinc==8.3.6 +cloudpickle==3.1.1 +multidict==6.6.3 +nvidia-nvtx-cu12==12.4.127 +flash-attn==2.7.1.post1 +pyasn1_modules==0.4.2 +rsa==4.9.1 +weasel==0.4.1 +uvloop==0.21.0 +click==8.2.1 +numpy==2.2.6 +torchdata==0.11.0 +pylatexenc==2.10 +cachetools==5.5.2 +Jinja2==3.1.6 +typer==0.16.0 +nvidia-cudnn-cu12==9.1.0.70 +fastapi-cli==0.0.8 +xxhash==3.5.0 +tornado==6.5.1 +scipy==1.15.3 +rouge_score==0.1.2 +cloudpathlib==0.21.1 +streamlit==1.46.1 +jedi==0.19.2 +referencing==0.36.2 +accelerate==1.8.1 +decord==0.6.0 +setuptools==78.1.1 +mdurl==0.1.2 +vllm==0.8.5.post1 +identify==2.6.12 +python-slugify==8.0.4 +dnspython==2.7.0 +dill==0.3.8 +opentelemetry-proto==1.26.0 +orjson==3.10.18 +msgpack==1.1.1 +aiohttp==3.12.14 +aiosignal==1.4.0 +typing_extensions==4.14.1 +tiktoken==0.9.0 +catalogue==2.0.10 +platformdirs==4.3.8 +narwhals==1.47.0 +antlr4-python3-runtime==4.9.3 +pydantic-extra-types==2.10.5 +nvidia-cusolver-cu12==11.6.1.9 +kaggle==1.7.4.5 +propcache==0.3.2 +urllib3==2.5.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +pydeck==0.9.1 +nvidia-cufft-cu12==11.2.1.3 +pyarrow==20.0.0 +nvidia-nccl-cu12==2.21.5 +httptools==0.6.4 +qwen-vl-utils==0.0.11 +markdown-it-py==3.0.0 +gitdb==4.0.12 +altair==5.5.0 +torchvision==0.21.0 +python-magic==0.4.27 +iopath==0.1.10 +ray==2.47.1 +blake3==1.0.5 +pillow==11.3.0 +python-dotenv==1.1.1 +torchaudio==2.6.0 +partial-json-parser==0.2.1.1.post6 +httpx==0.28.1 +torch==2.6.0 +anyio==4.9.0 +fairscale==0.4.4 +traitlets==5.14.3 +pure_eval==0.2.3 +sympy==1.13.1 +nvidia-cusparselt-cu12==0.6.2 +jsonschema==4.24.0 +imageio==2.37.0 +opencensus==0.11.4 +stack-data==0.6.3 
+shellingham==1.5.4 +tensordict==0.9.1 +nvidia-cuda-runtime-cu12==12.4.127 +nest-asyncio==1.6.0 +einops==0.8.1 +lark==1.2.2 +tenacity==9.1.2 +virtualenv==20.31.2 +ptyprocess==0.7.0 +outlines==0.1.11 +depyf==0.18.0 +starlette==0.47.1 +cfgv==3.4.0 +pre_commit==4.2.0 +language_data==1.3.0 +pip==25.1 +Pygments==2.19.2 +nvidia-cuda-cupti-cu12==12.4.127 +protobuf==4.25.8 +safetensors==0.5.3 +text-unidecode==1.3 +wcwidth==0.2.13 +charset-normalizer==3.4.2 +aiohappyeyeballs==2.6.1 +outlines_core==0.1.26 +fastrlock==0.8.3 +asttokens==3.0.0 +psutil==7.0.0 +smmap==5.0.2 +exceptiongroup==1.3.0 +murmurhash==1.0.13 +pytorch-lightning==2.5.2 +filelock==3.18.0 +astor==0.8.1 +py-spy==0.4.0 +pydantic_core==2.33.2 +colorful==0.5.7 +fastapi==0.116.1 +opentelemetry-api==1.26.0 +openai==1.90.0 +ninja==1.11.1.4 +opentelemetry-semantic-conventions==0.47b0 +spacy-legacy==3.0.12 +opendatasets==0.1.22 +Deprecated==1.2.18 +proto-plus==1.26.1 +rignore==0.6.2 +aiohttp-cors==0.8.1 +liger_kernel==0.6.0 +opentelemetry-exporter-prometheus==0.56b0 +python-multipart==0.0.20 +multiprocess==0.70.16 +opentelemetry-sdk==1.26.0 +decorator==5.2.1 +xformers==0.0.29.post2 +spacy==3.8.7 +pyvers==0.1.0 +pycocotools==2.0.10 +websockets==15.0.1 +wasabi==1.1.3 +frozenlist==1.7.0 +codetiming==1.4.0 +sentencepiece==0.2.0 +toml==0.10.2 +scikit-image==0.25.2 +ftfy==6.3.1 +bleach==6.2.0 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +importlib_metadata==8.0.0 +spacy-loggers==1.0.5 +smart_open==7.3.0.post1 +portalocker==3.2.0 +llvmlite==0.44.0 +MarkupSafe==3.0.2 +braceexpand==0.1.7 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.context==5.3.0 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +platformdirs==4.2.2 +packaging==24.2 +wheel==0.45.1 +zipp==3.19.2 +inflect==7.3.1 +autocommand==2.2.2 +typeguard==4.3.0 +jaraco.collections==5.1.0 +backports.tarfile==1.2.0 +tomli==2.0.1 +importlib_metadata==8.0.0 +typing_extensions==4.12.2 diff --git 
a/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..819b843e2e94d09f81c1a276811070dc974cdefc --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/files/wandb-metadata.json @@ -0,0 +1,36 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-18T10:01:27.794840Z", + "args": [ + "--node-ip-address=10.1.5.237", + "--node-manager-port=34033", + "--object-store-name=/tmp/ray/session_2025-07-18_17-59-46_929054_90432/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-18_17-59-46_929054_90432/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=52220", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=58307", + "--gcs-address=10.1.5.237:63437", + "--session-name=session_2025-07-18_17-59-46_929054_90432", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=2320bfb132f181fae6a438fbb8ba4302101825636e86b29ea49d2a26", + "--startup-token=64", + "--worker-launch-time-ms=1752832790343", + "--node-id=d351a5bfa85748ebf678bc24e7adda6ad59e09972b13108dbb01547f", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-dc4b748ff-f7c66", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "writerId": "qpm36h9mjv3m2bmimjfqh0pw0u9a4282" +} \ No newline at end of file diff --git 
a/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..ad8148fc0a694f50660e3c3d777201b9e2ea368c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-18T18:01:28.970283308+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-18T18:01:36.389685825+08:00","level":"INFO","msg":"stream: created new stream","id":"wmarwr6l"} +{"time":"2025-07-18T18:01:36.413332423+08:00","level":"INFO","msg":"handler: started","stream_id":"wmarwr6l"} +{"time":"2025-07-18T18:01:36.413371741+08:00","level":"INFO","msg":"stream: started","id":"wmarwr6l"} +{"time":"2025-07-18T18:01:36.413392401+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"wmarwr6l"} +{"time":"2025-07-18T18:01:36.413389742+08:00","level":"INFO","msg":"sender: started","stream_id":"wmarwr6l"} diff --git a/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..6263673936833403ea2ce0f1ff8f970f7be677d6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] Configure stats pid to 95226 +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_setup.py:_flush():80] 
Loading settings from environment variables +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug.log +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/logs/debug-internal.log +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:init():830] calling init triggers +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 
'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 
'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-18 18:01:28,690 INFO MainThread:95226 [wandb_init.py:init():871] starting backend +2025-07-18 18:01:28,902 INFO MainThread:95226 [wandb_init.py:init():874] sending inform_init request +2025-07-18 18:01:28,904 INFO 
MainThread:95226 [wandb_init.py:init():882] backend started and connected +2025-07-18 18:01:28,909 INFO MainThread:95226 [wandb_init.py:init():953] updated telemetry +2025-07-18 18:01:29,464 INFO MainThread:95226 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-18 18:01:40,777 INFO MainThread:95226 [wandb_init.py:init():1029] starting run threads in backend +2025-07-18 18:01:41,069 INFO MainThread:95226 [wandb_run.py:_console_start():2458] atexit reg +2025-07-18 18:01:41,069 INFO MainThread:95226 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-18 18:01:41,079 INFO MainThread:95226 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-18 18:01:41,079 INFO MainThread:95226 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-18 18:01:41,125 INFO MainThread:95226 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/run-wmarwr6l.wandb b/EasyR1-new/examples/wandb/run-20250718_180127-wmarwr6l/run-wmarwr6l.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/output.log b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..65c4fac1e916abf486a3044e9f28ae1f90c7e133 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 104882, "uuid": "0f066e81b2fc4d09a338174f40c2e400", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7396100b45ae50733f37383324036e5ff4cdf5aa --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +attrs==25.3.0 +tqdm==4.67.1 +langcodes==3.5.0 +nvidia-cublas-cu12==12.4.5.8 +airportsdata==20250706 +absl-py==2.3.1 +hf-xet==1.1.5 +opentelemetry-exporter-otlp-proto-http==1.26.0 +interegular==0.3.3 +tifffile==2025.5.10 +nvidia-cufile-cu12==1.11.1.6 +nltk==3.9.1 +tokenizers==0.21.2 +salesforce-lavis==1.0.2 +tzdata==2025.2 +prometheus_client==0.22.1 +google-auth==2.40.3 +ipython==8.37.0 +pydantic==2.11.7 +mathruler==0.1.0 +six==1.17.0 +python-dateutil==2.9.0.post0 +requests==2.32.4 +mistral_common==1.8.0 +huggingface-hub==0.33.4 +preshed==3.0.10 +torchmetrics==1.7.4 +blinker==1.9.0 +nvidia-cusparse-cu12==12.3.1.170 +rich-toolkit==0.14.8 +pytz==2025.2 +pandas==2.3.1 +packaging==25.0 +async-timeout==5.0.1 +diskcache==5.6.3 +google-api-core==2.25.1 +parso==0.8.4 +joblib==1.5.1 +pycountry==24.6.1 +triton==3.2.0 +pybase64==1.4.1 +marisa-trie==1.2.1 +plotly==6.2.0 +wandb==0.21.0 +PyYAML==6.0.2 +regex==2024.11.6 +idna==3.10 +numba==0.61.2 +nvidia-curand-cu12==10.3.5.147 +uvicorn==0.35.0 +srsly==2.5.1 +confection==0.1.5 +opentelemetry-semantic-conventions-ai==0.4.11 +typing-inspection==0.4.1 +opencv-python-headless==4.12.0.88 +pyasn1==0.6.1 +av==15.0.0 +xgrammar==0.1.18 +distlib==0.3.9 +datasets==4.0.0 +networkx==3.4.2 +prometheus-fastapi-instrumentator==7.1.0 +lightning-utilities==0.14.3 +executing==2.2.0 +pycocoevalcap==1.2 +h11==0.16.0 +certifi==2025.7.14 +sniffio==1.3.1 +wheel==0.45.1 +transformers==4.52.4 +wrapt==1.17.2 +jsonschema-specifications==2025.4.1 +mpmath==1.3.0 +msgspec==0.19.0 +py-cpuinfo==9.0.0 +contexttimer==0.3.3 
+watchdog==6.0.0 +pexpect==4.9.0 +webencodings==0.5.1 +verl==0.3.2.dev0 +webdataset==1.0.2 +httpcore==1.0.9 +opentelemetry-exporter-otlp==1.26.0 +lm-format-enforcer==0.10.11 +googleapis-common-protos==1.70.0 +pyzmq==27.0.0 +fsspec==2025.3.0 +grpcio==1.73.1 +cymem==2.0.11 +timm==0.4.12 +zipp==3.23.0 +llguidance==0.7.30 +opencensus-context==0.1.3 +omegaconf==2.3.0 +python-json-logger==3.3.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +watchfiles==1.1.0 +nvidia-nvjitlink-cu12==12.4.127 +peft==0.16.0 +sentry-sdk==2.32.0 +rpds-py==0.26.0 +email_validator==2.2.0 +nodeenv==1.9.1 +distro==1.9.0 +jiter==0.10.0 +compressed-tensors==0.9.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.7 +rich==14.0.0 +GitPython==3.1.44 +lazy_loader==0.4 +fastapi-cloud-cli==0.1.4 +cupy-cuda12x==13.5.1 +prompt_toolkit==3.0.51 +gguf==0.17.1 +blis==1.3.0 +thinc==8.3.6 +cloudpickle==3.1.1 +multidict==6.6.3 +nvidia-nvtx-cu12==12.4.127 +flash-attn==2.7.1.post1 +pyasn1_modules==0.4.2 +rsa==4.9.1 +weasel==0.4.1 +uvloop==0.21.0 +click==8.2.1 +numpy==2.2.6 +torchdata==0.11.0 +pylatexenc==2.10 +cachetools==5.5.2 +Jinja2==3.1.6 +typer==0.16.0 +nvidia-cudnn-cu12==9.1.0.70 +fastapi-cli==0.0.8 +xxhash==3.5.0 +tornado==6.5.1 +scipy==1.15.3 +rouge_score==0.1.2 +cloudpathlib==0.21.1 +streamlit==1.46.1 +jedi==0.19.2 +referencing==0.36.2 +accelerate==1.8.1 +decord==0.6.0 +setuptools==78.1.1 +mdurl==0.1.2 +vllm==0.8.5.post1 +identify==2.6.12 +python-slugify==8.0.4 +dnspython==2.7.0 +dill==0.3.8 +opentelemetry-proto==1.26.0 +orjson==3.10.18 +msgpack==1.1.1 +aiohttp==3.12.14 +aiosignal==1.4.0 +typing_extensions==4.14.1 +tiktoken==0.9.0 +catalogue==2.0.10 +platformdirs==4.3.8 +narwhals==1.47.0 +antlr4-python3-runtime==4.9.3 +pydantic-extra-types==2.10.5 +nvidia-cusolver-cu12==11.6.1.9 +kaggle==1.7.4.5 +propcache==0.3.2 +urllib3==2.5.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +pydeck==0.9.1 +nvidia-cufft-cu12==11.2.1.3 +pyarrow==20.0.0 +nvidia-nccl-cu12==2.21.5 +httptools==0.6.4 +qwen-vl-utils==0.0.11 
+markdown-it-py==3.0.0 +gitdb==4.0.12 +altair==5.5.0 +torchvision==0.21.0 +python-magic==0.4.27 +iopath==0.1.10 +ray==2.47.1 +blake3==1.0.5 +pillow==11.3.0 +python-dotenv==1.1.1 +torchaudio==2.6.0 +partial-json-parser==0.2.1.1.post6 +httpx==0.28.1 +torch==2.6.0 +anyio==4.9.0 +fairscale==0.4.4 +traitlets==5.14.3 +pure_eval==0.2.3 +sympy==1.13.1 +nvidia-cusparselt-cu12==0.6.2 +jsonschema==4.24.0 +imageio==2.37.0 +opencensus==0.11.4 +stack-data==0.6.3 +shellingham==1.5.4 +tensordict==0.9.1 +nvidia-cuda-runtime-cu12==12.4.127 +nest-asyncio==1.6.0 +einops==0.8.1 +lark==1.2.2 +tenacity==9.1.2 +virtualenv==20.31.2 +ptyprocess==0.7.0 +outlines==0.1.11 +depyf==0.18.0 +starlette==0.47.1 +cfgv==3.4.0 +pre_commit==4.2.0 +language_data==1.3.0 +pip==25.1 +Pygments==2.19.2 +nvidia-cuda-cupti-cu12==12.4.127 +protobuf==4.25.8 +safetensors==0.5.3 +text-unidecode==1.3 +wcwidth==0.2.13 +charset-normalizer==3.4.2 +aiohappyeyeballs==2.6.1 +outlines_core==0.1.26 +fastrlock==0.8.3 +asttokens==3.0.0 +psutil==7.0.0 +smmap==5.0.2 +exceptiongroup==1.3.0 +murmurhash==1.0.13 +pytorch-lightning==2.5.2 +filelock==3.18.0 +astor==0.8.1 +py-spy==0.4.0 +pydantic_core==2.33.2 +colorful==0.5.7 +fastapi==0.116.1 +opentelemetry-api==1.26.0 +openai==1.90.0 +ninja==1.11.1.4 +opentelemetry-semantic-conventions==0.47b0 +spacy-legacy==3.0.12 +opendatasets==0.1.22 +Deprecated==1.2.18 +proto-plus==1.26.1 +rignore==0.6.2 +aiohttp-cors==0.8.1 +liger_kernel==0.6.0 +opentelemetry-exporter-prometheus==0.56b0 +python-multipart==0.0.20 +multiprocess==0.70.16 +opentelemetry-sdk==1.26.0 +decorator==5.2.1 +xformers==0.0.29.post2 +spacy==3.8.7 +pyvers==0.1.0 +pycocotools==2.0.10 +websockets==15.0.1 +wasabi==1.1.3 +frozenlist==1.7.0 +codetiming==1.4.0 +sentencepiece==0.2.0 +toml==0.10.2 +scikit-image==0.25.2 +ftfy==6.3.1 +bleach==6.2.0 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +importlib_metadata==8.0.0 +spacy-loggers==1.0.5 +smart_open==7.3.0.post1 +portalocker==3.2.0 +llvmlite==0.44.0 +MarkupSafe==3.0.2 
+braceexpand==0.1.7 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.context==5.3.0 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +platformdirs==4.2.2 +packaging==24.2 +wheel==0.45.1 +zipp==3.19.2 +inflect==7.3.1 +autocommand==2.2.2 +typeguard==4.3.0 +jaraco.collections==5.1.0 +backports.tarfile==1.2.0 +tomli==2.0.1 +importlib_metadata==8.0.0 +typing_extensions==4.12.2 diff --git a/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8aebeb6b614972282a34be8538e064ae8f0091b6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-18T10:10:22.154415Z", + "args": [ + "--node-ip-address=10.1.5.237", + "--node-manager-port=45779", + "--object-store-name=/tmp/ray/session_2025-07-18_18-08-41_995857_100101/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-18_18-08-41_995857_100101/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=60724", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=59748", + "--gcs-address=10.1.5.237:65420", + "--session-name=session_2025-07-18_18-08-41_995857_100101", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=d5af14f82d6884b3972b319ba9c8871ee6d621d0b395536182e77073", + "--startup-token=64", + "--worker-launch-time-ms=1752833324419", + "--node-id=affe3b277e8d66adb6a1a72266e2e5ce24fa5e48471c99f30a7a9bdf", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": 
"https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-dc4b748ff-f7c66", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1225166848" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-f7e858cd-ae03-031d-b834-86bf87923211" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-becb8d59-2ab7-b50d-5770-183c6478747a" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655" + } + ], + "cudaVersion": "12.1", + "writerId": "71sc2v9oxtkr7yiqxoaago0bipl2xjby" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..3759dd46e765b82983092a6a8eead014df4ca555 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-18T18:10:23.417471358+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-18T18:10:25.159462304+08:00","level":"INFO","msg":"stream: created new stream","id":"zkytrm61"} +{"time":"2025-07-18T18:10:25.159507377+08:00","level":"INFO","msg":"stream: started","id":"zkytrm61"} +{"time":"2025-07-18T18:10:25.159528642+08:00","level":"INFO","msg":"handler: started","stream_id":"zkytrm61"} +{"time":"2025-07-18T18:10:25.15958268+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"zkytrm61"} +{"time":"2025-07-18T18:10:25.159587635+08:00","level":"INFO","msg":"sender: started","stream_id":"zkytrm61"} diff --git a/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e2c9a5eb31c68fab90edb95515f84c8067d3be14 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-18 18:10:23,122 INFO MainThread:104882 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_setup.py:_flush():80] Configure stats pid to 104882 +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug.log +2025-07-18 18:10:23,123 INFO 
MainThread:104882 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/logs/debug-internal.log +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_init.py:init():830] calling init triggers +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 
'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-18 18:10:23,123 INFO MainThread:104882 [wandb_init.py:init():871] starting backend +2025-07-18 18:10:23,360 INFO MainThread:104882 [wandb_init.py:init():874] sending inform_init request +2025-07-18 18:10:23,362 INFO MainThread:104882 [wandb_init.py:init():882] backend started and connected +2025-07-18 18:10:23,390 INFO MainThread:104882 [wandb_init.py:init():953] updated telemetry +2025-07-18 18:10:23,939 INFO MainThread:104882 [wandb_init.py:init():977] communicating run to backend with 
90.0 second timeout +2025-07-18 18:10:26,092 INFO MainThread:104882 [wandb_init.py:init():1029] starting run threads in backend +2025-07-18 18:10:26,299 INFO MainThread:104882 [wandb_run.py:_console_start():2458] atexit reg +2025-07-18 18:10:26,299 INFO MainThread:104882 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-18 18:10:26,308 INFO MainThread:104882 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-18 18:10:26,319 INFO MainThread:104882 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-18 18:10:26,355 INFO MainThread:104882 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/run-zkytrm61.wandb b/EasyR1-new/examples/wandb/run-20250718_181022-zkytrm61/run-zkytrm61.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/output.log b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..52511b6259efbc2a768bddaffcb1451523307a9e --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 160623, "uuid": "34b2c74ee0024065b840369ef674694c", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7396100b45ae50733f37383324036e5ff4cdf5aa --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +attrs==25.3.0 +tqdm==4.67.1 +langcodes==3.5.0 +nvidia-cublas-cu12==12.4.5.8 +airportsdata==20250706 +absl-py==2.3.1 +hf-xet==1.1.5 +opentelemetry-exporter-otlp-proto-http==1.26.0 +interegular==0.3.3 +tifffile==2025.5.10 +nvidia-cufile-cu12==1.11.1.6 +nltk==3.9.1 +tokenizers==0.21.2 +salesforce-lavis==1.0.2 +tzdata==2025.2 +prometheus_client==0.22.1 +google-auth==2.40.3 +ipython==8.37.0 +pydantic==2.11.7 +mathruler==0.1.0 +six==1.17.0 +python-dateutil==2.9.0.post0 +requests==2.32.4 +mistral_common==1.8.0 +huggingface-hub==0.33.4 +preshed==3.0.10 +torchmetrics==1.7.4 +blinker==1.9.0 +nvidia-cusparse-cu12==12.3.1.170 +rich-toolkit==0.14.8 +pytz==2025.2 +pandas==2.3.1 +packaging==25.0 +async-timeout==5.0.1 +diskcache==5.6.3 +google-api-core==2.25.1 +parso==0.8.4 +joblib==1.5.1 +pycountry==24.6.1 +triton==3.2.0 +pybase64==1.4.1 +marisa-trie==1.2.1 +plotly==6.2.0 +wandb==0.21.0 +PyYAML==6.0.2 +regex==2024.11.6 +idna==3.10 +numba==0.61.2 +nvidia-curand-cu12==10.3.5.147 +uvicorn==0.35.0 +srsly==2.5.1 +confection==0.1.5 +opentelemetry-semantic-conventions-ai==0.4.11 +typing-inspection==0.4.1 +opencv-python-headless==4.12.0.88 +pyasn1==0.6.1 +av==15.0.0 +xgrammar==0.1.18 +distlib==0.3.9 +datasets==4.0.0 +networkx==3.4.2 +prometheus-fastapi-instrumentator==7.1.0 +lightning-utilities==0.14.3 +executing==2.2.0 +pycocoevalcap==1.2 +h11==0.16.0 +certifi==2025.7.14 +sniffio==1.3.1 +wheel==0.45.1 +transformers==4.52.4 +wrapt==1.17.2 +jsonschema-specifications==2025.4.1 +mpmath==1.3.0 +msgspec==0.19.0 +py-cpuinfo==9.0.0 +contexttimer==0.3.3 
+watchdog==6.0.0 +pexpect==4.9.0 +webencodings==0.5.1 +verl==0.3.2.dev0 +webdataset==1.0.2 +httpcore==1.0.9 +opentelemetry-exporter-otlp==1.26.0 +lm-format-enforcer==0.10.11 +googleapis-common-protos==1.70.0 +pyzmq==27.0.0 +fsspec==2025.3.0 +grpcio==1.73.1 +cymem==2.0.11 +timm==0.4.12 +zipp==3.23.0 +llguidance==0.7.30 +opencensus-context==0.1.3 +omegaconf==2.3.0 +python-json-logger==3.3.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +watchfiles==1.1.0 +nvidia-nvjitlink-cu12==12.4.127 +peft==0.16.0 +sentry-sdk==2.32.0 +rpds-py==0.26.0 +email_validator==2.2.0 +nodeenv==1.9.1 +distro==1.9.0 +jiter==0.10.0 +compressed-tensors==0.9.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.7 +rich==14.0.0 +GitPython==3.1.44 +lazy_loader==0.4 +fastapi-cloud-cli==0.1.4 +cupy-cuda12x==13.5.1 +prompt_toolkit==3.0.51 +gguf==0.17.1 +blis==1.3.0 +thinc==8.3.6 +cloudpickle==3.1.1 +multidict==6.6.3 +nvidia-nvtx-cu12==12.4.127 +flash-attn==2.7.1.post1 +pyasn1_modules==0.4.2 +rsa==4.9.1 +weasel==0.4.1 +uvloop==0.21.0 +click==8.2.1 +numpy==2.2.6 +torchdata==0.11.0 +pylatexenc==2.10 +cachetools==5.5.2 +Jinja2==3.1.6 +typer==0.16.0 +nvidia-cudnn-cu12==9.1.0.70 +fastapi-cli==0.0.8 +xxhash==3.5.0 +tornado==6.5.1 +scipy==1.15.3 +rouge_score==0.1.2 +cloudpathlib==0.21.1 +streamlit==1.46.1 +jedi==0.19.2 +referencing==0.36.2 +accelerate==1.8.1 +decord==0.6.0 +setuptools==78.1.1 +mdurl==0.1.2 +vllm==0.8.5.post1 +identify==2.6.12 +python-slugify==8.0.4 +dnspython==2.7.0 +dill==0.3.8 +opentelemetry-proto==1.26.0 +orjson==3.10.18 +msgpack==1.1.1 +aiohttp==3.12.14 +aiosignal==1.4.0 +typing_extensions==4.14.1 +tiktoken==0.9.0 +catalogue==2.0.10 +platformdirs==4.3.8 +narwhals==1.47.0 +antlr4-python3-runtime==4.9.3 +pydantic-extra-types==2.10.5 +nvidia-cusolver-cu12==11.6.1.9 +kaggle==1.7.4.5 +propcache==0.3.2 +urllib3==2.5.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +pydeck==0.9.1 +nvidia-cufft-cu12==11.2.1.3 +pyarrow==20.0.0 +nvidia-nccl-cu12==2.21.5 +httptools==0.6.4 +qwen-vl-utils==0.0.11 
+markdown-it-py==3.0.0 +gitdb==4.0.12 +altair==5.5.0 +torchvision==0.21.0 +python-magic==0.4.27 +iopath==0.1.10 +ray==2.47.1 +blake3==1.0.5 +pillow==11.3.0 +python-dotenv==1.1.1 +torchaudio==2.6.0 +partial-json-parser==0.2.1.1.post6 +httpx==0.28.1 +torch==2.6.0 +anyio==4.9.0 +fairscale==0.4.4 +traitlets==5.14.3 +pure_eval==0.2.3 +sympy==1.13.1 +nvidia-cusparselt-cu12==0.6.2 +jsonschema==4.24.0 +imageio==2.37.0 +opencensus==0.11.4 +stack-data==0.6.3 +shellingham==1.5.4 +tensordict==0.9.1 +nvidia-cuda-runtime-cu12==12.4.127 +nest-asyncio==1.6.0 +einops==0.8.1 +lark==1.2.2 +tenacity==9.1.2 +virtualenv==20.31.2 +ptyprocess==0.7.0 +outlines==0.1.11 +depyf==0.18.0 +starlette==0.47.1 +cfgv==3.4.0 +pre_commit==4.2.0 +language_data==1.3.0 +pip==25.1 +Pygments==2.19.2 +nvidia-cuda-cupti-cu12==12.4.127 +protobuf==4.25.8 +safetensors==0.5.3 +text-unidecode==1.3 +wcwidth==0.2.13 +charset-normalizer==3.4.2 +aiohappyeyeballs==2.6.1 +outlines_core==0.1.26 +fastrlock==0.8.3 +asttokens==3.0.0 +psutil==7.0.0 +smmap==5.0.2 +exceptiongroup==1.3.0 +murmurhash==1.0.13 +pytorch-lightning==2.5.2 +filelock==3.18.0 +astor==0.8.1 +py-spy==0.4.0 +pydantic_core==2.33.2 +colorful==0.5.7 +fastapi==0.116.1 +opentelemetry-api==1.26.0 +openai==1.90.0 +ninja==1.11.1.4 +opentelemetry-semantic-conventions==0.47b0 +spacy-legacy==3.0.12 +opendatasets==0.1.22 +Deprecated==1.2.18 +proto-plus==1.26.1 +rignore==0.6.2 +aiohttp-cors==0.8.1 +liger_kernel==0.6.0 +opentelemetry-exporter-prometheus==0.56b0 +python-multipart==0.0.20 +multiprocess==0.70.16 +opentelemetry-sdk==1.26.0 +decorator==5.2.1 +xformers==0.0.29.post2 +spacy==3.8.7 +pyvers==0.1.0 +pycocotools==2.0.10 +websockets==15.0.1 +wasabi==1.1.3 +frozenlist==1.7.0 +codetiming==1.4.0 +sentencepiece==0.2.0 +toml==0.10.2 +scikit-image==0.25.2 +ftfy==6.3.1 +bleach==6.2.0 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +importlib_metadata==8.0.0 +spacy-loggers==1.0.5 +smart_open==7.3.0.post1 +portalocker==3.2.0 +llvmlite==0.44.0 +MarkupSafe==3.0.2 
+braceexpand==0.1.7 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.context==5.3.0 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +platformdirs==4.2.2 +packaging==24.2 +wheel==0.45.1 +zipp==3.19.2 +inflect==7.3.1 +autocommand==2.2.2 +typeguard==4.3.0 +jaraco.collections==5.1.0 +backports.tarfile==1.2.0 +tomli==2.0.1 +importlib_metadata==8.0.0 +typing_extensions==4.12.2 diff --git a/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..eafc69925e2c6abda0f83fb49745b65c3ce12896 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-18T12:19:06.753628Z", + "args": [ + "--node-ip-address=10.1.5.237", + "--node-manager-port=37651", + "--object-store-name=/tmp/ray/session_2025-07-18_20-17-27_987959_155806/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-18_20-17-27_987959_155806/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=46087", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=64279", + "--gcs-address=10.1.5.237:55485", + "--session-name=session_2025-07-18_20-17-27_987959_155806", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=498a1e71e98cee5fa6c370066e878218480d78d02b0b0a20550a5571", + "--startup-token=64", + "--worker-launch-time-ms=1752841050410", + "--node-id=18fda1330b00f9c3f38fdc4c1387555fb29a9f963f649897c6fc1dc5", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": 
"https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-dc4b748ff-f7c66", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1225195520" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-f7e858cd-ae03-031d-b834-86bf87923211" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-becb8d59-2ab7-b50d-5770-183c6478747a" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655" + } + ], + "cudaVersion": "12.1", + "writerId": "99k3ygrrojzudcyj8lzv8s7kibi19jo9" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..b77c67f346efaab72f54cdb99ba00a4b329d2146 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-18T20:19:07.57787547+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-18T20:19:13.028328674+08:00","level":"INFO","msg":"stream: created new stream","id":"eo9xzqez"} +{"time":"2025-07-18T20:19:13.038047308+08:00","level":"INFO","msg":"stream: started","id":"eo9xzqez"} +{"time":"2025-07-18T20:19:13.038097996+08:00","level":"INFO","msg":"handler: started","stream_id":"eo9xzqez"} +{"time":"2025-07-18T20:19:13.038104971+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"eo9xzqez"} +{"time":"2025-07-18T20:19:13.038125386+08:00","level":"INFO","msg":"sender: started","stream_id":"eo9xzqez"} diff --git a/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..79278d88118191164b274eafaa2e8feb9b88f9cf --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Configure stats pid to 160623 +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug.log +2025-07-18 20:19:07,268 INFO 
MainThread:160623 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/logs/debug-internal.log +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_init.py:init():830] calling init triggers +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 
'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-18 20:19:07,268 INFO MainThread:160623 [wandb_init.py:init():871] starting backend +2025-07-18 20:19:07,479 INFO MainThread:160623 [wandb_init.py:init():874] sending inform_init request +2025-07-18 20:19:07,481 INFO MainThread:160623 [wandb_init.py:init():882] backend started and connected +2025-07-18 20:19:07,510 INFO MainThread:160623 [wandb_init.py:init():953] updated telemetry +2025-07-18 20:19:08,011 INFO MainThread:160623 [wandb_init.py:init():977] communicating run to backend with 
90.0 second timeout +2025-07-18 20:19:17,965 INFO MainThread:160623 [wandb_init.py:init():1029] starting run threads in backend +2025-07-18 20:19:18,182 INFO MainThread:160623 [wandb_run.py:_console_start():2458] atexit reg +2025-07-18 20:19:18,183 INFO MainThread:160623 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-18 20:19:18,213 INFO MainThread:160623 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-18 20:19:18,213 INFO MainThread:160623 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-18 20:19:18,223 INFO MainThread:160623 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/run-eo9xzqez.wandb b/EasyR1-new/examples/wandb/run-20250718_201906-eo9xzqez/run-eo9xzqez.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/output.log b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e67e6518bd3a49ed7d59e2b81420b42f4b3dbf1f --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 182641, "uuid": "99853167d0014a0cbe06d35970a786c8", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7396100b45ae50733f37383324036e5ff4cdf5aa --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +attrs==25.3.0 +tqdm==4.67.1 +langcodes==3.5.0 +nvidia-cublas-cu12==12.4.5.8 +airportsdata==20250706 +absl-py==2.3.1 +hf-xet==1.1.5 +opentelemetry-exporter-otlp-proto-http==1.26.0 +interegular==0.3.3 +tifffile==2025.5.10 +nvidia-cufile-cu12==1.11.1.6 +nltk==3.9.1 +tokenizers==0.21.2 +salesforce-lavis==1.0.2 +tzdata==2025.2 +prometheus_client==0.22.1 +google-auth==2.40.3 +ipython==8.37.0 +pydantic==2.11.7 +mathruler==0.1.0 +six==1.17.0 +python-dateutil==2.9.0.post0 +requests==2.32.4 +mistral_common==1.8.0 +huggingface-hub==0.33.4 +preshed==3.0.10 +torchmetrics==1.7.4 +blinker==1.9.0 +nvidia-cusparse-cu12==12.3.1.170 +rich-toolkit==0.14.8 +pytz==2025.2 +pandas==2.3.1 +packaging==25.0 +async-timeout==5.0.1 +diskcache==5.6.3 +google-api-core==2.25.1 +parso==0.8.4 +joblib==1.5.1 +pycountry==24.6.1 +triton==3.2.0 +pybase64==1.4.1 +marisa-trie==1.2.1 +plotly==6.2.0 +wandb==0.21.0 +PyYAML==6.0.2 +regex==2024.11.6 +idna==3.10 +numba==0.61.2 +nvidia-curand-cu12==10.3.5.147 +uvicorn==0.35.0 +srsly==2.5.1 +confection==0.1.5 +opentelemetry-semantic-conventions-ai==0.4.11 +typing-inspection==0.4.1 +opencv-python-headless==4.12.0.88 +pyasn1==0.6.1 +av==15.0.0 +xgrammar==0.1.18 +distlib==0.3.9 +datasets==4.0.0 +networkx==3.4.2 +prometheus-fastapi-instrumentator==7.1.0 +lightning-utilities==0.14.3 +executing==2.2.0 +pycocoevalcap==1.2 +h11==0.16.0 +certifi==2025.7.14 +sniffio==1.3.1 +wheel==0.45.1 +transformers==4.52.4 +wrapt==1.17.2 +jsonschema-specifications==2025.4.1 +mpmath==1.3.0 +msgspec==0.19.0 +py-cpuinfo==9.0.0 +contexttimer==0.3.3 
+watchdog==6.0.0 +pexpect==4.9.0 +webencodings==0.5.1 +verl==0.3.2.dev0 +webdataset==1.0.2 +httpcore==1.0.9 +opentelemetry-exporter-otlp==1.26.0 +lm-format-enforcer==0.10.11 +googleapis-common-protos==1.70.0 +pyzmq==27.0.0 +fsspec==2025.3.0 +grpcio==1.73.1 +cymem==2.0.11 +timm==0.4.12 +zipp==3.23.0 +llguidance==0.7.30 +opencensus-context==0.1.3 +omegaconf==2.3.0 +python-json-logger==3.3.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +watchfiles==1.1.0 +nvidia-nvjitlink-cu12==12.4.127 +peft==0.16.0 +sentry-sdk==2.32.0 +rpds-py==0.26.0 +email_validator==2.2.0 +nodeenv==1.9.1 +distro==1.9.0 +jiter==0.10.0 +compressed-tensors==0.9.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.7 +rich==14.0.0 +GitPython==3.1.44 +lazy_loader==0.4 +fastapi-cloud-cli==0.1.4 +cupy-cuda12x==13.5.1 +prompt_toolkit==3.0.51 +gguf==0.17.1 +blis==1.3.0 +thinc==8.3.6 +cloudpickle==3.1.1 +multidict==6.6.3 +nvidia-nvtx-cu12==12.4.127 +flash-attn==2.7.1.post1 +pyasn1_modules==0.4.2 +rsa==4.9.1 +weasel==0.4.1 +uvloop==0.21.0 +click==8.2.1 +numpy==2.2.6 +torchdata==0.11.0 +pylatexenc==2.10 +cachetools==5.5.2 +Jinja2==3.1.6 +typer==0.16.0 +nvidia-cudnn-cu12==9.1.0.70 +fastapi-cli==0.0.8 +xxhash==3.5.0 +tornado==6.5.1 +scipy==1.15.3 +rouge_score==0.1.2 +cloudpathlib==0.21.1 +streamlit==1.46.1 +jedi==0.19.2 +referencing==0.36.2 +accelerate==1.8.1 +decord==0.6.0 +setuptools==78.1.1 +mdurl==0.1.2 +vllm==0.8.5.post1 +identify==2.6.12 +python-slugify==8.0.4 +dnspython==2.7.0 +dill==0.3.8 +opentelemetry-proto==1.26.0 +orjson==3.10.18 +msgpack==1.1.1 +aiohttp==3.12.14 +aiosignal==1.4.0 +typing_extensions==4.14.1 +tiktoken==0.9.0 +catalogue==2.0.10 +platformdirs==4.3.8 +narwhals==1.47.0 +antlr4-python3-runtime==4.9.3 +pydantic-extra-types==2.10.5 +nvidia-cusolver-cu12==11.6.1.9 +kaggle==1.7.4.5 +propcache==0.3.2 +urllib3==2.5.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +pydeck==0.9.1 +nvidia-cufft-cu12==11.2.1.3 +pyarrow==20.0.0 +nvidia-nccl-cu12==2.21.5 +httptools==0.6.4 +qwen-vl-utils==0.0.11 
+markdown-it-py==3.0.0 +gitdb==4.0.12 +altair==5.5.0 +torchvision==0.21.0 +python-magic==0.4.27 +iopath==0.1.10 +ray==2.47.1 +blake3==1.0.5 +pillow==11.3.0 +python-dotenv==1.1.1 +torchaudio==2.6.0 +partial-json-parser==0.2.1.1.post6 +httpx==0.28.1 +torch==2.6.0 +anyio==4.9.0 +fairscale==0.4.4 +traitlets==5.14.3 +pure_eval==0.2.3 +sympy==1.13.1 +nvidia-cusparselt-cu12==0.6.2 +jsonschema==4.24.0 +imageio==2.37.0 +opencensus==0.11.4 +stack-data==0.6.3 +shellingham==1.5.4 +tensordict==0.9.1 +nvidia-cuda-runtime-cu12==12.4.127 +nest-asyncio==1.6.0 +einops==0.8.1 +lark==1.2.2 +tenacity==9.1.2 +virtualenv==20.31.2 +ptyprocess==0.7.0 +outlines==0.1.11 +depyf==0.18.0 +starlette==0.47.1 +cfgv==3.4.0 +pre_commit==4.2.0 +language_data==1.3.0 +pip==25.1 +Pygments==2.19.2 +nvidia-cuda-cupti-cu12==12.4.127 +protobuf==4.25.8 +safetensors==0.5.3 +text-unidecode==1.3 +wcwidth==0.2.13 +charset-normalizer==3.4.2 +aiohappyeyeballs==2.6.1 +outlines_core==0.1.26 +fastrlock==0.8.3 +asttokens==3.0.0 +psutil==7.0.0 +smmap==5.0.2 +exceptiongroup==1.3.0 +murmurhash==1.0.13 +pytorch-lightning==2.5.2 +filelock==3.18.0 +astor==0.8.1 +py-spy==0.4.0 +pydantic_core==2.33.2 +colorful==0.5.7 +fastapi==0.116.1 +opentelemetry-api==1.26.0 +openai==1.90.0 +ninja==1.11.1.4 +opentelemetry-semantic-conventions==0.47b0 +spacy-legacy==3.0.12 +opendatasets==0.1.22 +Deprecated==1.2.18 +proto-plus==1.26.1 +rignore==0.6.2 +aiohttp-cors==0.8.1 +liger_kernel==0.6.0 +opentelemetry-exporter-prometheus==0.56b0 +python-multipart==0.0.20 +multiprocess==0.70.16 +opentelemetry-sdk==1.26.0 +decorator==5.2.1 +xformers==0.0.29.post2 +spacy==3.8.7 +pyvers==0.1.0 +pycocotools==2.0.10 +websockets==15.0.1 +wasabi==1.1.3 +frozenlist==1.7.0 +codetiming==1.4.0 +sentencepiece==0.2.0 +toml==0.10.2 +scikit-image==0.25.2 +ftfy==6.3.1 +bleach==6.2.0 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +importlib_metadata==8.0.0 +spacy-loggers==1.0.5 +smart_open==7.3.0.post1 +portalocker==3.2.0 +llvmlite==0.44.0 +MarkupSafe==3.0.2 
+braceexpand==0.1.7 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.context==5.3.0 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +platformdirs==4.2.2 +packaging==24.2 +wheel==0.45.1 +zipp==3.19.2 +inflect==7.3.1 +autocommand==2.2.2 +typeguard==4.3.0 +jaraco.collections==5.1.0 +backports.tarfile==1.2.0 +tomli==2.0.1 +importlib_metadata==8.0.0 +typing_extensions==4.12.2 diff --git a/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..1711474cf776654595bb7ef0ee5572ce3818846c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-18T13:25:11.393703Z", + "args": [ + "--node-ip-address=10.1.5.237", + "--node-manager-port=46647", + "--object-store-name=/tmp/ray/session_2025-07-18_21-23-26_144453_177856/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-18_21-23-26_144453_177856/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=39471", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=50428", + "--gcs-address=10.1.5.237:57263", + "--session-name=session_2025-07-18_21-23-26_144453_177856", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=0c4472bb4b52db8edcea777b259959e49dd785e1e795a3856c465fef", + "--startup-token=64", + "--worker-launch-time-ms=1752845008645", + "--node-id=a12d55c028304a40ba5aecfb7278b8e5f70b228834872967378029cf", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": 
"https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-dc4b748ff-f7c66", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1225252864" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-f7e858cd-ae03-031d-b834-86bf87923211" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-becb8d59-2ab7-b50d-5770-183c6478747a" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655" + } + ], + "cudaVersion": "12.1", + "writerId": "dbvg4pjdg1e4bz0ifs06d8zkh1pcyxf0" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..a8da9fe90f0c93dfaeb6a5801a162fcb8751c39a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug-internal.log 
@@ -0,0 +1,13 @@ +{"time":"2025-07-18T21:25:12.469804386+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-18T21:25:22.33136325+08:00","level":"INFO","msg":"stream: created new stream","id":"egfsoxro"} +{"time":"2025-07-18T21:25:22.34344642+08:00","level":"INFO","msg":"handler: started","stream_id":"egfsoxro"} +{"time":"2025-07-18T21:25:22.343473598+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"egfsoxro"} +{"time":"2025-07-18T21:25:22.343477736+08:00","level":"INFO","msg":"stream: started","id":"egfsoxro"} +{"time":"2025-07-18T21:25:22.343503001+08:00","level":"INFO","msg":"sender: started","stream_id":"egfsoxro"} +{"time":"2025-07-18T21:29:24.147448876+08:00","level":"ERROR","msg":"sender: sendStopStatus: failed to get run stopped status: context deadline exceeded (Client.Timeout or context cancellation while reading body)"} +{"time":"2025-07-18T21:34:51.606197319+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/egfsoxro/file_stream\": read tcp 10.1.5.237:54740->172.67.193.61:443: read: connection timed out"} +{"time":"2025-07-18T21:35:11.662284272+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/egfsoxro/file_stream\": unexpected EOF"} +{"time":"2025-07-18T21:38:58.390140338+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/egfsoxro/file_stream\": read tcp 10.1.5.237:39892->172.67.193.61:443: read: connection timed out"} +{"time":"2025-07-18T21:42:37.527116841+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/egfsoxro/file_stream\": read tcp 10.1.5.237:52510->104.21.20.172:443: read: connection timed out"} +{"time":"2025-07-18T21:45:57.207400741+08:00","level":"INFO","msg":"api: retrying error","error":"Post 
\"https://api.bandw.top/files/gia0603yucca/easy_r1/egfsoxro/file_stream\": read tcp 10.1.5.237:51260->104.21.20.172:443: read: connection reset by peer"} +{"time":"2025-07-18T21:48:37.975131005+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/egfsoxro/file_stream\": read tcp 10.1.5.237:35598->172.67.193.61:443: read: connection timed out"} diff --git a/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug.log b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e9c8978cee26d7b44df09eb2dfcd76e08a9b009d --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_setup.py:_flush():80] Configure stats pid to 182641 +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug.log +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_212511-egfsoxro/logs/debug-internal.log +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_init.py:init():830] calling init triggers +2025-07-18 21:25:12,185 INFO MainThread:182641 
[wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': 
True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': 
False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-18 21:25:12,185 INFO MainThread:182641 [wandb_init.py:init():871] starting backend +2025-07-18 21:25:12,397 INFO MainThread:182641 [wandb_init.py:init():874] sending inform_init request +2025-07-18 21:25:12,399 INFO MainThread:182641 [wandb_init.py:init():882] backend started and connected +2025-07-18 21:25:12,404 INFO MainThread:182641 [wandb_init.py:init():953] updated telemetry +2025-07-18 21:25:12,959 INFO MainThread:182641 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-18 21:25:23,805 INFO MainThread:182641 [wandb_init.py:init():1029] starting run threads in backend +2025-07-18 21:25:24,145 INFO MainThread:182641 [wandb_run.py:_console_start():2458] atexit reg +2025-07-18 21:25:24,145 INFO MainThread:182641 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-18 21:25:24,151 INFO 
MainThread:182641 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-18 21:25:24,160 INFO MainThread:182641 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-18 21:25:24,180 INFO MainThread:182641 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/output.log b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..c501f4ec246cbbe687fd1c5d625de55776715882 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.5.237", "pid": 209844, "uuid": "231f6197dcf24423b2307815cebae57f", "closed": false} +Start validation... diff --git a/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7396100b45ae50733f37383324036e5ff4cdf5aa --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/requirements.txt @@ -0,0 +1,295 @@ +setproctitle==1.2.2 +colorama==0.4.6 +psutil==7.0.0 +attrs==25.3.0 +tqdm==4.67.1 +langcodes==3.5.0 +nvidia-cublas-cu12==12.4.5.8 +airportsdata==20250706 +absl-py==2.3.1 +hf-xet==1.1.5 +opentelemetry-exporter-otlp-proto-http==1.26.0 +interegular==0.3.3 +tifffile==2025.5.10 +nvidia-cufile-cu12==1.11.1.6 +nltk==3.9.1 +tokenizers==0.21.2 +salesforce-lavis==1.0.2 +tzdata==2025.2 +prometheus_client==0.22.1 +google-auth==2.40.3 +ipython==8.37.0 +pydantic==2.11.7 +mathruler==0.1.0 +six==1.17.0 +python-dateutil==2.9.0.post0 +requests==2.32.4 +mistral_common==1.8.0 +huggingface-hub==0.33.4 +preshed==3.0.10 +torchmetrics==1.7.4 +blinker==1.9.0 +nvidia-cusparse-cu12==12.3.1.170 
+rich-toolkit==0.14.8 +pytz==2025.2 +pandas==2.3.1 +packaging==25.0 +async-timeout==5.0.1 +diskcache==5.6.3 +google-api-core==2.25.1 +parso==0.8.4 +joblib==1.5.1 +pycountry==24.6.1 +triton==3.2.0 +pybase64==1.4.1 +marisa-trie==1.2.1 +plotly==6.2.0 +wandb==0.21.0 +PyYAML==6.0.2 +regex==2024.11.6 +idna==3.10 +numba==0.61.2 +nvidia-curand-cu12==10.3.5.147 +uvicorn==0.35.0 +srsly==2.5.1 +confection==0.1.5 +opentelemetry-semantic-conventions-ai==0.4.11 +typing-inspection==0.4.1 +opencv-python-headless==4.12.0.88 +pyasn1==0.6.1 +av==15.0.0 +xgrammar==0.1.18 +distlib==0.3.9 +datasets==4.0.0 +networkx==3.4.2 +prometheus-fastapi-instrumentator==7.1.0 +lightning-utilities==0.14.3 +executing==2.2.0 +pycocoevalcap==1.2 +h11==0.16.0 +certifi==2025.7.14 +sniffio==1.3.1 +wheel==0.45.1 +transformers==4.52.4 +wrapt==1.17.2 +jsonschema-specifications==2025.4.1 +mpmath==1.3.0 +msgspec==0.19.0 +py-cpuinfo==9.0.0 +contexttimer==0.3.3 +watchdog==6.0.0 +pexpect==4.9.0 +webencodings==0.5.1 +verl==0.3.2.dev0 +webdataset==1.0.2 +httpcore==1.0.9 +opentelemetry-exporter-otlp==1.26.0 +lm-format-enforcer==0.10.11 +googleapis-common-protos==1.70.0 +pyzmq==27.0.0 +fsspec==2025.3.0 +grpcio==1.73.1 +cymem==2.0.11 +timm==0.4.12 +zipp==3.23.0 +llguidance==0.7.30 +opencensus-context==0.1.3 +omegaconf==2.3.0 +python-json-logger==3.3.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +watchfiles==1.1.0 +nvidia-nvjitlink-cu12==12.4.127 +peft==0.16.0 +sentry-sdk==2.32.0 +rpds-py==0.26.0 +email_validator==2.2.0 +nodeenv==1.9.1 +distro==1.9.0 +jiter==0.10.0 +compressed-tensors==0.9.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.7 +rich==14.0.0 +GitPython==3.1.44 +lazy_loader==0.4 +fastapi-cloud-cli==0.1.4 +cupy-cuda12x==13.5.1 +prompt_toolkit==3.0.51 +gguf==0.17.1 +blis==1.3.0 +thinc==8.3.6 +cloudpickle==3.1.1 +multidict==6.6.3 +nvidia-nvtx-cu12==12.4.127 +flash-attn==2.7.1.post1 +pyasn1_modules==0.4.2 +rsa==4.9.1 +weasel==0.4.1 +uvloop==0.21.0 +click==8.2.1 +numpy==2.2.6 +torchdata==0.11.0 
+pylatexenc==2.10 +cachetools==5.5.2 +Jinja2==3.1.6 +typer==0.16.0 +nvidia-cudnn-cu12==9.1.0.70 +fastapi-cli==0.0.8 +xxhash==3.5.0 +tornado==6.5.1 +scipy==1.15.3 +rouge_score==0.1.2 +cloudpathlib==0.21.1 +streamlit==1.46.1 +jedi==0.19.2 +referencing==0.36.2 +accelerate==1.8.1 +decord==0.6.0 +setuptools==78.1.1 +mdurl==0.1.2 +vllm==0.8.5.post1 +identify==2.6.12 +python-slugify==8.0.4 +dnspython==2.7.0 +dill==0.3.8 +opentelemetry-proto==1.26.0 +orjson==3.10.18 +msgpack==1.1.1 +aiohttp==3.12.14 +aiosignal==1.4.0 +typing_extensions==4.14.1 +tiktoken==0.9.0 +catalogue==2.0.10 +platformdirs==4.3.8 +narwhals==1.47.0 +antlr4-python3-runtime==4.9.3 +pydantic-extra-types==2.10.5 +nvidia-cusolver-cu12==11.6.1.9 +kaggle==1.7.4.5 +propcache==0.3.2 +urllib3==2.5.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +pydeck==0.9.1 +nvidia-cufft-cu12==11.2.1.3 +pyarrow==20.0.0 +nvidia-nccl-cu12==2.21.5 +httptools==0.6.4 +qwen-vl-utils==0.0.11 +markdown-it-py==3.0.0 +gitdb==4.0.12 +altair==5.5.0 +torchvision==0.21.0 +python-magic==0.4.27 +iopath==0.1.10 +ray==2.47.1 +blake3==1.0.5 +pillow==11.3.0 +python-dotenv==1.1.1 +torchaudio==2.6.0 +partial-json-parser==0.2.1.1.post6 +httpx==0.28.1 +torch==2.6.0 +anyio==4.9.0 +fairscale==0.4.4 +traitlets==5.14.3 +pure_eval==0.2.3 +sympy==1.13.1 +nvidia-cusparselt-cu12==0.6.2 +jsonschema==4.24.0 +imageio==2.37.0 +opencensus==0.11.4 +stack-data==0.6.3 +shellingham==1.5.4 +tensordict==0.9.1 +nvidia-cuda-runtime-cu12==12.4.127 +nest-asyncio==1.6.0 +einops==0.8.1 +lark==1.2.2 +tenacity==9.1.2 +virtualenv==20.31.2 +ptyprocess==0.7.0 +outlines==0.1.11 +depyf==0.18.0 +starlette==0.47.1 +cfgv==3.4.0 +pre_commit==4.2.0 +language_data==1.3.0 +pip==25.1 +Pygments==2.19.2 +nvidia-cuda-cupti-cu12==12.4.127 +protobuf==4.25.8 +safetensors==0.5.3 +text-unidecode==1.3 +wcwidth==0.2.13 +charset-normalizer==3.4.2 +aiohappyeyeballs==2.6.1 +outlines_core==0.1.26 +fastrlock==0.8.3 +asttokens==3.0.0 +psutil==7.0.0 +smmap==5.0.2 +exceptiongroup==1.3.0 +murmurhash==1.0.13 
+pytorch-lightning==2.5.2 +filelock==3.18.0 +astor==0.8.1 +py-spy==0.4.0 +pydantic_core==2.33.2 +colorful==0.5.7 +fastapi==0.116.1 +opentelemetry-api==1.26.0 +openai==1.90.0 +ninja==1.11.1.4 +opentelemetry-semantic-conventions==0.47b0 +spacy-legacy==3.0.12 +opendatasets==0.1.22 +Deprecated==1.2.18 +proto-plus==1.26.1 +rignore==0.6.2 +aiohttp-cors==0.8.1 +liger_kernel==0.6.0 +opentelemetry-exporter-prometheus==0.56b0 +python-multipart==0.0.20 +multiprocess==0.70.16 +opentelemetry-sdk==1.26.0 +decorator==5.2.1 +xformers==0.0.29.post2 +spacy==3.8.7 +pyvers==0.1.0 +pycocotools==2.0.10 +websockets==15.0.1 +wasabi==1.1.3 +frozenlist==1.7.0 +codetiming==1.4.0 +sentencepiece==0.2.0 +toml==0.10.2 +scikit-image==0.25.2 +ftfy==6.3.1 +bleach==6.2.0 +yarl==1.20.1 +nvidia-cuda-nvrtc-cu12==12.4.127 +importlib_metadata==8.0.0 +spacy-loggers==1.0.5 +smart_open==7.3.0.post1 +portalocker==3.2.0 +llvmlite==0.44.0 +MarkupSafe==3.0.2 +braceexpand==0.1.7 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.context==5.3.0 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +platformdirs==4.2.2 +packaging==24.2 +wheel==0.45.1 +zipp==3.19.2 +inflect==7.3.1 +autocommand==2.2.2 +typeguard==4.3.0 +jaraco.collections==5.1.0 +backports.tarfile==1.2.0 +tomli==2.0.1 +importlib_metadata==8.0.0 +typing_extensions==4.12.2 diff --git a/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4a24bcc304aea41008f5b0e3ccb15f1e76d8fb87 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-18T14:41:31.644159Z", + "args": [ + "--node-ip-address=10.1.5.237", + "--node-manager-port=45369", + 
"--object-store-name=/tmp/ray/session_2025-07-18_22-39-43_301828_205004/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-18_22-39-43_301828_205004/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=43565", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=58432", + "--gcs-address=10.1.5.237:40516", + "--session-name=session_2025-07-18_22-39-43_301828_205004", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=eac498e1f2975b1ecde2971d163e13136b422b2bc16efcb3d04e18cc", + "--startup-token=64", + "--worker-launch-time-ms=1752849585965", + "--node-id=f63242f63dfbc38276020b5aea2cd0938dd3a768eff5ad09f4f8182d", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-dc4b748ff-f7c66", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1225330688" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-f7e858cd-ae03-031d-b834-86bf87923211" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-82fabc17-3326-51dd-9fb7-cbe82d26b6a0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-1bba2921-208c-d0ad-1a05-25fc85d62630" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-9537a8bd-f536-20e2-b766-25a3c4bd5a71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": 
"Ampere", + "uuid": "GPU-482091e7-cb8c-dacb-7d0c-a78a3b795f0b" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-becb8d59-2ab7-b50d-5770-183c6478747a" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-3f0f2b1d-d6be-b6ad-b4ad-0ec841b91c40" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-e53376dd-84ff-523c-a8c7-7cc7f7a18655" + } + ], + "cudaVersion": "12.1", + "writerId": "99rgf27lonrq0txkxxch5fdtjzzzjfev" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c3c0b3d98e53764df145d7228b6942fa3b3f5167 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug-internal.log @@ -0,0 +1,15 @@ +{"time":"2025-07-18T22:41:33.020430983+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-18T22:41:52.945982131+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": unexpected EOF"} +{"time":"2025-07-18T22:42:00.753263753+08:00","level":"INFO","msg":"stream: created new stream","id":"vx2sr49f"} +{"time":"2025-07-18T22:42:00.754755493+08:00","level":"INFO","msg":"stream: started","id":"vx2sr49f"} +{"time":"2025-07-18T22:42:00.754778938+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"vx2sr49f"} +{"time":"2025-07-18T22:42:00.754802329+08:00","level":"INFO","msg":"sender: started","stream_id":"vx2sr49f"} +{"time":"2025-07-18T22:42:00.754811646+08:00","level":"INFO","msg":"handler: started","stream_id":"vx2sr49f"} +{"time":"2025-07-18T22:42:49.744113716+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"} 
+{"time":"2025-07-18T22:42:54.672269282+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"} +{"time":"2025-07-18T22:43:02.89295424+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"} +{"time":"2025-07-18T22:43:26.945471906+08:00","level":"ERROR","msg":"runupserter: failed to upload changes","error":"net/http: request canceled (Client.Timeout or context cancellation while reading body)"} +{"time":"2025-07-18T22:43:35.280027865+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"} +{"time":"2025-07-18T22:44:24.629460284+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/vx2sr49f/file_stream\": unexpected EOF"} +{"time":"2025-07-18T22:47:34.871114852+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/vx2sr49f/file_stream\": read tcp 10.1.5.237:33020->104.21.20.172:443: read: connection timed out"} +{"time":"2025-07-18T22:51:11.217832907+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/vx2sr49f/file_stream\": read tcp 10.1.5.237:42534->104.21.20.172:443: read: connection reset by peer"} diff --git a/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug.log b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..31cf2bf9502034e28128343e8f489c68957f46cd --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-18 22:41:32,687 INFO MainThread:209844 
[wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_setup.py:_flush():80] Configure stats pid to 209844 +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug.log +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/logs/debug-internal.log +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_init.py:init():830] calling init triggers +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 
'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': 
{'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 
'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-18 22:41:32,687 INFO MainThread:209844 [wandb_init.py:init():871] starting backend +2025-07-18 22:41:32,934 INFO MainThread:209844 [wandb_init.py:init():874] sending inform_init request +2025-07-18 22:41:32,936 INFO MainThread:209844 [wandb_init.py:init():882] backend started and connected +2025-07-18 22:41:32,946 INFO MainThread:209844 [wandb_init.py:init():953] updated telemetry +2025-07-18 22:41:33,334 INFO MainThread:209844 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-18 22:42:02,587 INFO MainThread:209844 [wandb_init.py:init():1029] starting run threads in backend +2025-07-18 22:42:02,890 INFO MainThread:209844 [wandb_run.py:_console_start():2458] atexit reg +2025-07-18 22:42:02,890 INFO MainThread:209844 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-18 22:42:02,904 INFO MainThread:209844 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-18 22:42:02,904 INFO MainThread:209844 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-18 22:42:02,920 INFO MainThread:209844 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/run-vx2sr49f.wandb b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/run-vx2sr49f.wandb new file mode 100644 index 0000000000000000000000000000000000000000..1f7bd3c7bac016b4e7dd14eb2a326e7a305a20a8 Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250718_224131-vx2sr49f/run-vx2sr49f.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/output.log b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..35a09afc3d473818c42b25134633f01ca5a529c6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 44339, "uuid": "e52fee178cde4f6c8786c5a75d6c593c", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..181d58e96b9d2b6453a6ad03567951f226d615e2 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T05:42:34.299479Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=37845", + "--object-store-name=/tmp/ray/session_2025-07-20_13-33-41_705569_39575/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_13-33-41_705569_39575/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=63873", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=64414", + "--gcs-address=10.1.4.164:53875", + "--session-name=session_2025-07-20_13-33-41_705569_39575", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=fbdc233d73dca0f6543c269a751d673a935fab0cb81fb078e395b1a3", + "--startup-token=64", + "--worker-launch-time-ms=1752989625411", + "--node-id=e86bda52ca2366292b136e105f4196a896ce4cd7c5afef70608609d8", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178062848" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "tqus8osn3l5740extl676gu4rhqn0zv8" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..bd9edaf1f409a7cddb83ee82a2933b2a9c443b31 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug-internal.log @@ -0,0 +1,16 @@ +{"time":"2025-07-20T13:42:34.643621776+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T13:42:43.882833108+08:00","level":"INFO","msg":"stream: created new stream","id":"au69cs3i"} +{"time":"2025-07-20T13:42:43.883617829+08:00","level":"INFO","msg":"stream: started","id":"au69cs3i"} +{"time":"2025-07-20T13:42:43.88365162+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"au69cs3i"} +{"time":"2025-07-20T13:42:43.883669273+08:00","level":"INFO","msg":"handler: started","stream_id":"au69cs3i"} +{"time":"2025-07-20T13:42:43.883668078+08:00","level":"INFO","msg":"sender: started","stream_id":"au69cs3i"} +{"time":"2025-07-20T13:43:21.282576909+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:47278->172.67.193.61:443: read: connection reset by peer"} +{"time":"2025-07-20T13:43:53.49841014+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": unexpected EOF"} +{"time":"2025-07-20T13:51:01.891406742+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:42702->104.21.20.172:443: read: connection timed out"} +{"time":"2025-07-20T13:51:28.722334221+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:52726->104.21.20.172:443: read: connection reset by peer"} +{"time":"2025-07-20T13:54:04.997626954+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:50096->172.67.193.61:443: read: connection reset by peer"} 
+{"time":"2025-07-20T13:56:40.143377068+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:44282->104.21.20.172:443: read: connection reset by peer"} +{"time":"2025-07-20T13:59:34.91644692+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:59608->172.67.193.61:443: read: connection reset by peer"} +{"time":"2025-07-20T14:00:22.63835448+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:55470->172.67.193.61:443: read: connection reset by peer"} +{"time":"2025-07-20T14:00:46.542087968+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": unexpected EOF"} +{"time":"2025-07-20T14:04:17.265732964+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/au69cs3i/file_stream\": read tcp 10.1.4.164:34306->172.67.193.61:443: read: connection reset by peer"} diff --git a/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..ab6cade8b17f6584386b8dd6b5aa561ed4fffb0b --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_setup.py:_flush():80] Configure stats pid to 44339 +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 13:42:34,421 INFO MainThread:44339 
[wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug.log +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_134234-au69cs3i/logs/debug-internal.log +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_init.py:init():830] calling init triggers +2025-07-20 13:42:34,421 INFO MainThread:44339 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': 
True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': 
{'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 13:42:34,421 INFO 
MainThread:44339 [wandb_init.py:init():871] starting backend +2025-07-20 13:42:34,632 INFO MainThread:44339 [wandb_init.py:init():874] sending inform_init request +2025-07-20 13:42:34,634 INFO MainThread:44339 [wandb_init.py:init():882] backend started and connected +2025-07-20 13:42:34,640 INFO MainThread:44339 [wandb_init.py:init():953] updated telemetry +2025-07-20 13:42:35,099 INFO MainThread:44339 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 13:42:45,262 INFO MainThread:44339 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 13:42:45,460 INFO MainThread:44339 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 13:42:45,460 INFO MainThread:44339 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 13:42:45,465 INFO MainThread:44339 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 13:42:45,465 INFO MainThread:44339 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 13:42:45,469 INFO MainThread:44339 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/output.log b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9c84dc83be5fcb6408dd406a6d5a609d1646e048 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 75477, "uuid": "340f2435954d4933a68a16b643df4460", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..c105f1c7be5e456b70a89c3a1823cbe87f4b1dc5 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T07:25:17.141598Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=35231", + "--object-store-name=/tmp/ray/session_2025-07-20_15-23-31_151146_70713/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_15-23-31_151146_70713/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=63830", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=63651", + "--gcs-address=10.1.4.164:52207", + "--session-name=session_2025-07-20_15-23-31_151146_70713", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=994c498dad4446fd86e2ea1ff3b1252af0b70673317e94a345017756", + "--startup-token=64", + "--worker-launch-time-ms=1752996213728", + "--node-id=0d8570de666f6f5e4f59f594c737fc4943e302fa432c09bf39023de5", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178181632" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "u3qpbij4lvd82az03j7kug0ih15hauaq" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..5715afb018fa64214a85c17f918366becec1a412 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug-internal.log @@ -0,0 +1,7 @@ +{"time":"2025-07-20T15:25:17.492981977+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T15:25:36.925600892+08:00","level":"INFO","msg":"stream: created new stream","id":"v0wv109i"} +{"time":"2025-07-20T15:25:36.926584351+08:00","level":"INFO","msg":"stream: started","id":"v0wv109i"} +{"time":"2025-07-20T15:25:36.926596894+08:00","level":"INFO","msg":"handler: started","stream_id":"v0wv109i"} +{"time":"2025-07-20T15:25:36.92661766+08:00","level":"INFO","msg":"sender: started","stream_id":"v0wv109i"} +{"time":"2025-07-20T15:25:36.926629865+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"v0wv109i"} +{"time":"2025-07-20T15:26:51.299029849+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/v0wv109i/file_stream\": unexpected EOF"} diff --git a/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e32ebd60b4da0ae148eba45ff73101207abceaa0 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_setup.py:_flush():80] Configure stats pid to 75477 +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 15:25:17,267 INFO MainThread:75477 
[wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug.log +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/logs/debug-internal.log +2025-07-20 15:25:17,267 INFO MainThread:75477 [wandb_init.py:init():830] calling init triggers +2025-07-20 15:25:17,268 INFO MainThread:75477 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 
'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': 
False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 15:25:17,268 INFO MainThread:75477 [wandb_init.py:init():871] starting backend +2025-07-20 15:25:17,481 INFO MainThread:75477 [wandb_init.py:init():874] sending inform_init request +2025-07-20 15:25:17,483 INFO MainThread:75477 [wandb_init.py:init():882] backend started and connected +2025-07-20 15:25:17,489 
INFO MainThread:75477 [wandb_init.py:init():953] updated telemetry +2025-07-20 15:25:17,533 INFO MainThread:75477 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 15:25:40,441 INFO MainThread:75477 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 15:25:40,619 INFO MainThread:75477 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 15:25:40,619 INFO MainThread:75477 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 15:25:40,623 INFO MainThread:75477 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 15:25:40,623 INFO MainThread:75477 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 15:25:40,625 INFO MainThread:75477 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/run-v0wv109i.wandb b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/run-v0wv109i.wandb new file mode 100644 index 0000000000000000000000000000000000000000..9c728dd12c06af24f0c56c0d65424aec920113fd Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_152517-v0wv109i/run-v0wv109i.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/output.log b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..58025e3a3c5ba8f38dea717573709acf35011ecb --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 86084, "uuid": "58b35defd6e645dfb338e85b828c3067", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..2afdc450dadcec889bf1e2c4b49516a49c3e841b --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T07:34:23.233545Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=43923", + "--object-store-name=/tmp/ray/session_2025-07-20_15-32-39_601252_81328/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_15-32-39_601252_81328/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=58095", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=64817", + "--gcs-address=10.1.4.164:49625", + "--session-name=session_2025-07-20_15-32-39_601252_81328", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=ef081ee94c630b8c4c1c59e7beefec090a5fbe229f6a981570b02fa1", + "--startup-token=64", + "--worker-launch-time-ms=1752996762184", + "--node-id=1f2c877a478b841eac2c1a33494d6be302c9c3ab6e684222e4733ce4", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178259456" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "9pihay9bn00sjm1rx40glf0dzx7vamvs" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..8515c754550201dd97c36861f88f482c70ab76ab --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T15:34:23.549904932+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T15:34:34.888814067+08:00","level":"INFO","msg":"stream: created new stream","id":"bw8ozibk"} +{"time":"2025-07-20T15:34:34.889759219+08:00","level":"INFO","msg":"stream: started","id":"bw8ozibk"} +{"time":"2025-07-20T15:34:34.88981801+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"bw8ozibk"} +{"time":"2025-07-20T15:34:34.889843085+08:00","level":"INFO","msg":"handler: started","stream_id":"bw8ozibk"} +{"time":"2025-07-20T15:34:34.890312328+08:00","level":"INFO","msg":"sender: started","stream_id":"bw8ozibk"} diff --git a/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..1fc3e91020ff09d3d5c1b21d3393db30bb98ff71 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 15:34:23,331 INFO MainThread:86084 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 15:34:23,331 INFO MainThread:86084 [wandb_setup.py:_flush():80] Configure stats pid to 86084 +2025-07-20 15:34:23,331 INFO MainThread:86084 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 15:34:23,331 INFO MainThread:86084 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 15:34:23,331 INFO MainThread:86084 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 15:34:23,331 INFO MainThread:86084 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug.log +2025-07-20 15:34:23,332 INFO 
MainThread:86084 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/logs/debug-internal.log +2025-07-20 15:34:23,332 INFO MainThread:86084 [wandb_init.py:init():830] calling init triggers +2025-07-20 15:34:23,332 INFO MainThread:86084 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 
'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 15:34:23,332 INFO MainThread:86084 [wandb_init.py:init():871] starting backend +2025-07-20 15:34:23,538 INFO MainThread:86084 [wandb_init.py:init():874] sending inform_init request +2025-07-20 15:34:23,543 INFO MainThread:86084 [wandb_init.py:init():882] backend started and connected +2025-07-20 15:34:23,545 INFO MainThread:86084 [wandb_init.py:init():953] updated telemetry +2025-07-20 15:34:23,578 INFO MainThread:86084 [wandb_init.py:init():977] communicating run to backend with 90.0 
second timeout +2025-07-20 15:34:41,563 INFO MainThread:86084 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 15:34:41,729 INFO MainThread:86084 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 15:34:41,729 INFO MainThread:86084 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 15:34:41,733 INFO MainThread:86084 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 15:34:41,733 INFO MainThread:86084 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 15:34:41,735 INFO MainThread:86084 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/run-bw8ozibk.wandb b/EasyR1-new/examples/wandb/run-20250720_153423-bw8ozibk/run-bw8ozibk.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/output.log b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..53b5730d918480aff4872ea44eb5bb40b5ae563a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 94972, "uuid": "a17f4aa9176b432b9cba40a20a79ec8a", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..87355bc51d31077b24b98d20e126984fc824511c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T07:40:30.414188Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=40967", + "--object-store-name=/tmp/ray/session_2025-07-20_15-38-46_389693_90215/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_15-38-46_389693_90215/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=56230", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=59794", + "--gcs-address=10.1.4.164:64428", + "--session-name=session_2025-07-20_15-38-46_389693_90215", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=ca5b30865afbad8796c50b46b9afd7529648e977b687eff906df06a2", + "--startup-token=64", + "--worker-launch-time-ms=1752997128928", + "--node-id=02e766ac939324b04dc391b619b0ea864767e322c5ef00f62c05a76b", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178312704" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "00b180ndhib4fms8g4grnfq7yctsuqqs" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..2c75b6bf1370cfe12ffc6e38d2fa44b25d670d70 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug-internal.log @@ -0,0 +1,9 @@ +{"time":"2025-07-20T15:40:30.858787419+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T15:40:46.029828269+08:00","level":"INFO","msg":"stream: created new stream","id":"m94nrzgn"} +{"time":"2025-07-20T15:40:46.03096628+08:00","level":"INFO","msg":"stream: started","id":"m94nrzgn"} +{"time":"2025-07-20T15:40:46.030999671+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"m94nrzgn"} +{"time":"2025-07-20T15:40:46.031014389+08:00","level":"INFO","msg":"sender: started","stream_id":"m94nrzgn"} +{"time":"2025-07-20T15:40:46.031045708+08:00","level":"INFO","msg":"handler: started","stream_id":"m94nrzgn"} +{"time":"2025-07-20T15:42:18.760260271+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"} +{"time":"2025-07-20T15:42:51.067292854+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"} +{"time":"2025-07-20T15:43:25.184851272+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"} diff --git a/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..85b84a8d04a5d88e753d5929fe5c76aa19a3ac7a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_setup.py:_flush():80] Configure stats pid to 94972 +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 15:40:30,628 
INFO MainThread:94972 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug.log +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/logs/debug-internal.log +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_init.py:init():830] calling init triggers +2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 
'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': 
{'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} 
+2025-07-20 15:40:30,628 INFO MainThread:94972 [wandb_init.py:init():871] starting backend +2025-07-20 15:40:30,837 INFO MainThread:94972 [wandb_init.py:init():874] sending inform_init request +2025-07-20 15:40:30,839 INFO MainThread:94972 [wandb_init.py:init():882] backend started and connected +2025-07-20 15:40:30,853 INFO MainThread:94972 [wandb_init.py:init():953] updated telemetry +2025-07-20 15:40:30,989 INFO MainThread:94972 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 15:40:48,575 INFO MainThread:94972 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 15:40:48,758 INFO MainThread:94972 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 15:40:48,759 INFO MainThread:94972 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 15:40:48,763 INFO MainThread:94972 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 15:40:48,763 INFO MainThread:94972 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-20 15:40:48,767 INFO MainThread:94972 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/run-m94nrzgn.wandb b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/run-m94nrzgn.wandb new file mode 100644 index 0000000000000000000000000000000000000000..13927cb7290bba6efd0223cc4e1946519db67b29 Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_154030-m94nrzgn/run-m94nrzgn.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/output.log b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d26c8aef3ac6d4e30953659fd6c83fab98e8f8f6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 104213, "uuid": "4d1d82b5395a476bb6b006bf8181afe5", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8931917e110019fd754a86848462f2ba4e154653 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T07:46:06.281691Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=40431", + "--object-store-name=/tmp/ray/session_2025-07-20_15-44-22_687829_99456/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_15-44-22_687829_99456/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=58574", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=50001", + "--gcs-address=10.1.4.164:64219", + "--session-name=session_2025-07-20_15-44-22_687829_99456", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=76295dbf7deef67486e1e6b5c342710db221379deebbaa7e797cdf45", + "--startup-token=64", + "--worker-launch-time-ms=1752997465206", + "--node-id=d537fcc56a2bc0badeca70f7b961324d2fdd437e239db617fa81da42", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178333184" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "qyz6wumcffvf7izbshj2cparvip2q70z" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..1669df511ef4d5f92fbab8a5b14226d3c6c997aa --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T15:46:06.581647226+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T15:46:11.117406425+08:00","level":"INFO","msg":"stream: created new stream","id":"b40ine7y"} +{"time":"2025-07-20T15:46:11.11812207+08:00","level":"INFO","msg":"stream: started","id":"b40ine7y"} +{"time":"2025-07-20T15:46:11.118152722+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"b40ine7y"} +{"time":"2025-07-20T15:46:11.118167955+08:00","level":"INFO","msg":"handler: started","stream_id":"b40ine7y"} +{"time":"2025-07-20T15:46:11.118202843+08:00","level":"INFO","msg":"sender: started","stream_id":"b40ine7y"} diff --git a/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..3ee1f69351a4048ae60a66c003497aa90f8c31da --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_setup.py:_flush():80] Configure stats pid to 104213 +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug.log +2025-07-20 15:46:06,364 INFO 
MainThread:104213 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/logs/debug-internal.log +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_init.py:init():830] calling init triggers +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 
'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 15:46:06,364 INFO MainThread:104213 [wandb_init.py:init():871] starting backend +2025-07-20 15:46:06,572 INFO MainThread:104213 [wandb_init.py:init():874] sending inform_init request +2025-07-20 15:46:06,573 INFO MainThread:104213 [wandb_init.py:init():882] backend started and connected +2025-07-20 15:46:06,578 INFO MainThread:104213 [wandb_init.py:init():953] updated telemetry +2025-07-20 15:46:06,609 INFO MainThread:104213 [wandb_init.py:init():977] communicating run to backend with 
90.0 second timeout +2025-07-20 15:46:13,184 INFO MainThread:104213 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 15:46:13,368 INFO MainThread:104213 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 15:46:13,368 INFO MainThread:104213 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 15:46:13,371 INFO MainThread:104213 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 15:46:13,371 INFO MainThread:104213 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 15:46:13,373 INFO MainThread:104213 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/run-b40ine7y.wandb b/EasyR1-new/examples/wandb/run-20250720_154606-b40ine7y/run-b40ine7y.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/output.log b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..c7f0ffdc083aad03eb7de1d197b1e9bf13782bb6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 113131, "uuid": "5e376187861c45a490c4be994ce9436d", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..bdbdb4d68b86d6cff6e24e7f3d93a9ddaafdd62c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T07:52:10.419577Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=39519", + "--object-store-name=/tmp/ray/session_2025-07-20_15-50-27_532689_108378/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_15-50-27_532689_108378/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=49859", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=55665", + "--gcs-address=10.1.4.164:53198", + "--session-name=session_2025-07-20_15-50-27_532689_108378", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=0645951f301c3c6261e21e7675494d0f66f53c7471d7ae62b1bfef71", + "--startup-token=64", + "--worker-launch-time-ms=1752997830261", + "--node-id=e12fae92f6ce3564c51cacc15e7a9d534fa0201dde3c0167c6930682", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178353664" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "7lqwpfhgfxthb7o9vn643sdvq899ymp6" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..fd4fcc824f7b4b3e2c10ffdd8d5e20c51ed5d80b --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug-internal.log @@ -0,0 +1,8 @@ +{"time":"2025-07-20T15:52:10.714178248+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T15:52:40.821708731+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)"} +{"time":"2025-07-20T15:52:56.997058402+08:00","level":"INFO","msg":"stream: created new stream","id":"dm3qeysw"} +{"time":"2025-07-20T15:52:56.997814652+08:00","level":"INFO","msg":"stream: started","id":"dm3qeysw"} +{"time":"2025-07-20T15:52:56.997837116+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"dm3qeysw"} +{"time":"2025-07-20T15:52:56.997856956+08:00","level":"INFO","msg":"sender: started","stream_id":"dm3qeysw"} +{"time":"2025-07-20T15:52:56.997885717+08:00","level":"INFO","msg":"handler: started","stream_id":"dm3qeysw"} +{"time":"2025-07-20T15:58:24.003309141+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/dm3qeysw/file_stream\": read tcp 10.1.4.164:41206->172.67.193.61:443: read: connection timed out"} diff --git a/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e900a277f657cee9e69343e39551e800a4e3338f --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_setup.py:_flush():80] Configure stats pid to 113131 +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 15:52:10,498 
INFO MainThread:113131 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug.log +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/logs/debug-internal.log +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_init.py:init():830] calling init triggers +2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 
'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': 
{'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} 
+2025-07-20 15:52:10,498 INFO MainThread:113131 [wandb_init.py:init():871] starting backend +2025-07-20 15:52:10,705 INFO MainThread:113131 [wandb_init.py:init():874] sending inform_init request +2025-07-20 15:52:10,707 INFO MainThread:113131 [wandb_init.py:init():882] backend started and connected +2025-07-20 15:52:10,710 INFO MainThread:113131 [wandb_init.py:init():953] updated telemetry +2025-07-20 15:52:10,741 INFO MainThread:113131 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 15:53:00,791 INFO MainThread:113131 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 15:53:00,983 INFO MainThread:113131 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 15:53:00,983 INFO MainThread:113131 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 15:53:00,987 INFO MainThread:113131 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 15:53:00,987 INFO MainThread:113131 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-20 15:53:00,993 INFO MainThread:113131 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/run-dm3qeysw.wandb b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/run-dm3qeysw.wandb new file mode 100644 index 0000000000000000000000000000000000000000..3c1e47133c6a38adc3b17429053042b940e40fdd Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_155210-dm3qeysw/run-dm3qeysw.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/output.log b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..b2c680d12edde77af722c62529d379986abfb732 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 151637, "uuid": "68188b32248f482d861ba5b2d8e18d46", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..59fda2af506801cb6cf9178edb33bd421e73dcd1 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T09:49:59.866734Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=36215", + "--object-store-name=/tmp/ray/session_2025-07-20_17-48-21_273251_146876/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_17-48-21_273251_146876/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=64359", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=59077", + "--gcs-address=10.1.4.164:54394", + "--session-name=session_2025-07-20_17-48-21_273251_146876", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=36b9d744a71ae29c87604d3621452693c69c1a8bc6bd98b6b84851b7", + "--startup-token=64", + "--worker-launch-time-ms=1753004903788", + "--node-id=c275be38bd3059a0f90ff0010a5a1c29e1de2704c979f89ff841713a", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178603520" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "7dtryqcd9js64s789ba7bibty7wfdyw3" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..96402a170fa118b6a127924738df09f239f1d16c --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T17:50:00.335547599+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T17:50:04.993221807+08:00","level":"INFO","msg":"stream: created new stream","id":"sl6d9zx4"} +{"time":"2025-07-20T17:50:04.998155148+08:00","level":"INFO","msg":"stream: started","id":"sl6d9zx4"} +{"time":"2025-07-20T17:50:04.998174332+08:00","level":"INFO","msg":"handler: started","stream_id":"sl6d9zx4"} +{"time":"2025-07-20T17:50:04.998227883+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"sl6d9zx4"} +{"time":"2025-07-20T17:50:04.998235218+08:00","level":"INFO","msg":"sender: started","stream_id":"sl6d9zx4"} diff --git a/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..4f3c5dfe168d38274f6db0db0d44caf37e1f7fa5 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 17:50:00,093 INFO MainThread:151637 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 17:50:00,093 INFO MainThread:151637 [wandb_setup.py:_flush():80] Configure stats pid to 151637 +2025-07-20 17:50:00,093 INFO MainThread:151637 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 17:50:00,093 INFO MainThread:151637 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 17:50:00,093 INFO MainThread:151637 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 17:50:00,094 INFO MainThread:151637 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug.log +2025-07-20 17:50:00,094 
INFO MainThread:151637 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/logs/debug-internal.log +2025-07-20 17:50:00,094 INFO MainThread:151637 [wandb_init.py:init():830] calling init triggers +2025-07-20 17:50:00,094 INFO MainThread:151637 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': 
None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 17:50:00,094 INFO MainThread:151637 [wandb_init.py:init():871] starting backend +2025-07-20 17:50:00,302 INFO MainThread:151637 [wandb_init.py:init():874] sending inform_init request +2025-07-20 17:50:00,304 INFO MainThread:151637 [wandb_init.py:init():882] backend started and connected +2025-07-20 17:50:00,311 INFO MainThread:151637 [wandb_init.py:init():953] updated telemetry +2025-07-20 17:50:00,394 INFO MainThread:151637 [wandb_init.py:init():977] communicating run to backend with 
90.0 second timeout +2025-07-20 17:50:06,920 INFO MainThread:151637 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 17:50:07,098 INFO MainThread:151637 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 17:50:07,098 INFO MainThread:151637 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 17:50:07,102 INFO MainThread:151637 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 17:50:07,102 INFO MainThread:151637 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 17:50:07,106 INFO MainThread:151637 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/run-sl6d9zx4.wandb b/EasyR1-new/examples/wandb/run-20250720_174959-sl6d9zx4/run-sl6d9zx4.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/output.log b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..3ff37b56d793f70783e6b035485a9140a3041271 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 160093, "uuid": "1e1a1ad89e384a9e982686b42345ad53", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..63cf50ee5c2cc5d556d9c3359bb56324bd4d43a6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T09:54:52.474738Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=32789", + "--object-store-name=/tmp/ray/session_2025-07-20_17-53-07_376621_155325/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_17-53-07_376621_155325/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=58284", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=36321", + "--gcs-address=10.1.4.164:59631", + "--session-name=session_2025-07-20_17-53-07_376621_155325", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=dbea8f527f9086b064609c5db5aed7aea24688b11023eee9c47c3537", + "--startup-token=64", + "--worker-launch-time-ms=1753005189877", + "--node-id=7c8c28532c4a6185133a03080497308e2d83d07893be32110c9f7adb", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178615808" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "hnz4fmdpp9zbobdudzhrp438wu35xx1q" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..dcbff365ec41d12cd4edf278ab15daba9e9746f2 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug-internal.log @@ -0,0 +1,7 @@ +{"time":"2025-07-20T17:54:52.871556297+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T17:55:14.368699132+08:00","level":"INFO","msg":"stream: created new stream","id":"u3x8mk80"} +{"time":"2025-07-20T17:55:14.369557756+08:00","level":"INFO","msg":"stream: started","id":"u3x8mk80"} +{"time":"2025-07-20T17:55:14.369581991+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"u3x8mk80"} +{"time":"2025-07-20T17:55:14.369590948+08:00","level":"INFO","msg":"sender: started","stream_id":"u3x8mk80"} +{"time":"2025-07-20T17:55:14.369632862+08:00","level":"INFO","msg":"handler: started","stream_id":"u3x8mk80"} +{"time":"2025-07-20T17:55:39.968202666+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/u3x8mk80/file_stream\": unexpected EOF"} diff --git a/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..76200ae1c4fed5f840c44ccf7c91d06fc37a685b --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 17:54:52,631 INFO MainThread:160093 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_setup.py:_flush():80] Configure stats pid to 160093 +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 17:54:52,632 INFO 
MainThread:160093 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug.log +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/logs/debug-internal.log +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_init.py:init():830] calling init triggers +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 
'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': 
False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 17:54:52,632 INFO MainThread:160093 [wandb_init.py:init():871] starting backend +2025-07-20 17:54:52,845 INFO MainThread:160093 [wandb_init.py:init():874] sending inform_init request +2025-07-20 17:54:52,846 INFO MainThread:160093 [wandb_init.py:init():882] backend started and 
connected +2025-07-20 17:54:52,856 INFO MainThread:160093 [wandb_init.py:init():953] updated telemetry +2025-07-20 17:54:52,931 INFO MainThread:160093 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 17:55:16,136 INFO MainThread:160093 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 17:55:16,309 INFO MainThread:160093 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 17:55:16,310 INFO MainThread:160093 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 17:55:16,368 INFO MainThread:160093 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 17:55:16,377 INFO MainThread:160093 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 17:55:16,381 INFO MainThread:160093 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/run-u3x8mk80.wandb b/EasyR1-new/examples/wandb/run-20250720_175452-u3x8mk80/run-u3x8mk80.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/output.log b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..123ab12e0b9dd7525db98404c151e33db9aa43ff --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 72, "unit": "it", "ip": "10.1.4.164", "pid": 169088, "uuid": "ab4049b6358b42718ec3f59ff589dde1", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5c2fcaebb44b064a31ae45e94c760ef691a3ebf5 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T10:01:35.121306Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=41203", + "--object-store-name=/tmp/ray/session_2025-07-20_17-59-50_542822_164318/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_17-59-50_542822_164318/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=61732", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=53515", + "--gcs-address=10.1.4.164:57575", + "--session-name=session_2025-07-20_17-59-50_542822_164318", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=c3d4af086c5d1d1af37df7b0ae84b5563d0811c6b79f2452a68d02fb", + "--startup-token=64", + "--worker-launch-time-ms=1753005592994", + "--node-id=89f3cd99cf596d5dba4bc8d8f07acb16cf7c97aeb23632bbb2b637f1", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1178652672" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "fa8gi1wr876xad5m76att7408yfvwm84" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..d3f629b61c773a20c0e243b9f8e3bc10b388ae37 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug-internal.log @@ -0,0 +1,8 @@ +{"time":"2025-07-20T18:01:35.480814623+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T18:02:05.589240176+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"} +{"time":"2025-07-20T18:02:13.132238778+08:00","level":"INFO","msg":"stream: created new stream","id":"m27ujde6"} +{"time":"2025-07-20T18:02:13.133859178+08:00","level":"INFO","msg":"handler: started","stream_id":"m27ujde6"} +{"time":"2025-07-20T18:02:13.134071767+08:00","level":"INFO","msg":"stream: started","id":"m27ujde6"} +{"time":"2025-07-20T18:02:13.134098714+08:00","level":"INFO","msg":"sender: started","stream_id":"m27ujde6"} +{"time":"2025-07-20T18:02:13.134089006+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"m27ujde6"} +{"time":"2025-07-20T18:08:58.115407689+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/easy_r1/m27ujde6/file_stream\": read tcp 10.1.4.164:48990->172.67.193.61:443: read: connection timed out"} diff --git a/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e02b522c5ef29ff8c18fbb45f45e2324d1486386 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_setup.py:_flush():80] Configure stats pid to 169088 +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_setup.py:_flush():80] Loading settings from 
/nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug.log +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/logs/debug-internal.log +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_init.py:init():830] calling init triggers +2025-07-20 18:01:35,258 INFO MainThread:169088 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/deeplocmulti3_valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': 
True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 72}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': 
True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 18:01:35,259 INFO MainThread:169088 [wandb_init.py:init():871] starting 
backend +2025-07-20 18:01:35,468 INFO MainThread:169088 [wandb_init.py:init():874] sending inform_init request +2025-07-20 18:01:35,470 INFO MainThread:169088 [wandb_init.py:init():882] backend started and connected +2025-07-20 18:01:35,475 INFO MainThread:169088 [wandb_init.py:init():953] updated telemetry +2025-07-20 18:01:35,528 INFO MainThread:169088 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 18:02:16,165 INFO MainThread:169088 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 18:02:16,369 INFO MainThread:169088 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 18:02:16,369 INFO MainThread:169088 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 18:02:16,373 INFO MainThread:169088 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 18:02:16,373 INFO MainThread:169088 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 18:02:16,376 INFO MainThread:169088 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/run-m27ujde6.wandb b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/run-m27ujde6.wandb new file mode 100644 index 0000000000000000000000000000000000000000..827a007cf3ac5a9a3ccbf066a7d52b6a49f801cc Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_180135-m27ujde6/run-m27ujde6.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/output.log b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..cdb5545831b8fb1c8c626d8d0857a3e5e10d2ea2 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 179677, "uuid": 
"ed6b32be02f24016a6d7ea58bd716567", "closed": false} +Start validation... diff --git a/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 
+pip==25.1 +annotated-types==0.7.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 
+opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 +fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 
+mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 +requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..05bf02ba431928cb3d2d81014ed15de3d114f7f7 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T10:12:46.823746Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=46025", + "--object-store-name=/tmp/ray/session_2025-07-20_18-09-53_125496_174925/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_18-09-53_125496_174925/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=41545", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=39916", + "--gcs-address=10.1.4.164:60493", + "--session-name=session_2025-07-20_18-09-53_125496_174925", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=2b81a5b7050804e85740b145be89d8f47b8b557be40acba82e5226d9", + "--startup-token=64", + "--worker-launch-time-ms=1753006195919", + "--node-id=812b9ba1b07bc192ee99fedd03ebc5d4bcab96a2124b55aca302465b", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": 
"/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179250688" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "jca15im09rvgmqcdbyu8ijz6wttj46fx" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug-internal.log new file mode 100644 index 
0000000000000000000000000000000000000000..2d2cec0c5dd5de8d44b8f9cf6aed426d6deeaae1 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T18:12:47.187575955+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T18:12:50.614283993+08:00","level":"INFO","msg":"stream: created new stream","id":"89mv3lt4"} +{"time":"2025-07-20T18:12:50.615230535+08:00","level":"INFO","msg":"stream: started","id":"89mv3lt4"} +{"time":"2025-07-20T18:12:50.615255588+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"89mv3lt4"} +{"time":"2025-07-20T18:12:50.615279379+08:00","level":"INFO","msg":"sender: started","stream_id":"89mv3lt4"} +{"time":"2025-07-20T18:12:50.615332536+08:00","level":"INFO","msg":"handler: started","stream_id":"89mv3lt4"} diff --git a/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..8b7506e1aeea69eace0c6173664d7162688ba3e4 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_setup.py:_flush():80] Configure stats pid to 179677 +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_init.py:setup_run_log_directory():703] Logging user logs to 
/nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug.log +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/logs/debug-internal.log +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_init.py:init():830] calling init triggers +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 
0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 
1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 18:12:46,958 INFO MainThread:179677 [wandb_init.py:init():871] starting backend +2025-07-20 18:12:47,172 INFO MainThread:179677 [wandb_init.py:init():874] sending inform_init request +2025-07-20 18:12:47,173 INFO MainThread:179677 [wandb_init.py:init():882] backend started and connected +2025-07-20 18:12:47,176 INFO MainThread:179677 [wandb_init.py:init():953] updated telemetry +2025-07-20 18:12:47,295 
INFO MainThread:179677 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 18:12:51,947 INFO MainThread:179677 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 18:12:52,164 INFO MainThread:179677 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 18:12:52,164 INFO MainThread:179677 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 18:12:52,165 INFO MainThread:179677 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 18:12:52,165 INFO MainThread:179677 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 18:12:52,167 INFO MainThread:179677 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/run-89mv3lt4.wandb b/EasyR1-new/examples/wandb/run-20250720_181246-89mv3lt4/run-89mv3lt4.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/output.log b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..8e478900dbdb07580c457d558258752ecbf79130 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 193580, "uuid": "0c2f3bce130f48f0a7237c58a684eeb5", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..aab8b81730a8803066b8c3a1a5e8e85dc21f271c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T10:38:04.454833Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=33775", + "--object-store-name=/tmp/ray/session_2025-07-20_18-36-18_073010_188810/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_18-36-18_073010_188810/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=45288", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=56348", + "--gcs-address=10.1.4.164:64298", + "--session-name=session_2025-07-20_18-36-18_073010_188810", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=653b197ce741c102801a48d4315a065ffeb33d7d9d7a1fb24af03e3f", + "--startup-token=64", + "--worker-launch-time-ms=1753007780685", + "--node-id=f18941412ee5f4fe4a1ee5dc9df15d495c6cf10e7fd83f7a16ec63a8", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179459584" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "hprpvicj1ru54tiz927ygpyb2z630bnt" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..63029ba30bc2636bc2541ee269129d546cd10373 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T18:38:05.03357076+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T18:38:10.341722047+08:00","level":"INFO","msg":"stream: created new stream","id":"82cvf14y"} +{"time":"2025-07-20T18:38:10.343036806+08:00","level":"INFO","msg":"stream: started","id":"82cvf14y"} +{"time":"2025-07-20T18:38:10.343072051+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"82cvf14y"} +{"time":"2025-07-20T18:38:10.343124908+08:00","level":"INFO","msg":"handler: started","stream_id":"82cvf14y"} +{"time":"2025-07-20T18:38:10.343080828+08:00","level":"INFO","msg":"sender: started","stream_id":"82cvf14y"} diff --git a/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..dbd328c518d9107b764f20dd3ddca1aa007be074 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 18:38:04,777 INFO MainThread:193580 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 18:38:04,777 INFO MainThread:193580 [wandb_setup.py:_flush():80] Configure stats pid to 193580 +2025-07-20 18:38:04,777 INFO MainThread:193580 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 18:38:04,777 INFO MainThread:193580 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 18:38:04,777 INFO MainThread:193580 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 18:38:04,777 INFO MainThread:193580 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug.log +2025-07-20 18:38:04,778 INFO 
MainThread:193580 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/logs/debug-internal.log +2025-07-20 18:38:04,778 INFO MainThread:193580 [wandb_init.py:init():830] calling init triggers +2025-07-20 18:38:04,778 INFO MainThread:193580 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 
'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 18:38:04,778 INFO MainThread:193580 [wandb_init.py:init():871] starting backend +2025-07-20 18:38:05,019 INFO MainThread:193580 [wandb_init.py:init():874] sending inform_init request +2025-07-20 18:38:05,021 INFO MainThread:193580 [wandb_init.py:init():882] backend started and connected +2025-07-20 18:38:05,028 INFO MainThread:193580 [wandb_init.py:init():953] updated telemetry +2025-07-20 18:38:05,097 INFO MainThread:193580 [wandb_init.py:init():977] communicating run to backend with 
90.0 second timeout +2025-07-20 18:38:11,543 INFO MainThread:193580 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 18:38:11,737 INFO MainThread:193580 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 18:38:11,737 INFO MainThread:193580 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 18:38:11,744 INFO MainThread:193580 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 18:38:11,744 INFO MainThread:193580 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 18:38:11,764 INFO MainThread:193580 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/run-82cvf14y.wandb b/EasyR1-new/examples/wandb/run-20250720_183804-82cvf14y/run-82cvf14y.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/output.log b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7b0d3f160e4c6794d0d5c3531b987a2a80b75559 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 202910, "uuid": "1a0be4df81354e5b8a89bcc67273cb08", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..2785db21b6c10750a7a09a2c849618c0855ad025 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T10:45:55.035753Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=38911", + "--object-store-name=/tmp/ray/session_2025-07-20_18-44-08_264554_198143/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_18-44-08_264554_198143/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=58446", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=59793", + "--gcs-address=10.1.4.164:54451", + "--session-name=session_2025-07-20_18-44-08_264554_198143", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=a38c77a85680040c660a3b5d4ab803d8aaa44e70746e99af3f111616", + "--startup-token=64", + "--worker-launch-time-ms=1753008251912", + "--node-id=7ad278f1bf59137a7347996ddac8430d32e9591d2f83756c40b53bdf", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179512832" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "2qdyd3o7cilgxi2vgxni4m8uxslv4r1d" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..ca59475e3f60f69e98062d738e73f4e7dfa64a9b --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T18:45:55.347049864+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T18:45:58.087436155+08:00","level":"INFO","msg":"stream: created new stream","id":"htrmvlj8"} +{"time":"2025-07-20T18:45:58.08754831+08:00","level":"INFO","msg":"stream: started","id":"htrmvlj8"} +{"time":"2025-07-20T18:45:58.087578033+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"htrmvlj8"} +{"time":"2025-07-20T18:45:58.087603416+08:00","level":"INFO","msg":"handler: started","stream_id":"htrmvlj8"} +{"time":"2025-07-20T18:45:58.087668036+08:00","level":"INFO","msg":"sender: started","stream_id":"htrmvlj8"} diff --git a/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..1176a052552495e1e4e93c8575123b6c59e3faca --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 18:45:55,129 INFO MainThread:202910 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_setup.py:_flush():80] Configure stats pid to 202910 +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug.log +2025-07-20 18:45:55,130 INFO 
MainThread:202910 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/logs/debug-internal.log +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_init.py:init():830] calling init triggers +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 
'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': 
'/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 18:45:55,130 INFO MainThread:202910 [wandb_init.py:init():871] starting backend +2025-07-20 18:45:55,337 INFO MainThread:202910 [wandb_init.py:init():874] sending inform_init request +2025-07-20 18:45:55,339 INFO MainThread:202910 [wandb_init.py:init():882] backend started and connected +2025-07-20 18:45:55,343 INFO MainThread:202910 [wandb_init.py:init():953] updated telemetry +2025-07-20 18:45:55,379 INFO MainThread:202910 [wandb_init.py:init():977] communicating run to backend with 
90.0 second timeout +2025-07-20 18:45:59,062 INFO MainThread:202910 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 18:45:59,234 INFO MainThread:202910 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 18:45:59,234 INFO MainThread:202910 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 18:45:59,238 INFO MainThread:202910 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 18:45:59,238 INFO MainThread:202910 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 18:45:59,241 INFO MainThread:202910 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/run-htrmvlj8.wandb b/EasyR1-new/examples/wandb/run-20250720_184555-htrmvlj8/run-htrmvlj8.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/config.yaml b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9cb8f96cf110a214774352fcec46a88f1b722fd8 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/config.yaml @@ -0,0 +1,322 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + nmbvcuty6clhkr83vjwadihsu9kpdhoi: + args: + - --node-ip-address=10.1.4.164 + - --node-manager-port=40847 + - --object-store-name=/tmp/ray/session_2025-07-20_18-50-33_612671_207255/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2025-07-20_18-50-33_612671_207255/sockets/raylet + - --redis-address=None + - --metrics-agent-port=56349 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=59408 + - --gcs-address=10.1.4.164:61296 + - --session-name=session_2025-07-20_18-50-33_612671_207255 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - 
--cluster-id=5f593853245e93a932b0d21951d11425b24c26d22610a6a16cb50798 + - --startup-token=64 + - --worker-launch-time-ms=1753008637130 + - --node-id=f402a8cae52cba65b7db537226d3a02bb2c3b13e17c85bb8ba53fb2d + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + cpu_count: 64 + cpu_count_logical: 64 + cudaVersion: "12.1" + disk: + /: + total: "1623302262784" + used: "1179701248" + email: gia0603yucca@gmail.com + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: https://github.com/PorUna-byte/PAR.git + gpu: NVIDIA A800-SXM4-80GB + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-dd05c780-0a19-17fd-c584-d09f4318f680 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-2782427d-b86d-e7c7-247f-edefe39eec71 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-52755b3d-d761-c869-0d08-300873ba5f02 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-5927b565-1506-34c6-eb8d-d657ed5f3558 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-90465488-5319-9508-9e8a-b3b12918be35 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-855bcb80-e48f-cdd2-f62d-35138b69089a + host: dsw-266702-557cd69888-g24kv + memory: + total: "549755813888" + os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + program: /root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-20T10:52:18.641147Z" + writerId: nmbvcuty6clhkr83vjwadihsu9kpdhoi + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 
+ - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + 
mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + 
mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/output.log b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..80a724dfe36a4e3971e5a46e9778c19c0f6d3c1a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 212029, "uuid": "4f888a0cb3b2443cadcfcfb0f31adc07", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f3214ee37d6934631517bd6e45ec299a6ed93f5d --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T10:52:18.641147Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=40847", + "--object-store-name=/tmp/ray/session_2025-07-20_18-50-33_612671_207255/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_18-50-33_612671_207255/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=56349", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=59408", + "--gcs-address=10.1.4.164:61296", + "--session-name=session_2025-07-20_18-50-33_612671_207255", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=5f593853245e93a932b0d21951d11425b24c26d22610a6a16cb50798", + "--startup-token=64", + "--worker-launch-time-ms=1753008637130", + "--node-id=f402a8cae52cba65b7db537226d3a02bb2c3b13e17c85bb8ba53fb2d", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179701248" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "nmbvcuty6clhkr83vjwadihsu9kpdhoi" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/wandb-summary.json b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..97d29b9d12b49eb80fef4dfe6cc237926a61eadd --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":86},"_runtime":86} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..267e360925561c316cc43ab6fa7009424ffaca81 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2025-07-20T18:52:19.072240465+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T18:52:21.880833557+08:00","level":"INFO","msg":"stream: created new stream","id":"hiby21ed"} +{"time":"2025-07-20T18:52:21.880900115+08:00","level":"INFO","msg":"stream: started","id":"hiby21ed"} +{"time":"2025-07-20T18:52:21.8809482+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"hiby21ed"} +{"time":"2025-07-20T18:52:21.880962341+08:00","level":"INFO","msg":"handler: started","stream_id":"hiby21ed"} +{"time":"2025-07-20T18:52:21.881001525+08:00","level":"INFO","msg":"sender: started","stream_id":"hiby21ed"} +{"time":"2025-07-20T18:53:51.736418092+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":1.018644786}],"total_operations":1}} +{"time":"2025-07-20T18:53:55.214428055+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-20T18:54:06.246943554+08:00","level":"INFO","msg":"stream: closing","id":"hiby21ed"} +{"time":"2025-07-20T18:54:06.247836119+08:00","level":"INFO","msg":"handler: closed","stream_id":"hiby21ed"} +{"time":"2025-07-20T18:54:06.247850793+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"hiby21ed"} +{"time":"2025-07-20T18:54:06.24786321+08:00","level":"INFO","msg":"sender: closed","stream_id":"hiby21ed"} 
+{"time":"2025-07-20T18:54:06.255074667+08:00","level":"INFO","msg":"stream: closed","id":"hiby21ed"} diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e4ca65344d880c9f07206e4af4eb0752336deaa9 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_setup.py:_flush():80] Configure stats pid to 212029 +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug.log +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/logs/debug-internal.log +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_init.py:init():830] calling init triggers +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 
'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 
'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 
'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 18:52:18,853 INFO MainThread:212029 [wandb_init.py:init():871] starting backend +2025-07-20 18:52:19,060 INFO MainThread:212029 [wandb_init.py:init():874] sending inform_init request +2025-07-20 18:52:19,061 INFO MainThread:212029 [wandb_init.py:init():882] backend started and connected +2025-07-20 18:52:19,068 INFO MainThread:212029 [wandb_init.py:init():953] updated telemetry +2025-07-20 18:52:19,145 INFO MainThread:212029 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 18:52:24,026 INFO MainThread:212029 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 18:52:24,193 INFO MainThread:212029 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 18:52:24,194 INFO MainThread:212029 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 18:52:24,198 INFO MainThread:212029 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 18:52:24,198 INFO MainThread:212029 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-20 18:52:24,200 INFO MainThread:212029 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-20 18:53:50,710 INFO MainThread:212029 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/hiby21ed +2025-07-20 18:53:50,717 INFO MainThread:212029 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-20 18:53:50,734 INFO MainThread:212029 [wandb_run.py:_restore():2405] restore +2025-07-20 18:53:50,734 INFO MainThread:212029 [wandb_run.py:_restore():2411] restore done +2025-07-20 18:54:06,232 INFO MainThread:212029 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-20 18:54:06,233 INFO MainThread:212029 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-20 18:54:06,239 INFO MainThread:212029 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/run-hiby21ed.wandb b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/run-hiby21ed.wandb new file mode 100644 index 0000000000000000000000000000000000000000..3110c1fc373205f16bd5e7932384f1ba368c2fa9 Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_185218-hiby21ed/run-hiby21ed.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/config.yaml b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14ee11a0cb292dad8d69ff327102b77cba2ec272 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/config.yaml @@ -0,0 +1,322 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + t6vyfkz6zns915ezpi7icvj23e02z4q0: + args: + - --node-ip-address=10.1.4.164 + - --node-manager-port=38935 + - --object-store-name=/tmp/ray/session_2025-07-20_19-06-49_559805_218438/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2025-07-20_19-06-49_559805_218438/sockets/raylet + - --redis-address=None + - 
--metrics-agent-port=44894 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=41153 + - --gcs-address=10.1.4.164:48225 + - --session-name=session_2025-07-20_19-06-49_559805_218438 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - --cluster-id=cf15bbb8c672ce3f59302373bb0e3c555a73db416b5edbf38e5c645f + - --startup-token=64 + - --worker-launch-time-ms=1753009612034 + - --node-id=c982adee0b3c4e6317899ac6dd65aa17e75b59bb373b3d128c71698a + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + cpu_count: 64 + cpu_count_logical: 64 + cudaVersion: "12.1" + disk: + /: + total: "1623302262784" + used: "1179754496" + email: gia0603yucca@gmail.com + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: https://github.com/PorUna-byte/PAR.git + gpu: NVIDIA A800-SXM4-80GB + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-dd05c780-0a19-17fd-c584-d09f4318f680 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-2782427d-b86d-e7c7-247f-edefe39eec71 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-52755b3d-d761-c869-0d08-300873ba5f02 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-5927b565-1506-34c6-eb8d-d657ed5f3558 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-90465488-5319-9508-9e8a-b3b12918be35 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-855bcb80-e48f-cdd2-f62d-35138b69089a + host: dsw-266702-557cd69888-g24kv + memory: + total: "549755813888" + os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + program: 
/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-20T11:08:29.360965Z" + writerId: t6vyfkz6zns915ezpi7icvj23e02z4q0 + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + 
total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + 
lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/output.log b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..0c4d99df5fdcafde7bb5b46a0a00f3a82b5b8160 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 223200, "uuid": "ef2384a07bb14e09b722b1dd2e623ad1", "closed": false} +Start validation... 
diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 
+opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 
+fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 
+requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..1505160ce482dc9c6444d5deb1fd3dd7a7b326b6 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T11:08:29.360965Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=38935", + "--object-store-name=/tmp/ray/session_2025-07-20_19-06-49_559805_218438/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_19-06-49_559805_218438/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=44894", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=41153", + "--gcs-address=10.1.4.164:48225", + "--session-name=session_2025-07-20_19-06-49_559805_218438", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=cf15bbb8c672ce3f59302373bb0e3c555a73db416b5edbf38e5c645f", + "--startup-token=64", + "--worker-launch-time-ms=1753009612034", + "--node-id=c982adee0b3c4e6317899ac6dd65aa17e75b59bb373b3d128c71698a", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + 
"git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179754496" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "t6vyfkz6zns915ezpi7icvj23e02z4q0" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/wandb-summary.json b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..97d29b9d12b49eb80fef4dfe6cc237926a61eadd --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":86},"_runtime":86} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..eea55cfe26731ac5b22b4d89ce313358f3d83848 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2025-07-20T19:08:29.724190471+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T19:08:32.806897652+08:00","level":"INFO","msg":"stream: created new stream","id":"rjowgpxz"} +{"time":"2025-07-20T19:08:32.816321489+08:00","level":"INFO","msg":"sender: started","stream_id":"rjowgpxz"} +{"time":"2025-07-20T19:08:32.816336671+08:00","level":"INFO","msg":"stream: started","id":"rjowgpxz"} +{"time":"2025-07-20T19:08:32.816359861+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"rjowgpxz"} +{"time":"2025-07-20T19:08:32.816322496+08:00","level":"INFO","msg":"handler: started","stream_id":"rjowgpxz"} +{"time":"2025-07-20T19:10:01.737560141+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading wandb-metadata.json","runtime_seconds":86.389395114},{"desc":"uploading requirements.txt","runtime_seconds":86.151408678}],"total_operations":2}} +{"time":"2025-07-20T19:10:45.930239641+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-20T19:10:52.409320897+08:00","level":"INFO","msg":"stream: closing","id":"rjowgpxz"} +{"time":"2025-07-20T19:10:52.409346873+08:00","level":"INFO","msg":"handler: closed","stream_id":"rjowgpxz"} +{"time":"2025-07-20T19:10:52.409355912+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"rjowgpxz"} 
+{"time":"2025-07-20T19:10:52.409369828+08:00","level":"INFO","msg":"sender: closed","stream_id":"rjowgpxz"} +{"time":"2025-07-20T19:10:52.417363691+08:00","level":"INFO","msg":"stream: closed","id":"rjowgpxz"} diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..900deb9f09c558d90f460fe18314e902afb8c503 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_setup.py:_flush():80] Configure stats pid to 223200 +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug.log +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/logs/debug-internal.log +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_init.py:init():830] calling init triggers +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': 
'/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 
'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': 
False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 19:08:29,503 INFO MainThread:223200 [wandb_init.py:init():871] starting backend +2025-07-20 19:08:29,711 INFO MainThread:223200 [wandb_init.py:init():874] sending inform_init request +2025-07-20 19:08:29,713 INFO MainThread:223200 [wandb_init.py:init():882] backend started and connected +2025-07-20 19:08:29,719 INFO MainThread:223200 [wandb_init.py:init():953] updated telemetry +2025-07-20 19:08:29,812 INFO MainThread:223200 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 19:08:34,079 INFO MainThread:223200 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 19:08:34,261 INFO MainThread:223200 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 19:08:34,261 INFO MainThread:223200 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 19:08:34,302 INFO MainThread:223200 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 19:08:34,302 INFO MainThread:223200 [wandb_run.py:_redirect():2398] Redirects installed. 
+2025-07-20 19:08:34,306 INFO MainThread:223200 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-20 19:10:00,647 INFO MainThread:223200 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/rjowgpxz +2025-07-20 19:10:00,659 INFO MainThread:223200 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-20 19:10:00,735 INFO MainThread:223200 [wandb_run.py:_restore():2405] restore +2025-07-20 19:10:00,735 INFO MainThread:223200 [wandb_run.py:_restore():2411] restore done +2025-07-20 19:10:52,381 INFO MainThread:223200 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-20 19:10:52,383 INFO MainThread:223200 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-20 19:10:52,383 INFO MainThread:223200 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/run-rjowgpxz.wandb b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/run-rjowgpxz.wandb new file mode 100644 index 0000000000000000000000000000000000000000..cecbffade790ebac3e17e1d9f0bba9ee401f80b5 Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_190829-rjowgpxz/run-rjowgpxz.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/config.yaml b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e14feab9091f8abf1d289eb42a01549d5d0779b --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/config.yaml @@ -0,0 +1,322 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + bmyd1r1vwx9pfo07osl5tt1bsnz5nor2: + args: + - --node-ip-address=10.1.4.164 + - --node-manager-port=37981 + - --object-store-name=/tmp/ray/session_2025-07-20_19-12-49_683982_227299/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2025-07-20_19-12-49_683982_227299/sockets/raylet + - --redis-address=None + - 
--metrics-agent-port=60823 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=60965 + - --gcs-address=10.1.4.164:60942 + - --session-name=session_2025-07-20_19-12-49_683982_227299 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - --cluster-id=edbd7ce8cd4b448a3a0fe384bbef5900718e1aba5abd1864af5145b8 + - --startup-token=64 + - --worker-launch-time-ms=1753009973102 + - --node-id=c7c2fc8754cd7e338d6c3cc50f0b96b730630420a2b67812b4dbe2f2 + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + cpu_count: 64 + cpu_count_logical: 64 + cudaVersion: "12.1" + disk: + /: + total: "1623302262784" + used: "1179848704" + email: gia0603yucca@gmail.com + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: https://github.com/PorUna-byte/PAR.git + gpu: NVIDIA A800-SXM4-80GB + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-dd05c780-0a19-17fd-c584-d09f4318f680 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-2782427d-b86d-e7c7-247f-edefe39eec71 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-52755b3d-d761-c869-0d08-300873ba5f02 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-5927b565-1506-34c6-eb8d-d657ed5f3558 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-90465488-5319-9508-9e8a-b3b12918be35 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-855bcb80-e48f-cdd2-f62d-35138b69089a + host: dsw-266702-557cd69888-g24kv + memory: + total: "549755813888" + os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + program: 
/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-20T11:14:42.544523Z" + writerId: bmyd1r1vwx9pfo07osl5tt1bsnz5nor2 + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + 
total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + 
lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/output.log b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..05ac641fade83f7651fb358da40f4042cd81f9a1 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/output.log @@ -0,0 +1,48 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 232058, "uuid": "2b6c8b0cab6d4bbb95f53b8629aa9dcb", "closed": false} +Start validation... 
+key +prot_embeds +value +tensor([[[ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + ..., + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031]], + + [[-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + ..., + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597]], + + [[ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + ..., + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240]], + + [[-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + ..., + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227]], + + [[ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + ..., + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073]]]) +key +prompt_input_ids +value +[tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 
151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]])] diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 
+confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 
+langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 +fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 
+lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 +requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..538e7e992c65cd5c7c2435544be5a9f46773e169 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/wandb-metadata.json @@ -0,0 +1,92 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T11:14:42.544523Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=37981", + "--object-store-name=/tmp/ray/session_2025-07-20_19-12-49_683982_227299/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_19-12-49_683982_227299/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=60823", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=60965", + "--gcs-address=10.1.4.164:60942", + "--session-name=session_2025-07-20_19-12-49_683982_227299", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=edbd7ce8cd4b448a3a0fe384bbef5900718e1aba5abd1864af5145b8", + "--startup-token=64", + "--worker-launch-time-ms=1753009973102", + "--node-id=c7c2fc8754cd7e338d6c3cc50f0b96b730630420a2b67812b4dbe2f2", + "--runtime-env-hash=-115784934", + 
"--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "email": "gia0603yucca@gmail.com", + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179848704" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "bmyd1r1vwx9pfo07osl5tt1bsnz5nor2" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/wandb-summary.json 
b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..bf55868a8b18bdd0746b03671b2168432b325fcb --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":28,"_wandb":{"runtime":28}} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..2d1149c77a97ba2b49de7b4387c09c046fef4cb8 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2025-07-20T19:14:42.896313208+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T19:14:45.107007409+08:00","level":"INFO","msg":"stream: created new stream","id":"18c4gjei"} +{"time":"2025-07-20T19:14:45.107045881+08:00","level":"INFO","msg":"stream: started","id":"18c4gjei"} +{"time":"2025-07-20T19:14:45.107089127+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"18c4gjei"} +{"time":"2025-07-20T19:14:45.107117209+08:00","level":"INFO","msg":"sender: started","stream_id":"18c4gjei"} +{"time":"2025-07-20T19:14:45.107141507+08:00","level":"INFO","msg":"handler: started","stream_id":"18c4gjei"} +{"time":"2025-07-20T19:15:15.899922253+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":1.0589074}],"total_operations":1}} +{"time":"2025-07-20T19:15:23.137185078+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-20T19:15:47.140064574+08:00","level":"INFO","msg":"stream: closing","id":"18c4gjei"} +{"time":"2025-07-20T19:15:47.141573102+08:00","level":"INFO","msg":"handler: closed","stream_id":"18c4gjei"} 
+{"time":"2025-07-20T19:15:47.141588801+08:00","level":"INFO","msg":"sender: closed","stream_id":"18c4gjei"} +{"time":"2025-07-20T19:15:47.14158413+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"18c4gjei"} +{"time":"2025-07-20T19:15:47.147232995+08:00","level":"INFO","msg":"stream: closed","id":"18c4gjei"} diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..f4c271aaad7e334e366ed34c0cd9747be734dbed --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_setup.py:_flush():80] Configure stats pid to 232058 +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug.log +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/logs/debug-internal.log +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_init.py:init():830] calling init triggers +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': 
{'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 
'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 
'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 19:14:42,658 INFO MainThread:232058 [wandb_init.py:init():871] starting backend +2025-07-20 19:14:42,866 INFO MainThread:232058 [wandb_init.py:init():874] sending inform_init request +2025-07-20 19:14:42,868 INFO MainThread:232058 [wandb_init.py:init():882] backend started and connected +2025-07-20 19:14:42,880 INFO MainThread:232058 [wandb_init.py:init():953] updated telemetry +2025-07-20 19:14:42,981 INFO MainThread:232058 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 19:14:46,052 INFO MainThread:232058 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 19:14:46,234 INFO MainThread:232058 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 19:14:46,234 INFO MainThread:232058 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 19:14:46,238 INFO MainThread:232058 [wandb_run.py:_redirect():2375] Wrapping output streams. 
+2025-07-20 19:14:46,238 INFO MainThread:232058 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 19:14:46,241 INFO MainThread:232058 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-20 19:15:14,838 INFO MainThread:232058 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/18c4gjei +2025-07-20 19:15:14,840 INFO MainThread:232058 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-20 19:15:14,883 INFO MainThread:232058 [wandb_run.py:_restore():2405] restore +2025-07-20 19:15:14,887 INFO MainThread:232058 [wandb_run.py:_restore():2411] restore done +2025-07-20 19:15:47,120 INFO MainThread:232058 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-20 19:15:47,127 INFO MainThread:232058 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-20 19:15:47,127 INFO MainThread:232058 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/run-18c4gjei.wandb b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/run-18c4gjei.wandb new file mode 100644 index 0000000000000000000000000000000000000000..43209156f6235dd1820d96556ecbab66ee59fc31 Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_191442-18c4gjei/run-18c4gjei.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/config.yaml b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..512a1fcb46c50f5d1f858e93a0d972d52773bcdc --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/config.yaml @@ -0,0 +1,321 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + aen8lqfqe6nmonwi9mg6k364ewm1zbk1: + args: + - --node-ip-address=10.1.4.164 + - --node-manager-port=39481 + - --object-store-name=/tmp/ray/session_2025-07-20_19-23-46_177404_237363/sockets/plasma_store + - 
--raylet-name=/tmp/ray/session_2025-07-20_19-23-46_177404_237363/sockets/raylet + - --redis-address=None + - --metrics-agent-port=50645 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=61124 + - --gcs-address=10.1.4.164:47108 + - --session-name=session_2025-07-20_19-23-46_177404_237363 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - --cluster-id=c47a61f296c7eb4c70ae9057de30f64c14ef34b9d67ed1e6c9f9e1ab + - --startup-token=64 + - --worker-launch-time-ms=1753010628799 + - --node-id=f70ded91aa9dcf3abddbd985069477193b47e040a95f0010c3430c70 + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + cpu_count: 64 + cpu_count_logical: 64 + cudaVersion: "12.1" + disk: + /: + total: "1623302262784" + used: "1179885568" + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: https://github.com/PorUna-byte/PAR.git + gpu: NVIDIA A800-SXM4-80GB + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-dd05c780-0a19-17fd-c584-d09f4318f680 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-2782427d-b86d-e7c7-247f-edefe39eec71 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-52755b3d-d761-c869-0d08-300873ba5f02 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-5927b565-1506-34c6-eb8d-d657ed5f3558 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-90465488-5319-9508-9e8a-b3b12918be35 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-855bcb80-e48f-cdd2-f62d-35138b69089a + host: dsw-266702-557cd69888-g24kv + memory: + total: "549755813888" + os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + 
program: /root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-20T11:25:31.090058Z" + writerId: aen8lqfqe6nmonwi9mg6k364ewm1zbk1 + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + 
total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + 
lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/output.log b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d0a19aa21317f2d31bcd06b975e6bd93f7b139b3 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/output.log @@ -0,0 +1,51 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 242135, "uuid": "dccebe4a06fd42b7b04143ecb47f7f86", "closed": false} +Start validation... 
+{'prot_embeds': tensor([[[ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + ..., + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031]], + + [[-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + ..., + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597], + [-0.0670, 0.1320, 0.0262, ..., 0.0592, -0.0983, 0.1597]], + + [[ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + ..., + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240], + [ 0.2223, 0.0633, 0.3426, ..., 0.4689, 0.0807, 0.2240]], + + [[-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + ..., + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227], + [-0.1802, -0.4834, 0.1041, ..., -0.1436, -0.3784, -0.5227]], + + [[ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + ..., + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073], + [ 0.0572, 0.1415, 0.0391, ..., 0.1771, 0.2956, 0.6073]]]), 'prompt_input_ids': [tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), 
tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]])], 'input_ids': [tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]]), tensor([[ 11190, 311, 279, ..., 151665, 151665, 151665]])], 'raw_prompt_ids': array([list([11190, 311, 279, 12833, 1995, 3897, 3685, 323, 279, 12833, 829, 1207, 24, 56, 19, 34, 17, 11, 7023, 279, 1429, 4363, 1186, 5873, 1276, 52304, 504, 279, 2701, 2606, 510, 3798, 25, 220, 15, 13, 330, 45, 22147, 355, 11, 547, 1, 715, 220, 16, 13, 330, 56715, 98605, 10530, 11, 328, 1, 2303, 220, 17, 13, 330, 840, 376, 64341, 11, 328, 1, 2303, 220, 18, 13, 330, 54370, 46417, 81, 290, 11, 547, 1, 2303, 220, 19, 13, 330, 3599, 38554, 11, 386, 1, 2303, 220, 20, 13, 330, 3727, 55078, 10530, 292, 2112, 292, 16496, 11, 386, 1, 2303, 220, 21, 13, 330, 2120, 559, 307, 11, 328, 1, 2303, 220, 22, 13, 330, 38, 337, 8212, 40605, 11, 386, 1, 2303, 220, 220, 23, 13, 330, 43, 1047, 31454, 27233, 580, 84, 1263, 11, 386, 1, 2303, 24, 13, 330, 47, 2328, 7191, 635, 11, 547, 698]), + list([11190, 311, 279, 12833, 1995, 3897, 3685, 323, 279, 12833, 829, 1207, 24, 38, 3390, 23, 11, 7023, 279, 1429, 4363, 1186, 5873, 1276, 52304, 504, 279, 2701, 2606, 510, 3798, 25, 220, 15, 13, 330, 45, 22147, 355, 11, 547, 1, 715, 220, 16, 13, 330, 56715, 98605, 10530, 11, 328, 1, 2303, 220, 17, 13, 330, 840, 376, 64341, 11, 328, 1, 2303, 220, 18, 13, 330, 54370, 46417, 81, 290, 11, 547, 1, 2303, 220, 19, 13, 330, 3599, 38554, 11, 386, 1, 2303, 220, 20, 13, 330, 3727, 55078, 10530, 292, 2112, 292, 16496, 11, 386, 1, 2303, 220, 21, 13, 330, 2120, 559, 307, 11, 328, 1, 2303, 220, 22, 13, 330, 38, 337, 8212, 40605, 11, 386, 1, 2303, 220, 220, 23, 13, 330, 43, 1047, 31454, 27233, 580, 84, 1263, 11, 386, 
1, 2303, 24, 13, 330, 47, 2328, 7191, 635, 11, 547, 698]), + list([11190, 311, 279, 12833, 1995, 3897, 3685, 323, 279, 12833, 829, 1207, 24, 17, 18, 55, 16, 11, 7023, 279, 1429, 4363, 1186, 5873, 1276, 52304, 504, 279, 2701, 2606, 510, 3798, 25, 220, 15, 13, 330, 45, 22147, 355, 11, 547, 1, 715, 220, 16, 13, 330, 56715, 98605, 10530, 11, 328, 1, 2303, 220, 17, 13, 330, 840, 376, 64341, 11, 328, 1, 2303, 220, 18, 13, 330, 54370, 46417, 81, 290, 11, 547, 1, 2303, 220, 19, 13, 330, 3599, 38554, 11, 386, 1, 2303, 220, 20, 13, 330, 3727, 55078, 10530, 292, 2112, 292, 16496, 11, 386, 1, 2303, 220, 21, 13, 330, 2120, 559, 307, 11, 328, 1, 2303, 220, 22, 13, 330, 38, 337, 8212, 40605, 11, 386, 1, 2303, 220, 220, 23, 13, 330, 43, 1047, 31454, 27233, 580, 84, 1263, 11, 386, 1, 2303, 24, 13, 330, 47, 2328, 7191, 635, 11, 547, 698]), + list([11190, 311, 279, 12833, 1995, 3897, 3685, 323, 279, 12833, 829, 422, 17, 42, 21, 37, 16, 11, 7023, 279, 1429, 4363, 1186, 5873, 1276, 52304, 504, 279, 2701, 2606, 510, 3798, 25, 220, 15, 13, 330, 45, 22147, 355, 11, 547, 1, 715, 220, 16, 13, 330, 56715, 98605, 10530, 11, 328, 1, 2303, 220, 17, 13, 330, 840, 376, 64341, 11, 328, 1, 2303, 220, 18, 13, 330, 54370, 46417, 81, 290, 11, 547, 1, 2303, 220, 19, 13, 330, 3599, 38554, 11, 386, 1, 2303, 220, 20, 13, 330, 3727, 55078, 10530, 292, 2112, 292, 16496, 11, 386, 1, 2303, 220, 21, 13, 330, 2120, 559, 307, 11, 328, 1, 2303, 220, 22, 13, 330, 38, 337, 8212, 40605, 11, 386, 1, 2303, 220, 220, 23, 13, 330, 43, 1047, 31454, 27233, 580, 84, 1263, 11, 386, 1, 2303, 24, 13, 330, 47, 2328, 7191, 635, 11, 547, 698]), + list([11190, 311, 279, 12833, 1995, 3897, 3685, 323, 279, 12833, 829, 1207, 24, 53, 12457, 15, 11, 7023, 279, 1429, 4363, 1186, 5873, 1276, 52304, 504, 279, 2701, 2606, 510, 3798, 25, 220, 15, 13, 330, 45, 22147, 355, 11, 547, 1, 715, 220, 16, 13, 330, 56715, 98605, 10530, 11, 328, 1, 2303, 220, 17, 13, 330, 840, 376, 64341, 11, 328, 1, 2303, 220, 18, 13, 330, 54370, 46417, 81, 290, 11, 
547, 1, 2303, 220, 19, 13, 330, 3599, 38554, 11, 386, 1, 2303, 220, 20, 13, 330, 3727, 55078, 10530, 292, 2112, 292, 16496, 11, 386, 1, 2303, 220, 21, 13, 330, 2120, 559, 307, 11, 328, 1, 2303, 220, 22, 13, 330, 38, 337, 8212, 40605, 11, 386, 1, 2303, 220, 220, 23, 13, 330, 43, 1047, 31454, 27233, 580, 84, 1263, 11, 386, 1, 2303, 24, 13, 330, 47, 2328, 7191, 635, 11, 547, 698])], + dtype=object), 'ground_truth': array(['4', '4', '4', '4', '4'], dtype=object), 'protein_sequence': array(['MATPSAAFEALMNGVTSWDVPEDAVPCELLLIGEASFPVMVNDMGQVLIAASSYGRGRLVVVSHEDYLVEAQLTPFLLNAVGWLCSSPGAPIGVHPSLAPLAKILEGSGVDAKVEPEVKDSLGVYCIDAYNETMTEKLVKFMKCGGGLLIGGQAWDWANQGEDERVLFTFPGNLVTSVAGIYFTDNKGDTSFFKVSKKMPKIPVLVSCEDDLSDDREELLHGISELDISNSDCFPSQLLVHGALAFPLGLDSYHGCVIAAARYGRGRVVVTGHKVLFTVGKLGPFLLNAVRWLDGGRRGKVVVQTELRTLSGLLAVGGIDTSIEPNLTSDASVYCFEPVSEVGVKELQEFVAEGGGLFVGAQAWWWAFKNPGVSPLARFPGNLLLNPFGISITSQSLNPGPFRTPKAGIRTYHFRSTLAEFQVIMGRKRGNVEKGWLAKLGPDGAAFLQIPAEEIPAYMSVHRLLRKLLSRYRLPVATRENPVINDCCRGAMLSLATGLAHSGSDLSLLVPEIEDMYSSPYLRPSESPITVEVNCTNPGTRYCWMSTGLYIPGRQIIEVSLPEAAASADLKIQIGCHTDDLTRASKLFRGPLVINRCCLDKPTKSITCLWGGLLYIIVPQNSKLGSVPVTVKGAVHAPYYKLGETTLEEWKRRIQENPGPWGELATDNIILTVPTANLRTLENPEPLLRLWDEVMQAVARLGAEPFPLRLPQRIVADVQISVGWMHAGYPIMCHLESVQELINEKLIRTKGLWGPVHELGRNQQRQEWEFPPHTTEATCNLWCVYVHETVLGIPRSRANIALWPPVREKRVRIYLSKGPNVKNWNAWTALETYLQLQEAFGWEPFIRLFTEYRNQTNLPTENVDKMNLWVKMFSHQVQKNLAPFFEAWAWPIQKEVATSLAYLPEWKENIMKLYLLTQMPH', + 'MWPLVVVVLLGSAYCGSAQLIFNITKSVEFTVCNTTVTIPCFVNNMEAKNISELYVKWKFKGKDIFIFDGAQHISKPSEAFPSSKISPSELLHGIASLKMDKRDAVIGNYTCEVTELSREGETIIELKRRFVSWFSPNENILIVIFPILAILLFWGQFGILTLKYKSSYTKEKTIFLLVAGLMLTIIVIVGAILFIPGEYSTKNACGLGLIVIPTAILILLQYCVFMMALGMSSFTIAILILQVLGHVLSVVGLSLCVSECTPVHGPLLISGLGIIALAELLGLVYMKCVASDHKTIQPPRNN', + 
'MRLLPLLVGFSTLLNCSYTQNCSKTTCLPNAKCEVHNGVEACFCSQGYSGNGVTICEDIDECSESSVCGDHAVCENVNGGFSCFCREGYQTATGKSQFTPNDGSYCQDIDECSESSVCGDHAVCENVNGGFSCFCREGYQTATGKSQFTPNDGSYCQESMNSNCHLEHACIAANINKTLKRIGPITEQTTLLQEIYRNSEAELSLMDIVTYIEILTESSSLLGHPNSTTSYKDAHFNSTLTEFGETINNFVERSTHKMWDQLPTNHRRLHLTKLMHTAELVTLQIAQNTQKNSQFDMNSTDLALKVFAFDSTHMKHAHPHMNVDGGYVKISPRRKAAHGTTGNVVVAFLCYKSIGPLLSSSDNFLLDTQNDNSEGKEKVISSVISASISSNPPTLYELEKITFTLSHVKLSDKHRTQCAFWNYSVDAMNNGSWSTEGCELTHSNDTHTSCRCSHLTHFAILMSSTSSIGIKDYNILTRITQLGIIISLICLAICIFTFWFFSEIQSTRTTIHKNLCCSLFLAELVFLIGININTNKLVCSIIAGLLHYFFLAAFAWMCIEGIHLYLIVVGVIYNKGFLHKNFYIFGYLSPAVVVGFSASLGYRYYGTTKVCWLSTENNFIWSFIGPACLIILVNLLAFGVIIYKVFRHTAGLKPEVSCYENIRSCARGALALLFLLGTTWIFGVLHVVHASVVTAYLFTVSNAFQGMFIFLFLCVLSRKIQEEYYRLFKNVPCCFGCLR', + 'MGFGWQGSVSIAFTALAFVVMAADWVGPDVTFTVLLAFLTAFDGQIVTVAKAAAGYGNTGLLTVIFLYWVAEGITQTGGLELIMNFVLGRSRSVHWALARSMFPVMCLSAFLNNTPCVTFMIPILISWGRRCGVPIKKLLIPLSYASVLGGTCTSIGTSTNLVIVGLQDARYTKAKQLDQAKFQIFDIAPYGVPYALWGFVFILLTQAFLLPGNSSRYAKDLLIAVRVLPSSSVAKKKLKDSGLLQQSGFSVSGIYRDGKYLSKPDPNWVLEPNDILYAAGEFDVVEFVGEEFGLGLVNADAETSAERPFTTGEESVFTPTGGAPYQKLVQATIAPTSDLIGRTVREVSWQGRFGLIPVAIQRGNGREDGRLNDVVLAAGDVLILDTTPFYDEEREDSKNNFAGKVRAVKDGAAKEFVVGVKVKKSSEVVNKTVSAAGLRGIPGLFVLSVDRADGSSVEASDYLYKIQPDDTIWIATDIGAVGFLAKFPGLELVQQEQVDKTGTSILYRHLVQAAVSHKGPIVGKTVRDVRFRTLYNAAVVAVHREGARVPLKVQDIVLQGGDVLLISCHTNWADEHRHDKSFVLLQPVPDSSPPKRSRMVIGVLLATGMVLTQIVGGLKSREYIHLWPAAVLTSALMLLTGCMNADQARKAIYWDVYLTIAAAFGVSAALEGTGVAASFANGIISIGKNLHSDGAALIAIYIATAMLSELLTNNAAGAIMYPIAAIAGDALKISPKETSVAIMLGASAGFINPFSYQCNLMVYAAGNYSVREFAIIGAPFQIWLMIVAGFILCYMKEWHQVWIVSWICTAGIVLLPALYFLLPTKVQLRIDAFFDRVAQTLNPKLIIERRNSIRRQASRTGSDGTGSSDSPRALGVPKVITA', + 
'MVAQEQLVLLLMLLAGCRGGANAILDPGWVIPSKVEQLIGGDFNLSCTLNEDYFNGKSAEDCPVEKLYFTGGGRVYRDSKHIRILNNTTILFSDTNAVEQENDYHCMCDEYVINKSKVYVGTRPLLVRDFNCLDYDFQFMVCNFTQPPNTVITKYNISYNTNNDWRYSNTLDCNFDSAPVVTCNLTDDNYKRFSETFYFRLSISNALGHETQPITINHFERLVPARPGQNLTLLNRTESSVCLSWEMPRRSNYNRGLVWQVRVTPQNFEPITRPSWRNHTLTIKDTLCLTELPFAGYNYTLRVRVRANQNNTLWSEPMIYAFATAPAPPRRPPRVTYGSFYVYSSEKAMRFYWEPLEEHELNGPDFRYSISEYRINGTAVDPGLIKVESNSAMIDHWSMSAVHHFLIRSSNSQGLSVNATPMTIGPISNRDFKVREPRNIRSVYHPTNKSYTLSWDPPSDQRELQNYTVFWCVPKPGLQSECEGSIRFAEVASGLHHFTTSPDQLLTLHMAVSANYQSHNTGLHWAICSSDKKDDLAKMEPSIDVATSTSLTVSWSERVCAVILAGYNLTYCQRSAGRPDNCTTVTIDRYTNKHVIQNLVPYTDYSVKMLMYSDSRVSKYSDELVNRTGEAAPSQPRELQLIRVTSDSVELAWKPPLLANGVVRAYEGTFRSLHDNVTDTFRVSASADELVNNEKPITYRLGNLTAFTKYEISVRARTVYPSEPSNVILFSTAIGVPSPPQLYVINNPDQSSRLDWEPPRTPAGRIDFYEISLRDNNASCLTSTILPGRNLSYVMATPRCTSHNPFQLAVRAINVEQHPQLNGADAAEGAVLLMSTNGKGCEARTDALGEEERLQFEAYAANMTAYRLYRSDWGIYGFICTPDTHSVKAMYQTIEVTVAILVLGVIFYLVYKKYRKMSDIGLVLPQGIMETMKKPIDMGGLGLGLGPDSSVSGGIVCTRVDDSPPYTPQDLPHDFSSCGSESSKLLLRTASSSGGGGCVDRDGYDDNHETGPISAVGPPTSYLAMRHGLLVQNDRERERERDREQEREREQQQQQRESEMDREQSCTNGYIKPTQMKSWGGNGPSDNDHTFSVPSTAMTAPMSQPLSQIPLSGYVPVPIPQSRFNPAPVQPFGSPAVPSAATAAAASTFFPPAHLLNMDNYVQASDLHKLKPLVAAPLSQTGGPAFAGSSPATSPPLQLPPVHAASPAAATPKMADIGYTTMEQLQLTGLIKPPLAATVGSPTHAAGGAPGGGNQHSRLQPQINGYVTPQDLNAMAHNRHVL'], + dtype=object)} diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 
+markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 
+numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 +fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 
+compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 +requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/wandb-metadata.json new file mode 100644 index 
0000000000000000000000000000000000000000..9c5883d6f0a3aedd20b77cf4ce6c1d7e4d7a9648 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T11:25:31.090058Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=39481", + "--object-store-name=/tmp/ray/session_2025-07-20_19-23-46_177404_237363/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_19-23-46_177404_237363/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=50645", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=61124", + "--gcs-address=10.1.4.164:47108", + "--session-name=session_2025-07-20_19-23-46_177404_237363", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=c47a61f296c7eb4c70ae9057de30f64c14ef34b9d67ed1e6c9f9e1ab", + "--startup-token=64", + "--worker-launch-time-ms=1753010628799", + "--node-id=f70ded91aa9dcf3abddbd985069477193b47e040a95f0010c3430c70", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179885568" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + 
}, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "aen8lqfqe6nmonwi9mg6k364ewm1zbk1" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/wandb-summary.json b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..334abaad9a4c576b8b414ca3a7804f2fbc807661 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":28},"_runtime":28} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..026e8cf157c7da6bacb3f33b46bb622a12ace9a4 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug-internal.log @@ -0,0 +1,15 @@ +{"time":"2025-07-20T19:25:31.457576073+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} 
+{"time":"2025-07-20T19:25:35.573678908+08:00","level":"INFO","msg":"stream: created new stream","id":"zygsruir"} +{"time":"2025-07-20T19:25:35.574999882+08:00","level":"INFO","msg":"stream: started","id":"zygsruir"} +{"time":"2025-07-20T19:25:35.575019345+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"zygsruir"} +{"time":"2025-07-20T19:25:35.575032079+08:00","level":"INFO","msg":"handler: started","stream_id":"zygsruir"} +{"time":"2025-07-20T19:25:35.575046919+08:00","level":"INFO","msg":"sender: started","stream_id":"zygsruir"} +{"time":"2025-07-20T19:26:06.941634052+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading wandb-metadata.json","runtime_seconds":27.851439057},{"desc":"uploading requirements.txt","runtime_seconds":27.601123972},{"desc":"updating run metadata","runtime_seconds":1.002174891}],"total_operations":3}} +{"time":"2025-07-20T19:27:06.990529067+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading wandb-metadata.json","runtime_seconds":87.900327481},{"desc":"uploading requirements.txt","runtime_seconds":87.650013021},{"desc":"uploading output.log","runtime_seconds":59.29233501},{"desc":"uploading wandb-summary.json","runtime_seconds":59.292321132},{"desc":"uploading config.yaml","runtime_seconds":57.41576814}],"total_operations":5}} +{"time":"2025-07-20T19:28:07.03660474+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading wandb-metadata.json","runtime_seconds":147.946406593},{"desc":"uploading requirements.txt","runtime_seconds":147.696092307},{"desc":"uploading output.log","runtime_seconds":119.338414484},{"desc":"uploading wandb-summary.json","runtime_seconds":119.338400774},{"desc":"uploading config.yaml","runtime_seconds":117.461848187}],"total_operations":5}} +{"time":"2025-07-20T19:28:18.110317576+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} 
+{"time":"2025-07-20T19:28:26.613849672+08:00","level":"INFO","msg":"stream: closing","id":"zygsruir"} +{"time":"2025-07-20T19:28:26.613879904+08:00","level":"INFO","msg":"handler: closed","stream_id":"zygsruir"} +{"time":"2025-07-20T19:28:26.613889947+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"zygsruir"} +{"time":"2025-07-20T19:28:26.61390552+08:00","level":"INFO","msg":"sender: closed","stream_id":"zygsruir"} +{"time":"2025-07-20T19:28:26.619789342+08:00","level":"INFO","msg":"stream: closed","id":"zygsruir"} diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..6ebd24ff68141c0f7b1e8626f8b3f7a92f8fad87 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_setup.py:_flush():80] Configure stats pid to 242135 +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug.log +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/logs/debug-internal.log +2025-07-20 19:25:31,219 
INFO MainThread:242135 [wandb_init.py:init():830] calling init triggers +2025-07-20 19:25:31,219 INFO MainThread:242135 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 
'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 
'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 19:25:31,232 INFO MainThread:242135 [wandb_init.py:init():871] starting backend +2025-07-20 19:25:31,444 INFO MainThread:242135 [wandb_init.py:init():874] sending inform_init request +2025-07-20 19:25:31,446 INFO MainThread:242135 [wandb_init.py:init():882] backend started and connected +2025-07-20 19:25:31,451 INFO MainThread:242135 [wandb_init.py:init():953] updated telemetry +2025-07-20 19:25:31,492 INFO MainThread:242135 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 19:25:37,433 INFO MainThread:242135 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 19:25:37,647 INFO MainThread:242135 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 19:25:37,647 INFO 
MainThread:242135 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 19:25:37,652 INFO MainThread:242135 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 19:25:37,652 INFO MainThread:242135 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 19:25:37,655 INFO MainThread:242135 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-20 19:26:05,937 INFO MainThread:242135 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/zygsruir +2025-07-20 19:26:05,939 INFO MainThread:242135 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-20 19:26:05,939 INFO MainThread:242135 [wandb_run.py:_restore():2405] restore +2025-07-20 19:26:05,939 INFO MainThread:242135 [wandb_run.py:_restore():2411] restore done +2025-07-20 19:28:26,559 INFO MainThread:242135 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-20 19:28:26,580 INFO MainThread:242135 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-20 19:28:26,600 INFO MainThread:242135 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/run-zygsruir.wandb b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/run-zygsruir.wandb new file mode 100644 index 0000000000000000000000000000000000000000..1c10e0cb085ee7a2463d0ac5397ce7139bf082cf Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_192531-zygsruir/run-zygsruir.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/output.log b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..fee82decdb3d2d155a5bd27c3020a01a06910ea0 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/output.log @@ -0,0 +1,2 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", 
"total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 258417, "uuid": "eea63d59625341e689a596fa0b39bb32", "closed": false} +Start validation... diff --git a/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 
+xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 
+aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 +fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 
+safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 +requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..357fe4d9723bf82d2dadc05c36a49e89f85c3713 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T12:04:22.761771Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=39065", + "--object-store-name=/tmp/ray/session_2025-07-20_20-02-37_338127_253666/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_20-02-37_338127_253666/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=51673", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=61422", + "--gcs-address=10.1.4.164:60743", + "--session-name=session_2025-07-20_20-02-37_338127_253666", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=b4b34c9cedc29492f8fa882ae8351e7ee3416f09f147c7e74caf17c3", + "--startup-token=64", + "--worker-launch-time-ms=1753012959888", + "--node-id=51eea3898a82302f4cb8f9a222fdcca1b829b0df7dd10244eed979fe", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": 
"/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1179971584" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "61kjyeeak3xa3675zcay0f3n8u00jg65" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug-internal.log new file mode 100644 index 
0000000000000000000000000000000000000000..5ce12fb741fa20d3c2a9c92f5b00dc8f204498fd --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-07-20T20:04:23.360241675+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T20:04:27.983686669+08:00","level":"INFO","msg":"stream: created new stream","id":"m2esqgth"} +{"time":"2025-07-20T20:04:27.984452648+08:00","level":"INFO","msg":"stream: started","id":"m2esqgth"} +{"time":"2025-07-20T20:04:27.98445835+08:00","level":"INFO","msg":"handler: started","stream_id":"m2esqgth"} +{"time":"2025-07-20T20:04:27.98449331+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"m2esqgth"} +{"time":"2025-07-20T20:04:27.984472178+08:00","level":"INFO","msg":"sender: started","stream_id":"m2esqgth"} diff --git a/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..4820c2e5eb196c71b32cecc700c86e3667344782 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug.log @@ -0,0 +1,21 @@ +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_setup.py:_flush():80] Configure stats pid to 258417 +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_init.py:setup_run_log_directory():703] Logging user logs to 
/nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug.log +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/logs/debug-internal.log +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_init.py:init():830] calling init triggers +2025-07-20 20:04:23,137 INFO MainThread:258417 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 
0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 
1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 20:04:23,138 INFO MainThread:258417 [wandb_init.py:init():871] starting backend +2025-07-20 20:04:23,345 INFO MainThread:258417 [wandb_init.py:init():874] sending inform_init request +2025-07-20 20:04:23,347 INFO MainThread:258417 [wandb_init.py:init():882] backend started and connected +2025-07-20 20:04:23,353 INFO MainThread:258417 [wandb_init.py:init():953] updated telemetry +2025-07-20 20:04:23,405 
INFO MainThread:258417 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 20:04:29,804 INFO MainThread:258417 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 20:04:29,998 INFO MainThread:258417 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 20:04:29,998 INFO MainThread:258417 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 20:04:30,002 INFO MainThread:258417 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 20:04:30,002 INFO MainThread:258417 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 20:04:30,005 INFO MainThread:258417 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/run-m2esqgth.wandb b/EasyR1-new/examples/wandb/run-20250720_200422-m2esqgth/run-m2esqgth.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/config.yaml b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c4bc878537dc6fc2dc7dff776dd4d748ea52668 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/config.yaml @@ -0,0 +1,321 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + xtan9jnlk3anxkvk8lrtde0kzlter6ht: + args: + - --node-ip-address=10.1.4.164 + - --node-manager-port=36211 + - --object-store-name=/tmp/ray/session_2025-07-20_20-06-04_261605_261953/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2025-07-20_20-06-04_261605_261953/sockets/raylet + - --redis-address=None + - --metrics-agent-port=55320 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=40764 + - --gcs-address=10.1.4.164:53846 + - --session-name=session_2025-07-20_20-06-04_261605_261953 + - 
--temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - --cluster-id=0063e3b7b4568489d096fdc242c3518b906b31d74ced6b05fe470650 + - --startup-token=64 + - --worker-launch-time-ms=1753013166837 + - --node-id=fdba09a4dfb48b5d51465042822e51b6b219b770c4e580a471120e79 + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + cpu_count: 64 + cpu_count_logical: 64 + cudaVersion: "12.1" + disk: + /: + total: "1623302262784" + used: "1180028928" + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: https://github.com/PorUna-byte/PAR.git + gpu: NVIDIA A800-SXM4-80GB + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-dd05c780-0a19-17fd-c584-d09f4318f680 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-2782427d-b86d-e7c7-247f-edefe39eec71 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-52755b3d-d761-c869-0d08-300873ba5f02 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-5927b565-1506-34c6-eb8d-d657ed5f3558 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-90465488-5319-9508-9e8a-b3b12918be35 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0 + - architecture: Ampere + name: NVIDIA A800-SXM4-80GB + uuid: GPU-855bcb80-e48f-cdd2-f62d-35138b69089a + host: dsw-266702-557cd69888-g24kv + memory: + total: "549755813888" + os: Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + program: /root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-20T12:07:50.034461Z" + writerId: xtan9jnlk3anxkvk8lrtde0kzlter6ht + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + 
- 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + 
mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + 
mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/output.log b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..cad8f45f4328a4f84e7d55842a40398c72b9c663 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/output.log @@ -0,0 +1,230 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 266721, "uuid": "6d31a99c30c448c08468eabd5882cd00", "closed": false} +Start validation... 
+key +prot_embeds +value +tensor([[[ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + ..., + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031]]]) +key +prompt_input_ids +value +[[[11190 311 279 ... 151665 151665 151665]]] +key +input_ids +value +[[[11190 311 279 ... 151665 151665 151665]]] +key +raw_prompt_ids +value +[[11190 311 279 12833 1995 3897 3685 323 279 12833 829 1207 24 56 19 34 + 17 11 7023 279 1429 4363 1186 5873 1276 52304 504 279 2701 2606 510 + 3798 25 220 15 13 330 45 22147 355 11 547 1 715 220 16 13 330 56715 + 98605 10530 11 328 1 2303 220 17 13 330 840 376 64341 11 328 1 2303 220 + 18 13 330 54370 46417 81 290 11 547 1 2303 220 19 13 330 3599 38554 11 + 386 1 2303 220 20 13 330 3727 55078 10530 292 2112 292 16496 11 386 1 + 2303 220 21 13 330 2120 559 307 11 328 1 2303 220 22 13 330 38 337 8212 + 40605 11 386 1 2303 220 220 23 13 330 43 1047 31454 27233 580 84 1263 + 11 386 1 2303 24 13 330 47 2328 7191 635 11 547 698]] +key +ground_truth +value +['4'] +key +protein_sequence +value 
+['MATPSAAFEALMNGVTSWDVPEDAVPCELLLIGEASFPVMVNDMGQVLIAASSYGRGRLVVVSHEDYLVEAQLTPFLLNAVGWLCSSPGAPIGVHPSLAPLAKILEGSGVDAKVEPEVKDSLGVYCIDAYNETMTEKLVKFMKCGGGLLIGGQAWDWANQGEDERVLFTFPGNLVTSVAGIYFTDNKGDTSFFKVSKKMPKIPVLVSCEDDLSDDREELLHGISELDISNSDCFPSQLLVHGALAFPLGLDSYHGCVIAAARYGRGRVVVTGHKVLFTVGKLGPFLLNAVRWLDGGRRGKVVVQTELRTLSGLLAVGGIDTSIEPNLTSDASVYCFEPVSEVGVKELQEFVAEGGGLFVGAQAWWWAFKNPGVSPLARFPGNLLLNPFGISITSQSLNPGPFRTPKAGIRTYHFRSTLAEFQVIMGRKRGNVEKGWLAKLGPDGAAFLQIPAEEIPAYMSVHRLLRKLLSRYRLPVATRENPVINDCCRGAMLSLATGLAHSGSDLSLLVPEIEDMYSSPYLRPSESPITVEVNCTNPGTRYCWMSTGLYIPGRQIIEVSLPEAAASADLKIQIGCHTDDLTRASKLFRGPLVINRCCLDKPTKSITCLWGGLLYIIVPQNSKLGSVPVTVKGAVHAPYYKLGETTLEEWKRRIQENPGPWGELATDNIILTVPTANLRTLENPEPLLRLWDEVMQAVARLGAEPFPLRLPQRIVADVQISVGWMHAGYPIMCHLESVQELINEKLIRTKGLWGPVHELGRNQQRQEWEFPPHTTEATCNLWCVYVHETVLGIPRSRANIALWPPVREKRVRIYLSKGPNVKNWNAWTALETYLQLQEAFGWEPFIRLFTEYRNQTNLPTENVDKMNLWVKMFSHQVQKNLAPFFEAWAWPIQKEVATSLAYLPEWKENIMKLYLLTQMPH'] +开始valid generate_sequence +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268621, ip=10.1.4.164, actor_id=ce52cf1bf65fb687f4ef2e8501000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268621, ip=10.1.4.164, actor_id=ce52cf1bf65fb687f4ef2e8501000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + 
return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in _default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268620, ip=10.1.4.164, actor_id=f7a12dbb859a77a4660ab73b01000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268620, ip=10.1.4.164, actor_id=f7a12dbb859a77a4660ab73b01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = 
self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in _default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268619, ip=10.1.4.164, actor_id=b9a2160653e6d12bcb36fdfb01000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268619, ip=10.1.4.164, actor_id=b9a2160653e6d12bcb36fdfb01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File 
"/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in _default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268618, ip=10.1.4.164, actor_id=446eaa8d63c457018134bc9c01000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268618, ip=10.1.4.164, actor_id=446eaa8d63c457018134bc9c01000000, 
repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in _default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268617, ip=10.1.4.164, actor_id=efd1feff531e367223ca45bf01000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of 
the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268617, ip=10.1.4.164, actor_id=efd1feff531e367223ca45bf01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in _default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): 
ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268616, ip=10.1.4.164, actor_id=ec16be3ae05a2ccd6645853a01000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268616, ip=10.1.4.164, actor_id=ec16be3ae05a2ccd6645853a01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in 
_default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268397, ip=10.1.4.164, actor_id=f5dc0479983652f5dfca6aad01000000, repr=) +KeyError: 'prompt_input_ids' + +During handling of the above exception, another exception occurred: + +ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=268397, ip=10.1.4.164, actor_id=f5dc0479983652f5dfca6aad01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 176, in generate_sequences + prompt_input_ids: torch.Tensor = prompts.batch["prompt_input_ids"] + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 574, in __getitem__ + return self._get_tuple_maybe_non_tensor(idx_unravel, NO_DEFAULT) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6563, in _get_tuple_maybe_non_tensor + result = self._get_tuple(key, default, **kwargs) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2711, in _get_tuple + first = self._get_str(key[0], default, **kwargs) + File 
"/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/_td.py", line 2708, in _get_str + return self._default_get(key, default) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/tensordict/base.py", line 6500, in _default_get + raise KeyError( +KeyError: 'key "prompt_input_ids" not found in TensorDict with keys [\'prot_embeds\']' diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 
+watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 +async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 
+opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 +mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 +fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 
+nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 +sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 +requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f8884381d325d2f793e08f2cd106cc32b6a6564d --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/wandb-metadata.json @@ -0,0 +1,91 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-20T12:07:50.034461Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=36211", + "--object-store-name=/tmp/ray/session_2025-07-20_20-06-04_261605_261953/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-20_20-06-04_261605_261953/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=55320", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=40764", + "--gcs-address=10.1.4.164:53846", + "--session-name=session_2025-07-20_20-06-04_261605_261953", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=0063e3b7b4568489d096fdc242c3518b906b31d74ced6b05fe470650", + 
"--startup-token=64", + "--worker-launch-time-ms=1753013166837", + "--node-id=fdba09a4dfb48b5d51465042822e51b6b219b770c4e580a471120e79", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 64, + "gpu": "NVIDIA A800-SXM4-80GB", + "gpu_count": 8, + "disk": { + "/": { + "total": "1623302262784", + "used": "1180028928" + } + }, + "memory": { + "total": "549755813888" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-dd05c780-0a19-17fd-c584-d09f4318f680" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-2782427d-b86d-e7c7-247f-edefe39eec71" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-52755b3d-d761-c869-0d08-300873ba5f02" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-5927b565-1506-34c6-eb8d-d657ed5f3558" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-0a3eb397-6191-dcb0-589d-21bc9fb326bd" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-90465488-5319-9508-9e8a-b3b12918be35" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-eb3f319e-72f8-08f1-526a-2971f458a9e0" + }, + { + "name": "NVIDIA A800-SXM4-80GB", + "architecture": "Ampere", + "uuid": "GPU-855bcb80-e48f-cdd2-f62d-35138b69089a" + } + ], + "cudaVersion": "12.1", + "writerId": "xtan9jnlk3anxkvk8lrtde0kzlter6ht" +} \ No newline at end of file diff --git 
a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/wandb-summary.json b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..533452d4a934da3482e9f08995d671c42966eba9 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":6,"_wandb":{"runtime":6}} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..89585658bb17ee57991500fce03ed1101a676252 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2025-07-20T20:07:50.38283684+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-20T20:07:53.483644117+08:00","level":"INFO","msg":"stream: created new stream","id":"qx2pyd9p"} +{"time":"2025-07-20T20:07:53.484513955+08:00","level":"INFO","msg":"stream: started","id":"qx2pyd9p"} +{"time":"2025-07-20T20:07:53.484532882+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"qx2pyd9p"} +{"time":"2025-07-20T20:07:53.484557873+08:00","level":"INFO","msg":"sender: started","stream_id":"qx2pyd9p"} +{"time":"2025-07-20T20:07:53.484540258+08:00","level":"INFO","msg":"handler: started","stream_id":"qx2pyd9p"} +{"time":"2025-07-20T20:08:02.135520705+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":7.373944436}],"total_operations":1}} +{"time":"2025-07-20T20:08:13.947103141+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-20T20:08:26.949791701+08:00","level":"INFO","msg":"stream: closing","id":"qx2pyd9p"} 
+{"time":"2025-07-20T20:08:26.950524242+08:00","level":"INFO","msg":"handler: closed","stream_id":"qx2pyd9p"} +{"time":"2025-07-20T20:08:26.950531995+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"qx2pyd9p"} +{"time":"2025-07-20T20:08:26.950552576+08:00","level":"INFO","msg":"sender: closed","stream_id":"qx2pyd9p"} +{"time":"2025-07-20T20:08:26.956579798+08:00","level":"INFO","msg":"stream: closed","id":"qx2pyd9p"} diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug.log b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..03d1a7f54049deb83caba3d0ce58aaf8462cf976 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_setup.py:_flush():80] Configure stats pid to 266721 +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug.log +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/logs/debug-internal.log +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_init.py:init():830] calling init triggers +2025-07-20 20:07:50,155 INFO 
MainThread:266721 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 
'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 
'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-20 20:07:50,155 INFO MainThread:266721 [wandb_init.py:init():871] starting backend +2025-07-20 20:07:50,362 INFO MainThread:266721 [wandb_init.py:init():874] sending inform_init request +2025-07-20 20:07:50,364 INFO MainThread:266721 [wandb_init.py:init():882] backend started and connected +2025-07-20 20:07:50,371 INFO MainThread:266721 [wandb_init.py:init():953] updated telemetry +2025-07-20 20:07:50,424 INFO MainThread:266721 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-20 20:07:54,757 INFO MainThread:266721 [wandb_init.py:init():1029] starting run threads in backend +2025-07-20 20:07:55,002 INFO MainThread:266721 [wandb_run.py:_console_start():2458] atexit reg +2025-07-20 20:07:55,002 INFO MainThread:266721 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-20 20:07:55,009 INFO 
MainThread:266721 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-20 20:07:55,009 INFO MainThread:266721 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-20 20:07:55,014 INFO MainThread:266721 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-20 20:08:01,111 INFO MainThread:266721 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/qx2pyd9p +2025-07-20 20:08:01,112 INFO MainThread:266721 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-20 20:08:01,117 INFO MainThread:266721 [wandb_run.py:_restore():2405] restore +2025-07-20 20:08:01,120 INFO MainThread:266721 [wandb_run.py:_restore():2411] restore done +2025-07-20 20:08:26,943 INFO MainThread:266721 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-20 20:08:26,944 INFO MainThread:266721 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-20 20:08:26,945 INFO MainThread:266721 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/run-qx2pyd9p.wandb b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/run-qx2pyd9p.wandb new file mode 100644 index 0000000000000000000000000000000000000000..a67ee390595a2327a9e23a96f01a3bafcf2d3b63 Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250720_200750-qx2pyd9p/run-qx2pyd9p.wandb differ diff --git a/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/files/output.log b/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..5df308d8b383a43baf17da17990166de83b5ca30 --- /dev/null +++ 
b/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug-internal.log @@ -0,0 +1,7 @@ +{"time":"2025-07-21T14:03:03.306005681+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-21T14:03:16.726235454+08:00","level":"INFO","msg":"stream: created new stream","id":"esnp51q2"} +{"time":"2025-07-21T14:03:16.72791216+08:00","level":"INFO","msg":"stream: started","id":"esnp51q2"} +{"time":"2025-07-21T14:03:16.727930603+08:00","level":"INFO","msg":"handler: started","stream_id":"esnp51q2"} +{"time":"2025-07-21T14:03:16.727955266+08:00","level":"INFO","msg":"sender: started","stream_id":"esnp51q2"} +{"time":"2025-07-21T14:03:16.727988136+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"esnp51q2"} +{"time":"2025-07-21T14:03:46.759865901+08:00","level":"ERROR","msg":"runupserter: failed to init run","error":"context deadline exceeded (Client.Timeout or context cancellation while reading body)"} diff --git a/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug.log b/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..865615cccfa51ae2499481321bbd83f562a738df --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug.log @@ -0,0 +1,15 @@ +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_setup.py:_flush():80] Configure stats pid to 309550 +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_setup.py:_flush():80] Loading settings from /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-21 14:03:03,077 INFO 
MainThread:309550 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug.log +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/logs/debug-internal.log +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_init.py:init():830] calling init triggers +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 
'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': 
False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-21 14:03:03,077 INFO MainThread:309550 [wandb_init.py:init():871] starting backend +2025-07-21 14:03:03,285 INFO MainThread:309550 [wandb_init.py:init():874] sending inform_init request +2025-07-21 14:03:03,286 INFO MainThread:309550 [wandb_init.py:init():882] backend started and connected +2025-07-21 
14:03:03,302 INFO MainThread:309550 [wandb_init.py:init():953] updated telemetry +2025-07-21 14:03:03,345 INFO MainThread:309550 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout diff --git a/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/run-esnp51q2.wandb b/EasyR1-new/examples/wandb/run-20250721_140302-esnp51q2/run-esnp51q2.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/config.yaml b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c296dc14c1b357fe68bd9e6fe8b886b72a844a7c --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/config.yaml @@ -0,0 +1,285 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + 8rebgclfceg9loyocndmo2x990qn9an8: + args: + - --node-ip-address=10.1.4.164 + - --node-manager-port=39807 + - --object-store-name=/tmp/ray/session_2025-07-21_14-05-48_702174_313219/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2025-07-21_14-05-48_702174_313219/sockets/raylet + - --redis-address=None + - --metrics-agent-port=59329 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=57728 + - --gcs-address=10.1.4.164:57619 + - --session-name=session_2025-07-21_14-05-48_702174_313219 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8265 + - --cluster-id=5e1b6583b033a5ac3a5096c47aedf2ae03a43832b5f14da9ae247fc3 + - --startup-token=64 + - --worker-launch-time-ms=1753077952181 + - --node-id=455c3d0267181f710150724f5570e592401a2c2957f60bcaf24ff8ae + - --runtime-env-hash=-115784934 + - --enable-resource-isolation=false + executable: /root/miniconda3/envs/easyr1-new/bin/python3 + git: + commit: b8caf406aa1699c788f0ca6e44a1769452c317db + remote: https://github.com/PorUna-byte/PAR.git + host: dsw-266702-557cd69888-g24kv + os: 
Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35 + program: /root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.10.0 + root: /nas/shared/kilab/wangyujia/EasyR1-new/examples + startedAt: "2025-07-21T06:07:34.664044Z" + writerId: 8rebgclfceg9loyocndmo2x990qn9an8 + m: [] + python_version: 3.10.0 + t: + "1": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "2": + - 1 + - 9 + - 11 + - 30 + - 33 + - 41 + - 49 + - 51 + - 63 + - 71 + - 95 + - 98 + - 103 + - 105 + "3": + - 2 + - 13 + - 16 + "4": 3.10.0 + "5": 0.21.0 + "6": 4.52.4 + "12": 0.21.0 + "13": linux-x86_64 +algorithm: + value: + adv_estimator: grpo + disable_kl: false + filter_high: 0.99 + filter_key: overall + filter_low: 0.01 + gamma: 1 + kl_coef: 0.01 + kl_horizon: 10000 + kl_penalty: low_var_kl + kl_target: 0.1 + kl_type: fixed + lam: 1 + online_filtering: false + use_kl_loss: true +data: + value: + answer_key: answer + filter_overlong_prompts: true + filter_overlong_prompts_workers: 16 + format_prompt: /nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja + image_dir: null + image_key: images + max_pixels: 4194304 + max_prompt_length: 4096 + max_response_length: 16384 + min_pixels: 262144 + mini_rollout_batch_size: null + override_chat_template: null + prompt_key: question + protein_key: protein + rollout_batch_size: 128 + seed: 1 + shuffle: true + train_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl + val_batch_size: 256 + val_files: /nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl + video_fps: 2 + video_key: videos +trainer: + value: + critic_warmup: 0 + experiment_name: qwen2.5_7b_bio_06182042 + load_checkpoint_path: null + logger: + - console + - wandb + max_steps: null + max_try_make_batch: 20 + n_gpus_per_node: 8 + nnodes: 1 + project_name: easy_r1 + save_checkpoint_path: 
/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042 + save_freq: 5 + save_limit: 3 + save_model_only: false + total_epochs: 1 + val_before_train: true + val_freq: 5 + val_generations_to_log: 3 + val_only: false +worker: + value: + actor: + clip_ratio_dual: 3 + clip_ratio_high: 0.3 + clip_ratio_low: 0.2 + disable_kl: false + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 64 + global_batch_size_per_device: -1 + kl_coef: 0.01 + kl_penalty: low_var_kl + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 2 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + tokenizer_path: /nas/shared/kilab/wangyujia/ProtT3/llm_model + trust_remote_code: false + offload: + offload_optimizer: true + offload_params: true + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: true + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + use_kl_loss: true + use_torch_compile: true + critic: + cliprange_value: 0.5 + fsdp: + enable_cpu_offload: false + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + global_batch_size: 256 + global_batch_size_per_device: -1 + loss_avg_mode: token + max_grad_norm: 1 + micro_batch_size_per_device_for_experience: 16 + micro_batch_size_per_device_for_update: 4 + model: + enable_gradient_checkpointing: true + freeze_vision_tower: false + model_path: null + tokenizer_path: null + trust_remote_code: true + offload: + offload_optimizer: 
false + offload_params: false + optim: + betas: + - 0.9 + - 0.999 + lr: 1e-06 + lr_warmup_ratio: 0 + lr_warmup_steps: null + min_lr_ratio: null + strategy: adamw + training_steps: 1 + warmup_style: constant + weight_decay: 0.01 + padding_free: false + ppo_epochs: 1 + strategy: fsdp + ulysses_size: 1 + hybrid_engine: true + ref: + fsdp: + enable_cpu_offload: true + enable_full_shard: true + enable_rank0_init: true + fsdp_size: -1 + mp_buffer_dtype: fp32 + mp_param_dtype: bf16 + mp_reduce_dtype: fp32 + torch_dtype: null + use_orig_params: false + micro_batch_size_per_device_for_experience: 16 + offload: + offload_optimizer: false + offload_params: false + padding_free: true + strategy: fsdp + ulysses_size: 1 + use_torch_compile: true + reward: + num_cpus: 1 + reward_function: /nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py + reward_function_name: main + reward_type: batch + skip_special_tokens: true + rollout: + disable_log_stats: true + disable_tqdm: false + dtype: bf16 + enable_chunked_prefill: false + enforce_eager: false + gpu_memory_utilization: 0.6 + ignore_eos: false + limit_images: 0 + max_model_len: null + max_num_batched_tokens: 24576 + "n": 5 + name: vllm + prompt_length: 4096 + response_length: 16384 + seed: 1 + temperature: 1 + tensor_parallel_size: 1 + top_k: -1 + top_p: 0.99 + trust_remote_code: false + val_override_config: + "n": 1 + temperature: 0.5 diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/output.log b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..ea9d75e411eaa15741df68ff8613074c5a3303b0 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/output.log @@ -0,0 +1,77 @@ +{"__magic_token__": "__ray_tqdm_magic_token__", "x": 0, "pos": 0, "desc": "Running step", "total": 1, "unit": "it", "ip": "10.1.4.164", "pid": 317976, "uuid": "a2969af0271e4dd2a54b4cda655313f9", "closed": 
false} +Start validation... +key +prot_embeds +value +tensor([[[ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + ..., + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031], + [ 0.1386, -0.2522, 0.0733, ..., 0.5373, 0.6044, -0.6031]]]) +key +prompt_input_ids +value +[[[11190 311 279 ... 151665 151665 151665]]] +key +input_ids +value +[[[11190 311 279 ... 151665 151665 151665]]] +key +raw_prompt_ids +value +[[11190 311 279 12833 1995 3897 3685 323 279 12833 829 1207 24 56 19 34 + 17 11 7023 279 1429 4363 1186 5873 1276 52304 504 279 2701 2606 510 + 3798 25 220 15 13 330 45 22147 355 11 547 1 715 220 16 13 330 56715 + 98605 10530 11 328 1 2303 220 17 13 330 840 376 64341 11 328 1 2303 220 + 18 13 330 54370 46417 81 290 11 547 1 2303 220 19 13 330 3599 38554 11 + 386 1 2303 220 20 13 330 3727 55078 10530 292 2112 292 16496 11 386 1 + 2303 220 21 13 330 2120 559 307 11 328 1 2303 220 22 13 330 38 337 8212 + 40605 11 386 1 2303 220 220 23 13 330 43 1047 31454 27233 580 84 1263 + 11 386 1 2303 24 13 330 47 2328 7191 635 11 547 698]] +key +ground_truth +value +['4'] +key +multi_modal_data +value 
+['MATPSAAFEALMNGVTSWDVPEDAVPCELLLIGEASFPVMVNDMGQVLIAASSYGRGRLVVVSHEDYLVEAQLTPFLLNAVGWLCSSPGAPIGVHPSLAPLAKILEGSGVDAKVEPEVKDSLGVYCIDAYNETMTEKLVKFMKCGGGLLIGGQAWDWANQGEDERVLFTFPGNLVTSVAGIYFTDNKGDTSFFKVSKKMPKIPVLVSCEDDLSDDREELLHGISELDISNSDCFPSQLLVHGALAFPLGLDSYHGCVIAAARYGRGRVVVTGHKVLFTVGKLGPFLLNAVRWLDGGRRGKVVVQTELRTLSGLLAVGGIDTSIEPNLTSDASVYCFEPVSEVGVKELQEFVAEGGGLFVGAQAWWWAFKNPGVSPLARFPGNLLLNPFGISITSQSLNPGPFRTPKAGIRTYHFRSTLAEFQVIMGRKRGNVEKGWLAKLGPDGAAFLQIPAEEIPAYMSVHRLLRKLLSRYRLPVATRENPVINDCCRGAMLSLATGLAHSGSDLSLLVPEIEDMYSSPYLRPSESPITVEVNCTNPGTRYCWMSTGLYIPGRQIIEVSLPEAAASADLKIQIGCHTDDLTRASKLFRGPLVINRCCLDKPTKSITCLWGGLLYIIVPQNSKLGSVPVTVKGAVHAPYYKLGETTLEEWKRRIQENPGPWGELATDNIILTVPTANLRTLENPEPLLRLWDEVMQAVARLGAEPFPLRLPQRIVADVQISVGWMHAGYPIMCHLESVQELINEKLIRTKGLWGPVHELGRNQQRQEWEFPPHTTEATCNLWCVYVHETVLGIPRSRANIALWPPVREKRVRIYLSKGPNVKNWNAWTALETYLQLQEAFGWEPFIRLFTEYRNQTNLPTENVDKMNLWVKMFSHQVQKNLAPFFEAWAWPIQKEVATSLAYLPEWKENIMKLYLLTQMPH'] +开始valid generate_sequence +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=319880, ip=10.1.4.164, actor_id=98a9fd8ac8699eddde886a1b01000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 193, in generate_sequences + prompt_input_ids = non_tensor_batch.pop["prompt_input_ids"] +TypeError: 
'builtin_function_or_method' object is not subscriptable +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=319879, ip=10.1.4.164, actor_id=10eea48f2ea264c060e0257601000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 193, in generate_sequences + prompt_input_ids = non_tensor_batch.pop["prompt_input_ids"] +TypeError: 'builtin_function_or_method' object is not subscriptable +Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_ref_generate_sequences() (pid=319878, ip=10.1.4.164, actor_id=e85a62ca0dbb69fe1e14622801000000, repr=) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/ray/base.py", line 432, in func + return getattr(self.worker_dict[key], name)(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/single_controller/base/decorator.py", line 207, in inner + return func(*args, **kwargs) + File "/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/fsdp_workers.py", line 574, in generate_sequences + output = self.rollout.generate_sequences(prompts=prompts) + File "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs) + File 
"/nas/shared/kilab/wangyujia/EasyR1-new/verl/workers/rollout/vllm_rollout_spmd_new.py", line 193, in generate_sequences + prompt_input_ids = non_tensor_batch.pop["prompt_input_ids"] +TypeError: 'builtin_function_or_method' object is not subscriptable diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/requirements.txt b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc2e79f4aa14e8941b53c16fb89c4211fab47e8a --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/requirements.txt @@ -0,0 +1,295 @@ +colorama==0.4.6 +psutil==7.0.0 +setproctitle==1.2.2 +ipython==8.37.0 +gitdb==4.0.12 +smmap==5.0.2 +pyzmq==27.0.0 +wcwidth==0.2.13 +antlr4-python3-runtime==4.9.3 +streamlit==1.46.1 +opentelemetry-proto==1.26.0 +tiktoken==0.9.0 +MarkupSafe==3.0.2 +openai==1.90.0 +jiter==0.10.0 +markdown-it-py==3.0.0 +rich-toolkit==0.14.8 +PyYAML==6.0.2 +pycountry==24.6.1 +nvidia-cusolver-cu12==11.6.1.9 +codetiming==1.4.0 +text-unidecode==1.3 +aiohttp-cors==0.8.1 +prometheus_client==0.22.1 +pandas==2.3.1 +wrapt==1.17.2 +tifffile==2025.5.10 +vllm==0.8.5.post1 +google-auth==2.40.3 +nvidia-curand-cu12==10.3.5.147 +networkx==3.4.2 +protobuf==4.25.8 +depyf==0.18.0 +altair==5.5.0 +wandb==0.21.0 +opentelemetry-sdk==1.26.0 +nvidia-cufft-cu12==11.2.1.3 +frozenlist==1.7.0 +ninja==1.11.1.4 +anyio==4.9.0 +rignore==0.6.2 +pydantic-extra-types==2.10.5 +tzdata==2025.2 +orjson==3.10.18 +smart_open==7.3.0.post1 +nvidia-cublas-cu12==12.4.5.8 +astor==0.8.1 +uvicorn==0.35.0 +sentry-sdk==2.32.0 +weasel==0.4.1 +opencensus==0.11.4 +certifi==2025.7.14 +nvidia-cuda-nvrtc-cu12==12.4.127 +cupy-cuda12x==13.5.1 +jedi==0.19.2 +GitPython==3.1.44 +xgrammar==0.1.18 +sniffio==1.3.1 +dill==0.3.8 +python-json-logger==3.3.0 +peft==0.16.0 +python-slugify==8.0.4 +watchfiles==1.1.0 +torchaudio==2.6.0 +omegaconf==2.3.0 +interegular==0.3.3 +torchmetrics==1.7.4 +tenacity==9.1.2 
+async-timeout==5.0.1 +pybase64==1.4.1 +tqdm==4.67.1 +confection==0.1.5 +absl-py==2.3.1 +six==1.17.0 +colorful==0.5.7 +xxhash==3.5.0 +regex==2024.11.6 +nvidia-cuda-runtime-cu12==12.4.127 +pip==25.1 +annotated-types==0.7.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +salesforce-lavis==1.0.2 +fastapi-cloud-cli==0.1.4 +av==15.0.0 +nvidia-nvjitlink-cu12==12.4.127 +lightning-utilities==0.14.3 +pytz==2025.2 +webdataset==1.0.2 +cachetools==5.5.2 +nltk==3.9.1 +prometheus-fastapi-instrumentator==7.1.0 +pexpect==4.9.0 +jsonschema-specifications==2025.4.1 +Jinja2==3.1.6 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +decord==0.6.0 +transformers==4.52.4 +dnspython==2.7.0 +joblib==1.5.1 +kaggle==1.7.4.5 +pyasn1_modules==0.4.2 +httpcore==1.0.9 +accelerate==1.8.1 +psutil==7.0.0 +pycocotools==2.0.10 +lm-format-enforcer==0.10.11 +liger_kernel==0.6.0 +googleapis-common-protos==1.70.0 +idna==3.10 +aiohappyeyeballs==2.6.1 +numba==0.61.2 +tornado==6.5.1 +opentelemetry-semantic-conventions==0.47b0 +torchvision==0.21.0 +exceptiongroup==1.3.0 +cfgv==3.4.0 +py-cpuinfo==9.0.0 +murmurhash==1.0.13 +pillow==11.3.0 +asttokens==3.0.0 +spacy==3.8.7 +blinker==1.9.0 +llguidance==0.7.30 +fastapi==0.116.1 +python-dateutil==2.9.0.post0 +prompt_toolkit==3.0.51 +opentelemetry-api==1.26.0 +referencing==0.36.2 +Pygments==2.19.2 +mpmath==1.3.0 +thinc==8.3.6 +multidict==6.6.3 +python-magic==0.4.27 +fairscale==0.4.4 +nodeenv==1.9.1 +mathruler==0.1.0 +identify==2.6.12 +multiprocess==0.70.16 +ftfy==6.3.1 +spacy-legacy==3.0.12 +rsa==4.9.1 +cymem==2.0.11 +flash-attn==2.7.1.post1 +typing-inspection==0.4.1 +nvidia-cufile-cu12==1.11.1.6 +filelock==3.18.0 +jsonschema==4.24.0 +language_data==1.3.0 +iopath==0.1.10 +cloudpickle==3.1.1 +pre_commit==4.2.0 +python-multipart==0.0.20 +gguf==0.17.1 +toml==0.10.2 +lazy_loader==0.4 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nvtx-cu12==12.4.127 +opencv-python-headless==4.12.0.88 +rouge_score==0.1.2 +portalocker==3.2.0 +diskcache==5.6.3 +pycocoevalcap==1.2 
+mdurl==0.1.2 +pure_eval==0.2.3 +ray==2.47.1 +langcodes==3.5.0 +distlib==0.3.9 +pydeck==0.9.1 +traitlets==5.14.3 +aiohttp==3.12.14 +decorator==5.2.1 +opentelemetry-exporter-otlp-proto-http==1.26.0 +verl==0.3.2.dev0 +fsspec==2025.3.0 +pydantic_core==2.33.2 +matplotlib-inline==0.1.7 +httpx==0.28.1 +fastrlock==0.8.3 +zipp==3.23.0 +aiosignal==1.4.0 +uvloop==0.21.0 +opendatasets==0.1.22 +python-dotenv==1.1.1 +attrs==25.3.0 +starlette==0.47.1 +distro==1.9.0 +pyasn1==0.6.1 +plotly==6.2.0 +opencensus-context==0.1.3 +datasets==4.0.0 +bleach==6.2.0 +hf-xet==1.1.5 +pyvers==0.1.0 +rich==14.0.0 +pylatexenc==2.10 +tensordict==0.9.1 +urllib3==2.5.0 +imageio==2.37.0 +platformdirs==4.3.8 +preshed==3.0.10 +catalogue==2.0.10 +h11==0.16.0 +outlines_core==0.1.26 +wasabi==1.1.3 +proto-plus==1.26.1 +scikit-image==0.25.2 +blis==1.3.0 +fastapi-cli==0.0.8 +opentelemetry-exporter-prometheus==0.56b0 +opentelemetry-exporter-otlp==1.26.0 +compressed-tensors==0.9.3 +pyarrow==20.0.0 +opentelemetry-semantic-conventions-ai==0.4.11 +partial-json-parser==0.2.1.1.post6 +parso==0.8.4 +importlib_metadata==8.0.0 +tokenizers==0.21.2 +opentelemetry-exporter-otlp-proto-common==1.26.0 +torchdata==0.11.0 +py-spy==0.4.0 +propcache==0.3.2 +braceexpand==0.1.7 +numpy==2.2.6 +cloudpathlib==0.21.1 +email_validator==2.2.0 +srsly==2.5.1 +webencodings==0.5.1 +airportsdata==20250706 +rpds-py==0.26.0 +outlines==0.1.11 +packaging==25.0 +yarl==1.20.1 +nvidia-cuda-cupti-cu12==12.4.127 +typing_extensions==4.14.1 +pydantic==2.11.7 +xformers==0.0.29.post2 +einops==0.8.1 +grpcio==1.73.1 +setuptools==78.1.1 +httptools==0.6.4 +nvidia-nccl-cu12==2.21.5 +Deprecated==1.2.18 +ptyprocess==0.7.0 +websockets==15.0.1 +torch==2.6.0 +scipy==1.15.3 +typer==0.16.0 +pytorch-lightning==2.5.2 +virtualenv==20.31.2 +huggingface-hub==0.33.4 +contexttimer==0.3.3 +marisa-trie==1.2.1 +shellingham==1.5.4 +charset-normalizer==3.4.2 +nest-asyncio==1.6.0 +executing==2.2.0 +stack-data==0.6.3 +msgpack==1.1.1 +blake3==1.0.5 +narwhals==1.47.0 
+sentencepiece==0.2.0 +llvmlite==0.44.0 +click==8.2.1 +lark==1.2.2 +google-api-core==2.25.1 +sympy==1.13.1 +wheel==0.45.1 +safetensors==0.5.3 +mistral_common==1.8.0 +triton==3.2.0 +msgspec==0.19.0 +watchdog==6.0.0 +requests==2.32.4 +spacy-loggers==1.0.5 +timm==0.4.12 +qwen-vl-utils==0.0.11 +modelscope==1.28.0 +verl==0.3.2.dev0 +jaraco.text==3.12.1 +autocommand==2.2.2 +packaging==24.2 +jaraco.context==5.3.0 +tomli==2.0.1 +typeguard==4.3.0 +zipp==3.19.2 +backports.tarfile==1.2.0 +typing_extensions==4.12.2 +jaraco.collections==5.1.0 +inflect==7.3.1 +more-itertools==10.3.0 +jaraco.functools==4.0.1 +importlib_metadata==8.0.0 +platformdirs==4.2.2 +wheel==0.45.1 diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/wandb-metadata.json b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0d66826f53b406c11b3f42776924d35d1a24d020 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/wandb-metadata.json @@ -0,0 +1,35 @@ +{ + "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.10.0", + "startedAt": "2025-07-21T06:07:34.664044Z", + "args": [ + "--node-ip-address=10.1.4.164", + "--node-manager-port=39807", + "--object-store-name=/tmp/ray/session_2025-07-21_14-05-48_702174_313219/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2025-07-21_14-05-48_702174_313219/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=59329", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=57728", + "--gcs-address=10.1.4.164:57619", + "--session-name=session_2025-07-21_14-05-48_702174_313219", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8265", + "--cluster-id=5e1b6583b033a5ac3a5096c47aedf2ae03a43832b5f14da9ae247fc3", + "--startup-token=64", + "--worker-launch-time-ms=1753077952181", + 
"--node-id=455c3d0267181f710150724f5570e592401a2c2957f60bcaf24ff8ae", + "--runtime-env-hash=-115784934", + "--enable-resource-isolation=false" + ], + "program": "/root/miniconda3/envs/easyr1-new/lib/python3.10/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/PorUna-byte/PAR.git", + "commit": "b8caf406aa1699c788f0ca6e44a1769452c317db" + }, + "root": "/nas/shared/kilab/wangyujia/EasyR1-new/examples", + "host": "dsw-266702-557cd69888-g24kv", + "executable": "/root/miniconda3/envs/easyr1-new/bin/python3", + "writerId": "8rebgclfceg9loyocndmo2x990qn9an8" +} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/wandb-summary.json b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..533452d4a934da3482e9f08995d671c42966eba9 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":6,"_wandb":{"runtime":6}} \ No newline at end of file diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug-internal.log b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..3a020c591187aae5f1f529cb23d2665fe481c73e --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2025-07-21T14:07:35.211628547+08:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-07-21T14:07:50.875611638+08:00","level":"INFO","msg":"stream: created new stream","id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.876588753+08:00","level":"INFO","msg":"stream: started","id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.87663237+08:00","level":"INFO","msg":"sender: started","stream_id":"a9qblh0u"} 
+{"time":"2025-07-21T14:07:50.876605114+08:00","level":"INFO","msg":"handler: started","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:07:50.87665507+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:05.783504415+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":7.434542791},{"desc":"uploading data","runtime_seconds":0.571568597}],"total_operations":2}} +{"time":"2025-07-21T14:08:31.955353631+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-07-21T14:08:56.48244624+08:00","level":"INFO","msg":"stream: closing","id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.48558812+08:00","level":"INFO","msg":"handler: closed","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.485598269+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.485607803+08:00","level":"INFO","msg":"sender: closed","stream_id":"a9qblh0u"} +{"time":"2025-07-21T14:08:56.50017009+08:00","level":"INFO","msg":"stream: closed","id":"a9qblh0u"} diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug.log b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..b7e2572e2a286ff92c36c4fc2635c9b518e94415 --- /dev/null +++ b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug.log @@ -0,0 +1,28 @@ +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Configure stats pid to 317976 +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from 
/nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/settings +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug.log +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/logs/debug-internal.log +2025-07-21 14:07:34,952 INFO MainThread:317976 [wandb_init.py:init():830] calling init triggers +2025-07-21 14:07:34,953 INFO MainThread:317976 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/train.jsonl', 'val_files': '/nas/shared/kilab/wangyujia/rl_data/deeplocmulti3/valid.jsonl', 'prompt_key': 'question', 'answer_key': 'answer', 'protein_key': 'protein', 'image_key': 'images', 'video_key': 'videos', 'image_dir': None, 'video_fps': 2.0, 'max_prompt_length': 4096, 'max_response_length': 16384, 'rollout_batch_size': 128, 'mini_rollout_batch_size': None, 'val_batch_size': 256, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 16}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 64, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True, 'model': {'model_path': 
'/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'tokenizer_path': '/nas/shared/kilab/wangyujia/ProtT3/llm_model', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'loss_avg_mode': 'token', 'ppo_epochs': 1, 'padding_free': False, 'ulysses_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'lr_warmup_steps': None, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 
'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 16, 'padding_free': True, 'ulysses_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1-new/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'main'}, 'rollout': {'name': 'vllm', 'n': 5, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 24576, 'disable_log_stats': True, 'disable_tqdm': False, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 4096, 'response_length': 16384, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 10000.0, 'kl_target': 0.1, 'online_filtering': False, 'filter_key': 'overall', 'filter_low': 0.01, 'filter_high': 0.99}, 'trainer': {'total_epochs': 1, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2.5_7b_bio_06182042', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'max_try_make_batch': 20, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_model_only': False, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio_07112042', 'load_checkpoint_path': None}, '_wandb': {}} +2025-07-21 14:07:34,953 INFO MainThread:317976 [wandb_init.py:init():871] starting backend +2025-07-21 14:07:35,172 
INFO MainThread:317976 [wandb_init.py:init():874] sending inform_init request +2025-07-21 14:07:35,174 INFO MainThread:317976 [wandb_init.py:init():882] backend started and connected +2025-07-21 14:07:35,186 INFO MainThread:317976 [wandb_init.py:init():953] updated telemetry +2025-07-21 14:07:35,302 INFO MainThread:317976 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-07-21 14:07:58,269 INFO MainThread:317976 [wandb_init.py:init():1029] starting run threads in backend +2025-07-21 14:07:58,556 INFO MainThread:317976 [wandb_run.py:_console_start():2458] atexit reg +2025-07-21 14:07:58,556 INFO MainThread:317976 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-07-21 14:07:58,562 INFO MainThread:317976 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-07-21 14:07:58,562 INFO MainThread:317976 [wandb_run.py:_redirect():2398] Redirects installed. +2025-07-21 14:07:58,574 INFO MainThread:317976 [wandb_init.py:init():1075] run started, returning control to user process +2025-07-21 14:08:04,748 INFO MainThread:317976 [wandb_run.py:_finish():2224] finishing run gia0603yucca/easy_r1/a9qblh0u +2025-07-21 14:08:04,755 INFO MainThread:317976 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0 +2025-07-21 14:08:04,767 INFO MainThread:317976 [wandb_run.py:_restore():2405] restore +2025-07-21 14:08:04,771 INFO MainThread:317976 [wandb_run.py:_restore():2411] restore done +2025-07-21 14:08:56,463 INFO MainThread:317976 [wandb_run.py:_footer_history_summary_info():3903] rendering history +2025-07-21 14:08:56,469 INFO MainThread:317976 [wandb_run.py:_footer_history_summary_info():3935] rendering summary +2025-07-21 14:08:56,469 INFO MainThread:317976 [wandb_run.py:_footer_sync_info():3864] logging synced files diff --git a/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/run-a9qblh0u.wandb b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/run-a9qblh0u.wandb new file mode 100644 index 
0000000000000000000000000000000000000000..437f9686a6e8622c2dec80595104988edc47111b Binary files /dev/null and b/EasyR1-new/examples/wandb/run-20250721_140734-a9qblh0u/run-a9qblh0u.wandb differ diff --git a/EasyR1-new/scripts/model_merger.py b/EasyR1-new/scripts/model_merger.py new file mode 100644 index 0000000000000000000000000000000000000000..4f4dd3daee1728f35e9fb0aaea94a04b0591a02c --- /dev/null +++ b/EasyR1-new/scripts/model_merger.py @@ -0,0 +1,187 @@ +# Copyright 2024 Bytedance Ltd. and/or its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os +import re +from concurrent.futures import ThreadPoolExecutor +from typing import Dict, List, Tuple + +import numpy as np +import torch +from torch.distributed._tensor import DTensor, Placement, Shard +from transformers import ( + AutoConfig, + AutoModelForCausalLM, + AutoModelForTokenClassification, + AutoModelForVision2Seq, + PretrainedConfig, + PreTrainedModel, +) + + +def merge_by_placement(tensors: List[torch.Tensor], placement: Placement): + if placement.is_replicate(): + return tensors[0] + elif placement.is_partial(): + raise NotImplementedError("Partial placement is not supported yet") + elif placement.is_shard(): + return torch.cat(tensors, dim=placement.dim).contiguous() + else: + raise ValueError(f"Unsupported placement: {placement}") + + +def upload_model_to_huggingface(local_path: str, remote_path: str): + # Push to hugging face + from huggingface_hub import HfApi + + api = HfApi() + api.create_repo(repo_id=remote_path, private=False, exist_ok=True) + api.upload_folder(repo_id=remote_path, folder_path=local_path, repo_type="model") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--local_dir", required=True, type=str, help="The path for your saved model") + parser.add_argument("--hf_upload_path", default=False, type=str, help="The path of the huggingface repo to upload") + args = parser.parse_args() + local_dir: str = args.local_dir + + assert not local_dir.endswith("huggingface"), "The local_dir should not end with huggingface." + + # copy rank zero to find the shape of (dp, fsdp) + rank = 0 + world_size = 0 + for filename in os.listdir(local_dir): + match = re.match(r"model_world_size_(\d+)_rank_0\.pt", filename) + if match: + world_size = match.group(1) + break + + assert world_size, "No model file with the proper format." 
+ + rank0_weight_path = os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt") + state_dict = torch.load(rank0_weight_path, map_location="cpu", weights_only=False) + pivot_key = sorted(state_dict.keys())[0] + weight = state_dict[pivot_key] + if isinstance(weight, DTensor): + # get sharding info + device_mesh = weight.device_mesh + mesh = device_mesh.mesh + mesh_dim_names = device_mesh.mesh_dim_names + else: + # for non-DTensor + mesh = np.array([int(world_size)], dtype=np.int64) + mesh_dim_names = ("fsdp",) + + print(f"Got device mesh {mesh}, mesh_dim_names {mesh_dim_names}") + + assert mesh_dim_names in (("fsdp",), ("ddp", "fsdp")), f"Unsupported mesh_dim_names {mesh_dim_names}." + + if "tp" in mesh_dim_names: + # fsdp * tp + total_shards = mesh.shape[-1] * mesh.shape[-2] + mesh_shape = (mesh.shape[-2], mesh.shape[-1]) + else: + # fsdp + total_shards = mesh.shape[-1] + mesh_shape = (mesh.shape[-1],) + + print(f"Processing {total_shards} model shards in total.") + model_state_dict_lst = [] + model_state_dict_lst.append(state_dict) + model_state_dict_lst.extend([""] * (total_shards - 1)) + + def process_one_shard(rank, model_state_dict_lst): + model_path = os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt") + state_dict = torch.load(model_path, map_location="cpu", weights_only=False) + model_state_dict_lst[rank] = state_dict + return state_dict + + with ThreadPoolExecutor(max_workers=min(32, os.cpu_count())) as executor: + for rank in range(1, total_shards): + executor.submit(process_one_shard, rank, model_state_dict_lst) + + state_dict: Dict[str, List[torch.Tensor]] = {} + param_placements: Dict[str, List[Placement]] = {} + keys = set(model_state_dict_lst[0].keys()) + for key in keys: + state_dict[key] = [] + for model_state_dict in model_state_dict_lst: + try: + tensor = model_state_dict.pop(key) + except Exception: + print(f"Cannot find key {key} in rank {rank}.") + + if isinstance(tensor, DTensor): + 
state_dict[key].append(tensor._local_tensor.bfloat16()) + placements = tuple(tensor.placements) + # replicated placement at ddp dimension can be discarded + if mesh_dim_names[0] == "ddp": + placements = placements[1:] + + if key not in param_placements: + param_placements[key] = placements + else: + assert param_placements[key] == placements + else: + state_dict[key].append(tensor.bfloat16()) + + del model_state_dict_lst + + for key in sorted(state_dict): + if not isinstance(state_dict[key], list): + print(f"No need to merge key {key}") + continue + + if key in param_placements: + # merge shards + placements: Tuple[Shard] = param_placements[key] + if len(mesh_shape) == 1: + # 1-D list, FSDP without TP + assert len(placements) == 1 + shards = state_dict[key] + state_dict[key] = merge_by_placement(shards, placements[0]) + else: + # 2-D list, FSDP + TP + raise NotImplementedError("FSDP + TP is not supported yet.") + else: + state_dict[key] = torch.cat(state_dict[key], dim=0) + + print("Merge completed.") + hf_path = os.path.join(local_dir, "huggingface") + config: PretrainedConfig = AutoConfig.from_pretrained(hf_path) + architectures: List[str] = getattr(config, "architectures", ["Unknown"]) + + if "ForTokenClassification" in architectures[0]: + AutoClass = AutoModelForTokenClassification + elif "ForCausalLM" in architectures[0]: + AutoClass = AutoModelForCausalLM + elif "ForConditionalGeneration" in architectures[0]: + AutoClass = AutoModelForVision2Seq + else: + raise NotImplementedError(f"Unknown architecture {architectures}.") + + with torch.device("meta"): + model: PreTrainedModel = AutoClass.from_config(config, torch_dtype=torch.bfloat16) + + assert isinstance(model, PreTrainedModel) + model.to_empty(device="cpu") + + print(f"Saving model to {hf_path}...") + model.save_pretrained(hf_path, state_dict=state_dict) + del state_dict, model + + if args.hf_upload_path: + upload_model_to_huggingface(hf_path, args.hf_upload_path) diff --git 
a/EasyR1-new/tests/check_license.py b/EasyR1-new/tests/check_license.py new file mode 100644 index 0000000000000000000000000000000000000000..4196c7a6a64a6b662bbde8f8a2550fbc111772cf --- /dev/null +++ b/EasyR1-new/tests/check_license.py @@ -0,0 +1,39 @@ +# Copyright 2024 Bytedance Ltd. and/or its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +from pathlib import Path + + +KEYWORDS = ("Copyright", "2024", "Bytedance") + + +def main(): + path_list: list[Path] = [] + for check_dir in sys.argv[1:]: + path_list.extend(Path(check_dir).glob("**/*.py")) + + for path in path_list: + with open(path.absolute(), encoding="utf-8") as f: + file_content = f.read().strip().split("\n") + license = "\n".join(file_content[:5]) + if not license: + continue + + print(f"Check license: {path}") + assert all(keyword in license for keyword in KEYWORDS), f"File {path} does not contain license." + + +if __name__ == "__main__": + main() diff --git a/EasyR1-new/tests/test_dataproto.py b/EasyR1-new/tests/test_dataproto.py new file mode 100644 index 0000000000000000000000000000000000000000..187346a922988c4b0dd3f13ac0603fc6aeef707c --- /dev/null +++ b/EasyR1-new/tests/test_dataproto.py @@ -0,0 +1,183 @@ +# Copyright 2024 Bytedance Ltd. and/or its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import os +from typing import Any, Dict, List, Optional + +import numpy as np +import pytest +import torch + +from verl.protocol import DataProto, pad_dataproto_to_divisor, unpad_dataproto + + +def _get_data_proto( + tensors: Optional[Dict[str, List[Any]]] = None, + non_tensors: Optional[Dict[str, List[Any]]] = None, + meta_info: Optional[Dict[str, Any]] = None, +) -> DataProto: + if tensors is None and non_tensors is None: + tensors = {"obs": [1, 2, 3, 4, 5, 6]} + non_tensors = {"labels": ["a", "b", "c", "d", "e", "f"]} + + if tensors is not None: + tensors = {k: torch.tensor(v) if not isinstance(v, torch.Tensor) else v for k, v in tensors.items()} + + if non_tensors is not None: + non_tensors = { + k: np.array(v, dtype=object) if not isinstance(v, np.ndarray) else v for k, v in non_tensors.items() + } + + meta_info = meta_info or {"info": "test_info"} + return DataProto.from_dict(tensors=tensors, non_tensors=non_tensors, meta_info=meta_info) + + +def _assert_equal(data1: DataProto, data2: Optional[DataProto] = None): + data2 = data2 or _get_data_proto() + if data1.batch is not None: + assert data1.batch.keys() == data2.batch.keys() + for key in data1.batch.keys(): + assert torch.all(data1.batch[key] == data2.batch[key]) + else: + assert data2.batch is None + + if data1.non_tensor_batch is not None: + assert data1.non_tensor_batch.keys() == data2.non_tensor_batch.keys() + for key in data1.non_tensor_batch.keys(): + assert np.all(data1.non_tensor_batch[key] == data2.non_tensor_batch[key]) + else: + assert data2.non_tensor_batch is None + + assert 
data1.meta_info == data2.meta_info + + +def test_tensor_dict_constructor(): + obs = torch.randn(100, 10) + act = torch.randn(100, 10, 3) + data = DataProto.from_dict(tensors={"obs": obs, "act": act}) + assert len(data) == 100 + + with pytest.raises(AssertionError): + data = DataProto.from_dict(tensors={"obs": obs, "act": act}, num_batch_dims=2) + + with pytest.raises(AssertionError): + data = DataProto.from_dict(tensors={"obs": obs, "act": act}, num_batch_dims=3) + + labels = np.array(["a", "b", "c"], dtype=object) + data = DataProto.from_dict(non_tensors={"labels": labels}) + assert len(data) == 3 + + +def test_getitem(): + data = _get_data_proto() + assert data[0].batch["obs"] == torch.tensor(1) + assert data[0].non_tensor_batch["labels"] == "a" + _assert_equal(data[1:3], _get_data_proto({"obs": [2, 3]}, {"labels": ["b", "c"]})) + _assert_equal(data[[0, 2]], _get_data_proto({"obs": [1, 3]}, {"labels": ["a", "c"]})) + _assert_equal(data[torch.tensor([1])], _get_data_proto({"obs": [2]}, {"labels": ["b"]})) + + +def test_select_pop(): + obs = torch.randn(100, 10) + act = torch.randn(100, 3) + dataset = _get_data_proto(tensors={"obs": obs, "act": act}, meta_info={"p": 1, "q": 2}) + selected_dataset = dataset.select(batch_keys=["obs"], meta_info_keys=["p"]) + + assert selected_dataset.batch.keys() == {"obs"} + assert selected_dataset.meta_info.keys() == {"p"} + assert dataset.batch.keys() == {"obs", "act"} + assert dataset.meta_info.keys() == {"p", "q"} + + popped_dataset = dataset.pop(batch_keys=["obs"], meta_info_keys=["p"]) + assert popped_dataset.batch.keys() == {"obs"} + assert popped_dataset.meta_info.keys() == {"p"} + assert dataset.batch.keys() == {"act"} + assert dataset.meta_info.keys() == {"q"} + + +def test_chunk_concat_split(): + data = _get_data_proto() + with pytest.raises(AssertionError): + data.chunk(5) + + chunked_data = data.chunk(2) + + assert len(chunked_data) == 2 + expected_data = _get_data_proto({"obs": [1, 2, 3]}, {"labels": ["a", "b", "c"]}) 
+ _assert_equal(chunked_data[0], expected_data) + + concat_data = DataProto.concat(chunked_data) + _assert_equal(concat_data, data) + + splitted_data = data.split(2) + assert len(splitted_data) == 3 + expected_data = _get_data_proto({"obs": [1, 2]}, {"labels": ["a", "b"]}) + _assert_equal(splitted_data[0], expected_data) + + +def test_reorder(): + data = _get_data_proto() + data.reorder(torch.tensor([3, 4, 2, 0, 1, 5])) + expected_data = _get_data_proto({"obs": [4, 5, 3, 1, 2, 6]}, {"labels": ["d", "e", "c", "a", "b", "f"]}) + _assert_equal(data, expected_data) + + +@pytest.mark.parametrize("interleave", [True, False]) +def test_repeat(interleave: bool): + data = _get_data_proto({"obs": [1, 2]}, {"labels": ["a", "b"]}) + repeated_data = data.repeat(repeat_times=2, interleave=interleave) + expected_tensors = {"obs": [1, 1, 2, 2] if interleave else [1, 2, 1, 2]} + expected_non_tensors = {"labels": ["a", "a", "b", "b"] if interleave else ["a", "b", "a", "b"]} + _assert_equal(repeated_data, _get_data_proto(expected_tensors, expected_non_tensors)) + + +@pytest.mark.parametrize("size_divisor", [2, 3]) +def test_dataproto_pad_unpad(size_divisor: int): + data = _get_data_proto({"obs": [1, 2, 3]}, {"labels": ["a", "b", "c"]}) + # test size_divisor=2 + padded_data, pad_size = pad_dataproto_to_divisor(data, size_divisor=size_divisor) + unpadded_data = unpad_dataproto(padded_data, pad_size=pad_size) + + if size_divisor == 2: + assert pad_size == 1 + expected_tensors = {"obs": [1, 2, 3, 1]} + expected_non_tensors = {"labels": ["a", "b", "c", "a"]} + expected_data = _get_data_proto(expected_tensors, expected_non_tensors) + else: + assert pad_size == 0 + expected_data = data + + _assert_equal(padded_data, expected_data) + _assert_equal(unpadded_data, data) + + +def test_data_proto_save_load(): + data = _get_data_proto() + data.save_to_disk("test_data.pt") + loaded_data = DataProto.load_from_disk("test_data.pt") + os.remove("test_data.pt") + _assert_equal(data, loaded_data) + + 
+def test_union_tensor_dict(): + obs = torch.randn(100, 10) + data1 = _get_data_proto({"obs": obs, "act": torch.randn(100, 3)}) + data2 = _get_data_proto({"obs": obs, "rew": torch.randn(100)}) + data1.union(data2) + + data1 = _get_data_proto({"obs": obs, "act": torch.randn(100, 3)}) + data2 = _get_data_proto({"obs": obs + 1, "rew": torch.randn(100)}) + with pytest.raises(ValueError): + data1.union(data2) diff --git a/EasyR1-new/tests/test_dataset.py b/EasyR1-new/tests/test_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..514b862744aad2b85b1dddc622c22689306acadb --- /dev/null +++ b/EasyR1-new/tests/test_dataset.py @@ -0,0 +1,60 @@ +# Copyright 2024 Bytedance Ltd. and/or its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from PIL.Image import Image + +from verl.utils.dataset import RLHFDataset +from verl.utils.tokenizer import get_processor, get_tokenizer + + +def test_image_dataset(): + tokenizer = get_tokenizer("Qwen/Qwen2.5-VL-7B-Instruct", use_fast=True) + processor = get_processor("Qwen/Qwen2.5-VL-7B-Instruct", use_fast=True) + dataset = RLHFDataset( + data_path="hiyouga/geometry3k@test", + tokenizer=tokenizer, + processor=processor, + prompt_key="problem", + answer_key="answer", + image_key="images", + max_prompt_length=16, + truncation="right", + filter_overlong_prompts=False, + ) + token_ids = [151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 151652, 151655] + assert set(dataset[0].keys()) == { + "problem", + "ground_truth", + "input_ids", + "attention_mask", + "position_ids", + "raw_prompt_ids", + "multi_modal_data", + } + assert dataset[0]["problem"] == ( + "Chords $\\overline{A C}$ and $\\overline{D F}$ are equidistant from the center. " + "If the radius of $\\odot G$ is 26 find $A C$" + ) + assert dataset[0]["ground_truth"] == "48" + assert torch.all(dataset[0]["input_ids"] == torch.tensor(token_ids)) + assert torch.all(dataset[0]["attention_mask"] == torch.ones(16)) + assert torch.all(dataset[0]["position_ids"] == torch.arange(16).unsqueeze(0).expand(3, -1)) + assert list(dataset[0]["position_ids"].size()) == [3, 16] # avoid fake positive caused by broadcasting + assert dataset[0]["raw_prompt_ids"] == token_ids + assert isinstance(dataset[0]["multi_modal_data"]["images"][0], Image) + + +if __name__ == "__main__": + test_image_dataset() diff --git a/EasyR1-new/verl/ProtT3/blip2.py b/EasyR1-new/verl/ProtT3/blip2.py new file mode 100644 index 0000000000000000000000000000000000000000..40c386877ebfe65229e7550b3f6a92b3df05d867 --- /dev/null +++ b/EasyR1-new/verl/ProtT3/blip2.py @@ -0,0 +1,126 @@ +""" + Copyright (c) 2023, salesforce.com, inc. + All rights reserved. 
+ SPDX-License-Identifier: BSD-3-Clause + For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause +""" +import torch +import torch.nn as nn + +from lavis.models.base_model import BaseModel +from lavis.models.blip2_models.Qformer import BertConfig, BertLMHeadModel +from transformers import BertTokenizer, BitsAndBytesConfig +from transformers import EsmTokenizer, EsmModel +import os +from pathlib import Path # 添加到文件顶部 + + +def get_gpu_memory(device=0): + # t = torch.cuda.get_device_properties(device).total_memory + # r = torch.cuda.memory_reserved(device) + # a = torch.cuda.memory_allocated(device) + # f = r-a # free inside reserved + free, total = torch.cuda.mem_get_info(device) + free = free / (1024 ** 3) + total = total / (1024 ** 3) + return free, total-free, total + + +class Blip2Base(BaseModel): + # @classmethod + # def init_tokenizer(cls): + # tokenizer = BertTokenizer.from_pretrained('./bert_pretrained/') + # tokenizer.add_special_tokens({"bos_token": "[DEC]"}) + # return tokenizer + + @classmethod + def init_Qformer(cls, model_name, num_query_token, plm_width, cross_attention_freq=2): + # assert model_name == 'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract' + # print("bert load microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext") + + print(f"Loading Qformer from: {model_name}") + + # 修改2:添加本地路径检查逻辑 + if not model_name.startswith('microsoft/') and Path(model_name).exists(): + print("Loading from local path...") + else: + print("Loading from Hugging Face Hub...") + + encoder_config = BertConfig.from_pretrained(model_name) + encoder_config.encoder_width = plm_width + # insert cross-attention layer every other block + encoder_config.add_cross_attention = True + encoder_config.cross_attention_freq = cross_attention_freq + encoder_config.query_length = num_query_token + + Qformer = BertLMHeadModel.from_pretrained(model_name, config=encoder_config) + query_tokens = nn.Parameter( + torch.zeros(1, 
num_query_token, encoder_config.hidden_size) + ) + query_tokens.data.normal_(mean=0.0, std=encoder_config.initializer_range) + + tokenizer = BertTokenizer.from_pretrained(model_name) + tokenizer.add_special_tokens({"bos_token": "[DEC]"}) + return tokenizer, Qformer, query_tokens + + + def init_protein_encoder(self, plm_name, load_4bit=False): + # assert plm_name.startswith('facebook/esm2') + # plm_tokenizer = EsmTokenizer.from_pretrained(plm_name) + # 检查是否为本地路径(判断是否存在文件夹或文件) + if os.path.isdir(plm_name) or os.path.exists(os.path.join(plm_name, "config.json")): + print(f"Loading local PLM from {plm_name}") + plm_tokenizer = EsmTokenizer.from_pretrained(plm_name) + else: + # 保留远程加载逻辑(可选) + print(f"Loading remote PLM from {plm_name}") + plm_tokenizer = EsmTokenizer.from_pretrained(plm_name) + + if not load_4bit: + plm = EsmModel.from_pretrained(plm_name, add_pooling_layer=False, torch_dtype=torch.bfloat16) + else: + quant_config = BitsAndBytesConfig( + load_in_4bit=True, + load_in_8bit=False, + llm_int8_threshold=6.0, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=torch.bfloat16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type='nf4', + ) + ## give a device map that assign all layers to device 0 + outputs = get_gpu_memory(6) + used_memory = outputs[1] + if used_memory > 1: + device_map = {"": 7} + else: + device_map = {"": 6} + plm = EsmModel.from_pretrained( + plm_name, + add_pooling_layer=False, + quantization_config=quant_config, + load_in_4bit=True, + load_in_8bit=False, + device_map=device_map, + torch_dtype=torch.bfloat16, + ) + + plm.num_features = plm.config.hidden_size + ln_layer = nn.LayerNorm(plm.num_features) + return plm_tokenizer, plm, ln_layer + + +def disabled_train(self, mode=True): + """Overwrite model.train with this function to make sure train/eval mode + does not change anymore.""" + return self + + +# class LayerNorm(nn.LayerNorm): +# """Subclass torch's LayerNorm to handle fp16.""" + +# def forward(self, x: torch.Tensor): +# 
orig_type = x.dtype +# ret = super().forward(x.type(torch.float32)) +# return ret.type(orig_type) + diff --git a/EasyR1-new/verl/ProtT3/blip2_opt.py b/EasyR1-new/verl/ProtT3/blip2_opt.py new file mode 100644 index 0000000000000000000000000000000000000000..aff4b8c549d37d8e853c9689268ec06a2a206ea9 --- /dev/null +++ b/EasyR1-new/verl/ProtT3/blip2_opt.py @@ -0,0 +1,450 @@ +""" + Copyright (c) 2023, salesforce.com, inc. + All rights reserved. + SPDX-License-Identifier: BSD-3-Clause + For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause +""" +import logging +import torch +import torch.nn as nn +from torch.cuda.amp import autocast as autocast +# from peft import get_peft_config, get_peft_model, get_peft_model_state_dict, LoraConfig, TaskType, PeftModel +from lavis.models.blip2_models.blip2 import disabled_train +from .blip2 import Blip2Base +from transformers import AutoTokenizer +from transformers import OPTForCausalLM +from transformers import AutoTokenizer, AutoModelForCausalLM +# from opendelta import LoraModel + +from transformers import BertTokenizer, BitsAndBytesConfig +from .help_funcs import hf_enable_gradient_checkpointing +import json +# from accelerate import Accelerator +# import torch.distributed as dist + +# from peft.tuners.lora import LoraLayer +# from peft import ( +# prepare_model_for_kbit_training, +# LoraConfig as PeftLoraConfig, +# get_peft_model, +# PeftModel +# ) + +# from opendelta.delta_configs + +opt_model_list = [ + "facebook/galactica-125m", + "facebook/galactica-1.3b", + "facebook/galactica-6.7b", + "facebook/galactica-30b", +] + +def get_gpu_memory(device=0): + # t = torch.cuda.get_device_properties(device).total_memory + # r = torch.cuda.memory_reserved(device) + # a = torch.cuda.memory_allocated(device) + # f = r-a # free inside reserved + free, total = torch.cuda.mem_get_info(device) + free = free / (1024 ** 3) + total = total / (1024 ** 3) + return free, total-free, total + +def 
mask_by_len(input, lens, fill_value=0): + ''' + input: shape = [N, D] + lens: shape = [N] + ''' + mask = torch.arange(input.shape[1], device=input.device).reshape(1, -1) + mask = mask < lens.reshape(-1, 1) + input[mask] = fill_value + return input + + + +class Blip2OPT(Blip2Base): + """ + BLIP2 first-stage model with Q-former and ViT. + Supported model types: + - pretrained: pretrained model with vit-g + - pretrain_vitL: pretrained model with vit-large + - coco: fintuned model on coco + Usage: + >>> from lavis.models import load_model + >>> model = load_model("blip2", "pretrain") + """ + def __init__( + self, + bert_name, + num_query_token=32, + cross_attention_freq=2, + plm_model="facebook/esm2_t30_150M_UR50D", + plm_tune='freeze', + llm_name="facebook/galactica-1.3b", + llm_tune='freeze', + peft_dir='', + args=None, + ): + super().__init__() + #self.args = args + #self.enbale_gradient_checkpointing = args.enbale_gradient_checkpointing + + self.plm_tokenizer, self.plm, self.ln_layer = self.init_protein_encoder(plm_model) + self.plm_tune = plm_tune + # if plm_tune == 'freeze': + # for name, param in self.plm.named_parameters(): + # param.requires_grad = False + # self.plm = self.plm.eval() + # self.plm.train = disabled_train + # logging.info("freeze plm encoder") + # elif plm_tune == 'lora': + # lora_config = DeltaLoraConfig(args.lora_r, + # args.lora_alpha, + # args.lora_dropout, + # modified_modules=["query", "value"]) + # self.delta = LoraModel.from_config(lora_config, self.plm) + # self.delta.freeze_module(set_state_dict=False) + # self.delta.log() + # else: + # raise NotImplementedError() + + self.num_query_token = num_query_token + self.qformer_tokenizer, self.Qformer, self.query_tokens = self.init_Qformer(bert_name, num_query_token, self.plm.num_features, cross_attention_freq) + ### remove the unused parameters + self.Qformer.cls = None + self.Qformer.bert.embeddings.word_embeddings = None + self.Qformer.bert.embeddings.position_embeddings = None + for layer 
in self.Qformer.bert.encoder.layer: + layer.output = None + layer.intermediate = None + + ## initialize llm model + # self.init_distributed() + self.llm_model, self.llm_tokenizer = self.load_llm(llm_name) + + #self.llm_model, self.llm_tokenizer = self.load_model_on_single_gpu(llm_name) + self.eos_token_id = self.llm_tokenizer.eos_token_id + self.pad_token_id = self.llm_tokenizer.pad_token_id + + # if llm_tune == 'freeze': + # for name, param in self.llm_model.named_parameters(): + # param.requires_grad = False + # elif llm_tune == 'full': + # for name, param in self.llm_model.named_parameters(): + # param.requires_grad = True + # elif llm_tune == 'lora': + # lora_config = DeltaLoraConfig(args.lora_r, + # args.lora_alpha, + # args.lora_dropout,) + # self.delta = LoraModel.from_config(lora_config, self.llm_model) + # self.delta.freeze_module(set_state_dict=False) + # self.delta.log() + # elif llm_tune == 'mid_lora': + # lora_config = DeltaLoraConfig(args.lora_r, args.lora_alpha, args.lora_dropout, modified_modules=["q_proj", "v_proj", 'k_proj', "out_proj", "fc1", "fc2"]) + # self.delta = LoraModel.from_config(lora_config, self.llm_model) + # self.delta.freeze_module(set_state_dict=False) + # self.delta.log() + # elif llm_tune == 'peft_lora': + # config = PeftLoraConfig( + # r=args.lora_r, + # lora_alpha=args.lora_alpha, + # # target_modules=modules, + # lora_dropout=args.lora_dropout, + # bias="none", + # task_type="CAUSAL_LM", + # ) + # self.llm_model = get_peft_model(self.llm_model, config) + # for name, module in self.llm_model.named_modules(): + # if isinstance(module, LoraLayer): + # if True: + # module = module.to(torch.bfloat16) + # if 'norm' in name: + # module = module.to(torch.float32) + # if 'lm_head' in name or 'embed_tokens' in name: + # if hasattr(module, 'weight'): + # if True and module.weight.dtype == torch.float32: + # module = module.to(torch.bfloat16) + # else: + # raise NotImplementedError() + + ## fixme: this is different from the original BLIP2 
+ # self.eos_token_id = self.llm_tokenizer( + # "\n", add_special_tokens=False + # ).input_ids[0] + self.opt_proj = nn.Linear(self.Qformer.config.hidden_size, self.llm_model.config.hidden_size) + + def load_llm(self, llm_model, load_4bit=False, enable_gradient_checkpointing=True): + llm_tokenizer = AutoTokenizer.from_pretrained(llm_model, use_fast=False, padding_side='right') + llm_tokenizer.add_special_tokens({'pad_token': ''}) + + special_tokens_dict = {'additional_special_tokens': ['', '']} + llm_tokenizer.add_special_tokens(special_tokens_dict) + + llm_model = AutoModelForCausalLM.from_pretrained(llm_model, torch_dtype=torch.bfloat16) + llm_model.resize_token_embeddings(len(llm_tokenizer)) ## this will cause bug when + + return llm_model, llm_tokenizer + + + # def forward(self, batch): + # prot_batch, text_batch = batch + # prot_embeds = self.plm(**prot_batch, return_dict=True) + # prot_embeds = prot_embeds.last_hidden_state + # if self.plm_tune == 'freeze': + # prot_embeds = prot_embeds.detach() + # prot_embeds = self.ln_layer(prot_embeds) + # device = prot_embeds.device + # query_tokens = self.query_tokens.expand(prot_embeds.shape[0], -1, -1) + # query_output = self.Qformer.bert( + # query_embeds=query_tokens, + # encoder_hidden_states=prot_embeds, + # encoder_attention_mask=prot_batch.attention_mask, + # return_dict=True, + # ) + # prot_tokens = self.opt_proj(query_output.last_hidden_state) + # prot_mask = torch.ones(prot_tokens.shape[:2], dtype=text_batch.attention_mask.dtype, device=device) + # prot_empty_targets = torch.ones(prot_tokens.shape[:2], dtype=torch.long, device=device).fill_(-100) + + # targets = text_batch.input_ids.masked_fill(text_batch.input_ids == self.llm_tokenizer.pad_token_id, -100) + # targets = targets.masked_fill(text_batch.token_type_ids == 0, -100) + # targets = torch.cat([prot_empty_targets, targets], dim=1) + + # inputs_embeds = self.llm_model.get_input_embeddings()(text_batch.input_ids) + # inputs_embeds = 
torch.cat((prot_tokens, inputs_embeds), dim=1) + # attention_mask = torch.cat([prot_mask, text_batch.attention_mask], dim=1) + + # outputs = self.llm_model( + # inputs_embeds=inputs_embeds, + # attention_mask=attention_mask, + # return_dict=True, + # labels=targets, + # ) + # loss = outputs.loss + # return loss + + def forward(self, batch): + prot_batch, prompt_batch, text_dict = batch + text_seqs = text_dict['targets'] + + prot_embeds = self.plm(**prot_batch, return_dict=True) + prot_embeds = prot_embeds.last_hidden_state + if self.plm_tune == 'freeze': + prot_embeds = prot_embeds.detach() + prot_embeds = self.ln_layer(prot_embeds) + device = prot_embeds.device + query_tokens = self.query_tokens.expand(prot_embeds.shape[0], -1, -1) + query_output = self.Qformer.bert( + query_embeds=query_tokens, + encoder_hidden_states=prot_embeds, + encoder_attention_mask=prot_batch.attention_mask, + return_dict=True, + ) + prot_tokens = self.opt_proj(query_output.last_hidden_state) + prot_mask = torch.ones(prot_tokens.shape[:2], dtype=torch.long, device=device) + + # === Step 3: 编码 prompt 输入 === + prompt_embeds = self.llm_model.get_input_embeddings()(prompt_batch.input_ids) # [B, L_prompt, D_llm] + prompt_mask = prompt_batch['attention_mask'] + + + text_batch = self.llm_tokenizer( + list(text_seqs), + padding='longest', + truncation=True, + max_length=1024, + return_tensors='pt' + ).to(device) + target_embeds = self.llm_model.get_input_embeddings()(text_batch['input_ids']) # [B, T, D] + target_mask = text_batch['attention_mask'] + targets = text_batch['input_ids'].masked_fill(text_batch['input_ids'] == self.llm_tokenizer.pad_token_id, -100) + + inputs_embeds = torch.cat([prot_tokens, prompt_embeds, target_embeds], dim=1) + attention_mask = torch.cat([prot_mask, prompt_mask, target_mask], dim=1) + + # === Step 7: 构造 labels,只监督 target 部分 === + prot_label_pad = torch.full(prot_tokens.shape[:2], -100, dtype=torch.long, device=device) + prompt_label_pad = 
torch.full(prompt_mask.shape, -100, dtype=torch.long, device=device) + labels = torch.cat([prot_label_pad, prompt_label_pad, targets], dim=1) + + # === Step 8: 送入 LLM === + outputs = self.llm_model( + inputs_embeds=inputs_embeds, + attention_mask=attention_mask, + labels=labels, + return_dict=True, + ) + loss = outputs.loss + # prot_mask = torch.ones(prot_tokens.shape[:2], dtype=text_batch.attention_mask.dtype, device=device) + # prot_empty_targets = torch.ones(prot_tokens.shape[:2], dtype=torch.long, device=device).fill_(-100) + # empty_targets = torch.ones(prompt_batch.attention_mask.shape, dtype=torch.long, device=device).fill_(-100) + # targets = text_batch.input_ids.masked_fill(text_batch.input_ids == self.llm_tokenizer.pad_token_id, -100) + # targets = torch.cat([prot_empty_targets, empty_targets, targets], dim=1) + + # prompt_embeds = self.llm_model.get_input_embeddings()(prompt_batch.input_ids) + # inputs_embeds = self.llm_model.get_input_embeddings()(text_batch.input_ids) + # inputs_embeds = torch.cat((prot_tokens, prompt_embeds, inputs_embeds), dim=1) + # attention_mask = torch.cat([prot_mask, prompt_batch.attention_mask, text_batch.attention_mask], dim=1) + + # outputs = self.llm_model( + # inputs_embeds=inputs_embeds, + # attention_mask=attention_mask, + # return_dict=True, + # labels=targets, + # ) + # loss = outputs.loss + return loss + + # def forwardv2(self, batch): + # prot_batch, prompt_batch, text_batch = batch + # prot_embeds = self.plm(**prot_batch, return_dict=True) + # prot_embeds = prot_embeds.last_hidden_state + # if self.plm_tune == 'freeze': + # prot_embeds = prot_embeds.detach() + # prot_embeds = self.ln_layer(prot_embeds) + # device = prot_embeds.device + # query_tokens = self.query_tokens.expand(prot_embeds.shape[0], -1, -1) + # query_output = self.Qformer.bert( + # query_embeds=query_tokens, + # encoder_hidden_states=prot_embeds, + # encoder_attention_mask=prot_batch.attention_mask, + # return_dict=True, + # ) + # prot_tokens = 
self.opt_proj(query_output.last_hidden_state) + # prot_mask = torch.ones(prot_tokens.shape[:2], dtype=text_batch.attention_mask.dtype, device=device) + # targets = text_batch.input_ids.masked_fill(text_batch.input_ids == self.llm_tokenizer.pad_token_id, -100) + + # ### forward prefix + # prompt_embeds = self.llm_model.get_input_embeddings()(prompt_batch.input_ids) + # prefix_embeds = torch.cat([prot_tokens, prompt_embeds], dim=1) + # prefix_mask = torch.cat([prot_mask, prompt_batch.attention_mask], dim=1) + # prefix_output = self.llm_model.model( + # inputs_embeds=prefix_embeds, + # attention_mask=prefix_mask, + # use_cache=True, + # return_dict=True, + # ) + + # ## forward decoding + # if False: + # attention_mask = torch.cat([prot_mask, prompt_batch.attention_mask, text_batch.attention_mask], dim=1) + # else: + # attention_mask = text_batch.attention_mask + # print(prefix_output.past_key_values) + # outputs = self.llm_model( + # input_ids=text_batch.input_ids, + # attention_mask=attention_mask, + # past_key_values=prefix_output.past_key_values, + # return_dict=True, + # labels=targets, + # ) + # loss = outputs.loss + # return loss + + @torch.no_grad() + def generate( + self, + samples, + do_sample=False, + num_beams=5, + max_length=128, + min_length=1, + top_p=0.9, + repetition_penalty=1.0, + length_penalty=1.0, + num_captions=1, + temperature=1, + ): + """ + Args: + samples (dict): A dictionary containing the following keys: + - image (torch.Tensor): A tensor of shape (batch_size, 3, H, W) + num_beams (int): Number of beams for beam search. 1 means no beam search. + max_length (int): The maximum length of the sequence to be generated. + min_length (int): The minimum length of the sequence to be generated. + top_p (float): The cumulative probability for nucleus sampling. + repetition_penalty (float): The parameter for repetition penalty. 1.0 means no penalty. + num_captions (int): Number of captions to be generated for each image. 
+ Returns: + captions (list): A list of strings of length batch_size * num_captions. + """ + prot_batch = samples['prot_batch'] + prompt_batch = samples['prompt_batch'] + + # with self.maybe_autocast(): + prot_embeds = self.plm(**prot_batch, return_dict=True) + prot_embeds = self.ln_layer(prot_embeds.last_hidden_state) + + query_tokens = self.query_tokens.expand(prot_embeds.shape[0], -1, -1) + query_output = self.Qformer.bert( + query_embeds=query_tokens, + encoder_hidden_states=prot_embeds, + encoder_attention_mask=prot_batch['attention_mask'], + return_dict=True, + ) + prot_tokens = self.opt_proj(query_output.last_hidden_state) + + + + # prompt_batch = samples['prompt_batch'] + prompt_input_ids = prompt_batch['input_ids'] # shape: [B, L] + # for i, ids in enumerate(prompt_input_ids): + # print(f"Prompt {i} token length: {len(ids)}") + decoded_texts = [self.llm_tokenizer.decode(ids, skip_special_tokens=True) for ids in prompt_input_ids] + #print(decoded_texts) + save_path = "decoded_prompts.json" + + # 将 list 写入 JSON 文件 + with open(save_path, 'w', encoding='utf-8') as f: + json.dump(decoded_texts, f, ensure_ascii=False, indent=4) + + prompt_attention_mask = prompt_batch['attention_mask'] + prompt_embeds = self.llm_model.model.embed_tokens(prompt_input_ids) + + + + # device = prompt_input_ids.device + # prot_token_id = self.llm_tokenizer.convert_tokens_to_ids("") + # text_token_id = self.llm_tokenizer.convert_tokens_to_ids("") + # text_token_embed = self.llm_model.model.embed_tokens(torch.tensor([[text_token_id]], device=device)).expand(prompt_embeds.shape[0], -1, -1) + # prot_token_embed = self.llm_model.model.embed_tokens(torch.tensor([[prot_token_id]], device=device)).expand(prompt_embeds.shape[0], -1, -1) + + # inputs_embeds = torch.cat([text_token_embed, prompt_embeds, prot_token_embed, prot_tokens], dim=1) + + # B = prompt_input_ids.shape[0] + # special_attention = torch.ones((B, 1), dtype=prompt_attention_mask.dtype, device=prompt_attention_mask.device) + # 
prot_attention_mask = torch.ones(prot_tokens.shape[:2], dtype=prompt_attention_mask.dtype, device=prompt_attention_mask.device) + # attention_mask = torch.cat([ + # special_attention, # [TEXT] + # prompt_attention_mask, # prompt + # special_attention, # [PROT] + # prot_attention_mask # protein + # ], dim=1) + # inputs_embeds = torch.cat((prot_tokens, prompt_embeds), dim=1) + inputs_embeds = torch.cat((prompt_embeds, prot_tokens), dim=1) + + prot_attention_mask = torch.ones(prot_tokens.shape[:2], dtype=prompt_attention_mask.dtype, device=prompt_attention_mask.device) + #attention_mask = torch.cat([prot_attention_mask, prompt_attention_mask], dim=1) + attention_mask = torch.cat([ prompt_attention_mask,prot_attention_mask], dim=1) + + + + outputs = self.llm_model.generate( + inputs_embeds=prompt_embeds, + attention_mask=prompt_attention_mask, + do_sample=do_sample, + top_p=top_p, + temperature=temperature, + num_beams=num_beams, + max_new_tokens=max_length, + min_length=min_length, + # pad_token_id=self.pad_token_id, + eos_token_id=self.eos_token_id, + repetition_penalty=repetition_penalty, + length_penalty=length_penalty, + num_return_sequences=num_captions, + use_cache=True, + cache_implementation="hybrid" + ) + output_text = self.llm_tokenizer.batch_decode(outputs, skip_special_tokens=True) + output_text = [text.strip() for text in output_text] + # print(output_text) + return output_text diff --git a/EasyR1-new/verl/ProtT3/blip2_stage2.py b/EasyR1-new/verl/ProtT3/blip2_stage2.py new file mode 100644 index 0000000000000000000000000000000000000000..6ec563453278fa3220290b4f3283f6d21ae6ca35 --- /dev/null +++ b/EasyR1-new/verl/ProtT3/blip2_stage2.py @@ -0,0 +1,344 @@ +import os +import torch +from .blip2_opt import Blip2OPT +import pytorch_lightning as pl +from torch import optim +from lavis.common.optims import LinearWarmupCosineLRScheduler, LinearWarmupStepLRScheduler +import json +import torch.distributed as dist +#from peft import LoraConfig, TaskType +from typing 
def get_module_state_dict(state_dict, module_name):
    """Extract the sub-state-dict stored under ``module_name``.

    Keys like ``"<module_name>.<sub>"`` are returned with the prefix stripped.
    If an entry's key equals ``module_name`` exactly, that single value is
    returned directly.

    BUG FIX: previously matched with ``key.startswith(module_name)``, so e.g.
    ``module_name='encoder'`` wrongly captured keys of ``encoder2.*``. Now the
    prefix must be followed by a '.' (or match exactly).
    """
    prefix = module_name + '.'
    module_state_dict = {}
    for key, value in state_dict.items():
        if key == module_name:
            return value
        if key.startswith(prefix):
            module_state_dict[key[len(prefix):]] = value
    return module_state_dict


class Blip2Stage2(pl.LightningModule):
    """Lightning wrapper around Blip2OPT for stage-2 (LLM-conditioned) training."""

    def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
        """Drop frozen (non-trainable) weights from the checkpoint to save space."""
        to_be_removed = []
        for key, value in checkpoint['state_dict'].items():
            try:
                if not self.get_parameter(key).requires_grad:
                    to_be_removed.append(key)
            except AttributeError:
                # Keys that are not parameters (e.g. buffers) are dropped too.
                to_be_removed.append(key)
        for key in to_be_removed:
            checkpoint['state_dict'].pop(key)

    def __init__(self, args):
        super().__init__()
        if isinstance(args, dict):
            args = AttrDict(**args)

        self.args = args
        # BUG FIX: a trailing comma previously made this a 1-tuple ``(10,)``,
        # which breaks later ``epoch % self.caption_eval_epoch`` arithmetic.
        self.caption_eval_epoch = 10  # args.other.caption_eval_epoch
        self.do_sample = False
        self.num_beams = 5  # args.OPT.num_beams
        self.max_inference_len = 128
        self.min_inference_len = 1
        self.llm_tune = 'freeze'
        self.enable_flash = False

        # NOTE(review): hard-coded local paths; should come from ``args``.
        self.blip2 = Blip2OPT(
            "/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
            8,  # args.Bert.num_query_token
            2,  # args.Bert.cross_attention_freq
            "/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",  # args.plm_model.plm_model
            'freeze',  # args.plm_model.plm_tune
            "/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged",  # args.OPT.llm_name
            'freeze',  # args.OPT.llm_tune
            "",  # args.OPT.peft_dir
            args,
        )
ckpt['state_dict'] +# state_dict = {k.split('blip2qformer.')[1]:v for k, v in state_dict.items()} +# self.blip2.load_state_dict(state_dict, strict=False) +# return self + +# def configure_optimizers(self): +# self.trainer.fit_loop.setup_data() +# warmup_steps = min(len(self.trainer.train_dataloader), self.args.warmup_steps) +# optimizer = optim.AdamW(self.parameters(), lr=self.args.init_lr, weight_decay=self.args.weight_decay) +# if self.args.scheduler == 'linear_warmup_cosine_lr': +# self.scheduler = LinearWarmupCosineLRScheduler(optimizer, self.args.max_epochs, self.args.min_lr, self.args.init_lr, warmup_steps, self.args.warmup_lr) +# elif self.args.scheduler == 'linear_warmup_step_lr': +# self.scheduler = LinearWarmupStepLRScheduler(optimizer, self.args.max_epochs, self.args.min_lr, self.args.init_lr, self.args.lr_decay_rate, self.args.warmup_lr, warmup_steps) +# elif self.args.scheduler == 'None': +# self.scheduler = None +# else: +# raise NotImplementedError() +# return optimizer + +# def save_predictions(self, predictions, targets, q_types=None, log_prefix=''): +# assert len(predictions) == len(targets) +# if log_prefix: +# name = f'{log_prefix}_predictions.txt' +# else: +# name = 'predictions.txt' +# with open(os.path.join(self.logger.log_dir, name), 'w', encoding='utf8') as f: +# if q_types is not None: +# for p, t, q in zip(predictions, targets, q_types): +# line = {'prediction': p, 'target': t, 'q_type': q} +# f.write(json.dumps(line, ensure_ascii=True) + '\n') +# else: +# for p, t in zip(predictions, targets): +# line = {'prediction': p, 'target': t} +# f.write(json.dumps(line, ensure_ascii=True) + '\n') + +# def on_validation_epoch_start(self) -> None: +# if self.enable_flash: +# replace_opt_attn_with_original_attn() +# self.saved_dict_list = [] +# self.prediction_list0 = [] +# self.target_list0 = [] +# self.prediction_list1 = [] +# self.target_list1 = [] + +# @torch.no_grad() +# def validation_step(self, batch, batch_idx, dataloader_idx=0): +# 
prot_batch, prompt_batch, target_dict = batch +# if (dataloader_idx % 2) == 0: +# # text_batch = batch[-1] +# # batch_size = text_batch.input_ids.shape[0] +# batch_size = len(target_dict['targets']) # ✅ 正确获取batch大小 +# loss = self.blip2(batch) +# ###============== Overall Loss ===================### +# self.log(f"dataloader{dataloader_idx}/val loss", float(loss), batch_size=batch_size, sync_dist=True) +# elif (dataloader_idx % 2) == 1: +# if (self.current_epoch+1) % self.caption_eval_epoch != 0: +# return +# # prot_batch, prompt_batch, target_dict = batch +# ###============== Captioning Results ===================### +# samples = {'prot_batch': prot_batch, 'prompt_batch': prompt_batch} +# predictions = self.blip2.generate( +# samples, +# do_sample=self.do_sample, +# num_beams=self.num_beams, +# max_length=self.max_inference_len, +# min_length=self.min_inference_len +# ) +# target_dict['predictions'] = predictions +# self.saved_dict_list.append(target_dict) + +# def gather_dict_results(self, dict_list): +# list_of_dict_list = [None for _ in range(self.trainer.world_size)] +# dist.all_gather_object(list_of_dict_list, dict_list) +# dict_list = [i for ii in list_of_dict_list for i in ii] ## dict list, each dict has values that are lists of predictions, etc. +# keys = dict_list[0].keys() +# gathered_dict = {} # each value is a list of predictions, etc. 
+# for key in keys: +# gathered_dict[key] = [i for d in dict_list for i in d[key]] +# dict_list = [] +# for i in range(len(gathered_dict['predictions'])): +# d = {k:gathered_dict[k][i] for k in keys} +# dict_list.append(d) +# return dict_list + +# def save_results(self, dict_list, log_prefix=""): +# ## save the results +# if log_prefix: +# name = f'{log_prefix}_predictions.txt' +# else: +# name = 'predictions.txt' +# with open(name, 'w', encoding='utf8') as f: +# for d in dict_list: +# f.write(json.dumps(d, ensure_ascii=True) + '\n') + +# def on_validation_epoch_end(self): +# if self.enable_flash: +# replace_opt_attn_with_flash_attn() +# if (self.current_epoch+1) % self.caption_eval_epoch != 0: +# return +# result_list = self.gather_dict_results(self.saved_dict_list) +# ## empty cache +# self.saved_dict_list = [] + +# if self.global_rank == 0: +# self.save_results(result_list, 'dataset0') +# all_predictions = [i['predictions'] for i in result_list] +# all_targets = [i['targets'] for i in result_list] + +# log_prefix = 'dataset0' ## fixme: this is just a placeholder +# if 'q_types' in result_list[0]: +# ## evaluate protein qa +# pass +# else: +# ## evaluate captioning +# bleu2, bleu4, rouge_1, rouge_2, rouge_l, meteor_score = \ +# caption_evaluate(all_predictions, all_targets, self.blip2.llm_tokenizer, self.max_inference_len) +# acc = evaluate_exact_match(all_predictions, all_targets) +# self.log(f"{log_prefix}/acc", acc, sync_dist=False) +# self.log(f"{log_prefix}/bleu2", bleu2, sync_dist=False) +# self.log(f"{log_prefix}/bleu4", bleu4, sync_dist=False) +# self.log(f"{log_prefix}/rouge_1", rouge_1, sync_dist=False) +# self.log(f"{log_prefix}/rouge_2", rouge_2, sync_dist=False) +# self.log(f"{log_prefix}/rouge_l", rouge_l, sync_dist=False) +# self.log(f"{log_prefix}/meteor_score", meteor_score, sync_dist=False) + +# @torch.no_grad() +# def validation_step_old(self, batch, batch_idx, dataloader_idx=0): +# if (dataloader_idx % 2) == 0: +# text_batch = batch[-1] +# 
batch_size = text_batch.input_ids.shape[0] +# loss = self.blip2(batch) +# ###============== Overall Loss ===================### +# self.log(f"dataloader{dataloader_idx}/val loss", float(loss), batch_size=batch_size, sync_dist=True) +# elif (dataloader_idx % 2) == 1: +# if (self.current_epoch+1) % self.caption_eval_epoch != 0: +# return +# prot_batch, prompt_batch, target_dict = batch +# ###============== Captioning Results ===================### +# samples = {'prot_batch': prot_batch, 'prompt_batch': prompt_batch} +# predictions = self.blip2.generate( +# samples, +# do_sample=self.do_sample, +# num_beams=self.num_beams, +# max_length=self.max_inference_len, +# min_length=self.min_inference_len +# ) +# if dataloader_idx // 2 == 0: +# self.prediction_list0.append(predictions) +# self.target_list0.append(target_dict) +# elif dataloader_idx // 2 == 1: +# self.prediction_list1.append(predictions) +# self.target_list1.append(target_dict) +# else: +# raise NotImplementedError +# else: +# raise NotImplementedError + +# def on_validation_epoch_end_old(self): +# if self.enable_flash: +# replace_opt_attn_with_flash_attn() +# if (self.current_epoch+1) % self.caption_eval_epoch != 0: +# return +# predictions0 = [i for ii in self.prediction_list0 for i in ii] +# targets0 = [i for ii in self.target_list0 for i in ii['answers']] +# if 'q_types' in self.target_list0[0]: +# q_types0 = [i for ii in self.target_list0 for i in ii['q_types']] +# self.reduce_and_evaluate_qa(predictions0, targets0, q_types0, 'dataset0') +# else: +# self.reduce_and_evaluate_captioning(predictions0, targets0, 'dataset0') + +# if len(self.prediction_list1) > 0: +# predictions1 = [i for ii in self.prediction_list1 for i in ii] +# targets1 = [i for ii in self.target_list1 for i in ii] +# self.reduce_and_evaluate_captioning(predictions1, targets1, 'dataset1') + +# def reduce_and_evaluate_qa(self, predictions, targets, q_types, log_prefix=""): +# all_predictions = [None for _ in range(self.trainer.world_size)] 
+# all_targets = [None for _ in range(self.trainer.world_size)] +# all_q_types = [None for _ in range(self.trainer.world_size)] +# dist.all_gather_object(all_predictions, predictions) +# dist.all_gather_object(all_targets, targets) +# dist.all_gather_object(all_q_types, q_types) +# if self.global_rank == 0: +# all_predictions = [i for ii in all_predictions for i in ii] +# all_targets = [i for ii in all_targets for i in ii] +# all_q_types = [i for ii in all_q_types for i in ii] +# self.save_predictions(all_predictions, all_targets, all_q_types, log_prefix=log_prefix) + +# def reduce_and_evaluate_captioning(self, predictions, targets, log_prefix=""): +# all_predictions = [None for _ in range(self.trainer.world_size)] +# all_targets = [None for _ in range(self.trainer.world_size)] +# dist.all_gather_object(all_predictions, predictions) +# dist.all_gather_object(all_targets, targets) +# if self.global_rank == 0: +# all_predictions = [i for ii in all_predictions for i in ii] +# all_targets = [i for ii in all_targets for i in ii] +# self.save_predictions(all_predictions, all_targets, log_prefix) +# ## fixme: I am not sure if the max length is the same as previous experiments +# bleu2, bleu4, rouge_1, rouge_2, rouge_l, meteor_score = \ +# caption_evaluate(all_predictions, all_targets, self.blip2.llm_tokenizer, self.max_inference_len) +# acc = evaluate_exact_match(all_predictions, all_targets) +# self.log(f"{log_prefix}/acc", acc, sync_dist=False) +# self.log(f"{log_prefix}/bleu2", bleu2, sync_dist=False) +# self.log(f"{log_prefix}/bleu4", bleu4, sync_dist=False) +# self.log(f"{log_prefix}/rouge_1", rouge_1, sync_dist=False) +# self.log(f"{log_prefix}/rouge_2", rouge_2, sync_dist=False) +# self.log(f"{log_prefix}/rouge_l", rouge_l, sync_dist=False) +# self.log(f"{log_prefix}/meteor_score", meteor_score, sync_dist=False) + +# def training_step(self, batch, batch_idx): +# if self.scheduler: +# self.scheduler.step(self.trainer.current_epoch, self.trainer.global_step) + +# 
#batch_size = batch[-1].input_ids.size(0) +# batch_size = len(batch[-1]['targets']) +# ###============== Overall Loss ===================### +# loss = self.blip2(batch) +# self.log("loss", float(loss), batch_size=batch_size, sync_dist=True) +# self.log("lr", self.trainer.optimizers[0].param_groups[0]['lr'], batch_size=batch_size, sync_dist=True) +# return loss + +# @staticmethod +# def add_model_specific_args(parent_parser): +# parser = parent_parser.add_argument_group("ProtBlip2") +# # train mode +# parser.add_argument('--save_every_n_epochs', type=int, default=0) + +# # Bert +# parser.add_argument('--bert_name', type=str, default='microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract') +# parser.add_argument('--cross_attention_freq', type=int, default=2) +# parser.add_argument('--num_query_token', type=int, default=8) +# # OPT +# parser.add_argument('--llm_name', type=str, default="facebook/galactica-1.3b") +# parser.add_argument('--num_beams', type=int, default=5) +# parser.add_argument('--do_sample', action='store_true', default=False) +# parser.add_argument('--max_inference_len', type=int, default=128) +# parser.add_argument('--min_inference_len', type=int, default=1) +# parser.add_argument('--llm_tune', type=str, default='freeze') +# parser.add_argument('--peft_config', type=str, default='') +# parser.add_argument('--peft_dir', type=str, default='') + +# ## plm model +# parser.add_argument('--plm_model', type=str, default='facebook/esm2_t30_150M_UR50D') +# parser.add_argument('--plm_tune', type=str, default='freeze') + +# ## lora config +# parser.add_argument('--lora_r', type=int, default=8) +# parser.add_argument('--lora_alpha', type=int, default=16) +# parser.add_argument('--lora_dropout', type=int, default=0.1) +# parser.add_argument('--enbale_gradient_checkpointing', action='store_true', default=False) + +# # optimization +# parser.add_argument('--weight_decay', type=float, default=0.05, help='optimizer weight decay') +# parser.add_argument('--init_lr', 
def caption_evaluate(predictions, targets, tokenizer, text_trunc_length):
    """Compute BLEU-2/4, ROUGE-1/2/L and METEOR for generated captions.

    Args:
        predictions: list of generated strings.
        targets: list of reference strings (same length as predictions).
        tokenizer: HF tokenizer used only for word-piece tokenization.
        text_trunc_length: max token length for truncation.

    Returns:
        (bleu2, bleu4, rouge_1, rouge_2, rouge_l, meteor) — all scaled to 0-100.
    """
    targets = [t.strip() for t in targets]
    meteor_scores = []
    references = []
    hypotheses = []
    for gt, out in tqdm(zip(targets, predictions)):
        gt_tokens = tokenizer.tokenize(gt, truncation=True, max_length=text_trunc_length,
                                       padding='max_length')
        # Strip pad/special tokens; the '' filter was added for galactica.
        gt_tokens = list(filter(('').__ne__, gt_tokens))
        gt_tokens = list(filter(('[PAD]').__ne__, gt_tokens))
        gt_tokens = list(filter(('[CLS]').__ne__, gt_tokens))
        gt_tokens = list(filter(('[SEP]').__ne__, gt_tokens))

        out_tokens = tokenizer.tokenize(out, truncation=True, max_length=text_trunc_length,
                                        padding='max_length')
        out_tokens = list(filter(('').__ne__, out_tokens))
        # BUG FIX: this line previously re-filtered gt_tokens (copy-paste error);
        # '[PAD]' must be removed from out_tokens as well.
        out_tokens = list(filter(('[PAD]').__ne__, out_tokens))
        out_tokens = list(filter(('[CLS]').__ne__, out_tokens))
        out_tokens = list(filter(('[SEP]').__ne__, out_tokens))

        references.append([gt_tokens])
        hypotheses.append(out_tokens)

        mscore = meteor_score([gt_tokens], out_tokens)
        meteor_scores.append(mscore)

    bleu2 = corpus_bleu(references, hypotheses, weights=(.5, .5)) * 100
    bleu4 = corpus_bleu(references, hypotheses, weights=(.25, .25, .25, .25)) * 100
    print('BLEU-2 score:', bleu2)
    print('BLEU-4 score:', bleu4)
    _meteor_score = np.mean(meteor_scores) * 100
    print('Average Meteor score:', _meteor_score)

    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'])
    rouge_scores = []
    for gt, out in tqdm(zip(targets, predictions)):
        rouge_scores.append(scorer.score(out, gt))

    print('ROUGE score:')
    rouge_1 = np.mean([rs['rouge1'].fmeasure for rs in rouge_scores]) * 100
    rouge_2 = np.mean([rs['rouge2'].fmeasure for rs in rouge_scores]) * 100
    rouge_l = np.mean([rs['rougeL'].fmeasure for rs in rouge_scores]) * 100
    print('rouge1:', rouge_1)
    print('rouge2:', rouge_2)
    print('rougeL:', rouge_l)
    return bleu2, bleu4, rouge_1, rouge_2, rouge_l, _meteor_score


class AttrDict(dict):
    """Dict whose keys are also accessible as attributes (d.x == d['x'])."""

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self


def pad_and_concat(tensor_list, fill_value=0):
    """Concat along dim 0, padding dim 1 to the longest length.

    tensor_list: list of tensors shaped [B_i, N_i] or [B_i, N_i, D] (same D).
    Returns one tensor [sum(B_i), max(N_i)(, D)] padded with ``fill_value``.
    """
    device = tensor_list[0].device
    dtype = tensor_list[0].dtype
    max_dim1 = max(t.shape[1] for t in tensor_list)
    sum_dim0 = sum(t.shape[0] for t in tensor_list)
    if len(tensor_list[0].shape) == 3:
        out = torch.full((sum_dim0, max_dim1, tensor_list[0].shape[-1]), fill_value=fill_value, device=device, dtype=dtype)
        i = 0
        for t in tensor_list:
            out[i:i + t.shape[0], :t.shape[1]] = t
            i += t.shape[0]
        return out
    elif len(tensor_list[0].shape) == 2:
        out = torch.full((sum_dim0, max_dim1), fill_value=fill_value, device=device, dtype=dtype)
        i = 0
        for t in tensor_list:
            out[i:i + t.shape[0], :t.shape[1]] = t
            i += t.shape[0]
        return out
    raise NotImplementedError()


def hf_enable_gradient_checkpointing(hf_model):
    """Enable gradient checkpointing on a HF model, making inputs require grad
    so checkpointed segments still receive gradients when embeddings are frozen."""
    if hasattr(hf_model, "enable_input_require_grads"):
        hf_model.enable_input_require_grads()
    else:
        def make_inputs_require_grad(module, input, output):
            output.requires_grad_(True)

        hf_model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)

    # enable gradient checkpointing for memory efficiency
    hf_model.gradient_checkpointing_enable()
    return hf_model
_expand_mask = AttentionMaskConverter._expand_mask


def _prepare_decoder_attention_mask_original(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
    """Build the combined causal + padding attention mask for the OPT decoder.

    Returns a [bsz, 1, tgt_seq_len, src_seq_len] additive mask, or None when
    no masking is needed (single-token input with no padding mask).
    """
    # create causal mask
    # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
    combined_attention_mask = None
    if input_shape[-1] > 1:
        combined_attention_mask = _make_causal_mask(
            input_shape,
            inputs_embeds.dtype,
            device=inputs_embeds.device,
            past_key_values_length=past_key_values_length,
        )

    if attention_mask is not None:
        # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
        expanded_attn_mask = _expand_mask(attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]).to(
            inputs_embeds.device
        )
        # Additive combination: padding mask plus causal mask.
        combined_attention_mask = (
            expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask + combined_attention_mask
        )

    return combined_attention_mask


def forward_original(
    self,
    hidden_states: torch.Tensor,
    position_ids=None,
    key_value_states: Optional[torch.Tensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    attention_mask: Optional[torch.Tensor] = None,
    layer_head_mask: Optional[torch.Tensor] = None,
    output_attentions: bool = False,
    cache_position=None,  # accepted for compatibility with newer transformers call sites
    **kwargs  # keep to absorb future keyword arguments
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
    """Input shape: Batch x Time x Channel

    Eager (non-flash) OPT attention forward, used to restore original behavior
    after flash attention has been monkey-patched in.
    """
    # if key_value_states are provided this layer is used as a cross-attention layer
    # for the decoder
    is_cross_attention = key_value_states is not None

    bsz, tgt_len, _ = hidden_states.size()

    # get query proj (queries are pre-scaled by self.scaling here)
    query_states = self.q_proj(hidden_states) * self.scaling
    # get key, value proj
    if is_cross_attention and past_key_value is not None:
        # reuse k,v, cross_attentions
        key_states = past_key_value[0]
        value_states = past_key_value[1]
    elif is_cross_attention:
        # cross_attentions
        key_states = self._shape(self.k_proj(key_value_states), -1, bsz)
        value_states = self._shape(self.v_proj(key_value_states), -1, bsz)
    elif past_key_value is not None:
        # reuse k, v, self_attention
        key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
        value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
        key_states = torch.cat([past_key_value[0], key_states], dim=2)
        value_states = torch.cat([past_key_value[1], value_states], dim=2)
    else:
        # self_attention
        key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
        value_states = self._shape(self.v_proj(hidden_states), -1, bsz)

    # Always return the (possibly concatenated) k/v cache; the original
    # upstream code gated this on self.is_decoder.
    past_key_value = (key_states, value_states)

    proj_shape = (bsz * self.num_heads, -1, self.head_dim)
    query_states = self._shape(query_states, tgt_len, bsz).view(*proj_shape)
    key_states = key_states.view(*proj_shape)
    value_states = value_states.view(*proj_shape)

    src_len = key_states.size(1)
    attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))

    if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
        raise ValueError(
            f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is"
            f" {attn_weights.size()}"
        )

    if attention_mask is not None:
        if attention_mask.size() != (bsz, 1, tgt_len, src_len):
            raise ValueError(
                f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {attention_mask.size()}"
            )
        attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attention_mask
        # Clamp fully-masked rows to the dtype minimum to avoid -inf softmax.
        attn_weights = torch.max(
            attn_weights, torch.tensor(torch.finfo(attn_weights.dtype).min, device=attn_weights.device)
        )
        attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

    # upcast to fp32 if the weights are in fp16. Please see https://github.com/huggingface/transformers/pull/17437
    if attn_weights.dtype == torch.float16:
        attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(torch.float16)
    else:
        attn_weights = nn.functional.softmax(attn_weights, dim=-1)

    if layer_head_mask is not None:
        if layer_head_mask.size() != (self.num_heads,):
            raise ValueError(
                f"Head mask for a single layer should be of size {(self.num_heads,)}, but is"
                f" {layer_head_mask.size()}"
            )
        attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
        attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

    if output_attentions:
        # this operation is a bit awkward, but it's required to
        # make sure that attn_weights keeps its gradient.
        # In order to do so, attn_weights have to be reshaped
        # twice and have to be reused in the following
        attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
        attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)
    else:
        attn_weights_reshaped = None

    attn_probs = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training)

    attn_output = torch.bmm(attn_probs, value_states)

    if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):
        raise ValueError(
            f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is"
            f" {attn_output.size()}"
        )

    attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim)
    attn_output = attn_output.transpose(1, 2)

    # Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be
    # partitioned aross GPUs when using tensor-parallelism.
    attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim)

    attn_output = self.out_proj(attn_output)

    return attn_output, attn_weights_reshaped, past_key_value


def forward(
    self,
    hidden_states: torch.Tensor,
    key_value_states: Optional[torch.Tensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    attention_mask: Optional[torch.Tensor] = None,
    layer_head_mask: Optional[torch.Tensor] = None,
    output_attentions: bool = False,
    position_ids=None,
    cache_position=None,  # accepted for compatibility with newer transformers call sites
    **kwargs  # keep to absorb future keyword arguments
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
    """Input shape: Batch x Time x Channel

    Flash-attention replacement for OPT self-attention. Supports only
    self-attention without cached key/values and without attention weights
    output (enforced by the asserts below).
    """
    # if key_value_states are provided this layer is used as a cross-attention layer
    # for the decoder
    is_cross_attention = key_value_states is not None
    assert not is_cross_attention, "Cross attention is not supported for flash attention"
    assert past_key_value is None, "past_key_value is not None is not supported for flash attention"
    assert not output_attentions, "output_attentions is not supported for flash attention"

    bsz, tgt_len, _ = hidden_states.size()

    # get query proj (pre-scaled, hence softmax_scale=1 in the flash call below)
    query_states = self.q_proj(hidden_states) * self.scaling
    # get key, value proj

    # NOTE(review): past_key_value is asserted None above, so this branch is
    # dead code; kept for parity with the original implementation.
    if past_key_value is not None:
        # reuse k, v, self_attention
        key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
        value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
        key_states = torch.cat([past_key_value[0], key_states], dim=2)
        value_states = torch.cat([past_key_value[1], value_states], dim=2)
    else:
        # self_attention
        key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
        value_states = self._shape(self.v_proj(hidden_states), -1, bsz)

    # Always return the k/v cache (original upstream gated on self.is_decoder).
    past_key_value = (key_states, value_states)

    proj_shape = (bsz * self.num_heads, -1, self.head_dim)
    query_states = self._shape(query_states, tgt_len, bsz).view(*proj_shape)
    key_states = key_states.view(*proj_shape)
    value_states = value_states.view(*proj_shape)

    ## for flash attention
    flash_shape = (bsz, self.num_heads, tgt_len, self.head_dim)
    query_states = query_states.view(*flash_shape)
    key_states = key_states.view(*flash_shape)
    value_states = value_states.view(*flash_shape)
    qkv = torch.stack([query_states, key_states, value_states], dim=2)  # shape = [bsz, num_heads, 3, tgt_len, head_dim]
    qkv = qkv.transpose(1, 3)  # [bsz, tgt_len, 3, num_heads, head_dim]

    key_padding_mask = attention_mask

    assert key_padding_mask is not None
    # Unpad the packed qkv to variable-length form expected by flash-attn.
    x = rearrange(qkv, "b s three h d -> b s (three h d)")
    x_unpad, indices, cu_seqlens, max_s = unpad_input(x, key_padding_mask)
    x_unpad = rearrange(x_unpad, 'nnz (three h d) -> nnz three h d', three=3, h=self.num_heads)
    output_unpad = flash_attn_varlen_qkvpacked_func(
        x_unpad, cu_seqlens, max_s, self.dropout if self.training else 0.0,
        softmax_scale=1, causal=True, return_attn_probs=False
    )

    # Re-pad to [bsz, tgt_len, num_heads, head_dim].
    output = rearrange(pad_input(rearrange(output_unpad, 'nnz h d -> nnz (h d)'),
                                 indices, bsz, tgt_len),
                       'b s (h d) -> b s h d', h=self.num_heads)

    attn_output = self.out_proj(rearrange(output, "b s h d -> b s (h d)"))
    return attn_output, None, past_key_value
# Disable the transformation of the attention mask in the decoder, as flash
# attention requires the attention mask to be the same as the key_padding_mask.
def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
    # Return the [bsz, seq_len] mask unchanged instead of expanding it to 4-D.
    return attention_mask


def replace_opt_attn_with_flash_attn():
    """Monkey-patch OPT attention with the flash-attention implementation.

    Emits a warning (but still patches) on pre-Ampere GPUs, where the backward
    pass with head_dim > 64 is unsupported.
    """
    cuda_major, cuda_minor = torch.cuda.get_device_capability()
    if cuda_major < 8:
        logging.warning(
            "Flash attention is only supported on A100 or H100 GPU during training due to head dim > 64 backward."
            "ref: https://github.com/HazyResearch/flash-attention/issues/190#issuecomment-1523359593"
        )
    transformers.models.opt.modeling_opt.OPTDecoder._prepare_decoder_attention_mask = _prepare_decoder_attention_mask
    transformers.models.opt.modeling_opt.OPTAttention.forward = forward


def replace_opt_attn_with_original_attn():
    """Restore the stock OPT attention implementation saved at import time."""
    transformers.models.opt.modeling_opt.OPTDecoder._prepare_decoder_attention_mask = _prepare_decoder_attention_mask_original
    transformers.models.opt.modeling_opt.OPTAttention.forward = forward_original


if __name__ == "__main__":
    # Equivalence test between forward_original and the flash-attention forward.
    import math

    import torch.nn as nn

    class FakeNN(nn.Module):
        """Minimal stand-in exposing the attributes OPTAttention.forward reads."""

        def __init__(self):
            super().__init__()
            self.scaling = 1 / math.sqrt(2048)
            # Identity projections keep the comparison independent of random weights
            # (the dead Linear-projection branch was removed).
            self.q_proj = nn.Identity()
            self.k_proj = nn.Identity()
            self.v_proj = nn.Identity()
            self.out_proj = nn.Identity()

            self.is_decoder = True
            self.num_heads = 2
            self.head_dim = 128
            self.embed_dim = 256
            self.dropout = 0

        def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
            # create causal mask: [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
            combined_attention_mask = None
            if input_shape[-1] > 1:
                combined_attention_mask = _make_causal_mask(
                    input_shape,
                    inputs_embeds.dtype,
                    device=inputs_embeds.device,
                    past_key_values_length=past_key_values_length,
                )

            if attention_mask is not None:
                # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
                expanded_attn_mask = _expand_mask(attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]).to(
                    inputs_embeds.device
                )
                combined_attention_mask = (
                    expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask + combined_attention_mask
                )

            return combined_attention_mask

        def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
            return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()

    fakenn = FakeNN().to(torch.bfloat16).to("cuda:0")

    t_len = 3
    fake_input = torch.randn(2, t_len, fakenn.embed_dim).to(torch.bfloat16).to("cuda:0")
    # Random boolean padding mask (the unused fixed-length branch and the
    # immediately-overwritten fake_lens assignment were removed).
    fake_mask = torch.randint(0, t_len, (2, t_len)).bool().to("cuda:0")

    fake_mask2 = fakenn._prepare_decoder_attention_mask(fake_mask, (2, t_len), fake_input, 0)
    attn_output0, _, _ = forward_original(fakenn, fake_input, None, None, fake_mask2, None, False)
    attn_output1, _, _ = forward(fakenn, fake_input, None, None, fake_mask, None, False)  # shape = [2, 3, 256]
    # Zero out padded positions: the flash path leaves them zeroed, the dense path does not.
    attn_output0 = attn_output0 * fake_mask.unsqueeze(-1)

    print(torch.isclose(attn_output0, attn_output1).all())
    print(attn_output0.shape, attn_output1.shape)
    difference = (attn_output0 - attn_output1).abs()
    print(difference)
    print(difference.sum())
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from .utils.py_functional import is_package_available


# Import the hub patch eagerly so the optional dependency fails fast below.
if is_package_available("modelscope"):
    from modelscope.utils.hf_util import patch_hub  # type: ignore


__version__ = "0.3.2.dev0"


if os.getenv("USE_MODELSCOPE_HUB", "0").lower() in ["true", "y", "1"]:
    # Patch hub to download models from modelscope to speed up.
    if not is_package_available("modelscope"):
        raise ImportError("You are using the modelscope hub, please install modelscope by `pip install modelscope`.")

    patch_hub()


# --- verl/protocol.py ---
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Implement base data transfer protocol between any two functions, modules.
We can subclass Protocol to define more detailed batch info with specific keys
"""

import copy
import io
import pickle
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import ray
import torch
from numpy.typing import NDArray
from tensordict import TensorDict
from torch.distributed import ProcessGroup
from torch.utils.data import DataLoader

from .utils.py_functional import union_two_dict


try:
    import tensordict

    tensordict.set_lazy_legacy(False).set()
except Exception:
    pass


__all__ = ["DataProto", "union_tensor_dict"]


def pad_dataproto_to_divisor(data: "DataProto", size_divisor: int) -> Tuple["DataProto", int]:
    """Pad a DataProto so its length is divisible by ``size_divisor``.

    Args:
        data (DataProto): the unpadded DataProto
        size_divisor (int): size divisor

    Returns:
        Tuple of the padded DataProto and the number of padding rows appended.
    """
    assert isinstance(data, DataProto), "data must be a DataProto"
    remainder = len(data) % size_divisor
    if remainder == 0:
        return data, 0

    pad_size = size_divisor - remainder
    padding_protos = []
    still_needed = pad_size
    # Repeat leading slices of `data` until enough padding rows are collected;
    # this also covers pad_size larger than len(data).
    while still_needed > 0:
        take_size = min(still_needed, len(data))
        padding_protos.append(data[:take_size])
        still_needed -= take_size

    return DataProto.concat([data] + padding_protos), pad_size
def unpad_dataproto(data: "DataProto", pad_size: int) -> "DataProto":
    """Strip the trailing ``pad_size`` rows appended by ``pad_dataproto_to_divisor``."""
    if pad_size != 0:
        data = data[:-pad_size]

    return data


def union_tensor_dict(tensor_dict1: TensorDict, tensor_dict2: TensorDict) -> TensorDict:
    """Union two tensordicts in place (into tensor_dict1).

    Raises ValueError if the batch sizes differ or a shared key holds unequal tensors.
    """
    if tensor_dict1.batch_size != tensor_dict2.batch_size:
        raise ValueError(
            f"Two tensor dict must have identical batch size. Got {tensor_dict1.batch_size} and {tensor_dict2.batch_size}"
        )

    for key in tensor_dict2.keys():
        if key in tensor_dict1 and not torch.equal(tensor_dict1[key], tensor_dict2[key]):
            raise ValueError(f"Key already exists: {key}.")

        tensor_dict1[key] = tensor_dict2[key]

    return tensor_dict1


def union_numpy_dict(tensor_dict1: Dict[str, NDArray], tensor_dict2: Dict[str, NDArray]) -> Dict[str, NDArray]:
    """Union two dicts of ndarrays in place (into tensor_dict1); shared keys must be element-wise equal."""
    for key in tensor_dict2.keys():
        if key in tensor_dict1:
            assert isinstance(tensor_dict2[key], np.ndarray)
            assert isinstance(tensor_dict1[key], np.ndarray)
            if not np.all(tensor_dict1[key] == tensor_dict2[key]):
                raise ValueError(f"Key already exists: {key}.")

        tensor_dict1[key] = tensor_dict2[key]

    return tensor_dict1


def batch_collate(features: List[Dict[str, Any]]) -> Dict[str, List[Any]]:
    """Transpose a list of per-row dicts into a dict of per-key lists."""
    if len(features) == 0:
        return {}

    batch_features = defaultdict(list)
    for feature in features:
        for key, value in feature.items():
            batch_features[key].append(value)

    return batch_features


def fold_batch_dim(data: "DataProto", new_batch_size: int):
    """
    Fold a batch dim from [bsz, xxx] into [new_bsz, bsz // new_bsz, xxx]
    """
    batch_size = data.batch.batch_size[0]

    assert batch_size % new_batch_size == 0

    tensor: TensorDict = data.batch
    non_tensor = data.non_tensor_batch

    tensor = tensor.view(new_batch_size, -1)
    tensor.auto_batch_size_(batch_dims=1)

    for key, value in non_tensor.items():
        # Pass the shape positionally: the `newshape` keyword is deprecated and
        # removed in NumPy 2.x in favor of `shape`.
        non_tensor[key] = np.reshape(value, (new_batch_size, -1, *value.shape[1:]))

    return DataProto(batch=tensor, non_tensor_batch=non_tensor, meta_info=data.meta_info)


def collate_fn(data_items: list["DataProtoItem"]):
    """Stack DataProtoItem rows back into a DataProto (used by DataProto.make_iterator)."""
    batch = []
    non_tensor_batch = []
    for data in data_items:
        batch.append(data.batch)
        non_tensor_batch.append(data.non_tensor_batch)

    batch = torch.stack(batch).contiguous()
    non_tensor_batch = batch_collate(non_tensor_batch)
    non_tensor_batch = {key: np.array(value, dtype=object) for key, value in non_tensor_batch.items()}
    return DataProto(batch=batch, non_tensor_batch=non_tensor_batch)


@dataclass
class DataProtoItem:
    # Single-row view of a DataProto: per-row tensors, per-row ndarrays, shared meta info.
    batch: Optional[TensorDict] = None
    non_tensor_batch: Dict[str, NDArray] = field(default_factory=dict)
    meta_info: Dict[str, Any] = field(default_factory=dict)


@dataclass
class DataProto:
    """
    A DataProto is a data structure that aims to provide a standard protocol for data exchange between functions.
    It contains a batch (TensorDict) and a meta_info (Dict). The batch is a TensorDict https://pytorch.org/tensordict/.
    TensorDict allows you to manipulate a dictionary of Tensors like a single Tensor. Ideally, the tensors with the
    same batch size should be put inside batch.
    """
+ """ + + batch: Optional[TensorDict] = None + non_tensor_batch: Dict[str, NDArray] = field(default_factory=dict) + meta_info: Dict[str, Any] = field(default_factory=dict) + + def __post_init__(self): + self.check_consistency() # perform necessary checking + + def __len__(self) -> int: + if self.batch is not None: + return self.batch.batch_size[0] + elif self.non_tensor_batch is not None and len(self.non_tensor_batch) > 0: + pivot_key = list(self.non_tensor_batch.keys())[0] + return self.non_tensor_batch[pivot_key].shape[0] + else: + return 0 + + def __getitem__( + self, item: Union[int, slice, List[int], np.ndarray, torch.Tensor] + ) -> Union["DataProto", "DataProtoItem"]: + if isinstance(item, slice): + return self.slice_select(item.start, item.stop, item.step) + + if isinstance(item, (list, np.ndarray, torch.Tensor)): + return self.index_select(item) + + tensor_data = self.batch[item] if self.batch is not None else None + non_tensor_data = {key: value[item] for key, value in self.non_tensor_batch.items()} + return DataProtoItem(batch=tensor_data, non_tensor_batch=non_tensor_data, meta_info=self.meta_info) + + def __getstate__(self) -> Tuple[bytes, Dict[str, NDArray], Dict[str, Any]]: + buffer = io.BytesIO() + if self.batch is not None: + self.batch: TensorDict = self.batch.contiguous() + self.batch: TensorDict = self.batch.consolidate() + + torch.save(self.batch, buffer) + buffer_bytes = buffer.getvalue() + return buffer_bytes, self.non_tensor_batch, self.meta_info + + def __setstate__(self, data: Tuple[bytes, Dict[str, NDArray], Dict[str, Any]]) -> None: + batch_deserialized_bytes, non_tensor_batch, meta_info = data + batch_deserialized = io.BytesIO(batch_deserialized_bytes) + batch = torch.load(batch_deserialized, weights_only=False, map_location="cpu") + self.batch = batch + self.non_tensor_batch = non_tensor_batch + self.meta_info = meta_info + + def save_to_disk(self, filepath: str) -> None: + with open(filepath, "wb") as f: + pickle.dump(self, f) + + 
@staticmethod + def load_from_disk(filepath: str) -> "DataProto": + with open(filepath, "rb") as f: + data = pickle.load(f) + return data + + def print_size(self, prefix: str = "") -> None: + size_of_tensordict = 0 + if self.batch is not None: + for tensor in self.batch.values(): + if isinstance(tensor, torch.Tensor): + size_of_tensordict += tensor.element_size() * tensor.numel() + + size_of_numpy_array = 0 + for value in self.non_tensor_batch.values(): + size_of_numpy_array += value.nbytes + + size_of_numpy_array /= 1024**3 + size_of_tensordict /= 1024**3 + + message = f"Size of tensordict: {size_of_tensordict} GB, size of non_tensor_batch: {size_of_numpy_array} GB." + print({prefix}, {message}) + + def check_consistency(self): + """Check the consistency of the DataProto. Mainly for batch and non_tensor_batch + We expose this function as a public one so that user can call themselves directly + """ + if self.batch is not None: + assert len(self.batch.batch_size) == 1, "only support num_batch_dims=1" + + if self.batch is not None and len(self.non_tensor_batch) != 0: + # TODO: we can actually lift this restriction if needed + assert len(self.batch.batch_size) == 1, "only support num_batch_dims=1 when non_tensor_batch is not empty." + + batch_size = self.batch.batch_size[0] + for key, value in self.non_tensor_batch.items(): + assert len(value) == batch_size, f"key {key} length {len(value)} is not equal to bsz {batch_size}." 
+ + @classmethod + def from_single_dict( + cls, + data: Dict[str, Union[torch.Tensor, NDArray]], + meta_info: Optional[Dict[str, Any]] = None, + ) -> "DataProto": + tensors, non_tensors = {}, {} + for key, value in data.items(): + print("key") + print(key) + print("value") + print(value) + if isinstance(value, torch.Tensor): + tensors[key] = value + elif isinstance(value, np.ndarray): + non_tensors[key] = value + else: + raise ValueError(f"Unsupported type in data {type(value)}") + + return DataProto.from_dict(tensors=tensors, non_tensors=non_tensors, meta_info=meta_info) + + @classmethod + def from_dict( + cls, + tensors: Optional[Dict[str, torch.Tensor]] = None, + non_tensors: Optional[Dict[str, NDArray]] = None, + meta_info: Optional[Dict[str, Any]] = None, + num_batch_dims: int = 1, + ) -> "DataProto": + """Create a DataProto from a dict of tensors. This assumes that + 1. All the tensor in tensors have the same dim0 + 2. Only dim0 is the batch dim + """ + assert num_batch_dims > 0, "num_batch_dims must be greater than zero" + if non_tensors is not None: + assert num_batch_dims == 1, "only support num_batch_dims=1 when non_tensors is not None." + + tensors = tensors or {} + non_tensors = non_tensors or {} + meta_info = meta_info or {} + assert isinstance(tensors, dict) and isinstance(non_tensors, dict) and isinstance(meta_info, dict) + + # get and check batch size + batch_size = None + pivot_key = None + for key, tensor in tensors.items(): + if batch_size is None: + batch_size = tensor.shape[:num_batch_dims] + pivot_key = key + else: + current_batch = tensor.shape[:num_batch_dims] + assert batch_size == current_batch, ( + f"Not all the tensor in tensors have the same batch size with batch_dims={num_batch_dims}. 
" + f"Got {pivot_key} has {batch_size}, {key} has {current_batch}" + ) + + for key, value in non_tensors.items(): + if not isinstance(value, np.ndarray) or value.dtype != np.dtype(object): + non_tensors[key] = np.array(value, dtype=object) + + tensor_dict = TensorDict(source=tensors, batch_size=batch_size) if tensors else None + return cls(batch=tensor_dict, non_tensor_batch=non_tensors, meta_info=meta_info) + + def to(self, device: torch.device) -> "DataProto": + """move the batch to device + + Args: + device (torch.device, str): torch device + + Returns: + DataProto: the current DataProto + + """ + if self.batch is not None: + self.batch = self.batch.to(device) + + return self + + def select( + self, + batch_keys: Optional[List[str]] = None, + non_tensor_batch_keys: Optional[List[str]] = None, + meta_info_keys: Optional[List[str]] = None, + deepcopy: bool = False, + ) -> "DataProto": + """Select a subset of the DataProto via batch_keys and meta_info_keys + + Args: + batch_keys (list, optional): a list of strings indicating the keys in batch to select + meta_info_keys (list, optional): a list of keys indicating the meta info to select + + Returns: + DataProto: the DataProto with the selected batch_keys and meta_info_keys + """ + # TODO (zhangchi.usc1992) whether to copy + if batch_keys is not None: + batch_keys = tuple(filter(lambda k: k in self.batch, batch_keys)) + sub_batch = self.batch.select(*batch_keys) + else: + sub_batch = self.batch + + if non_tensor_batch_keys is not None: + # we must convert it to tuple to avoid the missing elements + non_tensor_batch_keys = tuple(filter(lambda k: k in self.non_tensor_batch, non_tensor_batch_keys)) + non_tensor_batch = {k: v for k, v in self.non_tensor_batch.items() if k in non_tensor_batch_keys} + else: + non_tensor_batch = self.non_tensor_batch + + if deepcopy: + non_tensor_batch = copy.deepcopy(non_tensor_batch) + + if meta_info_keys is not None: + meta_info_keys = tuple(filter(lambda k: k in self.meta_info, 
meta_info_keys)) + sub_meta_info = {k: v for k, v in self.meta_info.items() if k in meta_info_keys} + else: + sub_meta_info = self.meta_info + + if deepcopy: + sub_meta_info = copy.deepcopy(sub_meta_info) + + return DataProto(batch=sub_batch, non_tensor_batch=non_tensor_batch, meta_info=sub_meta_info) + + def index_select(self, index: Union[List[int], NDArray, torch.Tensor]) -> "DataProto": + """Select a subset of the DataProto via index. + + Args: + index (list, ndarray, torch.Tensor): a list of indices to select. + + Returns: + DataProto: the DataProto containing the selected indices. + """ + if isinstance(index, list): + index = np.array(index, dtype=bool if isinstance(index[0], bool) else np.int32) + elif isinstance(index, torch.Tensor): + index = index.detach().cpu().numpy() + + tensor_data = self.batch[index] if self.batch is not None else None + non_tensor_data = {key: value[index] for key, value in self.non_tensor_batch.items()} + return DataProto(batch=tensor_data, non_tensor_batch=non_tensor_data, meta_info=self.meta_info) + + def slice_select( + self, start: Optional[int] = None, end: Optional[int] = None, step: Optional[int] = None + ) -> "DataProto": + """Select a subset of the DataProto via slice. + + Args: + start (int, optional): the start index of the slice. + end (int, optional): the end index of the slice. + step (int, optional): the step of the slice. + + Returns: + DataProto: the DataProto containing the selected slice. 
+ """ + index = slice(start, end, step) + tensor_data = self.batch[index] if self.batch is not None else None + non_tensor_data = {key: value[index] for key, value in self.non_tensor_batch.items()} + return DataProto(batch=tensor_data, non_tensor_batch=non_tensor_data, meta_info=self.meta_info) + + def pop( + self, + batch_keys: Optional[List[str]] = None, + non_tensor_batch_keys: Optional[List[str]] = None, + meta_info_keys: Optional[List[str]] = None, + ) -> "DataProto": + """Pop a subset of the DataProto via `batch_keys` and `meta_info_keys` + + Args: + batch_keys (list, optional): a list of strings indicating the keys in batch to pop + meta_info_keys (list, optional): a list of keys indicating the meta info to pop + + Returns: + DataProto: the DataProto with the poped batch_keys and meta_info_keys + """ + assert batch_keys is not None + non_tensor_batch_keys = non_tensor_batch_keys or [] + meta_info_keys = meta_info_keys or [] + + tensors = {} + for key in filter(lambda k: k in self.batch, batch_keys): + tensors[key] = self.batch.pop(key) + + non_tensors = {} + for key in filter(lambda k: k in self.non_tensor_batch, non_tensor_batch_keys): + non_tensors[key] = self.non_tensor_batch.pop(key) + + meta_info = {} + for key in filter(lambda k: k in self.meta_info, meta_info_keys): + meta_info[key] = self.meta_info.pop(key) + + return DataProto.from_dict(tensors=tensors, non_tensors=non_tensors, meta_info=meta_info) + + def rename( + self, old_keys: Optional[Union[str, List[str]]] = None, new_keys: Optional[Union[str, List[str]]] = None + ) -> "DataProto": + """ + Note that this function only rename the key in the batch + """ + + def validate_input(keys): + if keys is not None: + if isinstance(keys, str): + keys = [keys] + elif isinstance(keys, list): + pass + else: + raise TypeError(f"keys must be a list or a string, but got {type(keys)}") + return keys + + old_keys = validate_input(old_keys) + new_keys = validate_input(new_keys) + + if len(new_keys) != 
len(old_keys): + raise ValueError( + f"new_keys and old_keys must have the same length, but got {len(new_keys)} and {len(old_keys)}" + ) + + self.batch.rename_key_(tuple(old_keys), tuple(new_keys)) + + return self + + def union(self, other: "DataProto") -> "DataProto": + """Union with another DataProto. Union batch and meta_info separately. + Throw an error if + - there are conflict keys in batch and they are not equal + - the batch size of two data batch is not the same + - there are conflict keys in meta_info and they are not the same. + + Args: + other (DataProto): another DataProto to union + + Returns: + DataProto: the DataProto after union + """ + self.batch = union_tensor_dict(self.batch, other.batch) + self.non_tensor_batch = union_numpy_dict(self.non_tensor_batch, other.non_tensor_batch) + self.meta_info = union_two_dict(self.meta_info, other.meta_info) + return self + + def make_iterator( + self, mini_batch_size: int, epochs: int, seed: int = None, dataloader_kwargs: Dict[str, Any] = None + ): + """Make an iterator from the DataProto. This is built upon that TensorDict can be used as a normal Pytorch + dataset. See https://pytorch.org/tensordict/tutorials/data_fashion for more details. + + Args: + mini_batch_size (int): mini-batch size when iterating the dataset. We require that + ``batch.batch_size[0] % mini_batch_size == 0`` + epochs (int): number of epochs when iterating the dataset. + dataloader_kwargs: internally, it returns a DataLoader over the batch. + The dataloader_kwargs is the kwargs passed to the DataLoader + + Returns: + Iterator: an iterator that yields a mini-batch data at a time. 
The total number of iteration steps is + ``self.batch.batch_size * epochs // mini_batch_size`` + """ + assert self.batch.batch_size[0] % mini_batch_size == 0, f"{self.batch.batch_size[0]} % {mini_batch_size} != 0" + if seed is not None: + generator = torch.Generator() + generator.manual_seed(seed) + else: + generator = None + + dataloader_kwargs = dataloader_kwargs or {} + assert isinstance(dataloader_kwargs, dict) + train_dataloader = DataLoader( + dataset=self, + batch_size=mini_batch_size, + collate_fn=collate_fn, + generator=generator, + **dataloader_kwargs, + ) + + def get_data(): + for _ in range(epochs): + for data in train_dataloader: + setattr(data, "meta_info", self.meta_info) + yield data + + return iter(get_data()) + + def chunk(self, chunks: int) -> List["DataProto"]: + """Split the batch among dim=0 into chunks. The meta_info is passed to each DataProto after split. + + Args: + chunks (int): the number of chunks to split on dim=0 + + Returns: + List[DataProto]: a list of DataProto after splitting + """ + assert len(self) % chunks == 0, ( + f"only support equal chunk. Got size of DataProto {len(self)} and chunk {chunks}." + ) + if self.batch is not None: + batch_lst = self.batch.chunk(chunks=chunks, dim=0) + else: + batch_lst = [None for _ in range(chunks)] + + non_tensor_batch_lst = [{} for _ in range(chunks)] + for key, value in self.non_tensor_batch.items(): + non_tensor_lst = np.array_split(value, chunks) + for i in range(chunks): + non_tensor_batch_lst[i][key] = non_tensor_lst[i] + + return [ + DataProto(batch=batch_lst[i], non_tensor_batch=non_tensor_batch_lst[i], meta_info=self.meta_info) + for i in range(chunks) + ] + + def split(self, split_size: int) -> List["DataProto"]: + """Split the batch among dim=0 into chunks. The meta_info is passed to each DataProto after split. 
+ + Args: + split_size (int): the size of each split + + Returns: + List[DataProto]: a list of DataProto after splitting + """ + chunks = len(self) // split_size + return self.chunk(chunks) + + @staticmethod + def concat(data: List["DataProto"]) -> "DataProto": + """Concat a list of DataProto. The batch is concatenated among dim=0. + The meta_info is assumed to be identical and will use the first one. + + Args: + data (List[DataProto]): list of DataProto + + Returns: + DataProto: concatenated DataProto + """ + batch_lst = [batch.batch for batch in data] + new_batch = torch.cat(batch_lst, dim=0) if batch_lst[0] is not None else None + non_tensor_batch = batch_collate([d.non_tensor_batch for d in data]) + for key, value in non_tensor_batch.items(): + non_tensor_batch[key] = np.concatenate(value, axis=0) + + return DataProto(batch=new_batch, non_tensor_batch=non_tensor_batch, meta_info=data[0].meta_info) + + def reorder(self, indices: torch.Tensor) -> None: + """ + Note that this operation is in-place + """ + indices_np = indices.detach().numpy() + self.batch = self.batch[indices] + self.non_tensor_batch = {key: value[indices_np] for key, value in self.non_tensor_batch.items()} + + def repeat(self, repeat_times: int, interleave: bool = True) -> "DataProto": + """ + Repeat the batch data a specified number of times. + + Args: + repeat_times (int): Number of times to repeat the data. + interleave (bool): Whether to interleave the repeated data. + + Returns: + DataProto: A new DataProto with repeated data. 
+ """ + if self.batch is not None: + if interleave: # interleave the data + repeated_tensors = { + key: tensor.repeat_interleave(repeat_times, dim=0) for key, tensor in self.batch.items() + } + else: # stack the data + repeated_tensors = { + key: tensor.unsqueeze(0).expand(repeat_times, *tensor.shape).reshape(-1, *tensor.shape[1:]) + for key, tensor in self.batch.items() + } + + repeated_batch = TensorDict( + source=repeated_tensors, + batch_size=(self.batch.batch_size[0] * repeat_times,), + ) + else: + repeated_batch = None + + repeated_non_tensor_batch = {} + for key, value in self.non_tensor_batch.items(): + if interleave: + repeated_non_tensor_batch[key] = np.repeat(value, repeat_times, axis=0) + else: + repeated_non_tensor_batch[key] = np.tile(value, (repeat_times,) + (1,) * (value.ndim - 1)) + + return DataProto( + batch=repeated_batch, + non_tensor_batch=repeated_non_tensor_batch, + meta_info=self.meta_info, + ) + + +@dataclass +class DataProtoFuture: + """ + DataProtoFuture aims to eliminate actual data fetching on driver. By doing so, the driver doesn't have to wait + for data so that asynchronous execution becomes possible. + DataProtoFuture contains a list of futures from another WorkerGroup of size world_size. + - collect_fn is a Callable that reduces the list of futures to a DataProto + - dispatch_fn is a Callable that partitions the DataProto into a list of DataProto of size world_size and then select + + Potential issue: we can optimize dispatch_fn(collect_fn) such that only needed data is fetched on destination + - DataProtoFuture only supports directly passing from the output of a method to another input. You can't perform any + operation on the DataProtoFuture in driver. 
+ """ + + collect_fn: Callable + futures: List[ray.ObjectRef] + dispatch_fn: Callable = None + + @staticmethod + def concat(data: List[ray.ObjectRef]) -> "DataProtoFuture": + output = DataProtoFuture(collect_fn=DataProto.concat, futures=data) + return output + + def chunk(self, chunks: int) -> List["DataProtoFuture"]: + from functools import partial + + arg_future_lst = [] + for i in range(chunks): + # note that we can't directly pass i and chunks + def dispatch_fn(x, i, chunks): + return x.chunk(chunks=chunks)[i] + + arg_future = DataProtoFuture( + collect_fn=self.collect_fn, dispatch_fn=partial(dispatch_fn, i=i, chunks=chunks), futures=self.futures + ) + arg_future_lst.append(arg_future) + return arg_future_lst + + def get(self): + outputs = ray.get(self.futures) # dp_size + for output in outputs: + assert isinstance(output, DataProto) + + outputs = self.collect_fn(outputs) # select dp, concat + if self.dispatch_fn is not None: + outputs = self.dispatch_fn(outputs) # split in batch dim, select using dp + + return outputs + + +def allgather_dict_tensors( + tensors: Union[Dict[str, torch.Tensor], TensorDict], size: int, group: ProcessGroup, dim: int = 0 +) -> Union[Dict[str, torch.Tensor], TensorDict]: + """ + TODO: optimize this. 
+ - We can use async ops + - We can use only one allgather + """ + if isinstance(tensors, TensorDict): + is_tensor_dict = True + tensors_as_dict = tensors.to_dict() + else: + tensors_as_dict = tensors + is_tensor_dict = False + + output = {} + sorted_keys = sorted(tensors_as_dict.keys()) + for key in sorted_keys: + value = tensors_as_dict[key] + output[key] = [torch.empty_like(value) for _ in range(size)] + torch.distributed.all_gather(output[key], value, group=group, async_op=False) + output[key] = torch.cat(output[key], dim=dim) + + if is_tensor_dict: + output = TensorDict(source=output, batch_size=tensors.batch_size[0] * size) + + return output + + +def all_gather_data_proto(data: DataProto, size: int, group: ProcessGroup) -> None: + # Note that this is an inplace operator just like torch.distributed.all_gather + prev_device = data.batch.device + data.batch = data.batch.cuda(device=torch.cuda.current_device()) + data.batch = allgather_dict_tensors(data.batch.contiguous(), size=size, group=group, dim=0) + data.batch = data.batch.to(prev_device) + # all gather non_tensor_batch + all_non_tensor_batch = [None for _ in range(size)] + torch.distributed.all_gather_object(all_non_tensor_batch, data.non_tensor_batch, group=group) + data.non_tensor_batch = {k: np.concatenate([d[k] for d in all_non_tensor_batch]) for k in data.non_tensor_batch}