BudgetThinker_backup / easyr1 /verl /utils /logger /gen_logger.py

Upload folder using huggingface_hub

7155cf2 verified 4 months ago

3.48 kB

	# Copyright 2024 Bytedance Ltd. and/or its affiliates
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.


	from abc import ABC, abstractmethod
	from dataclasses import dataclass
	from typing import List, Tuple

	from ..py_functional import is_package_available


	if is_package_available("wandb"):
	import wandb # type: ignore


	if is_package_available("swanlab"):
	import swanlab # type: ignore


	@dataclass
	class GenerationLogger(ABC):
	    """Abstract interface for sinks that record sampled generations.

	    Subclasses must implement :meth:`log`; the base class itself cannot be
	    instantiated because ``log`` is abstract.
	    """

	    @abstractmethod
	    def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None:
	        """Record ``samples`` for the given ``step``.

	        Args:
	            samples: One 4-tuple per sample (three strings followed by a float).
	            step: Integer step the samples belong to.
	        """


	@dataclass
	class ConsoleGenerationLogger(GenerationLogger):
	    """Generation logger that writes every sample to standard output."""

	    def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None:
	        """Print each sample as a labelled block followed by a blank line.

	        Args:
	            samples: 4-tuples unpacked as (prompt, output, ground truth, score).
	            step: Accepted for interface compatibility; it is not printed.
	        """
	        for sample in samples:
	            # Strict unpacking: a sample that is not exactly four items raises ValueError.
	            inp, out, lab, score = sample
	            print(f"[prompt] {inp}\n[output] {out}\n[ground_truth] {lab}\n[score] {score}\n")


	@dataclass
	class WandbGenerationLogger(GenerationLogger):
	    """Generation logger that keeps a growing wandb table, one wide row per call."""

	    def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None:
	        """Append one row holding ``step`` and every sample field, then log the table.

	        The table is logged under the key ``"val/generations"``. The rows from
	        earlier calls are carried over through ``self.validation_table``.

	        Args:
	            samples: 4-tuples whose fields are flattened, in order, into the row.
	            step: Value stored in the ``step`` column and passed to ``wandb.log``.
	        """
	        # Leading "step" column, then four columns per sample numbered from 1.
	        # NOTE(review): the column list is rebuilt from len(samples) on every call
	        # while earlier rows are reused below - presumably callers always pass the
	        # same number of samples; confirm.
	        columns = ["step"]
	        for i in range(len(samples)):
	            columns.extend([f"input_{i + 1}", f"output_{i + 1}", f"label_{i + 1}", f"score_{i + 1}"])

	        try:
	            previous_table = self.validation_table
	        except AttributeError:
	            # First call: there is no table yet, so start from an empty one.
	            previous_table = wandb.Table(columns=columns)
	            self.validation_table = previous_table

	        # Build a fresh table from the accumulated rows instead of mutating the old one.
	        # Workaround for https://github.com/wandb/wandb/issues/2981#issuecomment-1997445737
	        new_table = wandb.Table(columns=columns, data=previous_table.data)

	        # Single row: the step followed by every field of every sample.
	        row_data = [step] + [field for sample in samples for field in sample]
	        new_table.add_data(*row_data)

	        wandb.log({"val/generations": new_table}, step=step)
	        self.validation_table = new_table


	@dataclass
	class SwanlabGenerationLogger(GenerationLogger):
	    """Generation logger that sends each sample to swanlab as a captioned text entry."""

	    def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None:
	        """Log all samples under ``"val/generations"`` as a list of ``swanlab.Text``.

	        Args:
	            samples: Sequences indexed 0-3 as input, output, label and score.
	            step: Passed through to ``swanlab.log``.
	        """
	        separator = "\n\n---\n\n"
	        # One Text object per sample, captioned with its 1-based position.
	        swanlab_text_list = [
	            swanlab.Text(
	                separator.join(
	                    (f"input: {sample[0]}", f"output: {sample[1]}", f"label: {sample[2]}", f"score: {sample[3]}")
	                ),
	                caption=f"sample {i + 1}",
	            )
	            for i, sample in enumerate(samples)
	        ]

	        swanlab.log({"val/generations": swanlab_text_list}, step=step)


	# Registry of the generation-logger classes defined in this module, keyed by
	# backend name. AggregateGenerationsLogger looks requested names up here and
	# instantiates the matching class.
	GEN_LOGGERS = {
	"console": ConsoleGenerationLogger,
	"wandb": WandbGenerationLogger,
	"swanlab": SwanlabGenerationLogger,
	}


	@dataclass
	class AggregateGenerationsLogger:
	    """Fan a batch of generation samples out to several logger backends.

	    Backends are selected by name from ``GEN_LOGGERS``. Names that have no
	    entry there are skipped without error.
	    """

	    def __init__(self, loggers: List[str]):
	        """Instantiate one backend per recognised name, in the order given.

	        Args:
	            loggers: Backend names to enable. A single bare string is treated
	                as a one-element list.
	        """
	        if isinstance(loggers, str):
	            # Iterating a str yields single characters, none of which is a
	            # registered name, so a bare "console" used to enable nothing.
	            loggers = [loggers]

	        self.loggers: List[GenerationLogger] = [
	            GEN_LOGGERS[name]() for name in loggers if name in GEN_LOGGERS
	        ]

	    def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None:
	        """Forward ``samples`` and ``step`` to every configured backend in order.

	        Args:
	            samples: Batch of sample tuples, passed through unchanged.
	            step: Step value, passed through unchanged.
	        """
	        for logger in self.loggers:
	            logger.log(samples, step)