yuccaaa commited on
Commit
acbfbc3
·
verified ·
1 Parent(s): ffcfc75

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. BioReason-0813/=1.18.1 +10 -0
  2. BioReason-0813/__pycache__/blip2_dna_module.cpython-310.pyc +0 -0
  3. BioReason-0813/__pycache__/blip2_grpo_trainer.cpython-310.pyc +0 -0
  4. BioReason-0813/bioreason/dna_modules/__pycache__/dna_module.cpython-310.pyc +0 -0
  5. BioReason-0813/bioreason/dna_modules/dna_module.py +49 -0
  6. BioReason-0813/bioreason/trainer/grpo_config.py +365 -0
  7. BioReason-0813/blip2_dna_module.py +349 -0
  8. BioReason-0813/blip2_grpo_trainer.py +591 -0
  9. BioReason-0813/blips_reason.py +866 -0
  10. BioReason-0813/model/__pycache__/blip2.cpython-310.pyc +0 -0
  11. BioReason-0813/model/__pycache__/blip2_opt.cpython-310.pyc +0 -0
  12. BioReason-0813/model/__pycache__/blip2_opt.cpython-311.pyc +0 -0
  13. BioReason-0813/model/__pycache__/blip2_stage2.cpython-310.pyc +0 -0
  14. BioReason-0813/model/__pycache__/blip2_stage2.cpython-311.pyc +0 -0
  15. BioReason-0813/model/__pycache__/help_funcs.cpython-310.pyc +0 -0
  16. BioReason-0813/model/blip2.py +126 -0
  17. BioReason-0813/model/blip2_opt.py +550 -0
  18. BioReason-0813/model/blip2_stage2.py +365 -0
  19. BioReason-0813/model/help_funcs.py +112 -0
  20. BioReason-0813/prompt_templates.py +57 -0
  21. BioReason-0813/run.sh +103 -0
  22. BioReason-main/.gitignore +180 -0
  23. BioReason-main/LICENSE +201 -0
  24. BioReason-main/README.md +148 -0
  25. BioReason-main/bioreason.egg-info/PKG-INFO +181 -0
  26. BioReason-main/bioreason.egg-info/SOURCES.txt +9 -0
  27. BioReason-main/bioreason.egg-info/dependency_links.txt +1 -0
  28. BioReason-main/bioreason.egg-info/requires.txt +19 -0
  29. BioReason-main/bioreason.egg-info/top_level.txt +1 -0
  30. BioReason-main/bioreason/__init__.py +0 -0
  31. BioReason-main/bioreason/dataset/__init__.py +11 -0
  32. BioReason-main/bioreason/dataset/kegg.py +382 -0
  33. BioReason-main/bioreason/dataset/utils.py +59 -0
  34. BioReason-main/bioreason/dataset/variant_effect.py +98 -0
  35. BioReason-main/bioreason/dna_modules/__init__.py +4 -0
  36. BioReason-main/bioreason/dna_modules/dna_module.py +49 -0
  37. BioReason-main/bioreason/dna_modules/nucleotide_module.py +263 -0
  38. BioReason-main/bioreason/models/__init__.py +9 -0
  39. BioReason-main/bioreason/models/dl/__init__.py +1 -0
  40. BioReason-main/bioreason/models/dl/chat_template_dl.py +1 -0
  41. BioReason-main/bioreason/models/dl/configuration_dl.py +232 -0
  42. BioReason-main/bioreason/models/dl/processing_dl.py +275 -0
  43. BioReason-main/bioreason/models/dna_llm.py +306 -0
  44. BioReason-main/bioreason/models/dna_only.py +203 -0
  45. BioReason-main/bioreason/models/evo2_tokenizer.py +219 -0
  46. BioReason-main/bioreason/trainer/__init__.py +7 -0
  47. BioReason-main/bioreason/trainer/demo_grpo.py +811 -0
  48. BioReason-main/bioreason/trainer/grpo_config.py +365 -0
  49. BioReason-main/bioreason/trainer/grpo_trainer.py +905 -0
  50. BioReason-main/bioreason/utils/__init__.py +0 -0
BioReason-0813/=1.18.1 ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
2
+ Requirement already satisfied: modelscope in /root/miniconda3/envs/bioreason/lib/python3.11/site-packages (1.29.1)
3
+ Requirement already satisfied: filelock in /root/miniconda3/envs/bioreason/lib/python3.11/site-packages (from modelscope) (3.18.0)
4
+ Requirement already satisfied: requests>=2.25 in /root/miniconda3/envs/bioreason/lib/python3.11/site-packages (from modelscope) (2.32.4)
5
+ Requirement already satisfied: setuptools in /root/miniconda3/envs/bioreason/lib/python3.11/site-packages (from modelscope) (78.1.1)
6
+ Requirement already satisfied: tqdm>=4.64.0 in /root/miniconda3/envs/bioreason/lib/python3.11/site-packages (from modelscope) (4.67.1)
7
+ Requirement already satisfied: urllib3>=1.26 in /root/miniconda3/envs/bioreason/lib/python3.11/site-packages (from modelscope) (2.5.0)
8
+ Requirement already satisfied: charset_normalizer<4,>=2 in /root/miniconda3/envs/bioreason/lib/python3.11/site-packages (from requests>=2.25->modelscope) (3.4.3)
9
+ Requirement already satisfied: idna<4,>=2.5 in /root/miniconda3/envs/bioreason/lib/python3.11/site-packages (from requests>=2.25->modelscope) (3.10)
10
+ Requirement already satisfied: certifi>=2017.4.17 in /root/miniconda3/envs/bioreason/lib/python3.11/site-packages (from requests>=2.25->modelscope) (2025.8.3)
BioReason-0813/__pycache__/blip2_dna_module.cpython-310.pyc ADDED
Binary file (11.3 kB). View file
 
BioReason-0813/__pycache__/blip2_grpo_trainer.cpython-310.pyc ADDED
Binary file (15.6 kB). View file
 
BioReason-0813/bioreason/dna_modules/__pycache__/dna_module.cpython-310.pyc ADDED
Binary file (2.53 kB). View file
 
BioReason-0813/bioreason/dna_modules/dna_module.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, Any, Union
3
+ import torch
4
+
5
+ class DNABaseModule(ABC):
6
+ def __init__(self):
7
+ super().__init__()
8
+
9
+ @abstractmethod
10
+ def get_dnallm_key(self):
11
+ pass
12
+
13
+ @abstractmethod
14
+ def get_model_class(self, model_id: str, model_init_kwargs: dict):
15
+ pass
16
+
17
+ def post_model_init(self, model, processing_class):
18
+ pass
19
+
20
+ def is_embeds_input(self):
21
+ return False
22
+
23
+ @abstractmethod
24
+ def get_processing_class(self):
25
+ pass
26
+
27
+ @abstractmethod
28
+ def get_dnallm_modules_keywords(self):
29
+ pass
30
+
31
+ @abstractmethod
32
+ def get_custom_multimodal_keywords(self):
33
+ pass
34
+
35
+ @abstractmethod
36
+ def get_non_generate_params(self):
37
+ pass
38
+
39
+ @abstractmethod
40
+ def get_custom_processing_keywords(self):
41
+ pass
42
+
43
+ @abstractmethod
44
+ def prepare_prompt(self, processing_class, inputs: dict[str, Union[torch.Tensor, Any]]):
45
+ pass
46
+
47
+ @abstractmethod
48
+ def prepare_model_inputs(self, processing_class, prompts_text, images, return_tensors, padding, padding_side, add_special_tokens):
49
+ pass
BioReason-0813/bioreason/trainer/grpo_config.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from dataclasses import dataclass, field
16
+ from typing import Optional, Union
17
+
18
+ from transformers import TrainingArguments
19
+
20
+
21
+ @dataclass
22
+ class DNALLMGRPOConfig(TrainingArguments):
23
+ r"""
24
+ Configuration class for the [`GRPOTrainer`].
25
+
26
+ Only the parameters specific to GRPO training are listed here. For details on other parameters, refer to the
27
+ [`~transformers.TrainingArguments`] documentation.
28
+
29
+ Using [`~transformers.HfArgumentParser`] we can turn this class into
30
+ [argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
31
+ command line.
32
+
33
+ Parameters:
34
+ > Parameters that control the model and reference model
35
+
36
+ model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
37
+ Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model`
38
+ argument of the [`GRPOTrainer`] is provided as a string.
39
+
40
+ > Parameters that control the data preprocessing
41
+
42
+ remove_unused_columns (`bool`, *optional*, defaults to `False`):
43
+ Whether to only keep the column `"prompt"` in the dataset. If you use a custom reward function that
44
+ requires any column other than `"prompts"` and `"completions"`, you should keep this to `False`.
45
+ max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
46
+ Maximum length of the prompt. If the prompt is longer than this value, it will be truncated left.
47
+ num_generations (`int` or `None`, *optional*, defaults to `8`):
48
+ Number of generations per prompt to sample. The global batch size (num_processes * per_device_batch_size)
49
+ must be divisible by this value.
50
+ max_completion_length (`int` or `None`, *optional*, defaults to `256`):
51
+ Maximum length of the generated completion.
52
+ ds3_gather_for_generation (`bool`, *optional*, defaults to `True`):
53
+ This setting applies to DeepSpeed ZeRO-3. If enabled, the policy model weights are gathered for generation,
54
+ improving generation speed. However, disabling this option allows training models that exceed the VRAM
55
+ capacity of a single GPU, albeit at the cost of slower generation. Disabling this option is not compatible
56
+ with vLLM generation.
57
+
58
+ > Parameters that control generation
59
+
60
+ temperature (`float`, defaults to `0.9`):
61
+ Temperature for sampling. The higher the temperature, the more random the completions.
62
+ top_p (`float`, *optional*, defaults to `1.0`):
63
+ Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to
64
+ `1.0` to consider all tokens.
65
+ top_k (`int` or `None`, *optional*, defaults to `50`):
66
+ Number of highest probability vocabulary tokens to keep for top-k-filtering. If `None`, top-k-filtering is
67
+ disabled.
68
+ min_p (`float` or `None`, *optional*, defaults to `None`):
69
+ Minimum token probability, which will be scaled by the probability of the most likely token. It must be a
70
+ value between `0.0` and `1.0`. Typical values are in the `0.01-0.2` range.
71
+ repetition_penalty (`float`, *optional*, defaults to `1.0`):
72
+ Float that penalizes new tokens based on whether they appear in the prompt and the generated text so far.
73
+ Values > `1.0` encourage the model to use new tokens, while values < `1.0` encourage the model to repeat
74
+ tokens.
75
+ cache_implementation (`str` or `None`, *optional*, defaults to `None`):
76
+ Implementation of the cache method for faster generation when use_vllm is set to False.
77
+
78
+ > Parameters that control generation acceleration powered by vLLM
79
+
80
+ use_vllm (`bool`, *optional*, defaults to `False`):
81
+ Whether to use vLLM for generating completions. If set to `True`, ensure that a GPU is kept unused for
82
+ training, as vLLM will require one for generation. vLLM must be installed (`pip install vllm`).
83
+ vllm_device (`str`, *optional*, defaults to `"auto"`):
84
+ Device where vLLM generation will run, e.g. `"cuda:1"`. If set to `"auto"` (default), the system will
85
+ automatically select the next available GPU after the last one used for training. This assumes that
86
+ training has not already occupied all available GPUs. If only one device is available, the device will be
87
+ shared between both training and vLLM.
88
+ vllm_gpu_memory_utilization (`float`, *optional*, defaults to `0.9`):
89
+ Ratio (between 0 and 1) of GPU memory to reserve for the model weights, activations, and KV cache on the
90
+ device dedicated to generation powered by vLLM. Higher values will increase the KV cache size and thus
91
+ improve the model's throughput. However, if the value is too high, it may cause out-of-memory (OOM) errors
92
+ during initialization.
93
+ vllm_dtype (`str`, *optional*, defaults to `"auto"`):
94
+ Data type to use for vLLM generation. If set to `"auto"`, the data type will be automatically determined
95
+ based on the model configuration. Find the supported values in the vLLM documentation.
96
+ vllm_max_model_len (`int` or `None`, *optional*, defaults to `None`):
97
+ If set, the `max_model_len` to use for vLLM. This could be useful when running with reduced
98
+ `vllm_gpu_memory_utilization`, leading to a reduced KV cache size. If not set, vLLM will use the model
99
+ context size, which might be much larger than the KV cache, leading to inefficiencies.
100
+ vllm_enable_prefix_caching (`bool`, *optional*, defaults to `True`):
101
+ Whether to enable prefix caching in vLLM. If set to `True` (default), ensure that the model and the hardware
102
+ support this feature.
103
+ vllm_guided_decoding_regex (`str` or `None`, *optional*, defaults to `None`):
104
+ Regex for vLLM guided decoding. If `None` (default), guided decoding is disabled.
105
+
106
+ > Parameters that control the training
107
+
108
+ learning_rate (`float`, *optional*, defaults to `1e-6`):
109
+ Initial learning rate for [`AdamW`] optimizer. The default value replaces that of
110
+ [`~transformers.TrainingArguments`].
111
+ beta (`float`, *optional*, defaults to `0.04`):
112
+ KL coefficient. If `0.0`, the reference model is not loaded, reducing memory usage and improving training
113
+ speed, but may be numerically unstable for long training runs.
114
+ num_iterations (`int`, *optional*, defaults to `1`):
115
+ Number of iterations per batch (denoted as μ in the algorithm).
116
+ epsilon (`float`, *optional*, defaults to `0.2`):
117
+ Epsilon value for clipping.
118
+ epsilon_high (`float` or `None`, *optional*, defaults to `None`):
119
+ Upper-bound epsilon value for clipping. If not specified, it defaults to the same value as the lower-bound
120
+ specified in argument `epsilon`. Paper [DAPO](https://huggingface.co/papers/2503.14476) recommends `0.28`.
121
+ reward_weights (`list[float]` or `None`, *optional*, defaults to `None`):
122
+ Weights for each reward function. Must match the number of reward functions. If `None`, all rewards are
123
+ weighted equally with weight `1.0`.
124
+ sync_ref_model (`bool`, *optional*, defaults to `False`):
125
+ Whether to synchronize the reference model with the active model every `ref_model_sync_steps` steps, using
126
+ the `ref_model_mixup_alpha` parameter. This synchronization originites from the
127
+ [TR-DPO](https://huggingface.co/papers/2404.09656) paper.
128
+ ref_model_mixup_alpha (`float`, *optional*, defaults to `0.6`):
129
+ α parameter from the [TR-DPO](https://huggingface.co/papers/2404.09656) paper, which controls the mix
130
+ between the current policy and the previous reference policy during updates. The reference policy is
131
+ updated according to the equation: `π_ref = α * π_θ + (1 - α) * π_ref_prev`. To use this parameter, you
132
+ must set `sync_ref_model=True`.
133
+ ref_model_sync_steps (`int`, *optional*, defaults to `512`):
134
+ τ parameter from the [TR-DPO](https://huggingface.co/papers/2404.09656) paper, which determines how
135
+ frequently the current policy is synchronized with the reference policy. To use this parameter, you must
136
+ set `sync_ref_model=True`.
137
+
138
+ > Parameters that control the logging
139
+
140
+ log_completions (`bool`, *optional*, defaults to `False`):
141
+ Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is
142
+ installed, it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`.
143
+ """
144
+
145
+ # Parameters that control the model and reference model
146
+ model_init_kwargs: Optional[dict] = field(
147
+ default=None,
148
+ metadata={
149
+ "help": "Keyword arguments for `transformers.AutoModelForCausalLM.from_pretrained`, used when the `model` "
150
+ "argument of the `GRPOTrainer` is provided as a string."
151
+ },
152
+ )
153
+
154
+ # Parameters that control the data preprocessing
155
+ # The default value remove_unused_columns is overwritten from the parent class, because in GRPO we usually rely on
156
+ # additional columns to compute the reward
157
+ remove_unused_columns: Optional[bool] = field(
158
+ default=False,
159
+ metadata={
160
+ "help": "Whether to only keep the column 'prompt' in the dataset. If you use a custom reward function "
161
+ "that requires any column other than 'prompts' and 'completions', you should keep this to `False`."
162
+ },
163
+ )
164
+ max_prompt_length: Optional[int] = field(
165
+ default=512,
166
+ metadata={
167
+ "help": "Maximum length of the prompt. If the prompt is longer than this value, it will be truncated left."
168
+ },
169
+ )
170
+ num_generations: Optional[int] = field(
171
+ default=8,
172
+ metadata={
173
+ "help": "Number of generations to sample. The global batch size (num_processes * per_device_batch_size) "
174
+ "must be divisible by this value."
175
+ },
176
+ )
177
+ max_completion_length: Optional[int] = field(
178
+ default=800,
179
+ metadata={"help": "Maximum length of the generated completion."},
180
+ )
181
+ ds3_gather_for_generation: bool = field(
182
+ default=True,
183
+ metadata={
184
+ "help": "This setting applies to DeepSpeed ZeRO-3. If enabled, the policy model weights are gathered for "
185
+ "generation, improving generation speed. However, disabling this option allows training models that "
186
+ "exceed the VRAM capacity of a single GPU, albeit at the cost of slower generation. Disabling this option "
187
+ "is not compatible with vLLM generation."
188
+ },
189
+ )
190
+
191
+ # Parameters that control generation
192
+ temperature: float = field(
193
+ default=0.6,
194
+ metadata={"help": "Temperature for sampling. The higher the temperature, the more random the completions."},
195
+ )
196
+ top_p: float = field(
197
+ default=0.95,
198
+ metadata={
199
+ "help": "Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. "
200
+ "Set to 1.0 to consider all tokens."
201
+ },
202
+ )
203
+ top_k: Optional[int] = field(
204
+ default=20,
205
+ metadata={
206
+ "help": "Number of highest probability vocabulary tokens to keep for top-k-filtering. If `None`, "
207
+ "top-k-filtering is disabled."
208
+ },
209
+ )
210
+ min_p: Optional[float] = field(
211
+ default=None,
212
+ metadata={
213
+ "help": "Minimum token probability, which will be scaled by the probability of the most likely token. It "
214
+ "must be a value between 0.0 and 1.0. Typical values are in the 0.01-0.2 range."
215
+ },
216
+ )
217
+ repetition_penalty: float = field(
218
+ default=1.0,
219
+ metadata={
220
+ "help": "Float that penalizes new tokens based on whether they appear in the prompt and the generated "
221
+ "text so far. Values > 1.0 encourage the model to use new tokens, while values < 1.0 encourage the model "
222
+ "to repeat tokens."
223
+ },
224
+ )
225
+ cache_implementation: Optional[str] = field(
226
+ default=None,
227
+ metadata={"help": "Implementation of the cache method for faster generation when use_vllm is set to False."},
228
+ )
229
+
230
+ # Parameters that control generation acceleration powered by vLLM
231
+ use_vllm: Optional[bool] = field(
232
+ default=False,
233
+ metadata={
234
+ "help": "Whether to use vLLM for generating completions. If set to `True`, ensure that a GPU is kept "
235
+ "unused for training, as vLLM will require one for generation. vLLM must be installed "
236
+ "(`pip install vllm`)."
237
+ },
238
+ )
239
+ vllm_device: Optional[str] = field(
240
+ default="auto",
241
+ metadata={
242
+ "help": "Device where vLLM generation will run, e.g. 'cuda:1'. If set to 'auto' (default), the system "
243
+ "will automatically select the next available GPU after the last one used for training. This assumes "
244
+ "that training has not already occupied all available GPUs."
245
+ },
246
+ )
247
+ vllm_gpu_memory_utilization: float = field(
248
+ default=0.9,
249
+ metadata={
250
+ "help": "Ratio (between 0 and 1) of GPU memory to reserve for the model weights, activations, and KV "
251
+ "cache on the device dedicated to generation powered by vLLM. Higher values will increase the KV cache "
252
+ "size and thus improve the model's throughput. However, if the value is too high, it may cause "
253
+ "out-of-memory (OOM) errors during initialization."
254
+ },
255
+ )
256
+ vllm_dtype: Optional[str] = field(
257
+ default="auto",
258
+ metadata={
259
+ "help": "Data type to use for vLLM generation. If set to 'auto', the data type will be automatically "
260
+ "determined based on the model configuration. Find the supported values in the vLLM documentation."
261
+ },
262
+ )
263
+ vllm_max_model_len: Optional[int] = field(
264
+ default=None,
265
+ metadata={
266
+ "help": "If set, the `max_model_len` to use for vLLM. This could be useful when running with reduced "
267
+ "`vllm_gpu_memory_utilization`, leading to a reduced KV cache size. If not set, vLLM will use the model "
268
+ "context size, which might be much larger than the KV cache, leading to inefficiencies."
269
+ },
270
+ )
271
+ vllm_enable_prefix_caching: Optional[bool] = field(
272
+ default=True,
273
+ metadata={
274
+ "help": "Whether to enable prefix caching in vLLM. If set to `True` (default), ensure that the model and "
275
+ "the hardware support this feature."
276
+ },
277
+ )
278
+ vllm_guided_decoding_regex: Optional[str] = field(
279
+ default=None,
280
+ metadata={"help": "Regex for vLLM guided decoding. If `None` (default), guided decoding is disabled."},
281
+ )
282
+
283
+ # Parameters that control the training
284
+ learning_rate: float = field(
285
+ default=1e-6,
286
+ metadata={
287
+ "help": "Initial learning rate for `AdamW` optimizer. The default value replaces that of "
288
+ "`transformers.TrainingArguments`."
289
+ },
290
+ )
291
+ beta: float = field(
292
+ default=0.04,
293
+ metadata={
294
+ "help": "KL coefficient. If `0.0`, the reference model is not loaded, reducing memory usage and improving "
295
+ "training speed, but may be numerically unstable for long training runs."
296
+ },
297
+ )
298
+ num_iterations: int = field(
299
+ default=1,
300
+ metadata={"help": "Number of iterations per batch (denoted as μ in the algorithm)."},
301
+ )
302
+ epsilon: float = field(
303
+ default=0.2,
304
+ metadata={"help": "Epsilon value for clipping."},
305
+ )
306
+ epsilon_high: Optional[float] = field(
307
+ default=None,
308
+ metadata={
309
+ "help": "Upper-bound epsilon value for clipping. If not specified, it defaults to the same value as the "
310
+ "lower-bound specified in argument `epsilon`. Paper DAPO recommends `0.28`."
311
+ },
312
+ )
313
+ reward_weights: Optional[list[float]] = field(
314
+ default=None,
315
+ metadata={
316
+ "help": "Weights for each reward function. Must match the number of reward functions. If `None`, all "
317
+ "rewards are weighted equally with weight `1.0`."
318
+ },
319
+ )
320
+ sync_ref_model: bool = field(
321
+ default=False,
322
+ metadata={
323
+ "help": "Whether to synchronize the reference model with the active model every `ref_model_sync_steps` "
324
+ "steps, using the `ref_model_mixup_alpha` parameter."
325
+ },
326
+ )
327
+ ref_model_mixup_alpha: float = field(
328
+ default=0.6,
329
+ metadata={
330
+ "help": "α parameter from the TR-DPO paper, which controls the mix between the current policy and the "
331
+ "previous reference policy during updates. The reference policy is updated according to the equation: "
332
+ "`π_ref = α * π_θ + (1 - α) * π_ref_prev`. To use this parameter, you must set `sync_ref_model=True`."
333
+ },
334
+ )
335
+ ref_model_sync_steps: int = field(
336
+ default=512,
337
+ metadata={
338
+ "help": "τ parameter from the TR-DPO paper, which determines how frequently the current policy is "
339
+ "synchronized with the reference policy. To use this parameter, you must set `sync_ref_model=True`."
340
+ },
341
+ )
342
+
343
+ # Parameters that control the logging
344
+ log_completions: bool = field(
345
+ default=True,
346
+ metadata={
347
+ "help": "Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is "
348
+ "installed, it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`."
349
+ },
350
+ )
351
+
352
+ report_to: Union[None, str, list[str]] = field(
353
+ default="wandb", metadata={"help": "The list of integrations to report the results and logs to."}
354
+ )
355
+
356
+ logging_first_step: bool = field(default=False, metadata={"help": "Log the first global_step"})
357
+ logging_steps: float = field(
358
+ default=2,
359
+ metadata={
360
+ "help": (
361
+ "Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
362
+ "If smaller than 1, will be interpreted as ratio of total training steps."
363
+ )
364
+ },
365
+ )
BioReason-0813/blip2_dna_module.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing import Dict, Any, Union, List, Optional, Callable, Type

import torch
from transformers import (
    AutoProcessor,
    AutoTokenizer,
    Blip2Processor,
)
from trl.data_utils import maybe_apply_chat_template

from bioreason.dna_modules.dna_module import DNABaseModule
from model.blip2_stage2 import Blip2Stage2
11
+
12
+
13
+ class Blip2DNAModule(DNABaseModule):
14
+ """
15
+ DNA module implementation for BLIP2-based models.
16
+
17
+ This module provides the interface between BLIP2 models and the GRPO training
18
+ infrastructure, handling model loading, processing setup, and reward functions.
19
+ """
20
+
21
+ def __init__(self):
22
+ """Initialize the Blip2DNAModule."""
23
+ super().__init__()
24
+
25
+ def get_dnallm_key(self) -> str:
26
+ """
27
+ Get the key identifier for this DNA-LLM implementation.
28
+
29
+ Returns:
30
+ String identifier for this module type
31
+ """
32
+ return "blip2"
33
+
34
+ def get_model_class(self, model_id: str, model_init_kwargs: Dict[str, Any]) -> Type:
35
+ """
36
+ Return the appropriate model class based on model ID.
37
+
38
+ Args:
39
+ model_id: Identifier for the model
40
+ model_init_kwargs: Initialization arguments for the model
41
+
42
+ Returns:
43
+ The model class to instantiate
44
+
45
+ Raises:
46
+ ValueError: If the model is not supported
47
+ """
48
+ if "blip2" in model_id.lower() or "stage2" in model_id.lower():
49
+ model_cls = Blip2Stage2
50
+ else:
51
+ raise ValueError(f"Unsupported model: {model_id}")
52
+ return model_cls
53
+
54
+ def post_model_init(self, model: Any, processing_class: Any) -> None:
55
+ """
56
+ Perform any post-initialization setup on the model.
57
+
58
+ Args:
59
+ model: The initialized model
60
+ processing_class: The processor for the model
61
+ """
62
+ # BLIP2 models might need specific post-init setup
63
+ if hasattr(model, 'blip2') and hasattr(model.blip2, 'llm_tokenizer'):
64
+ # Ensure the tokenizer is properly configured
65
+ if not hasattr(model.blip2.llm_tokenizer, 'pad_token') or model.blip2.llm_tokenizer.pad_token is None:
66
+ model.blip2.llm_tokenizer.pad_token = model.blip2.llm_tokenizer.eos_token
67
+
68
+ def get_processing_class(self) -> Type:
69
+ """
70
+ Get the processing class to use with this BLIP2 model.
71
+
72
+ Returns:
73
+ The processing class
74
+ """
75
+ return Blip2Processor
76
+
77
+ def get_dnallm_modules_keywords(self) -> List[str]:
78
+ """
79
+ Get keywords to identify DNA-specific modules in the model.
80
+
81
+ Used to exclude DNA modules from LoRA adaptation during training.
82
+
83
+ Returns:
84
+ List of keywords that identify DNA modules
85
+ """
86
+ return ["plm", "qformer", "opt_proj"]
87
+
88
+ def get_custom_multimodal_keywords(self) -> List[str]:
89
+ """
90
+ Get keywords for multimodal inputs that should be passed to the model.
91
+
92
+ Returns:
93
+ List of input keywords for multimodal processing
94
+ """
95
+ return ["prot_batch", "prompt_batch"]
96
+
97
+ def get_non_generate_params(self) -> List[str]:
98
+ """
99
+ Get parameter names that should be excluded from generation.
100
+
101
+ Returns:
102
+ List of parameter names to exclude from generation calls
103
+ """
104
+ return ["prot_batch"]
105
+
106
+ def get_custom_processing_keywords(self) -> List[tuple]:
107
+ """
108
+ Get custom processing keywords for the processor.
109
+
110
+ Returns:
111
+ List of (component, parameter) tuples for custom processing
112
+ """
113
+ return [("plm_tokenizer", "max_length"), ("llm_tokenizer", "max_length")]
114
+
115
+ def prepare_prompt(
116
+ self, processing_class: Any, inputs: List[Dict[str, Union[torch.Tensor, Any]]]
117
+ ) -> List[str]:
118
+ """
119
+ Prepare prompts from input examples.
120
+
121
+ Args:
122
+ processing_class: The processor to use
123
+ inputs: List of input examples
124
+
125
+ Returns:
126
+ List of prepared prompts
127
+ """
128
+ prompts_text = []
129
+ for example in inputs:
130
+ if "prompt" in example:
131
+ # Extract text content from conversational format
132
+ if isinstance(example["prompt"], list) and len(example["prompt"]) > 0:
133
+ user_content = example["prompt"][0].get("content", "")
134
+ if isinstance(user_content, list):
135
+ # Extract text from multimodal content
136
+ text_parts = [item.get("text", "") for item in user_content if item.get("type") == "text"]
137
+ prompt_text = " ".join(text_parts)
138
+ else:
139
+ prompt_text = str(user_content)
140
+ else:
141
+ prompt_text = str(example["prompt"])
142
+ else:
143
+ prompt_text = ""
144
+ prompts_text.append(prompt_text)
145
+ return prompts_text
146
+
147
+ def prepare_model_inputs(
148
+ self,
149
+ processing_class: Any,
150
+ model: Any,
151
+ prompts_text: List[str],
152
+ batch_dna_sequences: List[List[str]],
153
+ return_tensors: str = "pt",
154
+ padding: bool = True,
155
+ padding_side: str = "left",
156
+ add_special_tokens: bool = False,
157
+ ) -> Dict[str, Any]:
158
+ """
159
+ Prepare inputs for the BLIP2 model.
160
+
161
+ Args:
162
+ processing_class: The processor to use
163
+ model: The model to prepare inputs for
164
+ prompts_text: List of text prompts
165
+ batch_dna_sequences: List of lists of DNA sequences (treated as protein sequences)
166
+ return_tensors: Return format for tensors
167
+ padding: Whether to pad inputs
168
+ padding_side: Side to pad on
169
+ add_special_tokens: Whether to add special tokens
170
+
171
+ Returns:
172
+ Processed inputs for the model
173
+ """
174
+ # Get the BLIP2 model from the wrapper
175
+ blip2_model = model.blip2 if hasattr(model, 'blip2') else model
176
+
177
+ # Prepare protein batch (using DNA sequences as protein sequences)
178
+ # Flatten all DNA sequences to treat them as individual protein sequences
179
+ all_sequences = []
180
+ for sequences in batch_dna_sequences:
181
+ all_sequences.extend(sequences)
182
+
183
+ if all_sequences:
184
+ prot_batch = blip2_model.plm_tokenizer(
185
+ all_sequences,
186
+ padding=padding,
187
+ truncation=True,
188
+ max_length=512, # Default protein sequence length
189
+ return_tensors=return_tensors,
190
+ )
191
+ else:
192
+ # Empty batch handling
193
+ prot_batch = {
194
+ 'input_ids': torch.empty(0, 1, dtype=torch.long),
195
+ 'attention_mask': torch.empty(0, 1, dtype=torch.long)
196
+ }
197
+
198
+ # Prepare prompt batch
199
+ prompt_batch = blip2_model.llm_tokenizer(
200
+ prompts_text,
201
+ padding=padding,
202
+ truncation=True,
203
+ max_length=256, # Default prompt length
204
+ return_tensors=return_tensors,
205
+ )
206
+
207
+ return {
208
+ "prot_batch": prot_batch,
209
+ "prompt_batch": prompt_batch,
210
+ "input_ids": prompt_batch["input_ids"], # For compatibility
211
+ "attention_mask": prompt_batch["attention_mask"], # For compatibility
212
+ }
213
+
214
+ def is_embeds_input(self) -> bool:
215
+ """
216
+ Whether the model uses embeddings as input (instead of token IDs).
217
+
218
+ Returns:
219
+ Boolean indicating if the model takes embedding inputs
220
+ """
221
+ return True # BLIP2 uses embeddings internally
222
+
223
+ @staticmethod
224
+ def get_question_template() -> str:
225
+ """
226
+ Get the template for formatting questions.
227
+
228
+ Returns:
229
+ String template for questions
230
+ """
231
+ return "{Question}"
232
+
233
+ @staticmethod
234
+ def format_reward_rec(completions: List[Dict[str, Any]], **kwargs) -> List[float]:
235
+ """
236
+ Check if the BLIP2 model output matches a specific format.
237
+
238
+ Args:
239
+ completions: List of model completions
240
+ **kwargs: Additional arguments
241
+
242
+ Returns:
243
+ List of reward scores (1.0 for match, 0.0 for no match)
244
+ """
245
+ import re
246
+ import os
247
+ from datetime import datetime
248
+
249
+ # Pattern to match the expected output format
250
+ pattern = r"<think>.*?</think>\s*<answer>.*?\{.*\[\d+,\s*\d+,\s*\d+,\s*\d+\].*\}.*?</answer>"
251
+ completion_contents = [completion[0]["content"] for completion in completions]
252
+ matches = [
253
+ re.search(pattern, content, re.DOTALL) is not None
254
+ for content in completion_contents
255
+ ]
256
+
257
+ # Log format results if in debug mode
258
+ current_time = datetime.now().strftime("%d-%H-%M-%S-%f")
259
+ if os.getenv("DEBUG_MODE") == "true":
260
+ log_path = os.getenv("LOG_PATH")
261
+ with open(
262
+ log_path.replace(".txt", "_format.txt"), "a", encoding="utf-8"
263
+ ) as f:
264
+ f.write(f"------------- {current_time} Format reward -------------\n")
265
+ for content, match in zip(completion_contents, matches):
266
+ f.write(f"Content: {content}\n")
267
+ f.write(f"Has format: {bool(match)}\n")
268
+
269
+ return [1.0 if match else 0.0 for match in matches]
270
+
271
+ @staticmethod
272
+ def select_reward_func(func: str, task_type: str) -> Callable:
273
+ """
274
+ Select the appropriate reward function based on function name and task type.
275
+
276
+ Args:
277
+ func: The type of reward function ('accuracy', 'format', etc.)
278
+ task_type: The type of task ('rec', etc.)
279
+
280
+ Returns:
281
+ The reward function to use
282
+
283
+ Raises:
284
+ ValueError: If the function or task type is not supported
285
+ """
286
+ if func == "accuracy":
287
+ match task_type:
288
+ case "rec":
289
+ return Blip2DNAModule.iou_reward
290
+ case _:
291
+ raise ValueError(f"Unsupported reward function: {func}")
292
+ elif func == "format":
293
+ match task_type:
294
+ case "rec":
295
+ return Blip2DNAModule.format_reward_rec
296
+ case _:
297
+ raise ValueError(f"Unsupported reward function: {func}")
298
+ else:
299
+ raise ValueError(f"Unsupported reward function: {func}")
300
+
301
+ @staticmethod
302
+ def iou_reward(completions: List[Dict[str, Any]], **kwargs) -> List[float]:
303
+ """
304
+ Placeholder IoU reward function.
305
+
306
+ Args:
307
+ completions: List of model completions
308
+ **kwargs: Additional arguments
309
+
310
+ Returns:
311
+ List of reward scores
312
+ """
313
+ # Placeholder implementation
314
+ return [1.0] * len(completions)
315
+
316
+
317
class Blip2Processor:
    """
    Simple processor wrapper for BLIP2 models to maintain compatibility
    with the GRPO trainer interface.

    Wraps a protein-language-model tokenizer and an LLM tokenizer; text
    processing and decoding are delegated to the LLM tokenizer when present.
    """

    def __init__(self, plm_tokenizer=None, llm_tokenizer=None):
        # Tokenizer for protein/DNA sequences (may be None).
        self.plm_tokenizer = plm_tokenizer
        # Tokenizer for the language model (may be None).
        self.llm_tokenizer = llm_tokenizer

        # Compatibility attributes read by the trainer / generation config.
        # Fix: always define them so attribute access cannot raise when no
        # LLM tokenizer is supplied (previously they were left unset).
        self.eos_token_id = llm_tokenizer.eos_token_id if llm_tokenizer else None
        self.pad_token_id = llm_tokenizer.pad_token_id if llm_tokenizer else None

    def __call__(self, *args, **kwargs):
        """
        Process inputs for the BLIP2 model.

        Delegates to the LLM tokenizer when available; otherwise returns a
        minimal single-token batch as a fallback.
        """
        if self.llm_tokenizer:
            return self.llm_tokenizer(*args, **kwargs)
        # Fallback behavior when no tokenizer was provided.
        return {"input_ids": torch.tensor([[1]]), "attention_mask": torch.tensor([[1]])}

    def batch_decode(self, *args, **kwargs):
        """Decode token sequences via the LLM tokenizer (or return [""] if unset)."""
        if self.llm_tokenizer:
            return self.llm_tokenizer.batch_decode(*args, **kwargs)
        return [""]
BioReason-0813/blip2_grpo_trainer.py ADDED
@@ -0,0 +1,591 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import time
17
+ import textwrap
18
+ import pandas as pd
19
+ from collections import defaultdict
20
+ from typing import Any, Callable, Optional, Union, Sized
21
+
22
+ import torch
23
+ import torch.utils.data
24
+ import transformers
25
+ from datasets import Dataset, IterableDataset
26
+ from packaging import version
27
+ from transformers import (
28
+ AutoModelForCausalLM,
29
+ AutoModelForSequenceClassification,
30
+ AutoProcessor,
31
+ AutoTokenizer,
32
+ GenerationConfig,
33
+ PreTrainedModel,
34
+ PreTrainedTokenizerBase,
35
+ Trainer,
36
+ TrainerCallback,
37
+ is_wandb_available,
38
+ )
39
+ from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
40
+ from transformers.utils import is_peft_available
41
+
42
+ from trl.data_utils import apply_chat_template, is_conversational, maybe_apply_chat_template
43
+ from trl.models import create_reference_model, prepare_deepspeed, unwrap_model_for_generation
44
+ from trl.trainer.grpo_config import GRPOConfig
45
+ from trl.trainer.utils import generate_model_card, get_comet_experiment_url
46
+
47
+ from accelerate.utils import is_peft_model, set_seed, gather_object
48
+ import PIL.Image
49
+
50
+ import copy
51
+ from torch.utils.data import Sampler
52
+ import warnings
53
+
54
+ if is_peft_available():
55
+ from peft import PeftConfig, get_peft_model, prepare_model_for_kbit_training
56
+
57
+ if is_wandb_available():
58
+ import wandb
59
+
60
+ from bioreason.dna_modules.dna_module import DNABaseModule
61
+ from bioreason.trainer import DNALLMGRPOConfig
62
+
63
+ # Import the RepeatRandomSampler from the original trainer
64
+ from bioreason.trainer.grpo_trainer import RepeatRandomSampler
65
+
66
+ # What we call a reward function is a callable that takes a list of prompts and completions and returns a list of
67
+ # rewards. When it's a string, it's a model ID, so it's loaded as a pretrained model.
68
+ RewardFunc = Union[str, PreTrainedModel, Callable[[list, list], list[float]]]
69
+
70
+
71
+ class Blip2GRPOTrainer(Trainer):
72
+ """
73
+ Modified GRPO Trainer for BLIP2 models.
74
+
75
+ This trainer adapts the original GRPO trainer to work with BLIP2 architecture,
76
+ handling the different input formats and forward pass requirements.
77
+ """
78
+
79
    def __init__(
        self,
        model: Union[str, PreTrainedModel],
        reward_funcs: Union[RewardFunc, list[RewardFunc]],
        args: DNALLMGRPOConfig = None,
        dna_module: DNABaseModule = None,
        train_dataset: Optional[Union[Dataset, IterableDataset]] = None,
        eval_dataset: Optional[Union[Dataset, IterableDataset, dict[str, Union[Dataset, IterableDataset]]]] = None,
        processing_class: Optional[PreTrainedTokenizerBase] = None,
        reward_processing_classes: Optional[Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]] = None,
        callbacks: Optional[list[TrainerCallback]] = None,
        optimizers: tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None),
        peft_config: Optional["PeftConfig"] = None,
        freeze_dna_modules: Optional[bool] = False,
        attn_implementation: str = "flash_attention_2",
        torch_dtype: str = "bfloat16",
        **kwargs,
    ):
        """
        Set up the BLIP2 GRPO trainer.

        Configures dtype/LoRA/module freezing on the BLIP2 model, builds the
        reference model, the processing class, the reward functions and the
        generation config, then delegates to ``transformers.Trainer.__init__``.

        Args:
            model: an already-instantiated model (string model IDs are rejected)
                expected to expose the BLIP2 stack via ``model.blip2``
            reward_funcs: callable(s) or reward-model ID(s)/instances
            args: GRPO training configuration
            dna_module: adapter providing prompt/input preparation hooks
            train_dataset / eval_dataset: datasets of prompt examples
            processing_class: processor; built from the model's tokenizers if None
            reward_processing_classes: tokenizers for model-based reward funcs
            callbacks, optimizers: forwarded to ``Trainer``
            peft_config: if given, LoRA is applied to the LLM component
            freeze_dna_modules: freeze the protein LM and Q-Former when True
            attn_implementation / torch_dtype: model init overrides
            **kwargs: ignored  # NOTE(review): silently dropped — confirm intended
        """
        # Args
        if args is None:
            model_name = model if isinstance(model, str) else "blip2-model"
            args = GRPOConfig(f"{model_name}-GRPO")

        self.dna_module = dna_module

        # Models
        model_init_kwargs = args.model_init_kwargs or {}
        model_init_kwargs["attn_implementation"] = attn_implementation
        if model_init_kwargs.get("torch_dtype") is None:
            model_init_kwargs["torch_dtype"] = torch_dtype

        assert not isinstance(model, str), "model must NOT be a string in the current implementation"

        # Normalize torch_dtype: allow torch.dtype, "auto", None, or a dtype name.
        torch_dtype = model_init_kwargs.get("torch_dtype")
        if isinstance(torch_dtype, torch.dtype) or torch_dtype == "auto" or torch_dtype is None:
            pass  # torch_dtype is already a torch.dtype or "auto" or None
        elif isinstance(torch_dtype, str):  # it's a str, but not "auto"
            torch_dtype = getattr(torch, torch_dtype)
        else:
            raise ValueError(
                "Invalid `torch_dtype` passed to `GRPOConfig`. Expected either 'auto' or a string representing "
                f"a `torch.dtype` (e.g., 'float32'), but got {torch_dtype}."
            )

        # Disable caching if gradient checkpointing is enabled (not supported)
        if hasattr(model, 'blip2') and hasattr(model.blip2, 'llm_model'):
            model.blip2.llm_model.config.use_cache = (
                False if args.gradient_checkpointing else model.blip2.llm_model.config.use_cache
            )

        # LoRA setup for BLIP2
        self.dna_modules_keywords = self.dna_module.get_dnallm_modules_keywords()
        if peft_config is not None:
            print("Applying LoRA...")

            def find_all_linear_names(model, multimodal_keywords):
                # Collect names of all Linear layers in the LLM, excluding
                # DNA/multimodal submodules and embedding layers, as LoRA targets.
                cls = torch.nn.Linear
                lora_module_names = set()

                # Focus on the LLM part of BLIP2
                if hasattr(model, 'blip2') and hasattr(model.blip2, 'llm_model'):
                    llm_model = model.blip2.llm_model
                    for name, module in llm_model.named_modules():
                        # Skip DNA/multimodal modules
                        if any(mm_keyword in name for mm_keyword in multimodal_keywords):
                            continue
                        if isinstance(module, cls):
                            lora_module_names.add(name)

                # Remove embedding layers
                for m in list(lora_module_names):
                    if "embed_tokens" in m or "embedding" in m:
                        lora_module_names.remove(m)

                return list(lora_module_names)

            target_modules = find_all_linear_names(model, self.dna_modules_keywords)
            peft_config.target_modules = target_modules

            # Apply LoRA to the LLM part
            if hasattr(model, 'blip2') and hasattr(model.blip2, 'llm_model'):
                model.blip2.llm_model = prepare_model_for_kbit_training(model.blip2.llm_model)
                model.blip2.llm_model = get_peft_model(model.blip2.llm_model, peft_config)

        # Freeze DNA/protein modules if requested
        if freeze_dna_modules:
            print("Freezing protein/DNA modules...")
            if hasattr(model, 'blip2'):
                # Freeze protein language model
                if hasattr(model.blip2, 'plm'):
                    for p in model.blip2.plm.parameters():
                        p.requires_grad = False

                # Freeze Q-former if specified
                if hasattr(model.blip2, 'Qformer'):
                    for p in model.blip2.Qformer.parameters():
                        p.requires_grad = False

        # Count trainable parameters (after LoRA/freezing so the count is accurate)
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        total_params = sum(p.numel() for p in trainable_params)
        print(f"Total trainable parameters: {total_params}")

        # Enable gradient checkpointing if requested
        if args.gradient_checkpointing:
            model = self._enable_gradient_checkpointing(model, args)

        # Reference model: only needed when the KL penalty (beta) is active.
        self.beta = args.beta
        if self.beta == 0.0:
            self.ref_model = None
        elif is_deepspeed_zero3_enabled():
            # Create reference model for DeepSpeed
            # NOTE(review): `type(model)(model.args)` assumes the wrapper stores its
            # constructor args on `.args` — verify this matches the wrapper class.
            self.ref_model = type(model)(model.args)  # Create same type of model
        elif is_peft_model(model.blip2.llm_model if hasattr(model, 'blip2') else model):
            # With PEFT, the base weights double as the reference policy.
            self.ref_model = None
        else:
            self.ref_model = create_reference_model(model)

        # Processing class setup
        if processing_class is None:
            processing_cls = self.dna_module.get_processing_class()

            # Get tokenizers from BLIP2 model
            if hasattr(model, 'blip2'):
                plm_tokenizer = getattr(model.blip2, 'plm_tokenizer', None)
                llm_tokenizer = getattr(model.blip2, 'llm_tokenizer', None)
                processing_class = processing_cls(plm_tokenizer=plm_tokenizer, llm_tokenizer=llm_tokenizer)
            else:
                processing_class = processing_cls()

            # Set up tokenizer attributes
            if hasattr(processing_class, 'llm_tokenizer') and processing_class.llm_tokenizer:
                processing_class.pad_token_id = processing_class.llm_tokenizer.pad_token_id
                processing_class.eos_token_id = processing_class.llm_tokenizer.eos_token_id
            else:
                # Fallback
                processing_class.pad_token_id = 0
                processing_class.eos_token_id = 1

        self.dna_module.post_model_init(model, processing_class)
        self.dna_module.post_model_init(self.ref_model, processing_class)

        # Reward functions: string entries are loaded as sequence-classification
        # reward models; callables are used as-is.
        if not isinstance(reward_funcs, list):
            reward_funcs = [reward_funcs]
        for i, reward_func in enumerate(reward_funcs):
            if isinstance(reward_func, str):
                reward_funcs[i] = AutoModelForSequenceClassification.from_pretrained(
                    reward_func, num_labels=1, **model_init_kwargs
                )
        self.reward_funcs = reward_funcs

        # Reward processing classes (one per reward function)
        if reward_processing_classes is None:
            reward_processing_classes = [None] * len(reward_funcs)
        elif not isinstance(reward_processing_classes, list):
            reward_processing_classes = [reward_processing_classes]

        for i, (reward_processing_class, reward_func) in enumerate(zip(reward_processing_classes, reward_funcs)):
            if isinstance(reward_func, PreTrainedModel):
                if reward_processing_class is None:
                    reward_processing_class = AutoTokenizer.from_pretrained(reward_func.config._name_or_path)
                if reward_processing_class.pad_token_id is None:
                    reward_processing_class.pad_token = reward_processing_class.eos_token
                reward_func.config.pad_token_id = reward_processing_class.pad_token_id
                reward_processing_classes[i] = reward_processing_class
        self.reward_processing_classes = reward_processing_classes

        # Data collator: identity — raw example dicts are consumed directly
        # by compute_loss / _generate_and_score_completions.
        def data_collator(features):
            return features

        # Training arguments
        # NOTE(review): max_prompt_length is read and then immediately forced to
        # None — the first assignment is dead; prompt-length limiting is unsupported.
        self.max_prompt_length = args.max_prompt_length
        self.max_prompt_length = None
        if args.max_prompt_length is not None:
            warnings.warn("Setting max_prompt_length is currently not supported, it has been set to None")

        self.max_completion_length = args.max_completion_length
        self.num_generations = args.num_generations

        # Generation config for BLIP2
        self.generation_config = GenerationConfig(
            max_new_tokens=self.max_completion_length,
            do_sample=True,
            temperature=0.6,
            top_p=0.95,
            top_k=20,
            pad_token_id=processing_class.pad_token_id,
            eos_token_id=processing_class.eos_token_id,
        )

        # NOTE(review): self.beta was already set above — redundant reassignment.
        self.beta = args.beta
        self.epsilon_low = args.epsilon
        self.epsilon_high = args.epsilon_high if args.epsilon_high is not None else args.epsilon

        # Multi-step: buffer one generation round per accumulation slot.
        self.num_iterations = args.num_iterations
        self._step = 0
        self._buffered_inputs = [None] * args.gradient_accumulation_steps

        # Initialize metrics (name -> list of per-step values, averaged in log()).
        self._metrics = defaultdict(list)
        self.log_completions = args.log_completions

        super().__init__(
            model=model,
            args=args,
            data_collator=data_collator,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            processing_class=processing_class,
            callbacks=callbacks,
            optimizers=optimizers,
        )

        # Validate batch sizes: the global batch must split evenly into
        # generation groups of size num_generations.
        num_processes = self.accelerator.num_processes
        global_batch_size = args.per_device_train_batch_size * num_processes
        possible_values = [n_gen for n_gen in range(2, global_batch_size + 1) if (global_batch_size) % n_gen == 0]
        if self.num_generations not in possible_values:
            raise ValueError(
                f"The global train batch size ({num_processes} x {args.per_device_train_batch_size}) must be evenly "
                f"divisible by the number of generations per prompt ({self.num_generations}). Given the current train "
                f"batch size, the valid values for the number of generations are: {possible_values}."
            )

        # Set unique seed per process
        set_seed(args.seed, device_specific=True)

        # Gradient accumulation settings
        self.model_accepts_loss_kwargs = False

        # Prepare reference model and reward functions for distributed execution
        if self.ref_model is not None:
            if is_deepspeed_zero3_enabled():
                self.ref_model = prepare_deepspeed(self.ref_model, self.accelerator)
            else:
                self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)

        for i, reward_func in enumerate(self.reward_funcs):
            if isinstance(reward_func, PreTrainedModel):
                self.reward_funcs[i] = self.accelerator.prepare_model(reward_func, evaluation_mode=True)
322
+
323
+ def _enable_gradient_checkpointing(self, model: PreTrainedModel, args: DNALLMGRPOConfig) -> PreTrainedModel:
324
+ """Enables gradient checkpointing for BLIP2 model."""
325
+ if hasattr(model, 'blip2'):
326
+ # Enable for the LLM component
327
+ if hasattr(model.blip2, 'llm_model'):
328
+ model.blip2.llm_model.config.use_cache = False
329
+ if hasattr(model.blip2.llm_model, 'gradient_checkpointing_enable'):
330
+ model.blip2.llm_model.gradient_checkpointing_enable()
331
+
332
+ # Enable for protein model if needed
333
+ if hasattr(model.blip2, 'plm') and hasattr(model.blip2.plm, 'gradient_checkpointing_enable'):
334
+ model.blip2.plm.gradient_checkpointing_enable()
335
+
336
+ return model
337
+
338
+ def _set_signature_columns_if_needed(self):
339
+ if self._signature_columns is None:
340
+ self._signature_columns = ["prompt"]
341
+
342
+ def _get_key_from_inputs(self, x, key):
343
+ ele = x.get(key, None)
344
+ assert ele is not None, f"The key {key} is not found in the input"
345
+ if isinstance(ele, list):
346
+ return [e for e in ele]
347
+ else:
348
+ return [ele]
349
+
350
    def _generate_and_score_completions(self, inputs: dict[str, Union[torch.Tensor, Any]], model) -> dict[str, Union[torch.Tensor, Any]]:
        """
        Generate one round of completions for a batch of prompts and score them.

        Pipeline: build prompt/DNA batches, generate via the BLIP2 model, run all
        reward functions, gather rewards across processes, normalize them within
        groups of `num_generations` into advantages, log metrics, and return the
        tensors needed by `compute_loss`.

        Args:
            inputs: list of raw example dicts with a "prompt" key and optionally
                "dna_sequences"  # NOTE(review): annotation says dict but a list is iterated
            model: the (possibly wrapped) policy model

        Returns:
            Dict with the tokenized batches, generated texts, per-example
            advantages (local process slice), and None placeholders for the
            old/ref per-token log-probs that this BLIP2 path does not compute.
        """
        device = self.accelerator.device
        prompts = [x["prompt"] for x in inputs]
        prompts_text = self.dna_module.prepare_prompt(self.processing_class, inputs)

        # Handle DNA sequences (treat as protein sequences for BLIP2)
        batch_dna_sequences = []
        print("_generate_and_score_completions (BLIP2 GRPO):")
        for x in inputs:
            if 'dna_sequences' in x:
                dnas = self._get_key_from_inputs(x, "dna_sequences")
                batch_dna_sequences.append(dnas)
            else:
                # Example without DNA: keep position with an empty list.
                batch_dna_sequences.append([])

        # Prepare model inputs for BLIP2
        prompt_inputs = self.dna_module.prepare_model_inputs(
            self.processing_class,
            model,
            prompts_text,
            batch_dna_sequences,
            return_tensors="pt",
            padding=True,
            padding_side="left",
            add_special_tokens=False,
        )

        # Moves tensors to the right device via the base Trainer.
        prompt_inputs = super()._prepare_inputs(prompt_inputs)

        # Extract BLIP2-specific inputs
        prot_batch = prompt_inputs.get("prot_batch")
        prompt_batch = prompt_inputs.get("prompt_batch")

        # Generate completions using BLIP2
        start = time.time()
        with unwrap_model_for_generation(model, self.accelerator) as unwrapped_model:
            # Prepare samples for BLIP2 generation
            samples = {
                'prot_batch': prot_batch,
                'prompt_batch': prompt_batch
            }

            # Use BLIP2's generate method
            # NOTE(review): assumes blip2.generate returns a list of decoded
            # strings (one per prompt) — confirm against the model's API.
            if hasattr(unwrapped_model, 'blip2'):
                completions_text = unwrapped_model.blip2.generate(
                    samples,
                    do_sample=True,
                    temperature=0.6,
                    top_p=0.95,
                    num_beams=1,
                    max_length=self.max_completion_length,
                    min_length=1,
                )
            else:
                # Fallback if not BLIP2 structure
                completions_text = ["Generated text"] * len(prompts_text)

        end = time.time()
        print(f"Generation time: {end - start:.9f} seconds")

        # Convert completions to expected format (conversational -> message lists)
        if is_conversational(inputs[0]):
            completions = [[{"role": "assistant", "content": completion}] for completion in completions_text]
        else:
            completions = completions_text

        # Compute rewards: one column per reward function.
        print("Reward calculation...")
        rewards_per_func = torch.zeros(len(prompts), len(self.reward_funcs), device=device)
        for i, (reward_func, reward_processing_class) in enumerate(
            zip(self.reward_funcs, self.reward_processing_classes)
        ):
            if isinstance(reward_func, PreTrainedModel):
                # Model-based reward: score the concatenated prompt+completion text.
                if is_conversational(inputs[0]):
                    messages = [{"messages": p + c} for p, c in zip(prompts, completions)]
                    texts = [apply_chat_template(x, reward_processing_class)["text"] for x in messages]
                else:
                    texts = [p + c for p, c in zip(prompts, completions)]
                reward_inputs = reward_processing_class(
                    texts, return_tensors="pt", padding=True, padding_side="right", add_special_tokens=False
                )
                reward_inputs = super()._prepare_inputs(reward_inputs)
                with torch.inference_mode():
                    rewards_per_func[:, i] = reward_func(**reward_inputs).logits[:, 0]
            else:
                # Custom reward function: forward every extra dataset column as kwargs.
                reward_kwargs = {key: [] for key in inputs[0].keys() if key not in ["prompt", "completion"]}
                for key in reward_kwargs:
                    for example in inputs:
                        reward_kwargs[key].extend([example[key]])
                output_reward_func = reward_func(prompts=prompts, completions=completions, **reward_kwargs)
                rewards_per_func[:, i] = torch.tensor(output_reward_func, dtype=torch.float32, device=device)

        # Gather rewards across processes so group statistics span the full batch.
        rewards_per_func = self.accelerator.gather(rewards_per_func)
        rewards = rewards_per_func.sum(dim=1)

        # Compute grouped-wise rewards over groups of num_generations samples
        # that share the same prompt.
        mean_grouped_rewards = rewards.view(-1, self.num_generations).mean(dim=1)
        std_grouped_rewards = rewards.view(-1, self.num_generations).std(dim=1)

        # Normalize rewards to compute advantages; the 1e-4 term guards against
        # zero std when all completions in a group receive the same reward.
        mean_grouped_rewards = mean_grouped_rewards.repeat_interleave(self.num_generations, dim=0)
        std_grouped_rewards = std_grouped_rewards.repeat_interleave(self.num_generations, dim=0)
        advantages = (rewards - mean_grouped_rewards) / (std_grouped_rewards + 1e-4)

        # Get local slice of advantages (rewards were gathered globally above).
        process_slice = slice(
            self.accelerator.process_index * len(prompts),
            (self.accelerator.process_index + 1) * len(prompts),
        )
        advantages = advantages[process_slice]

        # Log metrics
        print("Logging metrics...")
        # NOTE(review): only the first completion's word count is recorded,
        # so this metric is a rough proxy, not a batch average.
        completion_length = len(completions_text[0].split()) if completions_text else 0
        self._metrics["completion_length"].append(completion_length)

        reward_per_func = self.accelerator.gather_for_metrics(rewards_per_func).mean(0)
        for i, reward_func in enumerate(self.reward_funcs):
            if isinstance(reward_func, PreTrainedModel):
                reward_func_name = reward_func.config._name_or_path.split("/")[-1]
            else:
                reward_func_name = reward_func.__name__
            self._metrics[f"rewards/{reward_func_name}"].append(reward_per_func[i].item())

        self._metrics["reward"].append(self.accelerator.gather_for_metrics(rewards).mean().item())
        self._metrics["reward_std"].append(self.accelerator.gather_for_metrics(std_grouped_rewards).mean().item())

        # Log completions if enabled
        if (
            self.log_completions
            and self.state.global_step % self.args.logging_steps == 0
            and "wandb" in self.args.report_to
        ):
            timestamp = time.time()
            num_items = len(gather_object(prompts_text))

            table = {
                "step": [f"{self.state.global_step}_{timestamp}"] * num_items,
                "prompt": gather_object(prompts_text),
                "completion": gather_object(completions_text),
                "reward": rewards.tolist(),
            }
            df = pd.DataFrame(table)

            if wandb.run is not None and self.accelerator.is_main_process:
                wandb.log({f"completions_{self.state.global_step}_{timestamp}": wandb.Table(dataframe=df)})

        return {
            "prot_batch": prot_batch,
            "prompt_batch": prompt_batch,
            "completions_text": completions_text,
            "old_per_token_logps": None,  # BLIP2 doesn't need this for current implementation
            "ref_per_token_logps": None,  # BLIP2 doesn't need this for current implementation
            "advantages": advantages,
            "multimodal_inputs": {"prot_batch": prot_batch, "prompt_batch": prompt_batch}
        }
508
+
509
    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """
        Compute the GRPO training loss for one step.

        Regenerates and scores completions on iteration boundaries and buffers
        the result per gradient-accumulation slot; intermediate steps reuse the
        buffer. The returned loss is the BLIP2 forward-pass loss, scaled by a
        crude advantage-derived weight.

        Args:
            model: wrapper exposing the BLIP2 model via `model.blip2`
                (falls back to calling `model` directly)
            inputs: list of raw dataset examples (replaced by the buffered
                generation outputs internally)
            return_outputs: unsupported; must be False
            num_items_in_batch: unused, accepted for Trainer API compatibility

        Raises:
            ValueError: if `return_outputs` is True.
        """
        if return_outputs:
            raise ValueError("The BLIP2 GRPO Trainer does not support returning outputs")

        print("compute_loss - index 1")
        # NOTE(review): regeneration keys off `global_step % num_iterations`
        # while the buffer is indexed by `_step % gradient_accumulation_steps`;
        # confirm the two counters stay in sync across accumulation.
        if self.state.global_step % self.num_iterations == 0:
            inputs = self._generate_and_score_completions(inputs, model)
            self._buffered_inputs[self._step % self.args.gradient_accumulation_steps] = inputs
        else:
            inputs = self._buffered_inputs[self._step % self.args.gradient_accumulation_steps]
        self._step += 1

        print("compute_loss - index 2")

        # For BLIP2, we need to compute loss differently
        # This is a simplified version - you may need to adapt based on your specific BLIP2 implementation

        # Extract the necessary components
        prot_batch = inputs.get("prot_batch")
        prompt_batch = inputs.get("prompt_batch")
        advantages = inputs.get("advantages")

        print("compute_loss - index 3")

        # Create a batch for BLIP2 forward pass
        # This assumes your BLIP2 model expects (prot_batch, prompt_batch, text_dict) format
        text_dict = {"targets": inputs.get("completions_text", [])}
        batch = (prot_batch, prompt_batch, text_dict)

        print("compute_loss - index 4")

        # Forward pass through BLIP2
        # NOTE(review): assumes the forward call returns a scalar loss tensor —
        # confirm against the BLIP2 wrapper's forward signature.
        if hasattr(model, 'blip2'):
            loss = model.blip2(batch)
        else:
            loss = model(batch)

        print("compute_loss - index 5")

        # For now, return the basic loss
        # You may want to incorporate the advantages into the loss calculation
        # based on your specific GRPO implementation needs
        # NOTE(review): scaling by the mean advantage is NOT the GRPO
        # policy-gradient objective (no per-token log-prob ratios); since
        # group-normalized advantages average near zero, this weight is ~1.0.
        if advantages is not None:
            # Apply advantages weighting (simplified)
            advantage_weight = advantages.mean().item()
            loss = loss * (1.0 + advantage_weight)

        print("Computing final loss...")
        return loss
559
+
560
+ def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None:
561
+ metrics = {key: sum(val) / len(val) for key, val in self._metrics.items()}
562
+ logs = {**logs, **metrics}
563
+ if version.parse(transformers.__version__) >= version.parse("4.47.0.dev0"):
564
+ super().log(logs, start_time)
565
+ else:
566
+ super().log(logs)
567
+ self._metrics.clear()
568
+
569
+ def _get_train_sampler(self) -> Sampler:
570
+ """Returns a sampler that ensures proper data sampling for GRPO training."""
571
+ effective_batch_size = (
572
+ self.args.per_device_train_batch_size
573
+ * self.accelerator.num_processes
574
+ * self.args.gradient_accumulation_steps
575
+ )
576
+
577
+ return RepeatRandomSampler(
578
+ data_source=self.train_dataset,
579
+ mini_repeat_count=self.num_generations,
580
+ batch_size=effective_batch_size // self.num_generations,
581
+ repeat_count=self.num_iterations,
582
+ seed=self.args.seed,
583
+ )
584
+
585
+ def _get_eval_sampler(self, eval_dataset) -> Sampler:
586
+ """Returns a sampler for evaluation."""
587
+ return RepeatRandomSampler(
588
+ data_source=eval_dataset,
589
+ mini_repeat_count=self.num_generations,
590
+ seed=self.args.seed,
591
+ )
BioReason-0813/blips_reason.py ADDED
@@ -0,0 +1,866 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import pathlib
4
+ from argparse import ArgumentParser
5
+ from typing import List, Dict, Optional
6
+ from dataclasses import dataclass, field
7
+
8
+ import torch
9
+ from torch import nn
10
+ import torch.nn.functional as F
11
+ from torch.optim import AdamW
12
+ from torch.utils.data import DataLoader, Dataset
13
+ from transformers import get_cosine_schedule_with_warmup, AutoTokenizer
14
+
15
+ from transformers import (
16
+ AutoTokenizer,
17
+ AutoModelForCausalLM,
18
+ AutoModelForMaskedLM,
19
+ AutoProcessor,
20
+ )
21
+
22
+ from datasets import load_dataset, DatasetDict
23
+
24
+ from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training
25
+ from transformers import BitsAndBytesConfig
26
+
27
+ import pytorch_lightning as pl
28
+ from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor
29
+ from pytorch_lightning.loggers import WandbLogger
30
+
31
+ from trl import GRPOConfig, GRPOTrainer, ModelConfig, ScriptArguments, TrlParser, get_peft_config
32
+
33
+ # Import BLIP2 modules
34
+ from model.blip2_stage2 import Blip2Stage2
35
+ from blip2_dna_module import Blip2DNAModule
36
+ from blip2_grpo_trainer import Blip2GRPOTrainer
37
+ from bioreason.trainer import DNALLMGRPOConfig
38
+
39
+ # Custom TrainerCallback to override the saving mechanism
40
+ from transformers import TrainerCallback, TrainerState, TrainerControl
41
+ from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
42
+
43
+ from prompt_templates import prompt_templates
44
+
45
class SaveWithPyTorchCallback(TrainerCallback):
    """Custom callback that saves checkpoints with torch.save (pytorch_model.bin)
    instead of the Trainer's default safetensors serialization.

    After writing the checkpoint it sets ``control.should_save = False`` so the
    Trainer does not write a second copy of the same step.
    """

    def on_save(self, args, state, control, **kwargs):
        # Build the standard "checkpoint-<global_step>" folder under output_dir.
        checkpoint_folder = os.path.join(
            args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}"
        )
        os.makedirs(checkpoint_folder, exist_ok=True)

        # Save with PyTorch instead of safetensors.
        checkpoint_path = os.path.join(checkpoint_folder, "pytorch_model.bin")
        model = kwargs.get("model")

        # Unwrap DDP/accelerate wrappers, which expose the real model via .module.
        unwrapped_model = model.module if hasattr(model, "module") else model

        # Save using PyTorch directly.
        torch.save(unwrapped_model.state_dict(), checkpoint_path)

        # For BLIP2, save the config from the LLM component (or its PEFT base
        # model) so the checkpoint folder carries a loadable config.json.
        if hasattr(unwrapped_model, "blip2") and hasattr(unwrapped_model.blip2, "llm_model"):
            if hasattr(unwrapped_model.blip2.llm_model, "config"):
                unwrapped_model.blip2.llm_model.config.save_pretrained(checkpoint_folder)
            elif hasattr(unwrapped_model.blip2.llm_model, "base_model") and hasattr(unwrapped_model.blip2.llm_model.base_model, "config"):
                unwrapped_model.blip2.llm_model.base_model.config.save_pretrained(checkpoint_folder)

        # Log what was written; the LoRA parameter count is a quick sanity check
        # that adapter weights actually made it into the state dict.
        print(f"Saved model checkpoint to {checkpoint_folder}")
        lora_params = [k for k in unwrapped_model.state_dict().keys() if "lora" in k]
        print(f"Checkpoint contains {len(lora_params)} LoRA parameters")

        # Tell the Trainer we already saved, so it skips its own save path.
        control.should_save = False
        return control
79
+
80
def extract_xml_answer(text: str) -> str:
    """Extract the <answer>...</answer> content from a model response.

    Falls back to the text after the last </think> tag, and finally to the
    whole (stripped) text when neither structure is present.
    """
    # Preferred: an explicit <answer> tag.
    tagged = re.search(r"<answer>(.*?)</answer>", text, re.DOTALL)
    if tagged:
        return tagged.group(1).strip()

    # Next best: whatever follows the last closing </think> tag.
    _, sep, tail = text.rpartition("</think>")
    if sep:
        return tail.strip()

    # No structure at all: return the raw text.
    return text.strip()
94
+
95
def extract_classification_answer(text: str) -> str:
    """Extract a classification label from a model response.

    Looks inside <answer>...</answer> for patterns like "Classification: 3",
    "Class: 3", "Label: 3", "Prediction: 3", or any bare number; returns the
    matched digits, or the full tag content when no pattern matches. When the
    response has no <answer> tag, falls back to extract_xml_answer().
    """
    tag = re.search(r"<answer>(.*?)</answer>", text, re.DOTALL)
    if not tag:
        return extract_xml_answer(text)

    content = tag.group(1).strip()

    # Try the most specific label patterns first; a bare number is last resort.
    label_patterns = (
        r"[Cc]lassification:\s*(\d+)",
        r"[Cc]lass:\s*(\d+)",
        r"[Ll]abel:\s*(\d+)",
        r"[Pp]rediction:\s*(\d+)",
        r"(\d+)",
    )
    for pattern in label_patterns:
        hit = re.search(pattern, content)
        if hit:
            return hit.group(1)

    return content
119
+
120
+ def extract_hash_answer(text: str) -> str | None:
121
+ if "####" not in text:
122
+ return None
123
+ return text.split("####")[1].strip()
124
+
125
def get_kegg_questions() -> Dataset:
    """Load the wanglab/kegg dataset and map it into the chat-style GRPO format.

    Kept as a fallback when no local protein CSV is supplied. Each example gets
    a 'prompt' (one user turn containing two DNA placeholders plus the question
    text), 'dna_sequences' (reference + variant sequence) and 'answer'.

    Returns the mapped DatasetDict on success; on failure returns a plain dict
    with empty 'train'/'val' Datasets so downstream indexing does not crash.
    """
    try:
        data = load_dataset('wanglab/kegg', 'default')  # type: ignore
        # NOTE(review): example_dna_sequences is never used in this function.
        example_dna_sequences = ["ATCTACATGCAT", "CAGCAGCTACAG", "CATCACATCGACATCGAC"]
        # Two DNA placeholders per prompt: reference sequence + variant sequence.
        num_dna_sequences = 2

        data = data.map(lambda x: {  # type: ignore
            'prompt': [
                {
                    'role': 'user',
                    'content': [
                        # Placeholder entries consumed by the DNA encoder.
                        *({'type': 'dna', 'text': None} for _ in range(num_dna_sequences)),
                        {'type': 'text', 'text': x['question']},
                    ],
                },
            ],
            'dna_sequences': [x['reference_sequence'], x['variant_sequence']],
            'answer': x['answer'],
        })  # type: ignore

        return data
    except Exception as e:
        # Best-effort fallback: log and return an empty structure so callers
        # can still index ['train'] / ['val'].
        print(f"Failed to load KEGG dataset: {e}")
        from datasets import Dataset
        empty_data = {
            'prompt': [],
            'dna_sequences': [],
            'answer': []
        }
        dataset = Dataset.from_dict(empty_data)
        return {'train': dataset, 'val': dataset}
158
+
159
def get_protein_classification_data(data_path: Optional[str] = None, prompt_template: Optional[str] = None) -> Dataset:
    """Load a protein-classification CSV and convert it to the GRPO chat format.

    Expected CSV columns: name, aa_seq, label, location, unique_id, pdb_hash.

    Args:
        data_path: Path to the CSV file. When None, falls back to the KEGG dataset.
        prompt_template: Optional format string with an ``{aa_seq}`` placeholder
            (``{name}``, ``{location}`` and ``{unique_id}`` are also available).

    Returns:
        A mapping with 'train' and 'val' splits.
    """
    import pandas as pd
    from datasets import Dataset

    if data_path is None:
        # No path supplied: fall back to the default KEGG dataset.
        return get_kegg_questions()

    # Read the CSV data (other formats could be added here).
    if data_path.endswith('.csv'):
        df = pd.read_csv(data_path)
    else:
        raise ValueError(f"Unsupported file format: {data_path}")

    # Default prompt template.
    if prompt_template is None:
        prompt_template = """
Please analyze the following protein sequence and predict its classification.

Protein sequence: <protein>{aa_seq}</protein>

Question: What is the classification of this protein sequence?

Please provide your reasoning in <think></think> tags and your final answer in <answer></answer> tags.
"""

    def process_example(row):
        """Build one chat-format training example from a CSV row."""
        prompt_text = prompt_template.format(
            aa_seq=row['aa_seq'],
            name=row.get('name', ''),
            location=row.get('location', ''),
            unique_id=row.get('unique_id', ''),
        )

        return {
            'prompt': [
                {
                    'role': 'user',
                    'content': [
                        {'type': 'protein', 'text': None},  # placeholder consumed by the protein encoder
                        {'type': 'text', 'text': prompt_text},
                    ],
                },
            ],
            # NOTE: the pipeline's field is named 'dna_sequences' but carries
            # the amino-acid sequence here.
            'dna_sequences': [row['aa_seq']],
            'answer': str(row['label']),
            'metadata': {
                'name': row.get('name', ''),
                'location': row.get('location', ''),
                'unique_id': row.get('unique_id', ''),
                'pdb_hash': row.get('pdb_hash', ''),
            }
        }

    # Convert every row.
    processed_data = [process_example(row) for _, row in df.iterrows()]
    dataset = Dataset.from_list(processed_data)

    # Split into train/val.
    if len(dataset) > 100:
        # BUG FIX: train_test_split() names its held-out split 'test', but the
        # rest of this script indexes dataset['val'] -- remap the key.
        split = dataset.train_test_split(test_size=0.1, seed=42)
        dataset = {'train': split['train'], 'val': split['test']}
    else:
        # Small dataset: reuse the first rows of the training data as validation.
        dataset = {
            'train': dataset,
            'val': dataset.select(range(min(10, len(dataset))))
        }

    return dataset
239
+
240
def get_custom_protein_data_with_prompts(data_path: Optional[str] = None,
                                         prompt_templates: Dict[str, str] = None,
                                         template_name: Optional[str] = None) -> Dataset:
    """Flexible protein data loader supporting multiple prompt templates.

    Args:
        data_path: Path to the protein CSV (falls back to KEGG when None).
        prompt_templates: Mapping of template name -> format string.
        template_name: Template to use for every row; when None a template is
            chosen at random per row.

    Returns:
        A mapping with 'train' and 'val' splits.

    BUG FIX: this function previously read the module-global ``script_args``
    and did not accept the ``template_name`` keyword its caller in main()
    passes (a TypeError); ``template_name`` is now a real parameter.
    """
    import pandas as pd
    from datasets import Dataset
    import random

    if data_path is None:
        return get_kegg_questions()

    # Read the data.
    df = pd.read_csv(data_path)

    def process_example(row, chosen_template=None):
        """Build one chat-format example, picking a template when none given."""
        if chosen_template is None:
            chosen_template = random.choice(list(prompt_templates.keys()))

        template = prompt_templates[chosen_template]

        # Truncate very long sequences for display inside the prompt; the full
        # sequence is still passed to the model via 'dna_sequences'.
        prompt_text = template.format(
            aa_seq=row['aa_seq'][:500] + "..." if len(row['aa_seq']) > 500 else row['aa_seq'],
            label=row['label'],
            name=row.get('name', ''),
            location=row.get('location', ''),
        )

        return {
            'prompt': [
                {
                    'role': 'user',
                    'content': [
                        {'type': 'protein', 'text': None},
                        {'type': 'text', 'text': prompt_text.split('<protein>')[0]},  # text before the sequence marker
                    ],
                },
            ],
            'dna_sequences': [row['aa_seq']],  # full, untruncated sequence for the encoder
            'answer': str(row['label']),
            'template_used': chosen_template,
            'metadata': {
                'name': row.get('name', ''),
                'location': row.get('location', ''),
                'unique_id': row.get('unique_id', ''),
                'pdb_hash': row.get('pdb_hash', ''),
                'full_prompt': prompt_text,
            }
        }

    print("template_name")
    print(template_name)
    processed_data = [process_example(row, template_name) for _, row in df.iterrows()]

    dataset = Dataset.from_list(processed_data)

    # Dataset split.
    if len(dataset) > 50:
        # BUG FIX: remap train_test_split's 'test' key to the 'val' key that
        # the rest of the script expects.
        split = dataset.train_test_split(test_size=0.1, seed=42)
        dataset = {'train': split['train'], 'val': split['test']}
    else:
        dataset = {
            'train': dataset,
            'val': dataset.select(range(min(5, len(dataset))))
        }

    return dataset
311
+
312
def get_gsm8k_questions(question_prompt: str) -> Dataset:
    """Load openai/gsm8k and map it into the chat-style GRPO format.

    NOTE(review): ``question_prompt`` is accepted but never used -- every
    example gets the same hard-coded placeholder text plus example DNA
    sequences. This looks like pipeline smoke-test scaffolding; confirm before
    relying on it for real training.
    """
    data = load_dataset('openai/gsm8k', 'main')  # type: ignore

    # Dummy DNA sequences attached to every example (one placeholder each).
    example_dna_sequences = ["ATCTACATGCAT", "CAGCAGCTACAG", "CATCACATCGACATCGAC"]
    data = data.map(lambda x: {  # type: ignore
        'prompt': [
            {
                'role': 'user',
                'content': [
                    *({'type': 'dna', 'text': None} for _ in range(len(example_dna_sequences))),
                    {'type': 'text', 'text': 'Give me a short introduction to large language model.'}
                ]
            },
        ],
        'dna_sequences': [dna for dna in example_dna_sequences],
        'answer': extract_hash_answer(x['answer']),
    })  # type: ignore

    return data  # type: ignore
331
+
332
+ # Reward functions
333
def format_correct_reward_func(completions, **kwargs) -> list[float]:
    """Reward adherence to the expected output format.

    Scoring (max 1.5):
      +0.5 when both <think> and </think> are present,
      +0.5 when both <answer> and </answer> are present,
      +0.5 bonus when all four tags appear in the order
           <think> ... </think> ... <answer> ... </answer>.
    """
    scores = []
    for completion in completions:
        text = completion[0]["content"]

        has_think = "<think>" in text and "</think>" in text
        has_answer = "<answer>" in text and "</answer>" in text
        score = 0.5 * has_think + 0.5 * has_answer

        # Bonus only when every tag exists and they are strictly ordered.
        positions = [
            text.find(tag)
            for tag in ("<think>", "</think>", "<answer>", "</answer>")
        ]
        if -1 not in positions and all(a < b for a, b in zip(positions, positions[1:])):
            score += 0.5

        scores.append(score)

    return scores
366
+
367
def accuracy_reward_func(prompts, completions, answer, **kwargs) -> list[float]:
    """Reward answer correctness for the classification task.

    The text inside <answer>...</answer> (or the whole completion when the tag
    is missing) is compared to the ground truth after lower-casing and
    stripping non-word characters.

    Returns per-completion scores: 1.0 for a full (containment) match, 0.5 when
    at least one word of the ground truth appears in the prediction, else 0.0.
    ``prompts`` is accepted for reward-function interface compatibility.
    """
    responses = [completion[0]['content'] for completion in completions]
    rewards = []

    for i, response in enumerate(responses):
        # Prefer the tagged answer; fall back to the raw completion.
        answer_match = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
        extracted_answer = answer_match.group(1).strip() if answer_match else response.strip()

        # Ground truth: per-sample when available, else first element / scalar.
        if isinstance(answer, list) and len(answer) > i:
            correct_answer = str(answer[i]).strip()
        elif isinstance(answer, list) and len(answer) > 0:
            correct_answer = str(answer[0]).strip()
        else:
            correct_answer = str(answer).strip()

        # Normalize: lower-case and drop everything that is not a word char.
        extracted_clean = re.sub(r'[^\w\d]', '', extracted_answer.lower())
        correct_clean = re.sub(r'[^\w\d]', '', correct_answer.lower())

        # BUG FIX: the partial-match branch used to split `correct_clean`,
        # whose whitespace had already been stripped, so it duplicated the
        # full-match test and could never award 0.5. Split the *raw* ground
        # truth into words and clean each word instead (dropping empties,
        # since '' is a substring of everything).
        correct_words = [
            re.sub(r'[^\w\d]', '', word.lower())
            for word in correct_answer.split()
        ]
        correct_words = [word for word in correct_words if word]

        if correct_clean in extracted_clean or extracted_clean == correct_clean:
            rewards.append(1.0)  # full match
        elif any(word in extracted_clean for word in correct_words):
            rewards.append(0.5)  # partial match
        else:
            rewards.append(0.0)  # no match

    return rewards
404
+
405
def classification_specific_reward_func(prompts, completions, answer, **kwargs) -> list[float]:
    """Protein-classification-specific reward.

    Per completion, sums (capped at 1.0):
      +0.2 the answer mentions a classification keyword,
      +0.8 the ground-truth label appears in the answer,
      +0.4 (numeric labels) the closest predicted integer is within 1 of the label
           (an exact match also earns this; the cap absorbs it),
      +0.2 a non-trivial <think> section (> 20 chars) is present.
    """
    responses = [completion[0]['content'] for completion in completions]
    rewards = []

    for i, response in enumerate(responses):
        score = 0.0

        # Extract the answer text (tagged section preferred).
        answer_match = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
        extracted_answer = answer_match.group(1).strip() if answer_match else response.strip()

        # Ground truth: per-sample when available, else first element / scalar.
        if isinstance(answer, list) and len(answer) > i:
            correct_answer = str(answer[i]).strip()
        elif isinstance(answer, list) and len(answer) > 0:
            correct_answer = str(answer[0]).strip()
        else:
            correct_answer = str(answer).strip()

        # Does the answer talk about classification at all?
        classification_keywords = ['classification', 'class', 'category', 'type', 'function', 'family']
        if any(keyword in extracted_answer.lower() for keyword in classification_keywords):
            score += 0.2

        if correct_answer.isdigit():
            # Numeric label: exact containment match.
            if correct_answer in extracted_answer:
                score += 0.8
            # Near-miss credit: closest predicted integer within +-1 of the label.
            try:
                extracted_numbers = re.findall(r'\d+', extracted_answer)
                if extracted_numbers:
                    closest_num = min(extracted_numbers, key=lambda x: abs(int(x) - int(correct_answer)))
                    if abs(int(closest_num) - int(correct_answer)) <= 1:
                        score += 0.4
            except (ValueError, OverflowError):
                # BUG FIX: was a bare `except: pass`, which also swallowed
                # KeyboardInterrupt/SystemExit and hid real bugs. Only int()
                # conversion failures are expected here.
                pass
        else:
            # Text label: case-insensitive containment.
            if correct_answer.lower() in extracted_answer.lower():
                score += 0.8

        # Reward a non-trivial reasoning section.
        if "<think>" in response and "</think>" in response:
            think_content = re.search(r"<think>(.*?)</think>", response, re.DOTALL)
            if think_content and len(think_content.group(1).strip()) > 20:
                score += 0.2

        rewards.append(min(score, 1.0))  # cap at 1.0

    return rewards
462
+
463
def repetition_penalty_reward_func(completions, **kwargs) -> list[float]:
    """Reward low repetition: the less repeated content, the higher the score.

    The word-level and sentence-level repetition rates of the <answer> section
    (or the whole completion when untagged) are averaged, doubled to sharpen
    the penalty, and subtracted from 1.0 (floored at 0.0). Empty answers
    score 0.0.
    """
    rewards = []
    for completion in completions:
        raw = completion[0]["content"]

        # Analyze the tagged answer when present, else the whole response.
        tag = re.search(r"<answer>(.*?)</answer>", raw, re.DOTALL)
        text = tag.group(1).strip() if tag else raw.strip()

        tokens = text.lower().split()
        if not tokens:
            rewards.append(0.0)
            continue

        # Fraction of duplicated words.
        word_dup = 1.0 - (len(set(tokens)) / len(tokens))

        # Fraction of duplicated sentences (only meaningful with > 1 sentence).
        sentences = [s.strip() for s in text.split('.') if s.strip()]
        if len(sentences) > 1:
            sent_dup = 1.0 - (len(set(sentences)) / len(sentences))
        else:
            sent_dup = 0.0

        # reward = 1 - 2 * mean(word_dup, sent_dup), clamped at zero.
        rewards.append(max(0.0, 1.0 - (word_dup + sent_dup)))

    return rewards
506
+
507
def combined_reward_func(prompts, completions, answer,
                         format_weight=0.3, accuracy_weight=0.5, repetition_weight=0.2,
                         **kwargs) -> list[float]:
    """Weighted combination of the format, accuracy and repetition rewards.

    Weights are renormalized (with a log message) when they do not sum to 1.
    """
    format_rewards = format_correct_reward_func(completions, **kwargs)
    accuracy_rewards = accuracy_reward_func(prompts, completions, answer, **kwargs)
    repetition_rewards = repetition_penalty_reward_func(completions, **kwargs)

    # BUG FIX: the previous exact `total_weight != 1.0` check failed on float
    # rounding (0.3 + 0.5 + 0.2 != 1.0 in IEEE-754), so even the default
    # weights were "renormalized" and a message printed on every call. Compare
    # with a tolerance instead.
    total_weight = format_weight + accuracy_weight + repetition_weight
    if abs(total_weight - 1.0) > 1e-9:
        format_weight /= total_weight
        accuracy_weight /= total_weight
        repetition_weight /= total_weight
        print(f"Normalized weights - Format: {format_weight:.3f}, Accuracy: {accuracy_weight:.3f}, Repetition: {repetition_weight:.3f}")

    combined_rewards = []
    for f_reward, a_reward, r_reward in zip(format_rewards, accuracy_rewards, repetition_rewards):
        combined = (format_weight * f_reward +
                    accuracy_weight * a_reward +
                    repetition_weight * r_reward)
        combined_rewards.append(combined)

    return combined_rewards
533
+
534
+ # 保留一些原有的奖励函数作为备选
535
def less_than_4_reward_func(completions, **kwargs) -> list[float]:
    """Give 0.5 when the extracted answer is at most four space-separated words."""
    rewards = []
    for completion in completions:
        answer_text = extract_xml_answer(completion[0]['content'])
        rewards.append(0.5 if len(answer_text.split(' ')) <= 4 else 0.0)
    return rewards
539
+
540
def strict_format_reward_func(completions, **kwargs) -> list[float]:
    """Give 0.5 when a completion exactly matches '<think>\\n...\\n</think>\\n...\\n'."""
    pattern = r"^<think>\n.*?\n</think>\n.*?\n$"
    return [
        0.5 if re.match(pattern, completion[0]["content"]) else 0.0
        for completion in completions
    ]
546
+
547
def xmlcount_reward_func(completions, **kwargs) -> list[float]:
    """Score each completion by its <think> tag usage via count_xml()."""
    return [count_xml(completion[0]["content"]) for completion in completions]
550
+
551
def count_xml(text) -> float:
    """Award 0.125 for exactly one '<think>\\n' and 0.125 for exactly one
    '\\n</think>\\n' occurrence (partial-format shaping signal)."""
    score = 0.0
    for marker in ("<think>\n", "\n</think>\n"):
        if text.count(marker) == 1:
            score += 0.125
    return score
558
+
559
@dataclass
class Blip2ModelConfig(ModelConfig):
    """Model configuration for the BLIP2 DNA/protein GRPO setup.

    Extends TRL's ModelConfig with the BLIP2 architecture knobs (Q-former,
    protein language model, LLM), LoRA hyper-parameters, and checkpoint paths.
    """
    # BLIP2 specific configuration
    model_name_or_path: str = field(default="blip2-model", metadata={"help": "Model checkpoint for weights initialization."})

    # BLIP2 Architecture parameters
    bert_name: str = field(default="/path/to/bert", metadata={"help": "BERT model for Q-former"})
    num_query_token: int = field(default=8, metadata={"help": "Number of query tokens"})
    cross_attention_freq: int = field(default=2, metadata={"help": "Cross attention frequency"})
    plm_model: str = field(default="facebook/esm2_t30_150M_UR50D", metadata={"help": "Protein language model"})
    plm_tune: str = field(default="freeze", metadata={"help": "PLM tuning strategy"})
    llm_name: str = field(default="facebook/galactica-1.3b", metadata={"help": "Language model name"})
    llm_tune: str = field(default="lora", metadata={"help": "LLM tuning strategy"})
    qformer_tune: str = field(default="train", metadata={"help": "Q-former tuning strategy"})
    peft_dir: str = field(default="", metadata={"help": "PEFT directory"})

    # LoRA parameters
    lora_r: int = field(default=8, metadata={"help": "LoRA rank"})
    lora_alpha: int = field(default=16, metadata={"help": "LoRA alpha"})
    lora_dropout: float = field(default=0.1, metadata={"help": "LoRA dropout"})

    # Training parameters
    # NOTE(review): "enbale" is a typo for "enable", but the name is read by
    # create_blip2_args_from_config() (and likely by CLI/config files), so it
    # is kept for compatibility.
    enbale_gradient_checkpointing: bool = field(default=False, metadata={"help": "Enable gradient checkpointing"})
    enable_flash: bool = field(default=False, metadata={"help": "Enable flash attention"})

    # Other parameters
    cache_dir: Optional[str] = field(default=None, metadata={"help": "Path to model cache directory."})
    sft_checkpoint: Optional[str] = field(default=None, metadata={"help": "Path to the checkpoint for SFT."})
    freeze_dna_modules: bool = field(default=False, metadata={"help": "Freeze DNA/protein modules"})
588
+
589
@dataclass
class GRPOScriptArguments(ScriptArguments):
    """
    Script arguments for the GRPO training script with BLIP2: dataset source,
    reward-function selection and weights, and prompt-template options.
    """
    dataset_name: str = field(default="wanglab/kegg", metadata={"help": "Dataset name with default."})
    data_file_paths: Optional[str] = field(
        default=None,
        metadata={"help": "Path to protein classification CSV file (format: name,aa_seq,label,location,unique_id,pdb_hash)"},
    )
    arrow_cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Path to arrow cache directory"},
    )
    val_split_ratio: float = field(
        default=0.1,
        metadata={"help": "Ratio of validation split, default 0.1"},
    )
    reward_funcs: list[str] = field(
        # Option 1: combined reward function (recommended)
        default_factory=lambda: ["combined"],

        # Option 2: separate reward functions
        # default_factory=lambda: ["format_correct", "accuracy", "repetition_penalty"],

        # Option 3: protein-classification-specific rewards
        # default_factory=lambda: ["format_correct", "classification_specific", "repetition_penalty"],

        metadata={"help": "List of reward functions. Available: 'combined', 'format_correct', 'accuracy', 'classification_specific', 'repetition_penalty', 'xmlcount', 'strict_format', 'less_than_4'"},
    )

    # Reward-weight configuration (consumed by the 'combined' reward)
    format_weight: float = field(
        default=0.3,
        metadata={"help": "Weight for format correctness reward (used in combined reward)"}
    )
    accuracy_weight: float = field(
        default=0.5,
        metadata={"help": "Weight for accuracy reward (used in combined reward)"}
    )
    repetition_weight: float = field(
        default=0.2,
        metadata={"help": "Weight for repetition penalty reward (used in combined reward)"}
    )

    # Data-processing parameters
    template_name: str = field(
        default="classification",
        metadata={"help": "Prompt template to use: 'classification', 'function_prediction', 'location_prediction'"}
    )
    max_seq_length: int = field(
        default=1000,
        metadata={"help": "Maximum protein sequence length for display in prompt"}
    )
    use_custom_prompts: bool = field(
        default=True,
        metadata={"help": "Whether to use custom protein-specific prompts"}
    )
647
+
648
# Registry mapping CLI reward-function names to their implementations.
reward_funcs_registry = {
    # Combined three-in-one reward
    "combined": combined_reward_func,  # format + accuracy + repetition, weighted

    # Individual reward functions
    "format_correct": format_correct_reward_func,  # output-format correctness
    "accuracy": accuracy_reward_func,  # answer accuracy
    "repetition_penalty": repetition_penalty_reward_func,  # low-repetition bonus
    "classification_specific": classification_specific_reward_func,  # protein-classification specific

    # Legacy reward functions (kept as alternatives)
    "xmlcount": xmlcount_reward_func,
    "strict_format": strict_format_reward_func,
    "less_than_4": less_than_4_reward_func,
}
663
+
664
def get_vlm_module(model_name_or_path):
    """Return the VLM module class to use.

    ``model_name_or_path`` is accepted for interface compatibility but not
    consulted: this implementation always uses the BLIP2 DNA module.
    """
    return Blip2DNAModule
667
+
668
def create_blip2_args_from_config(model_args):
    """Extract the BLIP2 constructor arguments from a Blip2ModelConfig.

    Returns a plain dict in the shape Blip2Stage2 expects, copying the
    architecture, tuning-strategy and LoRA fields one-to-one.
    """
    arg_names = (
        'bert_name',
        'num_query_token',
        'cross_attention_freq',
        'plm_model',
        'plm_tune',
        'llm_name',
        'llm_tune',
        'qformer_tune',
        'peft_dir',
        'lora_r',
        'lora_alpha',
        'lora_dropout',
        'enbale_gradient_checkpointing',  # (sic) name matches the config's typo
        'enable_flash',
    )
    return {name: getattr(model_args, name) for name in arg_names}
688
+
689
def _prep_for_training(model, training_args):
    """Build the LoRA configuration used to fine-tune the language model.

    The BLIP2 model handles attaching LoRA itself; this helper only constructs
    a LoraConfig from the training arguments. ``model`` is accepted for
    interface symmetry but is not modified here.
    """
    # Attention + MLP projection layers of the OPT/Galactica-style LLM.
    target_modules = ["q_proj", "k_proj", "v_proj", "out_proj", "fc1", "fc2"]

    lora_config = LoraConfig(
        r=training_args.lora_r,
        lora_alpha=training_args.lora_alpha,
        lora_dropout=training_args.lora_dropout,
        target_modules=target_modules,
        init_lora_weights="gaussian",
        bias="none",
        task_type="CAUSAL_LM",
    )

    return lora_config
709
+
710
def main(script_args, training_args, model_args):
    """End-to-end GRPO training entry point for the BLIP2 DNA/protein model.

    Builds the BLIP2 model (optionally restoring an SFT checkpoint), resolves
    the requested reward functions, loads the dataset (custom protein CSV or
    KEGG fallback) and runs Blip2GRPOTrainer.train().
    """
    print(training_args.output_dir)
    torch.cuda.empty_cache()
    # Allow TF32-class matmul precision for throughput.
    torch.set_float32_matmul_precision("medium")

    # Create BLIP2 model from the CLI/model configuration.
    blip2_args = create_blip2_args_from_config(model_args)
    model = Blip2Stage2(blip2_args)

    # Load an SFT checkpoint if specified; strict=False tolerates missing or
    # extra keys (e.g. adapters added after SFT).
    if model_args.sft_checkpoint is not None:
        print(f"Loading SFT checkpoint from {model_args.sft_checkpoint}")
        model = Blip2Stage2.load_from_checkpoint(model_args.sft_checkpoint, strict=False, args=blip2_args, map_location='cpu')

    # Resolve reward functions. "combined" is wrapped in a closure so the
    # CLI-provided weights are forwarded on every call.
    reward_funcs = []
    for func_name in script_args.reward_funcs:
        if func_name == "combined":
            def weighted_combined_reward(prompts, completions, answer, **kwargs):
                return combined_reward_func(
                    prompts, completions, answer,
                    format_weight=script_args.format_weight,
                    accuracy_weight=script_args.accuracy_weight,
                    repetition_weight=script_args.repetition_weight,
                    **kwargs
                )
            reward_funcs.append(weighted_combined_reward)
        else:
            reward_funcs.append(reward_funcs_registry[func_name])

    print("reward_funcs:", [func.__name__ if hasattr(func, '__name__') else 'weighted_combined_reward' for func in reward_funcs])
    print(f"Reward weights - Format: {script_args.format_weight}, Accuracy: {script_args.accuracy_weight}, Repetition: {script_args.repetition_weight}")

    vlm_module_cls = get_vlm_module(model_args.model_name_or_path)
    print("using vlm module:", vlm_module_cls.__name__)
    # NOTE(review): question_prompt is fetched but never used below -- the
    # dataset builders construct their own prompts.
    question_prompt = vlm_module_cls.get_question_template()

    # Load the dataset: custom protein CSV with prompt templates, plain
    # protein CSV, or the KEGG fallback.
    if script_args.data_file_paths and script_args.use_custom_prompts:
        print(f"Loading custom protein data from: {script_args.data_file_paths}")

        dataset = get_custom_protein_data_with_prompts(
            data_path=script_args.data_file_paths,
            prompt_templates=prompt_templates,
            template_name=script_args.template_name
        )
    elif script_args.data_file_paths:
        print(f"Loading protein data from: {script_args.data_file_paths}")
        dataset = get_protein_classification_data(
            data_path=script_args.data_file_paths
        )
    else:
        print("Using default KEGG dataset")
        dataset = get_kegg_questions()

    print("Dataset loaded:")
    print(f"Train size: {len(dataset['train'])}")
    print(f"Val size: {len(dataset.get('val', []))}")

    # Print one example as a sanity check of the prompt/answer structure.
    if len(dataset['train']) > 0:
        print("\nSample data:")
        sample = dataset['train'][0]
        print(f"Prompt type: {type(sample.get('prompt', 'Unknown'))}")
        print(f"DNA sequences count: {len(sample.get('dna_sequences', []))}")
        print(f"Answer: {sample.get('answer', 'N/A')}")
        if 'metadata' in sample:
            print(f"Metadata: {sample['metadata']}")
        print(f"First 100 chars of sequence: {sample.get('dna_sequences', [''])[0][:100]}...")

    # Custom callback to save with torch.save instead of safetensors.
    custom_save_callback = SaveWithPyTorchCallback()

    # Initialize the BLIP2 GRPO trainer.
    trainer = Blip2GRPOTrainer(
        model=model,
        reward_funcs=reward_funcs,
        args=training_args,
        dna_module=vlm_module_cls(),
        train_dataset=dataset['train'],
        eval_dataset=dataset['val'] if training_args.eval_strategy != "no" else None,
        peft_config=get_peft_config(model_args),
        attn_implementation=getattr(model_args, 'attn_implementation', 'flash_attention_2'),
        torch_dtype=getattr(model_args, 'torch_dtype', 'bfloat16'),
        callbacks=[custom_save_callback],
    )

    # NOTE(review): this flag is set after the trainer was constructed; the
    # __main__ block also sets it before parsing-dependent code runs, which is
    # what actually guarantees PyTorch-format saves.
    training_args.save_safetensors = False

    # Train the model.
    trainer.train()
825
+
826
if __name__ == "__main__":
    # Log GPU visibility early to catch misconfigured launches.
    print(f"CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES')}")
    parser = TrlParser((GRPOScriptArguments, DNALLMGRPOConfig, Blip2ModelConfig))
    script_args, training_args, model_args = parser.parse_args_and_config()

    # Force torch.save checkpoints; SaveWithPyTorchCallback bypasses safetensors.
    training_args.save_safetensors = False

    main(script_args, training_args, model_args)
835
+
836
+ # 使用示例:
837
+ """
838
+ 使用你的蛋白质数据进行训练:
839
+
840
+ 1. 准备CSV文件,格式:name,aa_seq,label,location,unique_id,pdb_hash
841
+
842
+ 2. 运行训练:
843
+ python blip2_reason.py \
844
+ --data_file_paths /path/to/your/protein_data.csv \
845
+ --reward_funcs combined \
846
+ --format_weight 0.2 \
847
+ --accuracy_weight 0.6 \
848
+ --repetition_weight 0.2 \
849
+ --use_custom_prompts \
850
+ --prompt_template classification \
851
+ --max_seq_length 1000 \
852
+ --output_dir ./output \
853
+ --per_device_train_batch_size 4 \
854
+ --num_train_epochs 3 \
855
+ --learning_rate 1e-5
856
+
857
+ 3. 或者使用分离的奖励函数:
858
+ python blip2_reason.py \
859
+ --data_file_paths /path/to/your/protein_data.csv \
860
+ --reward_funcs format_correct classification_specific repetition_penalty \
861
+ --use_custom_prompts \
862
+ --prompt_template function_prediction
863
+
864
+ 数据格式示例:
865
+ P0DM40,MLRVVVESASINPPLSTTPKAFVTVYFRDMMKRTRVEEGHDPIWNETLIWHLWNQPLENDSFLKVILQDSVSKKKERFIGLATVPLKRLAQRPKEVMFVRDLILLNHSMKPTNCTVTLHVAQIYDQDTEMTGNEELLGSTVNEVTQKKLMVSGLPMHRALASKPQHFQVRVKVFEARQLLGNNIKPVVKVNIADQQHLTRIKMGNNPFFNEIFFQNFHEVPAKFFEENISIEVVDSAASRSKAEIGRFQTDIGFIYHSPGHTLLRKWLGLCQRNKTTSGVRGYLKVTICALGVGDQALVDQKLPYEQNTRVQIFKSKEVPVSLAYLQFFIYCAEDLHFGTHKSATPVLEVELIGDKLRTKPQNPSDNPIWNQILTFQIQLPCLSSYIKFRVMDCSKYKCQDEIGSASLCLSQISSTGEEIQGMYSGFLPCFGPSFLTLRGGKKPPFRTSEEGTCIMDAVQHGLAYRGRIFVEIVTKIKSQQDSVMKDLSQEVTQVEMQYYRQKYGLCVIFLSCTMMPKFKDLIQFEVSMGHYGNKTDPNYKPLVSTTQYSPVIYDGTTYHYVPWYNTKPVVAVTSNWEDVGFRMNCLNLLHITRDRLKTNLDILKSIRNPRDPALLQQWEKLLKELQEDCRRPLPCMTDQPRANSLDRNKWQLRSQLLQQLAQMAKEAKPVNMVGTAKEWLHRLNAVIPEPQESLPDVLIWLMSRQQRVAYARVPAHTVLFSPAGPLSSGKFCGKIQNILLQYPEGEGQDTFPASLRVCMWLGNVKYSKNLKLLQQGSMVVYAETYENQAKTRDDWGQQGLYHCPNFSDVMGRKALPKTDFKAPPGWHWKDDWVVEPQRRLLLDIDINKSQVLEEVYENQLRNATGAWVPAAIPNTDVNGQPVEALENVKCPQGWHFKKNWIVKLNHAVDSEGWEYGVGIPPSGLPQIWNSVEKTYHSCRRRRWVRVRFRNHKELGQERSQEQETLSFLQMQDLSEEGKEGWEYGTFDSRFHLDPQPTSRFRRRCWHRQLAPNKDRGVASIFLLEGSLAVEQKDQPRKEMEKTRSWQPWKDLRHTPEDPRIPTTPFIYYILNKPHYYQLFCYIYQARNLMYNQILTFQEPFIQVVFLNHSLCTQTLRSSAAPTWSQSIIFQHLLLFEDPKDTRENPPLVVLELWQHDSRGNKILWGRSMWPPVVWLGLQDWVFTPLRWHPLVRELGEEEGEILASCELILETQKLKELHPPILSIPCKDGIYLLPKNIQPTMKMMAIEIMAWGLRNMTKVRYPQLLLECGGESLKTEPISNFQENPNFPTSTFFFTVFMPLEETHAQPLVVKVVDNQEYGQQIVVGQANIDFLQPYFCDPWSLNYTTVKLPTLSVKKPDTFLDFVYKKFWFDSSKDEEVYEEEVDWWSKLFWATGDADKSLNYNHKSYHTLKVYDCELEAVLTFKGLQDFCQTFKLYQEKPKVDSPVVGEFKGLFRIYPFPEDPEAPKPPRQFSAWPEIEDFPQMCLVRVYLIRAINLQPQDYNGLCDPYVILKLGQTKLGSRDSYYPNTLDPIFGMMYELTCNIPLEKDLEIQLFDFDLITADDEIGSTVIDLENRLLSGFGARCGLSKSYCKSGPFKWRDQMTPSYLLYRYAKQKGLPPPVFDLEGDSLYYNGETFKLQSFESAPPTYKHLGPKKERLALYILNTQGLVPEHVETRTLHSNSQPGIDQGKIQMWVDIFPKMLGPPGPQVNISPRKPKRYQLRCIIWSTAEVDLVQETFSKEKMSDIYVKGWLFGLEEDTQKTDVHYHSLTGEATFNWRFIFTMDYLTTERACVQSQKDYIWSLDPTSTKFPARLMIQIWDNDFFSPDDFLGVLELDLSDMPLPAQNIKQCSLKMMETDSKWPFTPQKRISLFKKTNVTGWWPCQVLDGDKWRLSGKVKMTLEMLSEREALIRPAGRGQSEPNQFPMLHPPERNDSFLLWYQSPIKNFCYAVCKRYRSKIICLVVTLVIGFILLNFVYSAPSYFAMNWIK
PQLRLSSPIKIVNLIGTVNTSNINSSILTMEGSTYHASHVFPEAPAP,0,M,af67d99c09f74ea8af5004cc2906bbc5,d55cbc3d94bd9668d97a678b4a04176a
866
+ """
BioReason-0813/model/__pycache__/blip2.cpython-310.pyc ADDED
Binary file (3.17 kB). View file
 
BioReason-0813/model/__pycache__/blip2_opt.cpython-310.pyc ADDED
Binary file (9.75 kB). View file
 
BioReason-0813/model/__pycache__/blip2_opt.cpython-311.pyc ADDED
Binary file (18.1 kB). View file
 
BioReason-0813/model/__pycache__/blip2_stage2.cpython-310.pyc ADDED
Binary file (14.1 kB). View file
 
BioReason-0813/model/__pycache__/blip2_stage2.cpython-311.pyc ADDED
Binary file (28.2 kB). View file
 
BioReason-0813/model/__pycache__/help_funcs.cpython-310.pyc ADDED
Binary file (3.97 kB). View file
 
BioReason-0813/model/blip2.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Copyright (c) 2023, salesforce.com, inc.
3
+ All rights reserved.
4
+ SPDX-License-Identifier: BSD-3-Clause
5
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6
+ """
7
+ import torch
8
+ import torch.nn as nn
9
+
10
+ from lavis.models.base_model import BaseModel
11
+ from lavis.models.blip2_models.Qformer import BertConfig, BertLMHeadModel
12
+ from transformers import BertTokenizer, BitsAndBytesConfig
13
+ from transformers import EsmTokenizer, EsmModel
14
+ import os
15
+ from pathlib import Path  # used for local checkpoint-path detection
16
+
17
+
18
def get_gpu_memory(device=0):
    """Return (free, used, total) memory of CUDA *device*, in GiB.

    Uses ``torch.cuda.mem_get_info`` so the figures reflect the driver's
    view of the whole device, not just this process's caching allocator.

    Args:
        device: CUDA device index to query.

    Returns:
        Tuple ``(free_gib, used_gib, total_gib)``.
    """
    free_bytes, total_bytes = torch.cuda.mem_get_info(device)
    gib = 1024 ** 3
    free = free_bytes / gib
    total = total_bytes / gib
    return free, total - free, total
27
+
28
+
29
class Blip2Base(BaseModel):
    """Shared utilities for BLIP-2 style protein-language models.

    Mirrors the original LAVIS ``Blip2Base`` but pairs the Q-Former with an
    ESM protein encoder instead of a ViT image encoder.
    """

    @classmethod
    def init_Qformer(cls, model_name, num_query_token, plm_width, cross_attention_freq=2):
        """Build the Q-Former (a BERT LM head model with cross-attention) and
        its learnable query tokens.

        Args:
            model_name: BERT checkpoint name or local path.
            num_query_token: number of learnable query vectors.
            plm_width: hidden size of the protein encoder (cross-attention
                encoder width).
            cross_attention_freq: insert a cross-attention layer every this
                many blocks.

        Returns:
            Tuple ``(tokenizer, Qformer, query_tokens)``.
        """
        print(f"Loading Qformer from: {model_name}")
        # Informational only: report whether the checkpoint resolves to a
        # local directory or will be fetched from the Hugging Face Hub.
        if not model_name.startswith('microsoft/') and Path(model_name).exists():
            print("Loading from local path...")
        else:
            print("Loading from Hugging Face Hub...")

        encoder_config = BertConfig.from_pretrained(model_name)
        encoder_config.encoder_width = plm_width
        # Insert a cross-attention layer every `cross_attention_freq` blocks.
        encoder_config.add_cross_attention = True
        encoder_config.cross_attention_freq = cross_attention_freq
        encoder_config.query_length = num_query_token

        Qformer = BertLMHeadModel.from_pretrained(model_name, config=encoder_config)
        query_tokens = nn.Parameter(
            torch.zeros(1, num_query_token, encoder_config.hidden_size)
        )
        query_tokens.data.normal_(mean=0.0, std=encoder_config.initializer_range)

        tokenizer = BertTokenizer.from_pretrained(model_name)
        tokenizer.add_special_tokens({"bos_token": "[DEC]"})
        return tokenizer, Qformer, query_tokens

    def init_protein_encoder(self, plm_name, load_4bit=False):
        """Load the ESM protein encoder, its tokenizer, and a LayerNorm over
        its hidden states.

        Args:
            plm_name: ESM checkpoint name or local path.
            load_4bit: if True, load the encoder NF4-quantized via
                bitsandbytes.

        Returns:
            Tuple ``(plm_tokenizer, plm, ln_layer)``.
        """
        # The original code had two branches that performed the identical
        # tokenizer load; keep a single call and only vary the log message.
        if os.path.isdir(plm_name) or os.path.exists(os.path.join(plm_name, "config.json")):
            print(f"Loading local PLM from {plm_name}")
        else:
            print(f"Loading remote PLM from {plm_name}")
        plm_tokenizer = EsmTokenizer.from_pretrained(plm_name)

        if not load_4bit:
            plm = EsmModel.from_pretrained(plm_name, add_pooling_layer=False, torch_dtype=torch.bfloat16)
        else:
            quant_config = BitsAndBytesConfig(
                load_in_4bit=True,
                load_in_8bit=False,
                llm_int8_threshold=6.0,
                llm_int8_has_fp16_weight=False,
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type='nf4',
            )
            # NOTE(review): GPU indices 6/7 are hard-coded for a specific
            # multi-GPU host — place the encoder on whichever of the two is
            # less used. TODO: make the device indices configurable.
            free, used_memory, total = get_gpu_memory(6)
            if used_memory > 1:
                device_map = {"": 7}
            else:
                device_map = {"": 6}
            plm = EsmModel.from_pretrained(
                plm_name,
                add_pooling_layer=False,
                quantization_config=quant_config,
                load_in_4bit=True,
                load_in_8bit=False,
                device_map=device_map,
                torch_dtype=torch.bfloat16,
            )

        # Expose the hidden size under the attribute name the Q-Former
        # initialization expects.
        plm.num_features = plm.config.hidden_size
        ln_layer = nn.LayerNorm(plm.num_features)
        return plm_tokenizer, plm, ln_layer
111
+
112
+
113
def disabled_train(self, mode=True):
    """No-op replacement for ``nn.Module.train``.

    Assigning this function to a frozen module's ``train`` attribute keeps
    the module permanently in its current (eval) mode: calls to
    ``module.train()`` / ``module.train(False)`` change nothing.
    """
    return self
117
+
118
+
119
+ # class LayerNorm(nn.LayerNorm):
120
+ # """Subclass torch's LayerNorm to handle fp16."""
121
+
122
+ # def forward(self, x: torch.Tensor):
123
+ # orig_type = x.dtype
124
+ # ret = super().forward(x.type(torch.float32))
125
+ # return ret.type(orig_type)
126
+
BioReason-0813/model/blip2_opt.py ADDED
@@ -0,0 +1,550 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Copyright (c) 2023, salesforce.com, inc.
3
+ All rights reserved.
4
+ SPDX-License-Identifier: BSD-3-Clause
5
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6
+ """
7
+ import logging
8
+ import torch
9
+ import torch.nn as nn
10
+ from torch.cuda.amp import autocast as autocast
11
+ # from peft import get_peft_config, get_peft_model, get_peft_model_state_dict, LoraConfig, TaskType, PeftModel
12
+ from lavis.models.blip2_models.blip2 import disabled_train
13
+ from model.blip2 import Blip2Base
14
+ from transformers import AutoTokenizer
15
+ from transformers import OPTForCausalLM
16
+ from transformers import AutoTokenizer, AutoModelForCausalLM
17
+ from opendelta import LoraModel
18
+ from opendelta.delta_models.lora import LoraConfig as DeltaLoraConfig
19
+ from transformers import BertTokenizer, BitsAndBytesConfig
20
+ from model.help_funcs import hf_enable_gradient_checkpointing
21
+ import json
22
+ # from accelerate import Accelerator
23
+ # import torch.distributed as dist
24
+
25
+ # from peft.tuners.lora import LoraLayer
26
+ # from peft import (
27
+ # prepare_model_for_kbit_training,
28
+ # LoraConfig as PeftLoraConfig,
29
+ # get_peft_model,
30
+ # PeftModel
31
+ # )
32
+
33
+ # from opendelta.delta_configs
34
+
35
+ opt_model_list = [
36
+ "facebook/galactica-125m",
37
+ "facebook/galactica-1.3b",
38
+ "facebook/galactica-6.7b",
39
+ "facebook/galactica-30b",
40
+ ]
41
+
42
+ def get_gpu_memory(device=0):
43
+ # t = torch.cuda.get_device_properties(device).total_memory
44
+ # r = torch.cuda.memory_reserved(device)
45
+ # a = torch.cuda.memory_allocated(device)
46
+ # f = r-a # free inside reserved
47
+ free, total = torch.cuda.mem_get_info(device)
48
+ free = free / (1024 ** 3)
49
+ total = total / (1024 ** 3)
50
+ return free, total-free, total
51
+
52
+ def mask_by_len(input, lens, fill_value=0):
53
+ '''
54
+ input: shape = [N, D]
55
+ lens: shape = [N]
56
+ '''
57
+ mask = torch.arange(input.shape[1], device=input.device).reshape(1, -1)
58
+ mask = mask < lens.reshape(-1, 1)
59
+ input[mask] = fill_value
60
+ return input
61
+
62
+
63
+
64
+ class Blip2OPT(Blip2Base):
65
+ """
66
+ BLIP2 first-stage model with Q-former and ViT.
67
+ Supported model types:
68
+ - pretrained: pretrained model with vit-g
69
+ - pretrain_vitL: pretrained model with vit-large
70
+ - coco: fintuned model on coco
71
+ Usage:
72
+ >>> from lavis.models import load_model
73
+ >>> model = load_model("blip2", "pretrain")
74
+ """
75
+ def __init__(
76
+ self,
77
+ bert_name,
78
+ num_query_token=32,
79
+ cross_attention_freq=2,
80
+ plm_model="facebook/esm2_t30_150M_UR50D",
81
+ plm_tune='freeze',
82
+ llm_name="facebook/galactica-1.3b",
83
+ llm_tune='freeze',
84
+ qformer_tune='train',
85
+ peft_dir='',
86
+ args=None,
87
+ ):
88
+ super().__init__()
89
+ self.args = args
90
+ self.enbale_gradient_checkpointing = args.enbale_gradient_checkpointing
91
+
92
+ self.plm_tokenizer, self.plm, self.ln_layer = self.init_protein_encoder(plm_model)
93
+ self.plm_tune = plm_tune
94
+ if plm_tune == 'freeze':
95
+ for name, param in self.plm.named_parameters():
96
+ param.requires_grad = False
97
+ self.plm = self.plm.eval()
98
+ self.plm.train = disabled_train
99
+ logging.info("freeze plm encoder")
100
+ elif plm_tune == 'lora':
101
+ lora_config = DeltaLoraConfig(args.lora_r,
102
+ args.lora_alpha,
103
+ args.lora_dropout,
104
+ modified_modules=["query", "value"])
105
+ self.delta = LoraModel.from_config(lora_config, self.plm)
106
+ self.delta.freeze_module(set_state_dict=False)
107
+ self.delta.log()
108
+ else:
109
+ raise NotImplementedError()
110
+
111
+ self.num_query_token = num_query_token
112
+ self.qformer_tokenizer, self.Qformer, self.query_tokens = self.init_Qformer(bert_name, num_query_token, self.plm.num_features, cross_attention_freq)
113
+ ### remove the unused parameters
114
+ self.Qformer.cls = None
115
+ self.Qformer.bert.embeddings.word_embeddings = None
116
+ self.Qformer.bert.embeddings.position_embeddings = None
117
+ for layer in self.Qformer.bert.encoder.layer:
118
+ layer.output = None
119
+ layer.intermediate = None
120
+
121
+ # === 3. 控制 Qformer 是否冻结 ===
122
+ self.qformer_tune = qformer_tune
123
+ if self.qformer_tune == 'freeze':
124
+ for name, param in self.Qformer.named_parameters():
125
+ param.requires_grad = False
126
+ self.Qformer = self.Qformer.eval()
127
+ self.Qformer.train = disabled_train
128
+ logging.info("freeze Qformer encoder")
129
+ elif self.qformer_tune == 'train':
130
+ logging.info("train Qformer encoder")
131
+ else:
132
+ raise NotImplementedError(f"Unsupported qformer_tune mode: {self.qformer_tune}")
133
+
134
+ ## initialize llm model
135
+ # self.init_distributed()
136
+ self.llm_model, self.llm_tokenizer = self.load_llm(llm_name)
137
+
138
+ #self.llm_model, self.llm_tokenizer = self.load_model_on_single_gpu(llm_name)
139
+ self.eos_token_id = self.llm_tokenizer.eos_token_id
140
+ self.pad_token_id = self.llm_tokenizer.pad_token_id
141
+
142
+ if llm_tune == 'freeze':
143
+ for name, param in self.llm_model.named_parameters():
144
+ param.requires_grad = False
145
+ elif llm_tune == 'full':
146
+ for name, param in self.llm_model.named_parameters():
147
+ param.requires_grad = True
148
+ elif llm_tune == 'lora':
149
+ lora_config = DeltaLoraConfig(args.lora_r,
150
+ args.lora_alpha,
151
+ args.lora_dropout,)
152
+ self.delta = LoraModel.from_config(lora_config, self.llm_model)
153
+ self.delta.freeze_module(set_state_dict=False)
154
+ self.delta.log()
155
+ elif llm_tune == 'mid_lora':
156
+ print("================")
157
+ print("加载了小lora")
158
+ print("=================")
159
+ lora_config = DeltaLoraConfig(args.lora_r, args.lora_alpha, args.lora_dropout, modified_modules=["q_proj", "v_proj", 'k_proj', "out_proj", "fc1", "fc2"])
160
+ self.delta = LoraModel.from_config(lora_config, self.llm_model)
161
+ self.delta.freeze_module(set_state_dict=False)
162
+ self.delta.log()
163
+ elif llm_tune == 'peft_lora':
164
+ config = PeftLoraConfig(
165
+ r=args.lora_r,
166
+ lora_alpha=args.lora_alpha,
167
+ # target_modules=modules,
168
+ lora_dropout=args.lora_dropout,
169
+ bias="none",
170
+ task_type="CAUSAL_LM",
171
+ )
172
+ self.llm_model = get_peft_model(self.llm_model, config)
173
+ for name, module in self.llm_model.named_modules():
174
+ if isinstance(module, LoraLayer):
175
+ if True:
176
+ module = module.to(torch.bfloat16)
177
+ if 'norm' in name:
178
+ module = module.to(torch.float32)
179
+ if 'lm_head' in name or 'embed_tokens' in name:
180
+ if hasattr(module, 'weight'):
181
+ if True and module.weight.dtype == torch.float32:
182
+ module = module.to(torch.bfloat16)
183
+ else:
184
+ raise NotImplementedError()
185
+
186
+ ## fixme: this is different from the original BLIP2
187
+ # self.eos_token_id = self.llm_tokenizer(
188
+ # "\n", add_special_tokens=False
189
+ # ).input_ids[0]
190
+ self.opt_proj = nn.Linear(self.Qformer.config.hidden_size, self.llm_model.config.hidden_size)
191
+
192
+ def load_llm(self, llm_model, load_4bit=False, enable_gradient_checkpointing=True):
193
+ llm_tokenizer = AutoTokenizer.from_pretrained(llm_model, use_fast=False, padding_side='right')
194
+ llm_tokenizer.add_special_tokens({'pad_token': '<pad>'})
195
+
196
+ special_tokens_dict = {'additional_special_tokens': ['<PROT>', '<TEXT>']}
197
+ llm_tokenizer.add_special_tokens(special_tokens_dict)
198
+
199
+ llm_model = AutoModelForCausalLM.from_pretrained(llm_model, torch_dtype=torch.bfloat16)
200
+ llm_model.resize_token_embeddings(len(llm_tokenizer)) ## this will cause bug when
201
+
202
+ return llm_model, llm_tokenizer
203
+
204
+
205
+ # def forward(self, batch):
206
+ # prot_batch, text_batch = batch
207
+ # prot_embeds = self.plm(**prot_batch, return_dict=True)
208
+ # prot_embeds = prot_embeds.last_hidden_state
209
+ # if self.plm_tune == 'freeze':
210
+ # prot_embeds = prot_embeds.detach()
211
+ # prot_embeds = self.ln_layer(prot_embeds)
212
+ # device = prot_embeds.device
213
+ # query_tokens = self.query_tokens.expand(prot_embeds.shape[0], -1, -1)
214
+ # query_output = self.Qformer.bert(
215
+ # query_embeds=query_tokens,
216
+ # encoder_hidden_states=prot_embeds,
217
+ # encoder_attention_mask=prot_batch.attention_mask,
218
+ # return_dict=True,
219
+ # )
220
+ # prot_tokens = self.opt_proj(query_output.last_hidden_state)
221
+ # prot_mask = torch.ones(prot_tokens.shape[:2], dtype=text_batch.attention_mask.dtype, device=device)
222
+ # prot_empty_targets = torch.ones(prot_tokens.shape[:2], dtype=torch.long, device=device).fill_(-100)
223
+
224
+ # targets = text_batch.input_ids.masked_fill(text_batch.input_ids == self.llm_tokenizer.pad_token_id, -100)
225
+ # targets = targets.masked_fill(text_batch.token_type_ids == 0, -100)
226
+ # targets = torch.cat([prot_empty_targets, targets], dim=1)
227
+
228
+ # inputs_embeds = self.llm_model.get_input_embeddings()(text_batch.input_ids)
229
+ # inputs_embeds = torch.cat((prot_tokens, inputs_embeds), dim=1)
230
+ # attention_mask = torch.cat([prot_mask, text_batch.attention_mask], dim=1)
231
+
232
+ # outputs = self.llm_model(
233
+ # inputs_embeds=inputs_embeds,
234
+ # attention_mask=attention_mask,
235
+ # return_dict=True,
236
+ # labels=targets,
237
+ # )
238
+ # loss = outputs.loss
239
+ # return loss
240
+
241
+ def forward(self, batch):
242
+ prot_batch, prompt_batch, text_dict = batch
243
+ text_seqs = text_dict['targets']
244
+ batch_size = prompt_batch['input_ids'].size(0)
245
+ # print("{{{{{}}}}}")
246
+ # print(batch_size)
247
+
248
+ prot_embeds = self.plm(**prot_batch, return_dict=True)
249
+ prot_embeds = prot_embeds.last_hidden_state
250
+ if self.plm_tune == 'freeze':
251
+ prot_embeds = prot_embeds.detach()
252
+ prot_embeds = self.ln_layer(prot_embeds)
253
+ device = prot_embeds.device
254
+ query_tokens = self.query_tokens.expand(prot_embeds.shape[0], -1, -1)
255
+ query_output = self.Qformer.bert(
256
+ query_embeds=query_tokens,
257
+ encoder_hidden_states=prot_embeds,
258
+ encoder_attention_mask=prot_batch.attention_mask,
259
+ return_dict=True,
260
+ )
261
+ prot_tokens = self.opt_proj(query_output.last_hidden_state)
262
+ prot_mask = torch.ones(prot_tokens.shape[:2], dtype=torch.long, device=device)
263
+
264
+ # === Step 3: 编码 prompt 输入 ===
265
+ prompt_embeds = self.llm_model.get_input_embeddings()(prompt_batch.input_ids) # [B, L_prompt, D_llm]
266
+ prompt_mask = prompt_batch['attention_mask']
267
+
268
+
269
+ text_batch = self.llm_tokenizer(
270
+ list(text_seqs),
271
+ padding='longest',
272
+ truncation=True,
273
+ max_length=1024,
274
+ return_tensors='pt'
275
+ ).to(device)
276
+ target_embeds = self.llm_model.get_input_embeddings()(text_batch['input_ids']) # [B, T, D]
277
+ target_mask = text_batch['attention_mask']
278
+ targets = text_batch['input_ids'].masked_fill(text_batch['input_ids'] == self.llm_tokenizer.pad_token_id, -100)
279
+
280
+ # === : 加入 ChatML 特殊 token embedding ===
281
+ embedding_layer = self.llm_model.get_input_embeddings()
282
+
283
+ def embed_special_str(token_str):
284
+ # 先 tokenize,得到一系列 ID
285
+ ids = self.llm_tokenizer(token_str, add_special_tokens=False).input_ids
286
+ # 把它变成 [1, N] tensor
287
+ ids_tensor = torch.tensor([ids], device=device)
288
+ # 查 embedding 层:
289
+ embs = embedding_layer(ids_tensor) # shape [1, N, D]
290
+ # Expand 到 batch 大小
291
+ return embs.expand(batch_size, -1, -1)
292
+
293
+ # 示例
294
+ embed_im_start = embed_special_str("<|im_start|>user\n protein sequence is:<protein>") # 可能对应多个 sub-tokens
295
+ embed_protein = embed_special_str("</protein>")
296
+ embed_im_end = embed_special_str("<|im_end|>\n")
297
+ embed_assistant= embed_special_str("<|im_start|>assistant\n")
298
+
299
+
300
+ user_embeds = torch.cat([embed_im_start, prot_tokens , embed_protein, prompt_embeds,embed_im_end, embed_assistant], dim=1)
301
+ user_mask = torch.ones(user_embeds.shape[:2], dtype=torch.long, device=device)
302
+
303
+ assistant_embeds = target_embeds
304
+ assistant_mask = target_mask
305
+
306
+ inputs_embeds = torch.cat([user_embeds, assistant_embeds], dim=1)
307
+ attention_mask = torch.cat([user_mask, assistant_mask], dim=1)
308
+
309
+ # === Step 6: 构造 labels,只监督 assistant 部分 ===
310
+ ignore_labels = torch.full(user_embeds.shape[:2], -100, dtype=torch.long, device=device)
311
+ assistant_labels = targets
312
+ labels = torch.cat([ignore_labels, assistant_labels], dim=1)
313
+
314
+ # print("embed_im_start:", embed_im_start.shape)
315
+ # print("prompt_embeds:", prompt_embeds.shape)
316
+ # print("prot_tokens:", prot_tokens.shape)
317
+ # print("embed_im_end:", embed_im_end.shape)
318
+ # print("embed_assistant:", embed_assistant.shape)
319
+ # print("target_embeds:", target_embeds.shape)
320
+ # print("labels:", labels.shape)
321
+ # print("inputs_embeds:", inputs_embeds.shape)
322
+
323
+ #============================
324
+
325
+ # inputs_embeds = torch.cat([prot_tokens, prompt_embeds, target_embeds], dim=1)
326
+ # attention_mask = torch.cat([prot_mask, prompt_mask, target_mask], dim=1)
327
+
328
+ # # === Step 7: 构造 labels,只监督 target 部分 ===
329
+ # prot_label_pad = torch.full(prot_tokens.shape[:2], -100, dtype=torch.long, device=device)
330
+ # prompt_label_pad = torch.full(prompt_mask.shape, -100, dtype=torch.long, device=device)
331
+ # labels = torch.cat([prot_label_pad, prompt_label_pad, targets], dim=1)
332
+
333
+ # === Step 8: 送入 LLM ===
334
+ outputs = self.llm_model(
335
+ inputs_embeds=inputs_embeds,
336
+ attention_mask=attention_mask,
337
+ labels=labels,
338
+ return_dict=True,
339
+ )
340
+ loss = outputs.loss
341
+ # prot_mask = torch.ones(prot_tokens.shape[:2], dtype=text_batch.attention_mask.dtype, device=device)
342
+ # prot_empty_targets = torch.ones(prot_tokens.shape[:2], dtype=torch.long, device=device).fill_(-100)
343
+ # empty_targets = torch.ones(prompt_batch.attention_mask.shape, dtype=torch.long, device=device).fill_(-100)
344
+ # targets = text_batch.input_ids.masked_fill(text_batch.input_ids == self.llm_tokenizer.pad_token_id, -100)
345
+ # targets = torch.cat([prot_empty_targets, empty_targets, targets], dim=1)
346
+
347
+ # prompt_embeds = self.llm_model.get_input_embeddings()(prompt_batch.input_ids)
348
+ # inputs_embeds = self.llm_model.get_input_embeddings()(text_batch.input_ids)
349
+ # inputs_embeds = torch.cat((prot_tokens, prompt_embeds, inputs_embeds), dim=1)
350
+ # attention_mask = torch.cat([prot_mask, prompt_batch.attention_mask, text_batch.attention_mask], dim=1)
351
+
352
+ # outputs = self.llm_model(
353
+ # inputs_embeds=inputs_embeds,
354
+ # attention_mask=attention_mask,
355
+ # return_dict=True,
356
+ # labels=targets,
357
+ # )
358
+ # loss = outputs.loss
359
+ return loss
360
+
361
+ # def forwardv2(self, batch):
362
+ # prot_batch, prompt_batch, text_batch = batch
363
+ # prot_embeds = self.plm(**prot_batch, return_dict=True)
364
+ # prot_embeds = prot_embeds.last_hidden_state
365
+ # if self.plm_tune == 'freeze':
366
+ # prot_embeds = prot_embeds.detach()
367
+ # prot_embeds = self.ln_layer(prot_embeds)
368
+ # device = prot_embeds.device
369
+ # query_tokens = self.query_tokens.expand(prot_embeds.shape[0], -1, -1)
370
+ # query_output = self.Qformer.bert(
371
+ # query_embeds=query_tokens,
372
+ # encoder_hidden_states=prot_embeds,
373
+ # encoder_attention_mask=prot_batch.attention_mask,
374
+ # return_dict=True,
375
+ # )
376
+ # prot_tokens = self.opt_proj(query_output.last_hidden_state)
377
+ # prot_mask = torch.ones(prot_tokens.shape[:2], dtype=text_batch.attention_mask.dtype, device=device)
378
+ # targets = text_batch.input_ids.masked_fill(text_batch.input_ids == self.llm_tokenizer.pad_token_id, -100)
379
+
380
+ # ### forward prefix
381
+ # prompt_embeds = self.llm_model.get_input_embeddings()(prompt_batch.input_ids)
382
+ # prefix_embeds = torch.cat([prot_tokens, prompt_embeds], dim=1)
383
+ # prefix_mask = torch.cat([prot_mask, prompt_batch.attention_mask], dim=1)
384
+ # prefix_output = self.llm_model.model(
385
+ # inputs_embeds=prefix_embeds,
386
+ # attention_mask=prefix_mask,
387
+ # use_cache=True,
388
+ # return_dict=True,
389
+ # )
390
+
391
+ # ## forward decoding
392
+ # if False:
393
+ # attention_mask = torch.cat([prot_mask, prompt_batch.attention_mask, text_batch.attention_mask], dim=1)
394
+ # else:
395
+ # attention_mask = text_batch.attention_mask
396
+ # print(prefix_output.past_key_values)
397
+ # outputs = self.llm_model(
398
+ # input_ids=text_batch.input_ids,
399
+ # attention_mask=attention_mask,
400
+ # past_key_values=prefix_output.past_key_values,
401
+ # return_dict=True,
402
+ # labels=targets,
403
+ # )
404
+ # loss = outputs.loss
405
+ # return loss
406
+
407
+ @torch.no_grad()
408
+ def generate(
409
+ self,
410
+ samples,
411
+ do_sample=False,
412
+ num_beams=5,
413
+ max_length=128,
414
+ min_length=1,
415
+ top_p=0.9,
416
+ repetition_penalty=1.0,
417
+ length_penalty=1.0,
418
+ num_captions=1,
419
+ temperature=1,
420
+ ):
421
+ """
422
+ Args:
423
+ samples (dict): A dictionary containing the following keys:
424
+ - image (torch.Tensor): A tensor of shape (batch_size, 3, H, W)
425
+ num_beams (int): Number of beams for beam search. 1 means no beam search.
426
+ max_length (int): The maximum length of the sequence to be generated.
427
+ min_length (int): The minimum length of the sequence to be generated.
428
+ top_p (float): The cumulative probability for nucleus sampling.
429
+ repetition_penalty (float): The parameter for repetition penalty. 1.0 means no penalty.
430
+ num_captions (int): Number of captions to be generated for each image.
431
+ Returns:
432
+ captions (list): A list of strings of length batch_size * num_captions.
433
+ """
434
+ # prot_batch = samples['prot_batch']
435
+ # prompt_batch = samples['prompt_batch']
436
+
437
+ # # with self.maybe_autocast():
438
+ # prot_embeds = self.plm(**prot_batch, return_dict=True)
439
+ # prot_embeds = self.ln_layer(prot_embeds.last_hidden_state)
440
+
441
+ # query_tokens = self.query_tokens.expand(prot_embeds.shape[0], -1, -1)
442
+ # query_output = self.Qformer.bert(
443
+ # query_embeds=query_tokens,
444
+ # encoder_hidden_states=prot_embeds,
445
+ # encoder_attention_mask=prot_batch['attention_mask'],
446
+ # return_dict=True,
447
+ # )
448
+ # prot_tokens = self.opt_proj(query_output.last_hidden_state)
449
+
450
+
451
+
452
+ # # prompt_batch = samples['prompt_batch']
453
+ # prompt_input_ids = prompt_batch['input_ids'] # shape: [B, L]
454
+ # # for i, ids in enumerate(prompt_input_ids):
455
+ # # print(f"Prompt {i} token length: {len(ids)}")
456
+ # decoded_texts = [self.llm_tokenizer.decode(ids, skip_special_tokens=True) for ids in prompt_input_ids]
457
+ # # print("=========")
458
+ # # print(decoded_texts)
459
+ # #print(decoded_texts)
460
+ # save_path = "decoded_prompts.json"
461
+
462
+ # # 将 list 写入 JSON 文件
463
+ # with open(save_path, 'w', encoding='utf-8') as f:
464
+ # json.dump(decoded_texts, f, ensure_ascii=False, indent=4)
465
+
466
+ # prompt_attention_mask = prompt_batch['attention_mask']
467
+ # prompt_embeds = self.llm_model.model.embed_tokens(prompt_input_ids)
468
+
469
+ # inputs_embeds = torch.cat((prompt_embeds, prot_tokens), dim=1)
470
+
471
+ # prot_attention_mask = torch.ones(prot_tokens.shape[:2], dtype=prompt_attention_mask.dtype, device=prompt_attention_mask.device)
472
+ # #attention_mask = torch.cat([prot_attention_mask, prompt_attention_mask], dim=1)
473
+ # attention_mask = torch.cat([ prompt_attention_mask,prot_attention_mask], dim=1)
474
+
475
+ #==========================
476
+ prot_batch = samples['prot_batch']
477
+ prompt_batch = samples['prompt_batch']
478
+
479
+
480
+ device = prompt_batch['input_ids'].device
481
+ batch_size = prompt_batch['input_ids'].size(0)
482
+
483
+ # === Step 1: 编码蛋白质 + QFormer ===
484
+ prot_embeds = self.plm(**prot_batch, return_dict=True).last_hidden_state
485
+ prot_embeds = self.ln_layer(prot_embeds)
486
+ query_tokens = self.query_tokens.expand(prot_embeds.shape[0], -1, -1)
487
+ query_output = self.Qformer.bert(
488
+ query_embeds=query_tokens,
489
+ encoder_hidden_states=prot_embeds,
490
+ encoder_attention_mask=prot_batch['attention_mask'],
491
+ return_dict=True,
492
+ )
493
+ prot_tokens = self.opt_proj(query_output.last_hidden_state) # [B, L_qformer, D]
494
+
495
+ # === Step 2: 编码 prompt 输入 ===
496
+ prompt_input_ids = prompt_batch['input_ids']
497
+ prompt_attention_mask = prompt_batch['attention_mask']
498
+ prompt_embeds = self.llm_model.get_input_embeddings()(prompt_input_ids)
499
+
500
+ # === Step 3: 获取 ChatML 特殊 token 的 embedding ===
501
+ embedding_layer = self.llm_model.get_input_embeddings()
502
+
503
+ def embed_special_str(token_str):
504
+ # 先 tokenize,得到一系列 ID
505
+ ids = self.llm_tokenizer(token_str, add_special_tokens=False).input_ids
506
+ # 把它变成 [1, N] tensor
507
+ ids_tensor = torch.tensor([ids], device=device)
508
+ # 查 embedding 层:
509
+ embs = embedding_layer(ids_tensor) # shape [1, N, D]
510
+ # Expand 到 batch 大小
511
+ return embs.expand(batch_size, -1, -1)
512
+
513
+ # 示例
514
+ embed_im_start = embed_special_str("<|im_start|>user\nprotein sequence is: <protein>") # 可能对应多个 sub-tokens
515
+ embed_protein = embed_special_str("</protein>")
516
+ embed_im_end = embed_special_str("<|im_end|>\n")
517
+ embed_assistant= embed_special_str("<|im_start|>assistant\n")
518
+
519
+
520
+ # === Step 4: 拼接 Embeddings ===
521
+ user_embeds = torch.cat([embed_im_start, prot_tokens, embed_protein, prompt_embeds, embed_im_end], dim=1)
522
+ assistant_prefix = embed_assistant # 模型从这里开始生成
523
+ inputs_embeds = torch.cat([user_embeds, assistant_prefix], dim=1)
524
+
525
+ # === Step 5: attention_mask ===
526
+ user_mask = torch.ones(user_embeds.shape[:2], dtype=torch.long, device=device)
527
+ assistant_mask = torch.ones((batch_size, embed_assistant.size(1)), dtype=torch.long, device=device)
528
+ attention_mask = torch.cat([user_mask, assistant_mask], dim=1)
529
+
530
+ outputs = self.llm_model.generate(
531
+ inputs_embeds=inputs_embeds,
532
+ attention_mask=attention_mask,
533
+ do_sample=do_sample,
534
+ top_p=top_p,
535
+ temperature=temperature,
536
+ num_beams=num_beams,
537
+ max_new_tokens=max_length,
538
+ min_length=min_length,
539
+ # pad_token_id=self.pad_token_id,
540
+ eos_token_id=self.eos_token_id,
541
+ repetition_penalty=repetition_penalty,
542
+ length_penalty=length_penalty,
543
+ num_return_sequences=num_captions,
544
+ use_cache=True,
545
+ cache_implementation="hybrid"
546
+ )
547
+ output_text = self.llm_tokenizer.batch_decode(outputs, skip_special_tokens=True)
548
+ output_text = [text.strip() for text in output_text]
549
+ # print(output_text)
550
+ return output_text
BioReason-0813/model/blip2_stage2.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from model.blip2_opt import Blip2OPT
4
+ import pytorch_lightning as pl
5
+ from torch import optim
6
+ from lavis.common.optims import LinearWarmupCosineLRScheduler, LinearWarmupStepLRScheduler
7
+ import json
8
+ import torch.distributed as dist
9
+ # from peft import LoraConfig, TaskType
10
+ from typing import Any, Dict
11
+ from model.help_funcs import caption_evaluate, AttrDict
12
+ try:
13
+ from model.opt_flash_attention import replace_opt_attn_with_flash_attn, replace_opt_attn_with_original_attn
14
+ except ModuleNotFoundError:
15
+ pass
16
+
17
+
18
+ def get_module_state_dict(state_dict, module_name):
19
+ module_state_dict = {}
20
+ for key, value in state_dict.items():
21
+ if key.startswith(module_name):
22
+ key = key[len(module_name) + 1:]
23
+ if key == '':
24
+ return value
25
+ module_state_dict[key] = value
26
+ return module_state_dict
27
+
28
class Blip2Stage2(pl.LightningModule):
    """
    Stage-2 training module: couples the protein encoder + Q-Former stack
    (Blip2OPT) with an LLM so the model generates text conditioned on protein
    input.  Handles training, validation-time generation, distributed result
    gathering, and metric logging.
    """

    def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
        """Strip frozen parameters from the checkpoint to keep it small."""
        to_be_removed = []
        for key, value in checkpoint['state_dict'].items():
            try:
                if not self.get_parameter(key).requires_grad:
                    to_be_removed.append(key)
            except AttributeError:
                # Entry does not resolve to a registered parameter
                # (e.g. a buffer) -- drop it as well.
                to_be_removed.append(key)
        for key in to_be_removed:
            checkpoint['state_dict'].pop(key)

    def __init__(self, args):
        super().__init__()
        if isinstance(args, dict):
            args = AttrDict(**args)

        self.args = args
        self.caption_eval_epoch = args.caption_eval_epoch
        self.do_sample = args.do_sample
        self.num_beams = args.num_beams
        self.max_inference_len = args.max_inference_len
        self.min_inference_len = args.min_inference_len
        self.llm_tune = args.llm_tune
        self.enable_flash = args.enable_flash
        self.blip2 = Blip2OPT(args.bert_name,
                              args.num_query_token,
                              args.cross_attention_freq,
                              args.plm_model,
                              args.plm_tune,
                              args.llm_name,
                              args.llm_tune,
                              args.qformer_tune,
                              args.peft_dir,
                              args)
        self.save_hyperparameters(args)

    def load_from_stage1_checkpoint(self, path):
        """Load Q-Former weights from a stage-1 checkpoint.

        Stage-1 keys are prefixed with ``blip2qformer.``; the prefix is removed
        before loading (non-strict, so LLM-only keys are ignored).
        """
        ckpt = torch.load(path, map_location='cpu')
        state_dict = ckpt['state_dict']
        state_dict = {k.split('blip2qformer.')[1]: v for k, v in state_dict.items()}
        self.blip2.load_state_dict(state_dict, strict=False)
        return self

    def configure_optimizers(self):
        """AdamW plus an optional manually-stepped warmup scheduler."""
        # Data must be set up so the dataloader length is known for warmup.
        self.trainer.fit_loop.setup_data()
        warmup_steps = min(len(self.trainer.train_dataloader), self.args.warmup_steps)
        optimizer = optim.AdamW(self.parameters(), lr=self.args.init_lr, weight_decay=self.args.weight_decay)
        if self.args.scheduler == 'linear_warmup_cosine_lr':
            self.scheduler = LinearWarmupCosineLRScheduler(optimizer, self.args.max_epochs, self.args.min_lr, self.args.init_lr, warmup_steps, self.args.warmup_lr)
        elif self.args.scheduler == 'linear_warmup_step_lr':
            self.scheduler = LinearWarmupStepLRScheduler(optimizer, self.args.max_epochs, self.args.min_lr, self.args.init_lr, self.args.lr_decay_rate, self.args.warmup_lr, warmup_steps)
        elif self.args.scheduler == 'None':
            self.scheduler = None
        else:
            raise NotImplementedError()
        return optimizer

    def save_predictions(self, predictions, targets, q_types=None, log_prefix=''):
        """Write (prediction, target[, q_type]) records as JSON lines into the logger dir."""
        assert len(predictions) == len(targets)
        if log_prefix:
            name = f'{log_prefix}_predictions.txt'
        else:
            name = 'predictions.txt'
        with open(os.path.join(self.logger.log_dir, name), 'w', encoding='utf8') as f:
            if q_types is not None:
                for p, t, q in zip(predictions, targets, q_types):
                    line = {'prediction': p, 'target': t, 'q_type': q}
                    f.write(json.dumps(line, ensure_ascii=True) + '\n')
            else:
                for p, t in zip(predictions, targets):
                    line = {'prediction': p, 'target': t}
                    f.write(json.dumps(line, ensure_ascii=True) + '\n')

    def on_validation_epoch_start(self) -> None:
        # Flash attention does not support the generation path; switch back to
        # the original attention for validation.
        if self.enable_flash:
            replace_opt_attn_with_original_attn()
        self.saved_dict_list = []
        self.prediction_list0 = []
        self.target_list0 = []
        self.prediction_list1 = []
        self.target_list1 = []

    @torch.no_grad()
    def validation_step(self, batch, batch_idx, dataloader_idx=0):
        """Even dataloaders: log LM loss.  Odd dataloaders: run generation."""
        prot_batch, prompt_batch, target_dict = batch
        if (dataloader_idx % 2) == 0:
            # Batch size comes from the number of targets in the batch dict.
            batch_size = len(target_dict['targets'])
            loss = self.blip2(batch)
            ###============== Overall Loss ===================###
            self.log(f"dataloader{dataloader_idx}/val loss", float(loss), batch_size=batch_size, sync_dist=True)
        elif (dataloader_idx % 2) == 1:
            # Generation is expensive; only run it every caption_eval_epoch epochs.
            if (self.current_epoch + 1) % self.caption_eval_epoch != 0:
                return
            ###============== Captioning Results ===================###
            samples = {'prot_batch': prot_batch, 'prompt_batch': prompt_batch}
            predictions = self.blip2.generate(
                samples,
                do_sample=self.do_sample,
                num_beams=self.num_beams,
                max_length=self.max_inference_len,
                min_length=self.min_inference_len
            )
            target_dict['predictions'] = predictions
            self.saved_dict_list.append(target_dict)

    def gather_dict_results(self, dict_list):
        """All-gather per-rank result dicts and flatten them to one dict per sample."""
        list_of_dict_list = [None for _ in range(self.trainer.world_size)]
        dist.all_gather_object(list_of_dict_list, dict_list)
        # Flatten ranks; each dict holds lists of predictions, targets, etc.
        dict_list = [i for ii in list_of_dict_list for i in ii]
        keys = dict_list[0].keys()
        gathered_dict = {}  # each value is a list of predictions, etc.
        for key in keys:
            gathered_dict[key] = [i for d in dict_list for i in d[key]]
        dict_list = []
        for i in range(len(gathered_dict['predictions'])):
            d = {k: gathered_dict[k][i] for k in keys}
            dict_list.append(d)
        return dict_list

    def save_results(self, dict_list, log_prefix=""):
        """Dump gathered per-sample dicts as one JSON object per line."""
        if log_prefix:
            # FIX: make sure the output directory exists before opening the file.
            os.makedirs('results', exist_ok=True)
            name = f'results/{log_prefix}_predictions.txt'
        else:
            name = 'predictions.txt'
        with open(name, 'w', encoding='utf8') as f:
            for d in dict_list:
                f.write(json.dumps(d, ensure_ascii=True) + '\n')

    def on_validation_epoch_end(self):
        if self.enable_flash:
            replace_opt_attn_with_flash_attn()
        if (self.current_epoch + 1) % self.caption_eval_epoch != 0:
            return
        result_list = self.gather_dict_results(self.saved_dict_list)
        ## empty cache
        self.saved_dict_list = []
        # FIX: nothing was generated this epoch -- nothing to evaluate.
        if not result_list:
            return

        if self.global_rank == 0:
            # e.g. args.filename = 'stage2_continue_deeplocmulti_07241522'
            filename_parts = self.args.filename.split('_')
            # Keep the last two parts, e.g. 'deeplocmulti_07241522'.
            new_filename = '_'.join(filename_parts[-2:])
            self.save_results(result_list, new_filename)
            all_predictions = [i['predictions'] for i in result_list]
            all_targets = [i['targets'] for i in result_list]

            log_prefix = 'dataset0'  ## fixme: this is just a placeholder
            if 'q_types' in result_list[0]:
                ## protein QA evaluation not implemented yet
                pass
            else:
                ## evaluate captioning
                bleu2, bleu4, rouge_1, rouge_2, rouge_l, meteor_score = \
                    caption_evaluate(all_predictions, all_targets, self.blip2.llm_tokenizer, self.max_inference_len)
                acc = evaluate_exact_match(all_predictions, all_targets)
                self.log(f"{log_prefix}/acc", acc, sync_dist=False)
                self.log(f"{log_prefix}/bleu2", bleu2, sync_dist=False)
                self.log(f"{log_prefix}/bleu4", bleu4, sync_dist=False)
                self.log(f"{log_prefix}/rouge_1", rouge_1, sync_dist=False)
                self.log(f"{log_prefix}/rouge_2", rouge_2, sync_dist=False)
                self.log(f"{log_prefix}/rouge_l", rouge_l, sync_dist=False)
                self.log(f"{log_prefix}/meteor_score", meteor_score, sync_dist=False)

    @torch.no_grad()
    def validation_step_old(self, batch, batch_idx, dataloader_idx=0):
        """Legacy validation path kept for reference; superseded by validation_step."""
        if (dataloader_idx % 2) == 0:
            text_batch = batch[-1]
            batch_size = text_batch.input_ids.shape[0]
            loss = self.blip2(batch)
            ###============== Overall Loss ===================###
            self.log(f"dataloader{dataloader_idx}/val loss", float(loss), batch_size=batch_size, sync_dist=True)
        elif (dataloader_idx % 2) == 1:
            if (self.current_epoch + 1) % self.caption_eval_epoch != 0:
                return
            prot_batch, prompt_batch, target_dict = batch
            ###============== Captioning Results ===================###
            samples = {'prot_batch': prot_batch, 'prompt_batch': prompt_batch}
            predictions = self.blip2.generate(
                samples,
                do_sample=self.do_sample,
                num_beams=self.num_beams,
                max_length=self.max_inference_len,
                min_length=self.min_inference_len
            )
            if dataloader_idx // 2 == 0:
                self.prediction_list0.append(predictions)
                self.target_list0.append(target_dict)
            elif dataloader_idx // 2 == 1:
                self.prediction_list1.append(predictions)
                self.target_list1.append(target_dict)
            else:
                raise NotImplementedError
        else:
            raise NotImplementedError

    def on_validation_epoch_end_old(self):
        """Legacy epoch-end hook paired with validation_step_old."""
        if self.enable_flash:
            replace_opt_attn_with_flash_attn()
        if (self.current_epoch + 1) % self.caption_eval_epoch != 0:
            return
        predictions0 = [i for ii in self.prediction_list0 for i in ii]
        targets0 = [i for ii in self.target_list0 for i in ii['answers']]
        if 'q_types' in self.target_list0[0]:
            q_types0 = [i for ii in self.target_list0 for i in ii['q_types']]
            self.reduce_and_evaluate_qa(predictions0, targets0, q_types0, 'dataset0')
        else:
            self.reduce_and_evaluate_captioning(predictions0, targets0, 'dataset0')

        if len(self.prediction_list1) > 0:
            predictions1 = [i for ii in self.prediction_list1 for i in ii]
            targets1 = [i for ii in self.target_list1 for i in ii]
            self.reduce_and_evaluate_captioning(predictions1, targets1, 'dataset1')

    def reduce_and_evaluate_qa(self, predictions, targets, q_types, log_prefix=""):
        """All-gather QA outputs across ranks and save them on rank 0."""
        all_predictions = [None for _ in range(self.trainer.world_size)]
        all_targets = [None for _ in range(self.trainer.world_size)]
        all_q_types = [None for _ in range(self.trainer.world_size)]
        dist.all_gather_object(all_predictions, predictions)
        dist.all_gather_object(all_targets, targets)
        dist.all_gather_object(all_q_types, q_types)
        if self.global_rank == 0:
            all_predictions = [i for ii in all_predictions for i in ii]
            all_targets = [i for ii in all_targets for i in ii]
            all_q_types = [i for ii in all_q_types for i in ii]
            self.save_predictions(all_predictions, all_targets, all_q_types, log_prefix=log_prefix)

    def reduce_and_evaluate_captioning(self, predictions, targets, log_prefix=""):
        """All-gather captioning outputs, save and score them on rank 0."""
        all_predictions = [None for _ in range(self.trainer.world_size)]
        all_targets = [None for _ in range(self.trainer.world_size)]
        dist.all_gather_object(all_predictions, predictions)
        dist.all_gather_object(all_targets, targets)
        if self.global_rank == 0:
            all_predictions = [i for ii in all_predictions for i in ii]
            all_targets = [i for ii in all_targets for i in ii]
            self.save_predictions(all_predictions, all_targets, log_prefix)
            ## fixme: I am not sure if the max length is the same as previous experiments
            bleu2, bleu4, rouge_1, rouge_2, rouge_l, meteor_score = \
                caption_evaluate(all_predictions, all_targets, self.blip2.llm_tokenizer, self.max_inference_len)
            acc = evaluate_exact_match(all_predictions, all_targets)
            self.log(f"{log_prefix}/acc", acc, sync_dist=False)
            self.log(f"{log_prefix}/bleu2", bleu2, sync_dist=False)
            self.log(f"{log_prefix}/bleu4", bleu4, sync_dist=False)
            self.log(f"{log_prefix}/rouge_1", rouge_1, sync_dist=False)
            self.log(f"{log_prefix}/rouge_2", rouge_2, sync_dist=False)
            self.log(f"{log_prefix}/rouge_l", rouge_l, sync_dist=False)
            self.log(f"{log_prefix}/meteor_score", meteor_score, sync_dist=False)

    def training_step(self, batch, batch_idx):
        # The scheduler is stepped manually (it is not returned from
        # configure_optimizers), so advance it here.
        if self.scheduler:
            self.scheduler.step(self.trainer.current_epoch, self.trainer.global_step)

        batch_size = len(batch[-1]['targets'])
        ###============== Overall Loss ===================###
        loss = self.blip2(batch)
        self.log("loss", float(loss), batch_size=batch_size, sync_dist=True)
        self.log("lr", self.trainer.optimizers[0].param_groups[0]['lr'], batch_size=batch_size, sync_dist=True)
        return loss

    @staticmethod
    def add_model_specific_args(parent_parser):
        parser = parent_parser.add_argument_group("ProtBlip2")
        # train mode
        parser.add_argument('--save_every_n_epochs', type=int, default=0)

        # Bert / Q-Former
        parser.add_argument('--bert_name', type=str, default='/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft')
        parser.add_argument('--cross_attention_freq', type=int, default=2)
        parser.add_argument('--num_query_token', type=int, default=8)
        parser.add_argument('--qformer_tune', type=str, default='train')
        # OPT / LLM
        parser.add_argument('--llm_name', type=str, default="/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged")
        parser.add_argument('--num_beams', type=int, default=5)
        parser.add_argument('--do_sample', action='store_true', default=False)
        parser.add_argument('--max_inference_len', type=int, default=512)
        parser.add_argument('--min_inference_len', type=int, default=1)
        parser.add_argument('--llm_tune', type=str, default='freeze')
        parser.add_argument('--peft_config', type=str, default='')
        parser.add_argument('--peft_dir', type=str, default='')

        ## plm model
        parser.add_argument('--plm_model', type=str, default='/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m')
        parser.add_argument('--plm_tune', type=str, default='freeze')

        ## lora config
        parser.add_argument('--lora_r', type=int, default=8)
        parser.add_argument('--lora_alpha', type=int, default=16)
        # FIX: dropout is a probability; type=int would reject values like 0.1.
        parser.add_argument('--lora_dropout', type=float, default=0.1)
        # NOTE(review): flag keeps its original misspelling ('enbale') so
        # existing launch scripts stay compatible.
        parser.add_argument('--enbale_gradient_checkpointing', action='store_true', default=False)

        # optimization
        parser.add_argument('--weight_decay', type=float, default=0.05, help='optimizer weight decay')
        parser.add_argument('--init_lr', type=float, default=1e-4, help='optimizer init learning rate')
        parser.add_argument('--min_lr', type=float, default=1e-5, help='optimizer min learning rate')
        parser.add_argument('--warmup_lr', type=float, default=1e-6, help='optimizer warmup learning rate')
        parser.add_argument('--warmup_steps', type=int, default=1000, help='optimizer warmup steps')
        parser.add_argument('--lr_decay_rate', type=float, default=0.9, help='optimizer lr decay rate')
        parser.add_argument('--scheduler', type=str, default='linear_warmup_cosine_lr', help='type of scheduler')  # or linear_warmup_step_lr
        parser.add_argument('--stage1_path', type=str, default='')
        parser.add_argument('--stage2_path', type=str, default='')
        parser.add_argument('--init_checkpoint', type=str, default='/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_07070513_2datasets_construct/epoch=09.ckpt/converted.ckpt')
        parser.add_argument('--caption_eval_epoch', type=int, default=5)
        return parent_parser
340
+
341
+
342
+
343
+ # def evaluate_exact_match(predictions, targets):
344
+ # acc = 0
345
+ # for prediction, target in zip(predictions, targets):
346
+ # if prediction.strip() == target.strip():
347
+ # acc += 1
348
+ # acc = round(acc / len(predictions) * 100, 2)
349
+ # return acc
350
+
351
+ import re
352
+
353
def evaluate_exact_match(predictions, targets):
    """Exact-match accuracy (%) between predictions and <answer>-tagged targets.

    Each target is expected to contain the gold label inside
    ``<answer>...</answer>``; the prediction is compared (stripped) against
    that extracted span.  Targets without the tag are warned about and count
    as incorrect.  Returns 0.0 for an empty prediction list.
    """
    # FIX: guard against ZeroDivisionError when there is nothing to score.
    if not predictions:
        return 0.0
    answer_pattern = re.compile(r"<answer>(.*?)</answer>", re.DOTALL)
    acc = 0
    for prediction, target in zip(predictions, targets):
        # Extract the gold answer from the <answer>...</answer> span.
        match = answer_pattern.search(target.strip())
        if match:
            answer = match.group(1).strip()
            if prediction.strip() == answer:
                acc += 1
        else:
            print(f"Warning: No <answer> tag found in target: {target}")
    acc = round(acc / len(predictions) * 100, 2)
    return acc
BioReason-0813/model/help_funcs.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from nltk.translate.bleu_score import corpus_bleu
3
+ from nltk.translate.meteor_score import meteor_score
4
+ from rouge_score import rouge_scorer
5
+ from tqdm import tqdm
6
+ import numpy as np
7
+
8
+
9
def caption_evaluate(predictions, targets, tokenizer, text_trunc_length):
    """Score generated captions against references.

    Tokenizes both sides with ``tokenizer`` (truncated/padded to
    ``text_trunc_length``), removes special/padding tokens, then computes
    corpus BLEU-2/4, ROUGE-1/2/L F-measures and mean METEOR (all scaled to
    0-100).  Returns (bleu2, bleu4, rouge_1, rouge_2, rouge_l, meteor).
    """
    targets = [t.strip() for t in targets]
    meteor_scores = []
    references = []
    hypotheses = []
    for gt, out in tqdm(zip(targets, predictions)):
        gt_tokens = tokenizer.tokenize(gt, truncation=True, max_length=text_trunc_length,
                                       padding='max_length')
        # Drop padding/special tokens (covers galactica-style and BERT-style vocab).
        gt_tokens = list(filter(('<pad>').__ne__, gt_tokens))
        gt_tokens = list(filter(('[PAD]').__ne__, gt_tokens))
        gt_tokens = list(filter(('[CLS]').__ne__, gt_tokens))
        gt_tokens = list(filter(('[SEP]').__ne__, gt_tokens))

        out_tokens = tokenizer.tokenize(out, truncation=True, max_length=text_trunc_length,
                                        padding='max_length')
        out_tokens = list(filter(('<pad>').__ne__, out_tokens))
        # FIX: this line previously re-filtered gt_tokens, leaving '[PAD]'
        # tokens in the hypotheses and skewing all metrics.
        out_tokens = list(filter(('[PAD]').__ne__, out_tokens))
        out_tokens = list(filter(('[CLS]').__ne__, out_tokens))
        out_tokens = list(filter(('[SEP]').__ne__, out_tokens))

        references.append([gt_tokens])
        hypotheses.append(out_tokens)

        mscore = meteor_score([gt_tokens], out_tokens)
        meteor_scores.append(mscore)

    bleu2 = corpus_bleu(references, hypotheses, weights=(.5, .5))
    bleu4 = corpus_bleu(references, hypotheses, weights=(.25, .25, .25, .25))
    bleu2 *= 100
    bleu4 *= 100

    print('BLEU-2 score:', bleu2)
    print('BLEU-4 score:', bleu4)
    _meteor_score = np.mean(meteor_scores)
    _meteor_score *= 100
    print('Average Meteor score:', _meteor_score)

    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'])

    # ROUGE works on the raw strings, not the token lists above.
    rouge_scores = []
    for gt, out in tqdm(zip(targets, predictions)):
        rs = scorer.score(out, gt)
        rouge_scores.append(rs)

    print('ROUGE score:')
    rouge_1 = np.mean([rs['rouge1'].fmeasure for rs in rouge_scores]) * 100
    rouge_2 = np.mean([rs['rouge2'].fmeasure for rs in rouge_scores]) * 100
    rouge_l = np.mean([rs['rougeL'].fmeasure for rs in rouge_scores]) * 100
    print('rouge1:', rouge_1)
    print('rouge2:', rouge_2)
    print('rougeL:', rouge_l)
    return bleu2, bleu4, rouge_1, rouge_2, rouge_l, _meteor_score
66
+
67
+
68
class AttrDict(dict):
    """A dict whose items are also accessible as attributes (d.key == d['key'])."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Aliasing __dict__ to the mapping itself makes attribute access and
        # item access read/write the same storage.
        self.__dict__ = self
72
+
73
+
74
def pad_and_concat(tensor_list, fill_value=0):
    '''
    concat the first dimension and pad the second dimension
    tensor_list: [[B (diff), N_num, *], ...]
    '''
    ref = tensor_list[0]
    # Computed before the rank check to match the original ordering of errors.
    width = max(t.shape[1] for t in tensor_list)
    total_rows = sum(t.shape[0] for t in tensor_list)
    rank = len(ref.shape)
    if rank == 3:
        out_shape = (total_rows, width, ref.shape[-1])
    elif rank == 2:
        out_shape = (total_rows, width)
    else:
        raise NotImplementedError()
    out = torch.full(out_shape, fill_value=fill_value, device=ref.device, dtype=ref.dtype)
    offset = 0
    for t in tensor_list:
        # Copy each tensor into its row slice; columns beyond its own width
        # keep the fill value.
        out[offset:offset + t.shape[0], :t.shape[1]] = t
        offset += t.shape[0]
    return out
98
+
99
+
100
def hf_enable_gradient_checkpointing(hf_model):
    """Enable gradient checkpointing on a HuggingFace-style model.

    Input embeddings are made to require grad first (either via the model's
    own ``enable_input_require_grads`` or a forward hook fallback), which is
    needed for checkpointing to propagate gradients when the embedding layer
    itself is frozen.  Returns the same model for chaining.
    """
    if hasattr(hf_model, "enable_input_require_grads"):
        hf_model.enable_input_require_grads()
    else:
        # Fallback for models without the helper: force the embedding output
        # to require grad via a forward hook.
        def _require_grad_hook(module, args, output):
            output.requires_grad_(True)

        hf_model.get_input_embeddings().register_forward_hook(_require_grad_hook)

    # Enable gradient checkpointing for memory efficiency.
    hf_model.gradient_checkpointing_enable()
    return hf_model
BioReason-0813/prompt_templates.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt templates for protein reasoning tasks.  Each template is filled with
# .format(aa_seq=..., seq_length=..., label=...) (location_prediction does not
# use seq_length) and embeds the reasoning in <think>...</think> and the gold
# label in <answer>...</answer>, matching the tags the reward/eval code parses.
prompt_templates = {
    # General classification of a raw amino-acid sequence.
    "classification": """
Analyze the following protein sequence and predict its classification.

Protein sequence: <protein>{aa_seq}</protein>

Please provide your reasoning and classification.

<think>
Let me analyze this protein sequence step by step:
1. Sequence length: {seq_length}
2. Composition analysis...
3. Structural predictions...
4. Functional domains...
</think>

<answer>
Classification: {label}
</answer>
""",
    # Function prediction phrasing of the same task.
    "function_prediction": """
Given the protein sequence below, predict its function and classification:

Sequence: <protein>{aa_seq}</protein>

Analyze the sequence and provide your prediction.

<think>
Sequence analysis:
- Length: {seq_length} amino acids
- Notable features...
- Homology considerations...
</think>

<answer>
Function prediction: {label}
</answer>
""",
    # Subcellular-location phrasing; note: no {seq_length} placeholder here.
    "location_prediction": """
Predict the cellular location and classification of this protein:

Protein sequence: <protein>{aa_seq}</protein>

What is the most likely classification for this protein?

<think>
Location and function analysis:
- Sequence characteristics...
- Signal peptides...
- Transmembrane regions...
</think>

<answer>
Classification: {label}
</answer>
"""
}
BioReason-0813/run.sh ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# run_blip2.sh
# Launch script for BLIP2 + GRPO training.
# FIX: the shebang used to appear after an echo on line 3, so it was inert;
# it must be the first line of the file.

# Abort on the first failing command so the completion messages below are
# only printed when training actually succeeded.
set -e

echo "Starting GRPO training..."

# ===== Basic paths =====
DATA_FILE=/oss/wangyujia/ProtT3/ProtT3/data/sft/dataset/DeepLocBinary/test.csv
DATASET_NAME=deeplocbinary
OUTPUT_DIR=./output
CACHE_DIR=./cache

# ===== Model configuration =====
BERT_PATH=/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft
PLM_MODEL=/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m
LLM_MODEL=/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged
SFT_CHECKPOINT=/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_07301646_2datasets_construct/epoch=09.ckpt/converted.ckpt

# ===== Training hyper-parameters =====
BATCH_SIZE=4
EPOCHS=3
LR=1e-5

# ===== Reward-function weights =====
FORMAT_WEIGHT=0.2
ACCURACY_WEIGHT=0.6
REPETITION_WEIGHT=0.2

# ===== Run training =====
# Variables are quoted so paths containing spaces survive word splitting.
python blips_reason.py \
    --data_file_paths "${DATA_FILE}" \
    --dataset_name "${DATASET_NAME}" \
    --reward_funcs combined \
    --format_weight "${FORMAT_WEIGHT}" \
    --accuracy_weight "${ACCURACY_WEIGHT}" \
    --repetition_weight "${REPETITION_WEIGHT}" \
    --use_custom_prompts \
    --template_name classification \
    --max_seq_length 1000 \
    --output_dir "${OUTPUT_DIR}" \
    --per_device_train_batch_size "${BATCH_SIZE}" \
    --num_train_epochs "${EPOCHS}" \
    --learning_rate "${LR}" \
    --bert_name "${BERT_PATH}" \
    --plm_model "${PLM_MODEL}" \
    --llm_name "${LLM_MODEL}" \
    --sft_checkpoint "${SFT_CHECKPOINT}" \
    --plm_tune freeze \
    --llm_tune lora \
    --qformer_tune train \
    --lora_r 8 \
    --lora_alpha 16 \
    --lora_dropout 0.1 \
    --enable_flash \
    --cache_dir "${CACHE_DIR}"

echo "GRPO training completed!"

echo "All training stages completed successfully!"
BioReason-main/.gitignore ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ .idea/
3
+ __pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+ wandb/
7
+ .DS_Store
8
+ .vscode/
9
+ .venv/
10
+ .env
11
+ .pytest_cache/
12
+
13
+ # C extensions
14
+ *.so
15
+
16
+ outputs/
17
+
18
+ # Distribution / packaging
19
+ .Python
20
+ build/
21
+ develop-eggs/
22
+ dist/
23
+ downloads/
24
+ eggs/
25
+ .eggs/
26
+ lib/
27
+ lib64/
28
+ parts/
29
+ sdist/
30
+ var/
31
+ wheels/
32
+ share/python-wheels/
33
+ *.egg-info/
34
+ .installed.cfg
35
+ *.egg
36
+ MANIFEST
37
+
38
+ # PyInstaller
39
+ # Usually these files are written by a python script from a template
40
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
41
+ *.manifest
42
+ *.spec
43
+
44
+ # Installer logs
45
+ pip-log.txt
46
+ pip-delete-this-directory.txt
47
+
48
+ # Unit test / coverage reports
49
+ htmlcov/
50
+ .tox/
51
+ .nox/
52
+ .coverage
53
+ .coverage.*
54
+ .cache
55
+ nosetests.xml
56
+ coverage.xml
57
+ *.cover
58
+ *.py,cover
59
+ .hypothesis/
60
+ .pytest_cache/
61
+ cover/
62
+
63
+ # Translations
64
+ *.mo
65
+ *.pot
66
+
67
+ # Django stuff:
68
+ *.log
69
+ local_settings.py
70
+ db.sqlite3
71
+ db.sqlite3-journal
72
+
73
+ # Flask stuff:
74
+ instance/
75
+ .webassets-cache
76
+
77
+ # Scrapy stuff:
78
+ .scrapy
79
+
80
+ # Sphinx documentation
81
+ docs/_build/
82
+
83
+ # PyBuilder
84
+ .pybuilder/
85
+ target/
86
+
87
+ # Jupyter Notebook
88
+ .ipynb_checkpoints
89
+
90
+ # IPython
91
+ profile_default/
92
+ ipython_config.py
93
+
94
+ # pyenv
95
+ # For a library or package, you might want to ignore these files since the code is
96
+ # intended to run in multiple environments; otherwise, check them in:
97
+ # .python-version
98
+
99
+ # pipenv
100
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
101
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
102
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
103
+ # install all needed dependencies.
104
+ #Pipfile.lock
105
+
106
+ # UV
107
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
108
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
109
+ # commonly ignored for libraries.
110
+ #uv.lock
111
+
112
+ # poetry
113
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
114
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
115
+ # commonly ignored for libraries.
116
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
117
+ #poetry.lock
118
+
119
+ # pdm
120
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
121
+ #pdm.lock
122
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
123
+ # in version control.
124
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
125
+ .pdm.toml
126
+ .pdm-python
127
+ .pdm-build/
128
+
129
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
130
+ __pypackages__/
131
+
132
+ # Celery stuff
133
+ celerybeat-schedule
134
+ celerybeat.pid
135
+
136
+ # SageMath parsed files
137
+ *.sage.py
138
+
139
+ # Environments
140
+ .env
141
+ .venv
142
+ env/
143
+ venv/
144
+ ENV/
145
+ env.bak/
146
+ venv.bak/
147
+
148
+ # Spyder project settings
149
+ .spyderproject
150
+ .spyproject
151
+
152
+ # Rope project settings
153
+ .ropeproject
154
+
155
+ # mkdocs documentation
156
+ /site
157
+
158
+ # mypy
159
+ .mypy_cache/
160
+ .dmypy.json
161
+ dmypy.json
162
+
163
+ # Pyre type checker
164
+ .pyre/
165
+
166
+ # pytype static type analyzer
167
+ .pytype/
168
+
169
+ # Cython debug symbols
170
+ cython_debug/
171
+
172
+ # PyCharm
173
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
174
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
175
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
176
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
177
+ #.idea/
178
+
179
+ # PyPI configuration file
180
+ .pypirc
BioReason-main/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
BioReason-main/README.md ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <h1 align="center">
2
+ 🧬 BioReason<br>Incentivizing Multimodal Biological Reasoning<br>within a DNA-LLM Model
3
+ </h1>
4
+
5
+ <p align="center">
6
+ <a href="https://www.arxiv.org/abs/2505.23579" target="_blank"><img src="https://img.shields.io/badge/arXiv-2505.23579-FF6B6B?style=for-the-badge&logo=arxiv&logoColor=white" alt="arXiv"></a>
7
+ <a href="https://github.com/bowang-lab/BioReason"><img src="https://img.shields.io/badge/GitHub-Code-4A90E2?style=for-the-badge&logo=github&logoColor=white" alt="GitHub"></a>
8
+ <a href="https://bowang-lab.github.io/BioReason/"><img src="https://img.shields.io/badge/Website-Online-00B89E?style=for-the-badge&logo=internet-explorer&logoColor=white" alt="Website"></a>
9
+ <a href="https://huggingface.co/collections/wanglab/bioreason-683cd17172a037a31d208f70"><img src="https://img.shields.io/badge/HuggingFace-Dataset-FFBF00?style=for-the-badge&logo=huggingface&logoColor=white" alt="HuggingFace Dataset"></a>
10
+ </p>
11
+
12
+ <br>
13
+
14
+ ## Updates [Jun 10, 2025]
15
+ - We are integrating vLLM to improve the speed and efficiency of the GRPO pipeline. We expect this to be pushed by end of week.
16
+ - Checkpoints along with the custom DNA-LLM model class will be released on HuggingFace by end of week.
17
+ - More training results with GRPO will be shared soon.
18
+
19
+ <br>
20
+
21
+ ## Abstract
22
+
23
+ Unlocking deep, interpretable biological reasoning from complex genomic data is a major AI challenge hindering scientific discovery. Current DNA foundation models, despite strong sequence representation, struggle with multi-step reasoning and lack inherent transparent, biologically intuitive explanations. We introduce BioReason, a pioneering architecture that, for the first time, deeply integrates a DNA foundation model with a large language model (LLM). This novel connection enables the LLM to directly process and reason with genomic information as a fundamental input, fostering a new form of multimodal biological understanding. BioReason's sophisticated multi-step reasoning is developed through supervised fine-tuning and targeted reinforcement learning, guiding the system to generate logical, biologically coherent deductions. On biological reasoning benchmarks including KEGG-based disease pathway prediction—where accuracy improves from 88% to 97%—and variant effect prediction, BioReason demonstrates an average 15% performance gain over strong single-modality baselines.
24
+
25
+ <br>
26
+
27
+ ## Key Contributions
28
+
29
+ • **Novel multimodal architecture**: The first successful integration of a DNA foundation model with an LLM, establishing a new methodology for AI-driven biological studies.
30
+
31
+ • **Advanced reasoning methodology**: A systematic training approach combining supervised fine-tuning and reinforcement learning that incentivizes multi-step biological reasoning.
32
+
33
+ • **New biological reasoning benchmarks**: Development and curation of novel benchmarks for evaluating biological reasoning capabilities, including an annotated reasoning dataset for gene pathway and disease prediction from KEGG.
34
+
35
+ • **Empirical performance improvements**: Demonstration that BioReason outperforms both DNA foundation models and LLMs used independently or in simple combination, with average performance gains of 15%+ over baseline.
36
+
37
+ • **Interpretable reasoning traces**: A mechanism for generating step-by-step biological reasoning traces that provide interpretable predictions, enhancing scientific insight and hypothesis generation.
38
+
39
+ <br>
40
+
41
+ ## Datasets
42
+
43
+ The datasets used to train and evaluate BioReason can be found on our [HuggingFace collection](https://huggingface.co/collections/wanglab/bioreason-683cd17172a037a31d208f70) with detailed download and usage instructions.
44
+
45
+ <br>
46
+
47
+ ## Checkpoints
48
+
49
+ We will release the checkpoints soon!
50
+
51
+ <br>
52
+
53
+ ## Installation
54
+
55
+ ### Prerequisites
56
+ - Python 3.11+
57
+ - CUDA/GPU for best performance
58
+
59
+ ### Installation Steps
60
+ ```bash
61
+ # Clone the repository
62
+ git clone https://github.com/bowang-lab/BioReason.git
63
+ cd BioReason
64
+
65
+ # Install package
66
+ pip install -e .
67
+ ```
68
+
69
+ <br>
70
+
71
+ ## Results
72
+
73
+ ### KEGG-Derived Biological Reasoning Task
74
+ Performance comparison on 290 test datapoints for multi-step mechanistic reasoning:
75
+
76
+ | Model | Accuracy | F1-Score | Precision | Recall |
77
+ |-------|----------|----------|-----------|---------|
78
+ | [DNA] NT - 500M | 86.55 | 69.76 | 73.23 | 66.61 |
79
+ | [DNA] Evo2 - 1B | 88.28 | 72.43 | 75.23 | 69.83 |
80
+ | [LLM] Qwen3 - 1B | 85.17 | 65.71 | 71.39 | 64.19 |
81
+ | [LLM] Qwen3 - 4B | 93.48 | 85.44 | 88.31 | 86.72 |
82
+ | [DNA-LLM] NT + Qwen3 - 1B | 88.42 | 72.13 | 75.42 | 71.91 |
83
+ | [DNA-LLM] NT + Qwen3 - 1B (+RL) | 89.66 | 74.11 | 78.82 | 72.96 |
84
+ | [DNA-LLM] NT + Qwen3 - 4B | 96.90 | **89.03** | **90.99** | **89.38** |
85
+ | [DNA-LLM] Evo2 + Qwen3 - 1B | 90.42 | 75.62 | 77.42 | 73.91 |
86
+ | [DNA-LLM] Evo2 + Qwen3 - 4B | **97.24** | 86.30 | 86.75 | 87.25 |
87
+
88
+ ### Variant Effect Prediction Benchmarks
89
+ Performance on pathogenic/benign classification:
90
+
91
+ | Model | Variant Effect - Coding | | Variant Effect - Non-SNV | |
92
+ |-------|------------|----------|------------|----------|
93
+ | | Accuracy | F1-Score | Accuracy | F1-Score |
94
+ | [DNA] NT - 500M | 60.91 | 45.20 | 67.93 | 65.97 |
95
+ | [DNA] Evo2 - 1B | 70.07 | 49.19 | 76.17 | 66.51 |
96
+ | [LLM] Qwen3 - 1B | 46.55 | 34.82 | 70.67 | 76.21 |
97
+ | [LLM] Qwen3 - 4B | 48.99 | 39.58 | 61.86 | 67.60 |
98
+ | [DNA-LLM] NT + Qwen3 - 1B | 55.58 | 54.50 | 72.82 | 76.93 |
99
+ | [DNA-LLM] NT + Qwen3 - 4B | 60.94 | 55.66 | 65.59 | 73.00 |
100
+ | [DNA-LLM] Evo2 + Qwen3 - 1B | 72.83 | 68.90 | **88.20** | **89.91** |
101
+ | [DNA-LLM] Evo2 + Qwen3 - 4B | **80.21** | **80.00** | 83.85 | 85.02 |
102
+
103
+ <br>
104
+
105
+ ## Citation
106
+
107
+ If you find this work useful, please cite our paper:
108
+
109
+ ```bibtex
110
+ @misc{fallahpour2025bioreasonincentivizingmultimodalbiological,
111
+ title={BioReason: Incentivizing Multimodal Biological Reasoning within a DNA-LLM Model},
112
+ author={Adibvafa Fallahpour and Andrew Magnuson and Purav Gupta and Shihao Ma and Jack Naimer and Arnav Shah and Haonan Duan and Omar Ibrahim and Hani Goodarzi and Chris J. Maddison and Bo Wang},
113
+ year={2025},
114
+ eprint={2505.23579},
115
+ archivePrefix={arXiv},
116
+ primaryClass={cs.LG},
117
+ url={https://arxiv.org/abs/2505.23579},
118
+ }
119
+ ```
120
+
121
+ <br>
122
+
123
+ ## Authors
124
+
125
+ - **Adibvafa Fallahpour**¹²³⁵ * (adibvafa.fallahpour@mail.utoronto.ca)
126
+ - **Andrew Magnuson**¹² *
127
+ - **Purav Gupta**¹² *
128
+ - **Shihao Ma**¹²³
129
+ - **Jack Naimer**¹²³
130
+ - **Arnav Shah**¹²³
131
+ - **Haonan Duan**¹²
132
+ - **Omar Ibrahim**³
133
+ - **Hani Goodarzi**†⁴⁶
134
+ - **Chris J. Maddison**†¹²⁷
135
+ - **Bo Wang**†¹²³
136
+
137
+ ¹ University of Toronto ² Vector Institute ³ University Health Network (UHN) <br>
138
+ ⁴ Arc Institute ⁵ Cohere ⁶ University of California, San Francisco ⁷ Google DeepMind
139
+
140
+ <br>
141
+ \* Equal contribution <br>
142
+ † Equal advising
143
+
144
+ ---
145
+
146
+ <p align="center">
147
+ Made with ❤️ at University of Toronto, Vector Institute, and University Health Network
148
+ </p>
BioReason-main/bioreason.egg-info/PKG-INFO ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: bioreason
3
+ Version: 0.1.0
4
+ Summary: Bio-related Reasoning with Language Models
5
+ License: UNKNOWN
6
+ Platform: UNKNOWN
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Programming Language :: Python :: 3.11
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.11
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: torch
15
+ Requires-Dist: torchvision
16
+ Requires-Dist: transformers
17
+ Requires-Dist: accelerate
18
+ Requires-Dist: qwen-vl-utils
19
+ Requires-Dist: jupyter
20
+ Requires-Dist: datasets
21
+ Requires-Dist: peft
22
+ Requires-Dist: pytorch_lightning
23
+ Requires-Dist: wandb
24
+ Requires-Dist: trl[vllm]
25
+ Requires-Dist: bitsandbytes
26
+ Requires-Dist: deepspeed
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest; extra == "dev"
29
+ Requires-Dist: black; extra == "dev"
30
+ Requires-Dist: isort; extra == "dev"
31
+ Requires-Dist: mypy; extra == "dev"
32
+ Dynamic: license-file
33
+
34
+ <h1 align="center">
35
+ 🧬 BioReason<br>Incentivizing Multimodal Biological Reasoning<br>within a DNA-LLM Model
36
+ </h1>
37
+
38
+ <p align="center">
39
+ <a href="https://www.arxiv.org/abs/2505.23579" target="_blank"><img src="https://img.shields.io/badge/arXiv-2505.23579-FF6B6B?style=for-the-badge&logo=arxiv&logoColor=white" alt="arXiv"></a>
40
+ <a href="https://github.com/bowang-lab/BioReason"><img src="https://img.shields.io/badge/GitHub-Code-4A90E2?style=for-the-badge&logo=github&logoColor=white" alt="GitHub"></a>
41
+ <a href="https://bowang-lab.github.io/BioReason/"><img src="https://img.shields.io/badge/Website-Online-00B89E?style=for-the-badge&logo=internet-explorer&logoColor=white" alt="Website"></a>
42
+ <a href="https://huggingface.co/collections/wanglab/bioreason-683cd17172a037a31d208f70"><img src="https://img.shields.io/badge/HuggingFace-Dataset-FFBF00?style=for-the-badge&logo=huggingface&logoColor=white" alt="HuggingFace Dataset"></a>
43
+ </p>
44
+
45
+ <br>
46
+
47
+ ## Updates [Jun 10, 2025]
48
+ - We are integrating vLLM to improve the speed and efficiency of the GRPO pipeline. We expect this to be pushed by end of week.
49
+ - Checkpoints along with the custom DNA-LLM model class will be released on HuggingFace by end of week.
50
+ - More training results with GRPO will be shared soon.
51
+
52
+ <br>
53
+
54
+ ## Abstract
55
+
56
+ Unlocking deep, interpretable biological reasoning from complex genomic data is a major AI challenge hindering scientific discovery. Current DNA foundation models, despite strong sequence representation, struggle with multi-step reasoning and lack inherent transparent, biologically intuitive explanations. We introduce BioReason, a pioneering architecture that, for the first time, deeply integrates a DNA foundation model with a large language model (LLM). This novel connection enables the LLM to directly process and reason with genomic information as a fundamental input, fostering a new form of multimodal biological understanding. BioReason's sophisticated multi-step reasoning is developed through supervised fine-tuning and targeted reinforcement learning, guiding the system to generate logical, biologically coherent deductions. On biological reasoning benchmarks including KEGG-based disease pathway prediction—where accuracy improves from 88% to 97%—and variant effect prediction, BioReason demonstrates an average 15% performance gain over strong single-modality baselines.
57
+
58
+ <br>
59
+
60
+ ## Key Contributions
61
+
62
+ • **Novel multimodal architecture**: The first successful integration of a DNA foundation model with an LLM, establishing a new methodology for AI-driven biological studies.
63
+
64
+ • **Advanced reasoning methodology**: A systematic training approach combining supervised fine-tuning and reinforcement learning that incentivizes multi-step biological reasoning.
65
+
66
+ • **New biological reasoning benchmarks**: Development and curation of novel benchmarks for evaluating biological reasoning capabilities, including an annotated reasoning dataset for gene pathway and disease prediction from KEGG.
67
+
68
+ • **Empirical performance improvements**: Demonstration that BioReason outperforms both DNA foundation models and LLMs used independently or in simple combination, with average performance gains of 15%+ over baseline.
69
+
70
+ • **Interpretable reasoning traces**: A mechanism for generating step-by-step biological reasoning traces that provide interpretable predictions, enhancing scientific insight and hypothesis generation.
71
+
72
+ <br>
73
+
74
+ ## Datasets
75
+
76
+ The datasets used to train and evaluate BioReason can be found on our [HuggingFace collection](https://huggingface.co/collections/wanglab/bioreason-683cd17172a037a31d208f70) with detailed download and usage instructions.
77
+
78
+ <br>
79
+
80
+ ## Checkpoints
81
+
82
+ We will release the checkpoints soon!
83
+
84
+ <br>
85
+
86
+ ## Installation
87
+
88
+ ### Prerequisites
89
+ - Python 3.11+
90
+ - CUDA/GPU for best performance
91
+
92
+ ### Installation Steps
93
+ ```bash
94
+ # Clone the repository
95
+ git clone https://github.com/bowang-lab/BioReason.git
96
+ cd BioReason
97
+
98
+ # Install package
99
+ pip install -e .
100
+ ```
101
+
102
+ <br>
103
+
104
+ ## Results
105
+
106
+ ### KEGG-Derived Biological Reasoning Task
107
+ Performance comparison on 290 test datapoints for multi-step mechanistic reasoning:
108
+
109
+ | Model | Accuracy | F1-Score | Precision | Recall |
110
+ |-------|----------|----------|-----------|---------|
111
+ | [DNA] NT - 500M | 86.55 | 69.76 | 73.23 | 66.61 |
112
+ | [DNA] Evo2 - 1B | 88.28 | 72.43 | 75.23 | 69.83 |
113
+ | [LLM] Qwen3 - 1B | 85.17 | 65.71 | 71.39 | 64.19 |
114
+ | [LLM] Qwen3 - 4B | 93.48 | 85.44 | 88.31 | 86.72 |
115
+ | [DNA-LLM] NT + Qwen3 - 1B | 88.42 | 72.13 | 75.42 | 71.91 |
116
+ | [DNA-LLM] NT + Qwen3 - 1B (+RL) | 89.66 | 74.11 | 78.82 | 72.96 |
117
+ | [DNA-LLM] NT + Qwen3 - 4B | 96.90 | **89.03** | **90.99** | **89.38** |
118
+ | [DNA-LLM] Evo2 + Qwen3 - 1B | 90.42 | 75.62 | 77.42 | 73.91 |
119
+ | [DNA-LLM] Evo2 + Qwen3 - 4B | **97.24** | 86.30 | 86.75 | 87.25 |
120
+
121
+ ### Variant Effect Prediction Benchmarks
122
+ Performance on pathogenic/benign classification:
123
+
124
+ | Model | Variant Effect - Coding | | Variant Effect - Non-SNV | |
125
+ |-------|------------|----------|------------|----------|
126
+ | | Accuracy | F1-Score | Accuracy | F1-Score |
127
+ | [DNA] NT - 500M | 60.91 | 45.20 | 67.93 | 65.97 |
128
+ | [DNA] Evo2 - 1B | 70.07 | 49.19 | 76.17 | 66.51 |
129
+ | [LLM] Qwen3 - 1B | 46.55 | 34.82 | 70.67 | 76.21 |
130
+ | [LLM] Qwen3 - 4B | 48.99 | 39.58 | 61.86 | 67.60 |
131
+ | [DNA-LLM] NT + Qwen3 - 1B | 55.58 | 54.50 | 72.82 | 76.93 |
132
+ | [DNA-LLM] NT + Qwen3 - 4B | 60.94 | 55.66 | 65.59 | 73.00 |
133
+ | [DNA-LLM] Evo2 + Qwen3 - 1B | 72.83 | 68.90 | **88.20** | **89.91** |
134
+ | [DNA-LLM] Evo2 + Qwen3 - 4B | **80.21** | **80.00** | 83.85 | 85.02 |
135
+
136
+ <br>
137
+
138
+ ## Citation
139
+
140
+ If you find this work useful, please cite our paper:
141
+
142
+ ```bibtex
143
+ @misc{fallahpour2025bioreasonincentivizingmultimodalbiological,
144
+ title={BioReason: Incentivizing Multimodal Biological Reasoning within a DNA-LLM Model},
145
+ author={Adibvafa Fallahpour and Andrew Magnuson and Purav Gupta and Shihao Ma and Jack Naimer and Arnav Shah and Haonan Duan and Omar Ibrahim and Hani Goodarzi and Chris J. Maddison and Bo Wang},
146
+ year={2025},
147
+ eprint={2505.23579},
148
+ archivePrefix={arXiv},
149
+ primaryClass={cs.LG},
150
+ url={https://arxiv.org/abs/2505.23579},
151
+ }
152
+ ```
153
+
154
+ <br>
155
+
156
+ ## Authors
157
+
158
+ - **Adibvafa Fallahpour**¹²³⁵ * (adibvafa.fallahpour@mail.utoronto.ca)
159
+ - **Andrew Magnuson**¹² *
160
+ - **Purav Gupta**¹² *
161
+ - **Shihao Ma**¹²³
162
+ - **Jack Naimer**¹²³
163
+ - **Arnav Shah**¹²³
164
+ - **Haonan Duan**¹²
165
+ - **Omar Ibrahim**³
166
+ - **Hani Goodarzi**†⁴⁶
167
+ - **Chris J. Maddison**†¹²⁷
168
+ - **Bo Wang**†¹²³
169
+
170
+ ¹ University of Toronto ² Vector Institute ³ University Health Network (UHN) <br>
171
+ ⁴ Arc Institute ⁵ Cohere ⁶ University of California, San Francisco ⁷ Google DeepMind
172
+
173
+ <br>
174
+ \* Equal contribution <br>
175
+ † Equal advising
176
+
177
+ ---
178
+
179
+ <p align="center">
180
+ Made with ❤️ at University of Toronto, Vector Institute, and University Health Network
181
+ </p>
BioReason-main/bioreason.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ bioreason/__init__.py
5
+ bioreason.egg-info/PKG-INFO
6
+ bioreason.egg-info/SOURCES.txt
7
+ bioreason.egg-info/dependency_links.txt
8
+ bioreason.egg-info/requires.txt
9
+ bioreason.egg-info/top_level.txt
BioReason-main/bioreason.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
BioReason-main/bioreason.egg-info/requires.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ transformers
4
+ accelerate
5
+ qwen-vl-utils
6
+ jupyter
7
+ datasets
8
+ peft
9
+ pytorch_lightning
10
+ wandb
11
+ trl[vllm]
12
+ bitsandbytes
13
+ deepspeed
14
+
15
+ [dev]
16
+ pytest
17
+ black
18
+ isort
19
+ mypy
BioReason-main/bioreason.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ bioreason
BioReason-main/bioreason/__init__.py ADDED
File without changes
BioReason-main/bioreason/dataset/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .kegg import KEGGDataset, split_kegg_dataset
2
+ from .utils import torch_to_hf_dataset, truncate_dna
3
+ from .variant_effect import get_format_variant_effect_function
4
+
5
+ __all__ = [
6
+ "KEGGDataset",
7
+ "split_kegg_dataset",
8
+ "torch_to_hf_dataset",
9
+ "truncate_dna",
10
+ "get_format_variant_effect_function",
11
+ ]
BioReason-main/bioreason/dataset/kegg.py ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import random
4
+ import sys
5
+ import torch
6
+ from torch.utils.data import Dataset, DataLoader
7
+ from typing import Any, Dict, List, Tuple
8
+
9
+ from bioreason.dataset.utils import torch_to_hf_dataset
10
+ from bioreason.models.dl.processing_dl import DLProcessor
11
+ from trl.data_utils import maybe_apply_chat_template
12
+
13
+
14
+ class KEGGDataset(Dataset):
15
+ """Dataset for KEGG data."""
16
+
17
+ def __init__(self, data_dir: str):
18
+ """
19
+ Initialize the dataset by loading all JSON files from the given directory.
20
+
21
+ Args:
22
+ data_dir: Path to the directory containing JSON files
23
+ """
24
+ self.data_dir = data_dir
25
+ self.data = []
26
+
27
+ # Load all JSON files
28
+ json_files = sorted([f for f in os.listdir(data_dir) if f.endswith(".json")])
29
+
30
+ # Process each file
31
+ for filename in json_files:
32
+ file_path = os.path.join(data_dir, filename)
33
+ kegg_id = filename.split("_")[1]
34
+
35
+ with open(file_path, "r", encoding="utf-8") as f:
36
+ item = json.load(f)
37
+ item["kegg_id"] = kegg_id
38
+ processed_item = self._process_item(item)
39
+ self.data.append(processed_item)
40
+
41
+ def _process_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
42
+ """
43
+ Process a single data item to format fields as required.
44
+
45
+ Args:
46
+ item: Original data item from JSON
47
+
48
+ Returns:
49
+ Processed data item
50
+ """
51
+ # Extract question as is
52
+ question = item.get("question", "")
53
+
54
+ # Convert answer to lowercase and strip whitespace
55
+ answer = item.get("answer", "").lower().strip()
56
+
57
+ # Combine reasoning steps into a single paragraph with newlines
58
+ reasoning_steps = item.get("reasoning", {}).get("reasoning_steps", [])
59
+ reasoning = "\n".join(reasoning_steps)
60
+
61
+ # Convert sequences to uppercase and strip whitespace
62
+ reference_sequence = item.get("reference_sequence", "").upper().strip()
63
+ variant_sequence = item.get("variant_sequence", "").upper().strip()
64
+
65
+ return {
66
+ "question": question,
67
+ "answer": answer,
68
+ "reasoning": reasoning,
69
+ "reference_sequence": reference_sequence,
70
+ "variant_sequence": variant_sequence,
71
+ }
72
+
73
+ def __len__(self) -> int:
74
+ """Return the number of items in the dataset."""
75
+ return len(self.data)
76
+
77
+ def __getitem__(self, idx: int) -> Dict[str, Any]:
78
+ """Return a specific item from the dataset."""
79
+ return self.data[idx]
80
+
81
+
82
+ def split_kegg_dataset(
83
+ dataset: KEGGDataset,
84
+ train_ratio: float = 0.8,
85
+ val_ratio: float = 0.1,
86
+ test_ratio: float = 0.1,
87
+ seed: int = 42,
88
+ ) -> Tuple[KEGGDataset, KEGGDataset, KEGGDataset]:
89
+ """
90
+ Split a KEGG dataset into train, validation, and test sets.
91
+
92
+ Args:
93
+ dataset: The dataset to split
94
+ train_ratio: Proportion of data for training
95
+ val_ratio: Proportion of data for validation
96
+ test_ratio: Proportion of data for testing
97
+ batch_size: Batch size for the dataloaders
98
+ seed: Random seed for reproducibility
99
+
100
+ Returns:
101
+ Tuple of (train_dataset, val_dataset, test_dataset)
102
+ """
103
+ # Calculate the size of each split
104
+ dataset_size = len(dataset)
105
+ train_size = int(train_ratio * dataset_size)
106
+ val_size = int(val_ratio * dataset_size)
107
+ test_size = dataset_size - train_size - val_size
108
+ assert train_ratio + val_ratio + test_ratio == 1.0, "Ratios must sum to 1"
109
+
110
+ # Set the random seed
111
+ torch.manual_seed(seed)
112
+ random.seed(seed)
113
+
114
+ # Split the dataset
115
+ train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
116
+ dataset, [train_size, val_size, test_size]
117
+ )
118
+
119
+ return train_dataset, val_dataset, test_dataset
120
+
121
+
122
+ def create_kegg_dataloader(
123
+ data_dir: str,
124
+ batch_size: int = 2,
125
+ shuffle: bool = True,
126
+ num_workers: int = 2,
127
+ pin_memory: bool = True,
128
+ ) -> DataLoader:
129
+ """
130
+ Create a DataLoader for the KEGG dataset.
131
+
132
+ Args:
133
+ data_dir: Path to the directory containing JSON files
134
+ batch_size: Batch size for the dataloader
135
+ shuffle: Whether to shuffle the data
136
+ num_workers: Number of worker processes for loading data
137
+ pin_memory: Whether to pin memory for faster data transfer
138
+
139
+ Returns:
140
+ DataLoader for the KEGG dataset
141
+ """
142
+ dataset = KEGGDataset(data_dir)
143
+ return DataLoader(
144
+ dataset,
145
+ batch_size=batch_size,
146
+ shuffle=shuffle,
147
+ num_workers=num_workers,
148
+ pin_memory=pin_memory,
149
+ )
150
+
151
+
152
def get_format_kegg_function(model_name: str) -> Any:
    """
    Look up the KEGG example formatter for the given model name.

    Args:
        model_name: Either "llm" or "dna-llm" (case-insensitive)

    Returns:
        The formatting function for that model type

    Raises:
        ValueError: If the model name is not recognized
    """
    normalized = model_name.lower()
    if normalized == "llm":
        return format_kegg_for_llm
    if normalized == "dna-llm":
        return format_kegg_for_dna_llm
    raise ValueError(f"Unsupported model name: {model_name}")
162
+
163
+
164
def format_kegg_for_dna_llm(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert a raw KEGG example into the chat format consumed by the DNA-LLM.

    The user turn carries two DNA placeholders (reference and variant) plus
    the question text; the assistant turn carries the reasoning trace and the
    final "Answer: ..." text.
    """
    user_turn = {
        "role": "user",
        "content": [
            {"type": "dna", "text": None},
            {"type": "dna", "text": None},
            {"type": "text", "text": example["question"].strip()},
        ],
    }
    assistant_turn = {
        "role": "assistant",
        "reasoning_content": example["reasoning"].strip(),
        "content": [
            {"type": "text", "text": f"Answer: {example['answer'].strip()}"},
        ],
    }
    return {
        "prompt": [user_turn, assistant_turn],
        "dna_sequences": [
            example["reference_sequence"],
            example["variant_sequence"],
        ],
        "answer": example["answer"],
    }
191
+
192
+
193
def format_kegg_for_llm(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert a raw KEGG example into the chat format for a text-only LLM.

    The DNA sequences are inlined into the question text instead of being
    passed as separate modalities; the two dna placeholders are kept (with
    empty sequences) so the prompt structure matches the DNA-LLM variant.
    """
    inlined_question = (
        f"Reference sequence: {example['reference_sequence']}\n"
        f"Variant sequence: {example['variant_sequence']}\n"
        f"Question: {example['question']}"
    )
    user_turn = {
        "role": "user",
        "content": [
            {"type": "dna", "text": None},
            {"type": "dna", "text": None},
            {"type": "text", "text": inlined_question.strip()},
        ],
    }
    assistant_turn = {
        "role": "assistant",
        "reasoning_content": example["reasoning"].strip(),
        "content": [
            {"type": "text", "text": f"Answer: {example['answer'].strip()}"},
        ],
    }
    return {
        "prompt": [user_turn, assistant_turn],
        "dna_sequences": ["", ""],
        "answer": example["answer"],
    }
221
+
222
+
223
def qwen_dna_collate_fn(
    examples: List[Dict],
    processor: DLProcessor,
    max_length_text: int,
    max_length_dna: int,
    return_answer_in_batch: bool = False,
) -> Dict:
    """
    Custom collate function for Qwen DNA models.

    Creates a batch with proper labels for supervised fine-tuning where only
    the assistant responses contribute to the loss calculation.

    Args:
        examples: List of formatted examples, each with "prompt",
            "dna_sequences", and "answer" keys
        processor: DLProcessor that tokenizes text and DNA jointly
        max_length_text: Maximum token length for the text side
        max_length_dna: Maximum token length for each DNA sequence
        return_answer_in_batch: If True, also attach the stripped gold
            answers under batch["answer"]

    Returns:
        Processor output dict augmented with a "labels" tensor in which
        everything except assistant-response tokens is set to -100.
    """
    prompts_text = [
        maybe_apply_chat_template(example, processor)["prompt"] for example in examples
    ]
    batch_dna_sequences = [example["dna_sequences"] for example in examples]

    # Left-padding so generation-style models see prompts right-aligned.
    batch = processor(
        text=prompts_text,
        batch_dna_sequences=batch_dna_sequences,
        return_tensors="pt",
        padding=True,
        padding_side="left",
        add_special_tokens=False,
        max_length_text=max_length_text,
        max_length_dna=max_length_dna,
    )

    # Create labels tensor filled with -100 (ignored in loss calculation)
    labels = torch.full_like(batch["input_ids"], -100)

    # Get token IDs for special markers
    # NOTE(review): assumes the tokenizer encodes these marker strings to the
    # same fixed token sequence wherever they occur in a prompt — confirm for
    # the tokenizer in use.
    assistant_start_marker = "<|im_start|>assistant\n"
    im_end_marker = "<|im_end|>"

    assistant_start_token_ids = processor.tokenizer.encode(
        assistant_start_marker, add_special_tokens=False
    )
    im_end_token_ids = processor.tokenizer.encode(
        im_end_marker, add_special_tokens=False
    )

    # Convert token arrays to tensors for faster comparison
    assistant_marker_tensor = torch.tensor(
        assistant_start_token_ids, device=batch["input_ids"].device
    )
    im_end_marker_tensor = torch.tensor(
        im_end_token_ids, device=batch["input_ids"].device
    )

    # Get dimensions for easier reference
    assistant_marker_len = len(assistant_start_token_ids)
    im_end_marker_len = len(im_end_token_ids)

    # For each sequence in the batch
    for i in range(batch["input_ids"].shape[0]):
        input_ids = batch["input_ids"][i]
        seq_len = input_ids.size(0)

        # Track assistant sections
        assistant_sections = []

        # Find all assistant start markers (sliding-window scan over the
        # sequence; O(seq_len * marker_len) per example)
        start_positions = []
        for pos in range(seq_len - assistant_marker_len + 1):
            if torch.all(
                input_ids[pos : pos + assistant_marker_len] == assistant_marker_tensor
            ):
                start_positions.append(
                    pos + assistant_marker_len
                )  # Store position after marker

        # Find all end markers
        end_positions = []
        for pos in range(seq_len - im_end_marker_len + 1):
            if torch.all(
                input_ids[pos : pos + im_end_marker_len] == im_end_marker_tensor
            ):
                end_positions.append(pos)  # Store position at start of end marker

        # Match start and end markers to create sections
        for start_pos in start_positions:
            # Find the next end marker after this start position
            valid_ends = [pos for pos in end_positions if pos > start_pos]
            if valid_ends:
                end_pos = min(valid_ends)  # Take the first end marker after start
                # Only include content between markers (not the markers themselves)
                if start_pos < end_pos:
                    assistant_sections.append((start_pos, end_pos))
            else:
                # If no end marker, assume the section runs to the end of the sequence
                assistant_sections.append((start_pos, seq_len))

        # Set labels for all identified assistant sections
        for start_pos, end_pos in assistant_sections:
            if start_pos < end_pos and start_pos < seq_len:
                end_pos = min(end_pos, seq_len)  # Safety check
                labels[i, start_pos:end_pos] = input_ids[start_pos:end_pos]

    # Also mask padding tokens
    labels[batch["input_ids"] == processor.tokenizer.pad_token_id] = -100

    # Add labels to batch
    batch["labels"] = labels

    # Add answer to batch
    if return_answer_in_batch:
        batch["answer"] = [example["answer"].strip() for example in examples]

    return batch
334
+
335
+
336
def dna_collate_fn(
    batch: List[Dict[str, Any]],
    dna_tokenizer: Any,
    label2id: Dict[str, int],
    max_length: int = 2048,
) -> Dict[str, Any]:
    """
    Collate a batch of variant examples for a DNA-only classifier.

    Tokenizes the reference and variant sequences independently with the same
    settings and maps each string answer to its integer class id.
    """

    def _tokenize(sequences):
        # Shared tokenization settings for both sides of the pair.
        return dna_tokenizer(
            sequences,
            padding=True,
            truncation=True,
            max_length=max_length,
            return_tensors="pt",
        )

    tokenized_ref = _tokenize([item["reference_sequence"] for item in batch])
    tokenized_alt = _tokenize([item["variant_sequence"] for item in batch])

    labels_tensor = torch.tensor(
        [label2id[item["answer"]] for item in batch], dtype=torch.long
    )

    return {
        "ref_ids": tokenized_ref.input_ids,
        "ref_attention_mask": tokenized_ref.attention_mask,
        "alt_ids": tokenized_alt.input_ids,
        "alt_attention_mask": tokenized_alt.attention_mask,
        "labels": labels_tensor,
    }
BioReason-main/bioreason/dataset/utils.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import Dataset as HFDataset
2
+ from torch.utils.data import Dataset as TorchDataset
3
+ from typing import Dict, Any, Union, List
4
+
5
+
6
def truncate_dna(
    example: Dict[str, Any], truncate_dna_per_side: int = 1024
) -> Dict[str, Any]:
    """
    Truncate DNA sequences by removing `truncate_dna_per_side` base pairs from
    both ends of the reference and variant sequences, in place.

    Sequences that are too short to leave a meaningful middle portion (fewer
    than ``2 * truncate_dna_per_side + 8`` bases) are left unchanged.
    (Fixes the previous docstring, which wrongly claimed short sequences were
    replaced by their middle portion.)

    Args:
        example: Example dict with "reference_sequence" and "variant_sequence" keys
        truncate_dna_per_side: Number of base pairs to drop from each end

    Returns:
        The same example dict, with sequence fields truncated when long enough
    """
    for key in ["reference_sequence", "variant_sequence"]:
        sequence = example[key]
        # Only truncate when more than 8 bases would remain in the middle;
        # otherwise keep the sequence as-is.
        if len(sequence) > 2 * truncate_dna_per_side + 8:
            example[key] = sequence[truncate_dna_per_side:-truncate_dna_per_side]

    return example
21
+
22
+
23
def torch_to_hf_dataset(torch_dataset: TorchDataset) -> HFDataset:
    """
    Convert a PyTorch Dataset into a Hugging Face Dataset.

    Dict items become per-key columns (keyed by the first item's keys); any
    other item type is collected under a single "data" column.

    Args:
        torch_dataset: A PyTorch Dataset object to be converted

    Returns:
        A Hugging Face Dataset containing the same data as the input
    """
    # An empty source yields an empty HF dataset.
    if len(torch_dataset) == 0:
        return HFDataset.from_dict({})

    items = [torch_dataset[i] for i in range(len(torch_dataset))]

    if isinstance(items[0], dict):
        # Column layout keyed by the first item's keys.
        columns = {key: [item[key] for item in items] for key in items[0].keys()}
    else:
        columns = {"data": items}

    return HFDataset.from_dict(columns)
BioReason-main/bioreason/dataset/variant_effect.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import random
4
+ import sys
5
+ import torch
6
+ from torch.utils.data import Dataset, DataLoader
7
+ from typing import Any, Dict, List, Tuple
8
+
9
+ from bioreason.dataset.utils import torch_to_hf_dataset
10
+ from bioreason.models.dl.processing_dl import DLProcessor
11
+ from trl.data_utils import maybe_apply_chat_template
12
+
13
+
14
def get_format_variant_effect_function(model_name: str) -> Any:
    """
    Look up the variant-effect example formatter for the given model name.

    Args:
        model_name: Either "llm" or "dna-llm" (case-insensitive)

    Returns:
        The formatting function for that model type

    Raises:
        ValueError: If the model name is not recognized
    """
    normalized = model_name.lower()
    if normalized == "llm":
        return format_variant_effect_for_llm
    if normalized == "dna-llm":
        return format_variant_effect_for_dna_llm
    raise ValueError(f"Unsupported model name: {model_name}")
24
+
25
+
26
def clean_variant_effect_example(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Normalize the answer of a variant-effect example in place.

    Keeps only the text before the first ';', trims whitespace, and lowercases.
    """
    primary_answer, _, _ = example["answer"].partition(";")
    example["answer"] = primary_answer.strip().lower()
    return example
32
+
33
+
34
def clean_variant_effect_non_snv_example(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Clean a variant effect non-SNV example in place.

    Strips list-literal punctuation ([, ], ') from the answer, turns
    underscores into spaces, and trims surrounding whitespace.
    """
    # Single-pass character mapping: delete brackets/quotes, '_' -> ' '.
    cleanup_table = str.maketrans({"[": "", "]": "", "'": "", "_": " "})
    example["answer"] = example["answer"].translate(cleanup_table).strip()
    return example
40
+
41
+
42
def format_variant_effect_for_dna_llm(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert a raw VEP example into the chat format consumed by the DNA-LLM.

    The user turn carries two DNA placeholders (reference and variant) plus
    the question; the assistant turn repeats the "Answer: ..." text as both
    the reasoning content and the final text.
    """
    answer_text = f"Answer: {example['answer'].strip()}"
    user_turn = {
        "role": "user",
        "content": [
            {"type": "dna", "text": None},
            {"type": "dna", "text": None},
            {"type": "text", "text": example["question"].strip()},
        ],
    }
    assistant_turn = {
        "role": "assistant",
        "reasoning_content": answer_text,
        "content": [
            {"type": "text", "text": answer_text},
        ],
    }
    return {
        "prompt": [user_turn, assistant_turn],
        "dna_sequences": [
            example["reference_sequence"],
            example["variant_sequence"],
        ],
        "answer": example["answer"].strip(),
    }
69
+
70
+
71
def format_variant_effect_for_llm(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert a raw VEP example into the chat format for a text-only LLM.

    The DNA sequences are inlined into the question text; the two dna
    placeholders are kept (with empty sequences) so the prompt structure
    matches the DNA-LLM variant.
    """
    inlined_question = (
        f"Reference sequence: {example['reference_sequence']}\n"
        f"Variant sequence: {example['variant_sequence']}\n"
        f"Question: {example['question']}"
    )
    answer_text = f"Answer: {example['answer'].strip()}"
    user_turn = {
        "role": "user",
        "content": [
            {"type": "dna", "text": None},
            {"type": "dna", "text": None},
            {"type": "text", "text": inlined_question.strip()},
        ],
    }
    assistant_turn = {
        "role": "assistant",
        "reasoning_content": answer_text,
        "content": [
            {"type": "text", "text": answer_text},
        ],
    }
    return {
        "prompt": [user_turn, assistant_turn],
        "dna_sequences": ["", ""],
        "answer": example["answer"].strip(),
    }
BioReason-main/bioreason/dna_modules/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from .dna_module import DNABaseModule
2
+ from .nucleotide_module import NucleotideDNAModule
3
+
4
+ __all__ = ["DNABaseModule", "NucleotideDNAModule"]
BioReason-main/bioreason/dna_modules/dna_module.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, Any, Union
3
+ import torch
4
+
5
class DNABaseModule(ABC):
    """
    Abstract interface between a DNA-LLM implementation and the training stack.

    Concrete subclasses supply the model class, the processor, and the
    prompt/input preparation logic for a specific DNA encoder family.
    """

    def __init__(self):
        super().__init__()

    @abstractmethod
    def get_dnallm_key(self):
        """Return the string key identifying this DNA-LLM implementation."""
        pass

    @abstractmethod
    def get_model_class(self, model_id: str, model_init_kwargs: dict):
        """Return the model class to instantiate for the given model id."""
        pass

    def post_model_init(self, model, processing_class):
        """Optional hook run after model construction; default is a no-op."""
        pass

    def is_embeds_input(self):
        # Default: the model consumes token IDs rather than embeddings.
        return False

    @abstractmethod
    def get_processing_class(self):
        """Return the processor class used with this model."""
        pass

    @abstractmethod
    def get_dnallm_modules_keywords(self):
        """Return keywords identifying DNA-specific submodules (e.g. to exclude from LoRA)."""
        pass

    @abstractmethod
    def get_custom_multimodal_keywords(self):
        """Return names of multimodal inputs to forward to the model."""
        pass

    @abstractmethod
    def get_non_generate_params(self):
        """Return parameter names excluded from generation calls."""
        pass

    @abstractmethod
    def get_custom_processing_keywords(self):
        """Return (component, parameter) pairs for custom processor settings."""
        pass

    @abstractmethod
    def prepare_prompt(self, processing_class, inputs: dict[str, Union[torch.Tensor, Any]]):
        """Build the list of text prompts from raw input examples."""
        pass

    @abstractmethod
    def prepare_model_inputs(self, processing_class, prompts_text, images, return_tensors, padding, padding_side, add_special_tokens):
        """Process prompts (and DNA/image payloads) into model-ready inputs."""
        pass
BioReason-main/bioreason/dna_modules/nucleotide_module.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import (
2
+ Qwen2_5_VLForConditionalGeneration,
3
+ Qwen2VLForConditionalGeneration,
4
+ AutoProcessor,
5
+ )
6
+ from typing import Dict, Any, Union, List, Optional, Callable, Type
7
+ from trl.data_utils import maybe_apply_chat_template
8
+ from trl import SFTTrainer
9
+ import torch
10
+
11
+ from bioreason.dna_modules.dna_module import DNABaseModule
12
+ from bioreason.models.dna_llm import DNALLMModel
13
+ from bioreason.models.dl.processing_dl import DLProcessor
14
+
15
+
16
class NucleotideDNAModule(DNABaseModule):
    """
    DNA module implementation for NucleotideTransformer-based models.

    This module provides the interface between DNA-LLM models and the training
    infrastructure, handling model loading, processing setup, and reward functions.
    """

    def __init__(self):
        """Initialize the NucleotideDNAModule."""
        super().__init__()

    def get_dnallm_key(self) -> str:
        """
        Get the key identifier for this DNA-LLM implementation.

        Returns:
            String identifier for this module type
        """
        return "qwen"

    def get_model_class(self, model_id: str, model_init_kwargs: Dict[str, Any]) -> Type:
        """
        Return the appropriate model class based on model ID.

        Args:
            model_id: Identifier for the model
            model_init_kwargs: Initialization arguments for the model

        Returns:
            The model class to instantiate

        Raises:
            ValueError: If the model is not supported
        """
        # Matches any model id containing "DNALLM"; everything else is rejected.
        if "DNALLM" in model_id:
            model_cls = DNALLMModel
        else:
            raise ValueError(f"Unsupported model: {model_id}")
        return model_cls

    def post_model_init(self, model: Any, processing_class: Any) -> None:
        """
        Perform any post-initialization setup on the model.

        Args:
            model: The initialized model
            processing_class: The processor for the model
        """
        # No post-init needed for this implementation
        pass

    def get_processing_class(self) -> Type:
        """
        Get the processing class to use with this DNA-LLM model.

        Returns:
            The processing class
        """
        return DLProcessor

    def get_dnallm_modules_keywords(self) -> List[str]:
        """
        Get keywords to identify DNA-specific modules in the model.

        Used to exclude DNA modules from LoRA adaptation during training.

        Returns:
            List of keywords that identify DNA modules
        """
        return ["dna"]

    def get_custom_multimodal_keywords(self) -> List[str]:
        """
        Get keywords for multimodal inputs that should be passed to the model.

        Returns:
            List of input keywords for multimodal processing
        """
        return ["dna_tokenized", "batch_idx_map"]

    def get_non_generate_params(self) -> List[str]:
        """
        Get parameter names that should be excluded from generation.

        Returns:
            List of parameter names to exclude from generation calls
        """
        return []

    def get_custom_processing_keywords(self) -> List[tuple]:
        """
        Get custom processing keywords for the processor.

        Returns:
            List of (component, parameter) tuples for custom processing
        """
        return [("dna_tokenizer", "max_length")]

    def prepare_prompt(
        self, processing_class: Any, inputs: List[Dict[str, Union[torch.Tensor, Any]]]
    ) -> List[str]:
        """
        Prepare prompts from input examples.

        Args:
            processing_class: The processor to use
            inputs: List of input examples

        Returns:
            List of prepared prompts
        """
        prompts_text = [
            maybe_apply_chat_template(example, processing_class)["prompt"]
            for example in inputs
        ]
        return prompts_text

    def prepare_model_inputs(
        self,
        processing_class: Any,
        model: Any,
        prompts_text: List[str],
        batch_dna_sequences: List[List[str]],
        return_tensors: str = "pt",
        padding: bool = True,
        padding_side: str = "left",
        add_special_tokens: bool = False,
    ) -> Dict[str, Any]:
        """
        Prepare inputs for the model.

        Args:
            processing_class: The processor to use
            model: The model to prepare inputs for
            prompts_text: List of text prompts
            batch_dna_sequences: List of lists of DNA sequences
            return_tensors: Return format for tensors
            padding: Whether to pad inputs
            padding_side: Side to pad on
            add_special_tokens: Whether to add special tokens

        Returns:
            Processed inputs for the model
        """
        # Handle DataParallel wrapped models by accessing the module attribute if needed
        max_length_text = model.max_length_text if not hasattr(model, 'module') else model.module.max_length_text
        max_length_dna = model.max_length_dna if not hasattr(model, 'module') else model.module.max_length_dna

        prompt_inputs = processing_class(
            text=prompts_text,
            batch_dna_sequences=batch_dna_sequences,
            return_tensors=return_tensors,
            padding=padding,
            padding_side=padding_side,
            add_special_tokens=add_special_tokens,
            max_length_text=max_length_text,
            max_length_dna=max_length_dna,
        )

        return prompt_inputs

    def is_embeds_input(self) -> bool:
        """
        Whether the model uses embeddings as input (instead of token IDs).

        Returns:
            Boolean indicating if the model takes embedding inputs
        """
        return True

    @staticmethod
    def get_question_template() -> str:
        """
        Get the template for formatting questions.

        Returns:
            String template for questions
        """
        return "{Question}"

    @staticmethod
    def format_reward_rec(completions: List[Dict[str, Any]], **kwargs) -> List[float]:
        """
        Check if the Qwen model output matches a specific format.

        Args:
            completions: List of model completions
            **kwargs: Additional arguments

        Returns:
            List of reward scores (1.0 for match, 0.0 for no match)
        """
        import re
        import os
        from datetime import datetime

        # Pattern to match the expected output format
        # NOTE(review): this expects <think>...</think><answer>...{[x, y, x, y]}...</answer>
        # — a bounding-box style pattern that looks inherited from a VLM REC
        # task; confirm it matches the intended DNA task output format.
        pattern = r"<think>.*?</think>\s*<answer>.*?\{.*\[\d+,\s*\d+,\s*\d+,\s*\d+\].*\}.*?</answer>"
        completion_contents = [completion[0]["content"] for completion in completions]
        matches = [
            re.search(pattern, content, re.DOTALL) is not None
            for content in completion_contents
        ]

        # Log format results if in debug mode
        current_time = datetime.now().strftime("%d-%H-%M-%S-%f")
        if os.getenv("DEBUG_MODE") == "true":
            log_path = os.getenv("LOG_PATH")
            with open(
                log_path.replace(".txt", "_format.txt"), "a", encoding="utf-8"
            ) as f:
                f.write(f"------------- {current_time} Format reward -------------\n")
                for content, match in zip(completion_contents, matches):
                    f.write(f"Content: {content}\n")
                    f.write(f"Has format: {bool(match)}\n")

        return [1.0 if match else 0.0 for match in matches]

    @staticmethod
    def select_reward_func(func: str, task_type: str) -> Callable:
        """
        Select the appropriate reward function based on function name and task type.

        Args:
            func: The type of reward function ('accuracy', 'format', etc.)
            task_type: The type of task ('rec', etc.)

        Returns:
            The reward function to use

        Raises:
            ValueError: If the function or task type is not supported
        """
        if func == "accuracy":
            match task_type:
                case "rec":
                    # NOTE(review): iou_reward is not defined on this class in
                    # this file — confirm it is provided elsewhere, otherwise
                    # this raises AttributeError at call time.
                    return NucleotideDNAModule.iou_reward
                case _:
                    raise ValueError(f"Unsupported reward function: {func}")
        elif func == "format":
            match task_type:
                case "rec":
                    return NucleotideDNAModule.format_reward_rec
                case _:
                    raise ValueError(f"Unsupported reward function: {func}")
        else:
            raise ValueError(f"Unsupported reward function: {func}")
BioReason-main/bioreason/models/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from .dna_only import DNAClassifierModel
2
+ from .dna_llm import DNALLMModel
3
+ from .evo2_tokenizer import Evo2Tokenizer
4
+
5
+ __all__ = [
6
+ "DNAClassifierModel",
7
+ "DNALLMModel",
8
+ "Evo2Tokenizer",
9
+ ]
BioReason-main/bioreason/models/dl/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
BioReason-main/bioreason/models/dl/chat_template_dl.py ADDED
@@ -0,0 +1 @@
 
 
1
+ CHAT_TEMPLATE = "{%- set dna_count = namespace(value=0) %}{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content is string and message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' }} {%- if message.content is string %}{{- message.content + '<|im_end|>' + '\\n' }}{%- else %}{%- for content in message.content %}{%- if content.type == 'dna' or 'dna' in content %}{%- set dna_count.value = dna_count.value + 1 %}{%- if add_dna_id %}DNA Sequence {{- dna_count.value }}: {%- endif %}<|dna_start|><|dna_pad|><|dna_end|>{%- elif 'text' in content %}{{- content.text }}{%- endif %}{%- endfor %}{{- '<|im_end|>' + '\\n' }}{%- endif %}{%- elif 
message.role == \"assistant\" %}\n {%- set content = message.content[0].text %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in message.content %}\n {%- set content = message.content[0].text.split('</think>')[-1].lstrip('\\n') %}\n {%- set reasoning_content = message.content[0].text.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' 
}}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}"
BioReason-main/bioreason/models/dl/configuration_dl.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import PretrainedConfig
2
+
3
class DLDNAConfig(PretrainedConfig):
    """
    Configuration for the DNA-encoder side of the DL (DNA-LLM) model.

    NOTE(review): the defaults (in_channels=3, patch_size=14, window_size=112,
    fullatt_block_indexes, temporal_patch_size, ...) mirror the Qwen2.5-VL
    vision config this class appears to be adapted from — confirm which of
    these fields are actually meaningful for a DNA encoder.
    """

    # HF config registry identifiers
    model_type = "dl"
    base_config_key = "dna_config"

    def __init__(
        self,
        depth=32,
        hidden_size=3584,
        hidden_act="silu",
        intermediate_size=3420,
        num_heads=16,
        in_channels=3,
        patch_size=14,
        spatial_merge_size=2,
        temporal_patch_size=2,
        tokens_per_second=4,
        window_size=112,
        out_hidden_size=3584,
        fullatt_block_indexes=[7, 15, 23, 31],
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Store all encoder hyperparameters on the config instance.
        self.depth = depth
        self.hidden_size = hidden_size
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size
        self.num_heads = num_heads
        self.in_channels = in_channels
        self.patch_size = patch_size
        self.spatial_merge_size = spatial_merge_size
        self.temporal_patch_size = temporal_patch_size
        self.tokens_per_second = tokens_per_second
        self.window_size = window_size
        self.fullatt_block_indexes = fullatt_block_indexes
        self.out_hidden_size = out_hidden_size
+
40
+ class DLConfig(PretrainedConfig):
41
+ r"""
42
+ This is the configuration class to store the configuration of a [`Qwen2_5_VLModel`]. It is used to instantiate a
43
+ Qwen2-VL model according to the specified arguments, defining the model architecture. Instantiating a configuration
44
+ with the defaults will yield a similar configuration to that of
45
+ Qwen2-VL-7B-Instruct [Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct).
46
+
47
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
48
+ documentation from [`PretrainedConfig`] for more information.
49
+
50
+
51
+ Args:
52
+ vocab_size (`int`, *optional*, defaults to 152064):
53
+ Vocabulary size of the Qwen2_5_VL model. Defines the number of different tokens that can be represented by the
54
+ `inputs_ids` passed when calling [`Qwen2_5_VLModel`]
55
+ hidden_size (`int`, *optional*, defaults to 8192):
56
+ Dimension of the hidden representations.
57
+ intermediate_size (`int`, *optional*, defaults to 29568):
58
+ Dimension of the MLP representations.
59
+ num_hidden_layers (`int`, *optional*, defaults to 80):
60
+ Number of hidden layers in the Transformer encoder.
61
+ num_attention_heads (`int`, *optional*, defaults to 64):
62
+ Number of attention heads for each attention layer in the Transformer encoder.
63
+ num_key_value_heads (`int`, *optional*, defaults to 8):
64
+ This is the number of key_value heads that should be used to implement Grouped Query Attention. If
65
+ `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
66
+ `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
67
+ converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
68
+ by meanpooling all the original heads within that group. For more details checkout [this
69
+ paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to `32`.
70
+ hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
71
+ The non-linear activation function (function or string) in the decoder.
72
+ max_position_embeddings (`int`, *optional*, defaults to 32768):
73
+ The maximum sequence length that this model might ever be used with.
74
+ initializer_range (`float`, *optional*, defaults to 0.02):
75
+ The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
76
+ rms_norm_eps (`float`, *optional*, defaults to 1e-05):
77
+ The epsilon used by the rms normalization layers.
78
+ use_cache (`bool`, *optional*, defaults to `True`):
79
+ Whether or not the model should return the last key/values attentions (not used by all models). Only
80
+ relevant if `config.is_decoder=True`.
81
+ tie_word_embeddings (`bool`, *optional*, defaults to `False`):
82
+ Whether the model's input and output word embeddings should be tied.
83
+ rope_theta (`float`, *optional*, defaults to 1000000.0):
84
+ The base period of the RoPE embeddings.
85
+ use_sliding_window (`bool`, *optional*, defaults to `False`):
86
+ Whether to use sliding window attention.
87
+ sliding_window (`int`, *optional*, defaults to 4096):
88
+ Sliding window attention (SWA) window size. If not specified, will default to `4096`.
89
+ max_window_layers (`int`, *optional*, defaults to 80):
90
+ The number of layers that use SWA (Sliding Window Attention). The bottom layers use SWA while the top use full attention.
91
+ attention_dropout (`float`, *optional*, defaults to 0.0):
92
+ The dropout ratio for the attention probabilities.
93
+ vision_config (`Dict`, *optional*):
94
+ The config for the visual encoder initialization.
95
+ rope_scaling (`Dict`, *optional*):
96
+ Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type
97
+ and you expect the model to work on longer `max_position_embeddings`, we recommend you to update this value
98
+ accordingly.
99
+ Expected contents:
100
+ `rope_type` (`str`):
101
+ The sub-variant of RoPE to use. Can be one of ['default', 'linear', 'dynamic', 'yarn', 'longrope',
102
+ 'llama3'], with 'default' being the original RoPE implementation.
103
+ `factor` (`float`, *optional*):
104
+ Used with all rope types except 'default'. The scaling factor to apply to the RoPE embeddings. In
105
+ most scaling types, a `factor` of x will enable the model to handle sequences of length x *
106
+ original maximum pre-trained length.
107
+ `original_max_position_embeddings` (`int`, *optional*):
108
+ Used with 'dynamic', 'longrope' and 'llama3'. The original max position embeddings used during
109
+ pretraining.
110
+ `attention_factor` (`float`, *optional*):
111
+ Used with 'yarn' and 'longrope'. The scaling factor to be applied on the attention
112
+ computation. If unspecified, it defaults to value recommended by the implementation, using the
113
+ `factor` field to infer the suggested value.
114
+ `beta_fast` (`float`, *optional*):
115
+ Only used with 'yarn'. Parameter to set the boundary for extrapolation (only) in the linear
116
+ ramp function. If unspecified, it defaults to 32.
117
+ `beta_slow` (`float`, *optional*):
118
+ Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
119
+ ramp function. If unspecified, it defaults to 1.
120
+ `short_factor` (`List[float]`, *optional*):
121
+ Only used with 'longrope'. The scaling factor to be applied to short contexts (<
122
+ `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
123
+ size divided by the number of attention heads divided by 2
124
+ `long_factor` (`List[float]`, *optional*):
125
+ Only used with 'longrope'. The scaling factor to be applied to long contexts (<
126
+ `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
127
+ size divided by the number of attention heads divided by 2
128
+ `low_freq_factor` (`float`, *optional*):
129
+ Only used with 'llama3'. Scaling factor applied to low frequency components of the RoPE
130
+ `high_freq_factor` (`float`, *optional*):
131
+ Only used with 'llama3'. Scaling factor applied to high frequency components of the RoPE
132
+
133
+ ```python
134
+ >>> from transformers import Qwen2_5_VLForConditionalGeneration, Qwen2_5_VLConfig
135
+
136
+ >>> # Initializing a Qwen2_5_VL style configuration
137
+ >>> configuration = Qwen2_5_VLConfig()
138
+
139
+ >>> # Initializing a model from the Qwen2-VL-7B style configuration
140
+ >>> model = Qwen2_5_VLForConditionalGeneration(configuration)
141
+
142
+ >>> # Accessing the model configuration
143
+ >>> configuration = model.config
144
+ ```"""
145
+
146
    # Model identifier used by the transformers Auto* registry.
    model_type = "dl"
    # Sub-configurations: the DNA encoder configuration lives under "dna_config".
    sub_configs = {"dna_config": DLDNAConfig}
    # Keys dropped from outputs at inference time.
    keys_to_ignore_at_inference = ["past_key_values"]
    # Default tensor parallel plan for base model `Qwen2_5_VL`
    # (module-name glob -> sharding strategy for each attention/MLP projection).
    base_model_tp_plan = {
        "layers.*.self_attn.q_proj": "colwise",
        "layers.*.self_attn.k_proj": "colwise",
        "layers.*.self_attn.v_proj": "colwise",
        "layers.*.self_attn.o_proj": "rowwise",
        "layers.*.mlp.gate_proj": "colwise",
        "layers.*.mlp.up_proj": "colwise",
        "layers.*.mlp.down_proj": "rowwise",
    }
    # Pipeline parallel plan: stage name -> (input tensor names, output tensor names).
    base_model_pp_plan = {
        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
        "norm": (["hidden_states"], ["hidden_states"]),
    }
164
+
165
+ def __init__(
166
+ self,
167
+ vocab_size=152064,
168
+ hidden_size=8192,
169
+ intermediate_size=29568,
170
+ num_hidden_layers=80,
171
+ num_attention_heads=64,
172
+ num_key_value_heads=8,
173
+ hidden_act="silu",
174
+ max_position_embeddings=32768,
175
+ initializer_range=0.02,
176
+ rms_norm_eps=1e-05,
177
+ use_cache=True,
178
+ tie_word_embeddings=False,
179
+ rope_theta=1000000.0,
180
+ use_sliding_window=False,
181
+ sliding_window=4096,
182
+ max_window_layers=80,
183
+ attention_dropout=0.0,
184
+ vision_config=None,
185
+ rope_scaling=None,
186
+ image_token_id=None,
187
+ **kwargs,
188
+ ):
189
+ if isinstance(vision_config, dict):
190
+ self.vision_config = self.sub_configs["vision_config"](**vision_config)
191
+ elif vision_config is None:
192
+ self.vision_config = self.sub_configs["vision_config"]()
193
+
194
+ self.vocab_size = vocab_size
195
+ self.max_position_embeddings = max_position_embeddings
196
+ self.hidden_size = hidden_size
197
+ self.intermediate_size = intermediate_size
198
+ self.num_hidden_layers = num_hidden_layers
199
+ self.num_attention_heads = num_attention_heads
200
+ self.use_sliding_window = use_sliding_window
201
+ self.sliding_window = sliding_window
202
+ self.max_window_layers = max_window_layers
203
+
204
+ # for backward compatibility
205
+ if num_key_value_heads is None:
206
+ num_key_value_heads = num_attention_heads
207
+
208
+ self.num_key_value_heads = num_key_value_heads
209
+ self.hidden_act = hidden_act
210
+ self.initializer_range = initializer_range
211
+ self.rms_norm_eps = rms_norm_eps
212
+ self.use_cache = use_cache
213
+ self.rope_theta = rope_theta
214
+ self.attention_dropout = attention_dropout
215
+ self.rope_scaling = rope_scaling
216
+
217
+ self.dna_token_id = image_token_id
218
+
219
+ # Validate the correctness of rotary position embeddings parameters
220
+ # BC: if there is a 'type' field, move it to 'rope_type'.
221
+ # and change type from 'mrope' to 'default' because `mrope` does default RoPE calculations
222
+ # one can set it to "linear"/"dynamic" etc. to have scaled RoPE
223
+ # TODO: @raushan update config in the hub
224
+ if self.rope_scaling is not None and "type" in self.rope_scaling:
225
+ if self.rope_scaling["type"] == "mrope":
226
+ self.rope_scaling["type"] = "default"
227
+ self.rope_scaling["rope_type"] = self.rope_scaling["type"]
228
+ rope_config_validation(self, ignore_keys={"mrope_section"})
229
+
230
+ super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
231
+
232
+ __all__ = ["DLConfig"]
BioReason-main/bioreason/models/dl/processing_dl.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional, Union, Dict, Any, Tuple
2
+
3
+ import torch
4
+ from torch import nn
5
+ import torch.nn.functional as F
6
+
7
+ from transformers import AutoTokenizer
8
+ from transformers.processing_utils import (
9
+ CommonKwargs,
10
+ ProcessingKwargs,
11
+ ProcessorMixin,
12
+ Unpack,
13
+ )
14
+ from transformers.feature_extraction_utils import BatchFeature
15
+ from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
16
+ from transformers.utils import logging
17
+
18
+ from bioreason.utils.dna_utils import DNAInput
19
+
20
class DLDNAKwargs(CommonKwargs):
    """Keyword arguments specific to DNA processing (TypedDict-style keys)."""
    # Maximum token length for the text prompt; None defers to the tokenizer default.
    max_length_text: Optional[int]
    # Maximum token length for each DNA sequence; None defers to the tokenizer default.
    max_length_dna: Optional[int]
24
+
25
+
26
class DLProcessorKwargs(ProcessingKwargs, total=False):
    """Processing keyword arguments for the DL processor."""
    # DNA-specific kwargs (see DLDNAKwargs).
    dna_kwargs: DLDNAKwargs
    # Defaults merged in by ProcessorMixin._merge_kwargs; text is not padded
    # unless the caller asks for it.
    _defaults = {
        "text_kwargs": {
            "padding": False,
        },
    }
34
+
35
class DLProcessor(ProcessorMixin):
    r"""
    Constructs a DL processor which wraps a NucleotideTransformer DNA processor and a Qwen2_5 tokenizer into a single processor.
    This processor handles both text and DNA sequence processing to prepare inputs for the DNALLMModel.

    Args:
        tokenizer (PreTrainedTokenizerBase, *optional*):
            The text tokenizer used for processing text inputs.
        dna_tokenizer (PreTrainedTokenizerBase, *optional*):
            The DNA tokenizer used for processing DNA sequences.
        chat_template (`str`, *optional*):
            A Jinja template for chat formatting. If None, will use the tokenizer's template.
    """

    attributes = ["tokenizer", "dna_tokenizer"]
    valid_kwargs = ["model", "chat_template"]
    tokenizer_class = (
        "Qwen2Tokenizer",
        "Qwen2TokenizerFast",
        "GPT2TokenizerFast",
    )
    dna_tokenizer_class = ("EsmTokenizer", "Evo2Tokenizer")

    def __init__(
        self, tokenizer=None, dna_tokenizer=None, chat_template=None, **kwargs
    ):
        """
        Initialize the processor with text and DNA tokenizers.

        Args:
            tokenizer: Text tokenizer (usually from a language model)
            dna_tokenizer: DNA tokenizer (usually from a DNA model)
            chat_template: Template for formatting chat conversations
            **kwargs: Additional arguments
        """
        self.tokenizer = tokenizer
        self.dna_tokenizer = dna_tokenizer

        # Placeholder token in the text stream that marks where DNA embeddings
        # are spliced in by the model.
        self.dna_token = (
            "<|dna_pad|>"
            if not hasattr(self.tokenizer, "dna_token")
            else self.tokenizer.dna_token
        )

        # Get chat template from tokenizer if not provided
        if chat_template is None and hasattr(self.tokenizer, "chat_template"):
            chat_template = self.tokenizer.chat_template
        super().__init__(tokenizer, dna_tokenizer, chat_template=chat_template)

        # The GRPO trainer might expect this to be set
        if not hasattr(self.tokenizer, "pad_token") or self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def tokenize_dna_sequences(
        self,
        batch_dna_sequences: List[List[str]],
        max_length: int = 2048,
        return_tensors: str = "pt",
        device: str = "cuda",
    ) -> Dict[str, Any]:
        """
        Tokenize a batch of DNA sequences.

        Args:
            batch_dna_sequences: List of lists of DNA sequences per batch item
            max_length: Maximum allowed length for DNA sequences
            return_tensors: Return format for tensors ("pt" for PyTorch)
            device: Accepted for API symmetry; tensors are NOT moved here —
                callers are responsible for device placement.

        Returns:
            Dict containing:
                - dna_tokenized: The tokenized DNA sequences (None if no sequences)
                - batch_idx_map: Mapping of which sequences belong to which batch item
        """
        # Create a mapping to track which sequences belong to which batch item
        batch_idx_map = []
        all_sequences = []

        # Flatten all sequences with batch tracking
        for batch_idx, dna_sequences in enumerate(batch_dna_sequences):
            for seq in dna_sequences:
                all_sequences.append(seq)
                batch_idx_map.append(batch_idx)

        # If no sequences in the entire batch, return empty dict
        if not all_sequences:
            return {"dna_tokenized": None, "batch_idx_map": []}

        # Tokenize all sequences at once
        dna_tokenized = self.dna_tokenizer(
            all_sequences,
            padding=True,
            truncation=True,
            max_length=max_length,
            return_tensors=return_tensors,
            return_attention_mask=True,
        )

        return {"dna_tokenized": dna_tokenized, "batch_idx_map": batch_idx_map}

    def __call__(
        self,
        batch_dna_sequences: Optional[List[List[str]]] = None,
        text: Optional[
            Union[
                TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]
            ]
        ] = None,
        max_length_text: int = 512,
        max_length_dna: int = 2048,
        return_tensors: str = "pt",
        device: str = "cuda",
        **kwargs: Unpack[DLProcessorKwargs],
    ) -> BatchFeature:
        """
        Process text and DNA sequences for model input.

        Each occurrence of the DNA placeholder token in `text` is expanded to
        one placeholder per (non-padding) DNA token so that text positions
        align 1:1 with DNA embeddings in the model's forward pass.

        Args:
            batch_dna_sequences: List of lists of DNA sequences per batch item
            text: Input text or list of texts
            max_length_text: Maximum length for text sequences
            max_length_dna: Maximum length for DNA sequences
            return_tensors: Return format for tensors
            device: Device to place tensors on
            **kwargs: Additional processor keyword arguments

        Returns:
            BatchFeature with tokenized inputs for the model
        """
        output_kwargs = self._merge_kwargs(
            DLProcessorKwargs,
            tokenizer_init_kwargs=self.tokenizer.init_kwargs,
            **kwargs,
        )

        # Ensure text is a list
        if not isinstance(text, list):
            text = [text]

        dna_inputs = {}
        if batch_dna_sequences is not None:
            # Tokenize DNA sequences
            dna_processing_result = self.tokenize_dna_sequences(
                batch_dna_sequences,
                max_length=max_length_dna,
                return_tensors=return_tensors,
                device=device,
            )

            # BUG FIX: the original hard-coded pad id 1 (EsmTokenizer's pad id)
            # when counting valid DNA tokens, which miscounts for tokenizers
            # with a different pad id (e.g. Evo2Tokenizer). Use the DNA
            # tokenizer's actual pad id, falling back to 1 for compatibility.
            pad_id = getattr(self.dna_tokenizer, "pad_token_id", None)
            if pad_id is None:
                pad_id = 1

            # Expand each DNA placeholder into one placeholder per DNA token.
            index = 0
            for i in range(len(text)):
                while self.dna_token in text[i]:
                    num_dna_tokens = (
                        (
                            dna_processing_result["dna_tokenized"]["input_ids"][index]
                            != pad_id
                        )
                        .sum()
                        .item()
                    )
                    text[i] = text[i].replace(
                        self.dna_token, "<|placeholder|>" * num_dna_tokens, 1
                    )
                    index += 1
                text[i] = text[i].replace("<|placeholder|>", self.dna_token)

            # Add batch info to the output
            dna_inputs = {
                "dna_tokenized": dna_processing_result["dna_tokenized"],
                "batch_idx_map": dna_processing_result["batch_idx_map"],
            }

        # Tokenize text; padding is forced below, so drop any user-supplied value.
        text_kwargs = output_kwargs.get("text_kwargs", {})

        if "padding" in text_kwargs:
            del text_kwargs["padding"]

        # The text budget reserves room for the expanded DNA placeholders.
        text_inputs = self.tokenizer(
            text,
            max_length=max_length_text + 2 * max_length_dna,
            return_tensors=return_tensors,
            padding=True,
            truncation=True,
            **text_kwargs,
        )

        # The BatchFeature should have all required fields for the model's forward pass
        return BatchFeature(data={**text_inputs, **dna_inputs})

    def batch_decode(self, *args, **kwargs) -> List[str]:
        """
        This method forwards all its arguments to the tokenizer's batch_decode.

        Returns:
            List of decoded strings
        """
        return self.tokenizer.batch_decode(*args, **kwargs)

    def decode(self, *args, **kwargs) -> str:
        """
        This method forwards all its arguments to the tokenizer's decode.

        Returns:
            Decoded string
        """
        return self.tokenizer.decode(*args, **kwargs)

    def post_process_dna_to_text(
        self,
        generated_outputs: torch.Tensor,
        skip_special_tokens: bool = True,
        **kwargs,
    ) -> List[str]:
        """
        Post-process the model output to decode the text.

        Args:
            generated_outputs: The token IDs generated by the model
            skip_special_tokens: Whether to skip special tokens in the output
            **kwargs: Additional arguments for the decoder

        Returns:
            List of decoded strings
        """
        return self.tokenizer.batch_decode(
            generated_outputs,
            skip_special_tokens=skip_special_tokens,
            **kwargs,
        )

    @property
    def model_input_names(self) -> List[str]:
        """
        Get the input names expected by the model.

        Returns:
            List of input names (text tokenizer inputs plus DNA-specific keys,
            de-duplicated while preserving order)
        """
        tokenizer_input_names = self.tokenizer.model_input_names
        dna_input_names = ["dna_tokenized", "batch_idx_map"]

        return list(dict.fromkeys(tokenizer_input_names + dna_input_names))
BioReason-main/bioreason/models/dna_llm.py ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from argparse import ArgumentParser
3
+ import torch
4
+ import torch.nn as nn
5
+ from transformers import (
6
+ AutoTokenizer,
7
+ AutoModelForCausalLM,
8
+ AutoModelForMaskedLM,
9
+ )
10
+
11
+ from typing import Optional, List, Dict, Any, Union, Tuple
12
+
13
+ from bioreason.utils.dna_utils import DNAInput
14
+ from bioreason.models.dl.processing_dl import DLProcessor
15
+ from bioreason.models.dl.chat_template_dl import CHAT_TEMPLATE
16
+ from bioreason.models.evo2_tokenizer import Evo2Tokenizer
17
+
18
class DNALLMModel(nn.Module):
    """
    A combined model that processes both DNA sequences and text inputs.

    The model uses a DNA encoder (like NucleotideTransformer) to extract features from DNA sequences
    and a text model (LLM) to process text inputs and generate responses. The DNA features are
    projected to the text model's embedding space and substituted at the positions of the
    `<|dna_pad|>` placeholder tokens in the text embeddings.
    """

    def __init__(
        self,
        text_model_name: str,
        dna_model_name: str,
        cache_dir: Optional[str] = None,
        max_length_dna: int = 2048,
        max_length_text: int = 512,
        text_model_finetune: bool = True,
        dna_model_finetune: bool = True,
        dna_is_evo2: bool = False,
        dna_embedding_layer: str = None,
    ):
        """
        Initialize the DNALLMModel.

        Args:
            text_model_name: Name of the text model to be used.
            dna_model_name: Name of the DNA model to be used.
            cache_dir: Directory to cache the models.
            max_length_dna: Maximum length of DNA sequences. Defaults to 2048.
            max_length_text: Maximum length of text sequences. Defaults to 512.
            text_model_finetune: Whether to finetune the text model. Defaults to True.
            dna_model_finetune: Whether to finetune the DNA model. Defaults to True.
            dna_is_evo2: Whether the DNA model is Evo2. Defaults to False.
            dna_embedding_layer: Name of the layer to use for the Evo2 model. Defaults to None.
        """
        super().__init__()

        self.text_model_finetune = text_model_finetune
        self.dna_model_finetune = dna_model_finetune
        self.max_length_dna = max_length_dna
        self.max_length_text = max_length_text
        self.dna_is_evo2 = dna_is_evo2
        self.dna_embedding_layer = dna_embedding_layer

        # Load the text model and tokenizer
        self.text_model = AutoModelForCausalLM.from_pretrained(
            text_model_name, cache_dir=cache_dir, trust_remote_code=True
        )
        self.text_tokenizer = AutoTokenizer.from_pretrained(
            text_model_name, trust_remote_code=True
        )
        self.text_config = self.text_model.config
        self.text_tokenizer.chat_template = CHAT_TEMPLATE
        self.text_tokenizer.pad_token = self.text_tokenizer.eos_token

        # Register the DNA placeholder tokens with the text tokenizer.
        new_tokens = ["<|dna_start|>", "<|dna_pad|>", "<|dna_end|>"]
        self.text_tokenizer.add_special_tokens({"additional_special_tokens": new_tokens})
        self.dna_token_id = self.text_tokenizer.convert_tokens_to_ids("<|dna_pad|>")

        # Load the DNA model and tokenizer
        if not self.dna_is_evo2:
            self.dna_model = AutoModelForMaskedLM.from_pretrained(
                dna_model_name, cache_dir=cache_dir, trust_remote_code=True
            )
            self.dna_tokenizer = AutoTokenizer.from_pretrained(
                dna_model_name, trust_remote_code=True
            )
            self.dna_config = self.dna_model.config
        else:
            from evo2 import Evo2

            self.dna_model = Evo2(dna_model_name)
            self.dna_tokenizer = Evo2Tokenizer(self.dna_model.tokenizer)
            self.dna_config = self.dna_model.model.config

        # Get model dimensions
        self.text_hidden_size = self.text_config.hidden_size
        self.dna_hidden_size = self.dna_config.hidden_size

        # Create projection layer to map DNA embeddings to text model's embedding space
        self.dna_projection = nn.Linear(self.dna_hidden_size, self.text_hidden_size)

        # Create processor for handling inputs
        self.processor = DLProcessor(
            tokenizer=self.text_tokenizer, dna_tokenizer=self.dna_tokenizer
        )

    def _empty_dna_embedding(self) -> torch.Tensor:
        """Return a (0, text_hidden_size) placeholder on the projection's device/dtype.

        BUG FIX: the original created these zeros on CPU with the default dtype,
        which breaks `torch.cat` / masked assignment when the model runs on GPU
        or in half precision.
        """
        weight = self.dna_projection.weight
        return torch.zeros(
            (0, self.text_hidden_size), device=weight.device, dtype=weight.dtype
        )

    def process_dna_embeddings(
        self,
        dna_tokenized: Dict[str, torch.Tensor],
        batch_idx_map: List[int],
        batch_size: int,
    ) -> List[torch.Tensor]:
        """
        Process DNA sequences to obtain embeddings.

        Args:
            dna_tokenized: Tokenized DNA sequences
            batch_idx_map: Mapping of each sequence to its batch item
            batch_size: Number of items in the batch

        Returns:
            List of tensor embeddings for each batch item
        """
        # Process all sequences to get DNA representations.
        # NOTE(review): no_grad here freezes the DNA encoder even when
        # dna_model_finetune=True — confirm that is intended.
        with torch.no_grad():
            if self.dna_is_evo2 and self.dna_embedding_layer is not None:  # Evo2 model
                # Get embeddings from the specific layer in Evo2, one sequence
                # at a time.
                hidden_states_list = []

                for seq_idx in range(len(dna_tokenized["input_ids"])):
                    # Extract single sequence
                    input_ids = dna_tokenized["input_ids"][seq_idx : seq_idx + 1]

                    # Call Evo2 with return_embeddings=True
                    _, embeddings = self.dna_model(
                        input_ids,
                        return_embeddings=True,
                        layer_names=[self.dna_embedding_layer],
                    )

                    # Get embeddings for the specified layer
                    seq_embeddings = embeddings[self.dna_embedding_layer].squeeze(0)
                    hidden_states_list.append(seq_embeddings)

                # Stack to get same format as non-Evo2 output
                if hidden_states_list:
                    hidden_states = torch.stack(hidden_states_list)
                else:
                    return [self._empty_dna_embedding() for _ in range(batch_size)]

            else:  # Standard HuggingFace model
                outputs = self.dna_model(
                    input_ids=dna_tokenized["input_ids"],
                    attention_mask=dna_tokenized["attention_mask"],
                    output_hidden_states=True,
                )
                # Get the last hidden state
                hidden_states = outputs.hidden_states[-1]  # [n_seqs, seq_len, hidden_dim]

        # Project all embeddings at once (match the projection's device/dtype first)
        hidden_states = hidden_states.to(
            device=self.dna_projection.weight.device,
            dtype=self.dna_projection.weight.dtype,
        )
        projected_states = self.dna_projection(hidden_states)

        # Group embeddings by batch item
        result = [[] for _ in range(batch_size)]

        # For each sequence, keep only the valid (non-padding) tokens and route
        # them to the batch item they came from.
        for seq_idx, batch_idx in enumerate(batch_idx_map):
            valid_length = dna_tokenized["attention_mask"][seq_idx].sum().item()
            result[batch_idx].append(projected_states[seq_idx, :valid_length])

        # Concatenate embeddings for each batch item
        for i in range(batch_size):
            if result[i]:
                result[i] = torch.cat(result[i], dim=0)
            else:
                result[i] = self._empty_dna_embedding()

        return result

    def _build_inputs_embeds(
        self,
        input_ids: torch.Tensor,
        dna_tokenized: Optional[Dict[str, torch.Tensor]],
        batch_idx_map: Optional[List[int]],
    ) -> torch.Tensor:
        """Embed `input_ids` and splice DNA embeddings over `<|dna_pad|>` positions.

        Raises:
            ValueError: if the number of DNA feature vectors does not equal the
                number of DNA placeholder tokens in `input_ids`.
        """
        batch_size = input_ids.shape[0]

        # Get text embeddings from the model's embedding layer
        text_inputs_embeds = self.text_model.get_input_embeddings()(input_ids)

        if dna_tokenized is not None and batch_idx_map:
            batch_dna_embeds = self.process_dna_embeddings(
                dna_tokenized, batch_idx_map, batch_size
            )

            mask = input_ids == self.dna_token_id

            n_dna_tokens = mask.sum().item()
            dna_embeds_flat = torch.cat(batch_dna_embeds, dim=0)
            n_dna_features = dna_embeds_flat.shape[0]

            if n_dna_features != n_dna_tokens:
                raise ValueError(
                    f"DNA features and DNA tokens do not match: features {n_dna_features}, tokens: {n_dna_tokens}"
                )

            # Ensure DNA embeddings have the same dtype as the text embeddings
            dna_embeds_flat = dna_embeds_flat.to(dtype=text_inputs_embeds.dtype)
            text_inputs_embeds[mask] = dna_embeds_flat

        return text_inputs_embeds

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        dna_tokenized: Optional[Dict[str, torch.Tensor]] = None,
        batch_idx_map: Optional[List[int]] = None,
        labels: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> torch.Tensor:
        """
        Run a forward pass over combined DNA and text inputs.

        Args:
            input_ids: Input IDs (required)
            attention_mask: Attention mask (required)
            dna_tokenized: Tokenized DNA sequences
            batch_idx_map: Batch mapping for DNA sequences
            labels: Labels for supervised fine-tuning (loss is computed when given)
            **kwargs: Additional arguments forwarded to the text model

        Returns:
            Outputs from the text model
        """
        # Ensure required inputs are available
        if input_ids is None or attention_mask is None:
            raise ValueError("Either 'inputs' or 'input_ids'/'attention_mask' must be provided")

        text_inputs_embeds = self._build_inputs_embeds(
            input_ids, dna_tokenized, batch_idx_map
        )

        # Handle labels if provided (for training)
        if labels is not None:
            # TODO: Implement this
            pass

        # Forward pass through the text model (loss is computed if labels is provided)
        outputs = self.text_model(
            inputs_embeds=text_inputs_embeds,
            attention_mask=attention_mask,
            labels=labels,
            **kwargs,
        )

        return outputs

    def generate(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        dna_tokenized: Optional[Dict[str, torch.Tensor]] = None,
        batch_idx_map: Optional[List[int]] = None,
        **generation_kwargs,
    ) -> Union[torch.Tensor, List[str]]:
        """
        Generate text based on DNA and text inputs.

        Args:
            input_ids: Input IDs (required)
            attention_mask: Attention mask (required)
            dna_tokenized: Tokenized DNA sequences
            batch_idx_map: Batch mapping for DNA sequences
            **generation_kwargs: Additional arguments for generation

        Returns:
            Generated token IDs which can be decoded using the processor
        """
        # Ensure required inputs are available
        if input_ids is None or attention_mask is None:
            raise ValueError("Either 'inputs' or 'input_ids'/'attention_mask' must be provided")

        text_inputs_embeds = self._build_inputs_embeds(
            input_ids, dna_tokenized, batch_idx_map
        )

        # Generation parameters may need adjustment based on model type
        with torch.no_grad():
            outputs = self.text_model.generate(
                inputs_embeds=text_inputs_embeds,
                attention_mask=attention_mask,
                use_cache=True,
                **generation_kwargs,
            )

        return outputs
BioReason-main/bioreason/models/dna_only.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from typing import Dict
5
+ from transformers import AutoModelForMaskedLM, AutoTokenizer
6
+
7
+
8
+ class SelfAttentionPooling(nn.Module):
9
+ def __init__(self, hidden_size, num_heads=8):
10
+ super().__init__()
11
+ # Use PyTorch's built-in multi-head attention
12
+ self.attention = nn.MultiheadAttention(
13
+ embed_dim=hidden_size,
14
+ num_heads=num_heads,
15
+ batch_first=True
16
+ )
17
+ # Learnable query vector
18
+ self.query = nn.Parameter(torch.randn(1, 1, hidden_size))
19
+
20
+ def forward(self, embeddings, attention_mask=None):
21
+ # Expand query to batch size
22
+ batch_size = embeddings.size(0)
23
+ query = self.query.expand(batch_size, -1, -1)
24
+
25
+ # Create key padding mask from attention mask if provided
26
+ key_padding_mask = None
27
+ if attention_mask is not None:
28
+ key_padding_mask = attention_mask == 0 # Convert to boolean mask where True means ignore
29
+
30
+ # Apply attention: query attends to embeddings
31
+ context, _ = self.attention(
32
+ query=query, # [batch_size, 1, hidden_size]
33
+ key=embeddings, # [batch_size, seq_len, hidden_size]
34
+ value=embeddings, # [batch_size, seq_len, hidden_size]
35
+ key_padding_mask=key_padding_mask
36
+ )
37
+
38
+ # Squeeze out the singleton dimension
39
+ return context.squeeze(1) # [batch_size, hidden_size]
40
+
41
+
42
class DNAClassifierModel(nn.Module):
    """
    A simple classifier that uses a DNA model with a classification head.

    Reference and alternate sequences are each embedded with the DNA backbone,
    pooled with self-attention, concatenated, and classified with a small MLP.
    """

    def __init__(
        self,
        dna_model_name: str,
        cache_dir: str = None,
        max_length_dna: int = 4096,
        num_classes: int = 2,  # Binary classification by default
        dna_is_evo2: bool = False,
        dna_embedding_layer: str = None,
        train_just_classifier: bool = True,
    ):
        """
        Initialize the DNAClassifierModel.

        Args:
            dna_model_name (str): Name of the DNA model to use
            cache_dir (str): Directory to cache models
            max_length_dna (int): Maximum sequence length
            num_classes (int): Number of output classes
            dna_is_evo2: Whether the DNA model is Evo2. Defaults to False
            dna_embedding_layer: Name of the layer to use for the Evo2 model. Defaults to None
            train_just_classifier: Whether to train just the classifier. Defaults to True
        """
        super().__init__()

        self.dna_model_name = dna_model_name
        self.cache_dir = cache_dir
        self.max_length_dna = max_length_dna
        self.num_classes = num_classes
        self.dna_is_evo2 = dna_is_evo2
        self.dna_embedding_layer = dna_embedding_layer
        self.train_just_classifier = train_just_classifier

        # Load the DNA model and tokenizer
        if not self.dna_is_evo2:
            self.dna_model = AutoModelForMaskedLM.from_pretrained(
                dna_model_name, cache_dir=cache_dir, trust_remote_code=True
            )
            self.dna_tokenizer = AutoTokenizer.from_pretrained(dna_model_name, trust_remote_code=True)
            self.dna_config = self.dna_model.config
        else:
            # Imported lazily so the evo2 dependency is only needed when used.
            from evo2 import Evo2
            from bioreason.models.evo2_tokenizer import Evo2Tokenizer

            self.dna_model = Evo2(dna_model_name)
            self.dna_tokenizer = Evo2Tokenizer(self.dna_model.tokenizer)
            self.dna_config = self.dna_model.model.config

        # Get hidden size from model config
        self.hidden_size = self.dna_config.hidden_size

        # Self-attention pooling turns per-token states into one vector.
        self.pooler = SelfAttentionPooling(self.hidden_size)

        # Classification head over the concatenated (ref, alt) embeddings,
        # hence the `hidden_size * 2` input width.
        self.classifier = nn.Sequential(
            nn.Linear(self.hidden_size * 2, self.hidden_size),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(self.hidden_size, num_classes),
        )

    def get_dna_embedding(self, input_ids: torch.Tensor, attention_mask: torch.Tensor):
        """
        Get DNA embedding for a single DNA sequence using self-attention pooling.

        Args:
            input_ids: DNA tokenized sequence, shape [seq_len] or [1, seq_len]
            attention_mask: Matching attention mask, or None

        Returns:
            torch.Tensor: 1D tensor [hidden_size] — the pooled DNA embedding
        """
        # Add batch dimension if not present
        if input_ids.dim() == 1:
            input_ids = input_ids.unsqueeze(0)  # [1, seq_len]

        # Handle attention mask - create if not provided or add batch dimension
        if attention_mask is None:
            attention_mask = torch.ones_like(input_ids)
        elif attention_mask.dim() == 1:
            attention_mask = attention_mask.unsqueeze(0)  # [1, seq_len]

        # Only track gradients through the backbone when fine-tuning it.
        with torch.set_grad_enabled(not self.train_just_classifier):
            if self.dna_is_evo2 and self.dna_embedding_layer is not None:  # Evo2 model
                # Get embeddings from the specific layer in Evo2
                # (the Evo2 forward does not accept an attention mask).
                _, embeddings = self.dna_model(
                    input_ids,
                    return_embeddings=True,
                    layer_names=[self.dna_embedding_layer],
                )
                hidden_states = embeddings[self.dna_embedding_layer]
            else:
                # Standard HF model: use the last hidden state.
                outputs = self.dna_model(
                    input_ids,
                    attention_mask=attention_mask,
                    output_hidden_states=True,
                )
                hidden_states = outputs.hidden_states[-1]

        # Pool outside the no-grad context so the pooler always trains.
        sequence_embedding = self.pooler(hidden_states, attention_mask)
        return sequence_embedding.squeeze(0)

    def forward(
        self, ref_ids=None, alt_ids=None, ref_attention_mask=None, alt_attention_mask=None
    ):
        """
        Forward pass of the model.

        Args:
            ref_ids: Reference sequence token IDs, shape [batch, seq_len]
            alt_ids: Alternate sequence token IDs, shape [batch, seq_len]
            ref_attention_mask: Reference sequence attention mask (optional)
            alt_attention_mask: Alternate sequence attention mask (optional)

        Returns:
            torch.Tensor: Classification logits, shape [batch, num_classes]

        Raises:
            ValueError: If ref_ids or alt_ids is not provided.
        """
        # Fail fast with a clear error; the original `batch_size is None`
        # check was unreachable because `.shape[0]` would raise first.
        if ref_ids is None or alt_ids is None:
            raise ValueError("Both ref_ids and alt_ids token IDs must be provided")

        batch_size = ref_ids.shape[0]

        ref_embeddings = []
        alt_embeddings = []

        # Process each example in the batch individually; masks may be absent,
        # in which case get_dna_embedding synthesizes an all-ones mask.
        for i in range(batch_size):
            ref_mask = ref_attention_mask[i] if ref_attention_mask is not None else None
            alt_mask = alt_attention_mask[i] if alt_attention_mask is not None else None

            ref_embeddings.append(self.get_dna_embedding(ref_ids[i], ref_mask))
            alt_embeddings.append(self.get_dna_embedding(alt_ids[i], alt_mask))

        # Stack embeddings: [batch, hidden_size]
        ref_embeddings = torch.stack(ref_embeddings)
        alt_embeddings = torch.stack(alt_embeddings)

        # Concatenate ref and alt embeddings -> [batch, hidden_size * 2]
        combined_embeddings = torch.cat([ref_embeddings, alt_embeddings], dim=1)

        # Pass through classifier
        return self.classifier(combined_embeddings)
BioReason-main/bioreason/models/evo2_tokenizer.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers.tokenization_utils import PreTrainedTokenizer
2
+ from transformers.utils import logging
3
+ from transformers import AutoTokenizer
4
+ from transformers.tokenization_utils_base import BatchEncoding
5
+ import torch
6
+ import numpy as np
7
+ from typing import List, Dict, Optional, Union, Tuple
8
+
9
+ # Register the tokenizer with AutoTokenizer
10
+ from transformers.models.auto import AutoTokenizer
11
+ from transformers.models.auto.tokenization_auto import TOKENIZER_MAPPING
12
+ from transformers.models.auto.configuration_auto import CONFIG_MAPPING
13
+
14
+ logger = logging.get_logger(__name__)
15
+
16
class Evo2Tokenizer(PreTrainedTokenizer):
    """
    Tokenizer for Evo2 models - wraps the CharLevelTokenizer to be compatible with HuggingFace.

    Tokens are single characters, so token ids are simply the characters'
    byte/ASCII values (see `_convert_token_to_id` / `_convert_id_to_token`).
    """
    vocab_files_names = {}  # No vocab files needed
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        evo2_tokenizer,
        bos_token="<s>",
        eos_token="</s>",
        pad_token="<pad>",
        unk_token="<unk>",
        **kwargs
    ):
        """
        Initialize the Evo2Tokenizer.

        Args:
            evo2_tokenizer: The Evo2 CharLevelTokenizer to wrap
            bos_token: Beginning of sequence token
            eos_token: End of sequence token
            pad_token: Padding token
            unk_token: Unknown token
        """
        self.evo2_tokenizer = evo2_tokenizer

        # Map special tokens to Evo2 tokenizer's special token IDs
        self._pad_token = pad_token
        self._eos_token = eos_token
        self._bos_token = bos_token
        self._unk_token = unk_token

        # Initialize with special tokens
        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            pad_token=pad_token,
            unk_token=unk_token,
            **kwargs
        )

        # Set token IDs from Evo2 tokenizer. Done after super().__init__ so
        # these override whatever ids the base class derived from the token
        # strings above.
        self.pad_token_id = self.evo2_tokenizer.pad_id
        self.eos_token_id = self.evo2_tokenizer.eos_id

    @property
    def vocab_size(self) -> int:
        """Return the vocab size of the tokenizer."""
        return self.evo2_tokenizer.vocab_size

    def get_vocab(self) -> Dict:
        """Return vocab as a dictionary."""
        # Evo2 CharLevelTokenizer doesn't have a traditional vocab dict
        # Create a simple mapping of ASCII codes to tokens
        return {chr(i): i for i in range(self.vocab_size)}

    def _tokenize(self, text: str) -> List[int]:
        """Tokenize a string using the Evo2 tokenizer (one char per token)."""
        return [chr(int(token)) for token in self.evo2_tokenizer.tokenize(text)]

    def _convert_token_to_id(self, token: str) -> int:
        """Convert a token to an id using the Evo2 tokenizer."""
        # Since tokens are just characters, convert to their ASCII value
        return ord(token)

    def _convert_id_to_token(self, index: int) -> str:
        """Convert an id to a token using the Evo2 tokenizer."""
        # Convert ASCII value back to character
        return chr(index)

    def convert_tokens_to_string(self, tokens: List[str]) -> str:
        """Convert a sequence of tokens to a single string."""
        return "".join(tokens)

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """No vocabulary to save for Evo2Tokenizer, so just return an empty tuple."""
        return ()

    def __call__(
        self,
        text: Union[str, List[str]],
        text_pair: Optional[Union[str, List[str]]] = None,
        padding: Union[bool, str] = False,
        truncation: Union[bool, str] = False,
        max_length: Optional[int] = None,
        return_tensors: Optional[str] = None,
        return_token_type_ids: Optional[bool] = None,
        return_attention_mask: Optional[bool] = True,
        **kwargs
    ) -> Dict[str, torch.Tensor]:
        """
        Main tokenization method that handles batching and converts to tensors.

        NOTE(review): `text_pair` and `return_token_type_ids` are accepted for
        PreTrainedTokenizer API compatibility but are ignored in this body.
        """
        # Handle single string vs list of strings
        if isinstance(text, str):
            text = [text]

        # Tokenize all sequences - note: tokenizer only accepts strings, not lists
        input_ids_list = []
        for seq in text:
            # Tokenize and convert numpy.uint8 to Python integers
            tokens = [int(token) for token in self.evo2_tokenizer.tokenize(seq)]

            # Truncate if needed
            if truncation and max_length and len(tokens) > max_length:
                tokens = tokens[:max_length]

            input_ids_list.append(tokens)

        # Apply padding if needed
        if padding:
            # NOTE(review): the max_length branch is deliberately disabled, so
            # batches are always padded to the longest sequence in the batch
            # rather than to `max_length` — confirm this is intended.
            if False:#max_length:
                max_len = max_length
            else:
                max_len = max(len(ids) for ids in input_ids_list)

            # Create padded sequences and attention masks
            padded_input_ids = []
            attention_mask = []

            for ids in input_ids_list:
                # Apply left padding (pad on the left)
                padding_length = max_len - len(ids)
                padded_ids = [self.pad_token_id] * padding_length + ids
                mask = [0] * padding_length + [1] * len(ids)

                padded_input_ids.append(padded_ids)
                attention_mask.append(mask)

            input_ids_list = padded_input_ids
        else:
            # Create attention mask without padding
            attention_mask = [[1] * len(ids) for ids in input_ids_list]

        # Create result dictionary
        result = {"input_ids": input_ids_list}
        if return_attention_mask:
            result["attention_mask"] = attention_mask

        # Convert to tensors if requested
        if return_tensors == "pt":
            result = {k: torch.tensor(v) for k, v in result.items()}

        # Return a BatchEncoding object rather than a plain dictionary
        return BatchEncoding(
            data=result,
            tensor_type=return_tensors,
            prepend_batch_axis=False,  # Already handled in our tensor creation
            encoding=None  # No encoding info from Evo2's tokenizer
        )

    def batch_decode(
        self,
        sequences: Union[List[int], List[List[int]], torch.Tensor],
        skip_special_tokens: bool = False,
        **kwargs
    ) -> List[str]:
        """
        Decode a batch of token ids to strings.

        NOTE(review): `skip_special_tokens` is ignored; decoding is delegated
        entirely to the wrapped tokenizer's `detokenize_batch`.
        """
        if isinstance(sequences, torch.Tensor):
            sequences = sequences.tolist()

        return self.evo2_tokenizer.detokenize_batch(sequences)

    def decode(
        self,
        token_ids: Union[int, List[int], torch.Tensor],
        skip_special_tokens: bool = False,
        **kwargs
    ) -> str:
        """
        Decode a single sequence of token ids to a string.
        """
        if isinstance(token_ids, torch.Tensor):
            token_ids = token_ids.tolist()

        # Single sequence: a scalar, an empty list, or a flat list of ids.
        if not isinstance(token_ids, list) or not token_ids or not isinstance(token_ids[0], (list, torch.Tensor)):
            return self.evo2_tokenizer.detokenize(token_ids)

        # Batch with one item
        return self.batch_decode(token_ids, skip_special_tokens, **kwargs)[0]
201
+
202
+
203
+ # Register the tokenizer - you'll need to do this when your script loads
204
+ # You might want to put this in your __init__.py file
205
def register_evo2_tokenizer():
    """Register the Evo2Tokenizer with HuggingFace's AutoTokenizer."""
    # After this call, AutoTokenizer.from_pretrained can resolve the
    # "evo2" tokenizer type to our Evo2Tokenizer wrapper class.
    AutoTokenizer.register("evo2", Evo2Tokenizer)

    # A matching config class would be registered the same way:
    # from transformers.models.auto import AutoConfig
    # AutoConfig.register("evo2", Evo2Config)

    print("Evo2Tokenizer registered with AutoTokenizer")
216
+
217
+
218
# Allow running this module directly to perform the registration.
if __name__ == "__main__":
    register_evo2_tokenizer()
BioReason-main/bioreason/trainer/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from .grpo_config import DNALLMGRPOConfig
2
+ from .grpo_trainer import DNALLMGRPOTrainer
3
+
4
+ __all__ = [
5
+ "DNALLMGRPOConfig",
6
+ "DNALLMGRPOTrainer",
7
+ ]
BioReason-main/bioreason/trainer/demo_grpo.py ADDED
@@ -0,0 +1,811 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import textwrap
17
+ import warnings
18
+ from collections import defaultdict
19
+ from typing import Any, Callable, Optional, Sized, Union
20
+ from unittest.mock import patch
21
+
22
+ import torch
23
+ import torch.utils.data
24
+ import transformers
25
+ from accelerate.utils import broadcast_object_list, gather, gather_object, is_peft_model, set_seed
26
+ from accelerate.utils.other import is_compiled_module
27
+ from datasets import Dataset, IterableDataset
28
+ from packaging import version
29
+ from torch import nn
30
+ from torch.utils.data import Sampler
31
+ from transformers import (
32
+ AutoModelForCausalLM,
33
+ AutoModelForSequenceClassification,
34
+ AutoTokenizer,
35
+ GenerationConfig,
36
+ PreTrainedModel,
37
+ PreTrainedTokenizerBase,
38
+ Trainer,
39
+ TrainerCallback,
40
+ is_wandb_available,
41
+ )
42
+ from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
43
+ from transformers.utils import is_peft_available
44
+
45
+ from trl.data_utils import apply_chat_template, is_conversational, maybe_apply_chat_template
46
+ from trl.import_utils import is_vllm_available
47
+ from trl.models import create_reference_model, prepare_deepspeed, unwrap_model_for_generation
48
+ from trl import SyncRefModelCallback
49
+ from trl import GRPOConfig
50
+ from trl.trainer.utils import generate_model_card, get_comet_experiment_url, pad, selective_log_softmax
51
+
52
+
53
+ if is_peft_available():
54
+ from peft import PeftConfig, get_peft_model
55
+
56
+ if is_vllm_available():
57
+ from vllm import LLM, SamplingParams
58
+
59
+ if is_wandb_available():
60
+ import wandb
61
+
62
# What we call a reward function is a callable that takes a list of prompts and completions and returns a list of
# rewards. When it's a string, it's a model ID, so it's loaded as a pretrained model.
# Type alias used by the trainer below for its `reward_funcs` argument.
RewardFunc = Union[str, PreTrainedModel, Callable[[list, list], list[float]]]
65
+
66
+
67
class RepeatRandomSampler(Sampler):
    """
    Sampler that yields a random permutation of dataset indices, emitting each
    index `repeat_count` times consecutively.

    Args:
        data_source (`Sized`):
            Dataset to sample from.
        repeat_count (`int`):
            Number of times to repeat each index.
        seed (`Optional[int]`):
            Random seed for reproducibility (only affects this sampler).

    Example:
        ```python
        >>> sampler = RepeatRandomSampler(["a", "b", "c", "d"], repeat_count=2)
        >>> list(sampler)
        [2, 2, 0, 0, 3, 3, 1, 1]
        ```
    """

    def __init__(self, data_source: Sized, repeat_count: int, seed: Optional[int] = None):
        self.data_source = data_source
        self.repeat_count = repeat_count
        self.num_samples = len(data_source)
        self.seed = seed
        # A local generator so seeding here never disturbs global RNG state.
        self.generator = torch.Generator()
        if seed is not None:
            self.generator.manual_seed(seed)

    def __iter__(self):
        # Shuffle once, then emit each shuffled index repeat_count times in a row.
        shuffled = torch.randperm(self.num_samples, generator=self.generator).tolist()
        repeated = []
        for position in shuffled:
            repeated.extend([position] * self.repeat_count)
        return iter(repeated)

    def __len__(self):
        return self.num_samples * self.repeat_count
106
+
107
+ # made this to test out the usual pipeline of GRPOTrainer data, and add my own debug messages
108
+ class FakeGRPOTrainer(Trainer):
109
+ """
110
+ Trainer for the Group Relative Policy Optimization (GRPO) method. This algorithm was initially proposed in the
111
+ paper [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
112
+
113
+ Example:
114
+
115
+ ```python
116
+ from datasets import load_dataset
117
+ from trl import GRPOTrainer
118
+
119
+ dataset = load_dataset("trl-lib/tldr", split="train")
120
+
121
+ def reward_func(completions, **kwargs):
122
+ # Dummy reward function that rewards completions with more unique letters.
123
+ return [float(len(set(completion))) for completion in completions]
124
+
125
+ trainer = GRPOTrainer(
126
+ model="Qwen/Qwen2-0.5B-Instruct",
127
+ reward_funcs=reward_func,
128
+ train_dataset=dataset,
129
+ )
130
+
131
+ trainer.train()
132
+ ```
133
+
134
+ Args:
135
+ model (`Union[str, PreTrainedModel]`):
136
+ Model to be trained. Can be either:
137
+
138
+ - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or
139
+ a path to a *directory* containing model weights saved using
140
+ [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is
141
+ loaded using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keywork arguments
142
+ in `args.model_init_kwargs`.
143
+ - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported.
144
+ reward_funcs (`Union[RewardFunc, list[RewardFunc]]`):
145
+ Reward functions to be used for computing the rewards. To compute the rewards, we call all the reward
146
+ functions with the prompts and completions and sum the rewards. Can be either:
147
+
148
+ - A single reward function, such as:
149
+ - A string: The *model ID* of a pretrained model hosted inside a model repo on huggingface.co, or a
150
+ path to a *directory* containing model weights saved using
151
+ [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded
152
+ using [`~transformers.AutoModelForSequenceClassification.from_pretrained`] with `num_labels=1` and the
153
+ keyword arguments in `args.model_init_kwargs`.
154
+ - A [`~transformers.PreTrainedModel`] object: Only sequence classification models are supported.
155
+ - A custom reward function: The function is provided with the prompts and the generated completions,
156
+ plus any additional columns in the dataset. It should return a list of rewards. For more details, see
157
+ [Using a custom reward function](#using-a-custom-reward-function).
158
+ - A list of reward functions, where each item can independently be any of the above types. Mixing different
159
+ types within the list (e.g., a string model ID and a custom reward function) is allowed.
160
+ args ([`GRPOConfig`], *optional*, defaults to `None`):
161
+ Configuration for this trainer. If `None`, a default configuration is used.
162
+ train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
163
+ Dataset to use for training. It must include a column `"prompt"`. Any additional columns in the dataset is
164
+ ignored. The format of the samples can be either:
165
+
166
+ - [Standard](dataset_formats#standard): Each sample contains plain text.
167
+ - [Conversational](dataset_formats#conversational): Each sample contains structured messages (e.g., role
168
+ and content).
169
+ eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
170
+ Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
171
+ processing_class ([`~transformers.PreTrainedTokenizerBase`], *optional*, defaults to `None`):
172
+ Processing class used to process the data. The padding side must be set to "left". If `None`, the
173
+ processing class is loaded from the model's name with [`~transformers.AutoTokenizer.from_pretrained`].
174
+ reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`):
175
+ Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either:
176
+
177
+ - A single processing class: Used when `reward_funcs` contains only one reward function.
178
+ - A list of processing classes: Must match the order and length of the reward functions in `reward_funcs`.
179
+ If set to `None`, or if an element of the list corresponding to a [`~transformers.PreTrainedModel`] is
180
+ `None`, the tokenizer for the model is automatically loaded using [`~transformers.AutoTokenizer.from_pretrained`].
181
+ For elements in `reward_funcs` that are custom reward functions (not [`~transformers.PreTrainedModel`]),
182
+ the corresponding entries in `reward_processing_classes` are ignored.
183
+ callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
184
+ List of callbacks to customize the training loop. Will add those to the list of default callbacks
185
+ detailed in [here](https://huggingface.co/docs/transformers/main_classes/callback).
186
+
187
+ If you want to remove one of the default callbacks used, use the [`~transformers.Trainer.remove_callback`]
188
+ method.
189
+ optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`):
190
+ A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your
191
+ model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
192
+ peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
193
+ PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
194
+ """
195
+
196
+ _tag_names = ["trl", "grpo"]
197
+
198
+ def __init__(
199
+ self,
200
+ model: Union[str, PreTrainedModel],
201
+ reward_funcs: Union[RewardFunc, list[RewardFunc]],
202
+ args: GRPOConfig = None,
203
+ train_dataset: Optional[Union[Dataset, IterableDataset]] = None,
204
+ eval_dataset: Optional[Union[Dataset, IterableDataset, dict[str, Union[Dataset, IterableDataset]]]] = None,
205
+ processing_class: Optional[PreTrainedTokenizerBase] = None,
206
+ reward_processing_classes: Optional[Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]] = None,
207
+ callbacks: Optional[list[TrainerCallback]] = None,
208
+ optimizers: tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None),
209
+ peft_config: Optional["PeftConfig"] = None,
210
+ ):
211
+ # Args
212
+ if args is None:
213
+ model_name = model if isinstance(model, str) else model.config._name_or_path
214
+ model_name = model_name.split("/")[-1]
215
+ args = GRPOConfig(f"{model_name}-GRPO")
216
+
217
+ # Models
218
+ # Trained model
219
+ model_init_kwargs = args.model_init_kwargs or {}
220
+ if isinstance(model, str):
221
+ model_id = model
222
+ torch_dtype = model_init_kwargs.get("torch_dtype")
223
+ if isinstance(torch_dtype, torch.dtype) or torch_dtype == "auto" or torch_dtype is None:
224
+ pass # torch_dtype is already a torch.dtype or "auto" or None
225
+ elif isinstance(torch_dtype, str): # it's a str, but not "auto"
226
+ torch_dtype = getattr(torch, torch_dtype)
227
+ model_init_kwargs["torch_dtype"] = torch_dtype
228
+ else:
229
+ raise ValueError(
230
+ "Invalid `torch_dtype` passed to `GRPOConfig`. Expected either 'auto' or a string representing "
231
+ f"a `torch.dtype` (e.g., 'float32'), but got {torch_dtype}."
232
+ )
233
+ # Disable caching if gradient checkpointing is enabled (not supported)
234
+ model_init_kwargs["use_cache"] = (
235
+ False if args.gradient_checkpointing else model_init_kwargs.get("use_cache")
236
+ )
237
+ model = AutoModelForCausalLM.from_pretrained(model, **model_init_kwargs)
238
+ else:
239
+ model_id = model.config._name_or_path
240
+ if args.model_init_kwargs is not None:
241
+ raise ValueError(
242
+ "You passed `model_init_kwargs` to the `GRPOConfig`, but your model is already instantiated. "
243
+ "This argument can only be used when the `model` argument is a string."
244
+ )
245
+
246
+ if peft_config is not None:
247
+ model = get_peft_model(model, peft_config)
248
+
249
+ # Reference model
250
+ if is_deepspeed_zero3_enabled():
251
+ self.ref_model = AutoModelForCausalLM.from_pretrained(model_id, **model_init_kwargs)
252
+ elif not is_peft_model(model):
253
+ # If PEFT configuration is not provided, create a reference model based on the initial model.
254
+ self.ref_model = create_reference_model(model)
255
+ else:
256
+ # If PEFT is used, the reference model is not needed since the adapter can be disabled
257
+ # to revert to the initial model.
258
+ self.ref_model = None
259
+
260
+ # Processing class
261
+ if processing_class is None:
262
+ processing_class = AutoTokenizer.from_pretrained(model.config._name_or_path, padding_side="left")
263
+
264
+ # Reward functions
265
+ if not isinstance(reward_funcs, list):
266
+ reward_funcs = [reward_funcs]
267
+ for i, reward_func in enumerate(reward_funcs):
268
+ if isinstance(reward_func, str):
269
+ reward_funcs[i] = AutoModelForSequenceClassification.from_pretrained(
270
+ reward_func, num_labels=1, **model_init_kwargs
271
+ )
272
+ self.reward_funcs = reward_funcs
273
+
274
+ # Reward weights
275
+ if args.reward_weights is not None:
276
+ if len(args.reward_weights) != len(reward_funcs):
277
+ raise ValueError(
278
+ f"Number of reward weights ({len(args.reward_weights)}) must match number of reward "
279
+ f"functions ({len(reward_funcs)})"
280
+ )
281
+ self.reward_weights = torch.tensor(args.reward_weights, dtype=torch.float32)
282
+ else:
283
+ self.reward_weights = torch.ones(len(reward_funcs), dtype=torch.float32)
284
+
285
+ # Reward processing class
286
+ if reward_processing_classes is None:
287
+ reward_processing_classes = [None] * len(reward_funcs)
288
+ elif not isinstance(reward_processing_classes, list):
289
+ reward_processing_classes = [reward_processing_classes]
290
+ else:
291
+ if len(reward_processing_classes) != len(reward_funcs):
292
+ raise ValueError("The number of reward processing classes must match the number of reward functions.")
293
+
294
+ for i, (reward_processing_class, reward_func) in enumerate(zip(reward_processing_classes, reward_funcs)):
295
+ if isinstance(reward_func, PreTrainedModel):
296
+ if reward_processing_class is None:
297
+ reward_processing_class = AutoTokenizer.from_pretrained(reward_func.config._name_or_path)
298
+ if reward_processing_class.pad_token_id is None:
299
+ reward_processing_class.pad_token = reward_processing_class.eos_token
300
+ # The reward model computes the reward for the latest non-padded token in the input sequence.
301
+ # So it's important to set the pad token ID to the padding token ID of the processing class.
302
+ reward_func.config.pad_token_id = reward_processing_class.pad_token_id
303
+ reward_processing_classes[i] = reward_processing_class
304
+ self.reward_processing_classes = reward_processing_classes
305
+
306
+ # Data collator
307
+ def data_collator(features): # No data collation is needed in GRPO
308
+ return features
309
+
310
+ # Training arguments
311
+ self.max_prompt_length = args.max_prompt_length
312
+ self.max_completion_length = args.max_completion_length # = |o_i| in the GRPO paper
313
+ self.num_generations = args.num_generations # = G in the GRPO paper
314
+ self.use_vllm = args.use_vllm
315
+
316
+ self.beta = args.beta
317
+
318
+ # The trainer estimates the number of FLOPs (floating-point operations) using the number of elements in the
319
+ # input tensor associated with the key "input_ids". However, in GRPO, the sampled data does not include the
320
+ # "input_ids" key. Instead, the available keys is "prompt". As a result, the trainer issues the warning:
321
+ # "Could not estimate the number of tokens of the input, floating-point operations will not be computed." To
322
+ # suppress this warning, we set the "estimate_tokens" key in the model's "warnings_issued" dictionary to True.
323
+ # This acts as a flag to indicate that the warning has already been issued.
324
+ model.warnings_issued["estimate_tokens"] = True
325
+
326
+ # Initialize the metrics
327
+ self._metrics = defaultdict(list)
328
+ self.log_completions = args.log_completions
329
+
330
+ super().__init__(
331
+ model=model,
332
+ args=args,
333
+ data_collator=data_collator,
334
+ train_dataset=train_dataset,
335
+ eval_dataset=eval_dataset,
336
+ processing_class=processing_class,
337
+ callbacks=callbacks,
338
+ optimizers=optimizers,
339
+ )
340
+
341
+ # Check if the per_device_train/eval_batch_size * num processes can be divided by the number of generations
342
+ num_processes = self.accelerator.num_processes
343
+ global_batch_size = args.per_device_train_batch_size * num_processes
344
+ possible_values = [n_gen for n_gen in range(2, global_batch_size + 1) if (global_batch_size) % n_gen == 0]
345
+ if self.num_generations not in possible_values:
346
+ raise ValueError(
347
+ f"The global train batch size ({num_processes} x {args.per_device_train_batch_size}) must be evenly "
348
+ f"divisible by the number of generations per prompt ({self.num_generations}). Given the current train "
349
+ f"batch size, the valid values for the number of generations are: {possible_values}."
350
+ )
351
+ if self.args.eval_strategy != "no":
352
+ global_batch_size = args.per_device_eval_batch_size * num_processes
353
+ possible_values = [n_gen for n_gen in range(2, global_batch_size + 1) if (global_batch_size) % n_gen == 0]
354
+ if self.num_generations not in possible_values:
355
+ raise ValueError(
356
+ f"The global eval batch size ({num_processes} x {args.per_device_eval_batch_size}) must be evenly "
357
+ f"divisible by the number of generations per prompt ({self.num_generations}). Given the current "
358
+ f"eval batch size, the valid values for the number of generations are: {possible_values}."
359
+ )
360
+
361
+ # Ensure each process receives a unique seed to prevent duplicate completions when generating with
362
+ # transformers if num_generations exceeds per_device_train_batch_size. We could skip it if we use vLLM, but
363
+ # it's safer to set it in all cases.
364
+ set_seed(args.seed, device_specific=True)
365
+
366
+ if self.use_vllm:
367
+ if not is_vllm_available():
368
+ raise ImportError(
369
+ "vLLM is not available and `use_vllm` is set to True. Please install vLLM with "
370
+ "`pip install vllm` to use it."
371
+ )
372
+
373
+ if self.accelerator.is_main_process:
374
+ vllm_device = self.args.vllm_device
375
+ if vllm_device == "auto":
376
+ if torch.cuda.device_count() == 1:
377
+ vllm_device = "cuda:0" # particular case when training with only 1 GPU: share it
378
+ else:
379
+ vllm_device = f"cuda:{self.accelerator.num_processes}" # take the next GPU idx
380
+ # Check that the requested device is available
381
+ if vllm_device.split(":")[0] == "cuda" and int(vllm_device.split(":")[1]) >= torch.cuda.device_count():
382
+ raise ValueError(
383
+ f"The requested device for vllm ({vllm_device}) is not available. You are likely using vLLM "
384
+ "without restricting the number of GPUs for training. Set the `--num_processes` argument to a "
385
+ "value lower than the number of GPUs available on your machine—typically, reducing it by one "
386
+ f"is sufficient. In your case: `--num_processes {torch.cuda.device_count() - 1}`."
387
+ )
388
+ # Check that the requested device is not also used for training
389
+ if vllm_device in {f"cuda:{idx}" for idx in range(self.accelerator.num_processes)}:
390
+ warnings.warn(
391
+ f"The requested device {vllm_device} is also being used for training. For higher throughput "
392
+ "and to avoid out-of-memory errors, it is recommended to use a dedicated device for vLLM. "
393
+ "If this is intentional, you may ignore this warning but should adjust "
394
+ "`vllm_gpu_memory_utilization` accordingly."
395
+ )
396
+ # vLLM is not compatible with accelerate. So we need to patch it to make sure we can (1) place the vLLM
397
+ # model on the desired device (world_size_patch) and (2) avoid a test that is not designed for our
398
+ # setting (profiling_patch).
399
+ world_size_patch = patch("torch.distributed.get_world_size", return_value=1)
400
+ profiling_patch = patch(
401
+ "vllm.worker.worker.Worker._assert_memory_footprint_increased_during_profiling", return_value=None
402
+ )
403
+ with world_size_patch, profiling_patch:
404
+ self.llm = LLM(
405
+ model=model.name_or_path,
406
+ device=vllm_device,
407
+ gpu_memory_utilization=self.args.vllm_gpu_memory_utilization,
408
+ dtype=self.args.vllm_dtype,
409
+ # Automatic Prefix Caching caches the KV cache of existing queries, so that a new query can
410
+ # directly reuse the KV cache if it shares the same prefix with one of the existing queries.
411
+ # This is particularly useful here because we generate completions from the same prompts.
412
+ enable_prefix_caching=True,
413
+ max_model_len=self.args.vllm_max_model_len,
414
+ )
415
+ self.sampling_params = SamplingParams(
416
+ temperature=args.temperature,
417
+ max_tokens=self.max_completion_length,
418
+ )
419
+
420
+ self._last_loaded_step = 0 # tag to avoid useless loading during grad accumulation
421
+
422
+ # When using vLLM, the main process is responsible for loading the model weights. This can cause process
423
+ # desynchronization and seems to lead to DeepSpeed hanging during initialization. To prevent this, we
424
+ # synchronize all processes after vLLM has been fully initialized.
425
+ self.accelerator.wait_for_everyone()
426
+ else:
427
+ self.generation_config = GenerationConfig(
428
+ max_new_tokens=self.max_completion_length,
429
+ do_sample=True,
430
+ temperature=args.temperature,
431
+ pad_token_id=processing_class.pad_token_id,
432
+ )
433
+
434
+ # Gradient accumulation requires scaled loss. Normally, loss scaling in the parent class depends on whether the
435
+ # model accepts loss-related kwargs. Since we compute our own loss, this check is irrelevant. We set
436
+ # self.model_accepts_loss_kwargs to False to enable scaling.
437
+ self.model_accepts_loss_kwargs = False
438
+
439
+ # Add tags to the model
440
+ self.model.add_model_tags(self._tag_names)
441
+
442
+ if self.ref_model is not None:
443
+ if self.is_deepspeed_enabled:
444
+ self.ref_model = prepare_deepspeed(self.ref_model, self.accelerator)
445
+ else:
446
+ self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)
447
+
448
+ if args.sync_ref_model:
449
+ self.add_callback(SyncRefModelCallback(ref_model=self.ref_model, accelerator=self.accelerator))
450
+
451
+ for i, reward_func in enumerate(self.reward_funcs):
452
+ if isinstance(reward_func, PreTrainedModel):
453
+ self.reward_funcs[i] = self.accelerator.prepare_model(reward_func, evaluation_mode=True)
454
+
455
+ def _set_signature_columns_if_needed(self):
456
+ # If `self.args.remove_unused_columns` is True, non-signature columns are removed.
457
+ # By default, this method sets `self._signature_columns` to the model's expected inputs.
458
+ # In GRPOTrainer, we preprocess data, so using the model's signature columns doesn't work.
459
+ # Instead, we set them to the columns expected by the `training_step` method, hence the override.
460
+ if self._signature_columns is None:
461
+ self._signature_columns = ["prompt"]
462
+
463
+ def _get_train_sampler(self) -> Sampler:
464
+ # Returns a sampler that ensures each prompt is repeated across multiple processes. This guarantees that
465
+ # identical prompts are distributed to different GPUs, allowing rewards to be computed and normalized correctly
466
+ # within each prompt group. Using the same seed across processes ensures consistent prompt assignment,
467
+ # preventing discrepancies in group formation.
468
+ return RepeatRandomSampler(self.train_dataset, self.num_generations, seed=self.args.seed)
469
+
470
+ def _get_eval_sampler(self, eval_dataset) -> Sampler:
471
+ # Returns a sampler that ensures each prompt is repeated across multiple processes. This guarantees that
472
+ # identical prompts are distributed to different GPUs, allowing rewards to be computed and normalized correctly
473
+ # within each prompt group. Using the same seed across processes ensures consistent prompt assignment,
474
+ # preventing discrepancies in group formation.
475
+ return RepeatRandomSampler(eval_dataset, self.num_generations, seed=self.args.seed)
476
+
477
+ # Get the per-token log probabilities for the completions for the model and the reference model
478
+ def _get_per_token_logps(self, model, input_ids, attention_mask, logits_to_keep):
479
+ # We add 1 to `logits_to_keep` because the last logits of the sequence is later excluded
480
+ logits = model(input_ids=input_ids, attention_mask=attention_mask, logits_to_keep=logits_to_keep + 1).logits
481
+ logits = logits[:, :-1, :] # (B, L-1, V), exclude the last logit: it corresponds to the next token pred
482
+
483
+ input_ids = input_ids[:, -logits_to_keep:]
484
+ # For transformers<=4.48, logits_to_keep argument isn't supported, so here we drop logits ourselves.
485
+ # See https://github.com/huggingface/trl/issues/2770
486
+ logits = logits[:, -logits_to_keep:]
487
+ return selective_log_softmax(logits, input_ids) # compute logprobs for the input tokens
488
+
489
    def _move_model_to_vllm(self):
        """Copy the current policy weights into the colocated vLLM engine.

        Gathers the (possibly DeepSpeed-ZeRO-3-sharded) weights, temporarily
        merges any PEFT adapters so the exported state dict matches a plain
        checkpoint layout, loads the weights into vLLM on the main process,
        then unmerges the adapters to restore the training model.
        """
        # NOTE(review): assumes the vLLM engine (self.llm) was created in
        # __init__ because use_vllm=True — confirm before calling elsewhere.
        with unwrap_model_for_generation(
            self.model, self.accelerator, gather_deepspeed3_params=self.args.ds3_gather_for_generation
        ) as unwrapped_model:
            # torch.compile wraps the module; the real parameters live on _orig_mod.
            if is_compiled_module(unwrapped_model):
                unwrapped_model = unwrapped_model._orig_mod
            if is_peft_model(unwrapped_model):
                # Fold adapter weights into the base weights so the exported
                # state dict looks like a non-PEFT checkpoint.
                unwrapped_model.merge_adapter()
                state_dict = unwrapped_model.state_dict()
                # Remove base_model and base_layer prefixes
                state_dict = {
                    k.removeprefix("base_model.model.").replace(".base_layer", ""): v for k, v in state_dict.items()
                }
                # Remove values with adapter prefix (example: "_lora")
                state_dict = {k: v for k, v in state_dict.items() if unwrapped_model.prefix not in k}
                # When module to save, remove its prefix and discard the original module
                state_dict = {
                    k.replace("modules_to_save.default.", ""): v
                    for k, v in state_dict.items()
                    if "original_module" not in k
                }
            else:
                state_dict = unwrapped_model.state_dict()
            # Only the main process owns the vLLM engine; it loads the weights
            # straight into the engine's underlying model.
            if self.accelerator.is_main_process:
                llm_model = self.llm.llm_engine.model_executor.driver_worker.model_runner.model
                llm_model.load_weights(state_dict.items())
            # Unmerge the adapter to restore the model to its original state.
            # This must be done after loading weights to ensure they correspond to the merged state.
            if is_peft_model(unwrapped_model):
                unwrapped_model.unmerge_adapter()
519
+
520
+ def _prepare_inputs(self, inputs: dict[str, Union[torch.Tensor, Any]]) -> dict[str, Union[torch.Tensor, Any]]:
521
+ device = self.accelerator.device
522
+ prompts = [x["prompt"] for x in inputs]
523
+ prompts_text = [maybe_apply_chat_template(example, self.processing_class)["prompt"] for example in inputs]
524
+ prompt_inputs = self.processing_class(
525
+ prompts_text, return_tensors="pt", padding=True, padding_side="left", add_special_tokens=False
526
+ )
527
+ prompt_inputs = super()._prepare_inputs(prompt_inputs)
528
+ prompt_ids, prompt_mask = prompt_inputs["input_ids"], prompt_inputs["attention_mask"]
529
+
530
+ if self.max_prompt_length is not None:
531
+ prompt_ids = prompt_ids[:, -self.max_prompt_length :]
532
+ prompt_mask = prompt_mask[:, -self.max_prompt_length :]
533
+
534
+ # Generate completions using either vLLM or regular generation
535
+ if self.args.use_vllm:
536
+ # First, have main process load weights if needed
537
+ if self.state.global_step != self._last_loaded_step:
538
+ self._move_model_to_vllm()
539
+ self._last_loaded_step = self.state.global_step
540
+
541
+ # Generate completions using vLLM: gather all prompts and use them in a single call in the main process
542
+ all_prompts_text = gather_object(prompts_text)
543
+ if self.accelerator.is_main_process:
544
+ outputs = self.llm.generate(all_prompts_text, sampling_params=self.sampling_params, use_tqdm=False)
545
+ completion_ids = [out.token_ids for completions in outputs for out in completions.outputs]
546
+ else:
547
+ completion_ids = [None] * len(all_prompts_text)
548
+ # Broadcast the completions from the main process to all processes, ensuring each process receives its
549
+ # corresponding slice.
550
+ completion_ids = broadcast_object_list(completion_ids, from_process=0)
551
+ process_slice = slice(
552
+ self.accelerator.process_index * len(prompts),
553
+ (self.accelerator.process_index + 1) * len(prompts),
554
+ )
555
+ completion_ids = completion_ids[process_slice]
556
+
557
+ # Pad the completions, and concatenate them with the prompts
558
+ completion_ids = [torch.tensor(ids, device=device) for ids in completion_ids]
559
+ completion_ids = pad(completion_ids, padding_value=self.processing_class.pad_token_id)
560
+ prompt_completion_ids = torch.cat([prompt_ids, completion_ids], dim=1)
561
+ else:
562
+ print("about to generate!!")
563
+ # Regular generation path
564
+ with unwrap_model_for_generation(self.model, self.accelerator) as unwrapped_model:
565
+ prompt_completion_ids = unwrapped_model.generate(
566
+ prompt_ids, attention_mask=prompt_mask, generation_config=self.generation_config
567
+ )
568
+
569
+ print('prompts_ids', prompt_ids, 'attention_mask', prompt_mask)
570
+ print('prompt_completion_ids', prompt_completion_ids)
571
+ print('prompt len', prompt_ids.size(1))
572
+
573
+ # Compute prompt length and extract completion ids
574
+ prompt_length = prompt_ids.size(1)
575
+ prompt_ids = prompt_completion_ids[:, :prompt_length]
576
+ completion_ids = prompt_completion_ids[:, prompt_length:]
577
+
578
+ # Mask everything after the first EOS token
579
+ is_eos = completion_ids == self.processing_class.eos_token_id
580
+ eos_idx = torch.full((is_eos.size(0),), is_eos.size(1), dtype=torch.long, device=device)
581
+ eos_idx[is_eos.any(dim=1)] = is_eos.int().argmax(dim=1)[is_eos.any(dim=1)]
582
+ sequence_indices = torch.arange(is_eos.size(1), device=device).expand(is_eos.size(0), -1)
583
+ completion_mask = (sequence_indices <= eos_idx.unsqueeze(1)).int()
584
+
585
+ # Concatenate prompt_mask with completion_mask for logit computation
586
+ attention_mask = torch.cat([prompt_mask, completion_mask], dim=1) # (B*G, P+C)
587
+
588
+ logits_to_keep = completion_ids.size(1) # we only need to compute the logits for the completion tokens
589
+
590
+ with torch.inference_mode():
591
+ if self.ref_model is not None:
592
+ ref_per_token_logps = self._get_per_token_logps(
593
+ self.ref_model, prompt_completion_ids, attention_mask, logits_to_keep
594
+ )
595
+ else:
596
+ with self.accelerator.unwrap_model(self.model).disable_adapter():
597
+ ref_per_token_logps = self._get_per_token_logps(
598
+ self.model, prompt_completion_ids, attention_mask, logits_to_keep
599
+ )
600
+
601
+ # Decode the generated completions
602
+ completions_text = self.processing_class.batch_decode(completion_ids, skip_special_tokens=True)
603
+ if is_conversational(inputs[0]):
604
+ completions = []
605
+ for prompt, completion in zip(prompts, completions_text):
606
+ bootstrap = prompt.pop()["content"] if prompt[-1]["role"] == "assistant" else ""
607
+ completions.append([{"role": "assistant", "content": bootstrap + completion}])
608
+ else:
609
+ completions = completions_text
610
+
611
+ rewards_per_func = torch.zeros(len(prompts), len(self.reward_funcs), device=device)
612
+ for i, (reward_func, reward_processing_class) in enumerate(
613
+ zip(self.reward_funcs, self.reward_processing_classes)
614
+ ):
615
+ if isinstance(reward_func, nn.Module): # Module instead of PretrainedModel for compat with compiled models
616
+ if is_conversational(inputs[0]):
617
+ messages = [{"messages": p + c} for p, c in zip(prompts, completions)]
618
+ texts = [apply_chat_template(x, reward_processing_class)["text"] for x in messages]
619
+ else:
620
+ texts = [p + c for p, c in zip(prompts, completions)]
621
+ reward_inputs = reward_processing_class(
622
+ texts, return_tensors="pt", padding=True, padding_side="right", add_special_tokens=False
623
+ )
624
+ reward_inputs = super()._prepare_inputs(reward_inputs)
625
+ with torch.inference_mode():
626
+ rewards_per_func[:, i] = reward_func(**reward_inputs).logits[:, 0] # Shape (B*G,)
627
+ else:
628
+ # Repeat all input columns (but "prompt" and "completion") to match the number of generations
629
+ keys = [key for key in inputs[0] if key not in ["prompt", "completion"]]
630
+ reward_kwargs = {key: [example[key] for example in inputs] for key in keys}
631
+ output_reward_func = reward_func(prompts=prompts, completions=completions, **reward_kwargs)
632
+ rewards_per_func[:, i] = torch.tensor(output_reward_func, dtype=torch.float32, device=device)
633
+
634
+ # Gather the reward per function: this part is crucial, because the rewards are normalized per group and the
635
+ # completions may be distributed across processes
636
+ rewards_per_func = gather(rewards_per_func)
637
+
638
+ # Apply weights to each reward function's output and sum
639
+ rewards = (rewards_per_func * self.reward_weights.to(device).unsqueeze(0)).sum(dim=1)
640
+
641
+ # Compute grouped-wise rewards
642
+ mean_grouped_rewards = rewards.view(-1, self.num_generations).mean(dim=1)
643
+ std_grouped_rewards = rewards.view(-1, self.num_generations).std(dim=1)
644
+
645
+ # Normalize the rewards to compute the advantages
646
+ mean_grouped_rewards = mean_grouped_rewards.repeat_interleave(self.num_generations, dim=0)
647
+ std_grouped_rewards = std_grouped_rewards.repeat_interleave(self.num_generations, dim=0)
648
+ advantages = (rewards - mean_grouped_rewards) / (std_grouped_rewards + 1e-4)
649
+
650
+ # Slice to keep only the local part of the data
651
+ process_slice = slice(
652
+ self.accelerator.process_index * len(prompts),
653
+ (self.accelerator.process_index + 1) * len(prompts),
654
+ )
655
+ advantages = advantages[process_slice]
656
+
657
+ # Log the metrics
658
+ reward_per_func = rewards_per_func.mean(0)
659
+ for i, reward_func in enumerate(self.reward_funcs):
660
+ if isinstance(reward_func, nn.Module): # Module instead of PretrainedModel for compat with compiled models
661
+ reward_func_name = reward_func.config._name_or_path.split("/")[-1]
662
+ else:
663
+ reward_func_name = reward_func.__name__
664
+ self._metrics[f"rewards/{reward_func_name}"].append(reward_per_func[i].item())
665
+
666
+ self._metrics["reward"].append(rewards.mean().item())
667
+ self._metrics["reward_std"].append(std_grouped_rewards.mean().item())
668
+
669
+ if (
670
+ self.log_completions
671
+ and self.state.global_step % self.args.logging_steps == 0
672
+ and "wandb" in self.args.report_to
673
+ ):
674
+ import pandas as pd
675
+
676
+ # For logging
677
+ table = {
678
+ "step": [str(self.state.global_step)] * len(rewards),
679
+ "prompt": gather_object(prompts_text),
680
+ "completion": gather_object(completions_text),
681
+ "reward": rewards.tolist(),
682
+ }
683
+ df = pd.DataFrame(table)
684
+
685
+ if wandb.run is not None and self.accelerator.is_main_process:
686
+ wandb.log({"completions": wandb.Table(dataframe=df)})
687
+
688
+ return {
689
+ "prompt_ids": prompt_ids,
690
+ "prompt_mask": prompt_mask,
691
+ "completion_ids": completion_ids,
692
+ "completion_mask": completion_mask,
693
+ "ref_per_token_logps": ref_per_token_logps,
694
+ "advantages": advantages,
695
+ }
696
+
697
+ def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
698
+ if return_outputs:
699
+ raise ValueError("The GRPOTrainer does not support returning outputs")
700
+ # Compute the per-token log probabilities for the model
701
+
702
+ prompt_ids, prompt_mask = inputs["prompt_ids"], inputs["prompt_mask"]
703
+ completion_ids, completion_mask = inputs["completion_ids"], inputs["completion_mask"]
704
+ input_ids = torch.cat([prompt_ids, completion_ids], dim=1)
705
+ attention_mask = torch.cat([prompt_mask, completion_mask], dim=1)
706
+ logits_to_keep = completion_ids.size(1) # we only need to compute the logits for the completion tokens
707
+
708
+ per_token_logps = self._get_per_token_logps(model, input_ids, attention_mask, logits_to_keep)
709
+
710
+ # Compute the KL divergence between the model and the reference model
711
+ ref_per_token_logps = inputs["ref_per_token_logps"]
712
+ per_token_kl = torch.exp(ref_per_token_logps - per_token_logps) - (ref_per_token_logps - per_token_logps) - 1
713
+
714
+ # x - x.detach() allows for preserving gradients from x
715
+ advantages = inputs["advantages"]
716
+ per_token_loss = torch.exp(per_token_logps - per_token_logps.detach()) * advantages.unsqueeze(1)
717
+ per_token_loss = -(per_token_loss - self.beta * per_token_kl)
718
+ loss = ((per_token_loss * completion_mask).sum(dim=1) / completion_mask.sum(dim=1)).mean()
719
+
720
+ # Log the metrics
721
+ completion_length = self.accelerator.gather_for_metrics(completion_mask.sum(1)).float().mean().item()
722
+ self._metrics["completion_length"].append(completion_length)
723
+
724
+ mean_kl = ((per_token_kl * completion_mask).sum(dim=1) / completion_mask.sum(dim=1)).mean()
725
+ self._metrics["kl"].append(self.accelerator.gather_for_metrics(mean_kl).mean().item())
726
+
727
+ return loss
728
+
729
+ def prediction_step(self, model, inputs, prediction_loss_only, ignore_keys: Optional[list[str]] = None):
730
+ inputs = self._prepare_inputs(inputs)
731
+ print("about to loss")
732
+ with torch.no_grad():
733
+ with self.compute_loss_context_manager():
734
+ loss = self.compute_loss(model, inputs)
735
+ loss = loss.mean().detach()
736
+ print("loss computed")
737
+ return loss, None, None
738
+
739
+ def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None:
740
+ metrics = {key: sum(val) / len(val) for key, val in self._metrics.items()} # average the metrics
741
+
742
+ # This method can be called both in training and evaluation. When called in evaluation, the keys in `logs`
743
+ # start with "eval_". We need to add the prefix "eval_" to the keys in `metrics` to match the format.
744
+ if next(iter(logs.keys())).startswith("eval_"):
745
+ metrics = {f"eval_{key}": val for key, val in metrics.items()}
746
+
747
+ logs = {**logs, **metrics}
748
+ if version.parse(transformers.__version__) >= version.parse("4.47.0.dev0"):
749
+ super().log(logs, start_time)
750
+ else: # transformers<=4.46
751
+ super().log(logs)
752
+ self._metrics.clear()
753
+
754
    def create_model_card(
        self,
        model_name: Optional[str] = None,
        dataset_name: Optional[str] = None,
        tags: Union[str, list[str], None] = None,
    ):
        """
        Creates a draft of a model card using the information available to the `Trainer`.

        Args:
            model_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the model.
            dataset_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the dataset used for training.
            tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
                Tags to be associated with the model card.
        """
        # Only rank zero writes the card.
        if not self.is_world_process_zero():
            return

        # Report a hub-style base-model id only; a local directory path is not
        # a meaningful `base_model` entry for the card.
        if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path):
            base_model = self.model.config._name_or_path
        else:
            base_model = None

        # Normalize `tags` to a mutable list (accepts str, list, or None).
        tags = tags or []
        if isinstance(tags, str):
            tags = [tags]

        if hasattr(self.model.config, "unsloth_version"):
            tags.append("unsloth")

        # BibTeX entry for the GRPO algorithm's source paper.
        citation = textwrap.dedent(
            """\
            @article{zhihong2024deepseekmath,
                title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
                author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
                year = 2024,
                eprint = {arXiv:2402.03300},
            }
            """
        )

        model_card = generate_model_card(
            base_model=base_model,
            model_name=model_name,
            hub_model_id=self.hub_model_id,
            dataset_name=dataset_name,
            tags=tags,
            # Include tracking URLs only when the respective integration is active.
            wandb_url=wandb.run.get_url() if is_wandb_available() and wandb.run is not None else None,
            comet_url=get_comet_experiment_url(),
            trainer_name="GRPO",
            trainer_citation=citation,
            paper_title="DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models",
            paper_id="2402.03300",
        )

        model_card.save(os.path.join(self.args.output_dir, "README.md"))
BioReason-main/bioreason/trainer/grpo_config.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from dataclasses import dataclass, field
16
+ from typing import Optional, Union
17
+
18
+ from transformers import TrainingArguments
19
+
20
+
21
+ @dataclass
22
+ class DNALLMGRPOConfig(TrainingArguments):
23
+ r"""
24
+ Configuration class for the [`GRPOTrainer`].
25
+
26
+ Only the parameters specific to GRPO training are listed here. For details on other parameters, refer to the
27
+ [`~transformers.TrainingArguments`] documentation.
28
+
29
+ Using [`~transformers.HfArgumentParser`] we can turn this class into
30
+ [argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
31
+ command line.
32
+
33
+ Parameters:
34
+ > Parameters that control the model and reference model
35
+
36
+ model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
37
+ Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model`
38
+ argument of the [`GRPOTrainer`] is provided as a string.
39
+
40
+ > Parameters that control the data preprocessing
41
+
42
+ remove_unused_columns (`bool`, *optional*, defaults to `False`):
43
+ Whether to only keep the column `"prompt"` in the dataset. If you use a custom reward function that
44
+ requires any column other than `"prompts"` and `"completions"`, you should keep this to `False`.
45
+ max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
46
+ Maximum length of the prompt. If the prompt is longer than this value, it will be truncated left.
47
+ num_generations (`int` or `None`, *optional*, defaults to `8`):
48
+ Number of generations per prompt to sample. The global batch size (num_processes * per_device_batch_size)
49
+ must be divisible by this value.
50
+ max_completion_length (`int` or `None`, *optional*, defaults to `256`):
51
+ Maximum length of the generated completion.
52
+ ds3_gather_for_generation (`bool`, *optional*, defaults to `True`):
53
+ This setting applies to DeepSpeed ZeRO-3. If enabled, the policy model weights are gathered for generation,
54
+ improving generation speed. However, disabling this option allows training models that exceed the VRAM
55
+ capacity of a single GPU, albeit at the cost of slower generation. Disabling this option is not compatible
56
+ with vLLM generation.
57
+
58
+ > Parameters that control generation
59
+
60
+ temperature (`float`, defaults to `0.9`):
61
+ Temperature for sampling. The higher the temperature, the more random the completions.
62
+ top_p (`float`, *optional*, defaults to `1.0`):
63
+ Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to
64
+ `1.0` to consider all tokens.
65
+ top_k (`int` or `None`, *optional*, defaults to `50`):
66
+ Number of highest probability vocabulary tokens to keep for top-k-filtering. If `None`, top-k-filtering is
67
+ disabled.
68
+ min_p (`float` or `None`, *optional*, defaults to `None`):
69
+ Minimum token probability, which will be scaled by the probability of the most likely token. It must be a
70
+ value between `0.0` and `1.0`. Typical values are in the `0.01-0.2` range.
71
+ repetition_penalty (`float`, *optional*, defaults to `1.0`):
72
+ Float that penalizes new tokens based on whether they appear in the prompt and the generated text so far.
73
+ Values > `1.0` encourage the model to use new tokens, while values < `1.0` encourage the model to repeat
74
+ tokens.
75
+ cache_implementation (`str` or `None`, *optional*, defaults to `None`):
76
+ Implementation of the cache method for faster generation when use_vllm is set to False.
77
+
78
+ > Parameters that control generation acceleration powered by vLLM
79
+
80
+ use_vllm (`bool`, *optional*, defaults to `False`):
81
+ Whether to use vLLM for generating completions. If set to `True`, ensure that a GPU is kept unused for
82
+ training, as vLLM will require one for generation. vLLM must be installed (`pip install vllm`).
83
+ vllm_device (`str`, *optional*, defaults to `"auto"`):
84
+ Device where vLLM generation will run, e.g. `"cuda:1"`. If set to `"auto"` (default), the system will
85
+ automatically select the next available GPU after the last one used for training. This assumes that
86
+ training has not already occupied all available GPUs. If only one device is available, the device will be
87
+ shared between both training and vLLM.
88
+ vllm_gpu_memory_utilization (`float`, *optional*, defaults to `0.9`):
89
+ Ratio (between 0 and 1) of GPU memory to reserve for the model weights, activations, and KV cache on the
90
+ device dedicated to generation powered by vLLM. Higher values will increase the KV cache size and thus
91
+ improve the model's throughput. However, if the value is too high, it may cause out-of-memory (OOM) errors
92
+ during initialization.
93
+ vllm_dtype (`str`, *optional*, defaults to `"auto"`):
94
+ Data type to use for vLLM generation. If set to `"auto"`, the data type will be automatically determined
95
+ based on the model configuration. Find the supported values in the vLLM documentation.
96
+ vllm_max_model_len (`int` or `None`, *optional*, defaults to `None`):
97
+ If set, the `max_model_len` to use for vLLM. This could be useful when running with reduced
98
+ `vllm_gpu_memory_utilization`, leading to a reduced KV cache size. If not set, vLLM will use the model
99
+ context size, which might be much larger than the KV cache, leading to inefficiencies.
100
+ vllm_enable_prefix_caching (`bool`, *optional*, defaults to `True`):
101
+ Whether to enable prefix caching in vLLM. If set to `True` (default), ensure that the model and the hardware
102
+ support this feature.
103
+ vllm_guided_decoding_regex (`str` or `None`, *optional*, defaults to `None`):
104
+ Regex for vLLM guided decoding. If `None` (default), guided decoding is disabled.
105
+
106
+ > Parameters that control the training
107
+
108
+ learning_rate (`float`, *optional*, defaults to `1e-6`):
109
+ Initial learning rate for [`AdamW`] optimizer. The default value replaces that of
110
+ [`~transformers.TrainingArguments`].
111
+ beta (`float`, *optional*, defaults to `0.04`):
112
+ KL coefficient. If `0.0`, the reference model is not loaded, reducing memory usage and improving training
113
+ speed, but may be numerically unstable for long training runs.
114
+ num_iterations (`int`, *optional*, defaults to `1`):
115
+ Number of iterations per batch (denoted as μ in the algorithm).
116
+ epsilon (`float`, *optional*, defaults to `0.2`):
117
+ Epsilon value for clipping.
118
+ epsilon_high (`float` or `None`, *optional*, defaults to `None`):
119
+ Upper-bound epsilon value for clipping. If not specified, it defaults to the same value as the lower-bound
120
+ specified in argument `epsilon`. Paper [DAPO](https://huggingface.co/papers/2503.14476) recommends `0.28`.
121
+ reward_weights (`list[float]` or `None`, *optional*, defaults to `None`):
122
+ Weights for each reward function. Must match the number of reward functions. If `None`, all rewards are
123
+ weighted equally with weight `1.0`.
124
+ sync_ref_model (`bool`, *optional*, defaults to `False`):
125
+ Whether to synchronize the reference model with the active model every `ref_model_sync_steps` steps, using
126
+ the `ref_model_mixup_alpha` parameter. This synchronization originates from the
127
+ [TR-DPO](https://huggingface.co/papers/2404.09656) paper.
128
+ ref_model_mixup_alpha (`float`, *optional*, defaults to `0.6`):
129
+ α parameter from the [TR-DPO](https://huggingface.co/papers/2404.09656) paper, which controls the mix
130
+ between the current policy and the previous reference policy during updates. The reference policy is
131
+ updated according to the equation: `π_ref = α * π_θ + (1 - α) * π_ref_prev`. To use this parameter, you
132
+ must set `sync_ref_model=True`.
133
+ ref_model_sync_steps (`int`, *optional*, defaults to `512`):
134
+ τ parameter from the [TR-DPO](https://huggingface.co/papers/2404.09656) paper, which determines how
135
+ frequently the current policy is synchronized with the reference policy. To use this parameter, you must
136
+ set `sync_ref_model=True`.
137
+
138
+ > Parameters that control the logging
139
+
140
+ log_completions (`bool`, *optional*, defaults to `False`):
141
+ Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is
142
+ installed, it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`.
143
+ """
144
+
145
+ # Parameters that control the model and reference model
146
+ model_init_kwargs: Optional[dict] = field(
147
+ default=None,
148
+ metadata={
149
+ "help": "Keyword arguments for `transformers.AutoModelForCausalLM.from_pretrained`, used when the `model` "
150
+ "argument of the `GRPOTrainer` is provided as a string."
151
+ },
152
+ )
153
+
154
+ # Parameters that control the data preprocessing
155
+ # The default value remove_unused_columns is overwritten from the parent class, because in GRPO we usually rely on
156
+ # additional columns to compute the reward
157
+ remove_unused_columns: Optional[bool] = field(
158
+ default=False,
159
+ metadata={
160
+ "help": "Whether to only keep the column 'prompt' in the dataset. If you use a custom reward function "
161
+ "that requires any column other than 'prompts' and 'completions', you should keep this to `False`."
162
+ },
163
+ )
164
+ max_prompt_length: Optional[int] = field(
165
+ default=512,
166
+ metadata={
167
+ "help": "Maximum length of the prompt. If the prompt is longer than this value, it will be truncated left."
168
+ },
169
+ )
170
+ num_generations: Optional[int] = field(
171
+ default=8,
172
+ metadata={
173
+ "help": "Number of generations to sample. The global batch size (num_processes * per_device_batch_size) "
174
+ "must be divisible by this value."
175
+ },
176
+ )
177
+ max_completion_length: Optional[int] = field(
178
+ default=800,
179
+ metadata={"help": "Maximum length of the generated completion."},
180
+ )
181
+ ds3_gather_for_generation: bool = field(
182
+ default=True,
183
+ metadata={
184
+ "help": "This setting applies to DeepSpeed ZeRO-3. If enabled, the policy model weights are gathered for "
185
+ "generation, improving generation speed. However, disabling this option allows training models that "
186
+ "exceed the VRAM capacity of a single GPU, albeit at the cost of slower generation. Disabling this option "
187
+ "is not compatible with vLLM generation."
188
+ },
189
+ )
190
+
191
+ # Parameters that control generation
192
+ temperature: float = field(
193
+ default=0.6,
194
+ metadata={"help": "Temperature for sampling. The higher the temperature, the more random the completions."},
195
+ )
196
+ top_p: float = field(
197
+ default=0.95,
198
+ metadata={
199
+ "help": "Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. "
200
+ "Set to 1.0 to consider all tokens."
201
+ },
202
+ )
203
+ top_k: Optional[int] = field(
204
+ default=20,
205
+ metadata={
206
+ "help": "Number of highest probability vocabulary tokens to keep for top-k-filtering. If `None`, "
207
+ "top-k-filtering is disabled."
208
+ },
209
+ )
210
+ min_p: Optional[float] = field(
211
+ default=None,
212
+ metadata={
213
+ "help": "Minimum token probability, which will be scaled by the probability of the most likely token. It "
214
+ "must be a value between 0.0 and 1.0. Typical values are in the 0.01-0.2 range."
215
+ },
216
+ )
217
+ repetition_penalty: float = field(
218
+ default=1.0,
219
+ metadata={
220
+ "help": "Float that penalizes new tokens based on whether they appear in the prompt and the generated "
221
+ "text so far. Values > 1.0 encourage the model to use new tokens, while values < 1.0 encourage the model "
222
+ "to repeat tokens."
223
+ },
224
+ )
225
+ cache_implementation: Optional[str] = field(
226
+ default=None,
227
+ metadata={"help": "Implementation of the cache method for faster generation when use_vllm is set to False."},
228
+ )
229
+
230
+ # Parameters that control generation acceleration powered by vLLM
231
+ use_vllm: Optional[bool] = field(
232
+ default=False,
233
+ metadata={
234
+ "help": "Whether to use vLLM for generating completions. If set to `True`, ensure that a GPU is kept "
235
+ "unused for training, as vLLM will require one for generation. vLLM must be installed "
236
+ "(`pip install vllm`)."
237
+ },
238
+ )
239
+ vllm_device: Optional[str] = field(
240
+ default="auto",
241
+ metadata={
242
+ "help": "Device where vLLM generation will run, e.g. 'cuda:1'. If set to 'auto' (default), the system "
243
+ "will automatically select the next available GPU after the last one used for training. This assumes "
244
+ "that training has not already occupied all available GPUs."
245
+ },
246
+ )
247
+ vllm_gpu_memory_utilization: float = field(
248
+ default=0.9,
249
+ metadata={
250
+ "help": "Ratio (between 0 and 1) of GPU memory to reserve for the model weights, activations, and KV "
251
+ "cache on the device dedicated to generation powered by vLLM. Higher values will increase the KV cache "
252
+ "size and thus improve the model's throughput. However, if the value is too high, it may cause "
253
+ "out-of-memory (OOM) errors during initialization."
254
+ },
255
+ )
256
+ vllm_dtype: Optional[str] = field(
257
+ default="auto",
258
+ metadata={
259
+ "help": "Data type to use for vLLM generation. If set to 'auto', the data type will be automatically "
260
+ "determined based on the model configuration. Find the supported values in the vLLM documentation."
261
+ },
262
+ )
263
+ vllm_max_model_len: Optional[int] = field(
264
+ default=None,
265
+ metadata={
266
+ "help": "If set, the `max_model_len` to use for vLLM. This could be useful when running with reduced "
267
+ "`vllm_gpu_memory_utilization`, leading to a reduced KV cache size. If not set, vLLM will use the model "
268
+ "context size, which might be much larger than the KV cache, leading to inefficiencies."
269
+ },
270
+ )
271
+ vllm_enable_prefix_caching: Optional[bool] = field(
272
+ default=True,
273
+ metadata={
274
+ "help": "Whether to enable prefix caching in vLLM. If set to `True` (default), ensure that the model and "
275
+ "the hardware support this feature."
276
+ },
277
+ )
278
+ vllm_guided_decoding_regex: Optional[str] = field(
279
+ default=None,
280
+ metadata={"help": "Regex for vLLM guided decoding. If `None` (default), guided decoding is disabled."},
281
+ )
282
+
283
+ # Parameters that control the training
284
+ learning_rate: float = field(
285
+ default=1e-6,
286
+ metadata={
287
+ "help": "Initial learning rate for `AdamW` optimizer. The default value replaces that of "
288
+ "`transformers.TrainingArguments`."
289
+ },
290
+ )
291
+ beta: float = field(
292
+ default=0.04,
293
+ metadata={
294
+ "help": "KL coefficient. If `0.0`, the reference model is not loaded, reducing memory usage and improving "
295
+ "training speed, but may be numerically unstable for long training runs."
296
+ },
297
+ )
298
+ num_iterations: int = field(
299
+ default=1,
300
+ metadata={"help": "Number of iterations per batch (denoted as μ in the algorithm)."},
301
+ )
302
+ epsilon: float = field(
303
+ default=0.2,
304
+ metadata={"help": "Epsilon value for clipping."},
305
+ )
306
+ epsilon_high: Optional[float] = field(
307
+ default=None,
308
+ metadata={
309
+ "help": "Upper-bound epsilon value for clipping. If not specified, it defaults to the same value as the "
310
+ "lower-bound specified in argument `epsilon`. Paper DAPO recommends `0.28`."
311
+ },
312
+ )
313
+ reward_weights: Optional[list[float]] = field(
314
+ default=None,
315
+ metadata={
316
+ "help": "Weights for each reward function. Must match the number of reward functions. If `None`, all "
317
+ "rewards are weighted equally with weight `1.0`."
318
+ },
319
+ )
320
+ sync_ref_model: bool = field(
321
+ default=False,
322
+ metadata={
323
+ "help": "Whether to synchronize the reference model with the active model every `ref_model_sync_steps` "
324
+ "steps, using the `ref_model_mixup_alpha` parameter."
325
+ },
326
+ )
327
+ ref_model_mixup_alpha: float = field(
328
+ default=0.6,
329
+ metadata={
330
+ "help": "α parameter from the TR-DPO paper, which controls the mix between the current policy and the "
331
+ "previous reference policy during updates. The reference policy is updated according to the equation: "
332
+ "`π_ref = α * π_θ + (1 - α) * π_ref_prev`. To use this parameter, you must set `sync_ref_model=True`."
333
+ },
334
+ )
335
+ ref_model_sync_steps: int = field(
336
+ default=512,
337
+ metadata={
338
+ "help": "τ parameter from the TR-DPO paper, which determines how frequently the current policy is "
339
+ "synchronized with the reference policy. To use this parameter, you must set `sync_ref_model=True`."
340
+ },
341
+ )
342
+
343
+ # Parameters that control the logging
344
+ log_completions: bool = field(
345
+ default=True,
346
+ metadata={
347
+ "help": "Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is "
348
+ "installed, it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`."
349
+ },
350
+ )
351
+
352
+ report_to: Union[None, str, list[str]] = field(
353
+ default="wandb", metadata={"help": "The list of integrations to report the results and logs to."}
354
+ )
355
+
356
+ logging_first_step: bool = field(default=False, metadata={"help": "Log the first global_step"})
357
+ logging_steps: float = field(
358
+ default=2,
359
+ metadata={
360
+ "help": (
361
+ "Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
362
+ "If smaller than 1, will be interpreted as ratio of total training steps."
363
+ )
364
+ },
365
+ )
BioReason-main/bioreason/trainer/grpo_trainer.py ADDED
@@ -0,0 +1,905 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import time
17
+ import textwrap
18
+ import pandas as pd
19
+ from collections import defaultdict
20
+ from typing import Any, Callable, Optional, Union, Sized
21
+
22
+ import torch
23
+ import torch.utils.data
24
+ import transformers
25
+ from datasets import Dataset, IterableDataset
26
+ from packaging import version
27
+ from transformers import (
28
+ AriaForConditionalGeneration,
29
+ AriaProcessor,
30
+ AutoModelForCausalLM,
31
+ AutoModelForSequenceClassification,
32
+ AutoProcessor,
33
+ AutoTokenizer,
34
+ GenerationConfig,
35
+ PreTrainedModel,
36
+ PreTrainedTokenizerBase,
37
+ Qwen2VLForConditionalGeneration,
38
+ Qwen2_5_VLForConditionalGeneration,
39
+ Trainer,
40
+ TrainerCallback,
41
+ is_wandb_available,
42
+ )
43
+ from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
44
+ from transformers.utils import is_peft_available
45
+
46
+ from trl.data_utils import apply_chat_template, is_conversational, maybe_apply_chat_template
47
+ from trl.models import create_reference_model, prepare_deepspeed, unwrap_model_for_generation
48
+ from trl.trainer.grpo_config import GRPOConfig
49
+ from trl.trainer.utils import generate_model_card, get_comet_experiment_url
50
+ # from trl import GRPOTrainer
51
+
52
+ from accelerate.utils import is_peft_model, set_seed, gather_object
53
+ import PIL.Image
54
+
55
+ import copy
56
+ from torch.utils.data import Sampler
57
+ import warnings
58
+
59
+ if is_peft_available():
60
+ from peft import PeftConfig, get_peft_model, prepare_model_for_kbit_training
61
+
62
+ if is_wandb_available():
63
+ import wandb
64
+
65
+ from bioreason.dna_modules.dna_module import DNABaseModule
66
+ from bioreason.trainer import DNALLMGRPOConfig
67
# A reward function maps (list of prompts, list of completions) -> list of float rewards.
# It may also be given as a string — a model ID, in which case it is loaded as a pretrained
# sequence-classification model — or as a PreTrainedModel instance.
RewardFunc = Union[str, PreTrainedModel, Callable[[list, list], list[float]]]
70
+
71
+
72
class RepeatRandomSampler(Sampler):
    """
    Sampler that repeats the indices of a dataset in a structured manner.

    Each pass over the sampler:
      1. Shuffles the dataset indices (deterministically, if a seed was given).
      2. Splits them into chunks of `batch_size` unique indices, dropping any
         trailing incomplete chunk.
      3. Yields each chunk `repeat_count` times, with every index inside a
         chunk repeated `mini_repeat_count` times consecutively.

    Args:
        data_source (`Sized`):
            Dataset to sample from.
        mini_repeat_count (`int`):
            Number of times to repeat each index per batch.
        batch_size (`int`, *optional*, defaults to `1`):
            Number of unique indices per batch.
        repeat_count (`int`, *optional*, defaults to `1`):
            Number of times to repeat the full sampling process.
        seed (`int` or `None`, *optional*, defaults to `None`):
            Random seed for reproducibility.
    """

    def __init__(
        self,
        data_source: Sized,
        mini_repeat_count: int,
        batch_size: int = 1,
        repeat_count: int = 1,
        seed: Optional[int] = None,
    ):
        self.data_source = data_source
        self.mini_repeat_count = mini_repeat_count
        self.batch_size = batch_size
        self.repeat_count = repeat_count
        self.num_samples = len(data_source)
        self.seed = seed
        # Dedicated generator so shuffling does not perturb the global RNG state.
        self.generator = torch.Generator()
        if seed is not None:
            self.generator.manual_seed(seed)

    def __iter__(self):
        # Shuffle once per epoch, then group into batches of unique indices.
        indexes = torch.randperm(self.num_samples, generator=self.generator).tolist()
        indexes = [indexes[i : i + self.batch_size] for i in range(0, len(indexes), self.batch_size)]
        # Drop the last chunk if it is incomplete, so every batch holds exactly
        # `batch_size` unique indices.
        indexes = [chunk for chunk in indexes if len(chunk) == self.batch_size]

        for chunk in indexes:
            for _ in range(self.repeat_count):
                for index in chunk:
                    for _ in range(self.mini_repeat_count):
                        yield index

    def __len__(self) -> int:
        # Fix: count only the indices actually yielded by `__iter__`. The previous
        # implementation returned `num_samples * mini_repeat_count * repeat_count`,
        # which overcounts whenever `num_samples` is not a multiple of `batch_size`,
        # because `__iter__` drops the trailing incomplete chunk. Identical to the
        # old value when `num_samples % batch_size == 0`.
        full_chunks = self.num_samples // self.batch_size
        return full_chunks * self.batch_size * self.mini_repeat_count * self.repeat_count
120
+
121
+
122
+ class DNALLMGRPOTrainer(Trainer):
123
+ """
124
+ Trainer for the Group Relative Policy Optimization (GRPO) method. This algorithm was initially proposed in the
125
+ paper [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
126
+
127
+ Example:
128
+
129
+ ```python
130
+ from datasets import load_dataset
131
+ from trl import GRPOTrainer
132
+
133
+ dataset = load_dataset("trl-lib/tldr", split="train")
134
+
135
+ trainer = GRPOTrainer(
136
+ model="Qwen/Qwen2-0.5B-Instruct",
137
+ reward_funcs="weqweasdas/RM-Gemma-2B",
138
+ train_dataset=dataset,
139
+ )
140
+
141
+ trainer.train()
142
+ ```
143
+
144
+ Args:
145
+ model (`Union[str, PreTrainedModel]`):
146
+ Model to be trained. Can be either:
147
+
148
+ - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or
149
+ a path to a *directory* containing model weights saved using
150
+ [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is
151
+ loaded using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments
152
+ in `args.model_init_kwargs`.
153
+ - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported.
154
+ reward_funcs (`Union[RewardFunc, list[RewardFunc]]`):
155
+ Reward functions to be used for computing the rewards. To compute the rewards, we call all the reward
156
+ functions with the prompts and completions and sum the rewards. Can be either:
157
+
158
+ - A single reward function, such as:
159
+ - A string: The *model ID* of a pretrained model hosted inside a model repo on huggingface.co, or a
160
+ path to a *directory* containing model weights saved using
161
+ [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded
162
+ using [`~transformers.AutoModelForSequenceClassification.from_pretrained`] with `num_labels=1` and the
163
+ keyword arguments in `args.model_init_kwargs`.
164
+ - A [`~transformers.PreTrainedModel`] object: Only sequence classification models are supported.
165
+ - A custom reward function: The function is provided with the prompts and the generated completions,
166
+ plus any additional columns in the dataset. It should return a list of rewards. For more details, see
167
+ [Using a custom reward function](#using-a-custom-reward-function).
168
+ - A list of reward functions, where each item can independently be any of the above types. Mixing different
169
+ types within the list (e.g., a string model ID and a custom reward function) is allowed.
170
+ args ([`GRPOConfig`], *optional*, defaults to `None`):
171
+ Configuration for this trainer. If `None`, a default configuration is used.
172
+ train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
173
+ Dataset to use for training. It must include a column `"prompt"`. Any additional columns in the dataset is
174
+ ignored. The format of the samples can be either:
175
+
176
+ - [Standard](dataset_formats#standard): Each sample contains plain text.
177
+ - [Conversational](dataset_formats#conversational): Each sample contains structured messages (e.g., role
178
+ and content).
179
+ eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
180
+ Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
181
+ processing_class ([`~transformers.PreTrainedTokenizerBase`], *optional*, defaults to `None`):
182
+ Processing class used to process the data. The padding side must be set to "left". If `None`, the
183
+ processing class is loaded from the model's name with [`~transformers.AutoTokenizer.from_pretrained`].
184
+ reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`):
185
+ Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either:
186
+
187
+ - A single processing class: Used when `reward_funcs` contains only one reward function.
188
+ - A list of processing classes: Must match the order and length of the reward functions in `reward_funcs`.
189
+ If set to `None`, or if an element of the list corresponding to a [`~transformers.PreTrainedModel`] is
190
+ `None`, the tokenizer for the model is automatically loaded using [`~transformers.AutoTokenizer.from_pretrained`].
191
+ For elements in `reward_funcs` that are custom reward functions (not [`~transformers.PreTrainedModel`]),
192
+ the corresponding entries in `reward_processing_classes` are ignored.
193
+ callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
194
+ List of callbacks to customize the training loop. Will add those to the list of default callbacks
195
+ detailed in [here](https://huggingface.co/docs/transformers/main_classes/callback).
196
+
197
+ If you want to remove one of the default callbacks used, use the [`~transformers.Trainer.remove_callback`]
198
+ method.
199
+ optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`):
200
+ A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your
201
+ model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
202
+ peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
203
+ PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
204
+ """
205
+
206
+ def __init__(
207
+ self,
208
+ model: Union[str, PreTrainedModel],
209
+ reward_funcs: Union[RewardFunc, list[RewardFunc]],
210
+ args: DNALLMGRPOConfig = None,
211
+ dna_module: DNABaseModule = None,
212
+ train_dataset: Optional[Union[Dataset, IterableDataset]] = None,
213
+ eval_dataset: Optional[Union[Dataset, IterableDataset, dict[str, Union[Dataset, IterableDataset]]]] = None,
214
+ processing_class: Optional[PreTrainedTokenizerBase] = None,
215
+ reward_processing_classes: Optional[Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]] = None,
216
+ callbacks: Optional[list[TrainerCallback]] = None,
217
+ optimizers: tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None),
218
+ peft_config: Optional["PeftConfig"] = None,
219
+ freeze_dna_modules: Optional[bool] = False,
220
+ attn_implementation: str = "flash_attention_2",
221
+ torch_dtype: str = "bfloat16",
222
+ **kwargs,
223
+ ):
224
+ # Args
225
+ if args is None:
226
+ model_name = model if isinstance(model, str) else model.config._name_or_path
227
+ model_name = model_name.split("/")[-1]
228
+ args = GRPOConfig(f"{model_name}-GRPO")
229
+
230
+ self.dna_module = dna_module
231
+
232
+ # Models
233
+ # Trained model
234
+ model_init_kwargs = args.model_init_kwargs or {}
235
+ # FIXME
236
+ # Remember to modify it in the InternVL
237
+ model_init_kwargs["attn_implementation"] = attn_implementation
238
+ if model_init_kwargs.get("torch_dtype") is None:
239
+ model_init_kwargs["torch_dtype"] = torch_dtype
240
+
241
+ assert not isinstance(model, str), "model must NOT be a string in the current implementation"
242
+
243
+ torch_dtype = model_init_kwargs.get("torch_dtype")
244
+ if isinstance(torch_dtype, torch.dtype) or torch_dtype == "auto" or torch_dtype is None:
245
+ pass # torch_dtype is already a torch.dtype or "auto" or None
246
+ elif isinstance(torch_dtype, str): # it's a str, but not "auto"
247
+ torch_dtype = getattr(torch, torch_dtype)
248
+ else:
249
+ raise ValueError(
250
+ "Invalid `torch_dtype` passed to `GRPOConfig`. Expected either 'auto' or a string representing "
251
+ f"a `torch.dtype` (e.g., 'float32'), but got {torch_dtype}."
252
+ )
253
+ # Disable caching if gradient checkpointing is enabled (not supported)
254
+ model_init_kwargs["use_cache"] = (
255
+ False if args.gradient_checkpointing else model_init_kwargs.get("use_cache")
256
+ )
257
+
258
+ # LoRA
259
+ self.dna_modules_keywords = self.dna_module.get_dnallm_modules_keywords()
260
+ if peft_config is not None:
261
+ print("Applying LoRA...")
262
+ def find_all_linear_names(model, multimodal_keywords):
263
+ cls = torch.nn.Linear
264
+ lora_module_names = set()
265
+ for name, module in model.named_modules():
266
+ print('name:', name, 'module:', module)
267
+ # LoRA is not applied to the DNA modules
268
+ if any(mm_keyword in name for mm_keyword in multimodal_keywords):
269
+ continue
270
+ if isinstance(module, cls):
271
+ lora_module_names.add(name)
272
+ for m in lora_module_names: # needed for 16-bit
273
+ if "embed_tokens" in m:
274
+ lora_module_names.remove(m)
275
+ return list(lora_module_names)
276
+ target_modules = find_all_linear_names(model, self.dna_modules_keywords)
277
+ peft_config.target_modules = target_modules
278
+ model = prepare_model_for_kbit_training(model)
279
+ model = get_peft_model(model, peft_config)
280
+
281
+ # Freeze DNA modules
282
+ if freeze_dna_modules:
283
+ print("Freezing DNA modules...")
284
+ for p in model.dna_model.parameters():
285
+ p.requires_grad = False
286
+
287
+ # Make projection layer trainable
288
+ for p in model.dna_projection.parameters():
289
+ p.required_grad = True
290
+
291
+ # Compute the number of trainable parameters and print the parameter that is trainable
292
+ trainable_params = [p for p in model.parameters() if p.requires_grad]
293
+ total_params = sum(p.numel() for p in trainable_params)
294
+ # for n, p in model.named_parameters():
295
+ # if p.requires_grad:
296
+ # print(n, p.shape)
297
+ print(f"Total trainable parameters: {total_params}")
298
+
299
+ # Enable gradient checkpointing if requested
300
+ if args.gradient_checkpointing:
301
+ model = self._enable_gradient_checkpointing(model, args)
302
+
303
+ # Reference model
304
+ self.beta = args.beta
305
+ if self.beta == 0.0:
306
+ # If beta is 0.0, the reference model is not needed
307
+ self.ref_model = None
308
+ elif is_deepspeed_zero3_enabled():
309
+ self.ref_model = model_cls.from_pretrained(model_id, **model_init_kwargs)
310
+ elif is_peft_model(model):
311
+ # If PEFT is used, the reference model is not needed since the adapter can be disabled
312
+ # to revert to the initial model.
313
+ self.ref_model = None
314
+ else:
315
+ # If PEFT configuration is not provided, create a reference model based on the initial model.
316
+ self.ref_model = create_reference_model(model)
317
+
318
+ # Processing class
319
+ if processing_class is None:
320
+ processing_cls = self.dna_module.get_processing_class()
321
+
322
+ #if isinstance(model.text_model)
323
+ processing_class = processing_cls(tokenizer=model.text_tokenizer, dna_tokenizer=model.dna_tokenizer)
324
+ # print(model.tokenizer.chat_template)
325
+ for component, processing_keyword in self.dna_module.get_custom_processing_keywords():
326
+ if processing_keyword in kwargs:
327
+ # If we cannot find component in processing_class, return the processing_class itself
328
+ processing_component = getattr(processing_class, component, processing_class)
329
+ setattr(processing_component, processing_keyword, kwargs[processing_keyword])
330
+ if getattr(processing_class, "tokenizer", None) is not None:
331
+ pad_token_id = processing_class.tokenizer.pad_token_id
332
+ processing_class.pad_token_id = pad_token_id
333
+ processing_class.eos_token_id = processing_class.tokenizer.eos_token_id
334
+ else:
335
+ assert isinstance(processing_class, PreTrainedTokenizerBase), "processing_class must be an instance of PreTrainedTokenizerBase if it has no tokenizer attribute"
336
+ pad_token_id = processing_class.pad_token_id
337
+
338
+ self.dna_module.post_model_init(model, processing_class)
339
+ self.dna_module.post_model_init(self.ref_model, processing_class)
340
+
341
+ # Reward functions
342
+ if not isinstance(reward_funcs, list):
343
+ reward_funcs = [reward_funcs]
344
+ for i, reward_func in enumerate(reward_funcs):
345
+ if isinstance(reward_func, str):
346
+ reward_funcs[i] = AutoModelForSequenceClassification.from_pretrained(
347
+ reward_func, num_labels=1, **model_init_kwargs
348
+ )
349
+ self.reward_funcs = reward_funcs
350
+
351
+ # Reward processing class
352
+ if reward_processing_classes is None:
353
+ reward_processing_classes = [None] * len(reward_funcs)
354
+ elif not isinstance(reward_processing_classes, list):
355
+ reward_processing_classes = [reward_processing_classes]
356
+ else:
357
+ if len(reward_processing_classes) != len(reward_funcs):
358
+ raise ValueError("The number of reward processing classes must match the number of reward functions.")
359
+
360
+ for i, (reward_processing_class, reward_func) in enumerate(zip(reward_processing_classes, reward_funcs)):
361
+ if isinstance(reward_func, PreTrainedModel):
362
+ if reward_processing_class is None:
363
+ reward_processing_class = AutoTokenizer.from_pretrained(reward_func.config._name_or_path)
364
+ if reward_processing_class.pad_token_id is None:
365
+ reward_processing_class.pad_token = reward_processing_class.eos_token
366
+ # The reward model computes the reward for the latest non-padded token in the input sequence.
367
+ # So it's important to set the pad token ID to the padding token ID of the processing class.
368
+ reward_func.config.pad_token_id = reward_processing_class.pad_token_id
369
+ reward_processing_classes[i] = reward_processing_class
370
+ self.reward_processing_classes = reward_processing_classes
371
+
372
+ # Data collator
373
+ def data_collator(features): # No data collation is needed in GRPO
374
+ return features
375
+
376
+ # Training arguments
377
+ self.max_prompt_length = args.max_prompt_length
378
+ self.max_prompt_length = None
379
+ if args.max_prompt_length is not None:
380
+ warnings.warn("Setting max_prompt_length is currently not supported, it has been set to None")
381
+
382
+ self.max_completion_length = args.max_completion_length # = |o_i| in the GRPO paper
383
+ self.num_generations = args.num_generations # = G in the GRPO paper
384
+ self.generation_config = GenerationConfig(
385
+ max_new_tokens=self.max_completion_length,
386
+ do_sample=True,
387
+ temperature=0.6,
388
+ top_p=0.95,
389
+ top_k=20,
390
+ pad_token_id=pad_token_id,
391
+ )
392
+ if hasattr(self.dna_module, "get_eos_token_id"): # For InternVL
393
+ self.generation_config.eos_token_id = self.dna_module.get_eos_token_id(processing_class)
394
+ self.beta = args.beta
395
+ self.epsilon_low = args.epsilon
396
+ self.epsilon_high = args.epsilon_high if args.epsilon_high is not None else args.epsilon
397
+
398
+ # Multi-step
399
+ self.num_iterations = args.num_iterations # = 𝜇 in the GRPO paper
400
+ # Tracks the number of iterations (forward + backward passes), including those within a gradient accumulation cycle
401
+ self._step = 0
402
+ # Buffer the batch to reuse generated outputs across multiple updates
403
+ self._buffered_inputs = [None] * args.gradient_accumulation_steps
404
+
405
+ # The trainer estimates the number of FLOPs (floating-point operations) using the number of elements in the
406
+ # input tensor associated with the key "input_ids". However, in GRPO, the sampled data does not include the
407
+ # "input_ids" key. Instead, the available keys is "prompt". As a result, the trainer issues the warning:
408
+ # "Could not estimate the number of tokens of the input, floating-point operations will not be computed." To
409
+ # suppress this warning, we set the "estimate_tokens" key in the model's "warnings_issued" dictionary to True.
410
+ # This acts as a flag to indicate that the warning has already been issued.
411
+ model.warnings_issued["estimate_tokens"] = True
412
+
413
+ # Initialize the metrics
414
+ self._metrics = defaultdict(list)
415
+ self.log_completions = args.log_completions
416
+
417
+ super().__init__(
418
+ model=model,
419
+ args=args,
420
+ data_collator=data_collator,
421
+ train_dataset=train_dataset,
422
+ eval_dataset=eval_dataset,
423
+ processing_class=processing_class,
424
+ callbacks=callbacks,
425
+ optimizers=optimizers,
426
+ )
427
+
428
+ # Check if the per_device_train/eval_batch_size * num processes can be divided by the number of generations
429
+ num_processes = self.accelerator.num_processes
430
+ global_batch_size = args.per_device_train_batch_size * num_processes
431
+ possible_values = [n_gen for n_gen in range(2, global_batch_size + 1) if (global_batch_size) % n_gen == 0]
432
+ if self.num_generations not in possible_values:
433
+ raise ValueError(
434
+ f"The global train batch size ({num_processes} x {args.per_device_train_batch_size}) must be evenly "
435
+ f"divisible by the number of generations per prompt ({self.num_generations}). Given the current train "
436
+ f"batch size, the valid values for the number of generations are: {possible_values}."
437
+ )
438
+ if self.args.eval_strategy != "no":
439
+ global_batch_size = args.per_device_eval_batch_size * num_processes
440
+ possible_values = [n_gen for n_gen in range(2, global_batch_size + 1) if (global_batch_size) % n_gen == 0]
441
+ if self.num_generations not in possible_values:
442
+ raise ValueError(
443
+ f"The global eval batch size ({num_processes} x {args.per_device_eval_batch_size}) must be evenly "
444
+ f"divisible by the number of generations per prompt ({self.num_generations}). Given the current "
445
+ f"eval batch size, the valid values for the number of generations are: {possible_values}."
446
+ )
447
+
448
+ # Ensure each process receives a unique seed to prevent duplicate completions when generating with
449
+ # transformers if num_generations exceeds per_device_train_batch_size. We could skip it if we use vLLM, but
450
+ # it's safer to set it in all cases.
451
+ set_seed(args.seed, device_specific=True)
452
+
453
+ # Gradient accumulation requires scaled loss. Normally, loss scaling in the parent class depends on whether the
454
+ # model accepts loss-related kwargs. Since we compute our own loss, this check is irrelevant. We set
455
+ # self.model_accepts_loss_kwargs to False to enable scaling.
456
+ self.model_accepts_loss_kwargs = False
457
+
458
+ if self.ref_model is not None:
459
+ # if self.is_deepspeed_enabled:
460
+ if is_deepspeed_zero3_enabled():
461
+ self.ref_model = prepare_deepspeed(self.ref_model, self.accelerator)
462
+ else:
463
+ self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)
464
+
465
+ for i, reward_func in enumerate(self.reward_funcs):
466
+ if isinstance(reward_func, PreTrainedModel):
467
+ self.reward_funcs[i] = self.accelerator.prepare_model(reward_func, evaluation_mode=True)
468
+
469
+ def _enable_gradient_checkpointing(self, model: PreTrainedModel, args: GRPOConfig) -> PreTrainedModel:
470
+ """Enables gradient checkpointing for the model."""
471
+ # Ensure use_cache is disabled
472
+ model.config.use_cache = False
473
+
474
+ # Enable gradient checkpointing on the base model for PEFT
475
+ if is_peft_model(model):
476
+ model.base_model.gradient_checkpointing_enable()
477
+ # Enable gradient checkpointing for non-PEFT models
478
+ else:
479
+ if getattr(model, "language_model", None) is not None:
480
+ # For InternVL; these operations are copied from the original training script of InternVL
481
+ model.language_model.config.use_cache = False
482
+ model.dna_model.gradient_checkpointing = True
483
+ model.dna_model.encoder.gradient_checkpointing = True
484
+ model.language_model._set_gradient_checkpointing()
485
+ # This line is necessary, otherwise the `model.gradient_checkpointing_enable()` will be executed during the training process, leading to an error since InternVL does not support this operation.
486
+ args.gradient_checkpointing = False
487
+ else:
488
+ model.gradient_checkpointing_enable()
489
+
490
+ gradient_checkpointing_kwargs = args.gradient_checkpointing_kwargs or {}
491
+ use_reentrant = (
492
+ "use_reentrant" not in gradient_checkpointing_kwargs or gradient_checkpointing_kwargs["use_reentrant"]
493
+ )
494
+
495
+ if use_reentrant:
496
+ model.enable_input_require_grads()
497
+
498
+ return model
499
+
500
+ def _set_signature_columns_if_needed(self):
501
+ # If `self.args.remove_unused_columns` is True, non-signature columns are removed.
502
+ # By default, this method sets `self._signature_columns` to the model's expected inputs.
503
+ # In GRPOTrainer, we preprocess data, so using the model's signature columns doesn't work.
504
+ # Instead, we set them to the columns expected by the `training_step` method, hence the override.
505
+ if self._signature_columns is None:
506
+ self._signature_columns = ["prompt"]
507
+
508
+
509
+ # Get the per-token log probabilities for the completions for the model and the reference model
510
+ def _get_per_token_logps(self, model, input_ids, attention_mask, **custom_multimodal_inputs):
511
+ logits = model(input_ids=input_ids, attention_mask=attention_mask, **custom_multimodal_inputs).logits # (B, L, V)
512
+ logits = logits[:, :-1, :] # (B, L-1, V), exclude the last logit: it corresponds to the next token pred
513
+ input_ids = input_ids[:, 1:] # (B, L-1), exclude the first input ID since we don't have logits for it
514
+ # Compute the log probabilities for the input tokens. Use a loop to reduce memory peak.
515
+ per_token_logps = []
516
+ for logits_row, input_ids_row in zip(logits, input_ids):
517
+ log_probs = logits_row.log_softmax(dim=-1)
518
+ token_log_prob = torch.gather(log_probs, dim=1, index=input_ids_row.unsqueeze(1)).squeeze(1)
519
+ per_token_logps.append(token_log_prob)
520
+ return torch.stack(per_token_logps)
521
+
522
+
523
+ def _prepare_inputs(self, inputs):
524
+ # Simple pass-through, just like original
525
+ return inputs
526
+
527
+ def _get_key_from_inputs(self, x, key):
528
+ ele = x.get(key, None)
529
+ assert ele is not None, f"The key {key} is not found in the input"
530
+ if isinstance(ele, list):
531
+ return [e for e in ele]
532
+ else:
533
+ return [ele]
534
+
535
    def _generate_and_score_completions(self, inputs: dict[str, Union[torch.Tensor, Any]], model) -> dict[str, Union[torch.Tensor, Any]]:
        """Generate completions for a batch of prompts and score them with the reward functions.

        Runs generation, masks everything after the first EOS, computes the old and
        reference per-token log-probs needed for the GRPO objective, evaluates every
        reward function, normalises rewards into group-relative advantages, logs
        metrics, and returns the tensors `compute_loss` consumes.
        """
        device = self.accelerator.device
        prompts = [x["prompt"] for x in inputs]
        prompts_text = self.dna_module.prepare_prompt(self.processing_class, inputs)
        # Handle both pre-loaded images and image paths
        # Gather each example's DNA sequences into a list-of-lists (one inner list per example).
        batch_dna_sequences = []
        print("_generate_and_score_completions (GRPO):")
        for x in inputs:
            #print('---')
            #print(x)
            if 'dna_sequences' in x:
                dnas = self._get_key_from_inputs(x, "dna_sequences")

                # Hook for optional per-sequence cleaning; currently a no-op.
                for dna in dnas:
                    # clean if desired
                    pass
                batch_dna_sequences.append(dnas)
                # NOTE: typically appends dna, so dna_sequences is all the dna in one list
                # odd. trying this instead

        # Tokenize prompts together with their DNA sequences (left padding so
        # generation starts directly after the prompt).
        prompt_inputs = self.dna_module.prepare_model_inputs(
            self.processing_class,
            model,
            prompts_text,
            batch_dna_sequences,
            return_tensors="pt",
            padding=True,
            padding_side="left",
            add_special_tokens=False,
        )

        # Let the base Trainer move the tensors to the right device.
        prompt_inputs = super()._prepare_inputs(prompt_inputs)
        prompt_ids, prompt_mask = prompt_inputs["input_ids"], prompt_inputs["attention_mask"]

        # max_prompt_length is not supported yet
        # if self.max_prompt_length is not None:
        #     prompt_ids = prompt_ids[:, -self.max_prompt_length :]
        #     prompt_inputs["input_ids"] = prompt_ids
        #     prompt_mask = prompt_mask[:, -self.max_prompt_length :]
        #     prompt_inputs["attention_mask"] = prompt_mask

        # Generate completions
        start = time.time()
        with unwrap_model_for_generation(model, self.accelerator) as unwrapped_model:
            # Drop inputs that `generate` does not accept (module-specific).
            kwargs = {k: v for k, v in prompt_inputs.items() if k not in self.dna_module.get_non_generate_params()}
            generate_returned_result = unwrapped_model.generate(
                **kwargs,
                generation_config=self.generation_config
            )
        end = time.time()
        print(f"Generation time: {end - start:.9f} seconds")
        prompt_length = prompt_ids.size(1)
        if not self.dna_module.is_embeds_input():
            # `generate` echoes the prompt, so split it off the front.
            prompt_completion_ids = generate_returned_result
            prompt_ids = prompt_completion_ids[:, :prompt_length]
            completion_ids = prompt_completion_ids[:, prompt_length:]
        else:
            # In this case, the input of the LLM backbone is the embedding of the combination of the image and text prompt
            # So the returned result of the `generate` method only contains the completion ids
            completion_ids = generate_returned_result
            prompt_completion_ids = torch.cat([prompt_ids, completion_ids], dim=1)

        # Mask everything after the first EOS token
        # print('completion:', completion_ids)
        # print('generate_returned_result', generate_returned_result, generate_returned_result.shape)
        # print('prompt_inputs["input_ids"]', prompt_inputs["input_ids"], prompt_inputs["input_ids"].shape)
        # print('prompt_ids', prompt_ids, prompt_ids.shape)
        # print('prompt_length', prompt_length)
        # print('prompt_completion_ids', prompt_completion_ids, prompt_completion_ids.shape)
        is_eos = completion_ids == self.processing_class.eos_token_id
        # Default every row to "no EOS" (= full completion length), then overwrite
        # with the index of the first EOS where one exists.
        eos_idx = torch.full((is_eos.size(0),), is_eos.size(1), dtype=torch.long, device=device)
        eos_idx[is_eos.any(dim=1)] = is_eos.int().argmax(dim=1)[is_eos.any(dim=1)]
        sequence_indices = torch.arange(is_eos.size(1), device=device).expand(is_eos.size(0), -1)
        completion_mask = (sequence_indices <= eos_idx.unsqueeze(1)).int()

        # Concatenate prompt_mask with completion_mask for logit computation
        attention_mask = torch.cat([prompt_mask, completion_mask], dim=1)  # (B, P+C)

        # Get the multimodal inputs
        multimodal_keywords = self.dna_module.get_custom_multimodal_keywords()
        multimodal_inputs = {k: prompt_inputs[k] if k in prompt_inputs else None for k in multimodal_keywords}
        with torch.no_grad():
            # When using num_iterations == 1, old_per_token_logps == per_token_logps, so we can skip its
            # computation here, and use per_token_logps.detach() instead.
            if self.num_iterations > 1:
                old_per_token_logps = self._get_per_token_logps(
                    model, prompt_completion_ids, attention_mask, **multimodal_inputs
                )
                old_per_token_logps = old_per_token_logps[:, prompt_length - 1:]
            else:
                old_per_token_logps = None

            # Reference log-probs for the KL penalty; skipped entirely when beta == 0.
            if self.beta == 0.0:
                ref_per_token_logps = None
            elif self.ref_model is not None:
                ref_per_token_logps = self._get_per_token_logps(
                    self.ref_model, prompt_completion_ids, attention_mask, **multimodal_inputs
                )
            else:
                # PEFT case: the base model with adapters disabled serves as the reference.
                with self.accelerator.unwrap_model(model).disable_adapter():
                    ref_per_token_logps = self._get_per_token_logps(
                        model, prompt_completion_ids, attention_mask, **multimodal_inputs
                    )
            if ref_per_token_logps is not None:
                ref_per_token_logps = ref_per_token_logps[:, prompt_length - 1:]

        # Decode the generated completions
        completions_text = self.processing_class.batch_decode(completion_ids, skip_special_tokens=True)
        if is_conversational(inputs[0]):
            completions = [[{"role": "assistant", "content": completion}] for completion in completions_text]
        else:
            completions = completions_text
        # Compute the rewards
        # No need to duplicate prompts as we're not generating multiple completions per prompt
        print("Reward calculation...")
        rewards_per_func = torch.zeros(len(prompts), len(self.reward_funcs), device=device)
        for i, (reward_func, reward_processing_class) in enumerate(
            zip(self.reward_funcs, self.reward_processing_classes)
        ):
            if isinstance(reward_func, PreTrainedModel):
                # Model-based reward: score the concatenated prompt+completion text.
                if is_conversational(inputs[0]):
                    messages = [{"messages": p + c} for p, c in zip(prompts, completions)]
                    texts = [apply_chat_template(x, reward_processing_class)["text"] for x in messages]
                else:
                    texts = [p + c for p, c in zip(prompts, completions)]
                reward_inputs = reward_processing_class(
                    texts, return_tensors="pt", padding=True, padding_side="right", add_special_tokens=False
                )
                reward_inputs = super()._prepare_inputs(reward_inputs)
                with torch.inference_mode():
                    rewards_per_func[:, i] = reward_func(**reward_inputs).logits[:, 0]  # Shape (B*G,)
            else:
                # Callable reward: forward all other dataset columns as kwargs.
                # Repeat all input columns (but "prompt" and "completion") to match the number of generations
                reward_kwargs = {key: [] for key in inputs[0].keys() if key not in ["prompt", "completion"]}
                for key in reward_kwargs:
                    for example in inputs:
                        # No need to duplicate prompts as we're not generating multiple completions per prompt
                        # reward_kwargs[key].extend([example[key]] * self.num_generations)
                        reward_kwargs[key].extend([example[key]])
                output_reward_func = reward_func(prompts=prompts, completions=completions, **reward_kwargs)
                rewards_per_func[:, i] = torch.tensor(output_reward_func, dtype=torch.float32, device=device)

        # Gather rewards across processes
        rewards_per_func = self.accelerator.gather(rewards_per_func)

        # Sum the rewards from all reward functions
        rewards = rewards_per_func.sum(dim=1)

        # Compute grouped-wise rewards
        # Each group consists of num_generations completions for the same prompt
        # NOTE(review): the `view(-1, num_generations)` grouping assumes the gathered
        # rewards are ordered so consecutive entries share a prompt — this relies on
        # the RepeatRandomSampler ordering; confirm when changing the sampler.
        mean_grouped_rewards = rewards.view(-1, self.num_generations).mean(dim=1)
        std_grouped_rewards = rewards.view(-1, self.num_generations).std(dim=1)

        # Normalize the rewards to compute the advantages
        mean_grouped_rewards = mean_grouped_rewards.repeat_interleave(self.num_generations, dim=0)
        std_grouped_rewards = std_grouped_rewards.repeat_interleave(self.num_generations, dim=0)
        advantages = (rewards - mean_grouped_rewards) / (std_grouped_rewards + 1e-4)

        # Get only the local slice of advantages
        process_slice = slice(
            self.accelerator.process_index * len(prompts),
            (self.accelerator.process_index + 1) * len(prompts),
        )
        advantages = advantages[process_slice]

        # Log the metrics
        print("Logging metrics...")
        completion_length = self.accelerator.gather_for_metrics(completion_mask.sum(1)).float().mean().item()
        self._metrics["completion_length"].append(completion_length)

        reward_per_func = self.accelerator.gather_for_metrics(rewards_per_func).mean(0)
        for i, reward_func in enumerate(self.reward_funcs):
            if isinstance(reward_func, PreTrainedModel):
                reward_func_name = reward_func.config._name_or_path.split("/")[-1]
            else:
                reward_func_name = reward_func.__name__
            self._metrics[f"rewards/{reward_func_name}"].append(reward_per_func[i].item())

        self._metrics["reward"].append(self.accelerator.gather_for_metrics(rewards).mean().item())

        self._metrics["reward_std"].append(self.accelerator.gather_for_metrics(std_grouped_rewards).mean().item())

        print(self.log_completions, self.state.global_step, self.args.logging_steps, self.args.report_to)
        if (
            self.log_completions
            and self.state.global_step % self.args.logging_steps == 0
            and "wandb" in self.args.report_to
        ):
            timestamp = time.time()

            # Get the length of one of the other arrays
            num_items = len(gather_object(prompts_text))

            table = {
                "step": [f"{self.state.global_step}_{timestamp}"] * num_items,  # Repeat to match length
                "prompt": gather_object(prompts_text),
                "completion": gather_object(completions_text),
                "reward": rewards.tolist(),
            }
            df = pd.DataFrame(table)

            if wandb.run is not None and self.accelerator.is_main_process:
                wandb.log({f"completions_{self.state.global_step}_{timestamp}": wandb.Table(dataframe=df)})

        return {
            "prompt_ids": prompt_ids,
            "prompt_mask": prompt_mask,
            "completion_ids": completion_ids,
            "completion_mask": completion_mask,
            "old_per_token_logps": old_per_token_logps,
            "ref_per_token_logps": ref_per_token_logps,
            "advantages": advantages,
            "multimodal_inputs": multimodal_inputs
        }
750
+
751
+ def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
752
+ if return_outputs:
753
+ raise ValueError("The GRPOTrainer does not support returning outputs")
754
+
755
+ # Check if we need to generate new completions or use buffered ones
756
+ print("index 1")
757
+ if self.state.global_step % self.num_iterations == 0:
758
+ inputs = self._generate_and_score_completions(inputs, model)
759
+ self._buffered_inputs[self._step % self.args.gradient_accumulation_steps] = inputs
760
+ else:
761
+ inputs = self._buffered_inputs[self._step % self.args.gradient_accumulation_steps]
762
+ self._step += 1
763
+
764
+ print("index 2")
765
+ # Get the prepared inputs
766
+ prompt_ids, prompt_mask = inputs["prompt_ids"], inputs["prompt_mask"]
767
+ completion_ids, completion_mask = inputs["completion_ids"], inputs["completion_mask"]
768
+ multimodal_inputs = inputs["multimodal_inputs"]
769
+
770
+ # Concatenate for full sequence
771
+ input_ids = torch.cat([prompt_ids, completion_ids], dim=1)
772
+ attention_mask = torch.cat([prompt_mask, completion_mask], dim=1)
773
+ print("index 3")
774
+ # Get the current policy's log probabilities
775
+
776
+ print("index 4")
777
+ per_token_logps = self._get_per_token_logps(model, input_ids, attention_mask, **multimodal_inputs)
778
+ # Get rid of the prompt (-1 because of the shift done in get_per_token_logps)
779
+ per_token_logps = per_token_logps[:, prompt_ids.size(1) - 1:]
780
+
781
+ # Get the advantages from inputs
782
+ advantages = inputs["advantages"]
783
+ print("index 5")
784
+ # When using num_iterations == 1, old_per_token_logps == per_token_logps, so we can skip its computation
785
+ # and use per_token_logps.detach() instead
786
+ old_per_token_logps = inputs["old_per_token_logps"] if self.num_iterations > 1 else per_token_logps.detach()
787
+
788
+ # Compute the policy ratio and clipped version
789
+ coef_1 = torch.exp(per_token_logps - old_per_token_logps)
790
+ coef_2 = torch.clamp(coef_1, 1 - self.epsilon_low, 1 + self.epsilon_high)
791
+ per_token_loss1 = coef_1 * advantages.unsqueeze(1)
792
+ per_token_loss2 = coef_2 * advantages.unsqueeze(1)
793
+ per_token_loss = -torch.min(per_token_loss1, per_token_loss2)
794
+ print("index 6")
795
+ # Add KL penalty if beta > 0
796
+ if self.beta > 0:
797
+ ref_per_token_logps = inputs["ref_per_token_logps"]
798
+ per_token_kl = torch.exp(ref_per_token_logps - per_token_logps) - (ref_per_token_logps - per_token_logps) - 1
799
+ per_token_loss = per_token_loss + self.beta * per_token_kl
800
+
801
+ # Log KL divergence
802
+ mean_kl = ((per_token_kl * completion_mask).sum(dim=1) / completion_mask.sum(dim=1)).mean()
803
+ self._metrics["kl"].append(self.accelerator.gather_for_metrics(mean_kl).mean().item())
804
+
805
+ # Compute final loss
806
+ print("Computing final loss...")
807
+ loss = ((per_token_loss * completion_mask).sum(dim=1) / completion_mask.sum(dim=1)).mean()
808
+
809
+ # Log clip ratio
810
+ is_clipped = (per_token_loss1 < per_token_loss2).float()
811
+ clip_ratio = (is_clipped * completion_mask).sum() / completion_mask.sum()
812
+ self._metrics["clip_ratio"].append(self.accelerator.gather_for_metrics(clip_ratio).mean().item())
813
+
814
+ return loss
815
+
816
+ def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None:
817
+ metrics = {key: sum(val) / len(val) for key, val in self._metrics.items()} # average the metrics
818
+ logs = {**logs, **metrics}
819
+ if version.parse(transformers.__version__) >= version.parse("4.47.0.dev0"):
820
+ super().log(logs, start_time)
821
+ else: # transformers<=4.46
822
+ super().log(logs)
823
+ self._metrics.clear()
824
+
825
+ def create_model_card(
826
+ self,
827
+ model_name: Optional[str] = None,
828
+ dataset_name: Optional[str] = None,
829
+ tags: Union[str, list[str], None] = None,
830
+ ):
831
+ """
832
+ Creates a draft of a model card using the information available to the `Trainer`.
833
+
834
+ Args:
835
+ model_name (`str` or `None`, *optional*, defaults to `None`):
836
+ Name of the model.
837
+ dataset_name (`str` or `None`, *optional*, defaults to `None`):
838
+ Name of the dataset used for training.
839
+ tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
840
+ Tags to be associated with the model card.
841
+ """
842
+ if not self.is_world_process_zero():
843
+ return
844
+
845
+ if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path):
846
+ base_model = self.model.config._name_or_path
847
+ else:
848
+ base_model = None
849
+
850
+ tags = tags or []
851
+ if isinstance(tags, str):
852
+ tags = [tags]
853
+
854
+ if hasattr(self.model.config, "unsloth_version"):
855
+ tags.append("unsloth")
856
+
857
+ citation = textwrap.dedent(
858
+ """\
859
+ @article{zhihong2024deepseekmath,
860
+ title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
861
+ author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
862
+ year = 2024,
863
+ eprint = {arXiv:2402.03300},
864
+ """
865
+ )
866
+
867
+ model_card = generate_model_card(
868
+ base_model=base_model,
869
+ model_name=model_name,
870
+ hub_model_id=self.hub_model_id,
871
+ dataset_name=dataset_name,
872
+ tags=tags,
873
+ wandb_url=wandb.run.get_url() if is_wandb_available() and wandb.run is not None else None,
874
+ comet_url=get_comet_experiment_url(),
875
+ trainer_name="GRPO",
876
+ trainer_citation=citation,
877
+ paper_title="DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models",
878
+ paper_id="2402.03300",
879
+ )
880
+
881
+ model_card.save(os.path.join(self.args.output_dir, "README.md"))
882
+
883
+ def _get_train_sampler(self) -> Sampler:
884
+ """Returns a sampler that ensures proper data sampling for GRPO training."""
885
+ effective_batch_size = (
886
+ self.args.per_device_train_batch_size
887
+ * self.accelerator.num_processes
888
+ * self.args.gradient_accumulation_steps
889
+ )
890
+
891
+ return RepeatRandomSampler(
892
+ data_source=self.train_dataset,
893
+ mini_repeat_count=self.num_generations,
894
+ batch_size=effective_batch_size // self.num_generations,
895
+ repeat_count=self.num_iterations,
896
+ seed=self.args.seed,
897
+ )
898
+
899
+ def _get_eval_sampler(self, eval_dataset) -> Sampler:
900
+ """Returns a sampler for evaluation."""
901
+ return RepeatRandomSampler(
902
+ data_source=eval_dataset,
903
+ mini_repeat_count=self.num_generations,
904
+ seed=self.args.seed,
905
+ )
BioReason-main/bioreason/utils/__init__.py ADDED
File without changes