File size: 17,807 Bytes
e34b94f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
import torch
import os
import json
import logging
from typing import List, Dict

# torch.nanstd doesn't exist, so we define it here
def nanstd(tensor: torch.Tensor) -> torch.Tensor:
    """
    Standard deviation of a 1D tensor with NaN entries ignored.

    Applies Bessel's correction (N - 1 denominator) over the non-NaN subset,
    matching `torch.std`'s default behaviour.

    Args:
        tensor (`torch.Tensor`):
            Input tensor of shape `(N,)`.

    Returns:
        `torch.Tensor`:
            Scalar standard deviation, ignoring NaNs.
    """
    center = torch.nanmean(tensor, keepdim=True)
    biased_var = torch.nanmean((tensor - center) ** 2)
    n_valid = torch.sum(~torch.isnan(tensor))
    # Bessel's correction: rescale the biased estimate by N / (N - 1).
    return torch.sqrt(biased_var * (n_valid / (n_valid - 1)))

def nanmax(tensor: torch.Tensor) -> torch.Tensor:
    """
    Maximum of a 1D tensor with NaN entries ignored.

    Args:
        tensor (`torch.Tensor`): Input tensor of shape `(N,)`.

    Returns:
        `torch.Tensor`: Maximum over the non-NaN values; NaN when every value is NaN.
    """
    valid = ~torch.isnan(tensor)
    if not valid.any():
        # All-NaN input: propagate NaN with the input's dtype and device.
        return torch.tensor(float("nan"), dtype=tensor.dtype, device=tensor.device)
    return tensor[valid].max()

def nanmin(tensor: torch.Tensor) -> torch.Tensor:
    """
    Minimum of a 1D tensor with NaN entries ignored.

    Args:
        tensor (`torch.Tensor`): Input tensor of shape `(N,)`.

    Returns:
        `torch.Tensor`: Minimum over the non-NaN values; NaN when every value is NaN.
    """
    valid = ~torch.isnan(tensor)
    if not valid.any():
        # All-NaN input: propagate NaN with the input's dtype and device.
        return torch.tensor(float("nan"), dtype=tensor.dtype, device=tensor.device)
    return tensor[valid].min()


def init_grpo_log_files(output_dir: str) -> tuple[str, str]:
    """
    Create (or reset) the GRPO log files.

    Two files are prepared under ``<output_dir>/../logs``: a human-readable
    text log (seeded with a banner header) and an empty machine-readable
    JSONL file for per-sample records.

    Args:
        output_dir (`str`): Training output directory; logs live beside it.

    Returns:
        `tuple[str, str]`: Paths of (txt_log_path, jsonl_log_path).
    """
    txt_path = os.path.join(output_dir, "../logs/grpo_logs.txt")
    jsonl_path = os.path.join(output_dir, "../logs/grpo_samples.jsonl")

    # Ensure both the output dir and the sibling logs dir exist.
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(os.path.dirname(txt_path), exist_ok=True)

    banner = "=" * 80
    # Truncate the text log and write the header banner.
    with open(txt_path, "w", encoding="utf-8") as handle:
        handle.write(f"{banner}\nGRPO Training Logs - WeaverGRPOTrainer\n{banner}\n\n")

    # Truncate the JSONL file so each run starts from an empty sample log.
    with open(jsonl_path, "w", encoding="utf-8"):
        pass

    return txt_path, jsonl_path


def log_prompt_truncation(
    prompts_before: torch.Tensor,
    prompts_after: torch.Tensor,
    prompt_mask_before: torch.Tensor,
    prompt_mask_after: torch.Tensor,
    processing_class,
    max_prompt_length: int,
    sample_idx: int = 0
) -> None:
    """
    Log one sample's prompt tokens before and after truncation.

    Also warns when image/vision tokens are present before truncation but
    missing afterwards — in that case the model will not see the image.

    Args:
        prompts_before: Prompt token IDs before truncation [batch_size, seq_len_before]
        prompts_after: Prompt token IDs after truncation [batch_size, seq_len_after]
        prompt_mask_before: Attention mask before truncation
        prompt_mask_after: Attention mask after truncation
        processing_class: Tokenizer or processor for decoding
        max_prompt_length: Maximum prompt length configured
        sample_idx: Index of sample to log (default: 0, first sample in batch)
    """
    # A processor wraps the tokenizer; fall back to the object itself.
    _tok = getattr(processing_class, "tokenizer", processing_class)

    # Known Qwen2.5-VL vision token IDs, used even when the tokenizer cannot
    # encode the names below:
    # 151652: <|vision_start|>, 151653: <|vision_end|>,
    # 151654: <|video_pad|>,    151655: <|image_pad|>
    vision_token_ids = [151652, 151653, 151654, 151655]

    # Also resolve vision token IDs from the tokenizer itself (best effort),
    # so non-Qwen vocabularies are covered too.
    vision_token_names = ["<|vision_start|>", "<|vision_end|>", "<|image_pad|>", "<|video_pad|>", "<|vision_pad|>"]
    for token_name in vision_token_names:
        try:
            token_id = _tok.encode(token_name, add_special_tokens=False)
            if isinstance(token_id, list) and len(token_id) > 0:
                if token_id[0] not in vision_token_ids:
                    vision_token_ids.append(token_id[0])
        except Exception:
            pass  # tokenizer may not know this token name; ignore

    # Extract the requested sample and drop padding (mask == 0) positions.
    prompt_before = prompts_before[sample_idx]
    prompt_after = prompts_after[sample_idx]
    mask_before = prompt_mask_before[sample_idx]
    mask_after = prompt_mask_after[sample_idx]
    valid_tokens_before = prompt_before[mask_before.bool()].tolist()
    valid_tokens_after = prompt_after[mask_after.bool()].tolist()

    # Detect vision tokens that were lost to truncation.
    vision_tokens_before = set(valid_tokens_before) & set(vision_token_ids)
    vision_tokens_after = set(valid_tokens_after) & set(vision_token_ids)
    vision_tokens_lost = vision_tokens_before - vision_tokens_after
    has_vision_loss = len(vision_tokens_lost) > 0

    def tokens_to_readable(token_ids):
        """Render token IDs as a readable string; consecutive runs of the
        same image-pad token are collapsed to one entry with a repeat count.

        Fixed: a run of exactly one image-pad token was previously dropped
        from the output (the old counter-based flush required a run >= 2).
        """
        # ANSI escape codes for colors
        GREEN = "\033[92m"
        RESET = "\033[0m"

        tokens = []
        # Pending run of identical image-pad tokens: (token_id, run_length).
        pending_pad = None

        def flush_pad_run():
            # Emit the pending image-pad run (if any) as a single entry.
            nonlocal pending_pad
            if pending_pad is None:
                return
            run_tid, run_len = pending_pad
            pending_pad = None
            try:
                pad_str = _tok.decode([run_tid], skip_special_tokens=False).strip()
            except Exception:
                tokens.append(f"[{run_tid}:?]")
                return
            if run_len > 1:
                tokens.append(f"{GREEN}[IMG]{pad_str}[/IMG]{RESET}×{run_len}")
            else:
                tokens.append(f"{GREEN}[IMG]{pad_str}[/IMG]{RESET}")

        for tid in token_ids:
            try:
                token_str = _tok.decode([tid], skip_special_tokens=False)

                # <|image_pad|> (151655) and other vision pad tokens repeat
                # once per image patch, so collapse consecutive runs.
                is_image_pad = tid == 151655 or (tid in vision_token_ids and 'pad' in token_str.lower())
                if is_image_pad:
                    if pending_pad is not None and pending_pad[0] == tid:
                        pending_pad = (tid, pending_pad[1] + 1)
                    else:
                        flush_pad_run()
                        pending_pad = (tid, 1)
                    continue

                flush_pad_run()
                # Highlight non-pad vision tokens
                if tid in vision_token_ids:
                    tokens.append(f"{GREEN}[IMG]{token_str.strip()}[/IMG]{RESET}")
                # Show special tokens
                elif tid == _tok.pad_token_id:
                    tokens.append("<|pad|>")
                elif tid == _tok.eos_token_id:
                    tokens.append("<|eos|>")
                elif tid == _tok.bos_token_id:
                    tokens.append("<|bos|>")
                elif token_str.strip() in ["<|im_start|>", "<|im_end|>", "<|im_sep|>"]:
                    tokens.append(token_str.strip())
                else:
                    tokens.append(f"[{tid}:{repr(token_str)}]")
            except Exception:
                # Flush first so the error marker cannot corrupt a pad run.
                flush_pad_run()
                tokens.append(f"[{tid}:?]")

        # Emit any run that reaches the end of the sequence.
        flush_pad_run()
        return " ".join(tokens)

    # Log summary information
    logging.info("=" * 80)
    logging.info(f"[PROMPT TRUNCATION] Sample {sample_idx}")
    logging.info(f"Length before truncation: {len(valid_tokens_before)}")
    logging.info(f"Length after truncation: {len(valid_tokens_after)}")
    logging.info(f"Max prompt length: {max_prompt_length}")
    logging.info(f"Tokens truncated: {len(valid_tokens_before) - len(valid_tokens_after)}")

    # Warn if vision tokens were lost
    if has_vision_loss:
        logging.warning("⚠️  WARNING: IMAGE/VISION TOKENS WERE TRUNCATED!")
        logging.warning(f"⚠️  Lost vision token IDs: {vision_tokens_lost}")
        logging.warning(f"⚠️  Vision tokens before: {vision_tokens_before}")
        logging.warning(f"⚠️  Vision tokens after: {vision_tokens_after}")
        logging.warning("⚠️  The model will NOT see the image information!")
    elif len(vision_tokens_before) > 0:
        logging.info(f"✓ Vision tokens preserved: {vision_tokens_before}")

    logging.info("-" * 80)

    # Log tokens before truncation
    logging.info("[BEFORE TRUNCATION]")
    logging.info(f"Tokens: {tokens_to_readable(valid_tokens_before)}")
    logging.info("-" * 80)

    # Log tokens after truncation
    logging.info("[AFTER TRUNCATION]")
    logging.info(f"Tokens: {tokens_to_readable(valid_tokens_after)}")
    logging.info("=" * 80)


def log_rollout_input(
    prompts: torch.Tensor,
    prompt_mask: torch.Tensor,
    processing_class,
    sample_idx: int = 0
) -> None:
    """
    Log the tokens fed to the model right before generation (rollout).

    Args:
        prompts: Prompt token IDs [batch_size, seq_len]
        prompt_mask: Attention mask [batch_size, seq_len]
        processing_class: Tokenizer or processor for decoding
        sample_idx: Index of the sample in the batch to log (default: 0)
    """
    # A processor wraps the tokenizer; fall back to the object itself.
    _tok = getattr(processing_class, "tokenizer", processing_class)

    # Resolve vision/image token IDs from the tokenizer (best effort).
    vision_token_ids = []
    for name in ["<|vision_start|>", "<|vision_end|>", "<|image_pad|>", "<|video_pad|>", "<|vision_pad|>"]:
        try:
            encoded = _tok.encode(name, add_special_tokens=False)
            if isinstance(encoded, list) and len(encoded) > 0:
                vision_token_ids.append(encoded[0])
        except Exception:
            pass  # tokenizer may not know this token name; ignore

    # Pull out one sample and keep only positions where the mask is set.
    sample_tokens = prompts[sample_idx]
    sample_mask = prompt_mask[sample_idx]
    valid_tokens = sample_tokens[sample_mask.bool()].tolist()

    # Does this prompt carry any vision tokens at all?
    vision_tokens_present = set(valid_tokens) & set(vision_token_ids)
    has_vision = len(vision_tokens_present) > 0

    def tokens_to_readable(token_ids):
        """Render token IDs as a readable string with special tokens visible."""
        # ANSI escape codes for colors
        GREEN = "\033[92m"
        RESET = "\033[0m"

        rendered = []
        for tid in token_ids:
            try:
                text = _tok.decode([tid], skip_special_tokens=False)
                stripped = text.strip()
                if tid in vision_token_ids:
                    # Highlight vision tokens
                    rendered.append(f"{GREEN}[IMG]{stripped}[/IMG]{RESET}")
                elif tid == _tok.pad_token_id:
                    rendered.append("<|pad|>")
                elif tid == _tok.eos_token_id:
                    rendered.append("<|eos|>")
                elif tid == _tok.bos_token_id:
                    rendered.append("<|bos|>")
                elif stripped in ["<|im_start|>", "<|im_end|>", "<|im_sep|>"]:
                    rendered.append(stripped)
                else:
                    rendered.append(f"[{tid}:{repr(text)}]")
            except Exception:
                rendered.append(f"[{tid}:?]")
        return " ".join(rendered)

    # Log summary information
    logging.info("=" * 80)
    logging.info(f"[ROLLOUT INPUT] Sample {sample_idx}")
    logging.info(f"Prompt length: {len(valid_tokens)} tokens")
    logging.info(f"Batch shape: {prompts.shape}")

    if has_vision:
        logging.info(f"✓ Contains vision tokens: {vision_tokens_present}")
    else:
        logging.info("ℹ️  No vision tokens detected (text-only prompt)")

    logging.info("-" * 80)

    # Log the tokens themselves plus the fully decoded text
    logging.info("[INPUT TOKENS]")
    logging.info(f"Tokens: {tokens_to_readable(valid_tokens)}")
    logging.info(f"Decoded text: {_tok.decode(valid_tokens, skip_special_tokens=False)}")
    logging.info("=" * 80)


def persist_grpo_logs(
    log_file: str,
    jsonl_file: str,
    step: int,
    mode: str,
    prompt_texts: list[str],
    completion_texts: list[str],
    rewards: list[float],
    rewards_by_func: dict[str, list[float]],
    token_counts: list[int],
    ground_truths: list[str] | None,
    solutions_extracted: list[str] | None,
    verifies: list[bool] | None,
    reward_func_names: list[str],
    stop_reasons: list[str] | None = None,
) -> None:
    """
    Append per-sample human-readable and JSONL logs for GRPO.
    """
    try:
        # Flatten possibly nested lists (from distributed gather)
        def _flatten(lst):
            if isinstance(lst, list) and len(lst) > 0 and isinstance(lst[0], list):
                return [item for sub in lst for item in sub]
            return lst

        prompt_texts = _flatten(prompt_texts)
        completion_texts = _flatten(completion_texts)
        rewards = _flatten(rewards)
        token_counts = _flatten(token_counts)
        rewards_by_func = {k: _flatten(v) for k, v in rewards_by_func.items()}
        stop_reasons = _flatten(stop_reasons) if stop_reasons is not None else None
        ground_truths = _flatten(ground_truths) if ground_truths is not None else None
        solutions_extracted = _flatten(solutions_extracted) if solutions_extracted is not None else None
        verifies = _flatten(verifies) if verifies is not None else None

        # Guard against length mismatches
        n = min(
            len(prompt_texts),
            len(completion_texts),
            len(rewards),
            len(token_counts),
            *[len(rewards_by_func[name]) for name in reward_func_names],
            *( [len(ground_truths)] if ground_truths is not None else [] ),
            *( [len(solutions_extracted)] if solutions_extracted is not None else [] ),
            *( [len(verifies)] if verifies is not None else [] ),
            *( [len(stop_reasons)] if stop_reasons is not None else [] ),
        )
        if n == 0:
            return

        with open(log_file, "a", encoding="utf-8") as f_txt:
            f_txt.write(f"\n{'='*80}\n")
            f_txt.write(f"Step: {step} | Mode: {mode}\n")
            f_txt.write(f"{'='*80}\n")
            for idx in range(n):
                p_txt = prompt_texts[idx]
                c_txt = completion_texts[idx]
                r_total = rewards[idx]
                f_txt.write(f"\n[Sample {idx}]\n")
                f_txt.write(f"Prompt: {p_txt}\n")
                comp_str = ", ".join([f"{name}: {float(rewards_by_func[name][idx]):.6f}" for name in reward_func_names])
                f_txt.write(f"Reward: {float(r_total):.6f} | Components: {comp_str}\n")
                if ground_truths is not None:
                    f_txt.write(f"Ground truth: {ground_truths[idx]}\n")
                if solutions_extracted is not None:
                    f_txt.write(f"Solution: {solutions_extracted[idx]}\n")
                if verifies is not None:
                    f_txt.write(f"Verify: {bool(verifies[idx])}\n")
                s_reason = (
                    stop_reasons[idx]
                    if stop_reasons is not None and idx < len(stop_reasons)
                    else "unknown"
                )
                f_txt.write(f"Stop reason: {s_reason}\n")
                # Always place completion last in the per-sample block
                f_txt.write(f"Completion: {c_txt}\n")
                f_txt.write(f"{'-'*80}\n")

        with open(jsonl_file, "a", encoding="utf-8") as f_jsonl:
            for idx in range(n):
                s_reason = (
                    stop_reasons[idx]
                    if stop_reasons is not None and idx < len(stop_reasons)
                    else "unknown"
                )
                record = {
                    "reward": float(rewards[idx]),
                    "token_count": int(token_counts[idx]),
                    # "step": int(step),
                    # "mode": mode,
                    # "sample_index": int(idx),
                    "stop_reason": s_reason,
                }
                if ground_truths is not None:
                    record["ground_truth"] = ground_truths[idx]
                if solutions_extracted is not None:
                    record["solution"] = solutions_extracted[idx]
                if verifies is not None:
                    record["verify"] = bool(verifies[idx])
                # Ensure completion is always the last field
                record["completion"] = completion_texts[idx]
                f_jsonl.write(json.dumps(record, ensure_ascii=False) + "\n")
    except Exception as e:
        logging.warning(f"Failed to persist GRPO logs: {e}")