# gemma-3-tiled-27b-it / configuration_gemma3_tiled.py
# Fraser's picture
# Remove unused import
# e29c68a verified
"""
Configuration for Gemma3 with tiled image processing.
This extends the base Gemma3Config to support dynamic resolution images
by tiling them into a grid of 896x896 patches.
"""
import math
from typing import Optional

from transformers import Gemma3Config
class Gemma3TiledConfig(Gemma3Config):
    """Configuration class for the Gemma3Tiled model.

    Extends ``Gemma3Config`` with tiling-specific parameters for handling
    high-resolution images by splitting them into a grid of tiles. This
    class only stores the tiling parameters; any keyword argument it does
    not name is forwarded unchanged to ``Gemma3Config.__init__``.

    Args:
        tile_size: Size of each tile (default: 896, matching Gemma3's
            vision encoder).
        max_tiles_h: Maximum number of tiles in the height dimension.
        max_tiles_w: Maximum number of tiles in the width dimension.
        min_tiles: Minimum total number of tiles (to ensure some detail).
        linebreak_token_id: Token ID to use for linebreak embedding lookup.
            May be None; the consumer of this config is then expected to
            fall back to the "\\n" token ID (that fallback is not
            implemented in this class).
        **kwargs: Remaining options, passed through to the parent
            ``Gemma3Config`` constructor.
    """

    model_type = "gemma3_tiled"

    def __init__(
        self,
        tile_size: int = 896,
        max_tiles_h: int = 4,
        max_tiles_w: int = 4,
        min_tiles: int = 1,
        linebreak_token_id: Optional[int] = None,
        **kwargs,
    ):
        # Let the parent config consume everything that is not tiling-specific
        # before the tiling attributes are attached to the instance.
        super().__init__(**kwargs)

        self.tile_size = tile_size
        self.max_tiles_h = max_tiles_h
        self.max_tiles_w = max_tiles_w
        self.min_tiles = min_tiles
        self.linebreak_token_id = linebreak_token_id

    @property
    def tokens_per_tile_side(self) -> int:
        """Number of tokens per tile side after projection.

        Returns:
            The integer square root of ``self.mm_tokens_per_image``, rounded
            down (16 when there are 256 tokens per tile).
        """
        # math.isqrt stays in exact integer arithmetic, so there is no float
        # round-trip as with int(x ** 0.5).
        return math.isqrt(self.mm_tokens_per_image)
# Names exported by ``from <this module> import *``; only the config class is public.
__all__ = ["Gemma3TiledConfig"]