Instructions to use katuni4ka/tiny-random-gemma4-dense with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use katuni4ka/tiny-random-gemma4-dense with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="katuni4ka/tiny-random-gemma4-dense", trust_remote_code=True)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("katuni4ka/tiny-random-gemma4-dense", trust_remote_code=True)
model = AutoModelForImageTextToText.from_pretrained("katuni4ka/tiny-random-gemma4-dense", trust_remote_code=True)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use katuni4ka/tiny-random-gemma4-dense with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "katuni4ka/tiny-random-gemma4-dense"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "katuni4ka/tiny-random-gemma4-dense",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker

docker model run hf.co/katuni4ka/tiny-random-gemma4-dense

SGLang

How to use katuni4ka/tiny-random-gemma4-dense with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "katuni4ka/tiny-random-gemma4-dense" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "katuni4ka/tiny-random-gemma4-dense",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "katuni4ka/tiny-random-gemma4-dense" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "katuni4ka/tiny-random-gemma4-dense",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Docker Model Runner
How to use katuni4ka/tiny-random-gemma4-dense with Docker Model Runner:
```
docker model run hf.co/katuni4ka/tiny-random-gemma4-dense
```

tiny-random-gemma4-dense / configuration_gemma4.py

katuni4ka

Upload 10 files

9c3dbaf verified 26 days ago

raw

history blame contribute delete

11.7 kB

	# Copyright 2026 the HuggingFace Team. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	# Adapted for transformers 4.57.1 (trust_remote_code=True usage).
	# Changes from the 5.x source:
	# - Dropped @strict, @auto_docstring decorators
	# - Dropped base_model_tp_plan, base_model_pp_plan, sub_configs class attributes
	# - Converted @dataclass-style __post_init__ to regular __init__ with explicit parameters
	# - Nested configs handled via isinstance(x, dict) checks in __init__

	from transformers.configuration_utils import PretrainedConfig
	from transformers.utils import logging


	logger = logging.get_logger(__name__)


	class Gemma4AudioConfig(PretrainedConfig):
	r"""
	Configuration for the Gemma 4 Audio encoder.
	"""

	model_type = "gemma4_audio"

	def __init__(
	self,
	hidden_size=1024,
	num_hidden_layers=12,
	num_attention_heads=8,
	hidden_act="silu",
	subsampling_conv_channels=None,
	conv_kernel_size=5,
	residual_weight=0.5,
	attention_chunk_size=12,
	attention_context_left=13,
	attention_context_right=0,
	attention_logit_cap=50.0,
	attention_invalid_logits_value=-1.0e9,
	use_clipped_linears=True,
	rms_norm_eps=1e-6,
	gradient_clipping=1e10,
	output_proj_dims=1536,
	initializer_range=0.02,
	**kwargs,
	):
	self.hidden_size = hidden_size
	self.num_hidden_layers = num_hidden_layers
	self.num_attention_heads = num_attention_heads
	self.hidden_act = hidden_act
	self.subsampling_conv_channels = subsampling_conv_channels if subsampling_conv_channels is not None else [128, 32]
	# JSON serialization converts tuples to lists, ensure list
	if isinstance(self.subsampling_conv_channels, tuple):
	self.subsampling_conv_channels = list(self.subsampling_conv_channels)
	self.conv_kernel_size = conv_kernel_size
	self.residual_weight = residual_weight
	self.attention_chunk_size = attention_chunk_size
	self.attention_context_left = attention_context_left
	self.attention_context_right = attention_context_right
	self.attention_logit_cap = attention_logit_cap
	self.attention_invalid_logits_value = attention_invalid_logits_value
	self.use_clipped_linears = use_clipped_linears
	self.rms_norm_eps = rms_norm_eps
	self.gradient_clipping = gradient_clipping
	self.output_proj_dims = output_proj_dims
	self.initializer_range = initializer_range
	super().__init__(**kwargs)


	class Gemma4TextConfig(PretrainedConfig):
	r"""
	Configuration for the Gemma 4 text (decoder) model.
	"""

	model_type = "gemma4_text"
	keys_to_ignore_at_inference = ["past_key_values"]

	def __init__(
	self,
	vocab_size=262144,
	hidden_size=2304,
	intermediate_size=9216,
	num_hidden_layers=30,
	num_attention_heads=8,
	num_key_value_heads=4,
	head_dim=256,
	hidden_activation="gelu_pytorch_tanh",
	max_position_embeddings=131072,
	initializer_range=0.02,
	rms_norm_eps=1e-6,
	use_cache=True,
	pad_token_id=0,
	eos_token_id=1,
	bos_token_id=2,
	tie_word_embeddings=True,
	rope_parameters=None,
	attention_bias=False,
	attention_dropout=0.0,
	sliding_window=512,
	layer_types=None,
	final_logit_softcapping=None,
	use_bidirectional_attention=None,
	vocab_size_per_layer_input=262144,
	hidden_size_per_layer_input=256,
	num_global_key_value_heads=None,
	global_head_dim=512,
	attention_k_eq_v=False,
	num_kv_shared_layers=0,
	enable_moe_block=False,
	use_double_wide_mlp=False,
	num_experts=None,
	top_k_experts=None,
	moe_intermediate_size=None,
	**kwargs,
	):
	self.vocab_size = vocab_size
	self.hidden_size = hidden_size
	self.intermediate_size = intermediate_size
	self.num_hidden_layers = num_hidden_layers
	self.num_attention_heads = num_attention_heads
	self.num_key_value_heads = num_key_value_heads
	self.head_dim = head_dim
	self.hidden_activation = hidden_activation
	self.max_position_embeddings = max_position_embeddings
	self.initializer_range = initializer_range
	self.rms_norm_eps = rms_norm_eps
	self.use_cache = use_cache
	self.rope_parameters = rope_parameters
	self.attention_bias = attention_bias
	self.attention_dropout = attention_dropout
	self.sliding_window = sliding_window
	self.layer_types = layer_types
	self.final_logit_softcapping = final_logit_softcapping
	self.use_bidirectional_attention = use_bidirectional_attention
	self.vocab_size_per_layer_input = vocab_size_per_layer_input
	self.hidden_size_per_layer_input = hidden_size_per_layer_input
	self.num_global_key_value_heads = num_global_key_value_heads
	self.global_head_dim = global_head_dim
	self.attention_k_eq_v = attention_k_eq_v
	self.num_kv_shared_layers = num_kv_shared_layers
	self.enable_moe_block = enable_moe_block
	self.use_double_wide_mlp = use_double_wide_mlp
	self.num_experts = num_experts
	self.top_k_experts = top_k_experts
	self.moe_intermediate_size = moe_intermediate_size

	# Reproduce __post_init__ logic from the 5.x source
	if self.use_bidirectional_attention == "all":
	self.sliding_window = (self.sliding_window // 2) + 1

	if self.layer_types is None:
	sliding_window_pattern = 6 # by default 5:1
	self.layer_types = [
	"sliding_attention" if bool((i + 1) % sliding_window_pattern) else "full_attention"
	for i in range(self.num_hidden_layers)
	]

	if self.layer_types and (last_layer_type := self.layer_types[-1]) != "full_attention":
	logger.warning(
	f"Last layer must use `full_attention`, but got `{last_layer_type}`. Forcing last layer to `full_attention`."
	)
	self.layer_types[-1] = "full_attention"

	default_rope_params = {
	"sliding_attention": {"rope_type": "default", "rope_theta": 10_000.0},
	"full_attention": {"rope_type": "proportional", "partial_rotary_factor": 0.25, "rope_theta": 1_000_000.0},
	}
	if self.rope_parameters is None:
	self.rope_parameters = default_rope_params

	super().__init__(
	pad_token_id=pad_token_id,
	eos_token_id=eos_token_id,
	bos_token_id=bos_token_id,
	tie_word_embeddings=tie_word_embeddings,
	**kwargs,
	)


	class Gemma4VisionConfig(PretrainedConfig):
	r"""
	Configuration for the Gemma 4 Vision encoder.
	"""

	model_type = "gemma4_vision"
	default_theta = 100.0

	def __init__(
	self,
	hidden_size=768,
	intermediate_size=3072,
	num_hidden_layers=16,
	num_attention_heads=12,
	num_key_value_heads=12,
	head_dim=64,
	hidden_activation="gelu_pytorch_tanh",
	rms_norm_eps=1e-6,
	max_position_embeddings=131072,
	attention_bias=False,
	attention_dropout=0.0,
	rope_parameters=None,
	pooling_kernel_size=3,
	patch_size=16,
	position_embedding_size=10240,
	use_clipped_linears=False,
	standardize=False,
	initializer_range=0.02,
	**kwargs,
	):
	self.hidden_size = hidden_size
	self.intermediate_size = intermediate_size
	self.num_hidden_layers = num_hidden_layers
	self.num_attention_heads = num_attention_heads
	self.num_key_value_heads = num_key_value_heads
	self.head_dim = head_dim
	self.hidden_activation = hidden_activation
	self.rms_norm_eps = rms_norm_eps
	self.max_position_embeddings = max_position_embeddings
	self.attention_bias = attention_bias
	self.attention_dropout = attention_dropout
	self.rope_parameters = rope_parameters
	self.pooling_kernel_size = pooling_kernel_size
	self.patch_size = patch_size
	self.position_embedding_size = position_embedding_size
	self.use_clipped_linears = use_clipped_linears
	self.standardize = standardize
	self.initializer_range = initializer_range

	if self.rope_parameters is None:
	self.rope_parameters = {"rope_type": "default", "rope_theta": 100.0}

	super().__init__(**kwargs)


	class Gemma4Config(PretrainedConfig):
	r"""
	Configuration for Gemma4ForConditionalGeneration (multimodal: text + vision + audio).

	Example::

	>>> from configuration_gemma4 import Gemma4Config
	>>> cfg = Gemma4Config()
	>>> print(cfg.model_type)
	gemma4
	"""

	model_type = "gemma4"

	def __init__(
	self,
	text_config=None,
	vision_config=None,
	audio_config=None,
	boi_token_id=255999,
	eoi_token_id=258882,
	image_token_id=258880,
	video_token_id=258884,
	boa_token_id=256000,
	eoa_token_index=258883,
	audio_token_id=258881,
	initializer_range=0.02,
	tie_word_embeddings=True,
	**kwargs,
	):
	# Handle text_config
	if text_config is None:
	self.text_config = Gemma4TextConfig()
	logger.info("text_config is None. Using default Gemma4TextConfig.")
	elif isinstance(text_config, dict):
	self.text_config = Gemma4TextConfig(**text_config)
	else:
	self.text_config = text_config

	# Handle vision_config
	if vision_config is None:
	logger.info("vision_config is None. Gemma4Model.vision_tower will not be initialized.")
	self.vision_config = None
	elif isinstance(vision_config, dict):
	self.vision_config = Gemma4VisionConfig(**vision_config)
	else:
	self.vision_config = vision_config

	# Handle audio_config
	if audio_config is None:
	logger.info("audio_config is None. Gemma4Model.audio_tower will not be initialized.")
	self.audio_config = None
	elif isinstance(audio_config, dict):
	self.audio_config = Gemma4AudioConfig(**audio_config)
	else:
	self.audio_config = audio_config

	self.boi_token_id = boi_token_id
	self.eoi_token_id = eoi_token_id
	self.image_token_id = image_token_id
	self.video_token_id = video_token_id
	self.boa_token_id = boa_token_id
	self.eoa_token_index = eoa_token_index
	self.audio_token_id = audio_token_id
	self.initializer_range = initializer_range

	super().__init__(
	tie_word_embeddings=tie_word_embeddings,
	**kwargs,
	)

	def get_text_config(self, decoder=False): # noqa: ARG002
	return self.text_config


	__all__ = ["Gemma4AudioConfig", "Gemma4Config", "Gemma4TextConfig", "Gemma4VisionConfig"]