lingbot-vla / lingbotvla /models /modeling_layers.py

Upload folder using huggingface_hub

fb11af9 verified 2 months ago

2.04 kB

	# Copyright 2025 The HuggingFace Team. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	from functools import partial

	import torch.nn as nn


	class GradientCheckpointingLayer(nn.Module):
	    """Base class for layers with gradient checkpointing.

	    This class enables gradient checkpointing functionality for a layer. By default, gradient checkpointing is
	    disabled (`gradient_checkpointing = False`). When `model.set_gradient_checkpointing()` is called, gradient
	    checkpointing is enabled by setting `gradient_checkpointing = True` and assigning a checkpointing function to
	    `_gradient_checkpointing_func`.

	    Calling the layer goes through the checkpointing function only when `gradient_checkpointing` is true and the
	    module is in training mode; otherwise the call is forwarded unchanged to `nn.Module.__call__`.

	    Important:

	        When using gradient checkpointing with `use_reentrant=True`, inputs that require gradients (e.g. hidden
	        states) must be passed as positional arguments (`*args`) rather than keyword arguments to properly
	        propagate gradients.

	    Example:

	        ```python
	        >>> # Correct - hidden_states passed as positional arg
	        >>> out = self.layer(hidden_states, attention_mask=attention_mask)

	        >>> # Incorrect - hidden_states passed as keyword arg
	        >>> out = self.layer(hidden_states=hidden_states, attention_mask=attention_mask)
	        ```
	    """

	    # Class-level default; enabling checkpointing overrides this on the instance.
	    gradient_checkpointing = False

	    def __call__(self, *args, **kwargs):
	        """Invoke the layer, routing through the checkpointing function when enabled.

	        Args:
	            *args: Positional inputs. On the checkpointed path these are passed to
	                `_gradient_checkpointing_func` after the callable.
	            **kwargs: Keyword inputs. On the checkpointed path these are bound into the callable with
	                `functools.partial` instead of being passed to `_gradient_checkpointing_func`.

	        Returns:
	            Whatever `nn.Module.__call__` returns for this layer, either directly or via
	            `_gradient_checkpointing_func`.
	        """
	        if self.gradient_checkpointing and self.training:
	            # Bind keyword arguments up front so the checkpointing function only receives the callable
	            # followed by the positional inputs.
	            bound_call = partial(super().__call__, **kwargs)
	            return self._gradient_checkpointing_func(bound_call, *args)
	        return super().__call__(*args, **kwargs)