SparseVLM / sparsevlm /__init__.py
Aryan3108's picture
Upload folder using huggingface_hub
45c83c9 verified
Raw
History Blame Contribute Delete
1.46 kB
"""
sparsevlm — Training-free visual token sparsification for VLMs.

Quick start:

    from sparsevlm import apply_sparsevlm, reset_n_vis

    state = apply_sparsevlm(model, n_vis=256)
    reset_n_vis(state, n_vis=256)  # call before every new image
    output = model.generate(...)
"""
from .patch import patch_qwen2vl, reset_n_vis, unpatch_qwen2vl, remove_hooks
from .generate import sparsevlm_generate
def apply_sparsevlm(
    model,
    n_vis: int = 256,
    target_layers=None,
    min_keep: int = 32,
    tau: float = 0.5,
    theta: float = 0.5,
) -> dict:
    """
    Enable SparseVLM on a Qwen2.5-VL model in a single call (training-free).

    This is a thin convenience wrapper: every argument is forwarded
    unchanged, by keyword, to ``patch_qwen2vl`` and that function's return
    value is handed straight back to the caller.

    Args:
        model:         Qwen2VLForConditionalGeneration
        n_vis:         visual tokens per image (Qwen2.5-VL-7B: ~256 for 448px)
        target_layers: layers to prune at (default: every 4th from layer 2)
        min_keep:      never prune below this many visual tokens
        tau:           recycling fraction (paper default: 0.5)
        theta:         cluster ratio (paper default: 0.5)

    Returns:
        The state dict produced by ``patch_qwen2vl`` — pass it to
        ``reset_n_vis()`` before each new image.
    """
    # Gather the sparsification settings once so the forwarding call stays
    # short; the keys match patch_qwen2vl's keyword names one-to-one.
    sparsify_options = {
        "n_vis": n_vis,
        "target_layers": target_layers,
        "min_keep": min_keep,
        "tau": tau,
        "theta": theta,
    }
    return patch_qwen2vl(model=model, **sparsify_options)
# Public API of the package: the names exported by ``from sparsevlm import *``.
__all__ = [
    "apply_sparsevlm",
    "reset_n_vis",
    "unpatch_qwen2vl",
    "remove_hooks",
    "sparsevlm_generate",
]

# Package version string.
__version__ = "0.1.3"