""" sparsevlm — Training-free visual token sparsification for VLMs. Quick start: from sparsevlm import apply_sparsevlm, reset_n_vis state = apply_sparsevlm(model, n_vis=256) reset_n_vis(state, n_vis=256) # call before every new image output = model.generate(...) """ from .patch import patch_qwen2vl, reset_n_vis, unpatch_qwen2vl, remove_hooks from .generate import sparsevlm_generate def apply_sparsevlm( model, n_vis: int = 256, target_layers=None, min_keep: int = 32, tau: float = 0.5, theta: float = 0.5, ) -> dict: """ Apply SparseVLM to a Qwen2.5-VL model. One call, no training needed. Args: model: Qwen2VLForConditionalGeneration n_vis: visual tokens per image (Qwen2.5-VL-7B: ~256 for 448px) target_layers: layers to prune at (default: every 4th from layer 2) min_keep: never prune below this many visual tokens tau: recycling fraction (paper default: 0.5) theta: cluster ratio (paper default: 0.5) Returns: state dict — pass to reset_n_vis() before each new image """ return patch_qwen2vl( model=model, n_vis=n_vis, target_layers=target_layers, min_keep=min_keep, tau=tau, theta=theta, ) __all__ = ["apply_sparsevlm", "reset_n_vis", "unpatch_qwen2vl", "remove_hooks", "sparsevlm_generate"] __version__ = "0.1.3"