"""
sparsevlm — Training-free visual token sparsification for VLMs.

Quick start:

    from sparsevlm import apply_sparsevlm, reset_n_vis

    state = apply_sparsevlm(model, n_vis=256)
    reset_n_vis(state, n_vis=256)  # call before every new image
    output = model.generate(...)
"""
|
|
| from .patch import patch_qwen2vl, reset_n_vis, unpatch_qwen2vl, remove_hooks |
| from .generate import sparsevlm_generate |
|
|
|
|
def apply_sparsevlm(
    model,
    n_vis: int = 256,
    target_layers=None,
    min_keep: int = 32,
    tau: float = 0.5,
    theta: float = 0.5,
) -> dict:
    """
    Patch a Qwen2.5-VL model with SparseVLM in a single, training-free call.

    This is a convenience entry point: every argument is forwarded, by
    keyword, to ``patch_qwen2vl`` and its result is returned unchanged.

    Args:
        model:         Qwen2VLForConditionalGeneration
        n_vis:         number of visual tokens per image
                       (Qwen2.5-VL-7B: ~256 for 448px)
        target_layers: layers at which pruning happens
                       (default: every 4th from layer 2)
        min_keep:      lower bound on the visual tokens kept after pruning
        tau:           recycling fraction (paper default: 0.5)
        theta:         cluster ratio (paper default: 0.5)

    Returns:
        The state dict produced by the patch — hand it to reset_n_vis()
        before each new image.
    """
    # Gather the sparsification settings once so the call below stays short.
    sparsify_options = {
        "n_vis": n_vis,
        "target_layers": target_layers,
        "min_keep": min_keep,
        "tau": tau,
        "theta": theta,
    }
    state = patch_qwen2vl(model=model, **sparsify_options)
    return state
|
|
|
|
# Names exported by ``from sparsevlm import *``.
__all__ = [
    "apply_sparsevlm",
    "reset_n_vis",
    "unpatch_qwen2vl",
    "remove_hooks",
    "sparsevlm_generate",
]

# Package version string.
__version__ = "0.1.3"
|
|