Add sparse transformer v19 with Triton-backed KNN scheduler and various backward modes. Includes utilities for synthetic data generation and model training. Implements chunked sparse updates and integrates with existing sparse linear layers.
[build-system]
# setuptools 61 is the first release that reads package metadata from the
# [project] table, so it is the minimum usable build requirement here.
requires = ["setuptools>=61"]
build-backend = "setuptools.build_meta"

[project]
name = "surprise-topk-gradient"
version = "0.1.0"
description = "Prototype for surprise Top-K gradient training experiments"
requires-python = ">=3.10"
dependencies = [
    "numpy>=1.24",
    "torch>=2.0",
]

[project.scripts]
# Installs a `surprise-topk-gradient` console command that invokes the `main`
# object of the `surprise_topk_gradient_prototype` module.
surprise-topk-gradient = "surprise_topk_gradient_prototype:main"

[tool.setuptools]
# The distribution ships one top-level module rather than a package
# directory, so it is listed explicitly instead of relying on discovery.
py-modules = ["surprise_topk_gradient_prototype"]