# Initial GrandLine implementation: deterministic shard-first dataset preprocessing for LLM pretraining
# Build backend (PEP 517): sdists and wheels are produced by Hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
# Core package metadata (PEP 621).
[project]
name = "grandline"
version = "0.1.0"
description = "Deterministic shard-first dataset preprocessing for LLM pretraining"
readme = "README.md"
requires-python = ">=3.11"
license = { text = "Apache-2.0" }
authors = [{ name = "GrandLine Contributors" }]
# Runtime requirements as PEP 508 specifiers, kept sorted alphabetically.
dependencies = [
    "blake3>=1.0.0",
    "click>=8.1.0",
    "datasets>=3.0.0",
    "duckdb>=1.1.0",
    "pyarrow>=17.0.0",
    "pyyaml>=6.0",
    "tokenizers>=0.20.0",
    "tqdm>=4.66.0",
    "transformers>=4.44.0",
]

# Optional extras; install the development tooling with `pip install -e ".[dev]"`.
[project.optional-dependencies]
dev = [
    "mypy",
    "pytest>=8.0.0",
    "pytest-xdist",
    "ruff",
]

# Console entry point: the `grandline` command calls `main` in `grandline.cli`.
[project.scripts]
grandline = "grandline.cli:main"

[project.urls]
Repository = "https://huggingface.co/dignity045/grandline"
# The package sits under src/ (src layout), so point Hatch at it explicitly
# when building the wheel.
[tool.hatch.build.targets.wheel]
packages = ["src/grandline"]
# Ruff linter settings.
[tool.ruff]
src = ["src"]
line-length = 100
target-version = "py311"  # same floor as requires-python

[tool.ruff.lint]
# E: pycodestyle errors, F: Pyflakes, I: isort, UP: pyupgrade,
# B: flake8-bugbear, SIM: flake8-simplify
select = ["E", "F", "I", "UP", "B", "SIM"]
# mypy type-checking settings.
[tool.mypy]
python_version = "3.11"
mypy_path = "src"
strict = false
# Imports that cannot be resolved (e.g. packages without type stubs) are
# silently treated as `Any` instead of being reported as errors.
ignore_missing_imports = true
# pytest defaults.
[tool.pytest.ini_options]
testpaths = ["tests"]
# Verbose per-test output with shortened tracebacks.
addopts = "-v --tb=short"