# grandline / pyproject.toml
# dignity045's picture
# Initial GrandLine implementation: deterministic shard-first dataset preprocessing for LLM pretraining
# ed59144 verified
# PEP 517 build configuration: the package is built with the hatchling backend.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
# Core package metadata (PEP 621).
[project]
name = "grandline"
version = "0.1.0"
description = "Deterministic shard-first dataset preprocessing for LLM pretraining"
readme = "README.md"
requires-python = ">=3.11"
license = { text = "Apache-2.0" }
authors = [{ name = "GrandLine Contributors" }]
# Runtime requirements as PEP 508 specifiers, kept in alphabetical order.
dependencies = [
    "blake3>=1.0.0",
    "click>=8.1.0",
    "datasets>=3.0.0",
    "duckdb>=1.1.0",
    "pyarrow>=17.0.0",
    "pyyaml>=6.0",
    "tokenizers>=0.20.0",
    "tqdm>=4.66.0",
    # transformers 4.44.x pins tokenizers<0.20, so 4.45.0 is the first release
    # that can be installed together with the tokenizers floor above. Keeping
    # the floor honest lets lowest-bound resolution succeed.
    "transformers>=4.45.0",
]
# Extras installable via `pip install grandline[dev]`: test runner, linter, type checker.
[project.optional-dependencies]
dev = [
    "mypy",
    "pytest>=8.0.0",
    "pytest-xdist",
    "ruff",
]
# Console script: installs a `grandline` command that calls `main` in `grandline.cli`.
[project.scripts]
grandline = "grandline.cli:main"
# Project links published in the package metadata.
[project.urls]
Repository = "https://huggingface.co/dignity045/grandline"
# The importable package lives under src/ (src layout), so the wheel target is
# pointed at it explicitly.
[tool.hatch.build.targets.wheel]
packages = ["src/grandline"]
# Ruff settings. `src` marks the first-party source root; `target-version`
# matches the `requires-python = ">=3.11"` floor declared under [project].
[tool.ruff]
src = ["src"]
line-length = 100
target-version = "py311"
# Enabled lint rule families.
[tool.ruff.lint]
select = [
    "E",    # pycodestyle errors
    "F",    # Pyflakes
    "I",    # isort (import ordering)
    "UP",   # pyupgrade
    "B",    # flake8-bugbear
    "SIM",  # flake8-simplify
]
# mypy settings: non-strict checking against Python 3.11, with modules
# resolved from src/.
[tool.mypy]
python_version = "3.11"
mypy_path = "src"
strict = false
# Suppresses errors for imports mypy cannot resolve.
# NOTE(review): presumably needed because some dependencies ship no type
# information — confirm which ones and consider per-module overrides instead.
ignore_missing_imports = true
# pytest defaults: collect from tests/ when no path is given on the command
# line; always run verbose with short tracebacks.
[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = "-v --tb=short"