llm-training / pyproject.toml

Upload folder using huggingface_hub

81b3473 verified about 2 months ago

4.2 kB

	[project]
	# Distribution metadata for the prolewiki-llm package.
	name = "prolewiki-llm"
	version = "0.1.0"
	description = "GRPO fine-tuning and reward functions for Marxist-Leninist language models"
	readme = "README.md"
	requires-python = ">=3.12"
	license = { text = "AGPL-3.0-only" }
	# Runtime requirements installed with the package itself, grouped by role.
	dependencies = [
		# Model loading, embeddings and tensors
		"transformers>=4.40.0",
		"sentence-transformers>=3.0.0",
		"torch>=2.0.0",
		# Linguistic processing
		"spacy>=3.8.0",
		# Experiment tracking
		"wandb>=0.17.0",
		# Data validation / models
		"pydantic>=2.0.0",
	]

	[dependency-groups]
	# Optional groups that are not part of the runtime dependency list.

	# Developer tooling: test runner and plugins, linters, type checker, stubs.
	dev = [
		# Test runner and plugins
		"pytest>=8.0.0",
		"pytest-asyncio>=0.24.0",
		"pytest-cov>=6.0.0",
		"pytest-mock>=3.14.0",
		# Static analysis and commit hooks
		"mypy>=1.13.0",
		"ruff>=0.8.0",
		"pre-commit>=4.0.0",
		# Stub packages for type checking
		"types-PyYAML>=6.0.0",
	]

	# GRPO training stack; these packages need a GPU to be useful.
	training = [
		"unsloth>=2024.8",
		"trl>=0.9.0",
		"peft>=0.12.0",
		"bitsandbytes>=0.43.0",
		"datasets>=2.20.0",
		"vllm>=0.5.0",
	]

	[build-system]
	# Hatchling is both the only build requirement and the backend entry point.
	build-backend = "hatchling.build"
	requires = [
		"hatchling",
	]

	[tool.hatch.build.targets.wheel]
	# The package directory lives under src/, so point the wheel target at it.
	packages = [
		"src/prolewiki_llm",
	]

	# =============================================================================
	# PYTEST
	# =============================================================================
	[tool.pytest.ini_options]
	# Collect tests from tests/ and make the src/ layout importable.
	testpaths = ["tests"]
	pythonpath = ["src"]
	# pytest-asyncio: "auto" mode, with a function-scoped default fixture loop.
	asyncio_mode = "auto"
	asyncio_default_fixture_loop_scope = "function"
	# Markers registered for this project, in "name: description" form.
	markers = [
		"unit: Fast unit tests for isolated components (no I/O)",
		"integration: Integration tests for shell scripts and Docker behavior",
		"slow: Tests that take significant time (NLI, embedding)",
		"gpu: Tests requiring GPU (training)",
	]

	# =============================================================================
	# RUFF (Linting + Formatting + Import Sorting)
	# =============================================================================
	[tool.ruff]
	# Target Python 3.12, matching requires-python in [project].
	target-version = "py312"
	line-length = 100
	# First-party code lives under src/.
	src = [
		"src",
	]

	[tool.ruff.lint]
	# Enabled rule families.
	select = [
		# pycodestyle errors
		"E",
		# pycodestyle warnings
		"W",
		# pyflakes
		"F",
		# isort (import sorting)
		"I",
		# flake8-bugbear
		"B",
		# flake8-comprehensions
		"C4",
		# pyupgrade
		"UP",
		# unused arguments
		"ARG",
		# flake8-simplify
		"SIM",
		# ruff-specific rules
		"RUF",
	]
	# Rules switched off project-wide.
	ignore = [
		# Line length is left to the formatter.
		"E501",
		# Ambiguous-unicode check; the project contains Cyrillic/Chinese text.
		"RUF001",
	]

	[tool.ruff.lint.per-file-ignores]
	# Keys are glob patterns. "**/" recurses through any number of directories,
	# so each pattern covers every .py file below the named root. Without the
	# "**/*" part the pattern only matches a file literally named ".py".
	"tests/**/*.py" = [
		"ARG001", # unused mock function arguments are common in tests
		"ARG002", # unused method arguments (fixtures) are common in tests
		"SIM117", # nested with statements for multiple mocks are clearer
	]
	"src/prolewiki_llm/**/*.py" = [
		"ARG001", # **kwargs required by GRPOTrainer reward function interface
	]

	[tool.ruff.lint.isort]
	# Sort imports of the project's own package as first-party.
	known-first-party = [
		"prolewiki_llm",
	]

	[tool.ruff.format]
	# Formatter output: space indentation and double-quoted strings.
	indent-style = "space"
	quote-style = "double"

	# =============================================================================
	# MYPY (Type Checking)
	# =============================================================================
	[tool.mypy]
	# Interpreter version and where to find the sources (src/ layout).
	python_version = "3.12"
	mypy_path = ["src"]
	explicit_package_bases = true
	# Paths left out of type checking.
	exclude = [
		"tests/",
		"build/",
		"dist/",
	]
	# Strict mode, with individual strictness flags also spelled out explicitly.
	strict = true
	disallow_untyped_defs = true
	disallow_incomplete_defs = true
	check_untyped_defs = true
	no_implicit_optional = true
	# Warnings.
	warn_return_any = true
	warn_unused_configs = true
	warn_redundant_casts = true
	warn_unused_ignores = true
	# Include the error code in each reported message.
	show_error_codes = true

	[[tool.mypy.overrides]]
	# Third-party packages for which missing-import errors are silenced.
	# One line per package: the top-level module and its submodule wildcard.
	ignore_missing_imports = true
	module = [
		"transformers", "transformers.*",
		"sentence_transformers.*",
		"spacy", "spacy.*",
		"torch", "torch.*",
		"wandb", "wandb.*",
		"datasets", "datasets.*",
		"trl", "trl.*",
		"unsloth", "unsloth.*",
		"vllm", "vllm.*",
		"peft", "peft.*",
	]

	# =============================================================================
	# COVERAGE
	# =============================================================================
	[tool.coverage.run]
	# Measure the package sources, including branch coverage.
	source = ["src/prolewiki_llm"]
	branch = true
	# omit takes file-name patterns; the wildcards on both sides make this match
	# any file located under a tests/ directory. A bare "/tests/" has no
	# wildcards and would not match any file.
	omit = ["*/tests/*"]

	[tool.coverage.report]
	# Lines matching any of these patterns are excluded from the report.
	exclude_lines = [
		# Explicit opt-out marker
		"pragma: no cover",
		# Typing-only import guards
		"if TYPE_CHECKING:",
		# Placeholder / abstract method bodies
		"raise NotImplementedError",
	]