# Package metadata and runtime dependencies.
[project]
name = "prolewiki-llm"
version = "0.1.0"
description = "GRPO fine-tuning and reward functions for Marxist-Leninist language models"
readme = "README.md"
license = { text = "AGPL-3.0-only" }
requires-python = ">=3.12"
dependencies = [
    # Core ML stack
    "transformers>=4.40.0",
    "sentence-transformers>=3.0.0",
    "torch>=2.0.0",
    # NLP
    "spacy>=3.8.0",
    # Experiment logging
    "wandb>=0.17.0",
    # Data
    "pydantic>=2.0.0",
]

# Optional dependency groups; not part of the runtime dependency list above.
[dependency-groups]
dev = [
    # Testing
    "pytest>=8.0.0",
    "pytest-asyncio>=0.24.0",
    "pytest-cov>=6.0.0",
    "pytest-mock>=3.14.0",
    # Code quality
    "mypy>=1.13.0",
    "ruff>=0.8.0",
    "pre-commit>=4.0.0",
    # Type stubs
    "types-PyYAML>=6.0.0",
]
training = [
    # GRPO training (GPU required)
    "unsloth>=2024.8",
    "trl>=0.9.0",
    "peft>=0.12.0",
    "bitsandbytes>=0.43.0",
    "datasets>=2.20.0",
    "vllm>=0.5.0",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# The package lives under src/, so the wheel target is pointed at it explicitly.
[tool.hatch.build.targets.wheel]
packages = ["src/prolewiki_llm"]

# Pytest
[tool.pytest.ini_options]
pythonpath = ["src"]
testpaths = ["tests"]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
# Each entry is "<marker name>: <description>".
markers = [
    "unit: Fast unit tests for isolated components (no I/O)",
    "integration: Integration tests for shell scripts and Docker behavior",
    "slow: Tests that take significant time (NLI, embedding)",
    "gpu: Tests requiring GPU (training)",
]

# Ruff: linting, formatting and import sorting
[tool.ruff]
line-length = 100
target-version = "py312"
src = ["src"]

[tool.ruff.lint]
select = [
    "E",   # pycodestyle errors
    "W",   # pycodestyle warnings
    "F",   # pyflakes
    "I",   # isort (import sorting)
    "B",   # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
    "ARG", # unused arguments
    "SIM", # flake8-simplify
    "RUF", # ruff-specific
]
ignore = [
    "E501",   # line length is left to the formatter
    "RUF001", # ambiguous unicode (the project uses Cyrillic/Chinese text)
]

[tool.ruff.lint.per-file-ignores]
"tests/**/*.py" = [
    "ARG001", # unused mock function arguments are common in tests
    "ARG002", # unused method arguments (fixtures) are common in tests
    "SIM117", # nested with statements for multiple mocks are clearer
]
"src/prolewiki_llm/**/*.py" = [
    "ARG001", # **kwargs required by GRPOTrainer reward function interface
]

[tool.ruff.lint.isort]
known-first-party = ["prolewiki_llm"]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"

# Mypy: type checking
[tool.mypy]
python_version = "3.12"
strict = true
mypy_path = ["src"]
explicit_package_bases = true
# The individual checks below are spelled out explicitly alongside `strict`.
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
disallow_incomplete_defs = true
check_untyped_defs = true
no_implicit_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
show_error_codes = true
exclude = ["tests/", "build/", "dist/"]

# Third-party packages for which missing-import errors are suppressed.
[[tool.mypy.overrides]]
module = [
    "transformers",
    "transformers.*",
    "sentence_transformers.*",
    "spacy",
    "spacy.*",
    "torch",
    "torch.*",
    "wandb",
    "wandb.*",
    "datasets",
    "datasets.*",
    "trl",
    "trl.*",
    "unsloth",
    "unsloth.*",
    "vllm",
    "vllm.*",
    "peft",
    "peft.*",
]
ignore_missing_imports = true

# Coverage
[tool.coverage.run]
source = ["src/prolewiki_llm"]
branch = true
omit = ["*/tests/*"]

[tool.coverage.report]
# Lines matching any of these patterns are left out of the coverage report.
exclude_lines = [
    "pragma: no cover",
    "if TYPE_CHECKING:",
    "raise NotImplementedError",
]