# llm-training / pyproject.toml
# Hugging Face Hub listing header (not part of the configuration):
#   uploader: percyraskova
#   commit:   81b3473 (verified), message "Upload folder using huggingface_hub"
# Package metadata (PEP 621) and runtime requirements.
[project]
name = "prolewiki-llm"
version = "0.1.0"
description = "GRPO fine-tuning and reward functions for Marxist-Leninist language models"
readme = "README.md"
license = { text = "AGPL-3.0-only" }
requires-python = ">=3.12"
dependencies = [
    # Core ML
    "transformers>=4.40.0",
    "sentence-transformers>=3.0.0",
    # torch 2.2 is the first release that ships CPython 3.12 wheels, so any
    # lower floor cannot be satisfied under `requires-python` above.
    "torch>=2.2.0",
    # NLP
    "spacy>=3.8.0",
    # Logging
    "wandb>=0.17.0",
    # Data
    "pydantic>=2.0.0",
]
# Optional dependency groups; none of these are installed with the package itself.
[dependency-groups]
# Local development: test runner, linters, type checker and stub packages.
dev = [
    # Testing
    "pytest>=8.0.0",
    "pytest-asyncio>=0.24.0",
    "pytest-cov>=6.0.0",
    "pytest-mock>=3.14.0",
    # Code Quality
    "mypy>=1.13.0",
    "ruff>=0.8.0",
    "pre-commit>=4.0.0",
    # Type Stubs
    "types-PyYAML>=6.0.0",
]

# GRPO training stack (GPU required).
training = [
    "unsloth>=2024.8",
    "trl>=0.9.0",
    "peft>=0.12.0",
    "bitsandbytes>=0.43.0",
    "datasets>=2.20.0",
    "vllm>=0.5.0",
]
# Distributions are built with Hatchling (PEP 517 backend).
[build-system]
build-backend = "hatchling.build"
requires = [
    "hatchling",
]
# The importable package lives under src/, so point the wheel target at it.
[tool.hatch.build.targets.wheel]
packages = [
    "src/prolewiki_llm",
]
# =============================================================================
# PYTEST
# =============================================================================
[tool.pytest.ini_options]
# Collection: tests live in tests/, and src/ is put on the import path.
testpaths = ["tests"]
pythonpath = ["src"]

# pytest-asyncio settings.
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"

# Custom markers registered for this suite ("name: description").
markers = [
    "unit: Fast unit tests for isolated components (no I/O)",
    "integration: Integration tests for shell scripts and Docker behavior",
    "slow: Tests that take significant time (NLI, embedding)",
    "gpu: Tests requiring GPU (training)",
]
# =============================================================================
# RUFF (Linting + Formatting + Import Sorting)
# =============================================================================
# Settings shared by the Ruff linter and formatter.
[tool.ruff]
src = ["src"]
target-version = "py312"
line-length = 100
[tool.ruff.lint]
# Rule families that are switched on.
select = [
    "E",    # pycodestyle errors
    "W",    # pycodestyle warnings
    "F",    # pyflakes
    "I",    # isort (import sorting)
    "B",    # flake8-bugbear
    "C4",   # flake8-comprehensions
    "UP",   # pyupgrade
    "ARG",  # unused arguments
    "SIM",  # flake8-simplify
    "RUF",  # ruff-specific
]
# Individual rules that are switched off project-wide.
ignore = [
    "E501",    # line length handled by formatter
    "RUF001",  # ambiguous unicode (we use Cyrillic/Chinese text)
]
# Path-specific exceptions to the selected lint rules.
[tool.ruff.lint.per-file-ignores]
"tests/**/*.py" = [
    "ARG001",  # unused mock function arguments are common in tests
    "ARG002",  # unused method arguments (fixtures) are common in tests
    "SIM117",  # nested with statements for multiple mocks are clearer
]
"src/prolewiki_llm/**/*.py" = [
    "ARG001",  # **kwargs required by GRPOTrainer reward function interface
]
# Treat the project's own package as first-party when sorting imports.
[tool.ruff.lint.isort]
known-first-party = [
    "prolewiki_llm",
]
# Formatter output: space indentation and double-quoted strings.
[tool.ruff.format]
indent-style = "space"
quote-style = "double"
# =============================================================================
# MYPY (Type Checking)
# =============================================================================
[tool.mypy]
# Target interpreter and where to find the sources (src/ layout).
python_version = "3.12"
mypy_path = ["src"]
explicit_package_bases = true
exclude = ["tests/", "build/", "dist/"]

# Strictness: `strict` plus the individual checks this project spells out.
strict = true
disallow_untyped_defs = true
disallow_incomplete_defs = true
check_untyped_defs = true
no_implicit_optional = true

# Warnings and reporting.
warn_return_any = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_unused_configs = true
show_error_codes = true
# Third-party packages whose imports mypy should accept even when it cannot
# find the module or its type information.
[[tool.mypy.overrides]]
ignore_missing_imports = true
module = [
    "datasets",
    "datasets.*",
    "peft",
    "peft.*",
    "sentence_transformers.*",
    "spacy",
    "spacy.*",
    "torch",
    "torch.*",
    "transformers",
    "transformers.*",
    "trl",
    "trl.*",
    "unsloth",
    "unsloth.*",
    "vllm",
    "vllm.*",
    "wandb",
    "wandb.*",
]
# =============================================================================
# COVERAGE
# =============================================================================
# Measurement: branch coverage of the package only, with test files left out.
[tool.coverage.run]
branch = true
source = [
    "src/prolewiki_llm",
]
omit = [
    "*/tests/*",
]
[tool.coverage.report]
# `exclude_also` adds these regexes to coverage.py's built-in exclusion list
# instead of replacing it, so the default "no cover" pragma pattern stays in
# effect without being re-declared here. Needs coverage >= 7.2.
exclude_also = [
    "if TYPE_CHECKING:",
    "raise NotImplementedError",
]