# File size: 3,406 Bytes

# Packaging backend: builds are delegated to Hatchling.
[build-system]
build-backend = "hatchling.build"
requires = [
    "hatchling",
]

# Core distribution metadata.
[project]
name = "BioRLHF"
version = "0.2.0"
description = "Biological Reinforcement Learning from Human Feedback - Fine-tuning LLMs for biological reasoning with verifier-based GRPO and calibrated uncertainty"
readme = "README.md"

# Minimum supported interpreter. Keep in sync with the Python classifiers and
# with the black / ruff / mypy target versions configured further down.
requires-python = ">=3.9"

# SPDX license expression.
license = "MIT"

authors = [{ name = "JangKeun Kim", email = "jangkeun.kim@med.cornell.edu" }]
# Free-form search keywords published with the package metadata.
keywords = [
    "machine-learning",
    "llm",
    "fine-tuning",
    "biology",
    "transcriptomics",
    "rlhf",
    "dpo",
    "grpo",
    "verifiers",
    "spaceflight",
    "ai-safety",
    "uncertainty-calibration",
]
# Trove classifiers. No "License ::" classifier is listed on purpose: the
# license is already declared as an SPDX expression (`license = "MIT"`), and
# PEP 639 deprecates license classifiers when a license expression is present
# (some build backends reject the combination outright).
# The per-version Python classifiers mirror `requires-python` and the
# black `target-version` list.
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Science/Research",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Bio-Informatics",
]
# Runtime requirements installed together with the package. Every entry sets
# only a minimum version; no upper bounds are pinned.
dependencies = [
    "torch>=2.0.0",
    "transformers>=4.36.0",
    "datasets>=2.14.0",
    "accelerate>=0.24.0",
    "peft>=0.6.0",
    "trl>=0.14.0",
    "bitsandbytes>=0.41.0",
    "wandb>=0.15.0",
    "pandas>=2.0.0",
    "numpy>=1.24.0",
    "scipy>=1.10.0",
    "scikit-learn>=1.3.0",
    "tqdm>=4.65.0",
    "jsonlines>=3.1.0",
]

# Optional extras, selected at install time (e.g. `pip install "BioRLHF[dev]"`).
[project.optional-dependencies]
# Kept separate from the runtime dependencies so it is only installed on request.
flash-attn = ["flash-attn>=2.0.0"]

# Test, formatting, linting and type-checking tools; each of them is
# configured in a [tool.*] table of this file, except pre-commit.
dev = [
    "pytest>=7.0.0",
    "pytest-cov>=4.0.0",
    "black>=23.0.0",
    "ruff>=0.1.0",
    "mypy>=1.0.0",
    "pre-commit>=3.0.0",
]

# Links published with the package metadata; all of them point at the same
# GitHub repository (the documentation link targets its README).
[project.urls]
Homepage = "https://github.com/jang1563/BioRLHF"
Documentation = "https://github.com/jang1563/BioRLHF#readme"
Repository = "https://github.com/jang1563/BioRLHF"
Issues = "https://github.com/jang1563/BioRLHF/issues"

# Console commands created on install. Each value is `module:function`; all
# three entry points resolve to functions in the `biorlhf.cli` module.
[project.scripts]
biorlhf-train = "biorlhf.cli:train"
biorlhf-evaluate = "biorlhf.cli:evaluate"
biorlhf-grpo = "biorlhf.cli:grpo_train"

# Paths selected for the source distribution (leading "/" anchors them at the
# project root).
[tool.hatch.build.targets.sdist]
include = ["/src", "/data"]

# The wheel ships the `biorlhf` package from the src/ layout.
[tool.hatch.build.targets.wheel]
packages = [
    "src/biorlhf",
]

# Black formatter settings.
[tool.black]
# Same limit as `line-length` in [tool.ruff].
line-length = 88
# One entry per Python version listed in the project classifiers.
target-version = ["py39", "py310", "py311", "py312"]
# Regex selecting the files to format: names ending in .py or .pyi.
include = '\.pyi?$'
# Regex of directories to skip. Literal multi-line string, so the backslashes
# are passed through unescaped.
exclude = '''
/(
    \.git
    | \.hg
    | \.mypy_cache
    | \.tox
    | \.venv
    | _build
    | buck-out
    | build
    | dist
    | wandb
)/
'''

# Ruff settings. General options stay at the top level; linter options live
# under [tool.ruff.lint]. Top-level `select` / `ignore` and [tool.ruff.isort]
# are deprecated since Ruff 0.2 and produce a warning on every run.
[tool.ruff]
# Same limit as `line-length` in [tool.black].
line-length = 88
# Lowest Python version the code must stay compatible with.
target-version = "py39"

[tool.ruff.lint]
select = [
    "E",   # pycodestyle errors
    "W",   # pycodestyle warnings
    "F",   # pyflakes
    "I",   # isort
    "B",   # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
]
ignore = [
    "E501",  # line too long (handled by black)
    "B008",  # do not perform function calls in argument defaults
]

[tool.ruff.lint.isort]
# Treat `biorlhf` imports as first-party when sorting import blocks.
known-first-party = ["biorlhf"]

# mypy type-checker settings.
[tool.mypy]
# Type-check against the oldest supported interpreter.
python_version = "3.9"

# Do not report imports that mypy cannot resolve.
ignore_missing_imports = true

# Extra warnings enabled on top of the defaults.
warn_unused_configs = true
warn_return_any = true

# pytest settings.
[tool.pytest.ini_options]
# Directory searched when pytest is run without explicit paths.
testpaths = ["tests"]
# Only files matching this glob are collected as test modules.
python_files = ["test_*.py"]
# Always run verbosely and with coverage of `biorlhf` (requires pytest-cov);
# `term-missing` lists the uncovered lines in the terminal report.
addopts = "-v --cov=biorlhf --cov-report=term-missing"

# coverage.py measurement settings.
[tool.coverage.run]
# Record branch coverage in addition to line coverage.
branch = true
source = [
    "src/biorlhf",
]

# coverage.py reporting settings.
[tool.coverage.report]
# Regexes; lines that match are left out of the coverage report.
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "raise AssertionError",
    "raise NotImplementedError",
]