# dataforge-playground / pyproject.toml
# Praneshrajan15's picture
# Deploy DataForge playground API
# 791c076 verified
# Core distribution metadata (PEP 621).
[project]
name = "dataforge15"
version = "0.1.0rc1"
description = "DataForge15: CLI-first data-quality detection and reversible repair for tabular data."
readme = "README.md"
# SPDX license expression given in string form.
license = "Apache-2.0"
# Only the 3.11 and 3.12 interpreter lines are accepted; the classifiers
# below advertise the same two versions.
requires-python = ">=3.11,<3.13"
keywords = ["data-quality", "ai-agent", "llm", "rl", "smt", "dbt"]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]
# Requirements installed with the base package (no extras selected).
dependencies = [
    "pydantic>=2.7",
    "typer>=0.24,<0.25",
    "rich>=13.7",
    "textual>=8.2,<9",
    "z3-solver>=4.13",
    "pyyaml>=6.0",
]
# Optional extras, installable as `dataforge15[<extra>]`.
# Several requirements are repeated across extras on purpose or by history;
# each extra is self-contained and does not reference another (except `all`).
[project.optional-dependencies]
bench = [
    "pandas>=2.2",
    "httpx>=0.27",
    "tenacity>=8.3",
    "python-dotenv>=1.0",
    "pyarrow>=16.0",
]
causal = [
    "pandas>=2.2",
    "numpy>=1.26",
    "networkx>=3.3",
    "causal-learn>=0.1.4",
    "hyppo>=0.5.2",
    "scipy>=1.13",
]
dev = [
    # Test runner, its plugins, and property/mutation testing.
    "pytest>=9.0.3",
    "pytest-cov>=5.0",
    "pytest-benchmark>=4.0",
    "pytest-xdist>=3.6",
    "hypothesis>=6.100",
    "mutmut>=3.5",
    # Packaging, dependency audit, and SBOM generation.
    "build>=1.2",
    "pip-audit>=2.10,<3",
    "cyclonedx-bom>=7.3,<8",
    # NOTE(review): these look like minimum-version floors for packages that
    # are otherwise pulled in transitively — confirm intent.
    "cryptography>=46.0.7",
    "idna>=3.15",
    "pip>=26.1.1",
    "urllib3>=2.7",
    # Lint, type checking, and stub packages.
    "ruff>=0.11",
    "mypy>=1.10",
    "pandas-stubs>=2.2",
    "types-PyYAML",
    # Same exact pin as the `train` extra.
    "huggingface_hub==1.13.0",
    # Requirements that also appear in the bench/providers, causal, and
    # openenv extras.
    "httpx>=0.27",
    "tenacity>=8.3",
    "python-dotenv>=1.0",
    "pyarrow>=16.0",
    "networkx>=3.3",
    "causal-learn>=0.1.4",
    "hyppo>=0.5.2",
    "scipy>=1.13",
    "sqlglot>=25.0",
    "duckdb>=1.0",
]
# Every requirement in `train` is pinned to an exact version.
train = [
    "trl==1.4.0",
    "transformers==5.7.0",
    "accelerate==1.13.0",
    "peft==0.19.1",
    "bitsandbytes==0.49.2",
    "datasets==4.8.5",
    "huggingface_hub==1.13.0",
    "pyyaml==6.0.3",
    "pandas==2.3.3",
    "tensorboard==2.20.0",
]
eval = [
    "matplotlib>=3.9",
    "seaborn>=0.13",
]
providers = [
    "httpx>=0.27",
    "tenacity>=8.3",
    "python-dotenv>=1.0",
]
pandas = [
    "pandas>=2.2",
]
playground = [
    "pandas>=2.2",
    "fastapi>=0.136.1",
    "starlette>=1.0.1,<2",
    "uvicorn[standard]>=0.35",
    "python-multipart>=0.0.27",
    "slowapi>=0.1.9",
]
openenv = [
    "pandas>=2.2",
    "openenv-core[core]>=0.2.2",
    # 1.7.0 is explicitly excluded in addition to the >=1.7.1 floor.
    "authlib>=1.7.1,!=1.7.0",
    "cryptography>=46.0.7",
    "duckdb>=1.0",
    "sqlglot>=25.0",
    "scipy>=1.13",
    "networkx>=3.3",
    "causal-learn>=0.1.4",
    "hyppo>=0.5.2",
]
# Aggregate extra: a self-reference that selects every extra defined above.
all = [
    "dataforge15[bench,causal,dev,eval,pandas,playground,providers,train,openenv]",
]
# Console commands. Both names resolve to the same object, `app` in
# `dataforge.cli`, so `dataforge` and `dataforge15` are interchangeable.
[project.scripts]
dataforge = "dataforge.cli:app"
dataforge15 = "dataforge.cli:app"
# Build backend declaration (PEP 517/518).
# [project] declares `license = "Apache-2.0"` as a plain SPDX string (PEP 639).
# setuptools only accepts that form from the 77.0 series onward; older releases
# reject the metadata at build time, so the floor must not be any lower.
[build-system]
requires = ["setuptools>=77.0.3", "wheel"]
build-backend = "setuptools.build_meta"
# Package discovery: search from the project root and keep only `dataforge`
# and its subpackages; `data_quality_env` is explicitly left out.
[tool.setuptools.packages.find]
where = ["."]
include = ["dataforge", "dataforge.*"]
exclude = ["data_quality_env", "data_quality_env.*"]

# Non-Python files bundled with the `dataforge` package. Patterns are
# relative to the package directory.
[tool.setuptools.package-data]
dataforge = [
    "py.typed",
    "fixtures/*.csv",
    "fixtures/*.yaml",
    "datasets/embedded/**/*.csv",
    "safety/constitutions/*.yaml",
    "safety/adversarial/*.yaml",
]
# Ruff settings shared by the linter and the formatter.
[tool.ruff]
line-length = 100
target-version = "py311"
# Directories skipped in addition to Ruff's default excludes.
extend-exclude = [
    ".hf-space-repo",
    ".hf-space-stage",
    ".hf-space-stage-plan",
]

[tool.ruff.lint]
select = [
    "E",
    "F",
    "W",
    "I",
    "N",
    "UP",
    "B",
    "A",
    "C4",
    "PIE",
    "RET",
    "SIM",
]
# E501 is the line-too-long rule.
ignore = ["E501"]

# Path-specific relaxations of the rule set selected above.
[tool.ruff.lint.per-file-ignores]
"data_quality_env/**/*.py" = [
    "B007",
    "B027",
    "E402",
    "E731",
    "F401",
    "F541",
    "F841",
    "I001",
    "N",
    "RET",
    "SIM",
    "UP",
]
"training/kaggle/sft_warmup_kaggle.ipynb" = ["E402"]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"
line-ending = "auto"
# mypy runs in strict mode, targeting Python 3.11.
[tool.mypy]
strict = true
python_version = "3.11"
warn_unused_configs = true
warn_redundant_casts = true
warn_unused_ignores = true
disallow_untyped_defs = true
explicit_package_bases = true
# Regular expressions matched against file paths; written as literal strings
# so the backslashes need no extra escaping.
exclude = [
    '^\.hf-space-repo/',
    '^\.hf-space-stage/',
    '^\.hf-space-stage-plan/',
    # loose root-level scripts (hackathon legacy)
    '^[^/]*\.py$',
    '^(training|playground|benchmark_results|datasets)/',
]
# pytest configuration. Tests are collected from `tests/`, and the project
# root is added to the import path.
[tool.pytest.ini_options]
minversion = "8.0"
# --strict-markers is on, so every marker used in tests must be declared in
# `markers` below.
addopts = "-ra --strict-markers --strict-config"
testpaths = ["tests"]
pythonpath = ["."]
markers = [
    "slow: marks tests as slow",
    "integration: marks tests as integration tests",
    "requires_network: tests that need internet access",
    "requires_llm: tests that call a free-tier LLM API",
]
# Coverage measurement: branch coverage over the `dataforge` package only.
[tool.coverage.run]
source = ["dataforge"]
branch = true

# Reporting fails when total coverage is below 90 percent.
[tool.coverage.report]
fail_under = 90
# Lines matching any of these patterns are left out of the report.
exclude_lines = [
    "pragma: no cover",
    "raise NotImplementedError",
    "if TYPE_CHECKING:",
]