# Core distribution metadata (PEP 621).
[project]
name = "dataforge15"
version = "0.1.0rc1"
description = "DataForge15: CLI-first data-quality detection and reversible repair for tabular data."
readme = "README.md"
# SPDX license expression (string form of the `license` field).
license = "Apache-2.0"
# Matches the two interpreter classifiers listed below.
requires-python = ">=3.11,<3.13"
keywords = ["data-quality", "ai-agent", "llm", "rl", "smt", "dbt"]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]
# Requirements installed with the base package; anything else is opt-in
# through [project.optional-dependencies].
dependencies = [
    "pydantic>=2.7",
    "pyyaml>=6.0",
    "rich>=13.7",
    "textual>=8.2,<9",
    "typer>=0.24,<0.25",
    "z3-solver>=4.13",
]

# Opt-in feature sets, installed as `dataforge15[<extra>]`.
[project.optional-dependencies]
bench = [
    "pandas>=2.2",
    "httpx>=0.27",
    "tenacity>=8.3",
    "python-dotenv>=1.0",
    "pyarrow>=16.0",
]
causal = [
    "pandas>=2.2",
    "numpy>=1.26",
    "networkx>=3.3",
    "causal-learn>=0.1.4",
    "hyppo>=0.5.2",
    "scipy>=1.13",
]
dev = [
    # Test runners and test-quality tooling.
    "pytest>=9.0.3",
    "pytest-cov>=5.0",
    "pytest-benchmark>=4.0",
    "pytest-xdist>=3.6",
    "hypothesis>=6.100",
    "mutmut>=3.5",
    # Packaging, dependency audit and SBOM generation.
    "build>=1.2",
    "pip-audit>=2.10,<3",
    "cyclonedx-bom>=7.3,<8",
    # NOTE(review): these four look like minimum-version floors for packages
    # pulled in transitively (presumably security floors) - confirm.
    "cryptography>=46.0.7",
    "idna>=3.15",
    "pip>=26.1.1",
    "urllib3>=2.7",
    # Linting and static typing.
    "ruff>=0.11",
    "mypy>=1.10",
    "pandas-stubs>=2.2",
    "types-PyYAML",
    # Same exact pin as the `train` extra.
    "huggingface_hub==1.13.0",
    # Mirror of the `bench` extra. `pandas` is listed explicitly so the
    # development environment gets the same floor as `bench`/`causal`
    # instead of whatever version arrives transitively.
    "pandas>=2.2",
    "httpx>=0.27",
    "tenacity>=8.3",
    "python-dotenv>=1.0",
    "pyarrow>=16.0",
    # Mirror of the `causal` extra (including its numpy floor).
    "numpy>=1.26",
    "networkx>=3.3",
    "causal-learn>=0.1.4",
    "hyppo>=0.5.2",
    "scipy>=1.13",
    # SQL tooling, also required by the `openenv` extra.
    "sqlglot>=25.0",
    "duckdb>=1.0",
]
# Every requirement in this extra is pinned to an exact version.
train = [
    "trl==1.4.0",
    "transformers==5.7.0",
    "accelerate==1.13.0",
    "peft==0.19.1",
    "bitsandbytes==0.49.2",
    "datasets==4.8.5",
    "huggingface_hub==1.13.0",
    "pyyaml==6.0.3",
    "pandas==2.3.3",
    "tensorboard==2.20.0",
]
eval = [
    "matplotlib>=3.9",
    "seaborn>=0.13",
]
providers = [
    "httpx>=0.27",
    "tenacity>=8.3",
    "python-dotenv>=1.0",
]
pandas = [
    "pandas>=2.2",
]
playground = [
    "pandas>=2.2",
    "fastapi>=0.136.1",
    "starlette>=1.0.1,<2",
    "uvicorn[standard]>=0.35",
    "python-multipart>=0.0.27",
    "slowapi>=0.1.9",
]
openenv = [
    "pandas>=2.2",
    "openenv-core[core]>=0.2.2",
    # `!=1.7.0` is already implied by the `>=1.7.1` floor.
    # NOTE(review): presumably kept to flag a known-bad release - confirm.
    "authlib>=1.7.1,!=1.7.0",
    "cryptography>=46.0.7",
    "duckdb>=1.0",
    "sqlglot>=25.0",
    "scipy>=1.13",
    "networkx>=3.3",
    "causal-learn>=0.1.4",
    "hyppo>=0.5.2",
]
# Self-referential extra that pulls in every extra defined above.
all = [
    "dataforge15[bench,causal,dev,eval,pandas,playground,providers,train,openenv]",
]

# Console entry points. Both command names resolve to the same `app` object
# in `dataforge.cli`.
[project.scripts]
dataforge15 = "dataforge.cli:app"
dataforge = "dataforge.cli:app"

[build-system]
# [project] declares `license` as a plain SPDX string (PEP 639). setuptools
# only understands that form starting with the 77 series; older releases
# reject it during metadata validation, so the floor must not be lower.
requires = ["setuptools>=77.0.3", "wheel"]
build-backend = "setuptools.build_meta"

# Package discovery starts at the repository root and ships only the
# `dataforge` package tree; `data_quality_env` is explicitly left out.
[tool.setuptools.packages.find]
where = ["."]
include = ["dataforge", "dataforge.*"]
exclude = ["data_quality_env", "data_quality_env.*"]

# Non-Python files bundled with the `dataforge` package. Patterns are
# relative to the package directory.
[tool.setuptools.package-data]
dataforge = [
    # PEP 561 marker: the package ships inline type information.
    "py.typed",
    "fixtures/*.csv",
    "fixtures/*.yaml",
    # Recursive glob: CSV files at any depth below datasets/embedded.
    "datasets/embedded/**/*.csv",
    "safety/constitutions/*.yaml",
    "safety/adversarial/*.yaml",
]

[tool.ruff]
line-length = 100
# Same minimum interpreter as `requires-python` in [project].
target-version = "py311"
# Added on top of ruff's built-in exclusion list.
extend-exclude = [".hf-space-repo", ".hf-space-stage", ".hf-space-stage-plan"]

[tool.ruff.lint]
# Enabled rule families, referenced by their ruff prefix.
select = ["E", "F", "W", "I", "N", "UP", "B", "A", "C4", "PIE", "RET", "SIM"]
# E501 (line too long) is switched off in the linter.
ignore = ["E501"]

# Per-path rule exemptions. Keys are glob patterns, so they must stay quoted.
[tool.ruff.lint.per-file-ignores]
"data_quality_env/**/*.py" = [
    "B007",
    "B027",
    "E402",
    "E731",
    "F401",
    "F541",
    "F841",
    "I001",
    "N",
    "RET",
    "SIM",
    "UP",
]
"training/kaggle/sft_warmup_kaggle.ipynb" = ["E402"]

# Formatter output: double-quoted strings, space indentation, and line
# endings chosen automatically per file.
[tool.ruff.format]
quote-style = "double"
indent-style = "space"
line-ending = "auto"

[tool.mypy]
strict = true
python_version = "3.11"
# mypy's documentation lists the next four flags among those that `strict`
# already turns on; they are spelled out here as well.
warn_unused_configs = true
warn_redundant_casts = true
warn_unused_ignores = true
disallow_untyped_defs = true
explicit_package_bases = true
# Regular expressions matched against file paths. Literal (single-quoted)
# strings keep each backslash as written, so no double escaping is needed.
exclude = [
    '^\.hf-space-repo/',
    '^\.hf-space-stage/',
    '^\.hf-space-stage-plan/',
    # Any *.py file whose path contains no directory separator.
    '^[^/]*\.py$',
    '^(training|playground|benchmark_results|datasets)/',
]

[tool.pytest.ini_options]
# NOTE(review): the `dev` extra installs pytest>=9.0.3, so this floor is
# looser than what development environments actually get - confirm whether
# pytest 8.x is still meant to be supported.
minversion = "8.0"
# -ra prints a short summary line for every non-passing outcome; the two
# strict flags turn unregistered markers and config problems into errors.
addopts = "-ra --strict-markers --strict-config"
testpaths = ["tests"]
pythonpath = ["."]
# With --strict-markers, every marker used by a test must be declared here.
markers = [
    "slow: marks tests as slow",
    "integration: marks tests as integration tests",
    "requires_network: tests that need internet access",
    "requires_llm: tests that call a free-tier LLM API",
]

# Coverage is measured for the `dataforge` package only, with branch
# coverage enabled in addition to line coverage.
[tool.coverage.run]
source = ["dataforge"]
branch = true

[tool.coverage.report]
# The report step fails when total coverage is below this percentage.
fail_under = 90
# Lines matching any of these patterns are left out of the report.
exclude_lines = [
    "pragma: no cover",
    "raise NotImplementedError",
    "if TYPE_CHECKING:",
]