Spaces:

Praneshrajan15
/

dataforge-playground

Running

App Files Files Community

dataforge-playground / pyproject.toml

Praneshrajan15

Deploy DataForge playground API

791c076 verified 4 days ago

raw

history blame contribute delete

4.51 kB

	# Core distribution metadata (PEP 621 `[project]` table).
	[project]
	name = "dataforge15"
	# Pre-release version: release candidate 1 of 0.1.0.
	version = "0.1.0rc1"
	description = "DataForge15: CLI-first data-quality detection and reversible repair for tabular data."
	readme = "README.md"
	# SPDX license expression given as a plain string.
	license = "Apache-2.0"
	# Supported interpreters are 3.11 and 3.12, matching the classifiers below.
	requires-python = ">=3.11,<3.13"
	keywords = [
	    "data-quality",
	    "ai-agent",
	    "llm",
	    "rl",
	    "smt",
	    "dbt",
	]
	classifiers = [
	    "Development Status :: 3 - Alpha",
	    "Programming Language :: Python :: 3.11",
	    "Programming Language :: Python :: 3.12",
	]
	# Runtime requirements installed with the bare package (no extras).
	dependencies = [
	    "pydantic>=2.7",
	    "typer>=0.24,<0.25",
	    "rich>=13.7",
	    "textual>=8.2,<9",
	    "z3-solver>=4.13",
	    "pyyaml>=6.0",
	]

	# Optional extras, installed with `pip install "dataforge15[<extra>]"`.
	# Several requirements are repeated across extras instead of being shared.
	[project.optional-dependencies]
	bench = [
	    "pandas>=2.2",
	    "httpx>=0.27",
	    "tenacity>=8.3",
	    "python-dotenv>=1.0",
	    "pyarrow>=16.0",
	]
	causal = [
	    "pandas>=2.2",
	    "numpy>=1.26",
	    "networkx>=3.3",
	    "causal-learn>=0.1.4",
	    "hyppo>=0.5.2",
	    "scipy>=1.13",
	]
	# The tail of this list (httpx ... duckdb) repeats requirements that also
	# appear in the bench, causal, and openenv extras.
	dev = [
	    "pytest>=9.0.3",
	    "pytest-cov>=5.0",
	    "pytest-benchmark>=4.0",
	    "pytest-xdist>=3.6",
	    "hypothesis>=6.100",
	    "mutmut>=3.5",
	    "build>=1.2",
	    "pip-audit>=2.10,<3",
	    "cyclonedx-bom>=7.3,<8",
	    "cryptography>=46.0.7",
	    "idna>=3.15",
	    "pip>=26.1.1",
	    "urllib3>=2.7",
	    "ruff>=0.11",
	    "mypy>=1.10",
	    "pandas-stubs>=2.2",
	    "types-PyYAML",
	    "huggingface_hub==1.13.0",
	    "httpx>=0.27",
	    "tenacity>=8.3",
	    "python-dotenv>=1.0",
	    "pyarrow>=16.0",
	    "networkx>=3.3",
	    "causal-learn>=0.1.4",
	    "hyppo>=0.5.2",
	    "scipy>=1.13",
	    "sqlglot>=25.0",
	    "duckdb>=1.0",
	]
	# Every requirement in this extra is pinned to an exact version.
	train = [
	    "trl==1.4.0",
	    "transformers==5.7.0",
	    "accelerate==1.13.0",
	    "peft==0.19.1",
	    "bitsandbytes==0.49.2",
	    "datasets==4.8.5",
	    "huggingface_hub==1.13.0",
	    "pyyaml==6.0.3",
	    "pandas==2.3.3",
	    "tensorboard==2.20.0",
	]
	eval = ["matplotlib>=3.9", "seaborn>=0.13"]
	providers = [
	    "httpx>=0.27",
	    "tenacity>=8.3",
	    "python-dotenv>=1.0",
	]
	pandas = ["pandas>=2.2"]
	playground = [
	    "pandas>=2.2",
	    "fastapi>=0.136.1",
	    "starlette>=1.0.1,<2",
	    "uvicorn[standard]>=0.35",
	    "python-multipart>=0.0.27",
	    "slowapi>=0.1.9",
	]
	openenv = [
	    "pandas>=2.2",
	    "openenv-core[core]>=0.2.2",
	    "authlib>=1.7.1,!=1.7.0",
	    "cryptography>=46.0.7",
	    "duckdb>=1.0",
	    "sqlglot>=25.0",
	    "scipy>=1.13",
	    "networkx>=3.3",
	    "causal-learn>=0.1.4",
	    "hyppo>=0.5.2",
	]
	# Self-referencing extra that pulls in every other extra in this table.
	all = ["dataforge15[bench,causal,dev,eval,pandas,playground,providers,train,openenv]"]

	# Console commands. Both names resolve to the same object, `app` in
	# the `dataforge.cli` module, so either spelling launches the same CLI.
	[project.scripts]
	dataforge = "dataforge.cli:app"
	dataforge15 = "dataforge.cli:app"

	# Build backend requirements.
	# The setuptools floor is 77.0.3 because `[project].license` is written as
	# an SPDX expression string ("Apache-2.0", PEP 639); older setuptools
	# releases only accept the table form and reject this file.
	# `wheel` is intentionally not listed: setuptools at this version builds
	# wheels without the separate `wheel` package.
	[build-system]
	requires = ["setuptools>=77.0.3"]
	build-backend = "setuptools.build_meta"

	# Package discovery, rooted at the repository top level: only the
	# `dataforge` package tree is included; `data_quality_env` is explicitly
	# left out of the distribution.
	[tool.setuptools.packages.find]
	where = ["."]
	exclude = ["data_quality_env", "data_quality_env.*"]
	include = ["dataforge", "dataforge.*"]

	# Non-Python files shipped inside the `dataforge` package. Patterns are
	# globs relative to the package directory.
	[tool.setuptools.package-data]
	dataforge = [
	    "py.typed",
	    "fixtures/*.csv",
	    "fixtures/*.yaml",
	    # Recursive glob: CSV files at any depth below datasets/embedded/.
	    # (The previous pattern "*/.csv" only matched files literally named
	    # ".csv", so no dataset files were packaged.)
	    "datasets/embedded/**/*.csv",
	    "safety/constitutions/*.yaml",
	    "safety/adversarial/*.yaml",
	]

	# Ruff settings shared by the linter and the formatter.
	[tool.ruff]
	target-version = "py311"
	line-length = 100
	# Directories skipped in addition to Ruff's default excludes.
	extend-exclude = [
	    ".hf-space-repo",
	    ".hf-space-stage",
	    ".hf-space-stage-plan",
	]

	# Enabled lint rule families.
	[tool.ruff.lint]
	select = [
	    "E",
	    "F",
	    "W",
	    "I",
	    "N",
	    "UP",
	    "B",
	    "A",
	    "C4",
	    "PIE",
	    "RET",
	    "SIM",
	]
	# E501 (line too long) is switched off even though `line-length` is set
	# in [tool.ruff].
	ignore = ["E501"]

	# Per-path rule suppressions. Keys are glob patterns; values are the rule
	# codes or prefixes ignored for matching files.
	[tool.ruff.lint.per-file-ignores]
	# Recursive glob covering every Python file under data_quality_env/.
	# (The previous pattern "*/.py" only matched files literally named ".py",
	# so these ignores never applied.)
	"data_quality_env/**/*.py" = [
	    "B007",
	    "B027",
	    "E402",
	    "E731",
	    "F401",
	    "F541",
	    "F841",
	    "I001",
	    "N",
	    "RET",
	    "SIM",
	    "UP",
	]
	"training/kaggle/sft_warmup_kaggle.ipynb" = ["E402"]

	# Formatter output style: space indentation, double-quoted strings, and
	# line endings chosen automatically.
	[tool.ruff.format]
	indent-style = "space"
	quote-style = "double"
	line-ending = "auto"

	# Static type checking in strict mode, targeting Python 3.11.
	[tool.mypy]
	strict = true
	python_version = "3.11"
	warn_unused_configs = true
	warn_redundant_casts = true
	warn_unused_ignores = true
	disallow_untyped_defs = true
	explicit_package_bases = true
	# Regular expressions for paths mypy skips. Written as TOML literal
	# strings (single quotes) so regex backslashes need no escaping.
	exclude = [
	    '^\.hf-space-repo/',
	    '^\.hf-space-stage/',
	    '^\.hf-space-stage-plan/',
	    '^[^/]*\.py$', # loose root-level scripts (hackathon legacy)
	    # Plain `|` alternation. The previous `\|` is not a valid escape in a
	    # TOML basic string and made the whole file fail to parse.
	    '^(training|playground|benchmark_results|datasets)/',
	]

	# Pytest configuration. Tests are collected from `tests/`, and the
	# repository root is placed on the import path.
	[tool.pytest.ini_options]
	minversion = "8.0"
	testpaths = ["tests"]
	pythonpath = ["."]
	# --strict-markers makes any marker not declared in `markers` an error;
	# --strict-config does the same for unknown keys in this table.
	addopts = "-ra --strict-markers --strict-config"
	markers = [
	    "slow: marks tests as slow",
	    "integration: marks tests as integration tests",
	    "requires_network: tests that need internet access",
	    "requires_llm: tests that call a free-tier LLM API",
	]

	# Coverage measurement: branch coverage, limited to the `dataforge` package.
	[tool.coverage.run]
	branch = true
	source = ["dataforge"]

	# Coverage reporting: the report fails when total coverage is below 90%.
	[tool.coverage.report]
	# Lines matching any of these patterns are left out of the report.
	exclude_lines = [
	    "pragma: no cover",
	    "raise NotImplementedError",
	    "if TYPE_CHECKING:",
	]
	fail_under = 90