| # Core project metadata (PEP 621). |
| [project] |
| name = "metrollm-bench" |
| version = "0.1.0" |
| description = "Benchmark for evaluating LLMs as transit kiosk intelligence" |
| # Minimum supported interpreter version. |
| requires-python = ">=3.12" |
| # Runtime requirements as PEP 508 specifiers, one per line, sorted alphabetically. |
| # Every entry sets only a lower bound; no upper pins are declared here. |
| dependencies = [ |
|     "anthropic>=0.84.0", |
|     "fastapi>=0.115", |
|     "httpx>=0.28", |
|     "networkx>=3.4", |
|     "openai>=1.60", |
|     "pydantic>=2.10", |
|     "python-dotenv>=1.2.2", |
|     "pyyaml>=6.0", |
|     "uvicorn>=0.34", |
| ] |
|
|
| # Dependency groups (PEP 735): declared separately from the runtime |
| # `dependencies` array of the project table. |
| [dependency-groups] |
| dev = [ |
|     "pytest>=8.0", |
| ] |
|
|
| # Console entry points: each key is the command name, each value an |
| # object reference of the form "module.path:callable". Keys are kept |
| # in alphabetical order. |
| # NOTE(review): no [build-system] table is visible in this section; some |
| # installers only generate these commands when the project itself is |
| # installed as a package — confirm the packaging setup. |
| [project.scripts] |
| build-dashboard = "dashboard.build_data:main" |
| generate-cases = "cases.generator:main" |
| mock-server = "harness.mock_server:main" |
| run-bench = "harness.runner:main" |
| score-bench = "harness.scorer:main" |
|
|