"""FastAPI application for the CleanOps data-cleaning environment.""" from __future__ import annotations import copy import random from openenv.core import create_app from fastapi.responses import HTMLResponse, JSONResponse from cleanops_env.environment import CleanOpsEnvironment from cleanops_env.models import DataCleaningAction, DataCleaningObservation from cleanops_env.tasks import first_table_name, get_task_spec, sorted_rows app = create_app( CleanOpsEnvironment, DataCleaningAction, DataCleaningObservation, env_name="cleanops_env", max_concurrent_envs=4, ) @app.get("/demo/compare", include_in_schema=False) def demo_compare(task_id: str = "customer_contacts_easy", table_name: str | None = None, seed: int | None = None) -> JSONResponse: task_spec = get_task_spec(task_id) selected_table = table_name if table_name in task_spec.dirty_tables else first_table_name(task_spec) primary_key = task_spec.primary_keys[selected_table] before_rows = _seed_preview_rows(task_spec.dirty_tables[selected_table], primary_key, selected_table, seed) after_rows = _seed_preview_rows(task_spec.gold_tables[selected_table], primary_key, selected_table, seed) columns = sorted({column_name for row in before_rows + after_rows for column_name in row}) return JSONResponse( { "task_id": task_spec.task_id, "task_title": task_spec.title, "table_name": selected_table, "requested_seed": seed, "available_tables": list(task_spec.dirty_tables.keys()), "columns": columns, "before_rows": before_rows[:4], "after_rows": after_rows[:4], "before_row_count": len(before_rows), "after_row_count": len(after_rows), "solution_operation_ids": list(task_spec.solution_operation_ids), } ) def _seed_preview_rows( rows: list[dict[str, str]], primary_key: str, table_name: str, seed: int | None, ) -> list[dict[str, str]]: ordered_rows = sorted_rows(rows, primary_key) if seed is None or len(ordered_rows) <= 1: return ordered_rows shuffled_rows = copy.deepcopy(ordered_rows) random.Random(max(0, int(seed)) + sum(ord(char) for char in table_name)).shuffle(shuffled_rows) return shuffled_rows @app.get("/", include_in_schema=False) def root() -> HTMLResponse: return HTMLResponse( """
CleanOps simulates the kind of operational cleanup analysts actually do before data reaches a CRM, warehouse, or billing system. The UI below runs the same hosted benchmark API used by the evaluator.
This homepage is a thin demo over the live environment. It doesn’t fake results: every task button calls the deployed API.
The cards and table below are populated from a real
POST /reset response. Use the task buttons above to
switch between benchmark scenarios, or choose your own task and seed.
Loading live task data...
The evaluator checks these endpoints directly. This page exists to make the environment easier to inspect visually.
live
live
live
curl -X POST /reset -H "Content-Type: application/json" -d '{"task_id":"customer_contacts_easy","seed":7}'