Spaces:
Sleeping
Sleeping
Delete backend
Browse files
- backend/__init__.py +0 -0
- backend/app/__init__.py +0 -0
- backend/app/api/__init__.py +0 -0
- backend/app/api/compare.py +0 -35
- backend/app/api/datasets.py +0 -89
- backend/app/api/experiments.py +0 -140
- backend/app/api/exports.py +0 -53
- backend/app/api/health.py +0 -7
- backend/app/api/presets.py +0 -13
- backend/app/api/runs.py +0 -29
- backend/app/db.py +0 -34
- backend/app/main.py +0 -45
- backend/app/models/__init__.py +0 -0
- backend/app/models/dataset.py +0 -14
- backend/app/models/experiment.py +0 -16
- backend/app/repositories/__init__.py +0 -0
- backend/app/repositories/dataset_repo.py +0 -35
- backend/app/repositories/experiment_repo.py +0 -45
- backend/app/services/__init__.py +0 -0
- backend/app/services/profiling_service.py +0 -23
- backend/app/utils/__init__.py +0 -0
- backend/app/utils/ids.py +0 -7
backend/__init__.py
DELETED
|
File without changes
|
backend/app/__init__.py
DELETED
|
File without changes
|
backend/app/api/__init__.py
DELETED
|
File without changes
|
backend/app/api/compare.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
|
| 3 |
-
from fastapi import APIRouter, Depends, Query
|
| 4 |
-
from sqlalchemy.orm import Session
|
| 5 |
-
|
| 6 |
-
from backend.app.db import get_db
|
| 7 |
-
from backend.app.repositories.experiment_repo import get_experiments_by_ids, list_experiments
|
| 8 |
-
|
| 9 |
-
router = APIRouter(tags=["compare"])


def _json_or_empty(raw):
    """Decode a stored JSON text column; ``None`` or empty text becomes ``{}``."""
    if raw:
        return json.loads(raw)
    return {}


@router.get("/compare")
def compare_experiments(
    experiment_ids: str | None = Query(default=None),
    db: Session = Depends(get_db),
):
    """Return stored experiments side by side for comparison.

    ``experiment_ids`` is an optional comma-separated list of ids; blank
    entries are ignored. When it is missing or empty, every experiment
    returned by ``list_experiments`` is included instead.
    """
    if not experiment_ids:
        selected = list_experiments(db)
    else:
        wanted = [token.strip() for token in experiment_ids.split(",") if token.strip()]
        selected = get_experiments_by_ids(db, wanted)

    payload = []
    for record in selected:
        payload.append(
            {
                "experiment_id": record.id,
                "dataset_id": record.dataset_id,
                "algorithm": record.algorithm,
                "status": record.status,
                "config": _json_or_empty(record.config_json),
                "metrics": _json_or_empty(record.metrics_json),
                "summary": _json_or_empty(record.summary_json),
                "runtime_ms": record.runtime_ms,
                "error_message": record.error_message,
            }
        )
    return {"experiments": payload}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/api/datasets.py
DELETED
|
@@ -1,89 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
from pathlib import Path
|
| 3 |
-
|
| 4 |
-
import pandas as pd
|
| 5 |
-
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
|
| 6 |
-
from sqlalchemy.orm import Session
|
| 7 |
-
|
| 8 |
-
from backend.app.db import get_db
|
| 9 |
-
from backend.app.repositories.dataset_repo import create_dataset, get_dataset, list_datasets
|
| 10 |
-
from backend.app.services.profiling_service import profile_dataframe
|
| 11 |
-
from backend.app.utils.ids import make_dataset_id
|
| 12 |
-
|
| 13 |
-
router = APIRouter(tags=["datasets"])

# Directory that receives uploaded dataset files. It is created (including
# missing parents) as a side effect of importing this module.
UPLOAD_DIR = Path("/data/uploads")
UPLOAD_DIR.mkdir(exist_ok=True, parents=True)
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
@router.get("/datasets")
def datasets_list(db: Session = Depends(get_db)):
    """List every stored dataset with its id, name and dimensions."""
    summaries = []
    for record in list_datasets(db):
        summaries.append(
            {
                "id": record.id,
                "name": record.name,
                "row_count": record.row_count,
                "column_count": record.column_count,
            }
        )
    return {"datasets": summaries}
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
@router.post("/datasets/upload")
async def upload_dataset(file: UploadFile = File(...), db: Session = Depends(get_db)):
    """Store an uploaded CSV/Excel file, profile it and register it as a dataset.

    The file is written to ``UPLOAD_DIR`` under a freshly generated dataset id
    (only the extension comes from the client-supplied name), parsed with
    pandas, profiled, and recorded through ``create_dataset``.

    Returns:
        A dict with ``dataset_id``, ``name``, ``row_count`` and ``column_count``.

    Raises:
        HTTPException: 400 when the upload has no file name, has an
            unsupported extension, or cannot be parsed as a table.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="Missing file name")

    suffix = Path(file.filename).suffix.lower()
    if suffix not in {".csv", ".xlsx", ".xls"}:
        raise HTTPException(status_code=400, detail="Only CSV and Excel files are supported")

    dataset_id = make_dataset_id()
    path = UPLOAD_DIR / f"{dataset_id}{suffix}"
    path.write_bytes(await file.read())

    try:
        df = pd.read_csv(path) if suffix == ".csv" else pd.read_excel(path)
    except ImportError:
        # A missing optional Excel engine is a server problem, not a bad
        # upload: clean up and let it surface unchanged.
        path.unlink(missing_ok=True)
        raise
    except Exception as exc:
        # Request boundary: whatever the parser raised, the upload is unusable.
        # Remove the stored file so failed uploads do not accumulate on disk.
        path.unlink(missing_ok=True)
        raise HTTPException(status_code=400, detail="Could not parse the uploaded file") from exc

    row_count = int(len(df))
    column_count = int(len(df.columns))

    try:
        profile = profile_dataframe(df)
        create_dataset(
            db=db,
            id=dataset_id,
            name=file.filename,
            file_path=str(path),
            row_count=row_count,
            column_count=column_count,
            schema_json=json.dumps({"columns": list(df.columns)}),
            profile_json=json.dumps(profile),
        )
    except Exception:
        # Without a dataset row nothing references the file; do not leave it behind.
        path.unlink(missing_ok=True)
        raise

    return {
        "dataset_id": dataset_id,
        "name": file.filename,
        "row_count": row_count,
        "column_count": column_count,
    }
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
@router.get("/datasets/{dataset_id}/profile")
def dataset_profile(dataset_id: str, db: Session = Depends(get_db)):
    """Return the stored schema and profile of one dataset.

    Raises:
        HTTPException: 404 when no dataset has the given id.
    """
    record = get_dataset(db, dataset_id)
    if record is None:
        raise HTTPException(status_code=404, detail="Dataset not found")

    # Schema and profile are persisted as JSON text; decode them for the response.
    schema = json.loads(record.schema_json)
    profile = json.loads(record.profile_json)
    return {
        "dataset_id": record.id,
        "name": record.name,
        "row_count": record.row_count,
        "column_count": record.column_count,
        "schema": schema,
        "profile": profile,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/api/experiments.py
DELETED
|
@@ -1,140 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
import time
|
| 3 |
-
|
| 4 |
-
import pandas as pd
|
| 5 |
-
from fastapi import APIRouter, Depends, HTTPException
|
| 6 |
-
from pydantic import BaseModel
|
| 7 |
-
from sqlalchemy.orm import Session
|
| 8 |
-
from sklearn.cluster import AgglomerativeClustering, Birch, KMeans
|
| 9 |
-
from sklearn.decomposition import PCA
|
| 10 |
-
from sklearn.metrics import silhouette_score
|
| 11 |
-
|
| 12 |
-
from backend.app.db import get_db
|
| 13 |
-
from backend.app.repositories.dataset_repo import get_dataset
|
| 14 |
-
from backend.app.repositories.experiment_repo import create_experiment
|
| 15 |
-
from backend.app.utils.ids import make_experiment_id
|
| 16 |
-
|
| 17 |
-
router = APIRouter(tags=["experiments"])


class RunRequest(BaseModel):
    """Request body for ``POST /experiments/run``."""

    # Id of the dataset to cluster.
    dataset_id: str
    # Optional label supplied by the client.
    name: str | None = None
    # Algorithm key; run_experiment accepts "kmeans", "agglomerative" and "birch".
    algorithm: str = "kmeans"
    # Number of clusters passed to the selected estimator.
    n_clusters: int = 4
    # Dataset columns used as clustering features.
    feature_columns: list[str]
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
def _project_to_2d(features):
    """Return one ``(x, y)`` float pair per row of ``features`` using PCA.

    PCA cannot produce more components than ``min(n_samples, n_features)``,
    so with a single usable feature column (or a single row) only one
    component is computed and ``y`` is reported as ``0.0``.
    """
    n_components = min(2, features.shape[0], features.shape[1])
    projected = PCA(n_components=n_components, random_state=42).fit_transform(features)
    return [
        (float(row[0]), float(row[1]) if n_components == 2 else 0.0)
        for row in projected
    ]


@router.post("/experiments/run")
def run_experiment(req: RunRequest, db: Session = Depends(get_db)):
    """Cluster a stored dataset and persist the outcome as an experiment.

    The selected feature columns are one-hot encoded with ``pd.get_dummies``
    and missing values are replaced by 0 before fitting. The response holds
    the experiment id, silhouette score (``None`` when it is undefined),
    cluster sizes, runtime and a 2-D PCA projection of every row.

    Raises:
        HTTPException: 404 when the dataset does not exist; 400 when
            ``feature_columns`` is empty or names unknown columns, when the
            selected columns yield no usable data, when ``n_clusters`` is out
            of range, when the algorithm is unsupported, or when the
            estimator rejects the input.
    """
    dataset = get_dataset(db, req.dataset_id)
    if not dataset:
        raise HTTPException(status_code=404, detail="Dataset not found")

    if dataset.file_path.endswith(".csv"):
        df = pd.read_csv(dataset.file_path)
    else:
        df = pd.read_excel(dataset.file_path)

    if not req.feature_columns:
        raise HTTPException(status_code=400, detail="feature_columns is required")

    missing = [c for c in req.feature_columns if c not in df.columns]
    if missing:
        raise HTTPException(status_code=400, detail=f"Missing columns: {', '.join(missing)}")

    X = pd.get_dummies(df[req.feature_columns]).fillna(0)

    # Reject inputs the estimators cannot handle instead of failing with a 500.
    if X.empty:
        raise HTTPException(status_code=400, detail="Selected columns contain no usable data")
    if not 1 <= req.n_clusters <= len(X):
        raise HTTPException(
            status_code=400,
            detail=f"n_clusters must be between 1 and {len(X)}",
        )

    estimator_factories = {
        "kmeans": lambda: KMeans(n_clusters=req.n_clusters, n_init=10, random_state=42),
        "agglomerative": lambda: AgglomerativeClustering(n_clusters=req.n_clusters),
        "birch": lambda: Birch(n_clusters=req.n_clusters),
    }
    make_estimator = estimator_factories.get(req.algorithm)
    if make_estimator is None:
        raise HTTPException(status_code=400, detail="Unsupported algorithm")

    # perf_counter is monotonic, so the measured duration cannot go negative
    # if the wall clock is adjusted during the fit.
    start = time.perf_counter()
    try:
        labels = make_estimator().fit_predict(X)
    except ValueError as exc:
        # scikit-learn signals invalid parameters or data with ValueError.
        raise HTTPException(status_code=400, detail=f"Clustering failed: {exc}") from exc
    runtime_ms = int((time.perf_counter() - start) * 1000)

    unique_labels = sorted(set(labels.tolist()))
    score = None
    # The silhouette score is only defined for 2 .. n_samples - 1 clusters.
    if 1 < len(unique_labels) < len(X):
        score = float(silhouette_score(X, labels))

    points = [
        {
            "row_index": row_index,
            "cluster_label": int(label),
            "x": x,
            "y": y,
        }
        for row_index, (label, (x, y)) in enumerate(zip(labels, _project_to_2d(X)))
    ]

    cluster_sizes = {str(label): int((labels == label).sum()) for label in unique_labels}
    experiment_id = make_experiment_id()

    metrics = {
        "silhouette_score": score,
        "cluster_count": len(unique_labels),
        "row_count": int(len(X)),
        "runtime_ms": runtime_ms,
    }
    summary = {
        "feature_columns": req.feature_columns,
        "cluster_sizes": cluster_sizes,
        "points": points,
    }

    create_experiment(
        db=db,
        id=experiment_id,
        dataset_id=req.dataset_id,
        algorithm=req.algorithm,
        status="completed",
        config_json=req.model_dump_json(),
        metrics_json=json.dumps(metrics),
        summary_json=json.dumps(summary),
        runtime_ms=runtime_ms,
        error_message=None,
    )

    return {
        "experiment_id": experiment_id,
        "status": "completed",
        "silhouette_score": score,
        "cluster_count": len(unique_labels),
        "cluster_sizes": cluster_sizes,
        "runtime_ms": runtime_ms,
        "points": points,
    }
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
@router.get("/experiments/{experiment_id}/results")
def experiment_results(experiment_id: str, db: Session = Depends(get_db)):
    """Return the stored configuration, metrics and summary of one experiment.

    Raises:
        HTTPException: 404 when no experiment has the given id.
    """
    from backend.app.repositories.experiment_repo import get_experiment

    exp = get_experiment(db, experiment_id)
    if exp is None:
        raise HTTPException(status_code=404, detail="Experiment not found")

    body = {
        "experiment_id": exp.id,
        "dataset_id": exp.dataset_id,
        "algorithm": exp.algorithm,
        "status": exp.status,
    }
    # JSON text columns are nullable; a missing or empty value decodes to {}.
    for key, raw in (
        ("config", exp.config_json),
        ("metrics", exp.metrics_json),
        ("summary", exp.summary_json),
    ):
        body[key] = json.loads(raw) if raw else {}
    body["runtime_ms"] = exp.runtime_ms
    body["error_message"] = exp.error_message
    return body
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/api/exports.py
DELETED
|
@@ -1,53 +0,0 @@
|
|
| 1 |
-
import io
|
| 2 |
-
import json
|
| 3 |
-
|
| 4 |
-
import pandas as pd
|
| 5 |
-
from fastapi import APIRouter, Depends, HTTPException
|
| 6 |
-
from fastapi.responses import StreamingResponse
|
| 7 |
-
from sqlalchemy.orm import Session
|
| 8 |
-
|
| 9 |
-
from backend.app.db import get_db
|
| 10 |
-
from backend.app.repositories.dataset_repo import get_dataset
|
| 11 |
-
from backend.app.repositories.experiment_repo import get_experiment
|
| 12 |
-
|
| 13 |
-
router = APIRouter(tags=["exports"])


@router.get("/exports/{experiment_id}")
def export_experiment(experiment_id: str, db: Session = Depends(get_db)):
    """Download the experiment's dataset as CSV, enriched with its results.

    When the stored summary has exactly one point per dataset row, the
    columns ``cluster_label``, ``pca_x`` and ``pca_y`` are appended. Every
    stored metric is added as a constant ``metric_<name>`` column.

    Raises:
        HTTPException: 404 when the experiment or its dataset is not found.
    """
    experiment = get_experiment(db, experiment_id)
    if not experiment:
        raise HTTPException(status_code=404, detail="Experiment not found")

    dataset = get_dataset(db, experiment.dataset_id)
    if not dataset:
        raise HTTPException(status_code=404, detail="Dataset not found")

    is_csv = dataset.file_path.endswith(".csv")
    df = pd.read_csv(dataset.file_path) if is_csv else pd.read_excel(dataset.file_path)

    summary = json.loads(experiment.summary_json) if experiment.summary_json else {}
    points = summary.get("points", [])

    export_df = df.copy()
    # Points are matched to rows by position, so they are only attached when
    # the counts agree.
    if points and len(points) == len(df):
        for column, point_key in (
            ("cluster_label", "cluster_label"),
            ("pca_x", "x"),
            ("pca_y", "y"),
        ):
            export_df[column] = [point[point_key] for point in points]

    metrics = json.loads(experiment.metrics_json) if experiment.metrics_json else {}
    for metric_name, metric_value in metrics.items():
        export_df[f"metric_{metric_name}"] = metric_value

    buffer = io.StringIO()
    export_df.to_csv(buffer, index=False)
    csv_text = buffer.getvalue()

    disposition = f"attachment; filename={experiment_id}_export.csv"
    return StreamingResponse(
        iter([csv_text]),
        media_type="text/csv",
        headers={"Content-Disposition": disposition},
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/api/health.py
DELETED
|
@@ -1,7 +0,0 @@
|
|
| 1 |
-
from fastapi import APIRouter
|
| 2 |
-
|
| 3 |
-
router = APIRouter(tags=["health"])


@router.get("/health")
def health():
    """Report that the API process is up."""
    status = {"ok": True, "service": "clusterbuster-api"}
    return status
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/api/presets.py
DELETED
|
@@ -1,13 +0,0 @@
|
|
| 1 |
-
from fastapi import APIRouter
|
| 2 |
-
|
| 3 |
-
router = APIRouter(tags=["presets"])


@router.get("/presets/algorithms")
def list_algorithms():
    """List the available clustering algorithms with their default parameters."""
    catalog = (
        ("kmeans", "KMeans"),
        ("agglomerative", "Agglomerative"),
        ("birch", "Birch"),
    )
    # Every preset currently shares the same single default parameter.
    presets = [
        {"key": key, "label": label, "params": {"n_clusters": 4}}
        for key, label in catalog
    ]
    return {"algorithms": presets}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/api/runs.py
DELETED
|
@@ -1,29 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
|
| 3 |
-
from fastapi import APIRouter, Depends
|
| 4 |
-
from sqlalchemy.orm import Session
|
| 5 |
-
|
| 6 |
-
from backend.app.db import get_db
|
| 7 |
-
from backend.app.repositories.experiment_repo import list_experiments
|
| 8 |
-
|
| 9 |
-
router = APIRouter(tags=["runs"])


@router.get("/runs")
def get_runs(db: Session = Depends(get_db)):
    """List every stored experiment run with its decoded metrics and summary."""

    def as_run(record):
        # JSON text columns are nullable; a missing or empty value decodes to {}.
        metrics = json.loads(record.metrics_json) if record.metrics_json else {}
        summary = json.loads(record.summary_json) if record.summary_json else {}
        return {
            "experiment_id": record.id,
            "dataset_id": record.dataset_id,
            "algorithm": record.algorithm,
            "status": record.status,
            "metrics": metrics,
            "summary": summary,
            "runtime_ms": record.runtime_ms,
            "error_message": record.error_message,
        }

    return {"runs": [as_run(record) for record in list_experiments(db)]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/db.py
DELETED
|
@@ -1,34 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
from sqlalchemy import create_engine
|
| 3 |
-
from sqlalchemy.orm import declarative_base, sessionmaker
|
| 4 |
-
|
| 5 |
-
# Location of the SQLite file; overridable through the DB_PATH environment variable.
DB_PATH = os.environ.get("DB_PATH", "/data/clusterforge.db")
DATABASE_URL = f"sqlite:///{DB_PATH}"

# check_same_thread=False lets the SQLite connection be used from a thread
# other than the one that created it.
engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})

# Factory for sessions bound to the engine; flushing and committing are explicit.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)

# Declarative base class shared by the ORM models.
Base = declarative_base()
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
def init_db() -> None:
    """Create the tables of all ORM models if they do not exist yet."""
    # Imported only for their side effect: loading the model modules registers
    # their tables on Base.metadata before create_all runs.
    from backend.app.models import dataset, experiment  # noqa: F401

    Base.metadata.create_all(bind=engine)
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
def get_db():
    """Yield a database session and close it once the consumer is finished."""
    # The session's context manager closes it on exit, whether the consumer
    # completed normally or raised.
    with SessionLocal() as session:
        yield session
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/main.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
from fastapi import FastAPI
|
| 2 |
-
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
-
|
| 4 |
-
from backend.app.db import init_db
|
| 5 |
-
from backend.app.api.health import router as health_router
|
| 6 |
-
from backend.app.api.datasets import router as datasets_router
|
| 7 |
-
from backend.app.api.presets import router as presets_router
|
| 8 |
-
from backend.app.api.experiments import router as experiments_router
|
| 9 |
-
from backend.app.api.compare import router as compare_router
|
| 10 |
-
from backend.app.api.exports import router as exports_router
|
| 11 |
-
from backend.app.api.runs import router as runs_router
|
| 12 |
-
|
| 13 |
-
app = FastAPI(title="ClusterBuster API")


@app.on_event("startup")
def on_startup() -> None:
    """Initialise the database when the application starts."""
    init_db()
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
# Browser origins allowed to call the API.
origins = [
    "http://localhost:3000",
    "https://cluster-buster.vercel.app",
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Every feature router is mounted under the same /api prefix, in this order.
for api_router in (
    health_router,
    datasets_router,
    presets_router,
    experiments_router,
    compare_router,
    exports_router,
    runs_router,
):
    app.include_router(api_router, prefix="/api")
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
@app.get("/")
def root():
    """Answer requests to the site root with the service identity."""
    identity = {"ok": True, "service": "clusterbuster-api"}
    return identity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/models/__init__.py
DELETED
|
File without changes
|
backend/app/models/dataset.py
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
from sqlalchemy import Column, Integer, String, Text
|
| 2 |
-
from backend.app.db import Base
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
class Dataset(Base):
    """ORM model for one uploaded dataset, stored in the ``datasets`` table."""

    __tablename__ = "datasets"

    # Identity and origin of the upload.
    id = Column(String, primary_key=True, index=True)
    name = Column(String, nullable=False)
    file_path = Column(String, nullable=False)

    # Dimensions of the parsed table.
    row_count = Column(Integer, nullable=False)
    column_count = Column(Integer, nullable=False)

    # JSON documents kept as text.
    schema_json = Column(Text, nullable=False)
    profile_json = Column(Text, nullable=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/models/experiment.py
DELETED
|
@@ -1,16 +0,0 @@
|
|
| 1 |
-
from sqlalchemy import Column, Integer, String, Text
|
| 2 |
-
from backend.app.db import Base
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
class Experiment(Base):
    """ORM model for one clustering run, stored in the ``experiments`` table."""

    __tablename__ = "experiments"

    # Identity of the run and the dataset it belongs to.
    id = Column(String, primary_key=True, index=True)
    dataset_id = Column(String, nullable=False, index=True)

    # What was run and how it ended.
    algorithm = Column(String, nullable=False)
    status = Column(String, nullable=False)

    # Optional JSON documents kept as text.
    config_json = Column(Text, nullable=True)
    metrics_json = Column(Text, nullable=True)
    summary_json = Column(Text, nullable=True)

    # Optional outcome details.
    runtime_ms = Column(Integer, nullable=True)
    error_message = Column(Text, nullable=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/repositories/__init__.py
DELETED
|
File without changes
|
backend/app/repositories/dataset_repo.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
from sqlalchemy.orm import Session
|
| 2 |
-
from backend.app.models.dataset import Dataset
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
def create_dataset(
    db: Session,
    id: str,
    name: str,
    file_path: str,
    row_count: int,
    column_count: int,
    schema_json: str,
    profile_json: str,
) -> Dataset:
    """Insert a new ``Dataset`` row, commit it and return the refreshed instance."""
    column_values = dict(
        id=id,
        name=name,
        file_path=file_path,
        row_count=row_count,
        column_count=column_count,
        schema_json=schema_json,
        profile_json=profile_json,
    )
    record = Dataset(**column_values)

    db.add(record)
    db.commit()
    # Reload the row so the returned object reflects what was persisted.
    db.refresh(record)
    return record
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
def get_dataset(db: Session, dataset_id: str) -> Dataset | None:
    """Return the dataset with the given id, or ``None`` when there is none."""
    matching = db.query(Dataset).filter_by(id=dataset_id)
    return matching.first()
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
def list_datasets(db: Session) -> list[Dataset]:
    """Return all datasets ordered by name, ascending."""
    by_name = Dataset.name.asc()
    query = db.query(Dataset).order_by(by_name)
    return query.all()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/repositories/experiment_repo.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
from sqlalchemy.orm import Session
|
| 2 |
-
from backend.app.models.experiment import Experiment
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
def create_experiment(
    db: Session,
    id: str,
    dataset_id: str,
    algorithm: str,
    status: str,
    config_json: str | None = None,
    metrics_json: str | None = None,
    summary_json: str | None = None,
    runtime_ms: int | None = None,
    error_message: str | None = None,
) -> Experiment:
    """Insert a new ``Experiment`` row, commit it and return the refreshed instance."""
    column_values = dict(
        id=id,
        dataset_id=dataset_id,
        algorithm=algorithm,
        status=status,
        config_json=config_json,
        metrics_json=metrics_json,
        summary_json=summary_json,
        runtime_ms=runtime_ms,
        error_message=error_message,
    )
    record = Experiment(**column_values)

    db.add(record)
    db.commit()
    # Reload the row so the returned object reflects what was persisted.
    db.refresh(record)
    return record
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
def get_experiment(db: Session, experiment_id: str) -> Experiment | None:
    """Return the experiment with the given id, or ``None`` when there is none."""
    matching = db.query(Experiment).filter_by(id=experiment_id)
    return matching.first()
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
def list_experiments(db: Session) -> list[Experiment]:
    """Return all experiments ordered by id, descending."""
    by_id_desc = Experiment.id.desc()
    query = db.query(Experiment).order_by(by_id_desc)
    return query.all()
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
def get_experiments_by_ids(db: Session, experiment_ids: list[str]) -> list[Experiment]:
    """Return the experiments whose ids are listed, in the order requested.

    Ids without a matching row are skipped and an id listed more than once
    yields a single result. An empty ``experiment_ids`` returns ``[]``
    without querying the database.
    """
    if not experiment_ids:
        return []

    rows = db.query(Experiment).filter(Experiment.id.in_(experiment_ids)).all()

    # An IN (...) query does not promise any row order, so re-order the rows
    # to follow the caller's list. dict.fromkeys drops repeated ids while
    # keeping their first-seen position.
    rows_by_id = {row.id: row for row in rows}
    return [
        rows_by_id[wanted_id]
        for wanted_id in dict.fromkeys(experiment_ids)
        if wanted_id in rows_by_id
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/services/__init__.py
DELETED
|
File without changes
|
backend/app/services/profiling_service.py
DELETED
|
@@ -1,23 +0,0 @@
|
|
| 1 |
-
def profile_dataframe(df):
    """Build a JSON-serialisable profile of a DataFrame.

    Args:
        df: The pandas DataFrame to describe.

    Returns:
        A dict with two keys:
        ``columns`` - one entry per column holding its ``name``, its dtype as
        a string (``inferred_type``), the fraction of missing values
        (``missing_pct``, between 0.0 and 1.0) and the number of distinct
        non-missing values (``cardinality``);
        ``recommended_algorithms`` - ``"kmeans"`` and ``"birch"`` when the
        frame has numeric columns, plus ``"agglomerative"`` when it has
        object or boolean columns.
    """
    numeric_cols = df.select_dtypes(include=["int64", "float64", "int32", "float32"]).columns.tolist()
    categorical_cols = df.select_dtypes(include=["object", "bool"]).columns.tolist()

    recommended = []
    if numeric_cols:
        recommended.extend(["kmeans", "birch"])
    if categorical_cols:
        recommended.append("agglomerative")

    # The mean of an empty boolean series is NaN, which is not valid strict
    # JSON; a frame without rows has nothing missing, so report 0.0 instead.
    has_rows = len(df) > 0

    cols = [
        {
            "name": col,
            "inferred_type": str(df[col].dtype),
            "missing_pct": float(df[col].isna().mean()) if has_rows else 0.0,
            "cardinality": int(df[col].nunique(dropna=True)),
        }
        for col in df.columns
    ]

    return {
        "columns": cols,
        "recommended_algorithms": recommended,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/utils/__init__.py
DELETED
|
File without changes
|
backend/app/utils/ids.py
DELETED
|
@@ -1,7 +0,0 @@
|
|
| 1 |
-
import secrets
|
| 2 |
-
|
| 3 |
-
def make_dataset_id(nbytes: int = 4) -> str:
    """Return a new random dataset id of the form ``ds_<hex>``.

    Args:
        nbytes: Number of random bytes in the token; the hex part is twice
            as many characters. The default of 4 gives 8 hex characters.
            Pass a larger value to make collisions between ids less likely.
    """
    return "ds_" + secrets.token_hex(nbytes)
|
| 5 |
-
|
| 6 |
-
def make_experiment_id(nbytes: int = 4) -> str:
    """Return a new random experiment id of the form ``exp_<hex>``.

    Args:
        nbytes: Number of random bytes in the token; the hex part is twice
            as many characters. The default of 4 gives 8 hex characters.
            Pass a larger value to make collisions between ids less likely.
    """
    return "exp_" + secrets.token_hex(nbytes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|