Adisri99 committed on
Commit
4b9c1f1
·
verified ·
1 Parent(s): cfd01cb

Delete backend

Browse files
backend/app/api/compare.py DELETED
@@ -1,41 +0,0 @@
1
- import json
2
-
3
- from fastapi import APIRouter, Depends, Query
4
- from sqlalchemy.orm import Session
5
-
6
- from backend.app.db import get_db
7
- from backend.app.repositories.experiment_repo import (
8
- get_experiments_by_ids,
9
- list_experiments,
10
- )
11
-
12
router = APIRouter(tags=["compare"])


def _decode_json(raw):
    """Parse a stored JSON text column, mapping NULL/empty text to ``{}``."""
    if not raw:
        return {}
    return json.loads(raw)


@router.get("/compare")
def compare_experiments(
    experiment_ids: str | None = Query(default=None),
    db: Session = Depends(get_db),
):
    """Return stored experiments side by side for comparison.

    Args:
        experiment_ids: Optional comma-separated experiment ids. Whitespace
            around each id is stripped and blank entries are dropped. When
            the parameter is missing or empty, every experiment is listed.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        ``{"experiments": [...]}`` with one dict per experiment; the JSON
        text columns (config, metrics, summary) are decoded into objects.
    """
    if not experiment_ids:
        selected = list_experiments(db)
    else:
        wanted = []
        for piece in experiment_ids.split(","):
            piece = piece.strip()
            if piece:
                wanted.append(piece)
        selected = get_experiments_by_ids(db, wanted)

    rows = []
    for exp in selected:
        rows.append(
            {
                "experiment_id": exp.id,
                "dataset_id": exp.dataset_id,
                "algorithm": exp.algorithm,
                "status": exp.status,
                "config": _decode_json(exp.config_json),
                "metrics": _decode_json(exp.metrics_json),
                "summary": _decode_json(exp.summary_json),
                "runtime_ms": exp.runtime_ms,
                "error_message": exp.error_message,
            }
        )
    return {"experiments": rows}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/api/experiments.py DELETED
@@ -1,104 +0,0 @@
1
- import secrets
2
- import time
3
-
4
- import pandas as pd
5
- from fastapi import APIRouter, Depends, HTTPException
6
- from pydantic import BaseModel
7
- from sqlalchemy.orm import Session
8
- from sklearn.cluster import AgglomerativeClustering, Birch, KMeans
9
- from sklearn.metrics import silhouette_score
10
-
11
- from backend.app.db import get_db
12
- from backend.app.repositories.dataset_repo import get_dataset
13
- from backend.app.repositories.experiment_repo import create_experiment
14
-
15
router = APIRouter(tags=["experiments"])


class RunRequest(BaseModel):
    """Request body describing one clustering run."""

    # Id of the dataset to cluster.
    dataset_id: str
    # Optional label for the run.
    name: str | None = None
    # Name of the clustering algorithm to run; defaults to "kmeans".
    algorithm: str = "kmeans"
    # Number of clusters requested from the algorithm.
    n_clusters: int = 4
    # Dataset columns used as clustering features (required field).
    feature_columns: list[str]
24
-
25
-
26
@router.post("/experiments/run")
def run_experiment(req: RunRequest, db: Session = Depends(get_db)):
    """Cluster a stored dataset and persist the outcome as an experiment.

    Args:
        req: Run parameters (dataset, algorithm, cluster count, features).
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A dict with the new experiment id, the per-row cluster labels, the
        silhouette score (``None`` when it is undefined), the cluster count,
        the cluster sizes and the fit runtime in milliseconds.

    Raises:
        HTTPException: 404 when the dataset id is unknown; 400 when
            ``feature_columns`` is empty, the algorithm is unsupported,
            ``n_clusters`` is below 1, a requested column is missing, or
            the estimator rejects the data/parameters.
    """
    # Function-scope import: the surrounding module does not import json.
    import json

    dataset = get_dataset(db, req.dataset_id)
    if not dataset:
        raise HTTPException(status_code=404, detail="Dataset not found")

    # Validate everything that needs no file I/O before loading the dataset.
    if not req.feature_columns:
        raise HTTPException(status_code=400, detail="feature_columns is required")

    estimator_factories = {
        "kmeans": lambda: KMeans(
            n_clusters=req.n_clusters, n_init=10, random_state=42
        ),
        "agglomerative": lambda: AgglomerativeClustering(n_clusters=req.n_clusters),
        "birch": lambda: Birch(n_clusters=req.n_clusters),
    }
    build_model = estimator_factories.get(req.algorithm)
    if build_model is None:
        raise HTTPException(status_code=400, detail="Unsupported algorithm")

    if req.n_clusters < 1:
        raise HTTPException(status_code=400, detail="n_clusters must be at least 1")

    # Compare the suffix case-insensitively so "data.CSV" is still read as CSV.
    if dataset.file_path.lower().endswith(".csv"):
        df = pd.read_csv(dataset.file_path)
    else:
        df = pd.read_excel(dataset.file_path)

    missing = [c for c in req.feature_columns if c not in df.columns]
    if missing:
        raise HTTPException(
            status_code=400, detail=f"Missing columns: {', '.join(missing)}"
        )

    # One-hot encode non-numeric features and replace missing values with 0.
    X = pd.get_dummies(df[req.feature_columns].copy()).fillna(0)

    # perf_counter is monotonic, so the duration cannot be skewed by
    # wall-clock adjustments.
    start = time.perf_counter()
    try:
        labels = build_model().fit_predict(X)
    except ValueError as exc:
        # Report estimator rejections (e.g. more clusters than rows, empty
        # data) as a client error instead of an unhandled 500.
        raise HTTPException(
            status_code=400, detail=f"Clustering failed: {exc}"
        ) from exc
    runtime_ms = int((time.perf_counter() - start) * 1000)

    unique_labels = sorted(set(labels.tolist()))
    score = None
    # The silhouette score is only computed for 2..n_rows-1 distinct labels.
    if 1 < len(unique_labels) < len(X):
        score = float(silhouette_score(X, labels))

    cluster_sizes = {
        str(label): int((labels == label).sum()) for label in unique_labels
    }
    experiment_id = "exp_" + secrets.token_hex(4)

    metrics = {
        "silhouette_score": score,
        "cluster_count": len(unique_labels),
        "row_count": int(len(X)),
        "runtime_ms": runtime_ms,
    }
    summary = {
        "feature_columns": req.feature_columns,
        "cluster_sizes": cluster_sizes,
    }

    create_experiment(
        db=db,
        id=experiment_id,
        dataset_id=req.dataset_id,
        algorithm=req.algorithm,
        status="completed",
        config_json=req.model_dump_json(),
        # json.dumps keeps the integer metrics as integers; routing the dict
        # through a pandas Series coerces them to floats before serialising.
        metrics_json=json.dumps(metrics),
        summary_json=json.dumps(summary),
        runtime_ms=runtime_ms,
        error_message=None,
    )

    return {
        "experiment_id": experiment_id,
        "status": "completed",
        "clusters": labels.tolist(),
        "silhouette_score": score,
        "cluster_count": len(unique_labels),
        "cluster_sizes": cluster_sizes,
        "runtime_ms": runtime_ms,
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/api/exports.py DELETED
@@ -1,50 +0,0 @@
1
- import io
2
- import json
3
-
4
- import pandas as pd
5
- from fastapi import APIRouter, Depends, HTTPException
6
- from fastapi.responses import StreamingResponse
7
- from sqlalchemy.orm import Session
8
-
9
- from backend.app.db import get_db
10
- from backend.app.repositories.dataset_repo import get_dataset
11
- from backend.app.repositories.experiment_repo import get_experiment
12
-
13
router = APIRouter(tags=["exports"])


@router.get("/exports/{experiment_id}")
def export_experiment(experiment_id: str, db: Session = Depends(get_db)):
    """Download an experiment's dataset as CSV, annotated with run details.

    Every row of the dataset gains the experiment id, algorithm and status
    plus one ``metric_<name>`` column per stored metric (the same value on
    every row).

    Raises:
        HTTPException: 404 when the experiment or its dataset is not found.
    """
    experiment = get_experiment(db, experiment_id)
    if not experiment:
        raise HTTPException(status_code=404, detail="Experiment not found")

    dataset = get_dataset(db, experiment.dataset_id)
    if not dataset:
        raise HTTPException(status_code=404, detail="Dataset not found")

    source_path = dataset.file_path
    # Anything that is not a ".csv" path is handed to the Excel reader.
    load = pd.read_csv if source_path.endswith(".csv") else pd.read_excel
    frame = load(source_path).copy()

    frame["experiment_id"] = experiment.id
    frame["algorithm"] = experiment.algorithm
    frame["experiment_status"] = experiment.status

    stored_metrics = {}
    if experiment.metrics_json:
        stored_metrics = json.loads(experiment.metrics_json)
    for metric_name, metric_value in stored_metrics.items():
        frame[f"metric_{metric_name}"] = metric_value

    # With no target given, to_csv returns the rendered CSV text directly.
    csv_text = frame.to_csv(index=False)

    disposition = f"attachment; filename={experiment_id}_export.csv"
    return StreamingResponse(
        iter([csv_text]),
        media_type="text/csv",
        headers={"Content-Disposition": disposition},
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/api/runs.py DELETED
@@ -1,29 +0,0 @@
1
- import json
2
-
3
- from fastapi import APIRouter, Depends
4
- from sqlalchemy.orm import Session
5
-
6
- from backend.app.db import get_db
7
- from backend.app.repositories.experiment_repo import list_experiments
8
-
9
router = APIRouter(tags=["runs"])


@router.get("/runs")
def get_runs(db: Session = Depends(get_db)):
    """List every stored experiment as a run summary.

    Returns:
        ``{"runs": [...]}`` with one dict per experiment; the metrics and
        summary JSON text columns are decoded, with NULL/empty text
        reported as ``{}``.
    """

    def decode(raw):
        # Stored JSON may be NULL or empty; report that as an empty object.
        return json.loads(raw) if raw else {}

    runs = []
    for exp in list_experiments(db):
        runs.append(
            {
                "experiment_id": exp.id,
                "dataset_id": exp.dataset_id,
                "algorithm": exp.algorithm,
                "status": exp.status,
                "metrics": decode(exp.metrics_json),
                "summary": decode(exp.summary_json),
                "runtime_ms": exp.runtime_ms,
                "error_message": exp.error_message,
            }
        )
    return {"runs": runs}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/db.py DELETED
@@ -1,34 +0,0 @@
1
- import os
2
- from sqlalchemy import create_engine
3
- from sqlalchemy.orm import declarative_base, sessionmaker
4
-
5
# Path of the SQLite database file; overridable through the DB_PATH
# environment variable.
DB_PATH = os.environ.get("DB_PATH", "/data/clusterforge.db")
DATABASE_URL = "sqlite:///" + DB_PATH

# check_same_thread=False lifts sqlite3's rule that a connection may only be
# used from the thread that created it.
engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})

# Session factory bound to the engine; sessions neither autoflush nor
# autocommit, so callers flush/commit explicitly.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)

# Declarative base class that the ORM models inherit from.
Base = declarative_base()
20
-
21
-
22
def init_db() -> None:
    """Create the tables for all models registered on ``Base``."""
    # Imported only for their side effect: loading a model module defines its
    # mapped class, which registers the table on Base.metadata. The imports
    # live inside the function rather than at module level.
    from backend.app.models import dataset, experiment  # noqa: F401

    Base.metadata.create_all(bind=engine)
27
-
28
-
29
def get_db():
    """Yield a database session and close it once the caller is done.

    Written as a generator so it can be used as a FastAPI dependency: the
    session is closed after the consumer finishes, including when an
    exception propagates through the ``yield``.
    """
    # A Session used as a context manager is closed on exit, which matches
    # an explicit try/finally around close().
    with SessionLocal() as db:
        yield db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/main.py DELETED
@@ -1,45 +0,0 @@
1
- from fastapi import FastAPI
2
- from fastapi.middleware.cors import CORSMiddleware
3
-
4
- from backend.app.db import init_db
5
- from backend.app.api.health import router as health_router
6
- from backend.app.api.datasets import router as datasets_router
7
- from backend.app.api.presets import router as presets_router
8
- from backend.app.api.experiments import router as experiments_router
9
- from backend.app.api.compare import router as compare_router
10
- from backend.app.api.exports import router as exports_router
11
- from backend.app.api.runs import router as runs_router
12
-
13
app = FastAPI(title="ClusterBuster API")


@app.on_event("startup")
def on_startup() -> None:
    """Initialise the database when the application starts."""
    init_db()


# Browser origins permitted by the CORS middleware.
origins = [
    "http://localhost:3000",
    "https://cluster-buster.vercel.app",
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Every feature router is mounted under the shared "/api" prefix, in this order.
_API_ROUTERS = (
    health_router,
    datasets_router,
    presets_router,
    experiments_router,
    compare_router,
    exports_router,
    runs_router,
)
for _api_router in _API_ROUTERS:
    app.include_router(_api_router, prefix="/api")


@app.get("/")
def root():
    """Return a small static payload identifying the service."""
    return {"ok": True, "service": "clusterbuster-api"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/models/experiment.py DELETED
@@ -1,16 +0,0 @@
1
- from sqlalchemy import Column, Integer, String, Text
2
- from backend.app.db import Base
3
-
4
-
5
class Experiment(Base):
    """ORM model for one row of the ``experiments`` table."""

    __tablename__ = "experiments"

    # Identity and required run description.
    id = Column(String, primary_key=True, index=True)
    dataset_id = Column(String, nullable=False, index=True)
    algorithm = Column(String, nullable=False)
    status = Column(String, nullable=False)

    # Optional JSON documents, stored as text.
    config_json = Column(Text, nullable=True)
    metrics_json = Column(Text, nullable=True)
    summary_json = Column(Text, nullable=True)

    # Optional run duration in milliseconds and error text.
    runtime_ms = Column(Integer, nullable=True)
    error_message = Column(Text, nullable=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/repositories/experiment_repo.py DELETED
@@ -1,46 +0,0 @@
1
- from sqlalchemy.orm import Session
2
-
3
- from backend.app.models.experiment import Experiment
4
-
5
-
6
def create_experiment(
    db: Session,
    id: str,
    dataset_id: str,
    algorithm: str,
    status: str,
    config_json: str | None = None,
    metrics_json: str | None = None,
    summary_json: str | None = None,
    runtime_ms: int | None = None,
    error_message: str | None = None,
) -> Experiment:
    """Insert a new experiment row and return the persisted instance.

    The row is committed immediately and then refreshed from the database
    before being returned.

    Args:
        db: Open database session.
        id: Primary key for the new experiment.
        dataset_id: Id of the dataset the experiment ran on.
        algorithm: Name of the algorithm used.
        status: Status string to store.
        config_json: Optional JSON text for the run configuration.
        metrics_json: Optional JSON text for the run metrics.
        summary_json: Optional JSON text for the run summary.
        runtime_ms: Optional runtime in milliseconds.
        error_message: Optional error text.
    """
    column_values = {
        "id": id,
        "dataset_id": dataset_id,
        "algorithm": algorithm,
        "status": status,
        "config_json": config_json,
        "metrics_json": metrics_json,
        "summary_json": summary_json,
        "runtime_ms": runtime_ms,
        "error_message": error_message,
    }
    record = Experiment(**column_values)

    db.add(record)
    db.commit()
    db.refresh(record)
    return record
33
-
34
-
35
def get_experiment(db: Session, experiment_id: str) -> Experiment | None:
    """Return the experiment with the given id, or ``None`` if there is none."""
    matching = db.query(Experiment).filter(Experiment.id == experiment_id)
    return matching.first()
37
-
38
-
39
def list_experiments(db: Session) -> list[Experiment]:
    """Return all experiments ordered by id, descending."""
    # NOTE(review): this sorts on the id string itself; whether that matches
    # creation order depends on how ids are generated - confirm with callers.
    by_id_descending = Experiment.id.desc()
    return db.query(Experiment).order_by(by_id_descending).all()
41
-
42
-
43
def get_experiments_by_ids(db: Session, experiment_ids: list[str]) -> list[Experiment]:
    """Return the experiments whose id appears in ``experiment_ids``.

    An empty id list returns ``[]`` without querying the database. No
    ordering is applied to the query.
    """
    if experiment_ids:
        id_is_requested = Experiment.id.in_(experiment_ids)
        return db.query(Experiment).filter(id_is_requested).all()
    return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/services/profiling_service.py DELETED
@@ -1,25 +0,0 @@
1
def profile_dataframe(df):
    """Summarise each column of ``df`` and suggest clustering algorithms.

    Args:
        df: The pandas DataFrame to profile.

    Returns:
        A dict with two keys:

        * ``"columns"``: one entry per column holding its ``name``,
          ``inferred_type`` (the dtype as a string), ``missing_pct`` (the
          fraction of missing values, 0.0-1.0) and ``cardinality`` (number
          of distinct non-missing values).
        * ``"recommended_algorithms"``: ``"kmeans"`` and ``"birch"`` when any
          numeric column exists, plus ``"agglomerative"`` when any
          categorical/text column exists.
    """
    # Function-scope imports: this module has no file-level imports.
    import pandas as pd
    from pandas.api import types as pdt

    def is_numeric(dtype) -> bool:
        # Accept every integer/float width (and nullable numerics), not just
        # int64/float64. Booleans count as numeric to pandas, so exclude them
        # explicitly to keep the original behaviour for bool columns.
        return pdt.is_numeric_dtype(dtype) and not pdt.is_bool_dtype(dtype)

    def is_categorical(dtype) -> bool:
        # Text may be stored as object, a dedicated string dtype or category.
        return (
            isinstance(dtype, pd.CategoricalDtype)
            or pdt.is_object_dtype(dtype)
            or pdt.is_string_dtype(dtype)
        )

    dtypes = list(df.dtypes)

    recommended = []
    if any(is_numeric(dtype) for dtype in dtypes):
        recommended.extend(["kmeans", "birch"])
    if any(is_categorical(dtype) for dtype in dtypes):
        recommended.append("agglomerative")

    columns = []
    # items() yields one Series per column even when labels are duplicated,
    # whereas df[label] would return a DataFrame in that case.
    for name, series in df.items():
        # The mean of an empty mask is NaN, which is not valid JSON; report
        # an empty column as having no missing values instead.
        missing_pct = float(series.isna().mean()) if len(series) else 0.0
        columns.append(
            {
                "name": name,
                "inferred_type": str(series.dtype),
                "missing_pct": missing_pct,
                "cardinality": int(series.nunique()),
            }
        )

    return {
        "columns": columns,
        "recommended_algorithms": recommended,
    }