Spaces:
Sleeping
Sleeping
File size: 6,602 Bytes
ac5551d 8302f42 ac5551d 8302f42 ac5551d 8302f42 ac5551d 92faea1 ac5551d 92faea1 ac5551d 8302f42 ac5551d 8302f42 ac5551d 8302f42 ac5551d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | """
api/routes/datasets.py β Dataset Discovery REST API (Cloud Registry Version).
"""
from __future__ import annotations
from typing import Optional
from datetime import datetime
from fastapi import APIRouter, HTTPException, Query
from adapters.roboflow_adapter import RoboflowAdapter
from datasets import registry as ds_reg
from models.dataset import (
Dataset, DatasetSummary, RoboflowSearchRequest,
)
from models.analytics import DatasetAnalytics, SplitAnalytics, QualityIssues, ClassDistributionItem
from observability.logger import audit, get_logger
log = get_logger("datasets_route")
router = APIRouter(prefix="/datasets", tags=["datasets"])
# ββ Analytics βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
@router.get("/{dataset_id}/analytics", response_model=DatasetAnalytics)
async def get_dataset_analytics(dataset_id: str):
"""
Fetch comprehensive analytics for a dataset.
"""
ds = await ds_reg.get_dataset(dataset_id)
if not ds:
raise HTTPException(404, f"Dataset {dataset_id!r} not found")
stats = ds.stats
analytics = DatasetAnalytics(
dataset_id=dataset_id,
healthScore=stats.health_score,
split=SplitAnalytics(
train=stats.split.train if stats.split.total > 0 else 70,
val=stats.split.val if stats.split.total > 0 else 20,
test=stats.split.test if stats.split.total > 0 else 10
),
qualityIssues=QualityIssues(
missingLabels=stats.missing_labels,
emptyImages=stats.empty_images,
duplicates=stats.duplicate_count,
outliers=int(ds.images * 0.005) # placeholder
),
classDistribution=[
ClassDistributionItem(name=name, count=int(ds.images / ds.classes) if ds.classes > 0 else 0)
for name in ds.class_names[:20]
]
)
return analytics
# ββ List / Search datasets ββββββββββββββββββββββββββββββββββββββββββββββββββββ
@router.get("", response_model=list[DatasetSummary])
async def list_datasets(
task: Optional[str] = Query(None),
format: Optional[str] = Query(None),
source: Optional[str] = Query(None),
status: Optional[str] = Query(None),
search: Optional[str] = Query(None),
starred: Optional[bool] = Query(None),
limit: int = Query(100, ge=1, le=1000),
offset: int = Query(0, ge=0),
):
try:
datasets = await ds_reg.get_all_datasets(
task=task, format=format, source=source,
status=status, search=search, starred=starred,
limit=limit, offset=offset,
)
return [_to_summary(d) for d in datasets]
except Exception as exc:
log.exception("list_datasets_error")
raise HTTPException(status_code=500, detail=str(exc))
# ββ Roboflow Search & Sync ββββββββββββββββββββββββββββββββββββββββββββββββββββ
@router.post("/search/roboflow", response_model=list[DatasetSummary])
async def search_roboflow(req: RoboflowSearchRequest):
"""
Live search Roboflow Universe. Results are cached for 1 hour.
"""
try:
datasets = await RoboflowAdapter.search_datasets(
api_key = req.api_key,
query = req.query,
workspace = req.workspace,
page = req.page,
page_size = req.page_size,
)
except Exception as exc:
log.error("roboflow_search_error", error=str(exc))
raise HTTPException(502, f"Roboflow API error: {exc}")
# Upsert to registry DB
await ds_reg.bulk_upsert_datasets(datasets)
await audit("roboflow_search", {"query": req.query, "count": len(datasets)})
return [_to_summary(d) for d in datasets]
@router.post("/sync/roboflow", response_model=dict)
async def sync_roboflow_workspace(
api_key: str = Query(..., description="Roboflow API key"),
workspace: str = Query(..., description="Workspace slug"),
):
"""Sync all datasets from a Roboflow workspace into the global registry."""
try:
datasets = await RoboflowAdapter.list_workspace_datasets(api_key, workspace)
except Exception as exc:
raise HTTPException(502, f"Roboflow API error: {exc}")
count = await ds_reg.bulk_upsert_datasets(datasets)
return {"synced": count, "workspace": workspace}
# ββ Dataset detail ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
@router.get("/{dataset_id}", response_model=Dataset)
async def get_dataset(dataset_id: str):
ds = await ds_reg.get_dataset(dataset_id)
if not ds:
raise HTTPException(404, f"Dataset {dataset_id!r} not found")
return ds
# ββ Star βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
@router.post("/{dataset_id}/star", response_model=dict)
async def toggle_star(dataset_id: str):
new_val = await ds_reg.toggle_starred(dataset_id)
return {"dataset_id": dataset_id, "starred": new_val}
# ββ Helper ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
def _to_summary(d: Dataset) -> DatasetSummary:
health_score = 0.0
try:
if hasattr(d, 'stats') and d.stats:
health_score = getattr(d.stats, 'health_score', 0.0)
except Exception:
pass
return DatasetSummary(
id = d.id,
name = d.name,
task = str(d.task),
format = str(d.format),
source = str(d.source),
status = str(d.status),
images = d.images,
classes = d.classes,
size_label = d.size_label,
tags = d.tags,
starred = d.starred,
import_progress = d.import_progress,
health_score = health_score,
created_at = d.created_at,
updated_at = d.updated_at,
)
|