# mlforge/models/analytics.py
# senthil2421
# feat: add dataset analytics endpoint and models for high-fidelity CLI access
# 92faea1
from __future__ import annotations
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field
class ClassDistributionItem(BaseModel):
    """A named entry carrying an integer count and an optional color string.

    ``DatasetAnalytics.classDistribution`` is declared as a list of these.
    """

    # Both required. Presumably a class label and how many times it occurs;
    # confirm the exact unit (images vs. annotations) against the producer.
    name: str
    count: int

    # May be omitted; falls back to None.
    # NOTE(review): the expected color format (hex, CSS name, ...) is not
    # visible in this module.
    color: Optional[str] = Field(default=None)
class SplitAnalytics(BaseModel):
    """Three float values named ``train``, ``val`` and ``test``.

    Every field is optional and defaults to ``0.0``.
    """

    # NOTE(review): whether these are fractions (0-1), percentages or raw
    # counts is not established here; no range validation is applied.
    train: float = Field(default=0.0)
    val: float = Field(default=0.0)
    test: float = Field(default=0.0)
class QualityIssues(BaseModel):
    """Four integer counters, one per kind of quality issue.

    Every counter is optional and starts at ``0``.
    """

    # camelCase names are the serialized field names; keep them as-is.
    missingLabels: int = Field(default=0)
    emptyImages: int = Field(default=0)
    duplicates: int = Field(default=0)
    outliers: int = Field(default=0)
class ResolutionItem(BaseModel):
    """A required string ``label`` paired with a required integer ``count``.

    ``DatasetAnalytics.resolutionDist`` is declared as a list of these.
    """

    # Presumably a resolution bucket name (format not shown in this module).
    label: str
    # Tally for that label.
    count: int
class AspectRatioItem(BaseModel):
    """A required string ``label`` paired with a required integer ``count``.

    ``DatasetAnalytics.aspectRatioDist`` is declared as a list of these.
    """

    # Presumably an aspect-ratio bucket name (format not shown in this module).
    label: str
    # Tally for that label.
    count: int
class ObjectDensityItem(BaseModel):
    """A required string ``bucket`` paired with a required integer ``count``.

    ``DatasetAnalytics.objectsPerImage`` is declared as a list of these.
    """

    # Presumably a range of objects-per-image values; the bucket naming
    # scheme is not visible in this module.
    bucket: str
    # Tally for that bucket.
    count: int
class DatasetAnalytics(BaseModel):
    """Top-level analytics record for a dataset, keyed by ``dataset_id``.

    Only ``dataset_id`` is required. Every other field falls back to zero,
    an empty collection, or a default-constructed sub-model, so a record
    can be built from the identifier alone.
    """

    # The sole required field.
    dataset_id: str

    # NOTE(review): the score's scale (0-1 vs. 0-100) is not established here.
    healthScore: float = Field(default=0.0)

    # Nested models; each instance gets its own freshly built default.
    split: SplitAnalytics = Field(default_factory=SplitAnalytics)
    qualityIssues: QualityIssues = Field(default_factory=QualityIssues)

    # Distribution lists; default_factory gives each instance its own list.
    classDistribution: List[ClassDistributionItem] = Field(default_factory=list)
    resolutionDist: List[ResolutionItem] = Field(default_factory=list)
    aspectRatioDist: List[AspectRatioItem] = Field(default_factory=list)
    objectsPerImage: List[ObjectDensityItem] = Field(default_factory=list)

    # Free-form extras; keys and values are unconstrained beyond str keys.
    metadata: Dict[str, Any] = Field(default_factory=dict)