# NOTE(review): removed non-Python extraction artifacts that preceded this file
# (byte size "3,070 Bytes", commit hash "f60a6c1", and a line-number gutter) —
# they would make the module unparseable.
from dataclasses import dataclass, fields, asdict, field
from typing import Any
from numpy import ndarray
from scorevision.chute_template.schemas import SVFrameResult
from scorevision.chute_template.schemas import TVPredictInput
from scorevision.vlm_pipeline.domain_specific_schemas.challenge_types import (
ChallengeType,
)
@dataclass
class Evaluation:
@property
def average(self) -> float:
values = [float(getattr(self, f.name)) for f in fields(self)]
return sum(values) / len(values) if values else 0.0
def __float__(self) -> float:
return self.average
def to_dict(self) -> dict:
return asdict(self)
@dataclass
class KeypointsScore(Evaluation):
    """Score for keypoint-detection quality."""

    # Correctness of the keypoint detections, judged by how well the
    # transformed floor lines align with the floor markings.
    floor_markings_alignment: float = 0.0
@dataclass
class ActionScore(Evaluation):
    """Score for action-recognition quality."""

    # Accuracy of the scene's action labels compared to the pseudo-GT
    # annotations.
    categorisation: float = 0.0
@dataclass
class ObjectsScore(Evaluation):
    """Scores for object-detection quality, all measured against the
    pseudo-GT annotations."""

    # IoU-based placement accuracy of the predicted bounding boxes.
    bbox_placement: float = 0.0
    # Accuracy of the object class labels (e.g. player, ball).
    categorisation: float = 0.0
    # Accuracy of the team assignment for detected objects.
    team: float = 0.0
    # Agreement between the number of detected and annotated objects.
    enumeration: float = 0.0
    # Stability/smoothness of the object detections across the video.
    tracking_stability: float = 0.0
@dataclass
class LatencyScore(Evaluation):
    """Score derived from miner inference speed."""

    # How quickly the miner produces predictions for the video,
    # mapped to a score as 1/2**t.
    inference: float = 0.0
@dataclass
class TotalScore(Evaluation):
    """Top-level aggregate score.

    Each field is itself an ``Evaluation``; the inherited ``average``
    folds each one in via ``float()`` (i.e. that sub-score's own
    average), so every category contributes with equal weight.
    """

    action: ActionScore = field(default_factory=ActionScore)  # action-recognition scores
    keypoints: KeypointsScore = field(default_factory=KeypointsScore)  # floor-keypoint scores
    objects: ObjectsScore = field(default_factory=ObjectsScore)  # object-detection scores
    latency: LatencyScore = field(default_factory=LatencyScore)  # inference-speed scores
@dataclass
class SVChallenge:
    """A single video challenge to be sent to a miner for prediction."""

    # Environment name/tag for this challenge — semantics not visible here; TODO confirm.
    env: str
    # Input payload forwarded to the prediction endpoint.
    payload: TVPredictInput
    # Free-form challenge metadata.
    meta: dict[str, Any]
    # Prompt text accompanying the challenge.
    prompt: str
    # Unique identifier for this challenge.
    challenge_id: str
    # Indices of the frames selected from the source video.
    frame_numbers: list[int]
    # Decoded video frames (presumably HxWxC image arrays — verify against producer).
    frames: list[ndarray]
    # Precomputed dense optical-flow frames paired with `frames`.
    dense_optical_flow_frames: list[ndarray]
    # Optional upstream API task identifier, when the challenge originates from the API.
    api_task_id: str | int | None = None
    # Optional challenge category; None when unclassified.
    challenge_type: ChallengeType | None = None
@dataclass
class SVRunOutput:
    """Outcome of one miner run over a challenge."""

    # Whether the run completed successfully.
    success: bool
    # Wall-clock latency in milliseconds (NOTE(review): SVPredictResult
    # uses seconds — confirm the conversion point between the two).
    latency_ms: float
    # Per-key lists of frame results; presumably None when the run failed — verify.
    predictions: dict[str, list[SVFrameResult]] | None
    # Error description; presumably None on success — verify.
    error: str | None
    # Identifier of the model that produced the output, if known.
    model: str | None = None
@dataclass
class SVPredictResult:
    """Result of a single predict call."""

    # Whether the call succeeded.
    success: bool
    # Identifier of the model used, if known.
    model: str | None
    # Wall-clock latency in seconds (NOTE(review): SVRunOutput uses
    # milliseconds — confirm the conversion point between the two).
    latency_seconds: float
    # Parsed predictions; presumably None on failure — verify.
    predictions: dict[str, Any] | None
    # Error description; presumably None on success — verify.
    error: str | None
    # Raw, unparsed response payload, if retained.
    raw: dict[str, Any] | None = None
@dataclass
class SVEvaluation:
    """Final evaluation of a miner's output for a challenge."""

    # Per-category accuracy components — keys presumably mirror TotalScore
    # categories (action/keypoints/objects/latency); verify against producer.
    acc_breakdown: dict[str, float]
    # Overall accuracy.
    acc: float
    # Measured latency in milliseconds.
    latency_ms: float
    # Final combined score.
    score: float
    # Additional evaluation details for logging/debugging.
    details: dict[str, Any]
# NOTE(review): removed a trailing "|" extraction artifact that ended the file.