""" ArcisVLM API — Pydantic request/response schemas. All API contracts are defined here for type safety and auto-generated OpenAPI docs. """ from __future__ import annotations from pydantic import BaseModel, Field from typing import Optional # --------------------------------------------------------------------------- # Inference # --------------------------------------------------------------------------- class QueryRequest(BaseModel): """Submit a visual question to the VLM.""" image_path: Optional[str] = Field(None, description="Local file path to the image") image_base64: Optional[str] = Field(None, description="Base64-encoded image bytes") question: str = Field(..., min_length=1, description="Natural-language question or instruction") task_type: str = Field("vqa", description="Task hint: vqa, detect, alert, caption, count, ocr, reason") max_tokens: int = Field(256, ge=1, le=4096) temperature: float = Field(0.7, ge=0.0, le=2.0) camera_id: Optional[str] = Field(None, description="Camera ID to pull latest frame from") class QueryResponse(BaseModel): """Response from a VLM inference query.""" answer: str confidence: float expert_used: str processing_time_ms: float task_id: str = "" metadata: dict = {} # Multimodal output fields output_type: str = "text" detections: list[dict] = [] counts: dict = {} text_regions: list[dict] = [] alert: dict = {} analysis: dict = {} tracks: list[dict] = [] scene_attributes: dict = {} annotated_frame_base64: Optional[str] = None clip_frames_base64: list[str] = [] class EmbeddingRequest(BaseModel): """Get the raw JEPA embedding for an image (+ optional query).""" image_path: Optional[str] = None image_base64: Optional[str] = None query: Optional[str] = None class EmbeddingResponse(BaseModel): """Raw embedding vector.""" embedding: list[float] dimension: int # --------------------------------------------------------------------------- # Streams (camera management) # --------------------------------------------------------------------------- class StreamStartRequest(BaseModel): """Start ingesting from an RTSP camera.""" camera_id: str = Field(..., min_length=1) rtsp_url: str = Field(..., min_length=1) target_fps: float = Field(2.0, ge=0.1, le=30.0) tasks: list[str] = Field(default=["detect", "alert"], description="Auto-inference task types") class StreamStopRequest(BaseModel): """Stop ingesting from a camera.""" camera_id: str class StreamStatusResponse(BaseModel): """Status of a single camera stream.""" camera_id: str state: str frames_captured: int = 0 frames_dropped: int = 0 actual_fps: float = 0.0 reconnect_count: int = 0 # --------------------------------------------------------------------------- # Alerts # --------------------------------------------------------------------------- class AlertRuleCreate(BaseModel): """Create an alert rule.""" rule_id: str = Field(..., min_length=1) condition_type: str = Field(..., description="presence, absence, count_above, count_below") target_object: str = Field(..., min_length=1) threshold: Optional[int] = None action: str = Field("log", description="webhook, log, escalate") webhook_url: Optional[str] = None class AlertRuleResponse(BaseModel): """An alert rule.""" rule_id: str condition_type: str target_object: str threshold: Optional[int] action: str webhook_url: Optional[str] enabled: bool = True class AlertHistoryItem(BaseModel): """A fired alert event.""" rule_id: str timestamp: float camera_id: str = "" description: str = "" actions_taken: list[str] = [] # --------------------------------------------------------------------------- # Agents # --------------------------------------------------------------------------- class AgentInfo(BaseModel): """Status of a single agent.""" agent_id: str expert_type: str status: str tasks_processed: int avg_latency_ms: float healthy: bool class AgentPoolStatus(BaseModel): """Full agent pool status.""" agents: dict[str, list[AgentInfo]] = {} total_agents: int = 0 metrics: dict = {} # --------------------------------------------------------------------------- # Health # --------------------------------------------------------------------------- class HealthResponse(BaseModel): model: str = "arcisvlm-1.6b" version: str = "1.0.0" status: str = "ok" model_loaded: bool = False agents_ready: bool = False # --------------------------------------------------------------------------- # Metrics # --------------------------------------------------------------------------- class GPUStats(BaseModel): name: str = "" utilization_pct: float = 0.0 memory_used_mb: float = 0.0 memory_total_mb: float = 0.0 temperature_c: float = 0.0 class MetricsResponse(BaseModel): gpu: list[GPUStats] = [] inference_count: int = 0 avg_latency_ms: float = 0.0 p95_latency_ms: float = 0.0 p99_latency_ms: float = 0.0 queries_per_sec: float = 0.0 uptime_seconds: float = 0.0 model_params: int = 0 # --------------------------------------------------------------------------- # HyperMother # --------------------------------------------------------------------------- class AdapterCacheEntry(BaseModel): camera_id: str scene_hash: str rank: int = 16 sigma: float = 0.0 confidence: float = 0.0 age_seconds: float = 0.0 class HyperMotherStatus(BaseModel): enabled: bool = False cache_size: int = 0 cache_max: int = 500 cache_hit_rate: float = 0.0 adapters: list[AdapterCacheEntry] = [] dynamic_route_count: int = 0 static_fallback_count: int = 0 confidence_threshold: float = 0.7 # --------------------------------------------------------------------------- # Dreamer # --------------------------------------------------------------------------- class DreamPrediction(BaseModel): step: int cosine_similarity: float = 0.0 mse: float = 0.0 confidence: float = 0.0 class DreamerStatus(BaseModel): enabled: bool = False total_dreams: int = 0 avg_cosine_sim: float = 0.0 avg_confidence: float = 0.0 recent_predictions: list[DreamPrediction] = [] rl_reward_avg: float = 0.0