# arcisvlm/api/models.py
# Last commit 7a564e3 by Hardik Sanghvi:
#   feat: integrate Gemma 4 E2B backbone for production-quality VLM inference
# File size: 6.45 kB
"""
ArcisVLM API — Pydantic request/response schemas.
All API contracts are defined here for type safety and auto-generated OpenAPI docs.
"""
from __future__ import annotations
from pydantic import BaseModel, Field
from typing import Optional
# ---------------------------------------------------------------------------
# Inference
# ---------------------------------------------------------------------------
class QueryRequest(BaseModel):
    """Request body for a visual question posed to the VLM.

    Three image sources are declared (``image_path``, ``image_base64``,
    ``camera_id``); each is optional and this schema performs no
    cross-field validation between them.
    """

    # --- image sources (all optional at the schema level) ---
    image_path: Optional[str] = Field(
        default=None,
        description="Local file path to the image",
    )
    image_base64: Optional[str] = Field(
        default=None,
        description="Base64-encoded image bytes",
    )

    # --- prompt ---
    question: str = Field(
        ...,
        description="Natural-language question or instruction",
        min_length=1,  # an empty question is rejected
    )
    task_type: str = Field(
        default="vqa",
        description="Task hint: vqa, detect, alert, caption, count, ocr, reason",
    )

    # --- generation controls, range-checked by pydantic ---
    max_tokens: int = Field(default=256, ge=1, le=4096)
    temperature: float = Field(default=0.7, ge=0.0, le=2.0)

    camera_id: Optional[str] = Field(
        default=None,
        description="Camera ID to pull latest frame from",
    )
class QueryResponse(BaseModel):
    """Result payload returned for a VLM inference query.

    Only ``answer``, ``confidence``, ``expert_used`` and
    ``processing_time_ms`` are required; every other field has an empty
    default so a plain-text answer can omit them.
    """

    # Required core result.
    answer: str
    confidence: float
    expert_used: str
    processing_time_ms: float

    # Optional bookkeeping.
    task_id: str = ""
    metadata: dict = {}

    # Multimodal output fields. ``output_type`` defaults to "text"; the
    # structured containers below default to empty.
    output_type: str = "text"
    detections: list[dict] = []
    counts: dict = {}
    text_regions: list[dict] = []
    alert: dict = {}
    analysis: dict = {}
    tracks: list[dict] = []
    scene_attributes: dict = {}

    # Base64 image payloads (per the field names); absent/empty by default.
    annotated_frame_base64: Optional[str] = None
    clip_frames_base64: list[str] = []
class EmbeddingRequest(BaseModel):
    """Request for the raw JEPA embedding of an image, with an optional query.

    All three fields default to ``None``; the schema itself does not
    require any particular one to be set.
    """

    image_path: Optional[str] = None
    image_base64: Optional[str] = None
    query: Optional[str] = None
class EmbeddingResponse(BaseModel):
    """Embedding vector returned to the caller, plus its reported dimension."""

    embedding: list[float]
    # NOTE(review): presumably equals len(embedding); not enforced here.
    dimension: int
# ---------------------------------------------------------------------------
# Streams (camera management)
# ---------------------------------------------------------------------------
class StreamStartRequest(BaseModel):
    """Request to begin ingesting frames from an RTSP camera."""

    # Both identifiers are required and must be non-empty strings.
    camera_id: str = Field(default=..., min_length=1)
    rtsp_url: str = Field(default=..., min_length=1)

    # Requested frame rate, constrained to the range [0.1, 30.0].
    target_fps: float = Field(default=2.0, ge=0.1, le=30.0)

    tasks: list[str] = Field(
        default=["detect", "alert"],
        description="Auto-inference task types",
    )
class StreamStopRequest(BaseModel):
    """Request to end ingestion for one camera, identified by ``camera_id``."""

    # Required; unlike StreamStartRequest, no minimum length is enforced here.
    camera_id: str
class StreamStatusResponse(BaseModel):
    """Status report for one camera stream.

    ``camera_id`` and ``state`` are required; the counters and measured
    frame rate default to zero.
    """

    camera_id: str
    state: str

    # Counters / measurements, zero until populated by the producer.
    frames_captured: int = 0
    frames_dropped: int = 0
    actual_fps: float = 0.0
    reconnect_count: int = 0
# ---------------------------------------------------------------------------
# Alerts
# ---------------------------------------------------------------------------
class AlertRuleCreate(BaseModel):
    """Payload for creating a new alert rule.

    ``condition_type`` and ``action`` are free-form strings at the schema
    level; their descriptions list the expected values but nothing here
    restricts them.
    """

    rule_id: str = Field(default=..., min_length=1)
    condition_type: str = Field(
        default=...,
        description="presence, absence, count_above, count_below",
    )
    target_object: str = Field(default=..., min_length=1)

    # NOTE(review): presumably only meaningful for the count_* conditions —
    # confirm against the rule evaluator.
    threshold: Optional[int] = None

    action: str = Field(default="log", description="webhook, log, escalate")
    webhook_url: Optional[str] = None
class AlertRuleResponse(BaseModel):
    """An alert rule as returned by the API.

    Mirrors the fields of ``AlertRuleCreate`` and adds ``enabled``.

    ``threshold`` and ``webhook_url`` carry an explicit ``None`` default.
    A bare ``Optional[X]`` annotation with no default is an optional field
    under pydantic v1 but a *required* (nullable) field under pydantic v2;
    the explicit default makes both fields optional on either version and
    keeps this model in step with ``AlertRuleCreate``, where both already
    default to ``None``.
    """

    rule_id: str
    condition_type: str
    target_object: str
    threshold: Optional[int] = None
    action: str
    webhook_url: Optional[str] = None
    enabled: bool = True
class AlertHistoryItem(BaseModel):
    """Record of one alert that fired.

    ``rule_id`` and ``timestamp`` are required; the remaining fields
    default to empty values.
    """

    rule_id: str
    # NOTE(review): float timestamp — presumably epoch seconds; confirm.
    timestamp: float

    camera_id: str = ""
    description: str = ""
    actions_taken: list[str] = []
# ---------------------------------------------------------------------------
# Agents
# ---------------------------------------------------------------------------
class AgentInfo(BaseModel):
    """Status record for one agent. Every field is required."""

    # Identity
    agent_id: str
    expert_type: str

    # Current state and running statistics
    status: str
    tasks_processed: int
    avg_latency_ms: float
    healthy: bool
class AgentPoolStatus(BaseModel):
    """Status of the whole agent pool.

    ``agents`` maps a string key to a list of ``AgentInfo`` entries.
    """

    # NOTE(review): keys are presumably expert types — verify against the
    # code that populates this mapping.
    agents: dict[str, list[AgentInfo]] = {}
    total_agents: int = 0
    metrics: dict = {}
# ---------------------------------------------------------------------------
# Health
# ---------------------------------------------------------------------------
class HealthResponse(BaseModel):
    """Health payload: model name, version, status string and readiness flags.

    Every field has a default, so the model can be instantiated with no
    arguments.
    """

    # NOTE(review): some pydantic v2 releases warn about field names in the
    # protected ``model_`` namespace (e.g. ``model_loaded``) — confirm
    # against the pinned pydantic version.
    model: str = "arcisvlm-1.6b"
    version: str = "1.0.0"
    status: str = "ok"

    # Readiness flags; both start out False.
    model_loaded: bool = False
    agents_ready: bool = False
# ---------------------------------------------------------------------------
# Metrics
# ---------------------------------------------------------------------------
class GPUStats(BaseModel):
    """Statistics for one GPU; every field defaults to an empty/zero value."""

    name: str = ""
    utilization_pct: float = 0.0

    # Memory figures (megabytes, per the ``_mb`` suffix).
    memory_used_mb: float = 0.0
    memory_total_mb: float = 0.0

    temperature_c: float = 0.0
class MetricsResponse(BaseModel):
    """Service metrics: per-GPU stats plus inference counters and latencies.

    Every field defaults to zero or empty.
    """

    # One GPUStats entry per reported device.
    gpu: list[GPUStats] = []

    # Inference volume and latency (milliseconds, per the ``_ms`` suffix).
    inference_count: int = 0
    avg_latency_ms: float = 0.0
    p95_latency_ms: float = 0.0
    p99_latency_ms: float = 0.0

    queries_per_sec: float = 0.0
    uptime_seconds: float = 0.0
    model_params: int = 0
# ---------------------------------------------------------------------------
# HyperMother
# ---------------------------------------------------------------------------
class AdapterCacheEntry(BaseModel):
    """One entry of the adapter cache listed in ``HyperMotherStatus.adapters``.

    ``camera_id`` and ``scene_hash`` are required; the numeric fields
    carry defaults.
    """

    camera_id: str
    scene_hash: str

    # NOTE(review): presumably the low-rank adapter rank — confirm.
    rank: int = 16
    sigma: float = 0.0
    confidence: float = 0.0
    age_seconds: float = 0.0
class HyperMotherStatus(BaseModel):
    """Status snapshot for the HyperMother component.

    Groups cache figures, the cached adapter entries, and routing
    counters. Every field has a default.
    """

    enabled: bool = False

    # Adapter cache figures.
    cache_size: int = 0
    cache_max: int = 500
    cache_hit_rate: float = 0.0
    adapters: list[AdapterCacheEntry] = []

    # Routing counters and the confidence cut-off reported alongside them.
    dynamic_route_count: int = 0
    static_fallback_count: int = 0
    confidence_threshold: float = 0.7
# ---------------------------------------------------------------------------
# Dreamer
# ---------------------------------------------------------------------------
class DreamPrediction(BaseModel):
    """Scores for one prediction step, as listed in ``DreamerStatus``.

    ``step`` is required; the three score fields default to ``0.0``.
    """

    step: int

    cosine_similarity: float = 0.0
    mse: float = 0.0
    confidence: float = 0.0
class DreamerStatus(BaseModel):
    """Status snapshot for the Dreamer component.

    Holds aggregate figures and a list of recent ``DreamPrediction``
    entries. Every field has a default.
    """

    enabled: bool = False

    # Aggregates.
    total_dreams: int = 0
    avg_cosine_sim: float = 0.0
    avg_confidence: float = 0.0

    recent_predictions: list[DreamPrediction] = []
    rl_reward_avg: float = 0.0