Hardik Sanghvi

feat: integrate Gemma 4 E2B backbone for production-quality VLM inference

7a564e3 3 months ago

6.45 kB

	"""
	ArcisVLM API — Pydantic request/response schemas.

	All API contracts are defined here for type safety and auto-generated OpenAPI docs.
	"""

	from __future__ import annotations

	from pydantic import BaseModel, Field
	from typing import Optional


	# ---------------------------------------------------------------------------
	# Inference
	# ---------------------------------------------------------------------------
	class QueryRequest(BaseModel):
	    """Submit a visual question to the VLM."""

	    # image_path, image_base64 and camera_id (last field) all default to
	    # None; this model does not itself require any of them to be set.
	    # NOTE(review): presumably the handler rejects a request that carries
	    # no image source at all -- confirm.
	    image_path: Optional[str] = Field(
	        default=None,
	        description="Local file path to the image",
	    )
	    image_base64: Optional[str] = Field(
	        default=None,
	        description="Base64-encoded image bytes",
	    )

	    # The only required field; an empty string fails validation.
	    question: str = Field(
	        ...,
	        min_length=1,
	        description="Natural-language question or instruction",
	    )

	    # Plain string: the values named in the description are not enforced here.
	    task_type: str = Field(
	        default="vqa",
	        description="Task hint: vqa, detect, alert, caption, count, ocr, reason",
	    )

	    # Range-checked numeric settings (bounds are inclusive).
	    max_tokens: int = Field(default=256, ge=1, le=4096)
	    temperature: float = Field(default=0.7, ge=0.0, le=2.0)

	    camera_id: Optional[str] = Field(
	        default=None,
	        description="Camera ID to pull latest frame from",
	    )


	class QueryResponse(BaseModel):
	    """Response from a VLM inference query."""

	    # Required on every response.
	    answer: str = Field(...)
	    confidence: float = Field(...)
	    expert_used: str = Field(...)
	    processing_time_ms: float = Field(...)

	    # Optional bookkeeping; empty unless the producer fills it in.
	    task_id: str = Field(default="")
	    metadata: dict = Field(default={})

	    # Multimodal output fields
	    # Each one defaults to an empty container, "text", or None. The mutable
	    # literals are fine as defaults here: Pydantic copies a field's default
	    # for every instance instead of sharing one object.
	    output_type: str = Field(default="text")
	    detections: list[dict] = Field(default=[])
	    counts: dict = Field(default={})
	    text_regions: list[dict] = Field(default=[])
	    alert: dict = Field(default={})
	    analysis: dict = Field(default={})
	    tracks: list[dict] = Field(default=[])
	    scene_attributes: dict = Field(default={})
	    annotated_frame_base64: Optional[str] = Field(default=None)
	    clip_frames_base64: list[str] = Field(default=[])


	class EmbeddingRequest(BaseModel):
	    """Get the raw JEPA embedding for an image (+ optional query)."""

	    # Every field is optional and defaults to None; this model does not
	    # check that an image source was actually supplied.
	    image_path: Optional[str] = Field(default=None)
	    image_base64: Optional[str] = Field(default=None)
	    query: Optional[str] = Field(default=None)


	class EmbeddingResponse(BaseModel):
	    """Raw embedding vector."""

	    # Both fields are required.
	    # NOTE(review): `dimension` presumably equals len(embedding); nothing
	    # in this model enforces that -- confirm against the producer.
	    embedding: list[float] = Field(...)
	    dimension: int = Field(...)


	# ---------------------------------------------------------------------------
	# Streams (camera management)
	# ---------------------------------------------------------------------------
	class StreamStartRequest(BaseModel):
	    """Start ingesting from an RTSP camera."""

	    # Required, non-empty identifiers.
	    camera_id: str = Field(..., min_length=1)
	    rtsp_url: str = Field(..., min_length=1)

	    # Defaults to 2.0; accepted range is 0.1 to 30.0 inclusive.
	    target_fps: float = Field(default=2.0, ge=0.1, le=30.0)

	    # The list literal is copied per instance by Pydantic, so requests do
	    # not share one default list.
	    tasks: list[str] = Field(
	        default=["detect", "alert"],
	        description="Auto-inference task types",
	    )


	class StreamStopRequest(BaseModel):
	    """Stop ingesting from a camera."""

	    # Required. Unlike StreamStartRequest.camera_id there is no min_length
	    # constraint, so an empty string passes validation here.
	    camera_id: str = Field(...)


	class StreamStatusResponse(BaseModel):
	    """Status of a single camera stream."""

	    # Required identity and state; `state` is an unconstrained string.
	    camera_id: str = Field(...)
	    state: str = Field(...)

	    # Counters and rate, all starting at zero when not supplied.
	    frames_captured: int = Field(default=0)
	    frames_dropped: int = Field(default=0)
	    actual_fps: float = Field(default=0.0)
	    reconnect_count: int = Field(default=0)


	# ---------------------------------------------------------------------------
	# Alerts
	# ---------------------------------------------------------------------------
	class AlertRuleCreate(BaseModel):
	    """Create an alert rule."""

	    rule_id: str = Field(..., min_length=1)

	    # Plain string: the values named in the description are not enforced here.
	    condition_type: str = Field(
	        ...,
	        description="presence, absence, count_above, count_below",
	    )
	    target_object: str = Field(..., min_length=1)

	    # Optional. NOTE(review): presumably only meaningful for the count_*
	    # condition types -- this model does not tie the two together.
	    threshold: Optional[int] = Field(default=None)

	    # Plain string defaulting to "log"; listed values are not enforced.
	    action: str = Field(default="log", description="webhook, log, escalate")

	    # Optional even when action is "webhook"; no cross-field check here.
	    webhook_url: Optional[str] = Field(default=None)


	class AlertRuleResponse(BaseModel):
	    """An alert rule."""

	    # Required on every response.
	    rule_id: str
	    condition_type: str
	    target_object: str

	    # Nullable with an explicit None default, matching AlertRuleCreate.
	    # A bare `Optional[int]` annotation with no default is treated as
	    # "required but nullable" by Pydantic v2 (v1 silently defaulted it to
	    # None), so building a response without these keys would fail there.
	    threshold: Optional[int] = None

	    action: str

	    # Same reasoning as `threshold`: explicit None default.
	    webhook_url: Optional[str] = None

	    # Rules are reported as enabled unless stated otherwise.
	    enabled: bool = True


	class AlertHistoryItem(BaseModel):
	    """A fired alert event."""

	    # Required.
	    # NOTE(review): `timestamp` is a bare float, presumably epoch seconds
	    # -- confirm against the code that records alerts.
	    rule_id: str = Field(...)
	    timestamp: float = Field(...)

	    # Optional context; empty when not supplied.
	    camera_id: str = Field(default="")
	    description: str = Field(default="")

	    # The list literal is copied per instance by Pydantic.
	    actions_taken: list[str] = Field(default=[])


	# ---------------------------------------------------------------------------
	# Agents
	# ---------------------------------------------------------------------------
	class AgentInfo(BaseModel):
	    """Status of a single agent."""

	    # Every field is required; none has a default.
	    # Identity.
	    agent_id: str = Field(...)
	    expert_type: str = Field(...)

	    # Current state and running statistics.
	    status: str = Field(...)
	    tasks_processed: int = Field(...)
	    avg_latency_ms: float = Field(...)
	    healthy: bool = Field(...)


	class AgentPoolStatus(BaseModel):
	    """Full agent pool status."""

	    # Lists of AgentInfo grouped under string keys.
	    # NOTE(review): keys are presumably expert types -- confirm.
	    agents: dict[str, list[AgentInfo]] = Field(default={})

	    # Supplied by the producer; not derived from `agents` in this model.
	    total_agents: int = Field(default=0)

	    # Untyped free-form metrics mapping.
	    metrics: dict = Field(default={})


	# ---------------------------------------------------------------------------
	# Health
	# ---------------------------------------------------------------------------
	class HealthResponse(BaseModel):
	    # Health payload in which every field has a default, so HealthResponse()
	    # is valid with no arguments. Documented with comments rather than a
	    # docstring because Pydantic would publish a docstring as the schema
	    # description, changing the generated API docs.

	    # Static identification strings.
	    model: str = Field(default="arcisvlm-1.6b")
	    version: str = Field(default="1.0.0")

	    # Reported status; the readiness flags start out False.
	    status: str = Field(default="ok")
	    # NOTE(review): names starting with `model_` can trigger a
	    # protected-namespace warning on some Pydantic v2 releases -- confirm
	    # against the pinned version.
	    model_loaded: bool = Field(default=False)
	    agents_ready: bool = Field(default=False)


	# ---------------------------------------------------------------------------
	# Metrics
	# ---------------------------------------------------------------------------
	class GPUStats(BaseModel):
	    # Per-GPU snapshot; every field defaults to an empty or zero value.
	    # Units follow the field-name suffixes (_pct, _mb, _c).

	    name: str = Field(default="")
	    utilization_pct: float = Field(default=0.0)

	    # Memory figures.
	    memory_used_mb: float = Field(default=0.0)
	    memory_total_mb: float = Field(default=0.0)

	    temperature_c: float = Field(default=0.0)


	class MetricsResponse(BaseModel):
	    # Service metrics payload; every field has a zero or empty default.

	    # One GPUStats entry per reported device; the list literal is copied
	    # per instance by Pydantic.
	    gpu: list[GPUStats] = Field(default=[])

	    # Inference volume and latency figures.
	    inference_count: int = Field(default=0)
	    avg_latency_ms: float = Field(default=0.0)
	    p95_latency_ms: float = Field(default=0.0)
	    p99_latency_ms: float = Field(default=0.0)
	    queries_per_sec: float = Field(default=0.0)

	    uptime_seconds: float = Field(default=0.0)

	    # NOTE(review): names starting with `model_` can trigger a
	    # protected-namespace warning on some Pydantic v2 releases -- confirm
	    # against the pinned version.
	    model_params: int = Field(default=0)


	# ---------------------------------------------------------------------------
	# HyperMother
	# ---------------------------------------------------------------------------
	class AdapterCacheEntry(BaseModel):
	    # One entry of the adapter cache reported in HyperMotherStatus.adapters.

	    # Required: the camera and scene hash this entry belongs to.
	    camera_id: str = Field(...)
	    scene_hash: str = Field(...)

	    # Adapter parameters; rank defaults to 16, the rest to 0.0.
	    rank: int = Field(default=16)
	    sigma: float = Field(default=0.0)
	    confidence: float = Field(default=0.0)

	    age_seconds: float = Field(default=0.0)


	class HyperMotherStatus(BaseModel):
	    # HyperMother status payload; every field has a default, so an empty
	    # constructor call describes a disabled component with an empty cache.

	    enabled: bool = Field(default=False)

	    # Cache occupancy, capacity and hit rate.
	    cache_size: int = Field(default=0)
	    cache_max: int = Field(default=500)
	    cache_hit_rate: float = Field(default=0.0)

	    # The list literal is copied per instance by Pydantic.
	    adapters: list[AdapterCacheEntry] = Field(default=[])

	    # Routing counters and the confidence threshold (default 0.7).
	    dynamic_route_count: int = Field(default=0)
	    static_fallback_count: int = Field(default=0)
	    confidence_threshold: float = Field(default=0.7)


	# ---------------------------------------------------------------------------
	# Dreamer
	# ---------------------------------------------------------------------------
	class DreamPrediction(BaseModel):
	    # A single prediction record listed in DreamerStatus.recent_predictions.

	    # Required step index.
	    step: int = Field(...)

	    # Quality figures; each defaults to 0.0 when not supplied.
	    cosine_similarity: float = Field(default=0.0)
	    mse: float = Field(default=0.0)
	    confidence: float = Field(default=0.0)


	class DreamerStatus(BaseModel):
	    # Dreamer status payload; every field has a default, so an empty
	    # constructor call describes a disabled component with no history.

	    enabled: bool = Field(default=False)

	    # Aggregate figures.
	    total_dreams: int = Field(default=0)
	    avg_cosine_sim: float = Field(default=0.0)
	    avg_confidence: float = Field(default=0.0)

	    # The list literal is copied per instance by Pydantic.
	    recent_predictions: list[DreamPrediction] = Field(default=[])

	    rl_reward_avg: float = Field(default=0.0)