# CausalOps-Env / env/models.py
# Uploader avatar caption on the hosting page: "omm7's picture"
# Commit message: "Upload folder using huggingface_hub"
# Commit: b2a96ac (verified)
"""
Typed models for the hardened NovaTech OpenEnv environment.
"""
from __future__ import annotations
from typing import Any, Dict, List, Literal, Optional
from pydantic import BaseModel, Field
# Closed string vocabularies used as field types by the models in this module.

# Services that can appear as a log source, a suspect, or an inspection target.
ServiceName = Literal[
    "auth-service",
    "payment-api",
    "order-service",
    "notification-service",
    "reporting-service",
    "user-service",
]

# Hosts a log line can be attributed to.
ServerName = Literal[
    "server_01",
    "server_02",
    "server_03",
    "server_04",
]

# Accepted log severities.
LogLevel = Literal[
    "INFO",
    "WARN",
    "ERROR",
    "CRITICAL",
]

# Failure categories a root-cause hypothesis can name.
FailureMode = Literal[
    "resource_exhaustion",
    "dependency_outage",
    "storage_saturation",
    "certificate_expiry",
    "application_bug",
    "traffic_abuse",
]

# "none", then every ServiceName, then four names that are not services in
# this module. Nesting the ServiceName alias inside Literal is flattened by
# `typing`, so the accepted values and their order match a spelled-out list.
DependencyName = Literal[
    "none",
    ServiceName,
    "payment-gateway",
    "mysql",
    "email-relay",
    "ldap-directory",
]

# Customer-facing symptom categories.
CustomerImpact = Literal[
    "login_failures",
    "checkout_delays",
    "order_write_failures",
    "notification_delivery_failure",
    "cross_service_major_incident",
]

# Containment actions an agent may request.
# NOTE(review): the last three entries read like deliberately harmful choices;
# confirm how the environment scores them.
ContainmentActionName = Literal[
    "increase_auth_heap",
    "enable_login_rate_limiting",
    "restore_payment_gateway_connectivity",
    "reduce_checkout_retry_pressure",
    "free_order_log_disk",
    "reset_mysql_connection_pool",
    "renew_smtp_certificate",
    "reroute_notification_traffic",
    "page_major_incident_team",
    "block_all_login_traffic",
    "wipe_application_logs",
    "restart_everything",
]
class LogEntry(BaseModel):
    # A single log record. Every field is required; none has a default.
    log_id: int  # presumably the id cited in IncidentReport.evidence_log_ids; verify
    timestamp: str  # plain string; no format is enforced by this model
    server_id: ServerName
    log_level: LogLevel
    service_name: ServiceName
    message: str
    response_time_ms: int
    # The two usage fields are unconstrained floats (no 0-100 bound here).
    cpu_usage_percent: float
    memory_usage_percent: float
class IncidentBriefing(BaseModel):
    # Incident description embedded in Observation.briefing.
    # Every field is required; none has a default.
    incident_id: str
    title: str
    objective: str
    # Window bounds are plain strings; no format or ordering is enforced here.
    incident_window_start: str
    incident_window_end: str
    suspected_services: List[ServiceName]  # may be empty; no length constraint
    customer_statement: str
    operational_constraints: List[str]
class RootCauseHypothesis(BaseModel):
    # A structured root-cause guess. Used by Action.hypothesis,
    # Observation.last_hypothesis and IncidentReport.root_cause.
    primary_service: ServiceName
    failure_mode: FailureMode
    # Optional; falls back to the literal "none" when not supplied.
    dependency: DependencyName = Field(default="none")
    customer_impact: CustomerImpact
    # Required (no default) and bounded to the closed interval [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)
class LogQuery(BaseModel):
    # Filter set for a log lookup, carried by Action.query.
    # Every filter is optional and defaults to None.
    # NOTE(review): None presumably means "do not filter on this field";
    # confirm against the code that executes the query.
    service_name: Optional[ServiceName] = Field(default=None)
    server_id: Optional[ServerName] = Field(default=None)
    levels: Optional[List[LogLevel]] = Field(default=None)
    # Time bounds are plain strings; no format is enforced by this model.
    start_time: Optional[str] = Field(default=None)
    end_time: Optional[str] = Field(default=None)
    # Substring filter, capped at 80 characters.
    text_contains: Optional[str] = Field(None, max_length=80)
    # Number of rows requested: between 1 and 6, defaulting to the maximum.
    limit: int = Field(6, ge=1, le=6)
class IncidentReport(BaseModel):
    # Final incident report, carried by Action.report.
    #
    # evidence_log_ids and impacted_services are required and must each hold
    # at least one item. They used to pair default_factory=list with
    # min_length=1. Pydantic does not validate default values unless
    # validate_default is enabled, so omitting either field silently produced
    # an empty list that bypassed the constraint, while an explicit [] was
    # rejected. Making the fields required closes that gap.
    evidence_log_ids: List[int] = Field(..., min_length=1)
    impacted_services: List[ServiceName] = Field(..., min_length=1)
    root_cause: RootCauseHypothesis
    # Optional; an omitted plan becomes a fresh empty list per instance.
    containment_plan: List[ContainmentActionName] = Field(default_factory=list)
    # Required free text, 20 to 600 characters.
    summary: str = Field(..., min_length=20, max_length=600)
class Action(BaseModel):
    # One agent step. Only action_type is required. Every payload field is
    # optional, and this model does not check that the payload matching the
    # chosen action_type is actually present.
    session_id: Optional[str] = Field(default=None)
    action_type: Literal[
        "query_logs",
        "inspect_dependencies",
        "update_hypothesis",
        "execute_containment",
        "submit_report",
        "request_more",
        "no_anomalies",
    ]
    # NOTE(review): each payload below presumably pairs with one action_type
    # (query -> "query_logs", report -> "submit_report", ...); confirm against
    # the environment's step handler.
    query: Optional[LogQuery] = Field(default=None)
    target_service: Optional[ServiceName] = Field(default=None)
    hypothesis: Optional[RootCauseHypothesis] = Field(default=None)
    containment_plan: Optional[List[ContainmentActionName]] = Field(default=None)
    report: Optional[IncidentReport] = Field(default=None)
class Observation(BaseModel):
    # State snapshot for a session. Fields down to submitted_containment are
    # required; the remaining ones carry defaults.
    session_id: str
    task_id: str
    task_title: str
    briefing: IncidentBriefing
    # Keys are restricted to ServiceName; values are unconstrained strings
    # (not limited to DependencyName).
    dependency_graph: Dict[ServiceName, List[str]]
    visible_logs: List[LogEntry]
    revealed_log_count: int
    visited_services: List[ServiceName]
    submitted_containment: List[ContainmentActionName]
    last_hypothesis: Optional[RootCauseHypothesis] = Field(default=None)
    # Step counter starts at 0 with a default budget of 8; neither value is
    # range-checked by this model.
    step_number: int = Field(default=0)
    max_steps: int = Field(default=8)
    feedback: Optional[str] = Field(default=None)
    done: bool = Field(default=False)
class Reward(BaseModel):
    # Reward breakdown. Every numeric field, penalty included, is bounded to
    # the closed interval [0.0, 1.0]. How `value` relates to the component
    # fields is not defined by this model.
    value: float = Field(ge=0.0, le=1.0)  # required; no default
    signal_reward: float = Field(0.0, ge=0.0, le=1.0)
    hypothesis_reward: float = Field(0.0, ge=0.0, le=1.0)
    efficiency_reward: float = Field(0.0, ge=0.0, le=1.0)
    penalty: float = Field(0.0, ge=0.0, le=1.0)
    # Free-form extras; a fresh dict per instance when omitted.
    info: Dict[str, Any] = Field(default_factory=dict)