// MarisUK's picture
// Maris AI model sync
// f440f03 verified
use axum::{extract::State, http::StatusCode, Json};
use serde::{Deserialize, Serialize};
use sqlx::Row;
use uuid::Uuid;
use crate::app_state::AppState;
use super::system::{
build_author, build_platform_manifest, CloudCapabilityManifest, DiagnosticsManifest,
OperatorOsManifest, PlatformContractsManifest, PluginPlatformManifest,
ProcessRuntimeManifest, RoboticsCapabilityManifest, SelfImprovementManifest,
TranslationLayerManifest,
};
/// Average tool latency (ms) above which `build_health_signals` reports the
/// latency signal as "degraded".
const LATENCY_DEGRADED_THRESHOLD_MS: i64 = 1_500;
/// Average tool latency (ms) above which `build_health_signals` reports the
/// latency signal as "watch".
const LATENCY_WATCH_THRESHOLD_MS: i64 = 900;
/// Score attached to the latency signal when it is "degraded".
const HEALTH_SCORE_DEGRADED: f64 = 42.0;
/// Score attached to the latency signal when it is "watch".
const HEALTH_SCORE_WATCH: f64 = 68.0;
/// Score attached to the tool-instability signal when it is "watch".
/// NOTE(review): despite the name, the memory signal does not use this
/// constant in `build_health_signals`; it uses a literal instead.
const HEALTH_SCORE_TOOL_MEMORY_WATCH: f64 = 69.0;
/// Score attached to the latency signal when it is "healthy".
const HEALTH_SCORE_HEALTHY: f64 = 88.0;
/// Score attached to the tool-instability signal when it is "healthy".
const HEALTH_SCORE_HEALTHY_ALT: f64 = 87.0;
/// One row of the evaluation leaderboard shown in the operations console.
#[derive(Serialize, Clone)]
pub struct LeaderboardEntry {
/// Agent branch name (the `agent_branch` column, or a manifest branch for seeded rows).
pub branch: String,
/// Number of evaluations recorded for the branch (0 for seeded rows).
pub evaluations: i64,
/// Mean `outcome_score` for the branch; `None` for seeded rows.
pub avg_outcome_score: Option<f64>,
/// Mean `latency_ms` for the branch; `None` for seeded rows.
pub avg_latency_ms: Option<i64>,
/// Number of evaluations whose `outcome` is `'failed'`.
pub failed_evaluations: i64,
/// `"live"` when loaded from the database, `"manifest_seed"` when synthesized from the manifest.
pub source: &'static str,
}
/// A stored improvement recommendation, as read from `failed_session_recommendations`.
#[derive(Serialize, Clone)]
pub struct ImprovementRecommendationRecord {
/// Recommendation UUID rendered as a string.
pub id: String,
/// UUID of the agent session the recommendation belongs to, rendered as a string.
pub agent_session_id: String,
/// UUID of the originating task, if one was recorded.
pub source_task_id: Option<String>,
/// Improvement target; this file generates "prompt", "policy", "tool" and "memory".
pub target: String,
/// Short headline of the recommendation.
pub title: String,
/// Longer human-readable explanation.
pub summary: String,
/// Confidence value stored with the recommendation.
pub confidence_score: f64,
/// Creation timestamp formatted as RFC 3339.
pub created_at: String,
}
/// A status label paired with a row count; used here for GPU jobs grouped by status.
#[derive(Serialize, Clone)]
pub struct StatusMetric {
/// Status value rendered as text.
pub status: String,
/// Number of rows with that status.
pub count: i64,
}
/// Self-improvement section of the operations console.
#[derive(Serialize, Clone)]
pub struct SelfImprovementConsole {
/// Static self-improvement manifest copied from the platform manifest.
pub manifest: SelfImprovementManifest,
/// Row count of `agent_task_evaluations`.
pub total_evaluations: i64,
/// Row count of `failed_session_recommendations`.
pub automated_recommendations: i64,
/// Per-branch leaderboard (live rows, or manifest-seeded placeholders when there are none).
pub leaderboard: Vec<LeaderboardEntry>,
/// Most recent stored recommendations.
pub recommendations: Vec<ImprovementRecommendationRecord>,
}
/// Process-runtime section of the operations console.
#[derive(Serialize, Clone)]
pub struct ProcessRuntimeConsole {
/// Static process-runtime manifest copied from the platform manifest.
pub manifest: ProcessRuntimeManifest,
/// Number of `agent_sessions` rows with status `'running'`.
pub active_agent_sessions: i64,
/// Number of `agent_tasks` rows with status `'running'`.
pub running_agent_tasks: i64,
/// Number of `agent_tool_calls` rows created within the last 24 hours.
pub tool_calls_24h: i64,
/// Mean non-null tool-call latency over the last 24 hours, if any data exists.
pub avg_tool_latency_ms: Option<i64>,
/// `vision_gpu_jobs` row counts grouped by status.
pub gpu_jobs_by_status: Vec<StatusMetric>,
}
/// Plugin-platform section of the operations console.
#[derive(Serialize, Clone)]
pub struct PluginPlatformConsole {
/// Static plugin-platform manifest copied from the platform manifest.
pub manifest: PluginPlatformManifest,
/// Length of the manifest's `listing_metadata` collection.
pub listed_metadata_fields: usize,
}
/// Translation-layer section of the operations console.
#[derive(Serialize, Clone)]
pub struct TranslationLayerConsole {
/// Static translation-layer manifest copied from the platform manifest.
pub manifest: TranslationLayerManifest,
/// Length of the manifest's `translation_planes` collection.
pub supported_planes: usize,
}
/// A single derived health signal produced by `build_health_signals`.
#[derive(Serialize, Clone)]
pub struct HealthSignalStatus {
/// Signal identifier: "model_drift", "latency_spikes", "memory_quality" or "tool_instability".
pub signal: String,
/// Classification: "healthy", "watch", "degraded" or "observing".
pub status: String,
/// Human-readable explanation of the classification.
pub summary: String,
/// Numeric score for the signal; `None` while the signal is only being observed.
pub score: Option<f64>,
/// `"live"` when backed by collected data, `"manifest_seed"` otherwise.
pub source: &'static str,
}
/// One diagnostics timeline event, read from `diagnostic_timeline_events` or seeded.
#[derive(Serialize, Clone)]
pub struct DiagnosticTimelineEventRecord {
/// Event UUID rendered as a string (nil UUID for seeded events).
pub id: String,
/// UUID of the incident the event belongs to, rendered as a string.
pub incident_id: String,
/// Event type label, e.g. "health.observed" for a seeded event.
pub event_type: String,
/// Human-readable description of the event.
pub summary: String,
/// Creation timestamp formatted as RFC 3339.
pub created_at: String,
}
/// One diagnostic incident, read from `diagnostic_incidents` or seeded.
#[derive(Serialize, Clone)]
pub struct DiagnosticIncidentRecord {
/// Incident UUID rendered as a string (nil UUID for the seeded incident).
pub id: String,
/// Stable textual key of the incident.
pub incident_key: String,
/// Incident headline.
pub title: String,
/// Incident status; the console counts `'resolved'` and `'degraded'` specially.
pub status: String,
/// Severity rendered as text.
pub severity: String,
/// Scope affected by the incident.
pub affected_scope: String,
/// Root-cause description, when known.
pub root_cause_summary: Option<String>,
/// Recovery action, when recorded.
pub recovery_action: Option<String>,
/// Health score stored with the incident, when present.
pub health_score: Option<f64>,
/// Opening timestamp formatted as RFC 3339.
pub opened_at: String,
/// Resolution timestamp formatted as RFC 3339, if the incident has one.
pub resolved_at: Option<String>,
}
/// One guardrail action, read from `diagnostic_guardrail_actions` or seeded.
#[derive(Serialize, Clone)]
pub struct GuardrailActionRecord {
/// Action UUID rendered as a string (nil UUID for the seeded action).
pub id: String,
/// UUID of the related incident, if any.
pub incident_id: Option<String>,
/// Kind of guardrail action, e.g. "safe_mode" for the seeded action.
pub action_type: String,
/// Signal that triggered the action.
pub trigger_signal: String,
/// Action status; the console counts `'planned'` and `'triggered'` as active.
pub status: String,
/// Scope the action applies to.
pub scope: String,
/// Human-readable description of the action.
pub summary: String,
/// Creation timestamp formatted as RFC 3339.
pub created_at: String,
}
/// Diagnostics section of the operations console.
#[derive(Serialize, Clone)]
pub struct DiagnosticsConsole {
/// Static diagnostics manifest copied from the platform manifest.
pub manifest: DiagnosticsManifest,
/// Aggregate score computed by `calculate_health_score` from the health signals.
pub health_score: f64,
/// Number of `diagnostic_incidents` rows whose status is not `'resolved'`.
pub open_incidents: i64,
/// Number of `diagnostic_guardrail_actions` rows with status `'planned'` or `'triggered'`.
pub active_guardrails: i64,
/// Number of `diagnostic_incidents` rows with status `'degraded'`.
pub degraded_capabilities: i64,
/// Derived health signals.
pub health_signals: Vec<HealthSignalStatus>,
/// Most recently opened incidents.
pub incidents: Vec<DiagnosticIncidentRecord>,
/// Most recent timeline events.
pub timeline: Vec<DiagnosticTimelineEventRecord>,
/// Most recent guardrail actions.
pub guardrail_actions: Vec<GuardrailActionRecord>,
}
/// Platform-foundation section of the operations console.
#[derive(Serialize, Clone)]
pub struct PlatformFoundationConsole {
/// Static platform-contracts manifest copied from the platform manifest.
pub manifest: PlatformContractsManifest,
/// Row count of `organizations`.
pub organizations: i64,
/// Row count of `teams`.
pub teams: i64,
/// Row count of `workspaces`.
pub workspaces: i64,
/// Row count of `workspace_artifacts`.
pub artifacts: i64,
/// Row count of `operator_notifications`.
pub notifications: i64,
}
/// Cloud/billing section of the operations console.
#[derive(Serialize, Clone)]
pub struct CloudConsole {
/// Static cloud capability manifest copied from the platform manifest.
pub manifest: CloudCapabilityManifest,
/// Row count of `usage_plans`.
pub usage_plans: i64,
/// Row count of `billing_accounts`.
pub billing_accounts: i64,
/// Row count of `billing_ledger_events`.
pub billing_events: i64,
}
/// Robotics section of the operations console.
#[derive(Serialize, Clone)]
pub struct RoboticsConsole {
/// Static robotics capability manifest copied from the platform manifest.
pub manifest: RoboticsCapabilityManifest,
/// Row count of `fleet_assets`.
pub fleet_assets: i64,
}
/// Operator-OS section of the operations console.
#[derive(Serialize, Clone)]
pub struct OperatorOsConsole {
/// Static operator-OS manifest copied from the platform manifest.
pub manifest: OperatorOsManifest,
/// Length of the manifest's `centers` collection.
pub active_centers: usize,
/// Row count of `operator_notifications` (same value as the platform-foundation `notifications`).
pub notification_backlog: i64,
}
/// Full JSON payload returned by the `operations_console` handler.
#[derive(Serialize, Clone)]
pub struct OperationsConsoleResponse {
/// Fixed human-readable description of the console.
pub summary: String,
/// Time the payload was built, formatted as RFC 3339 (UTC).
pub generated_at: String,
/// Evaluation leaderboard and recommendations.
pub self_improvement: SelfImprovementConsole,
/// Runtime counters (sessions, tasks, tool calls, GPU jobs).
pub process_runtime: ProcessRuntimeConsole,
/// Plugin-platform manifest summary.
pub plugin_platform: PluginPlatformConsole,
/// Translation-layer manifest summary.
pub translation_layer: TranslationLayerConsole,
/// Health signals, incidents, timeline and guardrail actions.
pub diagnostics: DiagnosticsConsole,
/// Organization/team/workspace level counters.
pub platform_foundation: PlatformFoundationConsole,
/// Billing and usage counters.
pub cloud: CloudConsole,
/// Fleet counters.
pub robotics: RoboticsConsole,
/// Operator-OS manifest summary and notification backlog.
pub operator_os: OperatorOsConsole,
}
/// JSON body accepted by the `submit_task_outcome` handler.
#[derive(Debug, Deserialize)]
pub struct SubmitTaskOutcomeRequest {
/// Agent session id; must parse as a UUID after trimming.
pub agent_session_id: String,
/// Task id; must parse as a UUID after trimming.
pub task_id: String,
/// Branch being evaluated; must not be blank.
pub agent_branch: String,
/// Name of the evaluator; must not be blank.
pub evaluator: String,
/// Name of the evaluation suite; must not be blank.
pub suite_name: String,
/// Outcome score; validated to lie in 0..=100.
pub outcome_score: f64,
/// Observed latency in milliseconds, if measured.
pub latency_ms: Option<i32>,
/// Token-efficiency score; validated to lie in 0..=100 when present.
pub token_efficiency_score: Option<f64>,
/// Optional status; must be queued, running, completed, failed or cancelled when non-blank.
pub regression_status: Option<String>,
/// Free-form feedback; stored as an empty string when absent.
pub feedback: Option<String>,
/// Arbitrary evidence JSON; when absent, `{"signals": ...}` is stored instead.
pub evidence: Option<serde_json::Value>,
/// Optional failure counters used to derive recommendations.
pub signals: Option<OutcomeSignals>,
}
/// Optional failure counters attached to a task outcome.
///
/// In `build_recommendations`, a counter greater than zero produces a
/// recommendation for the matching target; a missing counter is treated as zero.
#[derive(Debug, Deserialize, Serialize, Clone, Default)]
pub struct OutcomeSignals {
/// Count of policy violations; drives the "policy" recommendation.
pub policy_violations: Option<u32>,
/// Count of tool failures; drives the "tool" recommendation.
pub tool_failures: Option<u32>,
/// Count of memory misses; drives the "memory" recommendation.
pub memory_misses: Option<u32>,
/// Count of prompt clarity issues; drives the "prompt" recommendation.
pub prompt_clarity_issues: Option<u32>,
}
/// JSON body returned by the `submit_task_outcome` handler.
#[derive(Debug, Serialize)]
pub struct SubmitTaskOutcomeResponse {
/// UUID of the inserted evaluation row, rendered as a string.
pub evaluation_id: String,
/// Outcome bucket produced by `classify_outcome`.
pub outcome: String,
/// Number of recommendations generated for this submission.
pub recommendation_count: usize,
}
/// In-memory recommendation produced by `build_recommendations` before it is persisted.
#[derive(Debug, Clone)]
struct GeneratedRecommendation {
/// Improvement target: "prompt", "policy", "tool" or "memory".
target: &'static str,
/// Short headline.
title: String,
/// Longer explanation.
summary: String,
/// Confidence value stored alongside the recommendation.
confidence_score: f64,
}
/// Builds the aggregated operations console payload.
///
/// Reads counters and recent records from PostgreSQL (when a pool is
/// configured) and combines them with the static platform manifest. Without a
/// pool the helper functions return zeros, empty lists or seeded placeholder
/// records, so the handler still answers.
///
/// # Errors
///
/// Returns a `(StatusCode, Json)` pair produced by the helper functions when
/// any query fails.
pub async fn operations_console(
    State(state): State<AppState>,
) -> Result<Json<OperationsConsoleResponse>, (StatusCode, Json<serde_json::Value>)> {
    let manifest = build_platform_manifest(build_author(&state));
    let postgres = state.postgres.as_ref();

    // Self-improvement counters.
    let total_evaluations =
        count_rows(postgres, "SELECT COUNT(*) AS count FROM agent_task_evaluations").await?;
    let automated_recommendations = count_rows(
        postgres,
        "SELECT COUNT(*) AS count FROM failed_session_recommendations",
    )
    .await?;

    // Runtime counters.
    let active_agent_sessions = count_rows(
        postgres,
        "SELECT COUNT(*) AS count FROM agent_sessions WHERE status = 'running'",
    )
    .await?;
    let running_agent_tasks = count_rows(
        postgres,
        "SELECT COUNT(*) AS count FROM agent_tasks WHERE status = 'running'",
    )
    .await?;
    let tool_calls_24h = count_rows(
        postgres,
        "SELECT COUNT(*) AS count FROM agent_tool_calls WHERE created_at >= NOW() - INTERVAL '24 hours'",
    )
    .await?;
    let avg_tool_latency_ms = optional_avg_latency(
        postgres,
        "SELECT AVG(latency_ms)::BIGINT AS avg_latency_ms FROM agent_tool_calls WHERE latency_ms IS NOT NULL AND created_at >= NOW() - INTERVAL '24 hours'",
    )
    .await?;

    // Platform foundation, billing and fleet counters.
    let organizations =
        count_rows(postgres, "SELECT COUNT(*) AS count FROM organizations").await?;
    let teams = count_rows(postgres, "SELECT COUNT(*) AS count FROM teams").await?;
    let workspaces = count_rows(postgres, "SELECT COUNT(*) AS count FROM workspaces").await?;
    let artifacts =
        count_rows(postgres, "SELECT COUNT(*) AS count FROM workspace_artifacts").await?;
    let notifications =
        count_rows(postgres, "SELECT COUNT(*) AS count FROM operator_notifications").await?;
    let usage_plans = count_rows(postgres, "SELECT COUNT(*) AS count FROM usage_plans").await?;
    let billing_accounts =
        count_rows(postgres, "SELECT COUNT(*) AS count FROM billing_accounts").await?;
    let billing_events =
        count_rows(postgres, "SELECT COUNT(*) AS count FROM billing_ledger_events").await?;
    let fleet_assets = count_rows(postgres, "SELECT COUNT(*) AS count FROM fleet_assets").await?;

    // Diagnostics counters.
    let open_incidents = count_rows(
        postgres,
        "SELECT COUNT(*) AS count FROM diagnostic_incidents WHERE status <> 'resolved'",
    )
    .await?;
    let active_guardrails = count_rows(
        postgres,
        "SELECT COUNT(*) AS count FROM diagnostic_guardrail_actions WHERE status IN ('planned', 'triggered')",
    )
    .await?;
    let degraded_capabilities = count_rows(
        postgres,
        "SELECT COUNT(*) AS count FROM diagnostic_incidents WHERE status = 'degraded'",
    )
    .await?;

    let gpu_jobs_by_status = load_gpu_jobs_by_status(postgres).await?;

    // Fall back to placeholder rows from the manifest when nothing was evaluated yet.
    let live_leaderboard = load_leaderboard(postgres).await?;
    let leaderboard = if live_leaderboard.is_empty() {
        manifest
            .branches
            .iter()
            .take(4)
            .map(|seed| LeaderboardEntry {
                branch: seed.branch.to_string(),
                evaluations: 0,
                avg_outcome_score: None,
                avg_latency_ms: None,
                failed_evaluations: 0,
                source: "manifest_seed",
            })
            .collect::<Vec<_>>()
    } else {
        live_leaderboard
    };

    let recommendations = load_recommendations(postgres).await?;
    let incidents = load_incidents(postgres).await?;
    let timeline = load_incident_timeline(postgres).await?;
    let guardrail_actions = load_guardrail_actions(postgres).await?;

    let health_signals = build_health_signals(
        avg_tool_latency_ms,
        &leaderboard,
        &recommendations,
        active_guardrails,
    );
    let health_score = calculate_health_score(&health_signals);

    // Assemble each console section, then the final response.
    let self_improvement = SelfImprovementConsole {
        manifest: manifest.self_improvement.clone(),
        total_evaluations,
        automated_recommendations,
        leaderboard,
        recommendations,
    };
    let process_runtime = ProcessRuntimeConsole {
        manifest: manifest.process_runtime.clone(),
        active_agent_sessions,
        running_agent_tasks,
        tool_calls_24h,
        avg_tool_latency_ms,
        gpu_jobs_by_status,
    };
    let plugin_platform = PluginPlatformConsole {
        listed_metadata_fields: manifest.plugin_platform.listing_metadata.len(),
        manifest: manifest.plugin_platform.clone(),
    };
    let translation_layer = TranslationLayerConsole {
        supported_planes: manifest.translation_layer.translation_planes.len(),
        manifest: manifest.translation_layer.clone(),
    };
    let diagnostics = DiagnosticsConsole {
        manifest: manifest.diagnostics.clone(),
        health_score,
        open_incidents,
        active_guardrails,
        degraded_capabilities,
        health_signals,
        incidents,
        timeline,
        guardrail_actions,
    };
    let platform_foundation = PlatformFoundationConsole {
        manifest: manifest.platform_contracts.clone(),
        organizations,
        teams,
        workspaces,
        artifacts,
        notifications,
    };
    let cloud = CloudConsole {
        manifest: manifest.cloud.clone(),
        usage_plans,
        billing_accounts,
        billing_events,
    };
    let robotics = RoboticsConsole {
        manifest: manifest.robotics.clone(),
        fleet_assets,
    };
    let operator_os = OperatorOsConsole {
        active_centers: manifest.operator_os.centers.len(),
        notification_backlog: notifications,
        manifest: manifest.operator_os.clone(),
    };

    let response = OperationsConsoleResponse {
        summary: String::from(
            "Vienota operations console ar diagnostics, platform foundation, evals, runtime telemetry, billing un fleet pārskatu.",
        ),
        generated_at: chrono::Utc::now().to_rfc3339(),
        self_improvement,
        process_runtime,
        plugin_platform,
        translation_layer,
        diagnostics,
        platform_foundation,
        cloud,
        robotics,
        operator_os,
    };
    Ok(Json(response))
}
/// Stores a task evaluation and the recommendations derived from it.
///
/// The request is validated first, the outcome score is mapped to a bucket via
/// `classify_outcome`, and recommendations are derived from the request by
/// `build_recommendations`. The evaluation row and all recommendation rows are
/// written inside a single transaction, so a failure while inserting a
/// recommendation no longer leaves an orphan evaluation behind (a client
/// retry would otherwise create a duplicate evaluation).
///
/// # Errors
///
/// * `400 Bad Request` when validation fails, an id is not a UUID, or
///   `regression_status` is not one of the accepted values.
/// * `503 Service Unavailable` when no PostgreSQL pool is configured.
/// * `500 Internal Server Error` when any database operation fails; the
///   transaction is rolled back when it is dropped without a commit.
pub async fn submit_task_outcome(
    State(state): State<AppState>,
    Json(req): Json<SubmitTaskOutcomeRequest>,
) -> Result<Json<SubmitTaskOutcomeResponse>, (StatusCode, Json<serde_json::Value>)> {
    validate_submit_request(&req)?;
    let Some(pool) = state.postgres.as_ref() else {
        return Err(error(
            StatusCode::SERVICE_UNAVAILABLE,
            "PostgreSQL nav pieejams eval scoring glabāšanai",
        ));
    };
    let agent_session_id = Uuid::parse_str(req.agent_session_id.trim())
        .map_err(|_| error(StatusCode::BAD_REQUEST, "Nederīgs agent_session_id"))?;
    let task_id = Uuid::parse_str(req.task_id.trim())
        .map_err(|_| error(StatusCode::BAD_REQUEST, "Nederīgs task_id"))?;
    let outcome = classify_outcome(req.outcome_score);
    let regression_status = normalize_agent_task_status(req.regression_status.as_deref())?;
    let feedback = req.feedback.clone().unwrap_or_default();
    let evidence = req
        .evidence
        .clone()
        .unwrap_or_else(|| serde_json::json!({ "signals": req.signals }));
    // Pure computation: do it before opening the transaction.
    let recommendations = build_recommendations(&req);

    // Everything below is written atomically.
    let mut tx = pool.begin().await.map_err(internal_error)?;
    let evaluation_row = sqlx::query(
r#"
INSERT INTO agent_task_evaluations (
agent_session_id,
task_id,
agent_branch,
evaluator,
suite_name,
outcome,
outcome_score,
latency_ms,
token_efficiency_score,
regression_status,
feedback,
evidence_json
)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10::agent_task_status, $11, $12)
RETURNING id
"#,
    )
    .bind(agent_session_id)
    .bind(task_id)
    .bind(req.agent_branch.trim())
    .bind(req.evaluator.trim())
    .bind(req.suite_name.trim())
    .bind(outcome)
    .bind(req.outcome_score)
    .bind(req.latency_ms)
    .bind(req.token_efficiency_score)
    .bind(regression_status.as_deref())
    .bind(feedback)
    .bind(evidence)
    .fetch_one(&mut *tx)
    .await
    .map_err(internal_error)?;
    let evaluation_id = evaluation_row
        .try_get::<Uuid, _>("id")
        .map_err(internal_error)?;

    for recommendation in &recommendations {
        sqlx::query(
r#"
INSERT INTO failed_session_recommendations (
agent_session_id,
source_task_id,
target,
title,
summary,
confidence_score,
recommendation_json
)
VALUES ($1, $2, $3, $4, $5, $6, $7)
"#,
        )
        .bind(agent_session_id)
        .bind(task_id)
        .bind(recommendation.target)
        .bind(&recommendation.title)
        .bind(&recommendation.summary)
        .bind(recommendation.confidence_score)
        .bind(serde_json::json!({
            "agent_branch": req.agent_branch.trim(),
            "suite_name": req.suite_name.trim(),
            "evaluator": req.evaluator.trim(),
            "outcome_score": req.outcome_score,
        }))
        .execute(&mut *tx)
        .await
        .map_err(internal_error)?;
    }
    tx.commit().await.map_err(internal_error)?;

    Ok(Json(SubmitTaskOutcomeResponse {
        evaluation_id: evaluation_id.to_string(),
        outcome: outcome.to_string(),
        recommendation_count: recommendations.len(),
    }))
}
/// Runs a `SELECT COUNT(*) AS count ...` style query and returns the count.
///
/// Returns `0` when no pool is configured. The query must expose a BIGINT
/// column named `count`; a missing column or type mismatch is reported as an
/// internal error instead of panicking the request task.
async fn count_rows(
    pool: Option<&sqlx::PgPool>,
    query: &str,
) -> Result<i64, (StatusCode, Json<serde_json::Value>)> {
    let Some(pool) = pool else {
        return Ok(0);
    };
    let row = sqlx::query(query)
        .fetch_one(pool)
        .await
        .map_err(internal_error)?;
    row.try_get::<i64, _>("count").map_err(internal_error)
}
/// Runs an aggregate query exposing a nullable BIGINT column `avg_latency_ms`.
///
/// Returns `None` when no pool is configured or when the column is NULL.
/// A missing column or type mismatch is reported as an internal error instead
/// of panicking the request task.
async fn optional_avg_latency(
    pool: Option<&sqlx::PgPool>,
    query: &str,
) -> Result<Option<i64>, (StatusCode, Json<serde_json::Value>)> {
    let Some(pool) = pool else {
        return Ok(None);
    };
    let row = sqlx::query(query)
        .fetch_one(pool)
        .await
        .map_err(internal_error)?;
    row.try_get::<Option<i64>, _>("avg_latency_ms")
        .map_err(internal_error)
}
async fn load_gpu_jobs_by_status(
pool: Option<&sqlx::PgPool>,
) -> Result<Vec<StatusMetric>, (StatusCode, Json<serde_json::Value>)> {
let Some(pool) = pool else {
return Ok(Vec::new());
};
let rows = sqlx::query(
r#"
SELECT status::text AS status, COUNT(*) AS count
FROM vision_gpu_jobs
GROUP BY status
ORDER BY status
"#,
)
.fetch_all(pool)
.await
.map_err(internal_error)?;
Ok(rows
.into_iter()
.map(|row| StatusMetric {
status: row.get("status"),
count: row.get("count"),
})
.collect())
}
async fn load_leaderboard(
pool: Option<&sqlx::PgPool>,
) -> Result<Vec<LeaderboardEntry>, (StatusCode, Json<serde_json::Value>)> {
let Some(pool) = pool else {
return Ok(Vec::new());
};
let rows = sqlx::query(
r#"
SELECT
agent_branch,
COUNT(*) AS evaluations,
AVG(outcome_score)::DOUBLE PRECISION AS avg_outcome_score,
AVG(latency_ms)::BIGINT AS avg_latency_ms,
COUNT(*) FILTER (WHERE outcome = 'failed') AS failed_evaluations
FROM agent_task_evaluations
GROUP BY agent_branch
ORDER BY avg_outcome_score DESC NULLS LAST, evaluations DESC
LIMIT 10
"#,
)
.fetch_all(pool)
.await
.map_err(internal_error)?;
Ok(rows
.into_iter()
.map(|row| LeaderboardEntry {
branch: row.get("agent_branch"),
evaluations: row.get("evaluations"),
avg_outcome_score: row.get("avg_outcome_score"),
avg_latency_ms: row.get("avg_latency_ms"),
failed_evaluations: row.get("failed_evaluations"),
source: "live",
})
.collect())
}
async fn load_recommendations(
pool: Option<&sqlx::PgPool>,
) -> Result<Vec<ImprovementRecommendationRecord>, (StatusCode, Json<serde_json::Value>)> {
let Some(pool) = pool else {
return Ok(Vec::new());
};
let rows = sqlx::query(
r#"
SELECT id, agent_session_id, source_task_id, target, title, summary, confidence_score, created_at
FROM failed_session_recommendations
ORDER BY created_at DESC, confidence_score DESC
LIMIT 8
"#,
)
.fetch_all(pool)
.await
.map_err(internal_error)?;
Ok(rows
.into_iter()
.map(|row| ImprovementRecommendationRecord {
id: row.get::<Uuid, _>("id").to_string(),
agent_session_id: row.get::<Uuid, _>("agent_session_id").to_string(),
source_task_id: row
.get::<Option<Uuid>, _>("source_task_id")
.map(|value| value.to_string()),
target: row.get("target"),
title: row.get("title"),
summary: row.get("summary"),
confidence_score: row.get("confidence_score"),
created_at: row.get::<chrono::DateTime<chrono::Utc>, _>("created_at").to_rfc3339(),
})
.collect())
}
async fn load_incidents(
pool: Option<&sqlx::PgPool>,
) -> Result<Vec<DiagnosticIncidentRecord>, (StatusCode, Json<serde_json::Value>)> {
let Some(pool) = pool else {
return Ok(vec![DiagnosticIncidentRecord {
id: Uuid::nil().to_string(),
incident_key: "manifest-seed-platform-foundation".to_string(),
title: "Platform foundation seeded for diagnostics center".to_string(),
status: "monitoring".to_string(),
severity: "info".to_string(),
affected_scope: "platform.foundation".to_string(),
root_cause_summary: Some(
"Persistence layer vēl tiek sagatavota pilniem live incidentiem.".to_string(),
),
recovery_action: Some(
"Pieslēgt PostgreSQL un sākt pierakstīt diagnostic_incidents/diagnostic_timeline_events."
.to_string(),
),
health_score: Some(82.0),
opened_at: chrono::Utc::now().to_rfc3339(),
resolved_at: None,
}]);
};
let rows = sqlx::query(
r#"
SELECT id, incident_key, title, status, severity::text AS severity, affected_scope,
root_cause_summary, recovery_action, health_score, opened_at, resolved_at
FROM diagnostic_incidents
ORDER BY opened_at DESC
LIMIT 6
"#,
)
.fetch_all(pool)
.await
.map_err(internal_error)?;
Ok(rows
.into_iter()
.map(|row| DiagnosticIncidentRecord {
id: row.get::<Uuid, _>("id").to_string(),
incident_key: row.get("incident_key"),
title: row.get("title"),
status: row.get("status"),
severity: row.get("severity"),
affected_scope: row.get("affected_scope"),
root_cause_summary: row.get("root_cause_summary"),
recovery_action: row.get("recovery_action"),
health_score: row.get("health_score"),
opened_at: row.get::<chrono::DateTime<chrono::Utc>, _>("opened_at").to_rfc3339(),
resolved_at: row
.get::<Option<chrono::DateTime<chrono::Utc>>, _>("resolved_at")
.map(|value| value.to_rfc3339()),
})
.collect())
}
async fn load_incident_timeline(
pool: Option<&sqlx::PgPool>,
) -> Result<Vec<DiagnosticTimelineEventRecord>, (StatusCode, Json<serde_json::Value>)> {
let Some(pool) = pool else {
return Ok(vec![
DiagnosticTimelineEventRecord {
id: Uuid::nil().to_string(),
incident_id: Uuid::nil().to_string(),
event_type: "health.observed".to_string(),
summary: "Modelētais diagnostics timeline gaida pirmos live incidentus.".to_string(),
created_at: chrono::Utc::now().to_rfc3339(),
},
DiagnosticTimelineEventRecord {
id: Uuid::nil().to_string(),
incident_id: Uuid::nil().to_string(),
event_type: "guardrail.ready".to_string(),
summary: "Fallback, safe-mode un approval escalation guardrails ir manifestēti operatoram."
.to_string(),
created_at: chrono::Utc::now().to_rfc3339(),
},
]);
};
let rows = sqlx::query(
r#"
SELECT id, incident_id, event_type, summary, created_at
FROM diagnostic_timeline_events
ORDER BY created_at DESC
LIMIT 12
"#,
)
.fetch_all(pool)
.await
.map_err(internal_error)?;
Ok(rows
.into_iter()
.map(|row| DiagnosticTimelineEventRecord {
id: row.get::<Uuid, _>("id").to_string(),
incident_id: row.get::<Uuid, _>("incident_id").to_string(),
event_type: row.get("event_type"),
summary: row.get("summary"),
created_at: row.get::<chrono::DateTime<chrono::Utc>, _>("created_at").to_rfc3339(),
})
.collect())
}
async fn load_guardrail_actions(
pool: Option<&sqlx::PgPool>,
) -> Result<Vec<GuardrailActionRecord>, (StatusCode, Json<serde_json::Value>)> {
let Some(pool) = pool else {
return Ok(vec![GuardrailActionRecord {
id: Uuid::nil().to_string(),
incident_id: None,
action_type: "safe_mode".to_string(),
trigger_signal: "platform.foundation".to_string(),
status: "planned".to_string(),
scope: "operator_os".to_string(),
summary: "Pie degradācijas jāspēj automātiski samazināt risku un prasīt operator approval."
.to_string(),
created_at: chrono::Utc::now().to_rfc3339(),
}]);
};
let rows = sqlx::query(
r#"
SELECT id, incident_id, action_type, trigger_signal, status, scope, summary, created_at
FROM diagnostic_guardrail_actions
ORDER BY created_at DESC
LIMIT 8
"#,
)
.fetch_all(pool)
.await
.map_err(internal_error)?;
Ok(rows
.into_iter()
.map(|row| GuardrailActionRecord {
id: row.get::<Uuid, _>("id").to_string(),
incident_id: row
.get::<Option<Uuid>, _>("incident_id")
.map(|value| value.to_string()),
action_type: row.get("action_type"),
trigger_signal: row.get("trigger_signal"),
status: row.get("status"),
scope: row.get("scope"),
summary: row.get("summary"),
created_at: row.get::<chrono::DateTime<chrono::Utc>, _>("created_at").to_rfc3339(),
})
.collect())
}
/// Derives the four platform health signals from already-loaded console data.
///
/// The returned vector always contains, in order: `model_drift`,
/// `latency_spikes`, `memory_quality` and `tool_instability`.
///
/// * `avg_tool_latency_ms` - mean tool latency; `None` yields an "observing"
///   latency signal without a score.
/// * `leaderboard` - entries whose `avg_outcome_score` values are averaged for
///   the drift signal. Entries without a score are ignored; if no entry has a
///   score (for example a manifest-seeded leaderboard) the drift signal is
///   "observing" with source "manifest_seed" instead of being reported as a
///   live score of 0.
/// * `recommendations` - the number of items with target "memory" drives the
///   memory signal.
/// * `active_guardrails` - count of active guardrail actions; drives the
///   tool-instability signal.
fn build_health_signals(
    avg_tool_latency_ms: Option<i64>,
    leaderboard: &[LeaderboardEntry],
    recommendations: &[ImprovementRecommendationRecord],
    active_guardrails: i64,
) -> Vec<HealthSignalStatus> {
    // Average only over entries that actually carry a score.
    let (score_sum, scored_entries) = leaderboard
        .iter()
        .filter_map(|entry| entry.avg_outcome_score)
        .fold((0.0_f64, 0_usize), |(sum, count), score| {
            (sum + score, count + 1)
        });
    let average_outcome = if scored_entries == 0 {
        None
    } else {
        Some(score_sum / scored_entries as f64)
    };

    let latency_signal = match avg_tool_latency_ms {
        Some(latency) if latency > LATENCY_DEGRADED_THRESHOLD_MS => (
            "degraded",
            Some(HEALTH_SCORE_DEGRADED),
            "Tool latency pārsniedz drošo robežu.",
        ),
        Some(latency) if latency > LATENCY_WATCH_THRESHOLD_MS => (
            "watch",
            Some(HEALTH_SCORE_WATCH),
            "Tool latentums pieaug un vajag throughput limiting.",
        ),
        Some(latency) => (
            "healthy",
            Some(HEALTH_SCORE_HEALTHY),
            if latency > 0 {
                "Tool latentums ir stabils."
            } else {
                "Tool latentuma dati vēl krājas."
            },
        ),
        None => ("observing", None, "Trūkst pietiekamu live latentuma datu."),
    };

    let drift_signal = match average_outcome {
        Some(score) if score < 60.0 => (
            "degraded",
            Some(score),
            "Eval leaderboard rāda kritisku kvalitātes kritumu.",
        ),
        Some(score) if score < 78.0 => {
            ("watch", Some(score), "Redzama branch kvalitātes svārstība.")
        }
        Some(score) => ("healthy", Some(score), "Branch kvalitāte ir stabila."),
        None => ("observing", None, "Vēl nav pietiekami outcome benchmarki."),
    };

    let memory_recommendations = recommendations
        .iter()
        .filter(|item| item.target == "memory")
        .count();
    let memory_signal = match memory_recommendations {
        count if count >= 3 => (
            "degraded",
            Some(49.0),
            "Bieži memory miss ieteikumi liecina par retrieval problēmām.",
        ),
        1 | 2 => (
            "watch",
            Some(71.0),
            "Ir memory kvalitātes signāli, kurus jāpieskata.",
        ),
        _ => ("healthy", Some(86.0), "Nav novērots memory miss klasteris."),
    };

    let tool_signal = match active_guardrails {
        count if count >= 3 => (
            "degraded",
            Some(44.0),
            "Daudzi guardrail triggeri norāda uz tool nestabilitāti.",
        ),
        1 | 2 => (
            "watch",
            Some(HEALTH_SCORE_TOOL_MEMORY_WATCH),
            "Daļa rīku darbojas nestabili, guardrails jau reaģē.",
        ),
        _ => (
            "healthy",
            Some(HEALTH_SCORE_HEALTHY_ALT),
            "Tools šobrīd izskatās stabili.",
        ),
    };

    vec![
        HealthSignalStatus {
            signal: "model_drift".to_string(),
            status: drift_signal.0.to_string(),
            summary: drift_signal.2.to_string(),
            score: drift_signal.1,
            source: if average_outcome.is_some() { "live" } else { "manifest_seed" },
        },
        HealthSignalStatus {
            signal: "latency_spikes".to_string(),
            status: latency_signal.0.to_string(),
            summary: latency_signal.2.to_string(),
            score: latency_signal.1,
            source: if avg_tool_latency_ms.is_some() { "live" } else { "manifest_seed" },
        },
        HealthSignalStatus {
            signal: "memory_quality".to_string(),
            status: memory_signal.0.to_string(),
            summary: memory_signal.2.to_string(),
            score: memory_signal.1,
            source: if recommendations.is_empty() { "manifest_seed" } else { "live" },
        },
        HealthSignalStatus {
            signal: "tool_instability".to_string(),
            status: tool_signal.0.to_string(),
            summary: tool_signal.2.to_string(),
            score: tool_signal.1,
            source: if active_guardrails == 0 { "manifest_seed" } else { "live" },
        },
    ]
}
/// Averages the scores of all signals that carry one, rounded to two decimals.
///
/// Signals without a score are skipped. When no signal has a score the
/// function returns the fixed default `75.0`.
fn calculate_health_score(signals: &[HealthSignalStatus]) -> f64 {
    let mut total = 0.0_f64;
    let mut scored = 0_usize;
    for signal in signals {
        if let Some(value) = signal.score {
            total += value;
            scored += 1;
        }
    }
    if scored == 0 {
        return 75.0;
    }
    let mean = total / scored as f64;
    (mean * 100.0).round() / 100.0
}
fn validate_submit_request(
req: &SubmitTaskOutcomeRequest,
) -> Result<(), (StatusCode, Json<serde_json::Value>)> {
if req.agent_branch.trim().is_empty()
|| req.evaluator.trim().is_empty()
|| req.suite_name.trim().is_empty()
{
return Err(error(
StatusCode::BAD_REQUEST,
"agent_branch, evaluator un suite_name ir obligāti",
));
}
if !(0.0..=100.0).contains(&req.outcome_score) {
return Err(error(
StatusCode::BAD_REQUEST,
"outcome_score jābūt diapazonā no 0 līdz 100",
));
}
if let Some(score) = req.token_efficiency_score {
if !(0.0..=100.0).contains(&score) {
return Err(error(
StatusCode::BAD_REQUEST,
"token_efficiency_score jābūt diapazonā no 0 līdz 100",
));
}
}
Ok(())
}
/// Maps an outcome score to its bucket label.
///
/// `>= 90` is "excellent", `>= 70` is "passing", `>= 50` is "needs_review";
/// everything else (including NaN) is "failed".
fn classify_outcome(score: f64) -> &'static str {
    match score {
        value if value >= 90.0 => "excellent",
        value if value >= 70.0 => "passing",
        value if value >= 50.0 => "needs_review",
        _ => "failed",
    }
}
fn normalize_agent_task_status(
status: Option<&str>,
) -> Result<Option<String>, (StatusCode, Json<serde_json::Value>)> {
let Some(status) = status.map(str::trim).filter(|value| !value.is_empty()) else {
return Ok(None);
};
match status {
"queued" | "running" | "completed" | "failed" | "cancelled" => {
Ok(Some(status.to_string()))
}
_ => Err(error(
StatusCode::BAD_REQUEST,
"regression_status jābūt queued, running, completed, failed vai cancelled",
)),
}
}
/// Derives improvement recommendations from a submitted task outcome.
///
/// Recommendations are emitted in a fixed order, each at most once:
///
/// 1. "prompt" - when `outcome_score < 70` or prompt clarity issues were reported.
/// 2. "policy" - when policy violations were reported.
/// 3. "tool"   - when tool failures were reported or `latency_ms > 2000`.
/// 4. "memory" - when memory misses were reported.
///
/// Missing signals are treated as zero. A former "fallback" branch for scores
/// below 50 with no other recommendation was removed: it could never run,
/// because every score below 70 already produces the "prompt" recommendation.
fn build_recommendations(req: &SubmitTaskOutcomeRequest) -> Vec<GeneratedRecommendation> {
    // Read the counters by reference; no need to clone the signals struct.
    let (policy_violations, tool_failures, memory_misses, prompt_clarity_issues) =
        match req.signals.as_ref() {
            Some(signals) => (
                signals.policy_violations.unwrap_or(0),
                signals.tool_failures.unwrap_or(0),
                signals.memory_misses.unwrap_or(0),
                signals.prompt_clarity_issues.unwrap_or(0),
            ),
            None => (0, 0, 0, 0),
        };
    let high_latency = req.latency_ms.unwrap_or_default() > 2_000;

    let mut recommendations = Vec::new();
    if req.outcome_score < 70.0 || prompt_clarity_issues > 0 {
        recommendations.push(GeneratedRecommendation {
            target: "prompt",
            title: "Prompt contract needs tightening".to_string(),
            summary:
                "Nepieciešams precizēt sistēmas promptu, success criteria and task decomposition instrukcijas."
                    .to_string(),
            // Each reported issue adds one point, capped at +20.
            confidence_score: 72.0 + f64::from(prompt_clarity_issues).min(20.0),
        });
    }
    if policy_violations > 0 {
        recommendations.push(GeneratedRecommendation {
            target: "policy",
            title: "Policy guardrails should be reinforced".to_string(),
            summary:
                "Neveiksmīgā sesija norāda uz policy pārkāpuma riskiem; vajag stingrākus approval vai refusal noteikumus."
                    .to_string(),
            confidence_score: 80.0 + f64::from(policy_violations).min(15.0),
        });
    }
    if tool_failures > 0 || high_latency {
        recommendations.push(GeneratedRecommendation {
            target: "tool",
            title: "Tool runtime path should be optimized".to_string(),
            summary:
                "Retry vai liels latentums rāda, ka jāoptimizē tool izvēle, job scheduling vai timeout politika."
                    .to_string(),
            // High latency takes precedence and yields a fixed confidence.
            confidence_score: if high_latency {
                78.0
            } else {
                74.0 + f64::from(tool_failures).min(18.0)
            },
        });
    }
    if memory_misses > 0 {
        recommendations.push(GeneratedRecommendation {
            target: "memory",
            title: "Memory retrieval should be improved".to_string(),
            summary:
                "Cross-session vai cross-lingual memory retrieval izskatās nepietiekams konkrētajam uzdevumam."
                    .to_string(),
            confidence_score: 76.0 + f64::from(memory_misses).min(18.0),
        });
    }
    recommendations
}
fn internal_error(err: impl std::fmt::Display) -> (StatusCode, Json<serde_json::Value>) {
error(StatusCode::INTERNAL_SERVER_ERROR, &err.to_string())
}
fn error(status: StatusCode, message: &str) -> (StatusCode, Json<serde_json::Value>) {
(status, Json(serde_json::json!({ "error": message })))
}
// Unit tests for the pure helpers of this module (no database access).
#[cfg(test)]
mod tests {
use super::{
build_health_signals, build_recommendations, calculate_health_score, classify_outcome,
normalize_agent_task_status, OutcomeSignals, SubmitTaskOutcomeRequest,
};
// One representative score per outcome bucket.
#[test]
fn classifies_outcomes_with_expected_buckets() {
assert_eq!(classify_outcome(95.0), "excellent");
assert_eq!(classify_outcome(75.0), "passing");
assert_eq!(classify_outcome(55.0), "needs_review");
assert_eq!(classify_outcome(35.0), "failed");
}
// A failing submission with every signal set yields all four targets in order.
#[test]
fn recommendation_builder_maps_failure_signals_to_targets() {
let recommendations = build_recommendations(&SubmitTaskOutcomeRequest {
agent_session_id: uuid::Uuid::nil().to_string(),
task_id: uuid::Uuid::nil().to_string(),
agent_branch: "planner".to_string(),
evaluator: "nightly".to_string(),
suite_name: "task_regression_grid".to_string(),
outcome_score: 42.0,
latency_ms: Some(3_100),
token_efficiency_score: Some(34.0),
regression_status: Some("failed".to_string()),
feedback: None,
evidence: None,
signals: Some(OutcomeSignals {
policy_violations: Some(1),
tool_failures: Some(2),
memory_misses: Some(1),
prompt_clarity_issues: Some(1),
}),
});
let targets = recommendations
.iter()
.map(|item| item.target)
.collect::<Vec<_>>();
assert_eq!(targets, vec!["prompt", "policy", "tool", "memory"]);
}
// Known statuses pass through unchanged; unknown ones are rejected.
#[test]
fn regression_status_validation_rejects_unknown_values() {
assert_eq!(
normalize_agent_task_status(Some("completed")).unwrap(),
Some("completed".to_string())
);
assert!(normalize_agent_task_status(Some("oops")).is_err());
}
// Smoke test: four signals are produced and the aggregate score is positive.
#[test]
fn health_signals_produce_platform_score() {
let signals = build_health_signals(
Some(1_200),
&[super::LeaderboardEntry {
branch: "planner".to_string(),
evaluations: 4,
avg_outcome_score: Some(74.0),
avg_latency_ms: Some(900),
failed_evaluations: 1,
source: "live",
}],
&[super::ImprovementRecommendationRecord {
id: uuid::Uuid::nil().to_string(),
agent_session_id: uuid::Uuid::nil().to_string(),
source_task_id: None,
target: "memory".to_string(),
title: "Memory drift".to_string(),
summary: "Need retrieval tuning".to_string(),
confidence_score: 82.0,
created_at: "2026-04-04T00:00:00Z".to_string(),
}],
1,
);
assert_eq!(signals.len(), 4);
assert!(calculate_health_score(&signals) > 0.0);
}
}