# Falgunisharma's picture
# Improve environment depth: investigate action, cascading deps, rich observations, harder hard task
# 5f6895d
"""FastAPI server for the Feature Flag Cleanup OpenEnv environment."""
import sys
import os
# Add project root to path so env module can be imported
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from typing import Any, Dict, Optional
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse, HTMLResponse
from pydantic import BaseModel
from env.environment import FeatureFlagCleanupEnv
from env.models import FlagAction, FlagObservation, FlagState
# ASGI application object; every route below is registered on it via decorators.
# title/description/version are passed straight to the FastAPI constructor.
app = FastAPI(
    title="Feature Flag Cleanup Agent",
    description="OpenEnv environment where AI agents learn to triage and clean up stale feature flags",
    version="0.1.0",
)
# Global environment instance.
# Created once at import time and used by every endpoint in this module
# (/reset, /step, /state, /grade), so all callers of this process share the
# same episode state.
_env = FeatureFlagCleanupEnv()
# --- Root endpoint ---
@app.get("/", response_class=HTMLResponse)
def root():
    """Serve the interactive single-page web UI for the environment.

    Returns a static HTML document (inline CSS and JavaScript) as a string;
    FastAPI wraps it in an ``HTMLResponse``. The embedded script talks to this
    same server (``window.location.origin``) through ``POST /reset``,
    ``POST /step`` and ``POST /grade`` and renders each observation it gets
    back. Nothing in the page is templated server-side.
    """
    return """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Feature Flag Cleanup Agent</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body { font-family: 'Segoe UI', system-ui, -apple-system, sans-serif; background: #0f172a; color: #e2e8f0; min-height: 100vh; }
.container { max-width: 900px; margin: 0 auto; padding: 20px; }
h1 { text-align: center; font-size: 1.8rem; margin: 20px 0 5px; color: #f8fafc; }
.subtitle { text-align: center; color: #94a3b8; margin-bottom: 25px; font-size: 0.95rem; }
/* Task selector */
.task-bar { display: flex; gap: 10px; justify-content: center; margin-bottom: 20px; }
.task-btn { padding: 10px 28px; border: 2px solid #334155; border-radius: 10px; background: #1e293b; color: #cbd5e1; cursor: pointer; font-size: 0.95rem; font-weight: 600; transition: all 0.2s; }
.task-btn:hover { border-color: #60a5fa; color: #60a5fa; }
.task-btn.active { border-color: #3b82f6; background: #1e3a5f; color: #60a5fa; }
.task-btn .diff { display: block; font-size: 0.7rem; font-weight: 400; color: #64748b; margin-top: 2px; }
/* Score bar */
.score-bar { display: flex; gap: 15px; justify-content: center; margin-bottom: 20px; flex-wrap: wrap; }
.score-item { background: #1e293b; border: 1px solid #334155; border-radius: 10px; padding: 12px 20px; text-align: center; min-width: 120px; }
.score-item .label { font-size: 0.75rem; color: #64748b; text-transform: uppercase; letter-spacing: 0.05em; }
.score-item .value { font-size: 1.4rem; font-weight: 700; color: #f8fafc; margin-top: 2px; }
/* Flag card */
.flag-card { background: #1e293b; border: 1px solid #334155; border-radius: 14px; padding: 24px; margin-bottom: 20px; }
.flag-header { display: flex; justify-content: space-between; align-items: start; margin-bottom: 16px; }
.flag-name { font-size: 1.3rem; font-weight: 700; color: #f8fafc; font-family: 'Courier New', monospace; }
.flag-desc { color: #94a3b8; font-size: 0.9rem; margin-top: 4px; }
.badges { display: flex; gap: 6px; flex-wrap: wrap; }
.badge { padding: 3px 10px; border-radius: 20px; font-size: 0.72rem; font-weight: 600; text-transform: uppercase; letter-spacing: 0.03em; }
.badge-danger { background: #450a0a; color: #fca5a5; border: 1px solid #7f1d1d; }
.badge-warning { background: #451a03; color: #fcd34d; border: 1px solid #78350f; }
.badge-success { background: #052e16; color: #86efac; border: 1px solid #14532d; }
.badge-info { background: #0c1929; color: #93c5fd; border: 1px solid #1e3a5f; }
.flag-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 10px 30px; margin-top: 14px; }
.flag-attr { display: flex; justify-content: space-between; padding: 6px 0; border-bottom: 1px solid #1a2332; }
.flag-attr .key { color: #64748b; font-size: 0.85rem; }
.flag-attr .val { color: #e2e8f0; font-size: 0.85rem; font-weight: 600; }
/* Rollout bar */
.rollout-bar { margin-top: 14px; }
.rollout-track { height: 8px; background: #334155; border-radius: 4px; overflow: hidden; }
.rollout-fill { height: 100%; border-radius: 4px; transition: width 0.5s ease; }
.rollout-label { display: flex; justify-content: space-between; margin-top: 4px; font-size: 0.75rem; color: #64748b; }
/* Action buttons */
.actions { display: flex; gap: 10px; justify-content: center; margin-bottom: 20px; flex-wrap: wrap; }
.action-btn { padding: 14px 30px; border: 2px solid; border-radius: 12px; font-size: 1rem; font-weight: 700; cursor: pointer; transition: all 0.2s; text-transform: uppercase; letter-spacing: 0.05em; min-width: 140px; }
.action-btn:hover { transform: translateY(-2px); box-shadow: 0 4px 15px rgba(0,0,0,0.3); }
.action-btn:disabled { opacity: 0.3; cursor: not-allowed; transform: none; }
.btn-remove { background: #1c0a0a; border-color: #dc2626; color: #fca5a5; }
.btn-remove:hover:not(:disabled) { background: #dc2626; color: #fff; }
.btn-keep { background: #0a1c0a; border-color: #16a34a; color: #86efac; }
.btn-keep:hover:not(:disabled) { background: #16a34a; color: #fff; }
.btn-deprecate { background: #1c1a0a; border-color: #ca8a04; color: #fde047; }
.btn-deprecate:hover:not(:disabled) { background: #ca8a04; color: #fff; }
.btn-escalate { background: #0a0a1c; border-color: #7c3aed; color: #c4b5fd; }
.btn-escalate:hover:not(:disabled) { background: #7c3aed; color: #fff; }
.btn-investigate { background: #0a1a1c; border-color: #0891b2; color: #67e8f9; }
.btn-investigate:hover:not(:disabled) { background: #0891b2; color: #fff; }
.code-block { background: #0f172a; border: 1px solid #1e293b; border-radius: 8px; padding: 10px 14px; margin-top: 10px; font-family: 'Courier New', monospace; font-size: 0.78rem; color: #94a3b8; white-space: pre-wrap; overflow-x: auto; max-height: 120px; }
.context-block { background: #1a1a2e; border: 1px solid #2d2d44; border-radius: 8px; padding: 10px 14px; margin-top: 8px; font-size: 0.82rem; color: #a5b4fc; }
.context-label { font-weight: 700; color: #64748b; font-size: 0.75rem; text-transform: uppercase; margin-bottom: 4px; }
.cascade-warn { background: #451a03; border: 1px solid #78350f; border-radius: 8px; padding: 10px 14px; margin-top: 8px; color: #fcd34d; font-weight: 600; font-size: 0.85rem; }
.investigation-notes { background: #052e16; border: 1px solid #14532d; border-radius: 8px; padding: 10px 14px; margin-top: 8px; color: #86efac; font-size: 0.85rem; }
/* Feedback toast */
.feedback { text-align: center; padding: 16px; border-radius: 10px; margin-bottom: 16px; font-size: 1rem; font-weight: 600; display: none; animation: fadeIn 0.3s; }
.feedback.show { display: block; }
.feedback.good { background: #052e16; border: 1px solid #14532d; color: #86efac; }
.feedback.bad { background: #450a0a; border: 1px solid #7f1d1d; color: #fca5a5; }
.feedback.neutral { background: #1e293b; border: 1px solid #334155; color: #fcd34d; }
/* History */
.history { background: #1e293b; border: 1px solid #334155; border-radius: 14px; padding: 18px; }
.history h3 { font-size: 0.9rem; color: #64748b; text-transform: uppercase; letter-spacing: 0.05em; margin-bottom: 10px; }
.history-item { display: flex; justify-content: space-between; align-items: center; padding: 8px 12px; border-radius: 8px; margin-bottom: 4px; font-size: 0.85rem; }
.history-item.correct { background: #052e1640; }
.history-item.wrong { background: #450a0a40; }
.history-item .flag-n { color: #94a3b8; font-family: monospace; flex: 1; }
.history-item .act { padding: 2px 8px; border-radius: 4px; font-weight: 600; font-size: 0.75rem; margin: 0 8px; }
.history-item .rew { font-weight: 700; min-width: 50px; text-align: right; }
/* Final screen */
.final-screen { text-align: center; padding: 40px; }
.final-score { font-size: 4rem; font-weight: 800; }
.final-label { font-size: 1.1rem; color: #94a3b8; margin-bottom: 20px; }
.play-again { padding: 14px 40px; border: 2px solid #3b82f6; border-radius: 12px; background: #1e3a5f; color: #60a5fa; font-size: 1rem; font-weight: 700; cursor: pointer; }
.play-again:hover { background: #3b82f6; color: #fff; }
/* Start screen */
.start-screen { text-align: center; padding: 60px 20px; }
.start-screen p { color: #94a3b8; margin: 15px 0 30px; max-width: 500px; margin-left: auto; margin-right: auto; line-height: 1.6; }
@keyframes fadeIn { from { opacity: 0; transform: translateY(-5px); } to { opacity: 1; transform: translateY(0); } }
.hidden { display: none !important; }
</style>
</head>
<body>
<div class="container">
<h1>Feature Flag Cleanup Agent</h1>
<p class="subtitle">Decide the fate of each feature flag: remove, keep, deprecate, or escalate</p>
<!-- Task selector -->
<div class="task-bar">
<button class="task-btn" onclick="startTask('easy')">Easy<span class="diff">8 obvious flags</span></button>
<button class="task-btn" onclick="startTask('medium')">Medium<span class="diff">8 ambiguous flags</span></button>
<button class="task-btn" onclick="startTask('hard')">Hard<span class="diff">10 complex flags</span></button>
</div>
<!-- Score bar -->
<div class="score-bar">
<div class="score-item"><div class="label">Step</div><div class="value" id="step-num">-</div></div>
<div class="score-item"><div class="label">Reward</div><div class="value" id="cum-reward">-</div></div>
<div class="score-item"><div class="label">Remaining</div><div class="value" id="remaining">-</div></div>
</div>
<!-- Start screen -->
<div id="start-screen" class="start-screen">
<p>You're a senior engineer at a large tech company. Feature flags have piled up. For each flag, decide: should it be <strong>removed</strong>, <strong>kept</strong>, <strong>deprecated</strong>, or <strong>escalated</strong> for human review? You can also <strong>investigate</strong> to reveal hidden notes before deciding (costs -0.05).</p>
<p style="color:#64748b; font-size:0.85rem;">Pick a difficulty above to begin.</p>
</div>
<!-- Feedback toast -->
<div id="feedback" class="feedback"></div>
<!-- Flag card -->
<div id="flag-card" class="flag-card hidden">
<div class="flag-header">
<div>
<div class="flag-name" id="flag-name"></div>
<div class="flag-desc" id="flag-desc"></div>
</div>
<div class="badges" id="badges"></div>
</div>
<div class="rollout-bar">
<div class="rollout-track"><div class="rollout-fill" id="rollout-fill"></div></div>
<div class="rollout-label"><span>Rollout</span><span id="rollout-pct"></span></div>
</div>
<div class="flag-grid" id="flag-grid"></div>
</div>
<!-- Action buttons -->
<div class="actions" id="action-btns">
<button class="action-btn btn-remove hidden" onclick="doAction('remove')">Remove</button>
<button class="action-btn btn-keep hidden" onclick="doAction('keep')">Keep</button>
<button class="action-btn btn-deprecate hidden" onclick="doAction('deprecate')">Deprecate</button>
<button class="action-btn btn-escalate hidden" onclick="doAction('escalate')">Escalate</button>
<button class="action-btn btn-investigate hidden" onclick="doAction('investigate')">Investigate</button>
</div>
<!-- Final screen -->
<div id="final-screen" class="final-screen hidden"></div>
<!-- History -->
<div id="history" class="history hidden">
<h3>Action History</h3>
<div id="history-list"></div>
</div>
</div>
<script>
const API = window.location.origin;
let currentObs = null;
let stepCount = 0;
let cumReward = 0;
let historyItems = [];
async function startTask(taskId) {
document.querySelectorAll('.task-btn').forEach(b => b.classList.remove('active'));
event.target.closest('.task-btn').classList.add('active');
const res = await fetch(API + '/reset', {
method: 'POST', headers: {'Content-Type': 'application/json'},
body: JSON.stringify({task_id: taskId})
});
const data = await res.json();
currentObs = data.observation;
stepCount = 0; cumReward = 0; historyItems = [];
document.getElementById('start-screen').classList.add('hidden');
document.getElementById('final-screen').classList.add('hidden');
document.getElementById('flag-card').classList.remove('hidden');
document.querySelectorAll('.action-btn').forEach(b => b.classList.remove('hidden'));
document.getElementById('history').classList.remove('hidden');
document.getElementById('history-list').innerHTML = '';
hideFeedback();
renderFlag(currentObs);
updateScoreBar();
}
function renderFlag(obs) {
document.getElementById('flag-name').textContent = obs.flag_name;
document.getElementById('flag-desc').textContent = obs.description;
// Rollout bar
const pct = Math.round(obs.rollout_percentage * 100);
const fill = document.getElementById('rollout-fill');
fill.style.width = pct + '%';
fill.style.background = pct === 100 ? '#16a34a' : pct >= 50 ? '#ca8a04' : '#3b82f6';
document.getElementById('rollout-pct').textContent = pct + '%';
// Badges
const badges = document.getElementById('badges');
badges.innerHTML = '';
if (obs.is_kill_switch) badges.innerHTML += '<span class="badge badge-danger">Kill Switch</span>';
if (obs.has_active_incident) badges.innerHTML += '<span class="badge badge-danger">Active Incident</span>';
if (obs.in_active_experiment) badges.innerHTML += '<span class="badge badge-warning">A/B Test</span>';
if (obs.has_dependencies) badges.innerHTML += '<span class="badge badge-warning">Has Deps</span>';
if (!obs.owner_active) badges.innerHTML += '<span class="badge badge-info">Owner Left</span>';
// Grid
const grid = document.getElementById('flag-grid');
let gridHtml = [
['Age', obs.age_days + ' days'],
['Last Modified', obs.last_modified_days + ' days ago'],
['Owner', obs.owner + (obs.owner_active ? '' : ' (inactive)')],
['Code References', obs.num_code_references],
['Usage (30d)', obs.usage_last_30d.toLocaleString()],
['Services', obs.services.join(', ')],
['Dependencies', obs.dependent_flags.length ? obs.dependent_flags.join(', ') : 'None'],
['Flags Remaining', obs.flags_remaining],
].map(([k, v]) => '<div class="flag-attr"><span class="key">' + k + '</span><span class="val">' + v + '</span></div>').join('');
// Rich context
if (obs.related_incidents && obs.related_incidents.length > 0) {
gridHtml += '<div class="flag-attr" style="grid-column:1/-1"><span class="key">Related Incidents</span><span class="val" style="color:#fca5a5">' + obs.related_incidents.join('; ') + '</span></div>';
}
grid.innerHTML = gridHtml;
// Code snippet
let extraHtml = '';
if (obs.code_snippet) {
extraHtml += '<div class="context-label">Code Snippet</div><div class="code-block">' + obs.code_snippet.replace(/</g,'&lt;') + '</div>';
}
if (obs.pr_context) {
extraHtml += '<div class="context-label" style="margin-top:10px">PR Context</div><div class="context-block">' + obs.pr_context + '</div>';
}
if (obs.last_commit_message) {
extraHtml += '<div class="context-label" style="margin-top:10px">Last Commit</div><div class="context-block">' + obs.last_commit_message + '</div>';
}
if (obs.cascade_warning) {
extraHtml += '<div class="cascade-warn">' + obs.cascade_warning + '</div>';
}
if (obs.investigation_notes) {
extraHtml += '<div class="context-label" style="margin-top:10px">Investigation Notes</div><div class="investigation-notes">' + obs.investigation_notes + '</div>';
}
// Add extra html after grid
let extraDiv = document.getElementById('extra-context');
if (!extraDiv) { extraDiv = document.createElement('div'); extraDiv.id = 'extra-context'; document.getElementById('flag-card').appendChild(extraDiv); }
extraDiv.innerHTML = extraHtml;
}
async function doAction(action) {
document.querySelectorAll('.action-btn').forEach(b => b.disabled = true);
const res = await fetch(API + '/step', {
method: 'POST', headers: {'Content-Type': 'application/json'},
body: JSON.stringify({action: {action: action}})
});
const data = await res.json();
const reward = data.reward;
const info = data.info;
stepCount++;
cumReward += reward;
// Show feedback
showFeedback(reward, action, info.correct_action, info.flag_name);
// Add to history
addHistory(info.flag_name, action, info.correct_action, reward);
updateScoreBar();
if (data.done) {
const gradeRes = await fetch(API + '/grade', {method: 'POST'});
const gradeData = await gradeRes.json();
showFinal(gradeData.score, gradeData.task_id);
} else {
currentObs = data.observation;
const delay = action === 'investigate' ? 200 : 800;
setTimeout(() => {
renderFlag(currentObs);
document.querySelectorAll('.action-btn').forEach(b => b.disabled = false);
}, delay);
}
}
function showFeedback(reward, action, correct, flagName) {
const fb = document.getElementById('feedback');
const isCorrect = action === correct;
const cls = reward >= 0.7 ? 'good' : reward >= 0 ? 'neutral' : 'bad';
const emoji = reward >= 0.7 ? 'Correct!' : reward >= 0 ? 'Acceptable' : 'Wrong!';
fb.className = 'feedback show ' + cls;
fb.innerHTML = '<strong>' + emoji + '</strong> You chose <strong>' + action + '</strong>' +
(isCorrect ? '' : ' &mdash; best action was <strong>' + correct + '</strong>') +
' &nbsp; <strong>' + (reward >= 0 ? '+' : '') + reward.toFixed(1) + '</strong>';
}
function hideFeedback() {
document.getElementById('feedback').className = 'feedback';
}
function addHistory(flagName, action, correct, reward) {
const list = document.getElementById('history-list');
const isCorrect = action === correct;
const colors = {remove:'#fca5a5', keep:'#86efac', deprecate:'#fde047', escalate:'#c4b5fd'};
const item = document.createElement('div');
item.className = 'history-item ' + (isCorrect ? 'correct' : 'wrong');
item.innerHTML = '<span class="flag-n">' + flagName + '</span>' +
'<span class="act" style="color:' + (colors[action]||'#fff') + '">' + action + '</span>' +
'<span class="rew" style="color:' + (reward >= 0 ? '#86efac' : '#fca5a5') + '">' + (reward >= 0 ? '+' : '') + reward.toFixed(1) + '</span>';
list.prepend(item);
}
function updateScoreBar() {
document.getElementById('step-num').textContent = stepCount;
document.getElementById('cum-reward').textContent = cumReward.toFixed(1);
document.getElementById('cum-reward').style.color = cumReward >= 0 ? '#86efac' : '#fca5a5';
document.getElementById('remaining').textContent = currentObs ? currentObs.flags_remaining : '-';
}
function showFinal(score, taskId) {
document.getElementById('flag-card').classList.add('hidden');
document.querySelectorAll('.action-btn').forEach(b => b.classList.add('hidden'));
const fs = document.getElementById('final-screen');
fs.classList.remove('hidden');
const color = score >= 0.8 ? '#86efac' : score >= 0.6 ? '#fde047' : '#fca5a5';
const label = score >= 0.8 ? 'Excellent!' : score >= 0.6 ? 'Good effort!' : 'Keep practicing!';
fs.innerHTML = '<div class="final-label">' + taskId.toUpperCase() + ' task complete</div>' +
'<div class="final-score" style="color:' + color + '">' + (score * 100).toFixed(1) + '%</div>' +
'<div class="final-label">' + label + '</div>' +
'<br><button class="play-again" onclick="location.reload()">Play Again</button>';
}
</script>
</body>
</html>"""
# --- Request/Response Models ---
class ResetRequest(BaseModel):
    """Request body for ``POST /reset``."""

    # Forwarded to the environment's reset(); the bundled web UI sends
    # "easy", "medium" or "hard".
    task_id: str = "easy"
class ResetResponse(BaseModel):
    """Response body for ``POST /reset``."""

    # First observation of the new episode, as returned by the environment.
    observation: Dict[str, Any]
    # The reset endpoint always passes None here: no action has been taken yet.
    reward: Optional[float] = None
    # The reset endpoint always passes False here.
    done: bool = False
class StepRequest(BaseModel):
    """Request body for ``POST /step``."""

    # Passed unchanged to the environment's step(); the bundled web UI sends
    # {"action": "<name>"}.
    action: Dict[str, Any]
    # Accepted in the payload but not read by the step endpoint in this module.
    timeout_s: Optional[float] = None
class StepResponse(BaseModel):
    """Response body for ``POST /step``: the 4-tuple returned by the environment's step()."""

    observation: Dict[str, Any]
    reward: float
    done: bool
    # The bundled web UI reads "correct_action" and "flag_name" from this dict.
    info: Dict[str, Any] = {}
class StateResponse(BaseModel):
    """Response body for ``GET /state``.

    Built by unpacking the dict returned by the environment's state() as
    keyword arguments, so these field names mirror that dict's keys.
    """

    episode_id: Optional[str] = None
    step_count: int = 0
    task_id: str = "easy"
    current_flag_index: int = 0
    total_flags: int = 0
    cumulative_reward: float = 0.0
    flags_processed: list = []
class GradeResponse(BaseModel):
    """Response body for ``POST /grade``."""

    # Value returned by the environment's grade(); the bundled web UI renders
    # it as a percentage (score * 100).
    score: float
    # Taken from the "task_id" key of the environment's state().
    task_id: str
class SchemaResponse(BaseModel):
    """Response body for ``GET /schema``: JSON schemas of the environment's models."""

    # Filled from FlagAction.model_json_schema().
    action: Dict[str, Any]
    # Filled from FlagObservation.model_json_schema().
    observation: Dict[str, Any]
    # Filled from FlagState.model_json_schema().
    state: Dict[str, Any]
class MetadataResponse(BaseModel):
    """Response body for ``GET /metadata``: static descriptive fields about this environment."""

    name: str
    description: str
    version: str
    author: str
# --- Endpoints ---
@app.get("/health")
def health():
    """Liveness probe: always reports a fixed ``healthy`` status."""
    return dict(status="healthy")
@app.get("/metadata", response_model=MetadataResponse)
def metadata():
    """Return the static name, description, version and author of this environment."""
    details = {
        "name": "feature-flag-cleanup",
        "description": "OpenEnv environment where AI agents learn to triage and clean up stale feature flags in a simulated engineering organization",
        "version": "0.1.0",
        "author": "Falguni",
    }
    return MetadataResponse(**details)
@app.get("/schema", response_model=SchemaResponse)
def schema():
    """Return the JSON schemas of the action, observation and state models."""
    # Evaluated in the same order as the response fields: action, observation, state.
    action_schema = FlagAction.model_json_schema()
    observation_schema = FlagObservation.model_json_schema()
    state_schema = FlagState.model_json_schema()
    return SchemaResponse(
        action=action_schema,
        observation=observation_schema,
        state=state_schema,
    )
@app.post("/reset", response_model=ResetResponse)
def reset(request: ResetRequest = ResetRequest()):
    """Start a new episode on the shared environment and return its first observation.

    Args:
        request: Optional body; when omitted the default ``ResetRequest()``
            (``task_id="easy"``) is used.

    Returns:
        ResetResponse with the initial observation, ``reward=None`` and
        ``done=False``.

    Raises:
        HTTPException: 400 carrying the environment's error message when
            ``_env.reset`` fails. What the environment rejects is not visible
            from this module - presumably an unrecognised ``task_id``; verify
            against the environment implementation.
    """
    try:
        obs = _env.reset(task_id=request.task_id)
    except Exception as exc:
        # Same policy as /step: report environment failures as a 400 with the
        # message instead of an opaque 500, keeping the original as __cause__.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return ResetResponse(observation=obs, reward=None, done=False)
@app.post("/step", response_model=StepResponse)
def step(request: StepRequest):
    """Apply one action to the shared environment and return the transition.

    Args:
        request: Body whose ``action`` dict is passed unchanged to
            ``_env.step``. ``timeout_s`` is accepted but not used here.

    Returns:
        StepResponse built from the ``(observation, reward, done, info)``
        tuple returned by the environment.

    Raises:
        HTTPException: 400 with the stringified error when the environment
            call or the response construction raises.
    """
    try:
        obs, reward, done, info = _env.step(request.action)
        return StepResponse(observation=obs, reward=reward, done=done, info=info)
    except Exception as exc:
        # Chain explicitly so the original exception stays attached as
        # __cause__ rather than being reported as "during handling of ...".
        raise HTTPException(status_code=400, detail=str(exc)) from exc
@app.get("/state", response_model=StateResponse)
def state():
    """Return the shared environment's current state as a ``StateResponse``."""
    # The state dict's keys are unpacked directly as StateResponse fields.
    return StateResponse(**_env.state())
@app.post("/grade", response_model=GradeResponse)
def grade():
    """Return the environment's grade together with the task it belongs to."""
    # grade() is called before state(), matching the original call order.
    final_score = _env.grade()
    task_name = _env.state()["task_id"]
    return GradeResponse(score=final_score, task_id=task_name)
@app.post("/mcp")
async def mcp(request: Request):
    """MCP (Model Context Protocol) JSON-RPC endpoint.

    Answers every well-formed request with the same static JSON-RPC 2.0
    result describing this environment and its tools; the request's method
    and params are not inspected. The request ``id`` is echoed back,
    defaulting to 1 when absent.

    A body that is not valid JSON yields a JSON-RPC ``-32700`` error, and a
    JSON value that is not an object yields ``-32600``; both are sent with
    HTTP status 400 and ``id`` null instead of crashing the handler.
    """
    try:
        body = await request.json()
    except ValueError:
        # json.JSONDecodeError (and UnicodeDecodeError) subclass ValueError.
        return JSONResponse(
            status_code=400,
            content={
                "jsonrpc": "2.0",
                "id": None,
                "error": {"code": -32700, "message": "Parse error"},
            },
        )
    if not isinstance(body, dict):
        # body.get() below needs a JSON object; arrays/scalars are rejected.
        return JSONResponse(
            status_code=400,
            content={
                "jsonrpc": "2.0",
                "id": None,
                "error": {
                    "code": -32600,
                    "message": "Invalid Request",
                    "data": "expected a single JSON-RPC request object",
                },
            },
        )
    # Return a valid JSON-RPC 2.0 response
    return JSONResponse(content={
        "jsonrpc": "2.0",
        "id": body.get("id", 1),
        "result": {
            "name": "feature-flag-cleanup",
            "description": "OpenEnv environment for feature flag triage and cleanup",
            "tools": [
                {
                    "name": "reset",
                    "description": "Reset the environment for a new episode",
                    "parameters": {"task_id": {"type": "string", "enum": ["easy", "medium", "hard"]}},
                },
                {
                    "name": "step",
                    "description": "Take an action on the current feature flag",
                    # "investigate" is listed because the web UI served by this
                    # module sends it to /step alongside the other four actions.
                    "parameters": {"action": {"type": "string", "enum": ["remove", "keep", "deprecate", "escalate", "investigate"]}},
                },
                {
                    "name": "state",
                    "description": "Get current environment state",
                    "parameters": {},
                },
            ],
        },
    })
def main():
    """Run the app under uvicorn on all interfaces.

    The listening port is read from the ``PORT`` environment variable and
    falls back to 7860 when it is unset.
    """
    import uvicorn

    listen_port = int(os.getenv("PORT", 7860))
    uvicorn.run(
        "server.app:app",
        host="0.0.0.0",
        port=listen_port,
        reload=False,
    )
# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()