Spaces:
Running
Running
File size: 16,182 Bytes
"""Unit tests for the EpisodeTracker — command history, rollback detection, and grading helpers.
These are pure unit tests that do not require MiniStack or Docker.
Run:
python -m pytest tests/test_episode_tracker.py -v
"""
from server.services.episode_tracker import (
EpisodeTracker,
StepRecord,
_command_mentions_resource,
_extract_resource_name,
_parse_aws_command,
)
# ---------------------------------------------------------------------------
# _parse_aws_command
# ---------------------------------------------------------------------------
class TestParseAwsCommand:
    """Exercise ``_parse_aws_command`` on well-formed and malformed input."""

    def test_standard_command(self) -> None:
        """A full CLI invocation yields the two tokens that follow ``aws``."""
        parsed = _parse_aws_command("aws s3api create-bucket --bucket foo")
        assert parsed == ("s3api", "create-bucket")

    def test_simple_service(self) -> None:
        """A flag-less three-token command parses the same way."""
        expected = ("iam", "list-roles")
        assert _parse_aws_command("aws iam list-roles") == expected

    def test_too_few_parts(self) -> None:
        """Only two tokens are present, so both slots come back as ``None``."""
        parsed = _parse_aws_command("aws s3")
        assert parsed == (None, None)

    def test_not_aws(self) -> None:
        """A command for another CLI produces the ``(None, None)`` pair."""
        parsed = _parse_aws_command("gcloud compute instances list")
        assert parsed == (None, None)

    def test_empty_string(self) -> None:
        """The empty string produces the ``(None, None)`` pair."""
        parsed = _parse_aws_command("")
        assert parsed == (None, None)

    def test_leading_whitespace(self) -> None:
        """Whitespace before ``aws`` does not prevent parsing."""
        parsed = _parse_aws_command(" aws lambda list-functions")
        assert parsed == ("lambda", "list-functions")
# ---------------------------------------------------------------------------
# _command_mentions_resource
# ---------------------------------------------------------------------------
class TestCommandMentionsResource:
    """Exercise ``_command_mentions_resource`` with flags, ARNs and mismatches."""

    def test_flag_match(self) -> None:
        """The resource appears as a space-separated flag value."""
        command = "aws s3api create-bucket --bucket my-bucket"
        mentioned = _command_mentions_resource(command, "my-bucket")
        assert mentioned

    def test_flag_value_syntax(self) -> None:
        """The resource appears in ``--flag=value`` form."""
        command = "aws dynamodb describe-table --table-name=orders"
        mentioned = _command_mentions_resource(command, "orders")
        assert mentioned

    def test_function_name_flag(self) -> None:
        """The resource is given via ``--function-name`` with a trailing argument."""
        command = "aws lambda invoke --function-name processor /dev/null"
        mentioned = _command_mentions_resource(command, "processor")
        assert mentioned

    def test_arn_word_boundary(self) -> None:
        """The resource is the final component of an ARN."""
        command = (
            "aws lambda create-event-source-mapping "
            "--event-source-arn arn:aws:sqs:us-east-1:000000000000:my-queue"
        )
        mentioned = _command_mentions_resource(command, "my-queue")
        assert mentioned

    def test_no_match(self) -> None:
        """A bucket name sharing only a suffix with the target is not a mention."""
        command = "aws s3api create-bucket --bucket other-bucket"
        mentioned = _command_mentions_resource(command, "my-bucket")
        assert not mentioned

    def test_different_resource_no_match(self) -> None:
        """A different bucket name is not a mention."""
        command = "aws s3api create-bucket --bucket test-bucket"
        mentioned = _command_mentions_resource(command, "prod-bucket")
        assert not mentioned

    def test_role_name(self) -> None:
        """The resource is given via ``--role-name`` alongside a policy ARN."""
        command = (
            "aws iam attach-role-policy --role-name my-role "
            "--policy-arn arn:aws:iam::aws:policy/ReadOnly"
        )
        mentioned = _command_mentions_resource(command, "my-role")
        assert mentioned
# ---------------------------------------------------------------------------
# _extract_resource_name
# ---------------------------------------------------------------------------
class TestExtractResourceName:
    """Exercise ``_extract_resource_name`` on commands with and without name flags."""

    def test_bucket(self) -> None:
        """The value following ``--bucket`` is returned."""
        extracted = _extract_resource_name("aws s3api create-bucket --bucket demo")
        assert extracted == "demo"

    def test_table_name_equals(self) -> None:
        """The value in ``--table-name=value`` form is returned."""
        extracted = _extract_resource_name(
            "aws dynamodb describe-table --table-name=users"
        )
        assert extracted == "users"

    def test_no_resource_flag(self) -> None:
        """A command without any flag yields ``None``."""
        extracted = _extract_resource_name("aws sts get-caller-identity")
        assert extracted is None

    def test_first_flag_wins(self) -> None:
        """With ``--bucket`` before ``--name``, the ``--bucket`` value is returned."""
        command = "aws s3api put-object --bucket first --name second"
        extracted = _extract_resource_name(command)
        assert extracted == "first"
# ---------------------------------------------------------------------------
# EpisodeTracker — record_step & basic properties
# ---------------------------------------------------------------------------
class TestRecordStep:
    """Exercise ``EpisodeTracker.record_step`` and the history/counter accessors."""

    def test_returns_step_record(self) -> None:
        """``record_step`` returns a ``StepRecord`` describing the first step."""
        tracker = EpisodeTracker()
        step = tracker.record_step("aws s3 ls", True, "buckets...", "")
        assert isinstance(step, StepRecord)
        assert step.command == "aws s3 ls"
        assert step.success is True
        assert step.step_number == 0

    def test_increments_step_counter(self) -> None:
        """Two recorded steps give a ``step_count`` of 2."""
        tracker = EpisodeTracker()
        tracker.record_step("aws s3 ls", True, "", "")
        tracker.record_step("aws ec2 describe-instances", True, "", "")
        assert tracker.step_count == 2

    def test_command_history(self) -> None:
        """History keeps the steps in recording order with their success flags."""
        tracker = EpisodeTracker()
        tracker.record_step("cmd1", True, "", "")
        tracker.record_step("cmd2", False, "", "err")
        history = tracker.command_history
        assert len(history) == 2
        assert history[0].command == "cmd1"
        assert history[1].success is False

    def test_history_is_copy(self) -> None:
        """Mutating the returned history must not affect the tracker."""
        tracker = EpisodeTracker()
        tracker.record_step("cmd", True, "", "")
        snapshot = tracker.command_history
        snapshot.clear()
        assert tracker.step_count == 1  # internal state not affected
        # step_count may be kept separately from the history list, so also
        # re-read the history: this is what actually proves a copy was handed out.
        assert len(tracker.command_history) == 1
# ---------------------------------------------------------------------------
# EpisodeTracker — reset
# ---------------------------------------------------------------------------
class TestReset:
    """Exercise ``EpisodeTracker.reset``."""

    def test_clears_all_state(self) -> None:
        """After ``reset`` every piece of state touched below reads as fresh."""
        tracker = EpisodeTracker()

        # Dirty each piece of state the assertions below inspect.
        tracker.record_step("aws s3 ls", True, "", "")
        tracker.credit_operation("ls", None)
        tracker.record_hint()
        tracker.previous_progress = 0.5

        tracker.reset()

        assert tracker.step_count == 0
        assert tracker.command_history == []
        assert tracker.hints_used == 0
        assert tracker.previous_progress == 0.0
        still_credited = tracker.is_operation_already_credited("ls", None)
        assert not still_credited
# ---------------------------------------------------------------------------
# EpisodeTracker — has_executed_operation
# ---------------------------------------------------------------------------
class TestHasExecutedOperation:
    """Exercise ``EpisodeTracker.has_executed_operation``."""

    @staticmethod
    def _tracker_after(command: str, success: bool, stderr: str = "") -> "EpisodeTracker":
        """Return a fresh tracker with exactly one recorded step (empty stdout)."""
        tracker = EpisodeTracker()
        tracker.record_step(command, success, "", stderr)
        return tracker

    def test_matches_successful_command(self) -> None:
        """A successful command counts for its operation."""
        tracker = self._tracker_after("aws s3api create-bucket --bucket demo", True)
        assert tracker.has_executed_operation("create-bucket")

    def test_ignores_failed_command(self) -> None:
        """A failed command does not count for its operation."""
        tracker = self._tracker_after(
            "aws s3api create-bucket --bucket demo", False, "err"
        )
        assert not tracker.has_executed_operation("create-bucket")

    def test_matches_with_resource(self) -> None:
        """Operation plus the matching resource name is found."""
        tracker = self._tracker_after("aws s3api create-bucket --bucket demo", True)
        assert tracker.has_executed_operation("create-bucket", "demo")

    def test_wrong_resource(self) -> None:
        """Operation with a non-matching resource name is not found."""
        tracker = self._tracker_after("aws s3api create-bucket --bucket demo", True)
        assert not tracker.has_executed_operation("create-bucket", "other")

    def test_wrong_operation(self) -> None:
        """A different operation is not found."""
        tracker = self._tracker_after("aws s3api create-bucket --bucket demo", True)
        assert not tracker.has_executed_operation("delete-bucket")

    def test_resource_none_matches_any(self) -> None:
        """Omitting the resource matches, and so does naming it explicitly."""
        tracker = self._tracker_after(
            "aws dynamodb create-table --table-name orders", True
        )
        assert tracker.has_executed_operation("create-table")
        assert tracker.has_executed_operation("create-table", "orders")

    def test_empty_history(self) -> None:
        """A tracker with no recorded steps reports nothing as executed."""
        fresh = EpisodeTracker()
        assert not fresh.has_executed_operation("anything")
# ---------------------------------------------------------------------------
# EpisodeTracker — has_used_service
# ---------------------------------------------------------------------------
class TestHasUsedService:
    """Exercise ``EpisodeTracker.has_used_service``."""

    def test_exact_service(self) -> None:
        """The exact service token of a successful command is reported as used."""
        tracker = EpisodeTracker()
        tracker.record_step("aws sqs create-queue --queue-name q1", True, "", "")
        used = tracker.has_used_service("sqs")
        assert used

    def test_substring_match(self) -> None:
        """A service name contained in the command's service token also matches."""
        tracker = EpisodeTracker()
        tracker.record_step("aws s3api create-bucket --bucket b", True, "", "")
        used = tracker.has_used_service("s3")  # "s3" in "s3api"
        assert used

    def test_ignores_failed(self) -> None:
        """A failed command does not mark its service as used."""
        tracker = EpisodeTracker()
        tracker.record_step("aws iam list-roles", False, "", "err")
        used = tracker.has_used_service("iam")
        assert not used

    def test_no_match(self) -> None:
        """An unrelated service is not reported as used."""
        tracker = EpisodeTracker()
        tracker.record_step("aws s3 ls", True, "", "")
        used = tracker.has_used_service("lambda")
        assert not used

    def test_non_aws_command(self) -> None:
        """A successful non-``aws`` command does not register as a service."""
        tracker = EpisodeTracker()
        tracker.record_step("echo hello", True, "hello", "")
        used = tracker.has_used_service("echo")
        assert not used
# ---------------------------------------------------------------------------
# EpisodeTracker — credit_operation / is_operation_already_credited
# ---------------------------------------------------------------------------
class TestCreditedOperations:
    """Exercise ``credit_operation`` and ``is_operation_already_credited``."""

    def test_not_credited_by_default(self) -> None:
        """A fresh tracker has credited nothing."""
        tracker = EpisodeTracker()
        credited = tracker.is_operation_already_credited("create-bucket", "demo")
        assert not credited

    def test_credit_and_check(self) -> None:
        """A credited (operation, resource) pair is reported as credited."""
        tracker = EpisodeTracker()
        tracker.credit_operation("create-bucket", "demo")
        credited = tracker.is_operation_already_credited("create-bucket", "demo")
        assert credited

    def test_different_resource_not_credited(self) -> None:
        """Crediting one resource does not credit another for the same operation."""
        tracker = EpisodeTracker()
        tracker.credit_operation("create-bucket", "demo")
        credited = tracker.is_operation_already_credited("create-bucket", "other")
        assert not credited

    def test_none_resource(self) -> None:
        """Crediting with ``None`` matches ``None`` only, not a named resource."""
        tracker = EpisodeTracker()
        tracker.credit_operation("list-buckets", None)
        credited_without_resource = tracker.is_operation_already_credited(
            "list-buckets", None
        )
        assert credited_without_resource
        credited_with_resource = tracker.is_operation_already_credited(
            "list-buckets", "demo"
        )
        assert not credited_with_resource
# ---------------------------------------------------------------------------
# EpisodeTracker — hints
# ---------------------------------------------------------------------------
class TestHints:
    """Exercise ``record_hint`` and the ``hints_used`` counter."""

    def test_initial_zero(self) -> None:
        """A fresh tracker has used no hints."""
        fresh = EpisodeTracker()
        assert fresh.hints_used == 0

    def test_record_hint_increments(self) -> None:
        """Each ``record_hint`` call returns the running total."""
        tracker = EpisodeTracker()
        first_total = tracker.record_hint()
        assert first_total == 1
        second_total = tracker.record_hint()
        assert second_total == 2
        assert tracker.hints_used == 2

    def test_reset_clears_hints(self) -> None:
        """``reset`` puts the hint counter back to zero."""
        tracker = EpisodeTracker()
        tracker.record_hint()
        tracker.reset()
        assert tracker.hints_used == 0
# ---------------------------------------------------------------------------
# EpisodeTracker — previous_progress
# ---------------------------------------------------------------------------
class TestPreviousProgress:
    """Exercise the ``previous_progress`` attribute."""

    def test_default_zero(self) -> None:
        """A fresh tracker reports ``0.0``."""
        fresh = EpisodeTracker()
        assert fresh.previous_progress == 0.0

    def test_setter(self) -> None:
        """An assigned value is read back unchanged."""
        tracker = EpisodeTracker()
        tracker.previous_progress = 0.75
        stored = tracker.previous_progress
        assert stored == 0.75
# ---------------------------------------------------------------------------
# EpisodeTracker — detect_rollbacks
# ---------------------------------------------------------------------------
class TestDetectRollbacks:
    """Exercise ``EpisodeTracker.detect_rollbacks``."""

    def test_no_rollbacks(self) -> None:
        """A lone create is not a rollback."""
        tracker = EpisodeTracker()
        tracker.record_step("aws s3api create-bucket --bucket demo", True, "", "")
        rollbacks = tracker.detect_rollbacks()
        assert rollbacks == 0

    def test_create_then_delete(self) -> None:
        """A successful create followed by a successful delete counts once."""
        tracker = EpisodeTracker()
        tracker.record_step("aws s3api create-bucket --bucket demo", True, "", "")
        tracker.record_step("aws s3api delete-bucket --bucket demo", True, "", "")
        rollbacks = tracker.detect_rollbacks()
        assert rollbacks == 1

    def test_failed_delete_not_counted(self) -> None:
        """A delete that failed does not count."""
        tracker = EpisodeTracker()
        tracker.record_step("aws s3api create-bucket --bucket demo", True, "", "")
        tracker.record_step("aws s3api delete-bucket --bucket demo", False, "", "err")
        rollbacks = tracker.detect_rollbacks()
        assert rollbacks == 0

    def test_different_resource_not_counted(self) -> None:
        """Creating one bucket and deleting another does not count."""
        tracker = EpisodeTracker()
        tracker.record_step("aws s3api create-bucket --bucket a", True, "", "")
        tracker.record_step("aws s3api delete-bucket --bucket b", True, "", "")
        rollbacks = tracker.detect_rollbacks()
        assert rollbacks == 0

    def test_multiple_rollbacks(self) -> None:
        """Two independent create/delete pairs count as two."""
        tracker = EpisodeTracker()
        tracker.record_step("aws s3api create-bucket --bucket a", True, "", "")
        tracker.record_step("aws s3api delete-bucket --bucket a", True, "", "")
        tracker.record_step("aws dynamodb create-table --table-name t1", True, "", "")
        tracker.record_step("aws dynamodb delete-table --table-name t1", True, "", "")
        rollbacks = tracker.detect_rollbacks()
        assert rollbacks == 2

    def test_attach_detach_role_policy(self) -> None:
        """Attaching then detaching the same role policy counts once."""
        attach_command = (
            "aws iam attach-role-policy --role-name r1 "
            "--policy-arn arn:aws:iam::aws:policy/ReadOnly"
        )
        detach_command = (
            "aws iam detach-role-policy --role-name r1 "
            "--policy-arn arn:aws:iam::aws:policy/ReadOnly"
        )
        tracker = EpisodeTracker()
        tracker.record_step(attach_command, True, "", "")
        tracker.record_step(detach_command, True, "", "")
        rollbacks = tracker.detect_rollbacks()
        assert rollbacks == 1

    def test_failed_create_not_tracked(self) -> None:
        """A delete after a failed create does not count."""
        tracker = EpisodeTracker()
        tracker.record_step("aws s3api create-bucket --bucket demo", False, "", "err")
        tracker.record_step("aws s3api delete-bucket --bucket demo", True, "", "")
        rollbacks = tracker.detect_rollbacks()
        assert rollbacks == 0
# ---------------------------------------------------------------------------
# EpisodeTracker — detect_idempotent_retries
# ---------------------------------------------------------------------------
class TestDetectIdempotentRetries:
    """Exercise ``EpisodeTracker.detect_idempotent_retries``."""

    def test_no_retries(self) -> None:
        """A single successful create yields zero retries."""
        tracker = EpisodeTracker()
        tracker.record_step("aws s3api create-bucket --bucket demo", True, "", "")
        retries = tracker.detect_idempotent_retries()
        assert retries == 0

    def test_already_exists_then_success(self) -> None:
        """A create failing with ``BucketAlreadyOwnedByYou`` then a success counts once."""
        tracker = EpisodeTracker()
        create_command = "aws s3api create-bucket --bucket demo"
        tracker.record_step(create_command, False, "", "BucketAlreadyOwnedByYou")
        tracker.record_step("aws s3api put-object --bucket demo --key f", True, "", "")
        retries = tracker.detect_idempotent_retries()
        assert retries == 1

    def test_already_exists_no_followup(self) -> None:
        """An already-exists failure as the last step is not counted."""
        tracker = EpisodeTracker()
        create_command = "aws s3api create-bucket --bucket demo"
        tracker.record_step(create_command, False, "", "BucketAlreadyExists")
        # Deliberately no further step is recorded.
        retries = tracker.detect_idempotent_retries()
        assert retries == 0

    def test_already_exists_followed_by_failure(self) -> None:
        """An already-exists failure followed by another failure is not counted."""
        tracker = EpisodeTracker()
        create_command = "aws sqs create-queue --queue-name q"
        tracker.record_step(create_command, False, "", "QueueNameExists")
        tracker.record_step("aws sqs send-message --queue-url q", False, "", "err")
        retries = tracker.detect_idempotent_retries()
        assert retries == 0

    def test_generic_already_exists(self) -> None:
        """The generic "already exists" wording is recognised too."""
        tracker = EpisodeTracker()
        create_command = "aws lambda create-function --function-name fn"
        tracker.record_step(create_command, False, "", "Resource already exists")
        tracker.record_step("aws lambda invoke --function-name fn", True, "", "")
        retries = tracker.detect_idempotent_retries()
        assert retries == 1

    def test_non_create_failure_ignored(self) -> None:
        """A failed delete carrying an already-exists message is not counted."""
        tracker = EpisodeTracker()
        delete_command = "aws s3api delete-bucket --bucket demo"
        # The error text is nonsensical for a delete; it is here to test the guard.
        tracker.record_step(delete_command, False, "", "BucketAlreadyExists")
        tracker.record_step("aws s3 ls", True, "", "")
        retries = tracker.detect_idempotent_retries()
        assert retries == 0

    def test_multiple_retries(self) -> None:
        """Two separate already-exists/success pairs count as two."""
        tracker = EpisodeTracker()

        bucket_create = "aws s3api create-bucket --bucket a"
        tracker.record_step(bucket_create, False, "", "BucketAlreadyExists")
        tracker.record_step("aws s3api put-object --bucket a --key f", True, "", "")

        queue_create = "aws sqs create-queue --queue-name q"
        tracker.record_step(queue_create, False, "", "QueueNameExists")
        tracker.record_step("aws sqs send-message --queue-url q", True, "", "")

        retries = tracker.detect_idempotent_retries()
        assert retries == 2
|