File size: 16,182 Bytes
c745a99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
"""Unit tests for the EpisodeTracker β€” command history, rollback detection, and grading helpers.

These are pure unit tests that do not require MiniStack or Docker.

Run:
    python -m pytest tests/test_episode_tracker.py -v
"""

from server.services.episode_tracker import (
    EpisodeTracker,
    StepRecord,
    _command_mentions_resource,
    _extract_resource_name,
    _parse_aws_command,
)


# ---------------------------------------------------------------------------
# _parse_aws_command
# ---------------------------------------------------------------------------


class TestParseAwsCommand:
    """Exercises ``_parse_aws_command`` on well-formed and malformed inputs."""

    def test_standard_command(self) -> None:
        """A command with trailing flags yields its second and third tokens."""
        parsed = _parse_aws_command("aws s3api create-bucket --bucket foo")
        assert parsed == ("s3api", "create-bucket")

    def test_simple_service(self) -> None:
        """A flag-less three-token command is parsed the same way."""
        parsed = _parse_aws_command("aws iam list-roles")
        assert parsed == ("iam", "list-roles")

    def test_too_few_parts(self) -> None:
        """Two tokens are not enough; the result is a pair of ``None``."""
        parsed = _parse_aws_command("aws s3")
        assert parsed == (None, None)

    def test_not_aws(self) -> None:
        """A command that does not start with ``aws`` yields a pair of ``None``."""
        parsed = _parse_aws_command("gcloud compute instances list")
        assert parsed == (None, None)

    def test_empty_string(self) -> None:
        """The empty string yields a pair of ``None``."""
        parsed = _parse_aws_command("")
        assert parsed == (None, None)

    def test_leading_whitespace(self) -> None:
        """Leading spaces before ``aws`` do not prevent parsing."""
        parsed = _parse_aws_command("  aws lambda list-functions")
        assert parsed == ("lambda", "list-functions")


# ---------------------------------------------------------------------------
# _command_mentions_resource
# ---------------------------------------------------------------------------


class TestCommandMentionsResource:
    """Exercises ``_command_mentions_resource`` with matching and non-matching names."""

    def test_flag_match(self) -> None:
        """The resource appears as a space-separated flag value."""
        command = "aws s3api create-bucket --bucket my-bucket"
        assert _command_mentions_resource(command, "my-bucket")

    def test_flag_value_syntax(self) -> None:
        """The resource appears after ``=`` in ``--flag=value`` form."""
        command = "aws dynamodb describe-table --table-name=orders"
        assert _command_mentions_resource(command, "orders")

    def test_function_name_flag(self) -> None:
        """The resource follows ``--function-name`` and precedes a positional argument."""
        command = "aws lambda invoke --function-name processor /dev/null"
        assert _command_mentions_resource(command, "processor")

    def test_arn_word_boundary(self) -> None:
        """The resource is the last colon-separated component of an ARN."""
        command = (
            "aws lambda create-event-source-mapping "
            "--event-source-arn arn:aws:sqs:us-east-1:000000000000:my-queue"
        )
        assert _command_mentions_resource(command, "my-queue")

    def test_no_match(self) -> None:
        """A bucket whose name merely ends in ``-bucket`` is not a match."""
        command = "aws s3api create-bucket --bucket other-bucket"
        mentioned = _command_mentions_resource(command, "my-bucket")
        assert not mentioned

    def test_different_resource_no_match(self) -> None:
        """Two distinct names sharing a suffix are not confused."""
        command = "aws s3api create-bucket --bucket test-bucket"
        mentioned = _command_mentions_resource(command, "prod-bucket")
        assert not mentioned

    def test_role_name(self) -> None:
        """The role name is found even though a policy ARN follows it."""
        command = (
            "aws iam attach-role-policy --role-name my-role "
            "--policy-arn arn:aws:iam::aws:policy/ReadOnly"
        )
        assert _command_mentions_resource(command, "my-role")


# ---------------------------------------------------------------------------
# _extract_resource_name
# ---------------------------------------------------------------------------


class TestExtractResourceName:
    """Exercises ``_extract_resource_name`` on commands with and without resource flags."""

    def test_bucket(self) -> None:
        """The value following ``--bucket`` is returned."""
        extracted = _extract_resource_name("aws s3api create-bucket --bucket demo")
        assert extracted == "demo"

    def test_table_name_equals(self) -> None:
        """The value after ``=`` in ``--table-name=...`` is returned."""
        extracted = _extract_resource_name(
            "aws dynamodb describe-table --table-name=users"
        )
        assert extracted == "users"

    def test_no_resource_flag(self) -> None:
        """A command carrying no flags at all yields ``None``."""
        extracted = _extract_resource_name("aws sts get-caller-identity")
        assert extracted is None

    def test_first_flag_wins(self) -> None:
        """With both ``--bucket`` and ``--name`` present, the ``--bucket`` value is returned."""
        extracted = _extract_resource_name(
            "aws s3api put-object --bucket first --name second"
        )
        assert extracted == "first"


# ---------------------------------------------------------------------------
# EpisodeTracker — record_step & basic properties
# ---------------------------------------------------------------------------


class TestRecordStep:
    """Covers ``EpisodeTracker.record_step`` and the read-only views it feeds."""

    def test_returns_step_record(self) -> None:
        """``record_step`` returns a ``StepRecord`` that echoes its inputs."""
        tracker = EpisodeTracker()
        record = tracker.record_step("aws s3 ls", True, "buckets...", "")
        assert isinstance(record, StepRecord)
        assert record.command == "aws s3 ls"
        assert record.success is True
        assert record.step_number == 0

    def test_increments_step_counter(self) -> None:
        """Each recorded step bumps ``step_count`` by one."""
        tracker = EpisodeTracker()
        tracker.record_step("aws s3 ls", True, "", "")
        tracker.record_step("aws ec2 describe-instances", True, "", "")
        assert tracker.step_count == 2

    def test_command_history(self) -> None:
        """History keeps steps in insertion order with their success flag."""
        tracker = EpisodeTracker()
        tracker.record_step("cmd1", True, "", "")
        tracker.record_step("cmd2", False, "", "err")
        history = tracker.command_history
        assert len(history) == 2
        assert history[0].command == "cmd1"
        assert history[1].success is False

    def test_history_is_copy(self) -> None:
        """Mutating the list returned by ``command_history`` must not leak inward."""
        tracker = EpisodeTracker()
        tracker.record_step("cmd", True, "", "")
        snapshot = tracker.command_history
        snapshot.clear()
        assert tracker.step_count == 1  # internal state not affected
        # Check the history itself as well: step_count alone would still pass
        # if it were tracked separately from the list that was just cleared.
        assert len(tracker.command_history) == 1


# ---------------------------------------------------------------------------
# EpisodeTracker — reset
# ---------------------------------------------------------------------------


class TestReset:
    """Covers ``EpisodeTracker.reset``."""

    def test_clears_all_state(self) -> None:
        """After ``reset`` every piece of state touched beforehand reads as fresh."""
        tracker = EpisodeTracker()

        # Dirty each kind of state the tracker exposes in this module's tests.
        tracker.record_step("aws s3 ls", True, "", "")
        tracker.credit_operation("ls", None)
        tracker.record_hint()
        tracker.previous_progress = 0.5

        tracker.reset()

        assert tracker.step_count == 0
        assert tracker.command_history == []
        assert tracker.hints_used == 0
        assert tracker.previous_progress == 0.0
        still_credited = tracker.is_operation_already_credited("ls", None)
        assert not still_credited


# ---------------------------------------------------------------------------
# EpisodeTracker — has_executed_operation
# ---------------------------------------------------------------------------


class TestHasExecutedOperation:
    """Covers ``EpisodeTracker.has_executed_operation``."""

    @staticmethod
    def _tracker_after(command: str, succeeded: bool, stderr: str = "") -> EpisodeTracker:
        """Return a fresh tracker holding exactly one recorded step."""
        tracker = EpisodeTracker()
        tracker.record_step(command, succeeded, "", stderr)
        return tracker

    def test_matches_successful_command(self) -> None:
        """A successful step is found by its operation name."""
        tracker = self._tracker_after("aws s3api create-bucket --bucket demo", True)
        assert tracker.has_executed_operation("create-bucket")

    def test_ignores_failed_command(self) -> None:
        """A failed step does not count as executed."""
        tracker = self._tracker_after(
            "aws s3api create-bucket --bucket demo", False, "err"
        )
        executed = tracker.has_executed_operation("create-bucket")
        assert not executed

    def test_matches_with_resource(self) -> None:
        """Operation plus the matching resource name is found."""
        tracker = self._tracker_after("aws s3api create-bucket --bucket demo", True)
        assert tracker.has_executed_operation("create-bucket", "demo")

    def test_wrong_resource(self) -> None:
        """Right operation, wrong resource name: not found."""
        tracker = self._tracker_after("aws s3api create-bucket --bucket demo", True)
        executed = tracker.has_executed_operation("create-bucket", "other")
        assert not executed

    def test_wrong_operation(self) -> None:
        """A different operation on the same service is not found."""
        tracker = self._tracker_after("aws s3api create-bucket --bucket demo", True)
        executed = tracker.has_executed_operation("delete-bucket")
        assert not executed

    def test_resource_none_matches_any(self) -> None:
        """Omitting the resource matches, and so does naming it explicitly."""
        tracker = self._tracker_after(
            "aws dynamodb create-table --table-name orders", True
        )
        assert tracker.has_executed_operation("create-table")
        assert tracker.has_executed_operation("create-table", "orders")

    def test_empty_history(self) -> None:
        """A tracker with no steps reports nothing as executed."""
        tracker = EpisodeTracker()
        assert not tracker.has_executed_operation("anything")


# ---------------------------------------------------------------------------
# EpisodeTracker — has_used_service
# ---------------------------------------------------------------------------


class TestHasUsedService:
    """Covers ``EpisodeTracker.has_used_service``."""

    @staticmethod
    def _tracker_after(
        command: str, succeeded: bool, stdout: str = "", stderr: str = ""
    ) -> EpisodeTracker:
        """Return a fresh tracker holding exactly one recorded step."""
        tracker = EpisodeTracker()
        tracker.record_step(command, succeeded, stdout, stderr)
        return tracker

    def test_exact_service(self) -> None:
        """The service token of a successful command is reported as used."""
        tracker = self._tracker_after("aws sqs create-queue --queue-name q1", True)
        assert tracker.has_used_service("sqs")

    def test_substring_match(self) -> None:
        """Querying ``s3`` matches a command whose service token is ``s3api``."""
        tracker = self._tracker_after("aws s3api create-bucket --bucket b", True)
        assert tracker.has_used_service("s3")  # "s3" in "s3api"

    def test_ignores_failed(self) -> None:
        """A failed command does not mark its service as used."""
        tracker = self._tracker_after("aws iam list-roles", False, stderr="err")
        used = tracker.has_used_service("iam")
        assert not used

    def test_no_match(self) -> None:
        """A service that never appeared is not reported."""
        tracker = self._tracker_after("aws s3 ls", True)
        used = tracker.has_used_service("lambda")
        assert not used

    def test_non_aws_command(self) -> None:
        """The first token of a non-``aws`` command is not treated as a service."""
        tracker = self._tracker_after("echo hello", True, stdout="hello")
        used = tracker.has_used_service("echo")
        assert not used


# ---------------------------------------------------------------------------
# EpisodeTracker — credit_operation / is_operation_already_credited
# ---------------------------------------------------------------------------


class TestCreditedOperations:
    """Covers ``credit_operation`` and ``is_operation_already_credited``."""

    def test_not_credited_by_default(self) -> None:
        """A fresh tracker has credited nothing."""
        tracker = EpisodeTracker()
        credited = tracker.is_operation_already_credited("create-bucket", "demo")
        assert not credited

    def test_credit_and_check(self) -> None:
        """Crediting an (operation, resource) pair makes the same pair read as credited."""
        tracker = EpisodeTracker()
        tracker.credit_operation("create-bucket", "demo")
        credited = tracker.is_operation_already_credited("create-bucket", "demo")
        assert credited

    def test_different_resource_not_credited(self) -> None:
        """Credit for one resource does not carry over to another."""
        tracker = EpisodeTracker()
        tracker.credit_operation("create-bucket", "demo")
        credited = tracker.is_operation_already_credited("create-bucket", "other")
        assert not credited

    def test_none_resource(self) -> None:
        """Credit recorded with ``None`` as the resource matches only ``None``."""
        tracker = EpisodeTracker()
        tracker.credit_operation("list-buckets", None)
        assert tracker.is_operation_already_credited("list-buckets", None)
        credited_for_named = tracker.is_operation_already_credited(
            "list-buckets", "demo"
        )
        assert not credited_for_named


# ---------------------------------------------------------------------------
# EpisodeTracker — hints
# ---------------------------------------------------------------------------


class TestHints:
    """Covers ``record_hint`` and the ``hints_used`` counter."""

    def test_initial_zero(self) -> None:
        """A fresh tracker has used no hints."""
        tracker = EpisodeTracker()
        assert tracker.hints_used == 0

    def test_record_hint_increments(self) -> None:
        """``record_hint`` returns the running total, which ``hints_used`` mirrors."""
        tracker = EpisodeTracker()
        first_total = tracker.record_hint()
        assert first_total == 1
        second_total = tracker.record_hint()
        assert second_total == 2
        assert tracker.hints_used == 2

    def test_reset_clears_hints(self) -> None:
        """``reset`` brings the hint counter back to zero."""
        tracker = EpisodeTracker()
        tracker.record_hint()
        tracker.reset()
        assert tracker.hints_used == 0


# ---------------------------------------------------------------------------
# EpisodeTracker — previous_progress
# ---------------------------------------------------------------------------


class TestPreviousProgress:
    """Covers the ``previous_progress`` attribute of ``EpisodeTracker``."""

    def test_default_zero(self) -> None:
        """A fresh tracker reports ``0.0`` progress."""
        tracker = EpisodeTracker()
        assert tracker.previous_progress == 0.0

    def test_setter(self) -> None:
        """An assigned value is read back unchanged."""
        tracker = EpisodeTracker()
        tracker.previous_progress = 0.75
        observed = tracker.previous_progress
        assert observed == 0.75


# ---------------------------------------------------------------------------
# EpisodeTracker — detect_rollbacks
# ---------------------------------------------------------------------------


class TestDetectRollbacks:
    """Covers ``EpisodeTracker.detect_rollbacks``."""

    @staticmethod
    def _rollbacks_after(*steps) -> int:
        """Record ``steps`` in order on a fresh tracker and return its rollback count.

        Each step is a ``(command, success, stdout, stderr)`` tuple passed
        straight through to ``record_step``.
        """
        tracker = EpisodeTracker()
        for command, success, stdout, stderr in steps:
            tracker.record_step(command, success, stdout, stderr)
        return tracker.detect_rollbacks()

    def test_no_rollbacks(self) -> None:
        """A lone create is not a rollback."""
        count = self._rollbacks_after(
            ("aws s3api create-bucket --bucket demo", True, "", ""),
        )
        assert count == 0

    def test_create_then_delete(self) -> None:
        """Create followed by delete of the same bucket counts once."""
        count = self._rollbacks_after(
            ("aws s3api create-bucket --bucket demo", True, "", ""),
            ("aws s3api delete-bucket --bucket demo", True, "", ""),
        )
        assert count == 1

    def test_failed_delete_not_counted(self) -> None:
        """A delete that failed does not count."""
        count = self._rollbacks_after(
            ("aws s3api create-bucket --bucket demo", True, "", ""),
            ("aws s3api delete-bucket --bucket demo", False, "", "err"),
        )
        assert count == 0

    def test_different_resource_not_counted(self) -> None:
        """Deleting a different bucket than the one created does not count."""
        count = self._rollbacks_after(
            ("aws s3api create-bucket --bucket a", True, "", ""),
            ("aws s3api delete-bucket --bucket b", True, "", ""),
        )
        assert count == 0

    def test_multiple_rollbacks(self) -> None:
        """Two independent create/delete pairs count twice."""
        count = self._rollbacks_after(
            ("aws s3api create-bucket --bucket a", True, "", ""),
            ("aws s3api delete-bucket --bucket a", True, "", ""),
            ("aws dynamodb create-table --table-name t1", True, "", ""),
            ("aws dynamodb delete-table --table-name t1", True, "", ""),
        )
        assert count == 2

    def test_attach_detach_role_policy(self) -> None:
        """Attach followed by detach of the same role policy counts once."""
        attach = (
            "aws iam attach-role-policy --role-name r1 "
            "--policy-arn arn:aws:iam::aws:policy/ReadOnly"
        )
        detach = (
            "aws iam detach-role-policy --role-name r1 "
            "--policy-arn arn:aws:iam::aws:policy/ReadOnly"
        )
        count = self._rollbacks_after(
            (attach, True, "", ""),
            (detach, True, "", ""),
        )
        assert count == 1

    def test_failed_create_not_tracked(self) -> None:
        """A delete after a create that failed does not count."""
        count = self._rollbacks_after(
            ("aws s3api create-bucket --bucket demo", False, "", "err"),
            ("aws s3api delete-bucket --bucket demo", True, "", ""),
        )
        assert count == 0


# ---------------------------------------------------------------------------
# EpisodeTracker — detect_idempotent_retries
# ---------------------------------------------------------------------------


class TestDetectIdempotentRetries:
    """Covers ``EpisodeTracker.detect_idempotent_retries``."""

    @staticmethod
    def _retries_after(*steps) -> int:
        """Record ``steps`` in order on a fresh tracker and return its retry count.

        Each step is a ``(command, success, stdout, stderr)`` tuple passed
        straight through to ``record_step``.
        """
        tracker = EpisodeTracker()
        for command, success, stdout, stderr in steps:
            tracker.record_step(command, success, stdout, stderr)
        return tracker.detect_idempotent_retries()

    def test_no_retries(self) -> None:
        """A create that simply succeeds is not a retry."""
        count = self._retries_after(
            ("aws s3api create-bucket --bucket demo", True, "", ""),
        )
        assert count == 0

    def test_already_exists_then_success(self) -> None:
        """A create failing with ``BucketAlreadyOwnedByYou`` then a success counts once."""
        count = self._retries_after(
            ("aws s3api create-bucket --bucket demo", False, "", "BucketAlreadyOwnedByYou"),
            ("aws s3api put-object --bucket demo --key f", True, "", ""),
        )
        assert count == 1

    def test_already_exists_no_followup(self) -> None:
        """With no next step after the failed create, nothing is counted."""
        count = self._retries_after(
            ("aws s3api create-bucket --bucket demo", False, "", "BucketAlreadyExists"),
        )
        assert count == 0

    def test_already_exists_followed_by_failure(self) -> None:
        """A failed follow-up step does not count."""
        count = self._retries_after(
            ("aws sqs create-queue --queue-name q", False, "", "QueueNameExists"),
            ("aws sqs send-message --queue-url q", False, "", "err"),
        )
        assert count == 0

    def test_generic_already_exists(self) -> None:
        """The free-form message ``Resource already exists`` is recognised too."""
        count = self._retries_after(
            ("aws lambda create-function --function-name fn", False, "", "Resource already exists"),
            ("aws lambda invoke --function-name fn", True, "", ""),
        )
        assert count == 1

    def test_non_create_failure_ignored(self) -> None:
        """An already-exists error on a delete command is not counted."""
        count = self._retries_after(
            # The error text is nonsensical for a delete, but it tests the guard.
            ("aws s3api delete-bucket --bucket demo", False, "", "BucketAlreadyExists"),
            ("aws s3 ls", True, "", ""),
        )
        assert count == 0

    def test_multiple_retries(self) -> None:
        """Two separate failed-create/success pairs count twice."""
        count = self._retries_after(
            ("aws s3api create-bucket --bucket a", False, "", "BucketAlreadyExists"),
            ("aws s3api put-object --bucket a --key f", True, "", ""),
            ("aws sqs create-queue --queue-name q", False, "", "QueueNameExists"),
            ("aws sqs send-message --queue-url q", True, "", ""),
        )
        assert count == 2