ssaraf1 commited on
Commit
c52546e
·
verified ·
1 Parent(s): ceb3690

Upload eval_results_v3_verified.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. eval_results_v3_verified.json +686 -0
eval_results_v3_verified.json ADDED
@@ -0,0 +1,686 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": 1,
4
+ "name": "WS-N1: Reminder sent \u2192 move to booking",
5
+ "category": "NEXT",
6
+ "expected": "NEXT",
7
+ "prediction": "FORK",
8
+ "correct": false,
9
+ "time": 2.75
10
+ },
11
+ {
12
+ "id": 2,
13
+ "name": "Mid workflow \u2014 after inspection",
14
+ "category": "NEXT",
15
+ "expected": "NEXT",
16
+ "prediction": "NEXT",
17
+ "correct": true,
18
+ "time": 0.75
19
+ },
20
+ {
21
+ "id": 3,
22
+ "name": "Near completion \u2014 final delivery step",
23
+ "category": "NEXT",
24
+ "expected": "NEXT",
25
+ "prediction": "NEXT",
26
+ "correct": true,
27
+ "time": 0.7
28
+ },
29
+ {
30
+ "id": 4,
31
+ "name": "Booking failed \u2014 customer didn't answer",
32
+ "category": "RETRY",
33
+ "expected": "RETRY",
34
+ "prediction": "RETRY",
35
+ "correct": true,
36
+ "time": 0.88
37
+ },
38
+ {
39
+ "id": 5,
40
+ "name": "Communication delivery failed",
41
+ "category": "RETRY",
42
+ "expected": "RETRY",
43
+ "prediction": "RETRY",
44
+ "correct": true,
45
+ "time": 0.87
46
+ },
47
+ {
48
+ "id": 6,
49
+ "name": "Parallel opportunity \u2014 two independent tasks",
50
+ "category": "FORK",
51
+ "expected": "FORK",
52
+ "prediction": "FORK",
53
+ "correct": true,
54
+ "time": 0.88
55
+ },
56
+ {
57
+ "id": 7,
58
+ "name": "Merge parallel branches",
59
+ "category": "JOIN",
60
+ "expected": "JOIN",
61
+ "prediction": "JOIN",
62
+ "correct": true,
63
+ "time": 0.8
64
+ },
65
+ {
66
+ "id": 8,
67
+ "name": "Join after both parallel tasks complete",
68
+ "category": "JOIN",
69
+ "expected": "JOIN",
70
+ "prediction": "JOIN",
71
+ "correct": true,
72
+ "time": 0.8
73
+ },
74
+ {
75
+ "id": 9,
76
+ "name": "Intent mismatch \u2014 customer asks about insurance",
77
+ "category": "META",
78
+ "expected": "META",
79
+ "prediction": "NEXT",
80
+ "correct": false,
81
+ "time": 0.81
82
+ },
83
+ {
84
+ "id": 10,
85
+ "name": "Missing capability \u2014 no actor can handle this",
86
+ "category": "META",
87
+ "expected": "META",
88
+ "prediction": "NEXT",
89
+ "correct": false,
90
+ "time": 0.74
91
+ },
92
+ {
93
+ "id": 11,
94
+ "name": "Last step to success \u2014 follow-up or finish",
95
+ "category": "NEXT",
96
+ "expected": "NEXT",
97
+ "prediction": "NEXT",
98
+ "correct": true,
99
+ "time": 0.81
100
+ },
101
+ {
102
+ "id": 12,
103
+ "name": "Post-fork branch progression \u2014 single branch advances",
104
+ "category": "NEXT",
105
+ "expected": "NEXT",
106
+ "prediction": "NEXT",
107
+ "correct": true,
108
+ "time": 0.7
109
+ },
110
+ {
111
+ "id": 13,
112
+ "name": "High SLA pressure \u2014 must advance quickly",
113
+ "category": "NEXT",
114
+ "expected": "NEXT",
115
+ "prediction": "NEXT",
116
+ "correct": true,
117
+ "time": 0.7
118
+ },
119
+ {
120
+ "id": 14,
121
+ "name": "Choose mandatory path over optional",
122
+ "category": "NEXT",
123
+ "expected": "NEXT",
124
+ "prediction": "NEXT",
125
+ "correct": true,
126
+ "time": 0.81
127
+ },
128
+ {
129
+ "id": 15,
130
+ "name": "Linear progression after retry recovery",
131
+ "category": "NEXT",
132
+ "expected": "NEXT",
133
+ "prediction": "NEXT",
134
+ "correct": true,
135
+ "time": 0.7
136
+ },
137
+ {
138
+ "id": 16,
139
+ "name": "Early workflow \u2014 low goal progress, clear path",
140
+ "category": "NEXT",
141
+ "expected": "NEXT",
142
+ "prediction": "NEXT",
143
+ "correct": true,
144
+ "time": 0.71
145
+ },
146
+ {
147
+ "id": 17,
148
+ "name": "Cost-constrained single path \u2014 budget nearly exhausted",
149
+ "category": "NEXT",
150
+ "expected": "NEXT",
151
+ "prediction": "NEXT",
152
+ "correct": true,
153
+ "time": 0.7
154
+ },
155
+ {
156
+ "id": 18,
157
+ "name": "Retry nearing exhaustion \u2014 3rd attempt",
158
+ "category": "RETRY",
159
+ "expected": "RETRY",
160
+ "prediction": "RETRY",
161
+ "correct": true,
162
+ "time": 0.89
163
+ },
164
+ {
165
+ "id": 19,
166
+ "name": "API call timeout \u2014 transient error retry",
167
+ "category": "RETRY",
168
+ "expected": "RETRY",
169
+ "prediction": "RETRY",
170
+ "correct": true,
171
+ "time": 0.88
172
+ },
173
+ {
174
+ "id": 20,
175
+ "name": "Partial data received \u2014 retry for completeness",
176
+ "category": "RETRY",
177
+ "expected": "RETRY",
178
+ "prediction": "RETRY",
179
+ "correct": true,
180
+ "time": 0.89
181
+ },
182
+ {
183
+ "id": 21,
184
+ "name": "Communication retry \u2014 first failure in notification",
185
+ "category": "RETRY",
186
+ "expected": "RETRY",
187
+ "prediction": "RETRY",
188
+ "correct": true,
189
+ "time": 0.89
190
+ },
191
+ {
192
+ "id": 22,
193
+ "name": "Semantic parallel \u2014 independent actors (AGENT + SYSTEM)",
194
+ "category": "FORK",
195
+ "expected": "FORK",
196
+ "prediction": "FORK",
197
+ "correct": true,
198
+ "time": 0.9
199
+ },
200
+ {
201
+ "id": 23,
202
+ "name": "Semantic parallel \u2014 independent resources (DEVICE + API)",
203
+ "category": "FORK",
204
+ "expected": "FORK",
205
+ "prediction": "FORK",
206
+ "correct": true,
207
+ "time": 0.91
208
+ },
209
+ {
210
+ "id": 24,
211
+ "name": "Semantic parallel \u2014 context-independent tasks",
212
+ "category": "FORK",
213
+ "expected": "FORK",
214
+ "prediction": "FORK",
215
+ "correct": true,
216
+ "time": 0.89
217
+ },
218
+ {
219
+ "id": 25,
220
+ "name": "Semantic parallel \u2014 three independent branches",
221
+ "category": "FORK",
222
+ "expected": "FORK",
223
+ "prediction": "FORK",
224
+ "correct": true,
225
+ "time": 0.9
226
+ },
227
+ {
228
+ "id": 26,
229
+ "name": "Fork opportunity under low resource pressure",
230
+ "category": "FORK",
231
+ "expected": "FORK",
232
+ "prediction": "FORK",
233
+ "correct": true,
234
+ "time": 0.89
235
+ },
236
+ {
237
+ "id": 27,
238
+ "name": "Nested fork \u2014 already in parallel, fork deeper",
239
+ "category": "FORK",
240
+ "expected": "FORK",
241
+ "prediction": "FORK",
242
+ "correct": true,
243
+ "time": 0.91
244
+ },
245
+ {
246
+ "id": 28,
247
+ "name": "Fork with different actor types \u2014 HUMAN + SYSTEM",
248
+ "category": "FORK",
249
+ "expected": "FORK",
250
+ "prediction": "FORK",
251
+ "correct": true,
252
+ "time": 0.93
253
+ },
254
+ {
255
+ "id": 29,
256
+ "name": "Semantic join \u2014 downstream needs both outputs",
257
+ "category": "JOIN",
258
+ "expected": "JOIN",
259
+ "prediction": "JOIN",
260
+ "correct": true,
261
+ "time": 0.82
262
+ },
263
+ {
264
+ "id": 30,
265
+ "name": "Semantic join \u2014 multi-input dependency merge",
266
+ "category": "JOIN",
267
+ "expected": "JOIN",
268
+ "prediction": "JOIN",
269
+ "correct": true,
270
+ "time": 0.82
271
+ },
272
+ {
273
+ "id": 31,
274
+ "name": "Deep parallel join \u2014 parallel_depth=2",
275
+ "category": "JOIN",
276
+ "expected": "JOIN",
277
+ "prediction": "JOIN",
278
+ "correct": true,
279
+ "time": 0.81
280
+ },
281
+ {
282
+ "id": 32,
283
+ "name": "Join after retry in one branch",
284
+ "category": "JOIN",
285
+ "expected": "JOIN",
286
+ "prediction": "JOIN",
287
+ "correct": true,
288
+ "time": 0.81
289
+ },
290
+ {
291
+ "id": 33,
292
+ "name": "Join with high resource pressure \u2014 eager to merge",
293
+ "category": "JOIN",
294
+ "expected": "JOIN",
295
+ "prediction": "JOIN",
296
+ "correct": true,
297
+ "time": 0.81
298
+ },
299
+ {
300
+ "id": 34,
301
+ "name": "Join \u2014 both branches complete, clear dependency",
302
+ "category": "JOIN",
303
+ "expected": "JOIN",
304
+ "prediction": "JOIN",
305
+ "correct": true,
306
+ "time": 0.81
307
+ },
308
+ {
309
+ "id": 35,
310
+ "name": "Anomaly \u2014 policy violation detected",
311
+ "category": "META",
312
+ "expected": "META",
313
+ "prediction": "VIOLATION",
314
+ "correct": false,
315
+ "time": 0.99
316
+ },
317
+ {
318
+ "id": 36,
319
+ "name": "Anomaly \u2014 external dependency unavailable",
320
+ "category": "META",
321
+ "expected": "META",
322
+ "prediction": "RETRY",
323
+ "correct": false,
324
+ "time": 0.91
325
+ },
326
+ {
327
+ "id": 37,
328
+ "name": "Anomaly \u2014 unsupported request type",
329
+ "category": "META",
330
+ "expected": "META",
331
+ "prediction": "NEXT",
332
+ "correct": false,
333
+ "time": 0.83
334
+ },
335
+ {
336
+ "id": 38,
337
+ "name": "Anomaly \u2014 data integrity failure",
338
+ "category": "META",
339
+ "expected": "META",
340
+ "prediction": "NEXT",
341
+ "correct": false,
342
+ "time": 0.83
343
+ },
344
+ {
345
+ "id": 39,
346
+ "name": "Anomaly \u2014 security concern mid-workflow",
347
+ "category": "META",
348
+ "expected": "META",
349
+ "prediction": "NEXT",
350
+ "correct": false,
351
+ "time": 0.86
352
+ },
353
+ {
354
+ "id": 40,
355
+ "name": "Anomaly \u2014 contradictory outcomes from parallel branches",
356
+ "category": "META",
357
+ "expected": "META",
358
+ "prediction": "RETRY",
359
+ "correct": false,
360
+ "time": 0.94
361
+ },
362
+ {
363
+ "id": 41,
364
+ "name": "PB-N1: Fork opportunity blocked by high resource pressure",
365
+ "category": "NEXT",
366
+ "expected": "NEXT",
367
+ "prediction": "FORK",
368
+ "correct": false,
369
+ "time": 0.97
370
+ },
371
+ {
372
+ "id": 42,
373
+ "name": "PB-N2: Fork opportunity blocked by high uncertainty",
374
+ "category": "NEXT",
375
+ "expected": "NEXT",
376
+ "prediction": "FORK",
377
+ "correct": false,
378
+ "time": 0.89
379
+ },
380
+ {
381
+ "id": 43,
382
+ "name": "PB-N3: Fork opportunity blocked \u2014 already in parallel",
383
+ "category": "NEXT",
384
+ "expected": "NEXT",
385
+ "prediction": "FORK",
386
+ "correct": false,
387
+ "time": 0.89
388
+ },
389
+ {
390
+ "id": 44,
391
+ "name": "PB-N4: Fork opportunity blocked \u2014 high cost budget consumed",
392
+ "category": "NEXT",
393
+ "expected": "NEXT",
394
+ "prediction": "FORK",
395
+ "correct": false,
396
+ "time": 0.92
397
+ },
398
+ {
399
+ "id": 45,
400
+ "name": "PB-N5: Join topology present but no active parallel \u2014 just advance",
401
+ "category": "NEXT",
402
+ "expected": "NEXT",
403
+ "prediction": "JOIN",
404
+ "correct": false,
405
+ "time": 0.76
406
+ },
407
+ {
408
+ "id": 46,
409
+ "name": "PB-N6: Retry exhausted \u2014 failure but max retries hit \u2192 advance",
410
+ "category": "NEXT",
411
+ "expected": "NEXT",
412
+ "prediction": "RETRY",
413
+ "correct": false,
414
+ "time": 0.9
415
+ },
416
+ {
417
+ "id": 47,
418
+ "name": "PB-N7: Forkable + join_ready present simultaneously \u2014 prioritize join",
419
+ "category": "NEXT",
420
+ "expected": "NEXT",
421
+ "prediction": "JOIN",
422
+ "correct": false,
423
+ "time": 0.81
424
+ },
425
+ {
426
+ "id": 48,
427
+ "name": "PB-N8: High SLA pressure forces skip of optional parallel",
428
+ "category": "NEXT",
429
+ "expected": "NEXT",
430
+ "prediction": "FORK",
431
+ "correct": false,
432
+ "time": 0.9
433
+ },
434
+ {
435
+ "id": 49,
436
+ "name": "PB-R1: Failure on first attempt \u2014 low pressure, retry obvious",
437
+ "category": "RETRY",
438
+ "expected": "RETRY",
439
+ "prediction": "RETRY",
440
+ "correct": true,
441
+ "time": 0.91
442
+ },
443
+ {
444
+ "id": 50,
445
+ "name": "PB-R2: Retry under moderate SLA pressure \u2014 still worthwhile",
446
+ "category": "RETRY",
447
+ "expected": "RETRY",
448
+ "prediction": "RETRY",
449
+ "correct": true,
450
+ "time": 0.93
451
+ },
452
+ {
453
+ "id": 51,
454
+ "name": "PB-R3: Retry inside parallel branch \u2014 failure doesn't block join",
455
+ "category": "RETRY",
456
+ "expected": "RETRY",
457
+ "prediction": "RETRY",
458
+ "correct": true,
459
+ "time": 0.89
460
+ },
461
+ {
462
+ "id": 52,
463
+ "name": "PB-R4: Retry after soft failure \u2014 data quality issue",
464
+ "category": "RETRY",
465
+ "expected": "RETRY",
466
+ "prediction": "RETRY",
467
+ "correct": true,
468
+ "time": 0.92
469
+ },
470
+ {
471
+ "id": 53,
472
+ "name": "PB-F1: Classic fork \u2014 low pressure, actors independent, no parallel yet",
473
+ "category": "FORK",
474
+ "expected": "FORK",
475
+ "prediction": "FORK",
476
+ "correct": true,
477
+ "time": 0.95
478
+ },
479
+ {
480
+ "id": 54,
481
+ "name": "PB-F2: Fork after successful checkpoint \u2014 resources freed",
482
+ "category": "FORK",
483
+ "expected": "FORK",
484
+ "prediction": "FORK",
485
+ "correct": true,
486
+ "time": 0.9
487
+ },
488
+ {
489
+ "id": 55,
490
+ "name": "PB-F3: Fork with three branches \u2014 all actors different",
491
+ "category": "FORK",
492
+ "expected": "FORK",
493
+ "prediction": "FORK",
494
+ "correct": true,
495
+ "time": 0.9
496
+ },
497
+ {
498
+ "id": 56,
499
+ "name": "PB-F4: Fork opportunity \u2014 moderate goal progress, low risk",
500
+ "category": "FORK",
501
+ "expected": "FORK",
502
+ "prediction": "FORK",
503
+ "correct": true,
504
+ "time": 0.94
505
+ },
506
+ {
507
+ "id": 57,
508
+ "name": "PB-J1: Classic join \u2014 parallel active, join_ready, both branches done",
509
+ "category": "JOIN",
510
+ "expected": "JOIN",
511
+ "prediction": "JOIN",
512
+ "correct": true,
513
+ "time": 0.84
514
+ },
515
+ {
516
+ "id": 58,
517
+ "name": "PB-J2: Join under SLA pressure \u2014 merge quickly",
518
+ "category": "JOIN",
519
+ "expected": "JOIN",
520
+ "prediction": "JOIN",
521
+ "correct": true,
522
+ "time": 0.82
523
+ },
524
+ {
525
+ "id": 59,
526
+ "name": "PB-J3: Join at high parallel depth \u2014 reduce complexity",
527
+ "category": "JOIN",
528
+ "expected": "JOIN",
529
+ "prediction": "JOIN",
530
+ "correct": true,
531
+ "time": 0.83
532
+ },
533
+ {
534
+ "id": 60,
535
+ "name": "PB-J4: Join with post-retry convergence \u2014 one branch recovered",
536
+ "category": "JOIN",
537
+ "expected": "JOIN",
538
+ "prediction": "JOIN",
539
+ "correct": true,
540
+ "time": 0.82
541
+ },
542
+ {
543
+ "id": 61,
544
+ "name": "PB-M1: Very high uncertainty + anomaly outcome \u2192 clear META",
545
+ "category": "META",
546
+ "expected": "META",
547
+ "prediction": "NEXT",
548
+ "correct": false,
549
+ "time": 0.84
550
+ },
551
+ {
552
+ "id": 62,
553
+ "name": "PB-M2: Missing context \u2014 no eligible node can satisfy requirement",
554
+ "category": "META",
555
+ "expected": "META",
556
+ "prediction": "NEXT",
557
+ "correct": false,
558
+ "time": 0.88
559
+ },
560
+ {
561
+ "id": 63,
562
+ "name": "PB-M3: Repeated failures + high uncertainty \u2192 META not RETRY",
563
+ "category": "META",
564
+ "expected": "META",
565
+ "prediction": "RETRY",
566
+ "correct": false,
567
+ "time": 0.91
568
+ },
569
+ {
570
+ "id": 64,
571
+ "name": "PB-M4: Regulatory compliance flag \u2014 must halt for review",
572
+ "category": "META",
573
+ "expected": "META",
574
+ "prediction": "NEXT",
575
+ "correct": false,
576
+ "time": 0.83
577
+ },
578
+ {
579
+ "id": 65,
580
+ "name": "PB-X1: Ambiguous outcome \u2014 moderate uncertainty \u2192 NEXT (not META)",
581
+ "category": "NEXT",
582
+ "expected": "NEXT",
583
+ "prediction": "NEXT",
584
+ "correct": true,
585
+ "time": 0.81
586
+ },
587
+ {
588
+ "id": 66,
589
+ "name": "PB-X2: Success after many retries \u2014 just advance (not retry again)",
590
+ "category": "NEXT",
591
+ "expected": "NEXT",
592
+ "prediction": "NEXT",
593
+ "correct": true,
594
+ "time": 0.69
595
+ },
596
+ {
597
+ "id": 67,
598
+ "name": "PB-X3: Multiple eligible including terminator \u2014 mid-progress, keep going",
599
+ "category": "NEXT",
600
+ "expected": "NEXT",
601
+ "prediction": "CONTINUE",
602
+ "correct": false,
603
+ "time": 0.94
604
+ },
605
+ {
606
+ "id": 68,
607
+ "name": "PB-X4: Low progress + terminator available \u2192 still advance (too early to quit)",
608
+ "category": "NEXT",
609
+ "expected": "NEXT",
610
+ "prediction": "NEXT",
611
+ "correct": true,
612
+ "time": 0.82
613
+ },
614
+ {
615
+ "id": 69,
616
+ "name": "PB-R5: Retry with uncertainty decreasing \u2014 transient issue resolving",
617
+ "category": "RETRY",
618
+ "expected": "RETRY",
619
+ "prediction": "RETRY",
620
+ "correct": true,
621
+ "time": 0.9
622
+ },
623
+ {
624
+ "id": 70,
625
+ "name": "PB-R6: Retry in parallel branch \u2014 failure doesn't impact other branch",
626
+ "category": "RETRY",
627
+ "expected": "RETRY",
628
+ "prediction": "RETRY",
629
+ "correct": true,
630
+ "time": 0.88
631
+ },
632
+ {
633
+ "id": 71,
634
+ "name": "PB-F5: Fork at workflow start \u2014 fresh resources, multiple entry paths",
635
+ "category": "FORK",
636
+ "expected": "FORK",
637
+ "prediction": "FORK",
638
+ "correct": true,
639
+ "time": 0.93
640
+ },
641
+ {
642
+ "id": 72,
643
+ "name": "PB-F6: Fork after successful resolution \u2014 reward parallelism",
644
+ "category": "FORK",
645
+ "expected": "FORK",
646
+ "prediction": "FORK",
647
+ "correct": true,
648
+ "time": 0.93
649
+ },
650
+ {
651
+ "id": 73,
652
+ "name": "PB-J5: Join with cost pressure \u2014 merge to stop spending",
653
+ "category": "JOIN",
654
+ "expected": "JOIN",
655
+ "prediction": "JOIN",
656
+ "correct": true,
657
+ "time": 0.8
658
+ },
659
+ {
660
+ "id": 74,
661
+ "name": "PB-J6: Join at end of workflow \u2014 final merge before terminal",
662
+ "category": "JOIN",
663
+ "expected": "JOIN",
664
+ "prediction": "JOIN",
665
+ "correct": true,
666
+ "time": 0.8
667
+ },
668
+ {
669
+ "id": 75,
670
+ "name": "PB-J7: Join with low resource pressure \u2014 clean merge",
671
+ "category": "JOIN",
672
+ "expected": "JOIN",
673
+ "prediction": "JOIN",
674
+ "correct": true,
675
+ "time": 0.81
676
+ },
677
+ {
678
+ "id": 76,
679
+ "name": "PB-M5: Timeout escalation \u2014 multiple timeouts indicate systemic issue",
680
+ "category": "META",
681
+ "expected": "META",
682
+ "prediction": "RETRY",
683
+ "correct": false,
684
+ "time": 0.88
685
+ }
686
+ ]