File size: 44,585 Bytes
1b42f19
b08a347
 
 
 
 
 
 
 
 
1b42f19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71fa486
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be67b73
71fa486
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a32325
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be67b73
 
 
 
874e22a
be67b73
 
 
 
 
 
 
 
 
 
 
 
 
 
874e22a
be67b73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b42f19
 
 
 
 
 
 
6a32325
1b42f19
6a32325
1b42f19
71fa486
 
 
 
6a32325
71fa486
be67b73
71fa486
 
1b42f19
 
 
6a32325
1b42f19
be67b73
1b42f19
71fa486
 
 
 
6a32325
71fa486
be67b73
71fa486
 
 
 
 
6a32325
71fa486
be67b73
71fa486
 
1b42f19
 
 
6a32325
1b42f19
be67b73
1b42f19
71fa486
 
 
 
6a32325
71fa486
be67b73
71fa486
 
1b42f19
be67b73
 
 
 
 
 
 
 
1b42f19
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
"""
Deterministic Adversarial Seed Data Engine.

> **Hackathon Judges Note:** 
> This is not generic dummy data. Our seeds specifically inject malicious 
> real-world SQL edge cases to pressure-test frontier LLM logic:
> - **O'Brien (Task 1):** Tests if the agent uses proper parameterization/escaping.
> - **Duplicate Emails (Task 2):** Tests `DISTINCT` vs standard `INSERT` logic.
> - **Orphaned FKs (Task 3):** Tests the agent's ability to safely `CASCADE` or audit-log invalid relations before dropping columns.
> - **NULL salary rows (Task 3):** Tests strict type constraints handling.

EVERY value in this file is a hardcoded constant. No datetime.now(),
no random(), no runtime generation. This guarantees deterministic
grader behavior across every execution.
"""

import sqlite3
from typing import Dict, List, Tuple


# =============================================================================
# TASK 1: Column Restructure (Easy)
# =============================================================================
# Agent must merge first_name + last_name into full_name without data loss.
# Adversarial: O'Brien (apostrophe), McDonald (capital mid-word).

TASK1_SOURCE_DDL = """
CREATE TABLE users (
    id INTEGER PRIMARY KEY,
    first_name TEXT NOT NULL,
    last_name TEXT NOT NULL
);
"""

TASK1_SOURCE_DATA = [
    (1, "John", "O'Brien"),
    (2, "Mary", "McDonald"),
    (3, "Alice", "Smith"),
    (4, "Bob", "Jones"),
    (5, "Carol", "White"),
]

TASK1_TARGET_DDL = """CREATE TABLE users (
    id INTEGER PRIMARY KEY,
    full_name TEXT NOT NULL
);"""

TASK1_EXPECTED_ROWS: List[Tuple] = [
    (1, "John O'Brien"),
    (2, "Mary McDonald"),
    (3, "Alice Smith"),
    (4, "Bob Jones"),
    (5, "Carol White"),
]


def seed_task1(conn: sqlite3.Connection) -> None:
    """Seed the database for Task 1: Column Restructure."""
    conn.executescript(TASK1_SOURCE_DDL)
    conn.executemany(
        "INSERT INTO users (id, first_name, last_name) VALUES (?, ?, ?)",
        TASK1_SOURCE_DATA,
    )
    conn.commit()


# =============================================================================
# TASK 2: Table Normalization (Medium)
# =============================================================================
# Agent must split flat purchases table into customers + orders with FK.
# Adversarial: alice@example.com appears 3 times (forces SELECT DISTINCT),
# "Laptop, 15-inch" has a comma (breaks naive CSV parsing).

TASK2_SOURCE_DDL = """
CREATE TABLE purchases (
    purchase_id INTEGER PRIMARY KEY,
    item_name TEXT NOT NULL,
    price INTEGER NOT NULL,
    customer_name TEXT NOT NULL,
    customer_email TEXT NOT NULL
);
"""

TASK2_SOURCE_DATA = [
    (1, "Laptop, 15-inch", 80000, "Alice Smith", "alice@example.com"),
    (2, "Mouse", 2500, "Bob Jones", "bob@example.com"),
    (3, "Keyboard", 4500, "Alice Smith", "alice@example.com"),
    (4, "Monitor", 25000, "Carol White", "carol@example.com"),
    (5, "Webcam", 3500, "Alice Smith", "alice@example.com"),
    (6, "USB Hub", 1500, "Bob Jones", "bob@example.com"),
    (7, "Headphones", 6000, "Carol White", "carol@example.com"),
]

TASK2_TARGET_DDL = """CREATE TABLE customers (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    email TEXT NOT NULL UNIQUE
);

CREATE TABLE orders (
    id INTEGER PRIMARY KEY,
    customer_id INTEGER NOT NULL,
    item_name TEXT NOT NULL,
    price INTEGER NOT NULL,
    FOREIGN KEY (customer_id) REFERENCES customers(id)
);"""

TASK2_EXPECTED_CUSTOMER_COUNT = 3
TASK2_EXPECTED_ORDER_COUNT = 7


def seed_task2(conn: sqlite3.Connection) -> None:
    """Seed the database for Task 2: Table Normalization."""
    conn.executescript(TASK2_SOURCE_DDL)
    conn.executemany(
        "INSERT INTO purchases (purchase_id, item_name, price, customer_name, customer_email) "
        "VALUES (?, ?, ?, ?, ?)",
        TASK2_SOURCE_DATA,
    )
    conn.commit()


# =============================================================================
# TASK 3: Cascade Migration (Hard)
# =============================================================================
# Agent must fix types, enforce FKs, and handle orphaned/NULL records.
# Adversarial: salary as "$50,000" strings, one NULL salary,
# two orphaned assets referencing nonexistent employees.

TASK3_SOURCE_DDL = """
CREATE TABLE companies (
    id INTEGER PRIMARY KEY,
    name TEXT
);

CREATE TABLE departments (
    id INTEGER PRIMARY KEY,
    company_id INTEGER,
    name TEXT
);

CREATE TABLE employees (
    id INTEGER PRIMARY KEY,
    department_id INTEGER,
    name TEXT,
    salary TEXT
);

CREATE TABLE assets (
    id INTEGER PRIMARY KEY,
    employee_id INTEGER,
    description TEXT
);
"""

TASK3_COMPANIES_DATA = [
    (1, "Acme Corp"),
    (2, "Globex Inc"),
]

TASK3_DEPARTMENTS_DATA = [
    (1, 1, "Engineering"),
    (2, 1, "Marketing"),
    (3, 2, "Sales"),
]

TASK3_EMPLOYEES_DATA = [
    (1, 1, "Dave Kumar", "$90000"),
    (2, 1, "Eve Chen", "$75000"),
    (3, 2, "Frank O'Neill", "$60000"),
    (4, 3, "Grace Lee", "$85000"),
    (5, 3, "Hal Patel", None),  # NULL salary β€” violates target NOT NULL
]

TASK3_ASSETS_DATA = [
    (1, 1, "MacBook Pro"),
    (2, 2, "Dell Monitor"),
    (3, 3, "Standing Desk"),
    (4, 99, "Orphaned Laptop"),   # employee_id=99 does not exist
    (5, 100, "Orphaned Chair"),   # employee_id=100 does not exist
]

TASK3_TARGET_DDL = """CREATE TABLE companies (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL
);

CREATE TABLE departments (
    id INTEGER PRIMARY KEY,
    company_id INTEGER NOT NULL,
    name TEXT NOT NULL,
    FOREIGN KEY (company_id) REFERENCES companies(id)
);

CREATE TABLE employees (
    id INTEGER PRIMARY KEY,
    department_id INTEGER NOT NULL,
    name TEXT NOT NULL,
    salary INTEGER NOT NULL,
    FOREIGN KEY (department_id) REFERENCES departments(id)
);

CREATE TABLE assets (
    id INTEGER PRIMARY KEY,
    employee_id INTEGER NOT NULL,
    description TEXT NOT NULL,
    FOREIGN KEY (employee_id) REFERENCES employees(id)
);

CREATE TABLE audit_log (
    id INTEGER PRIMARY KEY,
    source_table TEXT NOT NULL,
    original_row_json TEXT NOT NULL,
    reason TEXT NOT NULL
);"""

# Expected audit_log entries: 1 NULL salary employee + 2 orphaned assets = 3 rows
TASK3_EXPECTED_AUDIT_COUNT = 3
TASK3_EXPECTED_AUDIT_ENTRIES = [
    ("employees", "null_salary"),
    ("assets", "orphaned_record"),
    ("assets", "orphaned_record"),
]

# Expected employee salaries after migration (Hal Patel removed)
TASK3_EXPECTED_SALARIES: Dict[int, int] = {
    1: 90000,
    2: 75000,
    3: 60000,
    4: 85000,
}

TASK3_EXPECTED_EMPLOYEE_COUNT = 4


def seed_task3(conn: sqlite3.Connection) -> None:
    """Seed the database for Task 3: Cascade Migration."""
    conn.executescript(TASK3_SOURCE_DDL)
    conn.executemany(
        "INSERT INTO companies (id, name) VALUES (?, ?)",
        TASK3_COMPANIES_DATA,
    )
    conn.executemany(
        "INSERT INTO departments (id, company_id, name) VALUES (?, ?, ?)",
        TASK3_DEPARTMENTS_DATA,
    )
    conn.executemany(
        "INSERT INTO employees (id, department_id, name, salary) VALUES (?, ?, ?, ?)",
        TASK3_EMPLOYEES_DATA,
    )
    conn.executemany(
        "INSERT INTO assets (id, employee_id, description) VALUES (?, ?, ?)",
        TASK3_ASSETS_DATA,
    )
    conn.commit()


# =============================================================================
# TASK 4: Soft-Delete Restoration (Easy)
# =============================================================================
# Agent must restore deleted products from a deletion_log, add is_deleted/deleted_at columns.
# Adversarial: "O'Brien Desk" (apostrophe), stock=0 on Webcam (must NOT confuse with is_deleted).

TASK4_SOURCE_DDL = """
CREATE TABLE products (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    price REAL NOT NULL,
    stock INTEGER NOT NULL
);

CREATE TABLE deletion_log (
    id INTEGER PRIMARY KEY,
    product_id INTEGER NOT NULL,
    product_name TEXT NOT NULL,
    product_price REAL NOT NULL,
    product_stock INTEGER NOT NULL,
    deleted_at TEXT NOT NULL
);
"""

TASK4_PRODUCTS_DATA = [
    (1, "Laptop",       999.99, 15),
    (2, "O'Brien Desk", 249.99, 8),
    (3, "Monitor",      399.99, 23),
    (4, "Keyboard",     89.99,  45),
    (5, "Mouse",        29.99,  102),
]

TASK4_DELETION_LOG_DATA = [
    (1, 6, "Headphones", 149.99, 30, "2024-01-15"),
    (2, 7, "Webcam",      79.99,  0, "2024-02-20"),   # stock=0 but NOT is_deleted=1
    (3, 8, "USB-C Hub",   49.99, 12, "2024-03-10"),
]

TASK4_TARGET_DDL = """CREATE TABLE products (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    price REAL NOT NULL,
    stock INTEGER NOT NULL,
    is_deleted INTEGER NOT NULL DEFAULT 0,
    deleted_at TEXT
);"""

TASK4_EXPECTED_ROW_COUNT = 8
TASK4_EXPECTED_ID_SUM = 36           # 1+2+3+4+5+6+7+8
TASK4_EXPECTED_DELETED_COUNT = 3     # ids 6,7,8
TASK4_EXPECTED_ACTIVE_COUNT = 5      # ids 1-5


def seed_task4(conn: sqlite3.Connection) -> None:
    """Seed the database for Task 4: Soft-Delete Restoration."""
    conn.executescript(TASK4_SOURCE_DDL)
    conn.executemany(
        "INSERT INTO products (id, name, price, stock) VALUES (?, ?, ?, ?)",
        TASK4_PRODUCTS_DATA,
    )
    conn.executemany(
        "INSERT INTO deletion_log (id, product_id, product_name, product_price, product_stock, deleted_at) "
        "VALUES (?, ?, ?, ?, ?, ?)",
        TASK4_DELETION_LOG_DATA,
    )
    conn.commit()


# =============================================================================
# TASK 5: Schema Version Merge (Medium)
# =============================================================================
# Agent must merge products_v1 (price as "$XX.XX" TEXT) and products_v2 (price as REAL)
# into a single products table. v2 wins on ID conflicts. Must add source column.
# Adversarial: id=101 high ID, NULL category, "$" price coercion, conflicting rows.

TASK5_SOURCE_DDL = """
CREATE TABLE products_v1 (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    price TEXT NOT NULL,
    category TEXT,
    supplier TEXT
);

CREATE TABLE products_v2 (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    unit_cost REAL NOT NULL,
    category TEXT NOT NULL,
    brand TEXT,
    sku TEXT
);
"""

TASK5_V1_DATA = [
    (1,   "Widget A",    "$12.50", "Electronics", "AcmeCo"),
    (2,   "Widget B",    "$8.99",  "Electronics", "AcmeCo"),
    (3,   "Gadget X",    "$45.00", None,          "TechCorp"),
    (4,   "Gadget Y",    "$32.50", "Tools",       "TechCorp"),
    (5,   "Doohickey",   "$5.99",  "Office",      "SupplyPro"),
    (101, "Legacy Item", "$99.99", "Electronics", "OldCo"),
]

TASK5_V2_DATA = [
    (1, "Widget A",          12.50, "Electronics", "AcmeCo",  "SKU-001"),
    (2, "Widget B Updated",   9.99, "Electronics", "AcmeCo",  "SKU-002"),
    (6, "New Product F",     67.00, "Tools",       "NewCorp", "SKU-006"),
    (7, "New Product G",     23.50, "Office",      "NewCorp", "SKU-007"),
    (8, "New Product H",     11.00, "Electronics", "ImportCo","SKU-008"),
]

TASK5_TARGET_DDL = """CREATE TABLE products (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    price REAL NOT NULL,
    category TEXT,
    supplier TEXT,
    brand TEXT,
    sku TEXT,
    source TEXT NOT NULL
);"""

TASK5_EXPECTED_ROW_COUNT = 9
TASK5_EXPECTED_PRICE_SUM = round(12.50 + 9.99 + 45.00 + 32.50 + 5.99 + 99.99 + 67.00 + 23.50 + 11.00, 2)
TASK5_EXPECTED_BOTH_SOURCE_COUNT = 2      # ids 1 and 2


def seed_task5(conn: sqlite3.Connection) -> None:
    """Seed the database for Task 5: Schema Version Merge."""
    conn.executescript(TASK5_SOURCE_DDL)
    conn.executemany(
        "INSERT INTO products_v1 (id, name, price, category, supplier) VALUES (?, ?, ?, ?, ?)",
        TASK5_V1_DATA,
    )
    conn.executemany(
        "INSERT INTO products_v2 (id, name, unit_cost, category, brand, sku) VALUES (?, ?, ?, ?, ?, ?)",
        TASK5_V2_DATA,
    )
    conn.commit()


# =============================================================================
# TASK 6: Multi-Entity Extraction (Medium β€” Hard End)
# =============================================================================
# Agent must decompose a sales_records god-table into 3NF (5 tables).
# Adversarial: leading whitespace email, empty customer email, comma in SKU.

TASK6_SOURCE_DDL = """
CREATE TABLE sales_records (
    id INTEGER PRIMARY KEY,
    rep_name TEXT NOT NULL,
    rep_email TEXT NOT NULL,
    rep_region TEXT NOT NULL,
    customer_name TEXT NOT NULL,
    customer_email TEXT NOT NULL,
    customer_tier TEXT NOT NULL,
    product_name TEXT NOT NULL,
    product_sku TEXT NOT NULL,
    product_category TEXT NOT NULL,
    quantity INTEGER NOT NULL,
    unit_price REAL NOT NULL,
    discount_pct INTEGER NOT NULL DEFAULT 0,
    sale_date TEXT NOT NULL
);
"""

TASK6_SOURCE_DATA = [
    (1,  "Alice Chen",   " alice@company.com", "North", "Globex Corp",    "globex@corp.com",   "enterprise", "Widget Pro",  "WIDGET-001", "Electronics", 5,  299.99, 10, "2024-01-10"),
    (2,  "Alice Chen",   "alice@company.com",  "North", "Initech LLC",    "info@initech.com",  "basic",      "Widget Pro",  "WIDGET-001", "Electronics", 2,  299.99, 0,  "2024-01-15"),
    (3,  "Bob Martinez", "bob@company.com",    "South", "Globex Corp",    "globex@corp.com",   "enterprise", "Gadget X",    "GADGET-X01", "Hardware",    10, 89.99,  5,  "2024-01-20"),
    (4,  "Bob Martinez", "bob@company.com",    "South", "Umbrella Inc",   "sales@umbrella.co", "premium",    "Gadget X",    "GADGET-X01", "Hardware",    3,  89.99,  0,  "2024-02-01"),
    (5,  "Carol White",  "carol@company.com",  "East",  "Initech LLC",    "info@initech.com",  "basic",      "Tool Kit",    "TOOLS,001",  "Hardware",    1,  199.99, 0,  "2024-02-05"),
    (6,  "Alice Chen",   "alice@company.com",  "North", "Pendant Corp",   "",                  "free",       "Widget Pro",  "WIDGET-001", "Electronics", 7,  299.99, 15, "2024-02-10"),
    (7,  "Carol White",  "carol@company.com",  "East",  "Globex Corp",    "globex@corp.com",   "enterprise", "Nano Device", "NANO-D01",   "Electronics", 2,  549.99, 20, "2024-02-15"),
    (8,  "Bob Martinez", "bob@company.com",    "South", "Umbrella Inc",   "sales@umbrella.co", "premium",    "Tool Kit",    "TOOLS,001",  "Hardware",    4,  199.99, 10, "2024-03-01"),
    (9,  "Alice Chen",   "alice@company.com",  "North", "Initech LLC",    "info@initech.com",  "basic",      "Nano Device", "NANO-D01",   "Electronics", 1,  549.99, 0,  "2024-03-05"),
    (10, "Carol White",  "carol@company.com",  "East",  "Umbrella Inc",   "sales@umbrella.co", "premium",    "Cable Bundle","CABLE-5PK",  "Accessories", 20, 14.99,  0,  "2024-03-10"),
    (11, "Bob Martinez", "bob@company.com",    "South", "Globex Corp",    "globex@corp.com",   "enterprise", "Cable Bundle","CABLE-5PK",  "Accessories", 15, 14.99,  5,  "2024-03-15"),
    (12, "Carol White",  "carol@company.com",  "East",  "Pendant Corp",   "orders@pendant.io", "free",       "Gadget X",    "GADGET-X01", "Hardware",    6,  89.99,  0,  "2024-03-20"),
]

TASK6_TARGET_DDL = """CREATE TABLE salespersons (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    email TEXT NOT NULL UNIQUE,
    region TEXT NOT NULL
);

CREATE TABLE customers (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    email TEXT NOT NULL UNIQUE,
    tier TEXT NOT NULL
);

CREATE TABLE products (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    sku TEXT NOT NULL UNIQUE,
    category TEXT NOT NULL
);

CREATE TABLE sales (
    id INTEGER PRIMARY KEY,
    salesperson_id INTEGER NOT NULL,
    customer_id INTEGER NOT NULL,
    product_id INTEGER NOT NULL,
    quantity INTEGER NOT NULL,
    unit_price REAL NOT NULL,
    discount_pct INTEGER NOT NULL DEFAULT 0,
    sale_date TEXT NOT NULL,
    FOREIGN KEY (salesperson_id) REFERENCES salespersons(id),
    FOREIGN KEY (customer_id) REFERENCES customers(id),
    FOREIGN KEY (product_id) REFERENCES products(id)
);

CREATE TABLE data_issues (
    id INTEGER PRIMARY KEY,
    source_table TEXT NOT NULL,
    source_row_id INTEGER NOT NULL,
    issue_type TEXT NOT NULL,
    issue_detail TEXT NOT NULL
);"""

TASK6_EXPECTED_SALESPERSON_COUNT = 3
TASK6_EXPECTED_CUSTOMER_COUNT = 3   # Pendant Corp row 6 excluded (empty email)
TASK6_EXPECTED_PRODUCT_COUNT = 5
TASK6_EXPECTED_SALES_COUNT = 11     # row 6 excluded
TASK6_EXPECTED_DATA_ISSUES_COUNT = 1


def seed_task6(conn: sqlite3.Connection) -> None:
    """Seed the database for Task 6: Multi-Entity Extraction."""
    conn.executescript(TASK6_SOURCE_DDL)
    conn.executemany(
        "INSERT INTO sales_records (id, rep_name, rep_email, rep_region, "
        "customer_name, customer_email, customer_tier, product_name, product_sku, "
        "product_category, quantity, unit_price, discount_pct, sale_date) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
        TASK6_SOURCE_DATA,
    )
    conn.commit()


# =============================================================================
# TASK 7: Dual-Source Consolidation (Hard)
# =============================================================================
# Agent must merge 6 source tables from two incompatible systems (Legacy CRM + Modern SaaS)
# into 4 unified target tables. Cross-system email dedup, currency coercion, orphan detection.

TASK7_LEGACY_CUSTOMERS_DDL = """
CREATE TABLE legacy_customers (
    id INTEGER PRIMARY KEY,
    full_name TEXT,
    contact_email TEXT,
    phone TEXT,
    account_type TEXT,
    join_date TEXT
);
"""

TASK7_LEGACY_ORDERS_DDL = """
CREATE TABLE legacy_orders (
    id INTEGER PRIMARY KEY,
    customer_id INTEGER,
    product_code TEXT,
    total_amount TEXT,
    order_status TEXT,
    order_date TEXT
);
"""

TASK7_LEGACY_PRODUCTS_DDL = """
CREATE TABLE legacy_products (
    code TEXT PRIMARY KEY,
    description TEXT,
    unit_price TEXT
);
"""

TASK7_MODERN_USERS_DDL = """
CREATE TABLE modern_users (
    uuid TEXT PRIMARY KEY,
    display_name TEXT,
    email_address TEXT,
    subscription_tier INTEGER,
    created_at TEXT
);
"""

TASK7_MODERN_TRANSACTIONS_DDL = """
CREATE TABLE modern_transactions (
    id INTEGER PRIMARY KEY,
    user_uuid TEXT,
    item_sku TEXT,
    amount REAL,
    currency TEXT,
    tx_status INTEGER,
    created_at TEXT
);
"""

TASK7_MODERN_CATALOG_DDL = """
CREATE TABLE modern_catalog (
    sku TEXT PRIMARY KEY,
    title TEXT,
    base_price REAL
);
"""

TASK7_LEGACY_CUSTOMERS_DATA = [
    (1, "Alice Johnson", "alice@example.com", "+1-555-0101", "premium", "2021-03-15"),
    (2, "Bob Chen",      "bob@example.com",   "+1-555-0102", "basic",   "2022-07-01"),
    (3, "Carol Davis",   None,                "+1-555-0103", "free",    "2023-01-10"),
    (4, "Dave Wilson",   "dave@example.com",  "+1-555-0104", "premium", "2021-11-20"),
    (5, "Eve Martinez",  "eve@example.com",   "+1-555-0105", "free",    "2023-06-05"),
]

TASK7_MODERN_USERS_DATA = [
    ("uuid-A1", "Alice J.",    "alice@example.com", 3, "2021-03-15"),
    ("uuid-B2", "R. Bob Chen", "bob@example.com",   2, "2022-07-01"),
    ("uuid-F6", "Frank Lee",   "frank@example.com", 4, "2022-09-30"),
    ("uuid-G7", "Grace Kim",   "grace@example.com", 1, "2024-01-15"),
]

TASK7_LEGACY_ORDERS_DATA = [
    (1, 1, "PROD-A", "$1,234.56", "delivered", "2022-01-10"),
    (2, 2, "PROD-B", "$89.99",    "shipped",   "2022-03-15"),
    (3, 4, "PROD-A", "$2,500.00", "delivered", "2022-05-20"),
    (4, 3, "PROD-C", "$45.00",    "pending",   "2023-02-01"),
]

TASK7_LEGACY_PRODUCTS_DATA = [
    ("PROD-A", "Enterprise Widget",   "$1,234.56"),
    ("PROD-B", "Basic Gadget",        "$89.99"),
    ("PROD-C", "Starter Kit",         "$45.00"),
]

TASK7_MODERN_TRANSACTIONS_DATA = [
    (1, "uuid-A1",   "SKU-001", 299.99, "USD",  3, "2023-06-01"),
    (2, "uuid-B2",   "SKU-002", 89.99,  None,   2, "2023-07-15"),
    (3, "uuid-F6",   "SKU-001", 299.99, None,   3, "2023-08-20"),
    (4, "uuid-DEAD", "SKU-003", 15.99,  None,   1, "2023-09-01"),   # orphan
    (5, "uuid-G7",   "SKU-002", 89.99,  "USD",  4, "2023-10-10"),
    (6, "uuid-A1",   "SKU-003", 15.99,  "EUR",  5, "2023-11-01"),
]

TASK7_MODERN_CATALOG_DATA = [
    ("SKU-001", "Pro Widget",    299.99),
    ("SKU-002", "Smart Gadget",  89.99),
    ("SKU-003", "Mini Accessory", 15.99),
]

TASK7_TARGET_DDL = """CREATE TABLE unified_customers (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    legacy_id INTEGER,
    modern_uuid TEXT,
    name TEXT,
    email TEXT,
    phone TEXT,
    tier TEXT NOT NULL DEFAULT 'free',
    source TEXT NOT NULL,
    created_at TEXT
);

CREATE TABLE unified_products (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    code TEXT NOT NULL UNIQUE,
    title TEXT NOT NULL,
    price REAL NOT NULL,
    source TEXT NOT NULL
);

CREATE TABLE unified_orders (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    customer_id INTEGER NOT NULL,
    product_id INTEGER,
    amount REAL NOT NULL,
    currency TEXT NOT NULL DEFAULT 'USD',
    status TEXT NOT NULL,
    order_date TEXT,
    source TEXT NOT NULL,
    FOREIGN KEY (customer_id) REFERENCES unified_customers(id)
);

CREATE TABLE migration_issues (
    id INTEGER PRIMARY KEY,
    source_system TEXT NOT NULL,
    source_table TEXT NOT NULL,
    source_id TEXT NOT NULL,
    issue_type TEXT NOT NULL,
    resolution TEXT NOT NULL
);"""

TASK7_EXPECTED_UNIFIED_CUSTOMERS = 7
TASK7_EXPECTED_BOTH_SOURCE_COUNT = 2
TASK7_EXPECTED_UNIFIED_ORDERS = 9
TASK7_EXPECTED_MIGRATION_ISSUES = 2

# Tier mapping: 1β†’'free', 2β†’'basic', 3β†’'premium', 4β†’'enterprise'
TASK7_TIER_MAP = {1: "free", 2: "basic", 3: "premium", 4: "enterprise"}
# Status mapping: 1β†’'pending', 2β†’'processing', 3β†’'complete', 4β†’'failed', 5β†’'refunded'
TASK7_STATUS_MAP = {1: "pending", 2: "processing", 3: "complete", 4: "failed", 5: "refunded"}


def seed_task7(conn: sqlite3.Connection) -> None:
    """Seed the database for Task 7: Dual-Source Consolidation."""
    conn.executescript(TASK7_LEGACY_CUSTOMERS_DDL)
    conn.executescript(TASK7_LEGACY_ORDERS_DDL)
    conn.executescript(TASK7_LEGACY_PRODUCTS_DDL)
    conn.executescript(TASK7_MODERN_USERS_DDL)
    conn.executescript(TASK7_MODERN_TRANSACTIONS_DDL)
    conn.executescript(TASK7_MODERN_CATALOG_DDL)

    conn.executemany("INSERT INTO legacy_customers VALUES (?, ?, ?, ?, ?, ?)", TASK7_LEGACY_CUSTOMERS_DATA)
    conn.executemany("INSERT INTO legacy_orders VALUES (?, ?, ?, ?, ?, ?)", TASK7_LEGACY_ORDERS_DATA)
    conn.executemany("INSERT INTO legacy_products VALUES (?, ?, ?)", TASK7_LEGACY_PRODUCTS_DATA)
    conn.executemany("INSERT INTO modern_users VALUES (?, ?, ?, ?, ?)", TASK7_MODERN_USERS_DATA)
    conn.executemany("INSERT INTO modern_transactions VALUES (?, ?, ?, ?, ?, ?, ?)", TASK7_MODERN_TRANSACTIONS_DATA)
    conn.executemany("INSERT INTO modern_catalog VALUES (?, ?, ?)", TASK7_MODERN_CATALOG_DATA)
    conn.commit()


# =============================================================================
# Golden Migration Functions
# =============================================================================
# These produce the CORRECT expected database state from any seed data.
# Used by the dynamic grader to compare against the agent's output.
# If seed data changes, the golden DB auto-updates β€” no hardcoded literals.


def golden_task1(conn: sqlite3.Connection) -> None:
    """Golden migration for Task 1: Column Restructure."""
    conn.execute("CREATE TABLE users_new (id INTEGER PRIMARY KEY, full_name TEXT NOT NULL)")
    conn.execute(
        "INSERT INTO users_new (id, full_name) "
        "SELECT id, first_name || ' ' || last_name FROM users"
    )
    conn.execute("DROP TABLE users")
    conn.execute("ALTER TABLE users_new RENAME TO users")
    conn.commit()


def golden_task2(conn: sqlite3.Connection) -> None:
    """Golden migration for Task 2: Table Normalization."""
    conn.execute("PRAGMA foreign_keys = OFF")
    conn.execute(
        "CREATE TABLE customers ("
        "id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT NOT NULL UNIQUE)"
    )
    conn.execute(
        "INSERT INTO customers (name, email) "
        "SELECT DISTINCT customer_name, customer_email FROM purchases"
    )
    conn.execute(
        "CREATE TABLE orders ("
        "id INTEGER PRIMARY KEY, customer_id INTEGER NOT NULL, "
        "item_name TEXT NOT NULL, price INTEGER NOT NULL, "
        "FOREIGN KEY (customer_id) REFERENCES customers(id))"
    )
    conn.execute(
        "INSERT INTO orders (customer_id, item_name, price) "
        "SELECT c.id, p.item_name, p.price "
        "FROM purchases p JOIN customers c ON p.customer_email = c.email"
    )
    conn.execute("DROP TABLE purchases")
    conn.execute("PRAGMA foreign_keys = ON")
    conn.commit()


def golden_task3(conn: sqlite3.Connection) -> None:
    """Golden migration for Task 3: Cascade Migration."""
    conn.execute("PRAGMA foreign_keys = OFF")
    # Create audit_log
    conn.execute(
        "CREATE TABLE audit_log (id INTEGER PRIMARY KEY, source_table TEXT NOT NULL, "
        "original_row_json TEXT NOT NULL, reason TEXT NOT NULL)"
    )
    # Log orphaned assets
    conn.execute(
        "INSERT INTO audit_log (source_table, original_row_json, reason) "
        "SELECT 'assets', '{\"id\":' || id || ',\"employee_id\":' || employee_id || '}', 'orphaned_record' "
        "FROM assets WHERE employee_id NOT IN (SELECT id FROM employees)"
    )
    # Log NULL salary employees
    conn.execute(
        "INSERT INTO audit_log (source_table, original_row_json, reason) "
        "SELECT 'employees', '{\"id\":' || id || ',\"name\":\"' || name || '\"}', 'null_salary' "
        "FROM employees WHERE salary IS NULL"
    )
    # Rebuild companies
    conn.execute("CREATE TABLE companies_new (id INTEGER PRIMARY KEY, name TEXT NOT NULL)")
    conn.execute("INSERT INTO companies_new SELECT id, name FROM companies")
    conn.execute("DROP TABLE companies")
    conn.execute("ALTER TABLE companies_new RENAME TO companies")
    # Rebuild departments
    conn.execute(
        "CREATE TABLE departments_new (id INTEGER PRIMARY KEY, company_id INTEGER NOT NULL, "
        "name TEXT NOT NULL, FOREIGN KEY (company_id) REFERENCES companies(id))"
    )
    conn.execute("INSERT INTO departments_new SELECT id, company_id, name FROM departments")
    conn.execute("DROP TABLE departments")
    conn.execute("ALTER TABLE departments_new RENAME TO departments")
    # Rebuild employees (remove NULL salary, coerce TEXT to INT)
    conn.execute(
        "CREATE TABLE employees_new (id INTEGER PRIMARY KEY, department_id INTEGER NOT NULL, "
        "name TEXT NOT NULL, salary INTEGER NOT NULL, "
        "FOREIGN KEY (department_id) REFERENCES departments(id))"
    )
    conn.execute(
        "INSERT INTO employees_new (id, department_id, name, salary) "
        "SELECT id, department_id, name, "
        "CAST(REPLACE(REPLACE(salary, '$', ''), ',', '') AS INTEGER) "
        "FROM employees WHERE salary IS NOT NULL"
    )
    conn.execute("DROP TABLE employees")
    conn.execute("ALTER TABLE employees_new RENAME TO employees")
    # Rebuild assets (remove orphans)
    conn.execute(
        "CREATE TABLE assets_new (id INTEGER PRIMARY KEY, employee_id INTEGER NOT NULL, "
        "description TEXT NOT NULL, FOREIGN KEY (employee_id) REFERENCES employees(id))"
    )
    conn.execute(
        "INSERT INTO assets_new SELECT id, employee_id, description FROM assets "
        "WHERE employee_id IN (SELECT id FROM employees)"
    )
    conn.execute("DROP TABLE assets")
    conn.execute("ALTER TABLE assets_new RENAME TO assets")
    conn.execute("PRAGMA foreign_keys = ON")
    conn.commit()


def golden_task4(conn: sqlite3.Connection) -> None:
    """Golden migration for Task 4: Soft-Delete Restoration."""
    conn.execute("PRAGMA foreign_keys = OFF")
    # Create new table with extra columns
    conn.execute(
        "CREATE TABLE products_new (id INTEGER PRIMARY KEY, name TEXT NOT NULL, "
        "price REAL NOT NULL, stock INTEGER NOT NULL, "
        "is_deleted INTEGER NOT NULL DEFAULT 0, deleted_at TEXT)"
    )
    # Copy existing products as active
    conn.execute(
        "INSERT INTO products_new (id, name, price, stock, is_deleted, deleted_at) "
        "SELECT id, name, price, stock, 0, NULL FROM products"
    )
    # Restore deleted products from log
    conn.execute(
        "INSERT INTO products_new (id, name, price, stock, is_deleted, deleted_at) "
        "SELECT product_id, product_name, product_price, product_stock, 1, deleted_at "
        "FROM deletion_log"
    )
    conn.execute("DROP TABLE products")
    conn.execute("ALTER TABLE products_new RENAME TO products")
    conn.execute("DROP TABLE deletion_log")
    conn.execute("PRAGMA foreign_keys = ON")
    conn.commit()


def golden_task5(conn: sqlite3.Connection) -> None:
    """Golden migration for Task 5: Schema Version Merge."""
    conn.execute("PRAGMA foreign_keys = OFF")
    conn.execute(
        "CREATE TABLE products (id INTEGER PRIMARY KEY, name TEXT NOT NULL, "
        "price REAL NOT NULL, category TEXT, supplier TEXT, brand TEXT, "
        "sku TEXT, source TEXT NOT NULL)"
    )
    # Insert v1-only rows
    conn.execute(
        "INSERT INTO products (id, name, price, category, supplier, brand, sku, source) "
        "SELECT id, name, CAST(REPLACE(REPLACE(price, '$', ''), ',', '') AS REAL), "
        "category, supplier, NULL, NULL, 'v1' "
        "FROM products_v1 WHERE id NOT IN (SELECT id FROM products_v2)"
    )
    # Insert v2-only rows
    conn.execute(
        "INSERT INTO products (id, name, price, category, supplier, brand, sku, source) "
        "SELECT id, name, unit_cost, category, NULL, brand, sku, 'v2' "
        "FROM products_v2 WHERE id NOT IN (SELECT id FROM products_v1)"
    )
    # Insert conflict rows (v2 wins for name/price)
    conn.execute(
        "INSERT INTO products (id, name, price, category, supplier, brand, sku, source) "
        "SELECT v2.id, v2.name, v2.unit_cost, v2.category, v1.supplier, v2.brand, v2.sku, 'both' "
        "FROM products_v2 v2 JOIN products_v1 v1 ON v2.id = v1.id"
    )
    conn.execute("DROP TABLE products_v1")
    conn.execute("DROP TABLE products_v2")
    conn.execute("PRAGMA foreign_keys = ON")
    conn.commit()


def golden_task6(conn: sqlite3.Connection) -> None:
    """Golden migration for Task 6: Multi-Entity Extraction."""
    conn.execute("PRAGMA foreign_keys = OFF")
    # Create target tables
    conn.execute(
        "CREATE TABLE salespersons (id INTEGER PRIMARY KEY, name TEXT NOT NULL, "
        "email TEXT NOT NULL UNIQUE, region TEXT NOT NULL)"
    )
    conn.execute(
        "CREATE TABLE customers (id INTEGER PRIMARY KEY, name TEXT NOT NULL, "
        "email TEXT NOT NULL UNIQUE, tier TEXT NOT NULL)"
    )
    conn.execute(
        "CREATE TABLE products (id INTEGER PRIMARY KEY, name TEXT NOT NULL, "
        "sku TEXT NOT NULL UNIQUE, category TEXT NOT NULL)"
    )
    conn.execute(
        "CREATE TABLE sales (id INTEGER PRIMARY KEY, salesperson_id INTEGER NOT NULL, "
        "customer_id INTEGER NOT NULL, product_id INTEGER NOT NULL, "
        "quantity INTEGER NOT NULL, unit_price REAL NOT NULL, "
        "discount_pct INTEGER NOT NULL DEFAULT 0, sale_date TEXT NOT NULL, "
        "FOREIGN KEY (salesperson_id) REFERENCES salespersons(id), "
        "FOREIGN KEY (customer_id) REFERENCES customers(id), "
        "FOREIGN KEY (product_id) REFERENCES products(id))"
    )
    conn.execute(
        "CREATE TABLE data_issues (id INTEGER PRIMARY KEY, source_table TEXT NOT NULL, "
        "source_row_id INTEGER NOT NULL, issue_type TEXT NOT NULL, "
        "issue_detail TEXT NOT NULL)"
    )
    # Populate salespersons (TRIM email)
    conn.execute(
        "INSERT INTO salespersons (name, email, region) "
        "SELECT DISTINCT rep_name, TRIM(rep_email), rep_region FROM sales_records"
    )
    # Populate customers (exclude empty email rows)
    conn.execute(
        "INSERT INTO customers (name, email, tier) "
        "SELECT DISTINCT customer_name, customer_email, customer_tier "
        "FROM sales_records WHERE customer_email IS NOT NULL AND customer_email != ''"
    )
    # Populate products
    conn.execute(
        "INSERT INTO products (name, sku, category) "
        "SELECT DISTINCT product_name, product_sku, product_category FROM sales_records"
    )
    # Populate sales (exclude rows with empty customer email)
    conn.execute(
        "INSERT INTO sales (salesperson_id, customer_id, product_id, quantity, "
        "unit_price, discount_pct, sale_date) "
        "SELECT sp.id, c.id, p.id, sr.quantity, sr.unit_price, sr.discount_pct, sr.sale_date "
        "FROM sales_records sr "
        "JOIN salespersons sp ON TRIM(sr.rep_email) = sp.email "
        "JOIN customers c ON sr.customer_email = c.email "
        "JOIN products p ON sr.product_sku = p.sku "
        "WHERE sr.customer_email IS NOT NULL AND sr.customer_email != ''"
    )
    # Log data issues (empty email)
    conn.execute(
        "INSERT INTO data_issues (source_table, source_row_id, issue_type, issue_detail) "
        "SELECT 'sales_records', id, 'empty_email', "
        "'Customer email is empty for: ' || customer_name "
        "FROM sales_records WHERE customer_email IS NULL OR customer_email = ''"
    )
    conn.execute("DROP TABLE sales_records")
    conn.execute("PRAGMA foreign_keys = ON")
    conn.commit()


def golden_task7(conn: sqlite3.Connection) -> None:
    """Golden migration for Task 7: Dual-Source Consolidation."""
    conn.execute("PRAGMA foreign_keys = OFF")

    # Create unified_customers
    conn.execute(
        "CREATE TABLE unified_customers (id INTEGER PRIMARY KEY AUTOINCREMENT, "
        "legacy_id INTEGER, modern_uuid TEXT, name TEXT, email TEXT, phone TEXT, "
        "tier TEXT NOT NULL DEFAULT 'free', source TEXT NOT NULL, created_at TEXT)"
    )
    # Insert legacy-only customers (no email match in modern)
    conn.execute(
        "INSERT INTO unified_customers (legacy_id, modern_uuid, name, email, phone, tier, source, created_at) "
        "SELECT lc.id, NULL, lc.full_name, lc.contact_email, lc.phone, lc.account_type, 'legacy', lc.join_date "
        "FROM legacy_customers lc "
        "WHERE lc.contact_email IS NULL OR lc.contact_email NOT IN (SELECT email_address FROM modern_users WHERE email_address IS NOT NULL)"
    )
    # Insert modern-only users (no email match in legacy)
    conn.execute(
        "INSERT INTO unified_customers (legacy_id, modern_uuid, name, email, phone, tier, source, created_at) "
        "SELECT NULL, mu.uuid, mu.display_name, mu.email_address, NULL, "
        "CASE mu.subscription_tier "
        "  WHEN 1 THEN 'free' WHEN 2 THEN 'basic' WHEN 3 THEN 'premium' WHEN 4 THEN 'enterprise' "
        "  ELSE 'free' END, "
        "'modern', mu.created_at "
        "FROM modern_users mu "
        "WHERE mu.email_address NOT IN (SELECT contact_email FROM legacy_customers WHERE contact_email IS NOT NULL)"
    )
    # Insert matched (both) customers β€” legacy name + modern tier
    conn.execute(
        "INSERT INTO unified_customers (legacy_id, modern_uuid, name, email, phone, tier, source, created_at) "
        "SELECT lc.id, mu.uuid, lc.full_name, lc.contact_email, lc.phone, "
        "CASE mu.subscription_tier "
        "  WHEN 1 THEN 'free' WHEN 2 THEN 'basic' WHEN 3 THEN 'premium' WHEN 4 THEN 'enterprise' "
        "  ELSE 'free' END, "
        "'both', lc.join_date "
        "FROM legacy_customers lc "
        "JOIN modern_users mu ON lc.contact_email = mu.email_address "
        "WHERE lc.contact_email IS NOT NULL"
    )

    # Create unified_products
    conn.execute(
        "CREATE TABLE unified_products (id INTEGER PRIMARY KEY AUTOINCREMENT, "
        "code TEXT NOT NULL UNIQUE, title TEXT NOT NULL, price REAL NOT NULL, "
        "source TEXT NOT NULL)"
    )
    # Legacy products
    conn.execute(
        "INSERT INTO unified_products (code, title, price, source) "
        "SELECT code, description, "
        "CAST(REPLACE(REPLACE(unit_price, '$', ''), ',', '') AS REAL), 'legacy' "
        "FROM legacy_products"
    )
    # Modern products (no code overlap expected)
    conn.execute(
        "INSERT INTO unified_products (code, title, price, source) "
        "SELECT sku, title, base_price, 'modern' "
        "FROM modern_catalog"
    )

    # Create migration_issues
    conn.execute(
        "CREATE TABLE migration_issues (id INTEGER PRIMARY KEY, "
        "source_system TEXT NOT NULL, source_table TEXT NOT NULL, "
        "source_id TEXT NOT NULL, issue_type TEXT NOT NULL, "
        "resolution TEXT NOT NULL)"
    )
    # Log NULL email customer
    conn.execute(
        "INSERT INTO migration_issues (source_system, source_table, source_id, issue_type, resolution) "
        "SELECT 'legacy', 'legacy_customers', CAST(id AS TEXT), 'null_email', "
        "'Imported without email' "
        "FROM legacy_customers WHERE contact_email IS NULL"
    )
    # Log orphaned transactions
    conn.execute(
        "INSERT INTO migration_issues (source_system, source_table, source_id, issue_type, resolution) "
        "SELECT 'modern', 'modern_transactions', CAST(id AS TEXT), 'orphaned_record', "
        "'User UUID not found: ' || user_uuid "
        "FROM modern_transactions WHERE user_uuid NOT IN (SELECT uuid FROM modern_users)"
    )

    # Create unified_orders
    conn.execute(
        "CREATE TABLE unified_orders (id INTEGER PRIMARY KEY AUTOINCREMENT, "
        "customer_id INTEGER NOT NULL, product_id INTEGER, amount REAL NOT NULL, "
        "currency TEXT NOT NULL DEFAULT 'USD', status TEXT NOT NULL, "
        "order_date TEXT, source TEXT NOT NULL, "
        "FOREIGN KEY (customer_id) REFERENCES unified_customers(id))"
    )
    # Legacy orders
    conn.execute(
        "INSERT INTO unified_orders (customer_id, product_id, amount, currency, status, order_date, source) "
        "SELECT uc.id, up.id, "
        "CAST(REPLACE(REPLACE(lo.total_amount, '$', ''), ',', '') AS REAL), "
        "'USD', lo.order_status, lo.order_date, 'legacy' "
        "FROM legacy_orders lo "
        "JOIN legacy_customers lc ON lo.customer_id = lc.id "
        "JOIN unified_customers uc ON (uc.legacy_id = lc.id) "
        "LEFT JOIN unified_products up ON lo.product_code = up.code"
    )
    # Modern transactions (exclude orphans)
    conn.execute(
        "INSERT INTO unified_orders (customer_id, product_id, amount, currency, status, order_date, source) "
        "SELECT uc.id, up.id, mt.amount, "
        "COALESCE(mt.currency, 'USD'), "
        "CASE mt.tx_status "
        "  WHEN 1 THEN 'pending' WHEN 2 THEN 'processing' WHEN 3 THEN 'complete' "
        "  WHEN 4 THEN 'failed' WHEN 5 THEN 'refunded' ELSE 'unknown' END, "
        "mt.created_at, 'modern' "
        "FROM modern_transactions mt "
        "JOIN modern_users mu ON mt.user_uuid = mu.uuid "
        "JOIN unified_customers uc ON (uc.modern_uuid = mu.uuid OR uc.email = mu.email_address) "
        "LEFT JOIN unified_products up ON mt.item_sku = up.code"
    )

    # Clean up source tables
    conn.execute("DROP TABLE legacy_customers")
    conn.execute("DROP TABLE legacy_orders")
    conn.execute("DROP TABLE legacy_products")
    conn.execute("DROP TABLE modern_users")
    conn.execute("DROP TABLE modern_transactions")
    conn.execute("DROP TABLE modern_catalog")
    conn.execute("PRAGMA foreign_keys = ON")
    conn.commit()


# =============================================================================
# TASK 8: Data Poisoning & Quarantine Routing (Extreme)
# =============================================================================

TASK8_TARGET_DDL = """
CREATE TABLE inventory (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    price REAL NOT NULL,
    sku TEXT UNIQUE
);

CREATE TABLE inventory_quarantine (
    id INTEGER PRIMARY KEY,
    raw_name TEXT,
    raw_price TEXT,
    raw_sku TEXT,
    error_reason TEXT
);
""".strip()

def seed_task8(conn):
    conn.execute("CREATE TABLE staging_data (id INTEGER, item TEXT, cost TEXT, sku_code TEXT)")
    data = [
        (1, "Oscilloscope", "1500.00", "OSC-001"),
        (2, "Multimeter", "  75.50 ", "MUL-002"),
        (3, "Soldering Iron", "$45.00", "SLD-003"),
        (4, "Lead Solder", "N/A", "LSD-004"),
        (5, "DC Power Supply", "299.99", "PWR-005"),
        (6, "Unknown Device", "INVALID", "UNK-006"),
        (7, "Wire Strippers", "$ 12.50", "WRE-007"),
    ]
    conn.executemany("INSERT INTO staging_data VALUES (?,?,?,?)", data)
    conn.commit()

def golden_task8(conn):
    conn.execute("CREATE TABLE inventory (id INTEGER PRIMARY KEY, name TEXT NOT NULL, price REAL NOT NULL, sku TEXT UNIQUE)")
    conn.execute("CREATE TABLE inventory_quarantine (id INTEGER PRIMARY KEY, raw_name TEXT, raw_price TEXT, raw_sku TEXT, error_reason TEXT)")
    
    # Process staging_data
    cursor = conn.execute("SELECT id, item, cost, sku_code FROM staging_data")
    for row in cursor.fetchall():
        rid, name, cost, sku = row
        clean_cost = cost.replace("$", "").strip()
        
        try:
            price = float(clean_cost)
            conn.execute("INSERT INTO inventory (id, name, price, sku) VALUES (?,?,?,?)", (rid, name, price, sku))
        except ValueError:
            conn.execute("INSERT INTO inventory_quarantine (raw_name, raw_price, raw_sku, error_reason) VALUES (?,?,?,?)", 
                         (name, cost, sku, "invalid_numeric_format"))
    conn.commit()

# =============================================================================
# Task Registry
# =============================================================================

TASKS = {
    "column-restructure": {
        "seed_fn": seed_task1,
        "golden_fn": golden_task1,
        "target_ddl": TASK1_TARGET_DDL,
        "description": "Merge first_name and last_name into a single full_name column (concatenated with a space) without data loss. Apostrophes in names (e.g., O'Brien) must be preserved.",
        "difficulty": "easy",
        "max_steps": 10,
    },
    "soft-delete-restoration": {
        "seed_fn": seed_task4,
        "golden_fn": golden_task4,
        "target_ddl": TASK4_TARGET_DDL,
        "description": "Restore deleted products from the deletion_log table back into the products table. Use product_id from deletion_log (NOT the log's id column) as the primary key. Add is_deleted (1) and deleted_at values from log. Original rows stay as is_deleted=0, deleted_at=NULL.",
        "difficulty": "easy",
        "max_steps": 10,
    },
    "table-normalization": {
        "seed_fn": seed_task2,
        "golden_fn": golden_task2,
        "target_ddl": TASK2_TARGET_DDL,
        "description": "Normalize a flat purchases table into customers and orders tables linked by customer_id (FK). Ensure customers are distinct by email.",
        "difficulty": "medium",
        "max_steps": 15,
    },
    "schema-version-merge": {
        "seed_fn": seed_task5,
        "golden_fn": golden_task5,
        "target_ddl": TASK5_TARGET_DDL,
        "description": "Merge products_v1 (Legacy) and products_v2 (Modern) with ID collision logic: Modern (v2) wins. Coerce v1 price strings ($) to REAL.",
        "difficulty": "medium",
        "max_steps": 15,
    },
    "multi-entity-extraction": {
        "seed_fn": seed_task6,
        "golden_fn": golden_task6,
        "target_ddl": TASK6_TARGET_DDL,
        "description": "Decompose sales_records into 3NF: salespersons, customers, products, and sales. Route rows with missing emails to data_issues.",
        "difficulty": "medium",
        "max_steps": 15,
    },
    "cascade-migration": {
        "seed_fn": seed_task3,
        "golden_fn": golden_task3,
        "target_ddl": TASK3_TARGET_DDL,
        "description": "Multi-table FK cascade with type coercion for salary and orphan logging for assets.",
        "difficulty": "hard",
        "max_steps": 20,
    },
    "dual-source-consolidation": {
        "seed_fn": seed_task7,
        "golden_fn": golden_task7,
        "target_ddl": TASK7_TARGET_DDL,
        "description": "Consolidate Legacy CRM and Modern SaaS data with cross-system email deduping and complex state/type mapping.",
        "difficulty": "hard",
        "max_steps": 20,
    },
    "data-poisoning-quarantine": {
        "seed_fn": seed_task8,
        "golden_fn": golden_task8,
        "target_ddl": TASK8_TARGET_DDL,
        "description": "The ultimate technical test: Migrate inventory from a 'poisoned' staging table. Cleanse raw price strings and route un-coerceable rows (like 'N/A') to a quarantine table while maintaining strict schema integrity.",
        "difficulty": "extreme",
        "max_steps": 15,
    },
}