File size: 2,841 Bytes
a39d8ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import json
import re
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent))
from data_factory.templates import ALL_TEMPLATES

# Categorical value swaps, keyed by template domain.
#
# Each rule is a 4-tuple:
#   (sql_pattern, nl_pattern, sql_replacements, nl_replacements)
# The two patterns are used as regular expressions against a template's SQL
# and natural-language text; the two replacement lists are consumed pairwise
# (sql_replacements[i] goes with nl_replacements[i]).
SWAP_RULES = {
    "ecommerce": [
        (
            r"'gold'",
            r"gold",
            ["'silver'", "'bronze'"],
            ["silver", "bronze"],
        ),
        (
            r"'delivered'",
            r"delivered",
            ["'pending'", "'processing'", "'shipped'", "'cancelled'"],
            ["pending", "processing", "shipped", "cancelled"],
        ),
        (
            r"'India'",
            r"India",
            ["'USA'", "'Germany'", "'UK'", "'Canada'"],
            ["USA", "Germany", "UK", "Canada"],
        ),
    ],
    "healthcare": [
        (
            r"'severe'",
            r"severe",
            ["'mild'", "'moderate'"],
            ["mild", "moderate"],
        ),
        # NOTE(review): the SQL value is 'no_show' (underscore) while the NL
        # wording is "no-show" (hyphen) -- confirm this matches the schema.
        (
            r"'completed'",
            r"completed",
            ["'scheduled'", "'cancelled'", "'no_show'"],
            ["scheduled", "cancelled", "no-show"],
        ),
    ],
    "finance": [
        (
            r"'active'",
            r"active",
            ["'dormant'", "'closed'"],
            ["dormant", "closed"],
        ),
        (
            r"'credit'",
            r"credit",
            ["'debit'"],
            ["debit"],
        ),
        (
            r"'verified'",
            r"verified",
            ["'pending'", "'rejected'"],
            ["pending", "rejected"],
        ),
    ],
    "hr": [
        (
            r"'active'",
            r"active",
            ["'resigned'", "'terminated'"],
            ["resigned", "terminated"],
        ),
    ],
}

def _cased_like(matched_text, replacement):
    """Capitalise ``replacement`` when ``matched_text`` starts with an upper-case letter.

    Only all-lowercase replacements are adjusted, so values that carry their
    own casing (e.g. "USA") are returned unchanged.
    """
    if matched_text[:1].isupper() and replacement.islower():
        return replacement[:1].upper() + replacement[1:]
    return replacement


def generate_swaps(templates=None, rules=None):
    """Expand templates with value-swapped variants.

    Every input template is kept as-is. For each rule of the template's
    domain whose SQL pattern occurs in ``template["sql"]`` and whose NL
    pattern occurs (as a whole word) in ``template["base_nl"]``, one variant
    per replacement pair is appended with the value swapped in ``sql``,
    ``base_nl`` and ``description`` and with a derived ``id``.

    Args:
        templates: Iterable of template dicts with at least the keys
            ``domain``, ``sql``, ``base_nl`` and ``description``.
            Defaults to ``ALL_TEMPLATES``.
        rules: Mapping of domain -> list of
            ``(sql_pattern, nl_pattern, sql_replacements, nl_replacements)``.
            Defaults to ``SWAP_RULES``.

    Returns:
        A new list holding the original template objects followed, in place,
        by their swapped variants (shallow copies with the fields above
        overwritten).

    Raises:
        KeyError: If a template lacks ``domain``, or a template that a rule
            applies to lacks ``sql``, ``base_nl`` or ``description``.
    """
    if templates is None:
        templates = ALL_TEMPLATES
    if rules is None:
        rules = SWAP_RULES

    expanded_templates = []

    for template in templates:
        expanded_templates.append(template)  # Keep the original

        domain_rules = rules.get(template["domain"])
        if not domain_rules:
            continue

        for sql_target, nl_target, sql_replacements, nl_replacements in domain_rules:
            sql_pattern = re.compile(sql_target, re.IGNORECASE)
            if not sql_pattern.search(template["sql"]):
                continue

            # Whole-word match only: a bare substring match would rewrite
            # "inactive" -> "inresigned" or "Indian" -> "USAn".
            nl_pattern = re.compile(rf"\b(?:{nl_target})\b", re.IGNORECASE)

            # If the question never mentions the value, swapping only the SQL
            # would pair an unchanged question with a different query.
            if not nl_pattern.search(template["base_nl"]):
                continue

            for sql_repl, nl_repl in zip(sql_replacements, nl_replacements):
                # Callable replacements insert the text literally (no
                # backslash / group-reference expansion by ``re``).
                def swap_sql(_match, _value=sql_repl):
                    return _value

                def swap_nl(match, _value=nl_repl):
                    return _cased_like(match.group(0), _value)

                new_template = template.copy()

                # Swap in SQL
                new_template["sql"] = sql_pattern.sub(swap_sql, template["sql"])

                # Swap in NL and Description
                new_template["base_nl"] = nl_pattern.sub(swap_nl, template["base_nl"])
                new_template["description"] = nl_pattern.sub(swap_nl, template["description"])

                # Create a unique ID
                new_template["id"] = f"{template.get('id', 'temp')}_swap_{nl_repl.replace(' ', '_')}"

                expanded_templates.append(new_template)

    return expanded_templates

if __name__ == "__main__":
    # Script entry point: build the expanded template list, report the
    # before/after counts, and dump the result as indented JSON into the
    # current working directory.
    all_variants = generate_swaps()

    original_count = len(ALL_TEMPLATES)
    expanded_count = len(all_variants)
    print(f"Original Templates: {original_count}")
    print(f"After Value Swapping: {expanded_count}")

    with open("swapped_templates.json", "w") as out_file:
        json.dump(all_variants, out_file, indent=2)

    print("Saved to swapped_templates.json")