File size: 2,634 Bytes
8e079ed
 
 
7d5172b
 
 
 
71c8fbc
 
7d5172b
71c8fbc
7d5172b
8e079ed
 
1b948dc
8e079ed
7d5172b
8e079ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import gradio as gr
import stanza



import torch
import numpy
# Allowlist numpy.ndarray so torch's weights-only (safe) loading can
# deserialize checkpoints that embed numpy arrays.
torch.serialization.add_safe_globals([numpy.ndarray])
# NOTE(review): previously also allowlisted numpy.core.multiarray._reconstruct;
# kept here disabled in case older checkpoints need it.
# torch.serialization.add_safe_globals([numpy.core.multiarray._reconstruct])

# Download the English models (network side effect at import time) and
# build the Stanza pipeline used by generate_reordering_rule below.
stanza.download('en')
nlp = stanza.Pipeline(lang='en', processors='tokenize,pos,constituency')


def generate_reordering_rule(english, reordered):
    """Infer syntactic reordering rules mapping *english* to *reordered*.

    Parses *english* with the module-level Stanza constituency parser, then
    for every internal node of the parse tree records a rule describing how
    that node's children appear to be permuted in *reordered*.

    Args:
        english: source English sentence (only the first parsed sentence
            is used).
        reordered: the same sentence with its words rearranged.

    Returns:
        A newline-joined string of "(label, child_labels): lambda ..." rules,
        or an explanatory message when nothing can be inferred.
    """
    # Guard empty/whitespace input: nlp("").sentences is empty and
    # sentences[0] would raise IndexError.
    if not english or not english.strip():
        return "No reordering rules could be inferred."
    sentences = nlp(english).sentences
    if not sentences:
        return "No reordering rules could be inferred."
    tree = sentences[0].constituency

    reordered_tokens = reordered.split()
    rules = []

    def extract(node):
        # Stop at leaves / preterminals: there is nothing to reorder below.
        if not hasattr(node, 'children') or all(isinstance(c, str) for c in node.children):
            return

        child_labels = tuple(c.label if hasattr(c, 'label') else c for c in node.children)
        rule_key = (node.label, child_labels)

        # Surface tokens covered by each child subtree.
        child_tokens = []
        for c in node.children:
            if hasattr(c, 'leaf_labels'):
                child_tokens.append(' '.join(c.leaf_labels()))
            else:
                child_tokens.append(c)

        # Position of each child's first token in the reordered sentence.
        # An empty child token string maps to -1 instead of raising
        # IndexError on tok.split()[0]; a token absent from the reordered
        # sentence also maps to -1 (sorts first, preserving prior behavior).
        positions = []
        for tok in child_tokens:
            words = tok.split()
            if not words:
                positions.append(-1)
                continue
            try:
                positions.append(reordered_tokens.index(words[0]))
            except ValueError:
                positions.append(-1)

        # Stable sort of child indices by reordered position gives the
        # inferred permutation, rendered as a lambda over c0..cN.
        reordered_indices = sorted(range(len(child_tokens)), key=lambda i: positions[i])
        rule_func = f"lambda {', '.join(f'c{i}' for i in range(len(child_tokens)))}: " \
                    f"{' + '.join(f'c{i}' for i in reordered_indices)}"

        rules.append((rule_key, rule_func))

        # Recurse into non-leaf children.
        for c in node.children:
            if hasattr(c, 'children'):
                extract(c)

    extract(tree)

    # Format rules for display
    if not rules:
        return "No reordering rules could be inferred."
    return "\n".join(f"{key}: {func}" for key, func in rules)

# Gradio interface
# Build the two text inputs first, then wire them into the Gradio interface.
english_input = gr.Textbox(lines=2, label="English Sentence", placeholder="e.g. I want to eat the cake.")
reordered_input = gr.Textbox(lines=2, label="Reordered Sentence", placeholder="e.g. I cake the eat to want.")

demo = gr.Interface(
    fn=generate_reordering_rule,
    inputs=[english_input, reordered_input],
    outputs="text",
    title="Reordering Rule Generator",
    description="Enter an English sentence and its reordered version. This app extracts syntactic transformation rules using Stanza's constituency parser."
)

if __name__ == "__main__":
    # Start the Gradio server only when executed as a script, not on import.
    demo.launch()