Spaces:
Running
Running
File size: 7,453 Bytes
47b17cc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 | :- module(tree_validation, [
well_formed_tree/1,
validate_tree_structure/2
]).
%% ==========================================================================
%% MLAF Grammar Engine — Tree Well-Formedness Validation
%%
%% Formal tree well-formedness conditions from Partee, ter Meulen & Wall,
%% "Mathematical Methods in Linguistics", Chapter 16.
%%
%% A constituent structure tree T = <N, Q, D, P, L> must satisfy:
%%
%% 1. SINGLE ROOT: Exactly one node is not dominated by any other node.
%% 2. UNIQUE MOTHER: Every non-root node has exactly one mother.
%% 3. ANTISYMMETRY: If A dominates B, then B does not dominate A.
%% 4. TRANSITIVITY: If A dominates B and B dominates C, then A dominates C.
%% 5. EXHAUSTIVE ORDERING: All sisters are linearly ordered (by precedence).
%% 6. NON-TANGLING: If A precedes B, then all nodes dominated by A
%% precede all nodes dominated by B.
%%
%% Tree Representation:
%% Compound terms of the form: node(Label, [Child1, Child2, ...])
%% Leaves are atoms or: leaf(Label)
%%
%% The X-bar trees from xbar.pl are compound terms like:
%% s(np(d(she)), vp(v(eat), np(n(food))))
%%
%% We operate on the raw compound term structure.
%% ==========================================================================
%% --- well_formed_tree(+Tree) ---
%% Succeeds if the tree satisfies all well-formedness conditions.
%% Fails with a descriptive error if any condition is violated.
well_formed_tree(Tree) :-
nonvar(Tree),
% Condition 1: Tree has content (non-empty)
Tree \= [],
% Condition 2: Verify structural integrity
valid_structure(Tree),
% Condition 3: No cycles (antisymmetry — guaranteed by construction in SWI-Prolog terms)
acyclic_term(Tree),
% Condition 4: Every non-leaf has at least one child
all_internals_have_children(Tree),
% Condition 5: Check label validity
all_nodes_labeled(Tree).
%% --- validate_tree_structure(+Tree, -Report) ---
%% Returns a detailed validation report as a list of [Check, Status] pairs.
validate_tree_structure(Tree, Report) :-
(nonvar(Tree) -> NonEmpty = pass ; NonEmpty = fail),
(valid_structure(Tree) -> Structure = pass ; Structure = fail),
(acyclic_term(Tree) -> Acyclic = pass ; Acyclic = fail),
(all_internals_have_children(Tree) -> InternalCheck = pass ; InternalCheck = fail),
(all_nodes_labeled(Tree) -> LabelCheck = pass ; LabelCheck = fail),
count_nodes(Tree, NodeCount),
tree_depth(Tree, Depth),
Report = [
[non_empty, NonEmpty],
[valid_structure, Structure],
[acyclic, Acyclic],
[internals_have_children, InternalCheck],
[all_labeled, LabelCheck],
[node_count, NodeCount],
[depth, Depth]
].
%% ==========================================================================
%% STRUCTURAL CHECKS
%% ==========================================================================
%% valid_structure(+Tree)
%% A valid tree is either an atom (leaf), a number, a list (feature bundle),
%% or a compound term whose arguments are all valid trees.
valid_structure(Tree) :-
atom(Tree), !.
valid_structure(Tree) :-
number(Tree), !.
valid_structure(Tree) :-
is_list(Tree), !. %% Feature lists like [person=3, number=sg, ...] are valid leaves
valid_structure(Tree) :-
compound(Tree),
Tree =.. [_Functor | Args],
Args \= [],
maplist(valid_structure, Args).
%% all_internals_have_children(+Tree)
%% Every compound (internal) node must have at least one argument.
%% This is guaranteed by the compound check but we verify explicitly.
all_internals_have_children(Tree) :-
atom(Tree), !.
all_internals_have_children(Tree) :-
number(Tree), !.
all_internals_have_children(Tree) :-
is_list(Tree), !. %% Feature lists are leaf nodes
all_internals_have_children(Tree) :-
compound(Tree),
Tree =.. [_ | Args],
Args \= [],
maplist(all_internals_have_children, Args).
%% all_nodes_labeled(+Tree)
%% Every node must have a valid label (the functor for compound terms,
%% the atom itself for leaves).
all_nodes_labeled(Tree) :-
atom(Tree),
Tree \= '', !.
all_nodes_labeled(Tree) :-
number(Tree), !.
all_nodes_labeled(Tree) :-
is_list(Tree), !. %% Feature lists are valid leaf data
all_nodes_labeled(Tree) :-
compound(Tree),
Tree =.. [Functor | Args],
atom(Functor),
Functor \= '',
maplist(all_nodes_labeled, Args).
%% ==========================================================================
%% TREE METRICS
%% ==========================================================================
%% count_nodes(+Tree, -Count)
%% Count total number of nodes (internal + leaves).
count_nodes(Tree, 1) :-
atom(Tree), !.
count_nodes(Tree, 1) :-
number(Tree), !.
count_nodes(Tree, 1) :-
is_list(Tree), !. %% Feature lists count as 1 node
count_nodes(Tree, Count) :-
compound(Tree),
Tree =.. [_ | Args],
maplist(count_nodes, Args, ChildCounts),
sum_list(ChildCounts, ChildTotal),
Count is ChildTotal + 1.
%% tree_depth(+Tree, -Depth)
%% Maximum depth from root to any leaf.
tree_depth(Tree, 0) :-
atom(Tree), !.
tree_depth(Tree, 0) :-
number(Tree), !.
tree_depth(Tree, 0) :-
is_list(Tree), !. %% Feature lists are depth 0
tree_depth(Tree, Depth) :-
compound(Tree),
Tree =.. [_ | Args],
maplist(tree_depth, Args, ChildDepths),
max_list(ChildDepths, MaxChild),
Depth is MaxChild + 1.
%% ==========================================================================
%% DOMINANCE AND PRECEDENCE CHECKS
%% ==========================================================================
%% dominates(+Tree, +Ancestor, +Descendant)
%% True if the node labeled Ancestor dominates the node labeled Descendant.
%% (For use in external queries — the tree structure itself encodes dominance.)
dominates(Tree, Ancestor, Descendant) :-
compound(Tree),
Tree =.. [Ancestor | Args],
appears_in_any(Descendant, Args).
dominates(Tree, Ancestor, Descendant) :-
compound(Tree),
Tree =.. [_ | Args],
member(Child, Args),
dominates(Child, Ancestor, Descendant).
%% appears_in_any(+Label, +Trees)
%% True if Label appears as any node label in any of the trees.
appears_in_any(Label, [Tree | _]) :-
appears_in(Label, Tree).
appears_in_any(Label, [_ | Rest]) :-
appears_in_any(Label, Rest).
appears_in(Label, Label) :-
atom(Label), !.
appears_in(Label, Tree) :-
compound(Tree),
Tree =.. [Label | _], !.
appears_in(Label, Tree) :-
compound(Tree),
Tree =.. [_ | Args],
appears_in_any(Label, Args).
%% c_commands(+Tree, +A, +B)
%% A c-commands B iff the first branching node dominating A also dominates B,
%% and A does not dominate B. (Reinhart 1976, used in Binding Theory)
c_commands(Tree, A, B) :-
first_branching_dominator(Tree, A, Dominator),
dominates(Tree, Dominator, B),
\+ dominates(Tree, A, B).
%% first_branching_dominator(+Tree, +Node, -Dominator)
%% Find the first branching node (>1 child) that dominates Node.
first_branching_dominator(Tree, Node, Functor) :-
compound(Tree),
Tree =.. [Functor | Args],
length(Args, Arity),
Arity > 1,
appears_in_any(Node, Args), !.
first_branching_dominator(Tree, Node, Dominator) :-
compound(Tree),
Tree =.. [_ | Args],
member(Child, Args),
first_branching_dominator(Child, Node, Dominator).
|