:- module(tree_validation, [ well_formed_tree/1, validate_tree_structure/2 ]). %% ========================================================================== %% MLAF Grammar Engine — Tree Well-Formedness Validation %% %% Formal tree well-formedness conditions from Partee, ter Meulen & Wall, %% "Mathematical Methods in Linguistics", Chapter 16. %% %% A constituent structure tree T = must satisfy: %% %% 1. SINGLE ROOT: Exactly one node is not dominated by any other node. %% 2. UNIQUE MOTHER: Every non-root node has exactly one mother. %% 3. ANTISYMMETRY: If A dominates B, then B does not dominate A. %% 4. TRANSITIVITY: If A dominates B and B dominates C, then A dominates C. %% 5. EXHAUSTIVE ORDERING: All sisters are linearly ordered (by precedence). %% 6. NON-TANGLING: If A precedes B, then all nodes dominated by A %% precede all nodes dominated by B. %% %% Tree Representation: %% Compound terms of the form: node(Label, [Child1, Child2, ...]) %% Leaves are atoms or: leaf(Label) %% %% The X-bar trees from xbar.pl are compound terms like: %% s(np(d(she)), vp(v(eat), np(n(food)))) %% %% We operate on the raw compound term structure. %% ========================================================================== %% --- well_formed_tree(+Tree) --- %% Succeeds if the tree satisfies all well-formedness conditions. %% Fails with a descriptive error if any condition is violated. well_formed_tree(Tree) :- nonvar(Tree), % Condition 1: Tree has content (non-empty) Tree \= [], % Condition 2: Verify structural integrity valid_structure(Tree), % Condition 3: No cycles (antisymmetry — guaranteed by construction in SWI-Prolog terms) acyclic_term(Tree), % Condition 4: Every non-leaf has at least one child all_internals_have_children(Tree), % Condition 5: Check label validity all_nodes_labeled(Tree). %% --- validate_tree_structure(+Tree, -Report) --- %% Returns a detailed validation report as a list of [Check, Status] pairs. validate_tree_structure(Tree, Report) :- (nonvar(Tree) -> NonEmpty = pass ; NonEmpty = fail), (valid_structure(Tree) -> Structure = pass ; Structure = fail), (acyclic_term(Tree) -> Acyclic = pass ; Acyclic = fail), (all_internals_have_children(Tree) -> InternalCheck = pass ; InternalCheck = fail), (all_nodes_labeled(Tree) -> LabelCheck = pass ; LabelCheck = fail), count_nodes(Tree, NodeCount), tree_depth(Tree, Depth), Report = [ [non_empty, NonEmpty], [valid_structure, Structure], [acyclic, Acyclic], [internals_have_children, InternalCheck], [all_labeled, LabelCheck], [node_count, NodeCount], [depth, Depth] ]. %% ========================================================================== %% STRUCTURAL CHECKS %% ========================================================================== %% valid_structure(+Tree) %% A valid tree is either an atom (leaf), a number, a list (feature bundle), %% or a compound term whose arguments are all valid trees. valid_structure(Tree) :- atom(Tree), !. valid_structure(Tree) :- number(Tree), !. valid_structure(Tree) :- is_list(Tree), !. %% Feature lists like [person=3, number=sg, ...] are valid leaves valid_structure(Tree) :- compound(Tree), Tree =.. [_Functor | Args], Args \= [], maplist(valid_structure, Args). %% all_internals_have_children(+Tree) %% Every compound (internal) node must have at least one argument. %% This is guaranteed by the compound check but we verify explicitly. all_internals_have_children(Tree) :- atom(Tree), !. all_internals_have_children(Tree) :- number(Tree), !. all_internals_have_children(Tree) :- is_list(Tree), !. %% Feature lists are leaf nodes all_internals_have_children(Tree) :- compound(Tree), Tree =.. [_ | Args], Args \= [], maplist(all_internals_have_children, Args). %% all_nodes_labeled(+Tree) %% Every node must have a valid label (the functor for compound terms, %% the atom itself for leaves). all_nodes_labeled(Tree) :- atom(Tree), Tree \= '', !. all_nodes_labeled(Tree) :- number(Tree), !. all_nodes_labeled(Tree) :- is_list(Tree), !. %% Feature lists are valid leaf data all_nodes_labeled(Tree) :- compound(Tree), Tree =.. [Functor | Args], atom(Functor), Functor \= '', maplist(all_nodes_labeled, Args). %% ========================================================================== %% TREE METRICS %% ========================================================================== %% count_nodes(+Tree, -Count) %% Count total number of nodes (internal + leaves). count_nodes(Tree, 1) :- atom(Tree), !. count_nodes(Tree, 1) :- number(Tree), !. count_nodes(Tree, 1) :- is_list(Tree), !. %% Feature lists count as 1 node count_nodes(Tree, Count) :- compound(Tree), Tree =.. [_ | Args], maplist(count_nodes, Args, ChildCounts), sum_list(ChildCounts, ChildTotal), Count is ChildTotal + 1. %% tree_depth(+Tree, -Depth) %% Maximum depth from root to any leaf. tree_depth(Tree, 0) :- atom(Tree), !. tree_depth(Tree, 0) :- number(Tree), !. tree_depth(Tree, 0) :- is_list(Tree), !. %% Feature lists are depth 0 tree_depth(Tree, Depth) :- compound(Tree), Tree =.. [_ | Args], maplist(tree_depth, Args, ChildDepths), max_list(ChildDepths, MaxChild), Depth is MaxChild + 1. %% ========================================================================== %% DOMINANCE AND PRECEDENCE CHECKS %% ========================================================================== %% dominates(+Tree, +Ancestor, +Descendant) %% True if the node labeled Ancestor dominates the node labeled Descendant. %% (For use in external queries — the tree structure itself encodes dominance.) dominates(Tree, Ancestor, Descendant) :- compound(Tree), Tree =.. [Ancestor | Args], appears_in_any(Descendant, Args). dominates(Tree, Ancestor, Descendant) :- compound(Tree), Tree =.. [_ | Args], member(Child, Args), dominates(Child, Ancestor, Descendant). %% appears_in_any(+Label, +Trees) %% True if Label appears as any node label in any of the trees. appears_in_any(Label, [Tree | _]) :- appears_in(Label, Tree). appears_in_any(Label, [_ | Rest]) :- appears_in_any(Label, Rest). appears_in(Label, Label) :- atom(Label), !. appears_in(Label, Tree) :- compound(Tree), Tree =.. [Label | _], !. appears_in(Label, Tree) :- compound(Tree), Tree =.. [_ | Args], appears_in_any(Label, Args). %% c_commands(+Tree, +A, +B) %% A c-commands B iff the first branching node dominating A also dominates B, %% and A does not dominate B. (Reinhart 1976, used in Binding Theory) c_commands(Tree, A, B) :- first_branching_dominator(Tree, A, Dominator), dominates(Tree, Dominator, B), \+ dominates(Tree, A, B). %% first_branching_dominator(+Tree, +Node, -Dominator) %% Find the first branching node (>1 child) that dominates Node. first_branching_dominator(Tree, Node, Functor) :- compound(Tree), Tree =.. [Functor | Args], length(Args, Arity), Arity > 1, appears_in_any(Node, Args), !. first_branching_dominator(Tree, Node, Dominator) :- compound(Tree), Tree =.. [_ | Args], member(Child, Args), first_branching_dominator(Child, Node, Dominator).