mlaf-grammar-engine / prolog /tree_validation.pl
Shaankar39's picture
Initial deploy: MLAF grammar engine
47b17cc verified
:- module(tree_validation, [
well_formed_tree/1,
validate_tree_structure/2
]).
%% ==========================================================================
%% MLAF Grammar Engine — Tree Well-Formedness Validation
%%
%% Formal tree well-formedness conditions from Partee, ter Meulen & Wall,
%% "Mathematical Methods in Linguistics", Chapter 16.
%%
%% A constituent structure tree T = <N, Q, D, P, L> must satisfy:
%%
%% 1. SINGLE ROOT: Exactly one node is not dominated by any other node.
%% 2. UNIQUE MOTHER: Every non-root node has exactly one mother.
%% 3. ANTISYMMETRY: If A dominates B, then B does not dominate A.
%% 4. TRANSITIVITY: If A dominates B and B dominates C, then A dominates C.
%% 5. EXHAUSTIVE ORDERING: All sisters are linearly ordered (by precedence).
%% 6. NON-TANGLING: If A precedes B, then all nodes dominated by A
%% precede all nodes dominated by B.
%%
%% Tree Representation:
%% Compound terms of the form: node(Label, [Child1, Child2, ...])
%% Leaves are atoms or: leaf(Label)
%%
%% The X-bar trees from xbar.pl are compound terms like:
%% s(np(d(she)), vp(v(eat), np(n(food))))
%%
%% We operate on the raw compound term structure.
%% ==========================================================================
%% --- well_formed_tree(+Tree) ---
%% Succeeds if the tree satisfies all well-formedness conditions.
%% Fails with a descriptive error if any condition is violated.
well_formed_tree(Tree) :-
nonvar(Tree),
% Condition 1: Tree has content (non-empty)
Tree \= [],
% Condition 2: Verify structural integrity
valid_structure(Tree),
% Condition 3: No cycles (antisymmetry — guaranteed by construction in SWI-Prolog terms)
acyclic_term(Tree),
% Condition 4: Every non-leaf has at least one child
all_internals_have_children(Tree),
% Condition 5: Check label validity
all_nodes_labeled(Tree).
%% --- validate_tree_structure(+Tree, -Report) ---
%% Returns a detailed validation report as a list of [Check, Status] pairs.
validate_tree_structure(Tree, Report) :-
(nonvar(Tree) -> NonEmpty = pass ; NonEmpty = fail),
(valid_structure(Tree) -> Structure = pass ; Structure = fail),
(acyclic_term(Tree) -> Acyclic = pass ; Acyclic = fail),
(all_internals_have_children(Tree) -> InternalCheck = pass ; InternalCheck = fail),
(all_nodes_labeled(Tree) -> LabelCheck = pass ; LabelCheck = fail),
count_nodes(Tree, NodeCount),
tree_depth(Tree, Depth),
Report = [
[non_empty, NonEmpty],
[valid_structure, Structure],
[acyclic, Acyclic],
[internals_have_children, InternalCheck],
[all_labeled, LabelCheck],
[node_count, NodeCount],
[depth, Depth]
].
%% ==========================================================================
%% STRUCTURAL CHECKS
%% ==========================================================================
%% valid_structure(+Tree)
%% A valid tree is either an atom (leaf), a number, a list (feature bundle),
%% or a compound term whose arguments are all valid trees.
valid_structure(Tree) :-
atom(Tree), !.
valid_structure(Tree) :-
number(Tree), !.
valid_structure(Tree) :-
is_list(Tree), !. %% Feature lists like [person=3, number=sg, ...] are valid leaves
valid_structure(Tree) :-
compound(Tree),
Tree =.. [_Functor | Args],
Args \= [],
maplist(valid_structure, Args).
%% all_internals_have_children(+Tree)
%% Every compound (internal) node must have at least one argument.
%% This is guaranteed by the compound check but we verify explicitly.
all_internals_have_children(Tree) :-
atom(Tree), !.
all_internals_have_children(Tree) :-
number(Tree), !.
all_internals_have_children(Tree) :-
is_list(Tree), !. %% Feature lists are leaf nodes
all_internals_have_children(Tree) :-
compound(Tree),
Tree =.. [_ | Args],
Args \= [],
maplist(all_internals_have_children, Args).
%% all_nodes_labeled(+Tree)
%% Every node must have a valid label (the functor for compound terms,
%% the atom itself for leaves).
all_nodes_labeled(Tree) :-
atom(Tree),
Tree \= '', !.
all_nodes_labeled(Tree) :-
number(Tree), !.
all_nodes_labeled(Tree) :-
is_list(Tree), !. %% Feature lists are valid leaf data
all_nodes_labeled(Tree) :-
compound(Tree),
Tree =.. [Functor | Args],
atom(Functor),
Functor \= '',
maplist(all_nodes_labeled, Args).
%% ==========================================================================
%% TREE METRICS
%% ==========================================================================
%% count_nodes(+Tree, -Count)
%% Count total number of nodes (internal + leaves).
count_nodes(Tree, 1) :-
atom(Tree), !.
count_nodes(Tree, 1) :-
number(Tree), !.
count_nodes(Tree, 1) :-
is_list(Tree), !. %% Feature lists count as 1 node
count_nodes(Tree, Count) :-
compound(Tree),
Tree =.. [_ | Args],
maplist(count_nodes, Args, ChildCounts),
sum_list(ChildCounts, ChildTotal),
Count is ChildTotal + 1.
%% tree_depth(+Tree, -Depth)
%% Maximum depth from root to any leaf.
tree_depth(Tree, 0) :-
atom(Tree), !.
tree_depth(Tree, 0) :-
number(Tree), !.
tree_depth(Tree, 0) :-
is_list(Tree), !. %% Feature lists are depth 0
tree_depth(Tree, Depth) :-
compound(Tree),
Tree =.. [_ | Args],
maplist(tree_depth, Args, ChildDepths),
max_list(ChildDepths, MaxChild),
Depth is MaxChild + 1.
%% ==========================================================================
%% DOMINANCE AND PRECEDENCE CHECKS
%% ==========================================================================
%% dominates(+Tree, +Ancestor, +Descendant)
%% True if the node labeled Ancestor dominates the node labeled Descendant.
%% (For use in external queries — the tree structure itself encodes dominance.)
dominates(Tree, Ancestor, Descendant) :-
compound(Tree),
Tree =.. [Ancestor | Args],
appears_in_any(Descendant, Args).
dominates(Tree, Ancestor, Descendant) :-
compound(Tree),
Tree =.. [_ | Args],
member(Child, Args),
dominates(Child, Ancestor, Descendant).
%% appears_in_any(+Label, +Trees)
%% True if Label appears as any node label in any of the trees.
appears_in_any(Label, [Tree | _]) :-
appears_in(Label, Tree).
appears_in_any(Label, [_ | Rest]) :-
appears_in_any(Label, Rest).
appears_in(Label, Label) :-
atom(Label), !.
appears_in(Label, Tree) :-
compound(Tree),
Tree =.. [Label | _], !.
appears_in(Label, Tree) :-
compound(Tree),
Tree =.. [_ | Args],
appears_in_any(Label, Args).
%% c_commands(+Tree, +A, +B)
%% A c-commands B iff the first branching node dominating A also dominates B,
%% and A does not dominate B. (Reinhart 1976, used in Binding Theory)
c_commands(Tree, A, B) :-
first_branching_dominator(Tree, A, Dominator),
dominates(Tree, Dominator, B),
\+ dominates(Tree, A, B).
%% first_branching_dominator(+Tree, +Node, -Dominator)
%% Find the first branching node (>1 child) that dominates Node.
first_branching_dominator(Tree, Node, Functor) :-
compound(Tree),
Tree =.. [Functor | Args],
length(Args, Arity),
Arity > 1,
appears_in_any(Node, Args), !.
first_branching_dominator(Tree, Node, Dominator) :-
compound(Tree),
Tree =.. [_ | Args],
member(Child, Args),
first_branching_dominator(Child, Node, Dominator).