| // Copyright 2025 The Go Authors. All rights reserved. | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| package unify | |
| import ( | |
| "fmt" | |
| "iter" | |
| "reflect" | |
| "strings" | |
| ) | |
| // An envSet is an immutable set of environments, where each environment is a | |
| // mapping from [ident]s to [Value]s. | |
| // | |
| // To keep this compact, we use an algebraic representation similar to | |
| // relational algebra. The atoms are zero, unit, or a singular binding: | |
| // | |
| // - A singular binding {x: v} is an environment set consisting of a single | |
| // environment that binds a single ident x to a single value v. | |
| // | |
| // - Zero (0) is the empty set. | |
| // | |
| // - Unit (1) is an environment set consisting of a single, empty environment | |
| // (no bindings). | |
| // | |
| // From these, we build up more complex sets of environments using sums and | |
| // cross products: | |
| // | |
| // - A sum, E + F, is simply the union of the two environment sets: E ∪ F | |
| // | |
| // - A cross product, E ⨯ F, is the Cartesian product of the two environment | |
| // sets, followed by joining each pair of environments: {e ⊕ f | (e, f) ∊ E ⨯ F} | |
| // | |
| // The join of two environments, e ⊕ f, is an environment that contains all of | |
| // the bindings in either e or f. To detect bugs, it is an error if an | |
| // identifier is bound in both e and f (however, see below for what we could do | |
| // differently). | |
| // | |
| // Environment sets form a commutative semiring and thus obey the usual | |
| // commutative semiring rules: | |
| // | |
| // e + 0 = e | |
| // e ⨯ 0 = 0 | |
| // e ⨯ 1 = e | |
| // e + f = f + e | |
| // e ⨯ f = f ⨯ e | |
| // | |
| // Furthermore, environments sets are additively and multiplicatively idempotent | |
| // because + and ⨯ are themselves defined in terms of sets: | |
| // | |
| // e + e = e | |
| // e ⨯ e = e | |
| // | |
| // # Examples | |
| // | |
| // To represent {{x: 1, y: 1}, {x: 2, y: 2}}, we build the two environments and | |
| // sum them: | |
| // | |
| // ({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2}) | |
| // | |
| // If we add a third variable z that can be 1 or 2, independent of x and y, we | |
| // get four logical environments: | |
| // | |
| // {x: 1, y: 1, z: 1} | |
| // {x: 2, y: 2, z: 1} | |
| // {x: 1, y: 1, z: 2} | |
| // {x: 2, y: 2, z: 2} | |
| // | |
| // This could be represented as a sum of all four environments, but because z is | |
| // independent, we can use a more compact representation: | |
| // | |
| // (({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2})) ⨯ ({z: 1} + {z: 2}) | |
| // | |
| // # Generalized cross product | |
| // | |
| // While cross-product is currently restricted to disjoint environments, we | |
| // could generalize the definition of joining two environments to: | |
| // | |
| // {xₖ: vₖ} ⊕ {xₖ: wₖ} = {xₖ: vₖ ∩ wₖ} (where unbound idents are bound to the [Top] value, ⟙) | |
| // | |
| // where v ∩ w is the unification of v and w. This itself could be coarsened to | |
| // | |
| // v ∩ w = v if w = ⟙ | |
| // = w if v = ⟙ | |
| // = v if v = w | |
| // = 0 otherwise | |
| // | |
| // We could use this rule to implement substitution. For example, E ⨯ {x: 1} | |
| // narrows environment set E to only environments in which x is bound to 1. But | |
| // we currently don't do this. | |
| type envSet struct { | |
| root *envExpr | |
| } | |
| type envExpr struct { | |
| // TODO: A tree-based data structure for this may not be ideal, since it | |
| // involves a lot of walking to find things and we often have to do deep | |
| // rewrites anyway for partitioning. Would some flattened array-style | |
| // representation be better, possibly combined with an index of ident uses? | |
| // We could even combine that with an immutable array abstraction (ala | |
| // Clojure) that could enable more efficient construction operations. | |
| kind envExprKind | |
| // For envBinding | |
| id *ident | |
| val *Value | |
| // For sum or product. Len must be >= 2 and none of the elements can have | |
| // the same kind as this node. | |
| operands []*envExpr | |
| } | |
| type envExprKind byte | |
| const ( | |
| envZero envExprKind = iota | |
| envUnit | |
| envProduct | |
| envSum | |
| envBinding | |
| ) | |
| var ( | |
| // topEnv is the unit value (multiplicative identity) of a [envSet]. | |
| topEnv = envSet{envExprUnit} | |
| // bottomEnv is the zero value (additive identity) of a [envSet]. | |
| bottomEnv = envSet{envExprZero} | |
| envExprZero = &envExpr{kind: envZero} | |
| envExprUnit = &envExpr{kind: envUnit} | |
| ) | |
| // bind binds id to each of vals in e. | |
| // | |
| // Its panics if id is already bound in e. | |
| // | |
| // Environments are typically initially constructed by starting with [topEnv] | |
| // and calling bind one or more times. | |
| func (e envSet) bind(id *ident, vals ...*Value) envSet { | |
| if e.isEmpty() { | |
| return bottomEnv | |
| } | |
| // TODO: If any of vals are _, should we just drop that val? We're kind of | |
| // inconsistent about whether an id missing from e means id is invalid or | |
| // means id is _. | |
| // Check that id isn't present in e. | |
| for range e.root.bindings(id) { | |
| panic("id " + id.name + " already present in environment") | |
| } | |
| // Create a sum of all the values. | |
| bindings := make([]*envExpr, 0, 1) | |
| for _, val := range vals { | |
| bindings = append(bindings, &envExpr{kind: envBinding, id: id, val: val}) | |
| } | |
| // Multiply it in. | |
| return envSet{newEnvExprProduct(e.root, newEnvExprSum(bindings...))} | |
| } | |
| func (e envSet) isEmpty() bool { | |
| return e.root.kind == envZero | |
| } | |
| // bindings yields all [envBinding] nodes in e with the given id. If id is nil, | |
| // it yields all binding nodes. | |
| func (e *envExpr) bindings(id *ident) iter.Seq[*envExpr] { | |
| // This is just a pre-order walk and it happens this is the only thing we | |
| // need a pre-order walk for. | |
| return func(yield func(*envExpr) bool) { | |
| var rec func(e *envExpr) bool | |
| rec = func(e *envExpr) bool { | |
| if e.kind == envBinding && (id == nil || e.id == id) { | |
| if !yield(e) { | |
| return false | |
| } | |
| } | |
| for _, o := range e.operands { | |
| if !rec(o) { | |
| return false | |
| } | |
| } | |
| return true | |
| } | |
| rec(e) | |
| } | |
| } | |
| // newEnvExprProduct constructs a product node from exprs, performing | |
| // simplifications. It does NOT check that bindings are disjoint. | |
| func newEnvExprProduct(exprs ...*envExpr) *envExpr { | |
| factors := make([]*envExpr, 0, 2) | |
| for _, expr := range exprs { | |
| switch expr.kind { | |
| case envZero: | |
| return envExprZero | |
| case envUnit: | |
| // No effect on product | |
| case envProduct: | |
| factors = append(factors, expr.operands...) | |
| default: | |
| factors = append(factors, expr) | |
| } | |
| } | |
| if len(factors) == 0 { | |
| return envExprUnit | |
| } else if len(factors) == 1 { | |
| return factors[0] | |
| } | |
| return &envExpr{kind: envProduct, operands: factors} | |
| } | |
| // newEnvExprSum constructs a sum node from exprs, performing simplifications. | |
| func newEnvExprSum(exprs ...*envExpr) *envExpr { | |
| // TODO: If all of envs are products (or bindings), factor any common terms. | |
| // E.g., x * y + x * z ==> x * (y + z). This is easy to do for binding | |
| // terms, but harder to do for more general terms. | |
| var have smallSet[*envExpr] | |
| terms := make([]*envExpr, 0, 2) | |
| for _, expr := range exprs { | |
| switch expr.kind { | |
| case envZero: | |
| // No effect on sum | |
| case envSum: | |
| for _, expr1 := range expr.operands { | |
| if have.Add(expr1) { | |
| terms = append(terms, expr1) | |
| } | |
| } | |
| default: | |
| if have.Add(expr) { | |
| terms = append(terms, expr) | |
| } | |
| } | |
| } | |
| if len(terms) == 0 { | |
| return envExprZero | |
| } else if len(terms) == 1 { | |
| return terms[0] | |
| } | |
| return &envExpr{kind: envSum, operands: terms} | |
| } | |
| func crossEnvs(env1, env2 envSet) envSet { | |
| // Confirm that envs have disjoint idents. | |
| var ids1 smallSet[*ident] | |
| for e := range env1.root.bindings(nil) { | |
| ids1.Add(e.id) | |
| } | |
| for e := range env2.root.bindings(nil) { | |
| if ids1.Has(e.id) { | |
| panic(fmt.Sprintf("%s bound on both sides of cross-product", e.id.name)) | |
| } | |
| } | |
| return envSet{newEnvExprProduct(env1.root, env2.root)} | |
| } | |
| func unionEnvs(envs ...envSet) envSet { | |
| exprs := make([]*envExpr, len(envs)) | |
| for i := range envs { | |
| exprs[i] = envs[i].root | |
| } | |
| return envSet{newEnvExprSum(exprs...)} | |
| } | |
| // envPartition is a subset of an env where id is bound to value in all | |
| // deterministic environments. | |
| type envPartition struct { | |
| id *ident | |
| value *Value | |
| env envSet | |
| } | |
| // partitionBy splits e by distinct bindings of id and removes id from each | |
| // partition. | |
| // | |
| // If there are environments in e where id is not bound, they will not be | |
| // reflected in any partition. | |
| // | |
| // It panics if e is bottom, since attempting to partition an empty environment | |
| // set almost certainly indicates a bug. | |
| func (e envSet) partitionBy(id *ident) []envPartition { | |
| if e.isEmpty() { | |
| // We could return zero partitions, but getting here at all almost | |
| // certainly indicates a bug. | |
| panic("cannot partition empty environment set") | |
| } | |
| // Emit a partition for each value of id. | |
| var seen smallSet[*Value] | |
| var parts []envPartition | |
| for n := range e.root.bindings(id) { | |
| if !seen.Add(n.val) { | |
| // Already emitted a partition for this value. | |
| continue | |
| } | |
| parts = append(parts, envPartition{ | |
| id: id, | |
| value: n.val, | |
| env: envSet{e.root.substitute(id, n.val)}, | |
| }) | |
| } | |
| return parts | |
| } | |
| // substitute replaces bindings of id to val with 1 and bindings of id to any | |
| // other value with 0 and simplifies the result. | |
| func (e *envExpr) substitute(id *ident, val *Value) *envExpr { | |
| switch e.kind { | |
| default: | |
| panic("bad kind") | |
| case envZero, envUnit: | |
| return e | |
| case envBinding: | |
| if e.id != id { | |
| return e | |
| } else if e.val != val { | |
| return envExprZero | |
| } else { | |
| return envExprUnit | |
| } | |
| case envProduct, envSum: | |
| // Substitute each operand. Sometimes, this won't change anything, so we | |
| // build the new operands list lazily. | |
| var nOperands []*envExpr | |
| for i, op := range e.operands { | |
| nOp := op.substitute(id, val) | |
| if nOperands == nil && op != nOp { | |
| // Operand diverged; initialize nOperands. | |
| nOperands = make([]*envExpr, 0, len(e.operands)) | |
| nOperands = append(nOperands, e.operands[:i]...) | |
| } | |
| if nOperands != nil { | |
| nOperands = append(nOperands, nOp) | |
| } | |
| } | |
| if nOperands == nil { | |
| // Nothing changed. | |
| return e | |
| } | |
| if e.kind == envProduct { | |
| return newEnvExprProduct(nOperands...) | |
| } else { | |
| return newEnvExprSum(nOperands...) | |
| } | |
| } | |
| } | |
| // A smallSet is a set optimized for stack allocation when small. | |
| type smallSet[T comparable] struct { | |
| array [32]T | |
| n int | |
| m map[T]struct{} | |
| } | |
| // Has returns whether val is in set. | |
| func (s *smallSet[T]) Has(val T) bool { | |
| arr := s.array[:s.n] | |
| for i := range arr { | |
| if arr[i] == val { | |
| return true | |
| } | |
| } | |
| _, ok := s.m[val] | |
| return ok | |
| } | |
| // Add adds val to the set and returns true if it was added (not already | |
| // present). | |
| func (s *smallSet[T]) Add(val T) bool { | |
| // Test for presence. | |
| if s.Has(val) { | |
| return false | |
| } | |
| // Add it | |
| if s.n < len(s.array) { | |
| s.array[s.n] = val | |
| s.n++ | |
| } else { | |
| if s.m == nil { | |
| s.m = make(map[T]struct{}) | |
| } | |
| s.m[val] = struct{}{} | |
| } | |
| return true | |
| } | |
| type ident struct { | |
| _ [0]func() // Not comparable (only compare *ident) | |
| name string | |
| } | |
| type Var struct { | |
| id *ident | |
| } | |
| func (d Var) Exact() bool { | |
| // These can't appear in concrete Values. | |
| panic("Exact called on non-concrete Value") | |
| } | |
| func (d Var) WhyNotExact() string { | |
| // These can't appear in concrete Values. | |
| return "WhyNotExact called on non-concrete Value" | |
| } | |
| func (d Var) decode(rv reflect.Value) error { | |
| return &inexactError{"var", rv.Type().String()} | |
| } | |
| func (d Var) unify(w *Value, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { | |
| // TODO: Vars from !sums in the input can have a huge number of values. | |
| // Unifying these could be way more efficient with some indexes over any | |
| // exact values we can pull out, like Def fields that are exact Strings. | |
| // Maybe we try to produce an array of yes/no/maybe matches and then we only | |
| // have to do deeper evaluation of the maybes. We could probably cache this | |
| // on an envTerm. It may also help to special-case Var/Var unification to | |
| // pick which one to index versus enumerate. | |
| if vd, ok := w.Domain.(Var); ok && d.id == vd.id { | |
| // Unifying $x with $x results in $x. If we descend into this we'll have | |
| // problems because we strip $x out of the environment to keep ourselves | |
| // honest and then can't find it on the other side. | |
| // | |
| // TODO: I'm not positive this is the right fix. | |
| return vd, e, nil | |
| } | |
| // We need to unify w with the value of d in each possible environment. We | |
| // can save some work by grouping environments by the value of d, since | |
| // there will be a lot of redundancy here. | |
| var nEnvs []envSet | |
| envParts := e.partitionBy(d.id) | |
| for i, envPart := range envParts { | |
| exit := uf.enterVar(d.id, i) | |
| // Each branch logically gets its own copy of the initial environment | |
| // (narrowed down to just this binding of the variable), and each branch | |
| // may result in different changes to that starting environment. | |
| res, e2, err := w.unify(envPart.value, envPart.env, swap, uf) | |
| exit.exit() | |
| if err != nil { | |
| return nil, envSet{}, err | |
| } | |
| if res.Domain == nil { | |
| // This branch entirely failed to unify, so it's gone. | |
| continue | |
| } | |
| nEnv := e2.bind(d.id, res) | |
| nEnvs = append(nEnvs, nEnv) | |
| } | |
| if len(nEnvs) == 0 { | |
| // All branches failed | |
| return nil, bottomEnv, nil | |
| } | |
| // The effect of this is entirely captured in the environment. We can return | |
| // back the same Bind node. | |
| return d, unionEnvs(nEnvs...), nil | |
| } | |
| // An identPrinter maps [ident]s to unique string names. | |
| type identPrinter struct { | |
| ids map[*ident]string | |
| idGen map[string]int | |
| } | |
| func (p *identPrinter) unique(id *ident) string { | |
| if p.ids == nil { | |
| p.ids = make(map[*ident]string) | |
| p.idGen = make(map[string]int) | |
| } | |
| name, ok := p.ids[id] | |
| if !ok { | |
| gen := p.idGen[id.name] | |
| p.idGen[id.name]++ | |
| if gen == 0 { | |
| name = id.name | |
| } else { | |
| name = fmt.Sprintf("%s#%d", id.name, gen) | |
| } | |
| p.ids[id] = name | |
| } | |
| return name | |
| } | |
| func (p *identPrinter) slice(ids []*ident) string { | |
| var strs []string | |
| for _, id := range ids { | |
| strs = append(strs, p.unique(id)) | |
| } | |
| return fmt.Sprintf("[%s]", strings.Join(strs, ", ")) | |
| } | |