XPF / Code /contextRep.py
niobures's picture
XPF
4a08ba7 verified
#!/usr/bin/python3
from __future__ import print_function
from math import log
from collections import deque
class contextRep(object):
    """
    Prefix tree of weighted sequences.

    Every node stands for one context (the sequence of keys leading to
    it) and stores how often that context was observed, how often a
    sequence ended exactly there, and one child node per continuation.
    """

    def __init__(self):
        self.count = 0.0  # times this context was observed
        self.contexts = dict()  # continuation:context dictionary
        self.precals = None  # probs can be precalculated
        self.terminal = 0.0  # number of times this context was final

    def __repr__(self):
        return repr([self.count, self.terminal, self.contexts])

    def __str__(self):
        return repr(self)

    @staticmethod
    def _log2(p):
        """
        Return the base-2 logarithm of p; a probability of zero maps to
        -inf instead of raising a math domain error.
        """
        return log(p, 2) if p != 0 else float("-inf")

    def add(self, seq, count, func=lambda x: None):
        """
        Add a full sequence to the representation.

        seq   -- iterable of hashable keys (a string, a list of
                 segments, ...); an empty sequence only counts as a
                 terminal observation of this node
        count -- weight added to every node along the path
        func  -- kept for backward compatibility only; it is never
                 called
        """
        # Walk the path iteratively: no slice copies and no recursion
        # depth limit on long sequences.
        node = self
        node.count += count
        for key in seq:
            if key not in node.contexts:
                node.contexts[key] = contextRep()
            node = node.contexts[key]
            node.count += count
        node.terminal += count

    def prob(self, key, log2=False):
        """
        Get the probability of observing a particular continuation in
        the given context.

        Unseen continuations have probability 0.0, whether or not the
        probabilities were precalculated. With log2=True the base-2
        logarithm is returned (-inf for a probability of zero).
        """
        if self.precals is None:
            child = self.contexts.get(key)
            ret = child.count / self.count if child is not None else 0.0
        else:
            ret = self.precals.get(key, 0.0)
        return self._log2(ret) if log2 else ret

    def probs(self, log2=False):
        """
        Get the probabilities of all continuations in the given context
        as a continuation:probability dictionary.

        When a precalculated snapshot exists and log2 is False, the
        snapshot dictionary itself is returned (not a copy).
        """
        if self.precals is None:
            return {key: self.prob(key, log2=log2) for key in self.contexts}
        if not log2:
            return self.precals
        return {key: self._log2(p) for key, p in self.precals.items()}

    def precalc(self):
        """
        Create a static image of the probabilities, for this node and
        all nodes below it. Calling it again refreshes the image from
        the current counts.
        """
        # Drop any earlier snapshot first; otherwise probs() would just
        # hand the stale snapshot back.
        self.precals = None
        self.precals = self.probs()
        for child in self.contexts.values():
            child.precalc()

    def contextProb(self, seq, terminal=False):
        """
        Create for each item in a sequence the probability of observing
        it in the given context.

        Once an item is unknown in its context, it and every following
        item get probability 0.0. With terminal=True one extra value is
        appended: the probability that the sequence ends after its last
        item (0.0 if the path left the tree).
        """
        node = self
        ret = []
        for key in seq:
            if node is not None and key in node.contexts:
                ret.append(node.prob(key, False))
                node = node.contexts[key]
            else:
                node = None
                ret.append(0.0)
        if terminal:
            ret.append(node.terminal / node.count if node is not None else 0.0)
        return ret

    def informativity_counts(self):
        """
        Compute, for every key occurring anywhere below this node, the
        count-weighted mean of -log2 P(key | context) over all contexts
        in which the key occurs.

        Returns a pair (values, counts): values maps key to that mean,
        counts maps key to the summed count used as its weight.
        """
        retvals = {key: -log(child.count / self.count, 2)
                   for key, child in self.contexts.items()}
        retcounts = {key: child.count for key, child in self.contexts.items()}
        for child in self.contexts.values():
            (subvals, subcounts) = child.informativity_counts()
            for subkey in subvals:
                if subkey in retvals:
                    (selfval, selfcount) = (retvals[subkey], retcounts[subkey])
                else:
                    (selfval, selfcount) = (0.0, 0.0)
                # Merge the running mean with the subtree's mean.
                retvals[subkey] = (selfval * selfcount +
                                   subvals[subkey] * subcounts[subkey]) \
                    / (subcounts[subkey] + selfcount)
                retcounts[subkey] = selfcount + subcounts[subkey]
        return (retvals, retcounts)

    def informativity(self):
        """
        Return only the key:informativity dictionary computed by
        informativity_counts().
        """
        (informativity, counts) = self.informativity_counts()
        return informativity

    def iter(self, terminal=False, log2=False):
        """
        Iterate over all stored sequences in sorted key order.

        Each sequence is yielded as a list of dictionaries with the keys
        "seg" (the key), "prob" (probability of the key in its context,
        or its negated base-2 logarithm when log2=True) and "count"
        (the count of the key in its context). With terminal=True a
        final entry with "seg" None describes the end of the sequence.
        """
        if log2:
            def logfunc(x):
                return -log(x, 2) if x < 1 else 0
        else:
            def logfunc(x):
                return x
        if self.terminal > 0:
            if terminal:
                # NOTE(review): "count" is self.count here although
                # "prob" is terminal / count; the entries below report
                # the child's own count. Possibly self.terminal was
                # intended -- confirm before changing.
                yield [{"seg": None,
                        "prob": logfunc(self.terminal / self.count),
                        "count": self.count}]
            else:
                yield []
        for key in sorted(self.contexts):
            child = self.contexts[key]
            for cont in child.iter(terminal=terminal, log2=log2):
                # A fresh dictionary per yielded sequence, so callers
                # may modify results independently.
                yield [{"seg": key,
                        "prob": logfunc(child.count / self.count),
                        "count": child.count}
                       ] + cont

    def __iter__(self):
        """
        Iterate like iter(log2=True, terminal=False).
        """
        for value in self.iter(log2=True, terminal=False):
            yield value

    def asdict(self):
        """
        Return a pure dictionary representation of the object.

        The "precals" entry is a flag: True if the probabilities of
        this node were precalculated. The snapshot itself is not stored.
        """
        ret = {"count": self.count,
               "contexts": {key: self.contexts[key].asdict()
                            for key in self.contexts},
               "precals": self.precals is not None,
               "terminal": self.terminal}
        return ret

    def populate(self, d):
        """
        Reconstruct the object in place from a dictionary created by
        asdict() and return self.

        The main use of asdict() + populate() is to store contextRep
        objects easily, e.g. as JSON. If the dictionary flags the node
        as precalculated, the snapshot is rebuilt from the restored
        counts.
        """
        self.count = d["count"]
        self.terminal = d["terminal"]
        self.contexts = {key: contextRep().populate(sub)
                         for key, sub in d["contexts"].items()}
        # Children restore their own snapshots from their own flags, so
        # only this node's snapshot is rebuilt here.
        self.precals = None
        if d.get("precals", False):
            self.precals = self.probs()
        return self

    def __eq__(self, other):
        """
        Object equality (only to check asdict / populate): same counts,
        same terminal counts and recursively equal continuations. The
        precalculated snapshot is not compared.
        """
        if not isinstance(other, contextRep):
            return False
        if self.terminal != other.terminal or self.count != other.count:
            return False
        # Same number of keys plus "every key of self is in other"
        # means both key sets are identical.
        if len(self.contexts) != len(other.contexts):
            return False
        return all(key in other.contexts and child == other.contexts[key]
                   for key, child in self.contexts.items())

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__.
        return not self.__eq__(other)

    # Mutable objects compared by value must not be hashable.
    __hash__ = None
if __name__ == "__main__":
    # Small demonstration: build a tree from a few weighted sequences,
    # print the derived statistics, then check that a copy rebuilt via
    # asdict() / populate() matches the original.
    tree = contextRep()
    for sequence, weight in (("ab", 5), ("ac", 5), ("a", 5), ("c", 15)):
        tree.add(sequence, weight)
    # Sequences need not be strings: a list of segments works as well.
    tree.add("P AO1 R T N OY0".split(), 1)

    print(tree)
    print(tree.informativity())
    print(tree.probs())
    for query in ("ab", "a"):
        print(tree.contextProb(query))
    print(tree.asdict())

    rebuilt = contextRep()
    rebuilt.populate(tree.asdict())
    print(rebuilt)
    print(rebuilt == tree)
    print(tree.informativity() == rebuilt.informativity())

    for path in tree.iter(terminal=True):
        print(path)