XPF / Code /contextRep.py

XPF

4a08ba7 verified 3 months ago

6.49 kB

	#!/usr/bin/python3
	from __future__ import print_function

	from math import log
	from collections import deque

	class contextRep(object):
	    """
	    Counted prefix tree (trie) over sequences.

	    Each node records how often its prefix was observed (``count``), how
	    often a sequence ended exactly at this node (``terminal``), and one
	    child node per observed continuation (``contexts``). ``precals`` is
	    either None or a cached ``{continuation: probability}`` dictionary
	    created by ``precalc``.
	    """

	    def __init__(self):
	        self.count = 0.0  # times this context was observed
	        self.contexts = dict()  # continuation:context dictionary
	        self.precals = None  # probs can be precalculated
	        self.terminal = 0.0  # number of times this context was final

	    def __repr__(self):
	        return repr([self.count, self.terminal, self.contexts])

	    def __str__(self):
	        return repr(self)

	    def add(self, seq, count, func=lambda x: None):
	        """
	        Add a full sequence to the representation.

	        seq   -- iterable of hashable items (e.g. a string or a list)
	        count -- weight added to every node along the path and to the
	                 ``terminal`` counter of the last node
	        func  -- unused; kept so existing callers keep working

	        The walk is iterative, so long sequences neither hit the
	        recursion limit nor pay for repeated slicing. Probabilities cached
	        by ``precalc`` are NOT refreshed here; call ``precalc`` again
	        after adding data.
	        """
	        node = self
	        for key in seq:
	            node.count += count
	            child = node.contexts.get(key)
	            if child is None:
	                child = node.contexts[key] = contextRep()
	            node = child

	        # The node reached after the last item is where the sequence ends.
	        node.count += count
	        node.terminal += count

	    def prob(self, key, log2=False):
	        """
	        Get the probability of observing a particular continuation in
	        the given context.

	        Returns 0.0 for a continuation that is unknown, both with and
	        without precalculated probabilities. With ``log2=True`` the base-2
	        logarithm is returned instead; ``math.log`` raises ValueError when
	        the probability is 0.
	        """
	        if self.precals is None:
	            child = self.contexts.get(key)
	            ret = child.count / self.count if child is not None else 0.0
	        else:
	            # .get keeps unseen keys consistent with the branch above.
	            ret = self.precals.get(key, 0.0)

	        return log(ret, 2) if log2 else ret

	    def probs(self, log2=False):
	        """
	        Get the probabilities of all continuations in the given context
	        as a ``{continuation: probability}`` dictionary.

	        When probabilities were precalculated and ``log2`` is false, the
	        cached dictionary itself is returned (not a copy).
	        """
	        if self.precals is None:
	            return {key: self.prob(key, log2=log2) for key in self.contexts}
	        if not log2:
	            return self.precals
	        return {key: log(p, 2) for key, p in self.precals.items()}

	    def precalc(self):
	        """
	        Create a static image of the probabilities for this node and all
	        of its descendants.
	        """
	        # Drop any previous image first; otherwise probs() would simply
	        # hand back the stale cache instead of recomputing from the counts.
	        self.precals = None
	        self.precals = self.probs()
	        for child in self.contexts.values():
	            child.precalc()

	    def contextProb(self, seq, terminal=False):
	        """
	        Create for each item in a sequence the probability of observing
	        it in the given context.

	        Once an item is not found, that item and all later ones get 0.0.
	        With ``terminal=True`` one extra value is appended: the share of
	        observations of the final context that ended there (0.0 when the
	        path was not found or the context has a count of zero).
	        """
	        context = self
	        ret = []
	        for key in seq:
	            if context is not None and key in context.contexts:
	                ret.append(context.prob(key, False))
	                context = context.contexts[key]
	            else:
	                context = None
	                ret.append(0.0)

	        if terminal:
	            # Guard the division: an empty trie has a count of 0.
	            if context is not None and context.count:
	                ret.append(context.terminal / context.count)
	            else:
	                ret.append(0.0)
	        return ret

	    def informativity_counts(self):
	        """
	        Compute, for every item found anywhere below this node, the
	        count-weighted mean of -log2(P(item | context)) over all contexts
	        in which it occurs.

	        Returns a tuple ``(values, counts)`` of two dictionaries keyed by
	        item: the weighted means and the total counts used as weights.
	        """
	        retvals = {key: (-log(child.count / self.count, 2))
	                   for key, child in self.contexts.items()}
	        retcounts = {key: child.count for key, child in self.contexts.items()}

	        for child in self.contexts.values():
	            (subvals, subcounts) = child.informativity_counts()
	            for seg, subval in subvals.items():
	                selfval = retvals.get(seg, 0.0)
	                selfcount = retcounts.get(seg, 0.0)
	                subcount = subcounts[seg]
	                # Merge the two weighted means into one.
	                retvals[seg] = (selfval * selfcount +
	                                subval * subcount) / (subcount + selfcount)
	                retcounts[seg] = selfcount + subcount

	        return (retvals, retcounts)

	    def informativity(self):
	        """Return only the values dictionary of ``informativity_counts``."""
	        (informativity, counts) = self.informativity_counts()
	        return informativity

	    def iter(self, terminal=False, log2=False):
	        """
	        Yield every stored sequence as a list of dictionaries with the
	        keys "seg", "prob" and "count", visiting continuations in sorted
	        order.

	        terminal -- when true, each sequence ends with an entry whose
	                    "seg" is None describing the end-of-sequence event
	        log2     -- when true, "prob" holds -log2(p) for p < 1 and 0
	                    otherwise, instead of p itself
	        """
	        if log2:
	            def transform(p):
	                return -log(p, 2) if p < 1 else 0
	        else:
	            def transform(p):
	                return p

	        if self.terminal > 0:
	            # NOTE(review): "count" reports self.count (all observations of
	            # this node), not self.terminal -- confirm this is intended.
	            yield [{"seg": None,
	                    "prob": transform(self.terminal / self.count),
	                    "count": self.count}] if terminal else []

	        for key in sorted(self.contexts):
	            child = self.contexts[key]
	            for cont in child.iter(terminal=terminal, log2=log2):
	                # A fresh dictionary per yielded list, so results never
	                # share mutable entries.
	                yield [{"seg": key,
	                        "prob": transform(child.count / self.count),
	                        "count": child.count}
	                       ] + cont

	    def __iter__(self):
	        for value in self.iter(log2=True, terminal=False):
	            yield value

	    def asdict(self):
	        """
	        Return a pure dictionary representation of the object.

	        "precals" is a flag: True when probabilities were precalculated
	        for this node, False otherwise.
	        """
	        return {"count": self.count,
	                "contexts": {key: child.asdict()
	                             for key, child in self.contexts.items()},
	                "precals": self.precals is not None,
	                "terminal": self.terminal}

	    def populate(self, d):
	        """
	        Reconstruct this object in place from a dictionary created by
	        ``asdict`` and return self.

	        The main reason to use this together with ``asdict`` is to save
	        contextRep objects in R / json easily.
	        """
	        self.count = d["count"]
	        self.terminal = d["terminal"]
	        self.contexts = {key: contextRep().populate(sub)
	                         for key, sub in d["contexts"].items()}
	        # Every node carries its own flag, so only this node's image is
	        # rebuilt here; the children restored theirs in the line above.
	        self.precals = None
	        if d.get("precals", False):
	            self.precals = self.probs()
	        return self

	    @classmethod
	    def fromdict(cls, d):
	        """Build a new object from a dictionary created by ``asdict``."""
	        return cls().populate(d)

	    def __eq__(self, other):
	        """
	        Object equality (only to check asdict / populate).

	        Compares counts, terminal counts and the child contexts
	        recursively; precalculated probabilities are not compared.
	        """
	        if not isinstance(other, contextRep):
	            return False
	        if self.terminal != other.terminal or self.count != other.count:
	            return False
	        # Same size plus "every key of self is in other" means same keys.
	        if len(self.contexts) != len(other.contexts):
	            return False
	        return all(key in other.contexts and child == other.contexts[key]
	                   for key, child in self.contexts.items())

	    def __ne__(self, other):
	        return not self.__eq__(other)

	    # Instances are mutable and compare by value, so they are unhashable.
	    __hash__ = None


	if __name__ == "__main__":
	    # Demo / smoke test: build a small representation, print its derived
	    # views, then round-trip it through asdict() / populate().
	    samples = (
	        ("ab", 5),
	        ("ac", 5),
	        ("a", 5),
	        ("c", 15),
	        ("P AO1 R T N OY0".split(), 1),
	    )

	    rep = contextRep()
	    for sequence, weight in samples:
	        rep.add(sequence, weight)

	    print(rep)
	    print(rep.informativity())
	    print(rep.probs())
	    for query in ("ab", "a"):
	        print(rep.contextProb(query))
	    print(rep.asdict())

	    # populate() fills the fresh object in place from the dictionary form.
	    clone = contextRep()
	    clone.populate(rep.asdict())
	    print(clone)
	    print(clone == rep)
	    print(rep.informativity() == clone.informativity())

	    # Every stored sequence, including the end-of-sequence entries.
	    for path in rep.iter(terminal=True):
	        print(path)