Upload folder using huggingface_hub

41f6dd8 verified about 1 year ago

6.01 kB

	#include "search/vertex.hh"

	#include "search/context.hh"

	#include <boost/unordered_map.hpp>

	#include <algorithm>
	#include <functional>
	#include <cassert>

	namespace search {

	namespace {

	const uint64_t kCompleteAdd = static_cast<uint64_t>(-1);

	class DivideLeft {
	public:
	explicit DivideLeft(unsigned char index)
	: index_(index) {}

	uint64_t operator()(const lm::ngram::ChartState &state) const {
	return (index_ < state.left.length) ?
	state.left.pointers[index_] :
	(kCompleteAdd - state.left.full);
	}

	private:
	unsigned char index_;
	};

	class DivideRight {
	public:
	explicit DivideRight(unsigned char index)
	: index_(index) {}

	uint64_t operator()(const lm::ngram::ChartState &state) const {
	return (index_ < state.right.length) ?
	static_cast<uint64_t>(state.right.words[index_]) :
	(kCompleteAdd - state.left.full);
	}

	private:
	unsigned char index_;
	};

	template <class Divider> void Split(const Divider &divider, const std::vector<HypoState> &hypos, std::vector<VertexNode> &extend) {
	// Map from divider to index in extend.
	typedef boost::unordered_map<uint64_t, std::size_t> Lookup;
	Lookup lookup;
	for (std::vector<HypoState>::const_iterator i = hypos.begin(); i != hypos.end(); ++i) {
	uint64_t key = divider(i->state);
	std::pair<Lookup::iterator, bool> res(lookup.insert(std::make_pair(key, extend.size())));
	if (res.second) {
	extend.resize(extend.size() + 1);
	extend.back().AppendHypothesis(*i);
	} else {
	extend[res.first->second].AppendHypothesis(*i);
	}
	}
	//assert((extend.size() != 1) \|\| (hypos.size() == 1));
	}

	lm::WordIndex Identify(const lm::ngram::Right &right, unsigned char index) {
	return right.words[index];
	}

	uint64_t Identify(const lm::ngram::Left &left, unsigned char index) {
	return left.pointers[index];
	}

	template <class Side> class DetermineSame {
	public:
	DetermineSame(const Side &side, unsigned char guaranteed)
	: side_(side), guaranteed_(guaranteed), shared_(side.length), complete_(true) {}

	void Consider(const Side &other) {
	if (shared_ != other.length) {
	complete_ = false;
	if (shared_ > other.length)
	shared_ = other.length;
	}
	for (unsigned char i = guaranteed_; i < shared_; ++i) {
	if (Identify(side_, i) != Identify(other, i)) {
	shared_ = i;
	complete_ = false;
	return;
	}
	}
	}

	unsigned char Shared() const { return shared_; }

	bool Complete() const { return complete_; }

	private:
	const Side &side_;
	unsigned char guaranteed_, shared_;
	bool complete_;
	};

	// Custom enum to save memory: valid values of policy_.
	// Alternate and there is still alternation to do.
	const unsigned char kPolicyAlternate = 0;
	// Branch based on left state only, because right ran out or this is a left tree.
	const unsigned char kPolicyOneLeft = 1;
	// Branch based on right state only.
	const unsigned char kPolicyOneRight = 2;
	// Reveal everything in the next branch. Used to terminate the left/right policies.
	// static const unsigned char kPolicyEverything = 3;

	} // namespace

	namespace {
	struct GreaterByScore : public std::binary_function<const HypoState &, const HypoState &, bool> {
	bool operator()(const HypoState &first, const HypoState &second) const {
	return first.score > second.score;
	}
	};
	} // namespace

	void VertexNode::FinishRoot() {
	std::sort(hypos_.begin(), hypos_.end(), GreaterByScore());
	extend_.clear();
	// HACK: extend to one hypo so that root can be blank.
	state_.left.full = false;
	state_.left.length = 0;
	state_.right.length = 0;
	right_full_ = false;
	niceness_ = 0;
	policy_ = kPolicyAlternate;
	if (hypos_.size() == 1) {
	extend_.resize(1);
	extend_.front().AppendHypothesis(hypos_.front());
	extend_.front().FinishedAppending(0, 0);
	}
	if (hypos_.empty()) {
	bound_ = -INFINITY;
	} else {
	bound_ = hypos_.front().score;
	}
	}

	void VertexNode::FinishedAppending(const unsigned char common_left, const unsigned char common_right) {
	assert(!hypos_.empty());
	assert(extend_.empty());
	bound_ = hypos_.front().score;
	state_ = hypos_.front().state;
	bool all_full = state_.left.full;
	bool all_non_full = !state_.left.full;
	DetermineSame<lm::ngram::Left> left(state_.left, common_left);
	DetermineSame<lm::ngram::Right> right(state_.right, common_right);
	for (std::vector<HypoState>::const_iterator i = hypos_.begin() + 1; i != hypos_.end(); ++i) {
	all_full &= i->state.left.full;
	all_non_full &= !i->state.left.full;
	left.Consider(i->state.left);
	right.Consider(i->state.right);
	}
	state_.left.full = all_full && left.Complete();
	right_full_ = all_full && right.Complete();
	state_.left.length = left.Shared();
	state_.right.length = right.Shared();

	if (!all_full && !all_non_full) {
	policy_ = kPolicyAlternate;
	} else if (left.Complete()) {
	policy_ = kPolicyOneRight;
	} else if (right.Complete()) {
	policy_ = kPolicyOneLeft;
	} else {
	policy_ = kPolicyAlternate;
	}
	niceness_ = state_.left.length + state_.right.length;
	}

	void VertexNode::BuildExtend() {
	// Already built.
	if (!extend_.empty()) return;
	// Nothing to build since this is a leaf.
	if (hypos_.size() <= 1) return;
	bool left_branch = true;
	switch (policy_) {
	case kPolicyAlternate:
	left_branch = (state_.left.length <= state_.right.length);
	break;
	case kPolicyOneLeft:
	left_branch = true;
	break;
	case kPolicyOneRight:
	left_branch = false;
	break;
	}
	if (left_branch) {
	Split(DivideLeft(state_.left.length), hypos_, extend_);
	} else {
	Split(DivideRight(state_.right.length), hypos_, extend_);
	}
	for (std::vector<VertexNode>::iterator i = extend_.begin(); i != extend_.end(); ++i) {
	// TODO: provide more here for branching?
	i->FinishedAppending(state_.left.length, state_.right.length);
	}
	}

	} // namespace search