File size: 6,131 Bytes
fd49381 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
#include "util/file.hh"
#include "util/probing_hash_table.hh"
#include "util/mmap.hh"
#include "util/usage.hh"
#include <iostream>
namespace util {
namespace {
struct Entry {
typedef uint64_t Key;
Key key;
Key GetKey() const { return key; }
};
// I don't care if this doesn't run on Windows. Empirically /dev/urandom was faster than boost::random's Mersenne Twister.
class URandom {
public:
URandom() :
it_(buf_ + 1024), end_(buf_ + 1024),
file_(util::OpenReadOrThrow("/dev/urandom")) {}
uint64_t Get() {
if (it_ == end_) {
it_ = buf_;
util::ReadOrThrow(file_.get(), buf_, sizeof(buf_));
it_ = buf_;
}
return *it_++;
}
void Batch(uint64_t *begin, uint64_t *end) {
util::ReadOrThrow(file_.get(), begin, (end - begin) * sizeof(uint64_t));
}
private:
uint64_t buf_[1024];
uint64_t *it_, *end_;
util::scoped_fd file_;
};
struct PrefetchEntry {
uint64_t key;
const Entry *pointer;
};
template <class TableT, unsigned PrefetchSize> class PrefetchQueue {
public:
typedef TableT Table;
explicit PrefetchQueue(Table &table) : table_(table), cur_(0), twiddle_(false) {
for (PrefetchEntry *i = entries_; i != entries_ + PrefetchSize; ++i)
i->pointer = NULL;
}
void Add(uint64_t key) {
if (Cur().pointer) {
twiddle_ ^= table_.FindFromIdeal(Cur().key, Cur().pointer);
}
Cur().key = key;
Cur().pointer = table_.Ideal(key);
__builtin_prefetch(Cur().pointer, 0, 0);
Next();
}
bool Drain() {
if (Cur().pointer) {
for (PrefetchEntry *i = &Cur(); i < entries_ + PrefetchSize; ++i) {
twiddle_ ^= table_.FindFromIdeal(i->key, i->pointer);
}
}
for (PrefetchEntry *i = entries_; i < &Cur(); ++i) {
twiddle_ ^= table_.FindFromIdeal(i->key, i->pointer);
}
return twiddle_;
}
private:
PrefetchEntry &Cur() { return entries_[cur_]; }
void Next() {
++cur_;
cur_ = cur_ % PrefetchSize;
}
Table &table_;
PrefetchEntry entries_[PrefetchSize];
std::size_t cur_;
bool twiddle_;
PrefetchQueue(const PrefetchQueue&);
void operator=(const PrefetchQueue&);
};
template <class TableT> class Immediate {
public:
typedef TableT Table;
explicit Immediate(Table &table) : table_(table), twiddle_(false) {}
void Add(uint64_t key) {
typename Table::ConstIterator it;
twiddle_ ^= table_.Find(key, it);
}
bool Drain() const { return twiddle_; }
private:
Table &table_;
bool twiddle_;
};
std::size_t Size(uint64_t entries, float multiplier = 1.5) {
typedef util::ProbingHashTable<Entry, util::IdentityHash, std::equal_to<Entry::Key>, Power2Mod> Table;
// Always round up to power of 2 for fair comparison.
return Power2Mod::RoundBuckets(Table::Size(entries, multiplier) / sizeof(Entry)) * sizeof(Entry);
}
template <class Queue> bool Test(URandom &rn, uint64_t entries, const uint64_t *const queries_begin, const uint64_t *const queries_end, bool ordinary_malloc, float multiplier = 1.5) {
std::size_t size = Size(entries, multiplier);
scoped_memory backing;
if (ordinary_malloc) {
backing.reset(util::CallocOrThrow(size), size, scoped_memory::MALLOC_ALLOCATED);
} else {
util::HugeMalloc(size, true, backing);
}
typename Queue::Table table(backing.get(), size);
double start = CPUTime();
for (uint64_t i = 0; i < entries; ++i) {
Entry entry;
entry.key = rn.Get();
table.Insert(entry);
}
double inserted = CPUTime() - start;
double before_lookup = CPUTime();
Queue queue(table);
for (const uint64_t *i = queries_begin; i != queries_end; ++i) {
queue.Add(*i);
}
bool meaningless = queue.Drain();
std::cout << ' ' << (inserted / static_cast<double>(entries)) << ' ' << (CPUTime() - before_lookup) / static_cast<double>(queries_end - queries_begin) << std::flush;
return meaningless;
}
bool TestRun(uint64_t lookups = 20000000, float multiplier = 1.5) {
URandom rn;
util::scoped_memory queries;
HugeMalloc(lookups * sizeof(uint64_t), true, queries);
rn.Batch(static_cast<uint64_t*>(queries.get()), static_cast<uint64_t*>(queries.get()) + lookups);
uint64_t physical_mem_limit = util::GuessPhysicalMemory() / 2;
bool meaningless = true;
for (uint64_t i = 4; Size(i / multiplier) < physical_mem_limit; i *= 4) {
std::cout << static_cast<std::size_t>(i / multiplier) << ' ' << Size(i / multiplier);
typedef util::ProbingHashTable<Entry, util::IdentityHash, std::equal_to<Entry::Key>, Power2Mod> Table;
typedef util::ProbingHashTable<Entry, util::IdentityHash, std::equal_to<Entry::Key>, DivMod> TableDiv;
const uint64_t *const queries_begin = static_cast<const uint64_t*>(queries.get());
meaningless ^= util::Test<Immediate<TableDiv> >(rn, i / multiplier, queries_begin, queries_begin + lookups, true, multiplier);
meaningless ^= util::Test<Immediate<Table> >(rn, i / multiplier, queries_begin, queries_begin + lookups, true, multiplier);
meaningless ^= util::Test<PrefetchQueue<Table, 4> >(rn, i / multiplier, queries_begin, queries_begin + lookups, true, multiplier);
meaningless ^= util::Test<Immediate<Table> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
meaningless ^= util::Test<PrefetchQueue<Table, 2> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
meaningless ^= util::Test<PrefetchQueue<Table, 4> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
meaningless ^= util::Test<PrefetchQueue<Table, 8> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
meaningless ^= util::Test<PrefetchQueue<Table, 16> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
std::cout << std::endl;
}
return meaningless;
}
} // namespace
} // namespace util
int main() {
bool meaningless = false;
std::cout << "#CPU time\n";
meaningless ^= util::TestRun();
std::cerr << "Meaningless: " << meaningless << '\n';
}
|