| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| #include <iostream> |
| #include <cassert> |
| #include "tpt_tightindex.h" |
|
|
| namespace tpt |
| { |
|
|
| |
|
|
| |
| |
| void tightwrite(std::ostream& out, uint64_t data, bool flag) |
| { |
| |
| #ifdef LOG_WRITE_ACTIVITY |
| size_t bytes_written=1; |
| std::cerr << "starting at file position " << out.tellp() |
| << ": tightwrite " << data; |
| #endif |
| if (flag) |
| { |
| #ifdef LOG_WRITE_ACTIVITY |
| std::cerr << " with flag 1 "; |
| #endif |
| while (data >= 128) |
| { |
| char c = char(data%128)|char(-128); |
| out.put(c); |
| data >>= 7; |
| #ifdef LOG_WRITE_ACTIVITY |
| bytes_written++; |
| #endif |
| } |
| char c = char(data%128)|char(-128); |
| out.put(c); |
| } |
| else |
| { |
| #ifdef LOG_WRITE_ACTIVITY |
| std::cerr << " with flag 0 "; |
| #endif |
| while (data >= 128) |
| { |
| char c = data&127; |
| out.put(c); |
| data >>= 7; |
| #ifdef LOG_WRITE_ACTIVITY |
| bytes_written++; |
| #endif |
| } |
| char c = (data&127); |
| out.put(c); |
| } |
| #ifdef LOG_WRITE_ACTIVITY |
| std::cerr << " in " << bytes_written << " bytes" << std::endl; |
| #endif |
| } |
|
|
| |
| |
| |
|
|
| #define DEBUG_TIGHTREAD 0 |
|
|
| |
| |
| filepos_type |
| tightread(std::istream& in, std::ios::pos_type stop) |
| { |
| |
| |
| assert(in.rdbuf()->in_avail() > 0); |
| filepos_type data = 0; |
| short int bitshift = 7; |
| int pos = in.tellg(); |
| #if DEBUG_TIGHTREAD |
| if (debug) |
| cerr << bitpattern(uint(in.peek())) << " " << in.peek() |
| << " pos=" << in.tellg() << "\n"; |
| #endif |
| int buf = in.get(); |
| if (stop == std::ios::pos_type(0)) |
| stop = size_t(in.tellg())+in.rdbuf()->in_avail(); |
| else |
| stop = std::min(size_t(stop),size_t(in.tellg())+in.rdbuf()->in_avail()); |
| if (buf < 0) |
| std::cerr << "number read: " << buf << " " << pos << " " |
| << in.tellg() << std::endl; |
| assert (buf>=0); |
|
|
| if (buf >= 128) |
| { |
| data = buf-128; |
| while (in.tellg() < stop && in.peek() >= 128) |
| { |
| #if DEBUG_TIGHTREAD |
| if (debug) |
| cerr << bitpattern(uint(in.peek())) << " " << in.peek(); |
| #endif |
| |
| data += size_t(in.get()-128)<<bitshift; |
| bitshift += 7; |
| #if DEBUG_TIGHTREAD |
| if (debug) |
| cerr << " " << data << " pos=" << in.tellg() << std::endl; |
| #endif |
| } |
| } |
| else |
| { |
| data = buf; |
| while (in.tellg() < stop && in.peek() < 128) |
| { |
| |
| #if DEBUG_TIGHTREAD |
| if (debug) |
| cerr << bitpattern(uint(in.peek())) << " " << in.peek(); |
|
|
| #endif |
| data += size_t(in.get())<<bitshift; |
| bitshift += 7; |
| #if DEBUG_TIGHTREAD |
| if (debug) |
| cerr << " " << data << " pos=" << in.tellg() << "\n"; |
| #endif |
| } |
| } |
| return data; |
| } |
|
|
| #define DEBUG_TIGHTFIND 0 |
| #if DEBUG_TIGHTFIND |
| bool debug=true; |
| #endif |
| bool |
| tightfind_midpoint(std::istream& in, filepos_type start, filepos_type stop) |
| { |
| in.seekg((start+stop)/2); |
| |
| |
| |
| |
| |
|
|
| |
| while (static_cast<filepos_type>(in.tellg()) < stop && in.get() < 128) |
| { |
| #if DEBUG_TIGHTFIND |
| if (debug) |
| { |
| in.unget(); |
| char c = in.get(); |
| std::cerr << in.tellg() << " skipped key byte " << c << std::endl; |
| } |
| #endif |
| if (in.eof()) return false; |
| } |
| |
| while (static_cast<filepos_type>(in.tellg()) < stop && in.peek() >= 128) |
| { |
| #if DEBUG_TIGHTFIND |
| int r = in.get(); |
| if (debug) |
| std::cerr << in.tellg() << " skipped value byte " << r |
| << " next is " << in.peek() |
| << std::endl; |
| #else |
| in.get(); |
| #endif |
| } |
| return true; |
| } |
|
|
| char const* |
| tightfind_midpoint(char const* const start, |
| char const* const stop) |
| { |
| char const* mp = start + (stop - start)/2; |
| while (*mp < 0 && mp > start) mp--; |
| while (*mp >= 0 && mp > start) mp--; |
| return (*mp < 0) ? ++mp : mp; |
| } |
|
|
| bool |
| linear_search(std::istream& in, filepos_type start, filepos_type stop, |
| id_type key, unsigned char& flags) |
| { |
| in.seekg(start); |
|
|
| #if DEBUG_TIGHTFIND |
| if (debug) std::cerr << in.tellg() << " "; |
| #endif |
|
|
| |
| |
| |
|
|
| id_type foo; |
| for(foo = tightread(in,stop); |
| (foo>>FLAGBITS) < key; |
| foo = tightread(in,stop)) |
| { |
| |
| while (static_cast<filepos_type>(in.tellg()) < stop |
| && in.peek() >= 128) in.get(); |
|
|
| #if DEBUG_TIGHTFIND |
| if (debug) |
| std::cerr << (foo>>FLAGBITS) << " [" << key << "] " |
| << in.tellg() << std::endl; |
| #endif |
|
|
| if (in.tellg() == std::ios::pos_type(stop)) |
| return false; |
| } |
|
|
| #if DEBUG_TIGHTFIND |
| if (debug && (foo>>FLAGBITS)==key) |
| std::cerr << "found entry for " << key << std::endl; |
| std::cerr << "current file position is " << in.tellg() |
| << " (value read: " << key << std::endl; |
| #endif |
|
|
| assert(static_cast<filepos_type>(in.tellg()) < stop); |
| if ((foo>>FLAGBITS)==key) |
| { |
| flags = (foo%256); |
| flags &= FLAGMASK; |
| return true; |
| } |
| else |
| return false; |
| } |
|
|
| bool |
| tightfind(std::istream& in, filepos_type start, filepos_type stop, |
| id_type key, unsigned char& flags) |
| { |
| |
| #if DEBUG_TIGHTFIND |
| if (debug) |
| std::cerr << "looking for " << key |
| << " in range [" << start << ":" << stop << "]" << std::endl; |
| #endif |
| if (start==stop) return false; |
| assert(stop>start); |
| if ((start+1)==stop) return false; |
|
|
| unsigned int const granularity = sizeof(filepos_type)*5; |
| |
| |
| |
|
|
| if (stop > start + granularity) |
| if (!tightfind_midpoint(in,start,stop)) |
| return false; |
|
|
| if (stop <= start + granularity || in.tellg() == std::ios::pos_type(stop)) |
| { |
| |
| |
| return linear_search(in,start,stop,key,flags); |
| } |
|
|
| |
| filepos_type curpos = in.tellg(); |
| id_type foo = tightread(in,stop); |
| id_type tmpid = foo>>FLAGBITS; |
| if (tmpid == key) |
| { |
| flags = foo%256; |
| flags &= FLAGMASK; |
| #if DEBUG_TIGHTFIND |
| if (debug) std::cerr << "found entry for " << key << std::endl; |
| #endif |
| return true; |
| } |
| else if (tmpid > key) |
| { |
| #if DEBUG_TIGHTFIND |
| if (debug) std::cerr << foo << " > " << key << std::endl; |
| #endif |
| return tightfind(in,start,curpos,key,flags); |
| } |
| else |
| { |
| while (static_cast<filepos_type>(in.tellg()) < stop |
| && in.rdbuf()->in_avail() > 0 |
| && in.peek() >= 128) |
| in.get(); |
| if (in.rdbuf()->in_avail() == 0 || in.tellg() == std::ios::pos_type(stop)) |
| return false; |
| #if DEBUG_TIGHTFIND |
| if (debug) std::cerr << foo << " < " << key << std::endl; |
| #endif |
| return tightfind(in,in.tellg(),stop,key,flags); |
| } |
| } |
|
|
|
|
| char const* |
| tightfind(char const* const start, |
| char const* const stop, |
| id_type key, |
| unsigned char& flags) |
| { |
| |
|
|
| if (start==stop) return NULL; |
| assert(stop>start); |
| if ((start+1)==stop) return NULL; |
| char const* p = tightfind_midpoint(start,stop); |
| |
| |
| size_t foo; |
| char const* after = tightread(p,stop,foo); |
| id_type tmpId = foo>>FLAGBITS; |
| if (tmpId == key) |
| { |
| flags = foo%256; |
| flags &= FLAGMASK; |
| return after; |
| } |
| else if (tmpId > key) |
| { |
| return tightfind(start,p,key,flags); |
| } |
| else |
| { |
| while (*after<0 && ++after < stop); |
| if (after == stop) return NULL; |
| return tightfind(after,stop,key,flags); |
| } |
| } |
|
|
| char const* |
| tightfind_noflags(char const* const start, |
| char const* const stop, |
| id_type key) |
| { |
| |
|
|
| if (start==stop) return NULL; |
| assert(stop>start); |
| if ((start+1)==stop) return NULL; |
| char const* p = tightfind_midpoint(start,stop); |
| |
| |
| size_t foo; |
| char const* after = tightread(p,stop,foo); |
| if (foo == key) |
| return after; |
| else if (foo > key) |
| { |
| return tightfind_noflags(start,p,key); |
| } |
| else |
| { |
| while (*after<0 && ++after < stop); |
| if (after == stop) return NULL; |
| return tightfind_noflags(after,stop,key); |
| } |
| } |
|
|
| bool |
| linear_search_noflags(std::istream& in, filepos_type start, |
| filepos_type stop, id_type key) |
| { |
| std::ios::pos_type mystop = stop; |
|
|
| in.seekg(start); |
| id_type foo; |
| for(foo = tightread(in,stop); foo < key; foo = tightread(in,stop)) |
| { |
| |
| while (in.tellg() < mystop && in.peek() >= 128) |
| in.get(); |
| if (in.tellg() == mystop) |
| return false; |
| } |
| assert(in.tellg() < mystop); |
| return (foo==key); |
| } |
|
|
|
|
| bool |
| tightfind_noflags(std::istream& in, filepos_type start, |
| filepos_type stop, id_type key) |
| { |
| |
| if (start==stop) return false; |
| assert(stop>start); |
| if ((start+1)==stop) return false; |
|
|
| |
| |
| |
| unsigned int const granularity = sizeof(filepos_type)*5; |
| |
|
|
| if (stop > start + granularity) |
| if (!tightfind_midpoint(in,start,stop)) |
| return false; |
|
|
| |
| |
| |
| if (stop <= start + granularity || in.tellg() == std::ios::pos_type(stop)) |
| return linear_search_noflags(in,start,stop,key); |
|
|
| |
| filepos_type curpos = in.tellg(); |
| id_type foo = tightread(in,stop); |
| if (foo == key) |
| return true; |
|
|
| else if (foo > key) |
| return tightfind_noflags(in,start,curpos,key); |
|
|
| else |
| { |
| std::ios::pos_type mystop = stop; |
| while (in.tellg() < mystop |
| && in.rdbuf()->in_avail() > 0 |
| && in.peek() >= 128) |
| in.get(); |
| if (in.rdbuf()->in_avail() == 0 || in.tellg() == mystop) |
| return false; |
| return tightfind_noflags(in,in.tellg(),stop,key); |
| } |
| } |
|
|
| void tightwrite2(std::ostream& out, size_t data, bool flag) |
| { |
| |
| |
| short int foo = (data%32768); |
| if (flag) |
| { |
| foo += 32768; |
| while (data >= 32768) |
| { |
| out.write(reinterpret_cast<char*>(&foo),2); |
| data >>= 15; |
| foo = (data%32768)+32768; |
| } |
| } |
| else |
| { |
| while (data >= 32768) |
| { |
| out.write(reinterpret_cast<char*>(&foo),2); |
| data >>= 15; |
| foo = data%32768; |
| } |
| } |
| out.write(reinterpret_cast<char*>(&foo),2); |
| } |
|
|
| char const* |
| tightread8(char const* start, |
| char const* stop, |
| uint64_t& dest) |
| { |
| static char bitmask=127; |
| dest = 0; |
| if (*start < 0) |
| { |
| dest = (*start)&bitmask; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint64_t((*start)&bitmask)<<7; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint64_t((*start)&bitmask)<<14; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint64_t((*start)&bitmask)<<21; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint64_t((*start)&bitmask)<<28; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint64_t((*start)&bitmask)<<35; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint64_t((*start)&bitmask)<<42; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint64_t((*start)&bitmask)<<49; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint64_t((*start)&bitmask)<<56; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint64_t((*start)&bitmask)<<63; |
| } |
| else |
| { |
| dest = *start; |
| if (++start==stop || *start < 0) return start; |
| dest += uint64_t(*start)<<7; |
| if (++start==stop || *start < 0) return start; |
| dest += uint64_t(*start)<<14; |
| if (++start==stop || *start < 0) return start; |
| dest += uint64_t(*start)<<21; |
| if (++start==stop || *start < 0) return start; |
| dest += uint64_t(*start)<<28; |
| if (++start==stop || *start < 0) return start; |
| dest += uint64_t(*start)<<35; |
| if (++start==stop || *start < 0) return start; |
| dest += uint64_t(*start)<<42; |
| if (++start==stop || *start < 0) return start; |
| dest += uint64_t(*start)<<49; |
| if (++start==stop || *start < 0) return start; |
| dest += uint64_t(*start)<<56; |
| if (++start==stop || *start < 0) return start; |
| dest += uint64_t(*start)<<63; |
| } |
| assert(start<stop); |
| return ++start; |
| } |
|
|
| char const* |
| tightread4(char const* start, |
| char const* stop, |
| uint32_t& dest) |
| { |
| static char bitmask=127; |
| dest = 0; |
| if (*start < 0) |
| { |
| dest = (*start)&bitmask; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint32_t((*start)&bitmask)<<7; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint32_t((*start)&bitmask)<<14; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint32_t((*start)&bitmask)<<21; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint32_t((*start)&bitmask)<<28; |
| } |
| else |
| { |
| dest = *start; |
| if (++start==stop || *start < 0) return start; |
| dest += uint32_t(*start)<<7; |
| if (++start==stop || *start < 0) return start; |
| dest += uint32_t(*start)<<14; |
| if (++start==stop || *start < 0) return start; |
| dest += uint32_t(*start)<<21; |
| if (++start==stop || *start < 0) return start; |
| dest += uint32_t(*start)<<28; |
| } |
| assert(start<stop); |
| return ++start; |
| } |
|
|
| char const* |
| tightread2(char const* start, |
| char const* stop, |
| uint16_t& dest) |
| { |
| static char bitmask=127; |
| dest = 0; |
| if (*start < 0) |
| { |
| dest = (*start)&bitmask; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint32_t((*start)&bitmask)<<7; |
| if (++start==stop || *start >= 0) return start; |
| dest += uint32_t((*start)&bitmask)<<14; |
| } |
| else |
| { |
| dest = *start; |
| if (++start==stop || *start < 0) return start; |
| dest += uint32_t(*start)<<7; |
| if (++start==stop || *start < 0) return start; |
| dest += uint32_t(*start)<<14; |
| } |
| assert(start<stop); |
| return ++start; |
| } |
| } |
|
|