/* A random access file iterator, and a main function showing its use in std::binary_search. See also: http://wordaligned.org/articles/binary-search-revisited */ #include #include #include #include #include #include #include #include // File read error, thrown when low level file access fails. class file_read_error : public std::runtime_error { public: file_read_error(std::string const & what) : std::runtime_error(what) { } }; /* Here's an unusual iterator which can be used to binary search for whitespace-separated items in a text file. It masquerades as a random access iterator but a file is not usually a random access device. Nonetheless, file seek operations are quicker than stepping through the file item by item. The unusual thing is that the iterators correspond to file offsets rather than items within the file. Here's a short example where the items are numbers. +---+---+---+---+---+---+---+---+---+---+ | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | +---+---+---+---+---+---+---+---+---+---+ |'4'|'2'| | |'5'|'7'| |'1'|'3'|'3'| +---+---+---+---+---+---+---+---+---+---+ The graphic shows a text file which contains 3 numbers, 42, 57, 133, separated by whitespace. The file itself is 10 bytes long, and hence there are 10 iterators over the file, corresponding to actual file positions. To dereference an iterator, we step back through the file until we reach either whitespace or the start of the file. Then we look forwards again and read in the next item. In the graphic above: - Iterators 0 and 1 point to number 42. - Iterators 2, 3, 4 and 5 point to number 57. - Iterators 6, 7, 8, 9 point to number 133. Dereferencing an iterator always returns an item which is in the file, and all items in the file have iterators pointing to them, so std::binary_search based on these iterators is valid. The iterators also expose their underlying file positions directory (via the getpos() member function), and with a little thought we can make use of std::lower_bound() and std::upper_bound(). */ template class text_file_iter { typedef text_file_iter iter; private: // Sanity // Check things are OK, throwing an error on failure. void check(bool ok, std::string const & what) { if (!ok) { throw file_read_error(what); } } public: // Traits typedefs, which make this class usable with // algorithms which need a random access iterator. typedef std::random_access_iterator_tag iterator_category; typedef item value_type; typedef std::streamoff difference_type; typedef item * pointer; typedef item & reference; enum start_pos { begin, end }; public: // Lifecycle text_file_iter(iter const & other) : fname(other.fname) { open(); setpos(other.pos); } text_file_iter() : pos(-1) { } text_file_iter(std::string const & fname, start_pos where = begin) : fname(fname) , pos(-1) { open(); if (where == end) { seek_end(); } } ~text_file_iter() { close(); } iter & operator=(iter const & other) { close(); fname = other.fname; open(); setpos(other.pos); return *this; } public: // Comparison // Note: it is an error to compare iterators into different files. bool operator<(iter const & other) const { return pos < other.pos; } bool operator>(iter const & other) const { return pos > other.pos; } bool operator==(iter const & other) const { return pos == other.pos; } bool operator!=(iter const & other) const { return pos != other.pos; } public: // Iteration iter & operator++() { return *this += 1; } iter & operator--() { return *this -= 1; } iter operator++(int) { iter tmp(*this); ++(*this); return tmp; } iter operator--(int) { iter tmp(*this); --(*this); return tmp; } public: // Step iter & operator+=(difference_type n) { advance(n); return *this; } iter & operator-=(difference_type n) { advance(-n); return *this; } iter operator+(difference_type n) { iter result(*this); return result += n; } iter operator-(difference_type n) { iter result(*this); return result -= n; } public: // Distance difference_type operator-(iter & other) { return pos - other.pos; } public: // Access value_type operator*() { return (*this)[0]; } value_type operator[](difference_type n) { std::streampos restore = getpos(); advance(n); value_type const result = read(); setpos(restore); return result; } // Allow direct access to the underlying stream position std::streampos getpos() { std::streampos pos_ = in.tellg(); check(in, "getpos failed"); return pos_; } private: // Implementation details void open() { in.open(fname.c_str(), std::ios::binary); check(in, "open failed"); pos = getpos(); } void close() { if (in.is_open()) { in.close(); check(in, "close failed"); } } void advance(difference_type n) { check(in.seekg(n, std::ios_base::cur), "advance failed"); pos = getpos(); } void seek_end() { check(in.seekg(0, std::ios_base::end), "seek_end failed"); chop_whitespace(); pos = getpos(); } void chop_whitespace() { do { in.unget(); } while (isspace(in.peek())); in.get(); in.clear(); } void setpos(std::streampos newpos) { check(in.seekg(newpos), "setpos failed"); pos = newpos; } // Return the item at the current position value_type read() { item n = 0; // Reverse till we hit whitespace or the start of the file while (in && !isspace(in.peek())) { in.unget(); } in.clear(); check(in >> n, "read failed"); return n; } private: // State std::string fname; std::ifstream in; std::streampos pos; }; /* Accepts a number and the name of a sorted text file of numbers passed in on the command line. Binary searches the file for the number, returning 0 if found, 1 otherwise. */ int main(int argc, char * argv[]) { typedef long long number; typedef text_file_iter iter; number v; std::stringstream s; s << argv[1]; s >> v; iter b(argv[2], iter::begin); iter e(argv[2], iter::end); return std::binary_search(b, e, v) ? 0 : 1; }