| |
| |
| |
| |
| |
| |
| |
| #include <fstream> |
| #include <string> |
|
|
| #include "glog/logging.h" |
| #include "google/protobuf/text_format.h" |
| #include "stdint.h" |
|
|
| #include "caffe/proto/caffe.pb.h" |
| #include "caffe/util/format.hpp" |
| #include "caffe/util/math_functions.hpp" |
|
|
| #ifdef USE_LEVELDB |
| #include "leveldb/db.h" |
|
|
// Returns `val` with its four bytes in reversed order
// (byte 0 <-> byte 3, byte 1 <-> byte 2).
uint32_t swap_endian(uint32_t val) {
  // Move every byte straight to its mirrored position and merge the results.
  return (val >> 24) |
         ((val >> 8) & 0x0000FF00u) |
         ((val << 8) & 0x00FF0000u) |
         (val << 24);
}
|
|
// Reads one image and its label from already-opened image/label streams.
//
//   image_file  stream whose image records start after a 16-byte header
//   label_file  stream whose one-byte labels start after an 8-byte header
//   index       zero-based record number to fetch
//   rows, cols  image dimensions; each image occupies rows * cols bytes
//   pixels      output buffer receiving rows * cols bytes
//   label       output location receiving the single label byte
//
// The function does not inspect the stream state; callers that need to detect
// short reads or failed seeks must check the streams themselves afterwards.
void read_image(std::ifstream* image_file, std::ifstream* label_file,
                uint32_t index, uint32_t rows, uint32_t cols,
                char* pixels, char* label) {
  const std::streamoff kImageHeaderBytes = 16;
  const std::streamoff kLabelHeaderBytes = 8;

  // Do the offset arithmetic in std::streamoff: index * rows * cols evaluated
  // in 32-bit unsigned arithmetic silently wraps around for large files.
  const std::streamoff image_bytes = static_cast<std::streamoff>(rows) * cols;
  const std::streamoff record = static_cast<std::streamoff>(index);

  image_file->seekg(record * image_bytes + kImageHeaderBytes);
  image_file->read(pixels, image_bytes);

  label_file->seekg(record + kLabelHeaderBytes);
  label_file->read(label, 1);
}
|
|
| void convert_dataset(const char* image_filename, const char* label_filename, |
| const char* db_filename) { |
| |
| std::ifstream image_file(image_filename, std::ios::in | std::ios::binary); |
| std::ifstream label_file(label_filename, std::ios::in | std::ios::binary); |
| CHECK(image_file) << "Unable to open file " << image_filename; |
| CHECK(label_file) << "Unable to open file " << label_filename; |
| |
| uint32_t magic; |
| uint32_t num_items; |
| uint32_t num_labels; |
| uint32_t rows; |
| uint32_t cols; |
|
|
| image_file.read(reinterpret_cast<char*>(&magic), 4); |
| magic = swap_endian(magic); |
| CHECK_EQ(magic, 2051) << "Incorrect image file magic."; |
| label_file.read(reinterpret_cast<char*>(&magic), 4); |
| magic = swap_endian(magic); |
| CHECK_EQ(magic, 2049) << "Incorrect label file magic."; |
| image_file.read(reinterpret_cast<char*>(&num_items), 4); |
| num_items = swap_endian(num_items); |
| label_file.read(reinterpret_cast<char*>(&num_labels), 4); |
| num_labels = swap_endian(num_labels); |
| CHECK_EQ(num_items, num_labels); |
| image_file.read(reinterpret_cast<char*>(&rows), 4); |
| rows = swap_endian(rows); |
| image_file.read(reinterpret_cast<char*>(&cols), 4); |
| cols = swap_endian(cols); |
|
|
| |
| leveldb::DB* db; |
| leveldb::Options options; |
| options.create_if_missing = true; |
| options.error_if_exists = true; |
| leveldb::Status status = leveldb::DB::Open( |
| options, db_filename, &db); |
| CHECK(status.ok()) << "Failed to open leveldb " << db_filename |
| << ". Is it already existing?"; |
|
|
| char label_i; |
| char label_j; |
| char* pixels = new char[2 * rows * cols]; |
| std::string value; |
|
|
| caffe::Datum datum; |
| datum.set_channels(2); |
| datum.set_height(rows); |
| datum.set_width(cols); |
| LOG(INFO) << "A total of " << num_items << " items."; |
| LOG(INFO) << "Rows: " << rows << " Cols: " << cols; |
| for (int itemid = 0; itemid < num_items; ++itemid) { |
| int i = caffe::caffe_rng_rand() % num_items; |
| int j = caffe::caffe_rng_rand() % num_items; |
| read_image(&image_file, &label_file, i, rows, cols, |
| pixels, &label_i); |
| read_image(&image_file, &label_file, j, rows, cols, |
| pixels + (rows * cols), &label_j); |
| datum.set_data(pixels, 2*rows*cols); |
| if (label_i == label_j) { |
| datum.set_label(1); |
| } else { |
| datum.set_label(0); |
| } |
| datum.SerializeToString(&value); |
| std::string key_str = caffe::format_int(itemid, 8); |
| db->Put(leveldb::WriteOptions(), key_str, value); |
| } |
|
|
| delete db; |
| delete [] pixels; |
| } |
|
|
| int main(int argc, char** argv) { |
| if (argc != 4) { |
| printf("This script converts the MNIST dataset to the leveldb format used\n" |
| "by caffe to train a siamese network.\n" |
| "Usage:\n" |
| " convert_mnist_data input_image_file input_label_file " |
| "output_db_file\n" |
| "The MNIST dataset could be downloaded at\n" |
| " http://yann.lecun.com/exdb/mnist/\n" |
| "You should gunzip them after downloading.\n"); |
| } else { |
| google::InitGoogleLogging(argv[0]); |
| convert_dataset(argv[1], argv[2], argv[3]); |
| } |
| return 0; |
| } |
| #else |
| int main(int argc, char** argv) { |
| LOG(FATAL) << "This example requires LevelDB; compile with USE_LEVELDB."; |
| } |
| #endif |
|
|