From fd73d9cc3a05ca551cdd1c425c664e24e746b152 Mon Sep 17 00:00:00 2001 From: warptangent Date: Sat, 11 Jul 2015 22:46:16 -0700 Subject: [PATCH] Check and resize if needed at batch transaction start This currently only affects blockchain_import and blockchain_converter. When the number of blocks expected for the batch transaction is provided, make an estimate of the DB space needed. If not enough free space remains, resize the DB. The estimate is made based on: - the average size of the last 500 blocks, or if larger, a min. block size of 4k - a factor for the expanded size a block occupies in the DB across the sub-dbs/tables - a safety factor (1.7) to allow for a "reasonable" average block size increase over the batch Increase the DB size by whichever is greater: the estimated size needed or a minimum increase size, currently 128 MB. The conservative factors in the estimate help in testing that the resize occurs when needed, and without gratuitous size increases. For common use, the safety factor and minimum increase size could reasonably be increased. For testing, setting DEFAULT_MAPSIZE (blockchain_db/lmdb/db_lmdb.h) to 1 << 27 (128 MB) and recompiling will ensure DB resizes take place sooner and more frequently. --- src/blockchain_db/lmdb/db_lmdb.cpp | 108 ++++++++++++++++++++++++++++- src/blockchain_db/lmdb/db_lmdb.h | 6 +- 2 files changed, 109 insertions(+), 5 deletions(-) diff --git a/src/blockchain_db/lmdb/db_lmdb.cpp b/src/blockchain_db/lmdb/db_lmdb.cpp index 23fb7995..0d0b8ba8 100644 --- a/src/blockchain_db/lmdb/db_lmdb.cpp +++ b/src/blockchain_db/lmdb/db_lmdb.cpp @@ -233,7 +233,7 @@ void mdb_txn_safe::allow_new_txns() -void BlockchainLMDB::do_resize() +void BlockchainLMDB::do_resize(uint64_t increase_size) { MDB_envinfo mei; @@ -244,6 +244,11 @@ void BlockchainLMDB::do_resize() mdb_env_stat(m_env, &mst); uint64_t new_mapsize = (double)mei.me_mapsize * RESIZE_FACTOR; + // If given, use increase_size intead of above way of resizing. + // This is currently used for increasing by an estimated size at start of new + // batch txn. + if (increase_size > 0) + new_mapsize = mei.me_mapsize + increase_size; new_mapsize += (new_mapsize % mst.ms_psize); @@ -272,7 +277,8 @@ void BlockchainLMDB::do_resize() mdb_txn_safe::allow_new_txns(); } -bool BlockchainLMDB::need_resize() const +// threshold_size is used for batch transactions +bool BlockchainLMDB::need_resize(uint64_t threshold_size) const { MDB_envinfo mei; @@ -282,15 +288,107 @@ bool BlockchainLMDB::need_resize() const mdb_env_stat(m_env, &mst); + // size_used doesn't include data yet to be committed, which can be + // significant size during batch transactions. For that, we estimate the size + // needed at the beginning of the batch transaction and pass in the + // additional size needed. uint64_t size_used = mst.ms_psize * mei.me_last_pgno; + LOG_PRINT_L1("DB map size: " << mei.me_mapsize); + LOG_PRINT_L1("Space used: " << size_used); + LOG_PRINT_L1("Space remaining: " << mei.me_mapsize - size_used); + LOG_PRINT_L1("Size threshold: " << threshold_size); + LOG_PRINT_L1("Percent used: " << (double)size_used/mei.me_mapsize << " Percent threshold: " << RESIZE_PERCENT); + + if (threshold_size > 0) + { + if (mei.me_mapsize - size_used < threshold_size) + { + LOG_PRINT_L1("Threshold met (size-based)"); + return true; + } + else + return false; + } + if ((double)size_used / mei.me_mapsize > RESIZE_PERCENT) { + LOG_PRINT_L1("Threshold met (percent-based)"); return true; } return false; } +void BlockchainLMDB::check_and_resize_for_batch(uint64_t batch_num_blocks) +{ + LOG_PRINT_L1("[batch] checking DB size"); + const uint64_t min_increase_size = 128 * (1 << 20); + uint64_t threshold_size = 0; + uint64_t increase_size = 0; + if (batch_num_blocks > 0) + { + threshold_size = get_estimated_batch_size(batch_num_blocks); + LOG_PRINT_L1("calculated batch size: " << threshold_size); + + // The increased DB size could be a multiple of threshold_size, a fixed + // size increase (> threshold_size), or other variations. + // + // Currently we use the greater of threshold size and a minimum size. The + // minimum size increase is used to avoid frequent resizes when the batch + // size is set to a very small numbers of blocks. + increase_size = (threshold_size > min_increase_size) ? threshold_size : min_increase_size; + LOG_PRINT_L1("increase size: " << increase_size); + } + + // if threshold_size is 0 (i.e. number of blocks for batch not passed in), it + // will fall back to the percent-based threshold check instead of the + // size-based check + if (need_resize(threshold_size)) + { + LOG_PRINT_L0("[batch] DB resize needed"); + do_resize(increase_size); + } +} + +uint64_t BlockchainLMDB::get_estimated_batch_size(uint64_t batch_num_blocks) const +{ + uint64_t threshold_size = 0; + + // batch size estimate * batch safety factor = final size estimate + // Takes into account "reasonable" block size increases in batch. + float batch_safety_factor = 1.7f; + // estimate of stored block expanded from raw block, including denormalization and db overhead. + // Note that this probably doesn't grow linearly with block size. + float db_expand_factor = 4.5f; + uint64_t num_prev_blocks = 500; + // For resizing purposes, allow for at least 4k average block size. + uint64_t min_block_size = 4 * 1024; + + uint64_t block_stop = m_height - 1; + uint64_t block_start = 0; + if (block_stop >= num_prev_blocks) + block_start = block_stop - num_prev_blocks + 1; + uint32_t num_blocks_used = 0; + uint64_t total_block_size = 0; + for (uint64_t block_num = block_start; block_num <= block_stop; ++block_num) + { + uint32_t block_size = get_block_size(block_num); + total_block_size += block_size; + // Track number of blocks being totalled here instead of assuming, in case + // some blocks were to be skipped for being outliers. + ++num_blocks_used; + } + size_t avg_block_size = total_block_size / num_blocks_used; + LOG_PRINT_L1("average block size across recent " << num_blocks_used << " blocks: " << avg_block_size); + if (avg_block_size < min_block_size) + avg_block_size = min_block_size; + LOG_PRINT_L1("estimated average block size for batch: " << avg_block_size); + + threshold_size = avg_block_size * db_expand_factor * batch_num_blocks; + threshold_size = threshold_size * batch_safety_factor; + return threshold_size; +} + void BlockchainLMDB::add_block( const block& blk , const size_t& block_size , const difficulty_type& cumulative_difficulty @@ -1820,6 +1918,7 @@ bool BlockchainLMDB::has_key_image(const crypto::key_image& img) const return false; } +// batch_num_blocks: (optional) Used to check if resize needed before batch transaction starts. void BlockchainLMDB::batch_start(uint64_t batch_num_blocks) { LOG_PRINT_L3("BlockchainLMDB::" << __func__); @@ -1833,6 +1932,8 @@ void BlockchainLMDB::batch_start(uint64_t batch_num_blocks) throw0(DB_ERROR("batch transaction attempted, but m_write_txn already in use")); check_open(); + check_and_resize_for_batch(batch_num_blocks); + m_write_batch_txn = new mdb_txn_safe(); // NOTE: need to make sure it's destroyed properly when done @@ -1927,7 +2028,8 @@ uint64_t BlockchainLMDB::add_block( const block& blk if (m_height % 1000 == 0) { - if (need_resize()) + // for batch mode, DB resize check is done at start of batch transaction + if (! m_batch_active && need_resize()) { LOG_PRINT_L0("LMDB memory map needs resized, doing that now."); do_resize(); diff --git a/src/blockchain_db/lmdb/db_lmdb.h b/src/blockchain_db/lmdb/db_lmdb.h index 6c82939c..6f226254 100644 --- a/src/blockchain_db/lmdb/db_lmdb.h +++ b/src/blockchain_db/lmdb/db_lmdb.h @@ -199,9 +199,11 @@ public: virtual void pop_block(block& blk, std::vector& txs); private: - void do_resize(); + void do_resize(uint64_t size_increase=0); - bool need_resize() const; + bool need_resize(uint64_t threshold_size=0) const; + void check_and_resize_for_batch(uint64_t batch_num_blocks); + uint64_t get_estimated_batch_size(uint64_t batch_num_blocks) const; virtual void add_block( const block& blk , const size_t& block_size