Check and resize if needed at batch transaction start

This currently only affects blockchain_import and blockchain_converter.

When the number of blocks expected for the batch transaction is
provided, make an estimate of the DB space needed. If not enough free
space remains, resize the DB.

The estimate is made based on:
- the average size of the last 500 blocks, or a minimum block size of
  4k if that is larger
- a factor for the expanded size a block occupies in the DB across the
  sub-dbs/tables
- a safety factor (1.7) to allow for a "reasonable" average block size
  increase over the batch

Increase the DB size by whichever is greater: the estimated size needed
or a minimum increase size, currently 128 MB.

The conservative factors in the estimate help in testing that the resize
occurs when needed, and without gratuitous size increases. For common
use, the safety factor and minimum increase size could reasonably be
increased.

For testing, setting DEFAULT_MAPSIZE (blockchain_db/lmdb/db_lmdb.h) to 1
<< 27 (128 MB) and recompiling will ensure DB resizes take place sooner
and more frequently.
This commit is contained in:
warptangent 2015-07-11 22:46:16 -07:00
parent f9e4afd52a
commit fd73d9cc3a
No known key found for this signature in database
GPG key ID: 0E490BEBFBE4E92D
2 changed files with 109 additions and 5 deletions

View file

@ -233,7 +233,7 @@ void mdb_txn_safe::allow_new_txns()
void BlockchainLMDB::do_resize() void BlockchainLMDB::do_resize(uint64_t increase_size)
{ {
MDB_envinfo mei; MDB_envinfo mei;
@ -244,6 +244,11 @@ void BlockchainLMDB::do_resize()
mdb_env_stat(m_env, &mst); mdb_env_stat(m_env, &mst);
uint64_t new_mapsize = (double)mei.me_mapsize * RESIZE_FACTOR; uint64_t new_mapsize = (double)mei.me_mapsize * RESIZE_FACTOR;
// If given, use increase_size instead of the above way of resizing.
// This is currently used for increasing by an estimated size at start of new
// batch txn.
if (increase_size > 0)
new_mapsize = mei.me_mapsize + increase_size;
new_mapsize += (new_mapsize % mst.ms_psize); new_mapsize += (new_mapsize % mst.ms_psize);
@ -272,7 +277,8 @@ void BlockchainLMDB::do_resize()
mdb_txn_safe::allow_new_txns(); mdb_txn_safe::allow_new_txns();
} }
bool BlockchainLMDB::need_resize() const // threshold_size is used for batch transactions
bool BlockchainLMDB::need_resize(uint64_t threshold_size) const
{ {
MDB_envinfo mei; MDB_envinfo mei;
@ -282,15 +288,107 @@ bool BlockchainLMDB::need_resize() const
mdb_env_stat(m_env, &mst); mdb_env_stat(m_env, &mst);
// size_used doesn't include data yet to be committed, which can be
// significant size during batch transactions. For that, we estimate the size
// needed at the beginning of the batch transaction and pass in the
// additional size needed.
uint64_t size_used = mst.ms_psize * mei.me_last_pgno; uint64_t size_used = mst.ms_psize * mei.me_last_pgno;
LOG_PRINT_L1("DB map size: " << mei.me_mapsize);
LOG_PRINT_L1("Space used: " << size_used);
LOG_PRINT_L1("Space remaining: " << mei.me_mapsize - size_used);
LOG_PRINT_L1("Size threshold: " << threshold_size);
LOG_PRINT_L1("Percent used: " << (double)size_used/mei.me_mapsize << " Percent threshold: " << RESIZE_PERCENT);
if (threshold_size > 0)
{
if (mei.me_mapsize - size_used < threshold_size)
{
LOG_PRINT_L1("Threshold met (size-based)");
return true;
}
else
return false;
}
if ((double)size_used / mei.me_mapsize > RESIZE_PERCENT) if ((double)size_used / mei.me_mapsize > RESIZE_PERCENT)
{ {
LOG_PRINT_L1("Threshold met (percent-based)");
return true; return true;
} }
return false; return false;
} }
// Checks whether the DB map has room for an upcoming batch transaction and
// grows it if not.
//
// batch_num_blocks: number of blocks expected in the batch. When it is 0 no
// size estimate is made: need_resize() is called with a threshold of 0 and
// do_resize() with an increase of 0, so both use their percent/factor-based
// behaviour instead of the size-based one.
void BlockchainLMDB::check_and_resize_for_batch(uint64_t batch_num_blocks)
{
  LOG_PRINT_L1("[batch] checking DB size");

  // Smallest amount the map is grown by here (128 MB). It keeps batches of
  // only a few blocks from triggering a resize on nearly every batch.
  const uint64_t min_increase_size = 128 * (1 << 20);

  uint64_t estimated_size = 0;  // free space the batch is estimated to need
  uint64_t grow_by = 0;         // amount passed to do_resize()

  if (batch_num_blocks > 0)
  {
    estimated_size = get_estimated_batch_size(batch_num_blocks);
    LOG_PRINT_L1("calculated batch size: " << estimated_size);

    // Other policies are possible (a multiple of the estimate, a fixed step
    // larger than the estimate, ...). For now grow by whichever is larger:
    // the estimate itself or the minimum increase.
    grow_by = min_increase_size;
    if (estimated_size > min_increase_size)
      grow_by = estimated_size;
    LOG_PRINT_L1("increase size: " << grow_by);
  }

  // Nothing to do when the current map already has enough room.
  if (!need_resize(estimated_size))
    return;

  LOG_PRINT_L0("[batch] DB resize needed");
  do_resize(grow_by);
}
// Estimates how many bytes of DB space a batch of batch_num_blocks blocks
// will occupy.
//
// The estimate is:
//   max(average size of the most recent blocks, min_block_size)
//     * db_expand_factor * batch_num_blocks * batch_safety_factor
//
// The average is taken over up to the last 500 stored blocks. When the chain
// is empty (m_height == 0) there is nothing to sample, so the minimum block
// size is used as the average.
//
// Returns the estimated size in bytes.
uint64_t BlockchainLMDB::get_estimated_batch_size(uint64_t batch_num_blocks) const
{
  uint64_t threshold_size = 0;

  // batch size estimate * batch safety factor = final size estimate
  // Takes into account "reasonable" block size increases in batch.
  float batch_safety_factor = 1.7f;

  // estimate of stored block expanded from raw block, including denormalization and db overhead.
  // Note that this probably doesn't grow linearly with block size.
  float db_expand_factor = 4.5f;

  uint64_t num_prev_blocks = 500;

  // For resizing purposes, allow for at least 4k average block size.
  uint64_t min_block_size = 4 * 1024;

  uint32_t num_blocks_used = 0;
  uint64_t total_block_size = 0;

  // Only sample when there is at least one stored block. With m_height == 0,
  // "m_height - 1" would wrap around to UINT64_MAX, the loop bound below
  // could never be exceeded, and sizes would be requested for blocks that do
  // not exist.
  if (m_height > 0)
  {
    const uint64_t block_stop = m_height - 1;
    uint64_t block_start = 0;
    if (block_stop >= num_prev_blocks)
      block_start = block_stop - num_prev_blocks + 1;

    for (uint64_t block_num = block_start; block_num <= block_stop; ++block_num)
    {
      uint32_t block_size = get_block_size(block_num);
      total_block_size += block_size;
      // Track number of blocks being totalled here instead of assuming, in case
      // some blocks were to be skipped for being outliers.
      ++num_blocks_used;
    }
  }

  // Guard the division: no blocks sampled means no average to compute.
  size_t avg_block_size = 0;
  if (num_blocks_used > 0)
    avg_block_size = total_block_size / num_blocks_used;
  LOG_PRINT_L1("average block size across recent " << num_blocks_used << " blocks: " << avg_block_size);

  if (avg_block_size < min_block_size)
    avg_block_size = min_block_size;
  LOG_PRINT_L1("estimated average block size for batch: " << avg_block_size);

  threshold_size = avg_block_size * db_expand_factor * batch_num_blocks;
  threshold_size = threshold_size * batch_safety_factor;
  return threshold_size;
}
void BlockchainLMDB::add_block( const block& blk void BlockchainLMDB::add_block( const block& blk
, const size_t& block_size , const size_t& block_size
, const difficulty_type& cumulative_difficulty , const difficulty_type& cumulative_difficulty
@ -1820,6 +1918,7 @@ bool BlockchainLMDB::has_key_image(const crypto::key_image& img) const
return false; return false;
} }
// batch_num_blocks: (optional) Used to check if resize needed before batch transaction starts.
void BlockchainLMDB::batch_start(uint64_t batch_num_blocks) void BlockchainLMDB::batch_start(uint64_t batch_num_blocks)
{ {
LOG_PRINT_L3("BlockchainLMDB::" << __func__); LOG_PRINT_L3("BlockchainLMDB::" << __func__);
@ -1833,6 +1932,8 @@ void BlockchainLMDB::batch_start(uint64_t batch_num_blocks)
throw0(DB_ERROR("batch transaction attempted, but m_write_txn already in use")); throw0(DB_ERROR("batch transaction attempted, but m_write_txn already in use"));
check_open(); check_open();
check_and_resize_for_batch(batch_num_blocks);
m_write_batch_txn = new mdb_txn_safe(); m_write_batch_txn = new mdb_txn_safe();
// NOTE: need to make sure it's destroyed properly when done // NOTE: need to make sure it's destroyed properly when done
@ -1927,7 +2028,8 @@ uint64_t BlockchainLMDB::add_block( const block& blk
if (m_height % 1000 == 0) if (m_height % 1000 == 0)
{ {
if (need_resize()) // for batch mode, DB resize check is done at start of batch transaction
if (! m_batch_active && need_resize())
{ {
LOG_PRINT_L0("LMDB memory map needs resized, doing that now."); LOG_PRINT_L0("LMDB memory map needs resized, doing that now.");
do_resize(); do_resize();

View file

@ -199,9 +199,11 @@ public:
virtual void pop_block(block& blk, std::vector<transaction>& txs); virtual void pop_block(block& blk, std::vector<transaction>& txs);
private: private:
void do_resize(); void do_resize(uint64_t size_increase=0);
bool need_resize() const; bool need_resize(uint64_t threshold_size=0) const;
void check_and_resize_for_batch(uint64_t batch_num_blocks);
uint64_t get_estimated_batch_size(uint64_t batch_num_blocks) const;
virtual void add_block( const block& blk virtual void add_block( const block& blk
, const size_t& block_size , const size_t& block_size