Browse Source

Make CRollingBloomFilter set nTweak for you

While CBloomFilter is usually used with an explicitly set nTweak,
CRollingBloomFilter is only used internally. Requiring every caller to
set nTweak is error-prone and redundant; better to have the class handle
that for you with a high-quality randomness source.

Additionally when clearing the filter it makes sense to change nTweak as
well to recover from a bad setting, e.g. due to insufficient randomness
at initialization, so the clear() method is replaced by a reset() method
that sets a new, random, nTweak value.

(cherry picked from commit d2d7ee0e863b286e1c9f9c54659d494fb0a7712d)
pull/145/head
Peter Todd 9 years ago
parent
commit
83671efe99
  1. 19
      src/bloom.cpp
  2. 12
      src/bloom.h
  3. 2
      src/main.cpp
  4. 2
      src/net.cpp
  5. 6
      src/test/bloom_tests.cpp

19
src/bloom.cpp

@ -8,6 +8,7 @@
#include "hash.h"
#include "script/script.h"
#include "script/standard.h"
#include "random.h"
#include "streams.h"
#include <math.h>
@ -121,6 +122,12 @@ void CBloomFilter::clear()
isEmpty = true;
}
void CBloomFilter::reset(unsigned int nNewTweak)
{
clear();
nTweak = nNewTweak;
}
bool CBloomFilter::IsWithinSizeConstraints() const
{
return vData.size() <= MAX_BLOOM_FILTER_SIZE && nHashFuncs <= MAX_HASH_FUNCS;
@ -217,7 +224,8 @@ CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate,
// inserted, so at least one always contains the last nElements
// inserted.
nBloomSize = nElements * 2;
nInsertions = 0;
reset(nTweak);
}
void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
@ -254,9 +262,12 @@ bool CRollingBloomFilter::contains(const uint256& hash) const
return contains(data);
}
void CRollingBloomFilter::clear()
void CRollingBloomFilter::reset(unsigned int nNewTweak)
{
b1.clear();
b2.clear();
if (!nNewTweak)
nNewTweak = GetRand(std::numeric_limits<unsigned int>::max());
b1.reset(nNewTweak);
b2.reset(nNewTweak);
nInsertions = 0;
}

12
src/bloom.h

@ -89,6 +89,7 @@ public:
bool contains(const uint256& hash) const;
void clear();
void reset(unsigned int nNewTweak);
//! True if the size is <= MAX_BLOOM_FILTER_SIZE and the number of hash functions is <= MAX_HASH_FUNCS
//! (catch a filter which was just deserialized which was too big)
@ -103,7 +104,11 @@ public:
/**
* RollingBloomFilter is a probabilistic "keep track of most recently inserted" set.
* Construct it with the number of items to keep track of, and a false-positive rate.
* Construct it with the number of items to keep track of, and a false-positive
* rate. Unlike CBloomFilter, by default nTweak is set to a cryptographically
* secure random value for you. Similarly rather than clear() the method
* reset() is provided, which also changes nTweak to decrease the impact of
* false-positives.
*
* contains(item) will always return true if item was one of the last N things
* insert()'ed ... but may also return true for items that were not inserted.
@ -111,14 +116,15 @@ public:
class CRollingBloomFilter
{
public:
CRollingBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
CRollingBloomFilter(unsigned int nElements, double nFPRate,
unsigned int nTweak = 0);
void insert(const std::vector<unsigned char>& vKey);
void insert(const uint256& hash);
bool contains(const std::vector<unsigned char>& vKey) const;
bool contains(const uint256& hash) const;
void clear();
void reset(unsigned int nNewTweak = 0);
private:
unsigned int nBloomSize;

2
src/main.cpp

@ -4909,7 +4909,7 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
{
// Periodically clear addrKnown to allow refresh broadcasts
if (nLastRebroadcast)
pnode->addrKnown.clear();
pnode->addrKnown.reset();
// Rebroadcast our address
AdvertizeLocal(pnode);

2
src/net.cpp

@ -1933,7 +1933,7 @@ unsigned int SendBufferSize() { return 1000*GetArg("-maxsendbuffer", 1*1000); }
CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fInboundIn) :
ssSend(SER_NETWORK, INIT_PROTO_VERSION),
addrKnown(5000, 0.001, insecure_rand()),
addrKnown(5000, 0.001),
setInventoryKnown(SendBufferSize() / 1000)
{
nServices = 0;

6
src/test/bloom_tests.cpp

@ -469,7 +469,7 @@ static std::vector<unsigned char> RandomData()
BOOST_AUTO_TEST_CASE(rolling_bloom)
{
// last-100-entry, 1% false positive:
CRollingBloomFilter rb1(100, 0.01, 0);
CRollingBloomFilter rb1(100, 0.01, 1);
// Overfill:
static const int DATASIZE=399;
@ -500,7 +500,7 @@ BOOST_AUTO_TEST_CASE(rolling_bloom)
BOOST_CHECK(nHits < 175);
BOOST_CHECK(rb1.contains(data[DATASIZE-1]));
rb1.clear();
rb1.reset(1);
BOOST_CHECK(!rb1.contains(data[DATASIZE-1]));
// Now roll through data, make sure last 100 entries
@ -527,7 +527,7 @@ BOOST_AUTO_TEST_CASE(rolling_bloom)
BOOST_CHECK(nHits < 100);
// last-1000-entry, 0.01% false positive:
CRollingBloomFilter rb2(1000, 0.001, 0);
CRollingBloomFilter rb2(1000, 0.001, 1);
for (int i = 0; i < DATASIZE; i++) {
rb2.insert(data[i]);
}

Loading…
Cancel
Save