leveldb
bloom_test.cc
Go to the documentation of this file.
1 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 
6 
7 #include "util/coding.h"
8 #include "util/logging.h"
9 #include "util/testharness.h"
10 #include "util/testutil.h"
11 
12 namespace leveldb {
13 
14 static const int kVerbose = 1;
15 
16 static Slice Key(int i, char* buffer) {
17  EncodeFixed32(buffer, i);
18  return Slice(buffer, sizeof(uint32_t));
19 }
20 
21 class BloomTest {
22  private:
24  std::string filter_;
25  std::vector<std::string> keys_;
26 
27  public:
28  BloomTest() : policy_(NewBloomFilterPolicy(10)) { }
29 
31  delete policy_;
32  }
33 
34  void Reset() {
35  keys_.clear();
36  filter_.clear();
37  }
38 
39  void Add(const Slice& s) {
40  keys_.push_back(s.ToString());
41  }
42 
43  void Build() {
44  std::vector<Slice> key_slices;
45  for (size_t i = 0; i < keys_.size(); i++) {
46  key_slices.push_back(Slice(keys_[i]));
47  }
48  filter_.clear();
49  policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
50  &filter_);
51  keys_.clear();
52  if (kVerbose >= 2) DumpFilter();
53  }
54 
55  size_t FilterSize() const {
56  return filter_.size();
57  }
58 
59  void DumpFilter() {
60  fprintf(stderr, "F(");
61  for (size_t i = 0; i+1 < filter_.size(); i++) {
62  const unsigned int c = static_cast<unsigned int>(filter_[i]);
63  for (int j = 0; j < 8; j++) {
64  fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
65  }
66  }
67  fprintf(stderr, ")\n");
68  }
69 
70  bool Matches(const Slice& s) {
71  if (!keys_.empty()) {
72  Build();
73  }
74  return policy_->KeyMayMatch(s, filter_);
75  }
76 
77  double FalsePositiveRate() {
78  char buffer[sizeof(int)];
79  int result = 0;
80  for (int i = 0; i < 10000; i++) {
81  if (Matches(Key(i + 1000000000, buffer))) {
82  result++;
83  }
84  }
85  return result / 10000.0;
86  }
87 };
88 
89 TEST(BloomTest, EmptyFilter) {
90  ASSERT_TRUE(! Matches("hello"));
91  ASSERT_TRUE(! Matches("world"));
92 }
93 
94 TEST(BloomTest, Small) {
95  Add("hello");
96  Add("world");
97  ASSERT_TRUE(Matches("hello"));
98  ASSERT_TRUE(Matches("world"));
99  ASSERT_TRUE(! Matches("x"));
100  ASSERT_TRUE(! Matches("foo"));
101 }
102 
103 static int NextLength(int length) {
104  if (length < 10) {
105  length += 1;
106  } else if (length < 100) {
107  length += 10;
108  } else if (length < 1000) {
109  length += 100;
110  } else {
111  length += 1000;
112  }
113  return length;
114 }
115 
116 TEST(BloomTest, VaryingLengths) {
117  char buffer[sizeof(int)];
118 
119  // Count number of filters that significantly exceed the false positive rate
120  int mediocre_filters = 0;
121  int good_filters = 0;
122 
123  for (int length = 1; length <= 10000; length = NextLength(length)) {
124  Reset();
125  for (int i = 0; i < length; i++) {
126  Add(Key(i, buffer));
127  }
128  Build();
129 
130  ASSERT_LE(FilterSize(), static_cast<size_t>((length * 10 / 8) + 40))
131  << length;
132 
133  // All added keys must match
134  for (int i = 0; i < length; i++) {
135  ASSERT_TRUE(Matches(Key(i, buffer)))
136  << "Length " << length << "; key " << i;
137  }
138 
139  // Check false positive rate
140  double rate = FalsePositiveRate();
141  if (kVerbose >= 1) {
142  fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
143  rate*100.0, length, static_cast<int>(FilterSize()));
144  }
145  ASSERT_LE(rate, 0.02); // Must not be over 2%
146  if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often
147  else good_filters++;
148  }
149  if (kVerbose >= 1) {
150  fprintf(stderr, "Filters: %d good, %d mediocre\n",
151  good_filters, mediocre_filters);
152  }
153  ASSERT_LE(mediocre_filters, good_filters/5);
154 }
155 
156 // Different bits-per-byte
157 
158 } // namespace leveldb
159 
160 int main(int argc, char** argv) {
162 }
uint64_t Key
#define ASSERT_LE(a, b)
Definition: testharness.h:111
virtual bool KeyMayMatch(const Slice &key, const Slice &filter) const =0
void EncodeFixed32(char *buf, uint32_t value)
Definition: coding.cc:9
int RunAllTests()
Definition: testharness.cc:36
std::string ToString() const
Definition: slice.h:66
const FilterPolicy * NewBloomFilterPolicy(int bits_per_key)
Definition: bloom.cc:91
#define ASSERT_TRUE(c)
Definition: testharness.h:105
TEST(AutoCompactTest, ReadAll)
void Add(const Slice &s)
Definition: bloom_test.cc:39
std::string filter_
Definition: bloom_test.cc:24
int main(int argc, char **argv)
Definition: bloom_test.cc:160
std::vector< std::string > keys_
Definition: bloom_test.cc:25
bool Matches(const Slice &s)
Definition: bloom_test.cc:70
double FalsePositiveRate()
Definition: bloom_test.cc:77
size_t FilterSize() const
Definition: bloom_test.cc:55
virtual void CreateFilter(const Slice *keys, int n, std::string *dst) const =0
static int NextLength(int length)
Definition: bloom_test.cc:103
const FilterPolicy * policy_
Definition: bloom_test.cc:23
static const int kVerbose
Definition: bloom_test.cc:14