17 |
ilm |
1 |
/*
|
|
|
2 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
|
|
|
3 |
*
|
182 |
ilm |
4 |
* Copyright 2011-2019 OpenConcerto, by ILM Informatique. All rights reserved.
|
17 |
ilm |
5 |
*
|
|
|
6 |
* The contents of this file are subject to the terms of the GNU General Public License Version 3
|
|
|
7 |
* only ("GPL"). You may not use this file except in compliance with the License. You can obtain a
|
|
|
8 |
* copy of the License at http://www.gnu.org/licenses/gpl-3.0.html See the License for the specific
|
|
|
9 |
* language governing permissions and limitations under the License.
|
|
|
10 |
*
|
|
|
11 |
* When distributing the software, include this License Header Notice in each file.
|
|
|
12 |
*/
|
|
|
13 |
|
|
|
14 |
package org.openconcerto.utils;
|
|
|
15 |
|
|
|
16 |
import java.io.Serializable;
|
|
|
17 |
import java.util.BitSet;
|
|
|
18 |
import java.util.Collection;
|
|
|
19 |
import java.util.Iterator;
|
|
|
20 |
import java.util.Random;
|
|
|
21 |
import java.util.Set;
|
|
|
22 |
|
|
|
23 |
public class BloomFilter<E> implements Set<E>, Serializable {
|
|
|
24 |
private static final long serialVersionUID = 3527833617516722215L;
|
|
|
25 |
private final int k;
|
|
|
26 |
private final BitSet bitSet;
|
|
|
27 |
private final int bitArraySize;
|
|
|
28 |
private final int expectedElements;
|
|
|
29 |
|
|
|
30 |
/**
|
|
|
31 |
* Construct a SimpleBloomFilter. You must specify the number of bits in the Bloom Filter, and
|
|
|
32 |
* also you should specify the number of items you expect to add. The latter is used to choose
|
|
|
33 |
* some optimal internal values to minimize the false-positive rate (which can be estimated with
|
|
|
34 |
* expectedFalsePositiveRate()).
|
|
|
35 |
*
|
|
|
36 |
* @param bitArraySize The number of bits in the bit array (often called 'm' in the context of
|
|
|
37 |
* bloom filters).
|
|
|
38 |
* @param expectedElements The typical number of items you expect to be added to the
|
|
|
39 |
* SimpleBloomFilter (often called 'n').
|
|
|
40 |
*/
|
|
|
41 |
public BloomFilter(int bitArraySize, int expectedElements) {
|
|
|
42 |
this.bitArraySize = bitArraySize;
|
|
|
43 |
this.expectedElements = expectedElements;
|
182 |
ilm |
44 |
this.k = (int) Math.ceil(((double) bitArraySize / expectedElements) * Math.log(2.0));
|
17 |
ilm |
45 |
bitSet = new BitSet(bitArraySize);
|
|
|
46 |
}
|
|
|
47 |
|
|
|
48 |
/**
|
|
|
49 |
* Calculates the approximate probability of the contains() method returning true for an object
|
|
|
50 |
* that had not previously been inserted into the bloom filter. This is known as the "false
|
|
|
51 |
* positive probability".
|
|
|
52 |
*
|
|
|
53 |
* @return The estimated false positive rate
|
|
|
54 |
*/
|
|
|
55 |
public double expectedFalsePositiveProbability() {
|
|
|
56 |
return Math.pow((1 - Math.exp(-k * (double) expectedElements / bitArraySize)), k);
|
|
|
57 |
}
|
|
|
58 |
|
|
|
59 |
/*
|
|
|
60 |
* @return This method will always return false
|
|
|
61 |
*
|
|
|
62 |
* @see java.util.Set#add(java.lang.Object)
|
|
|
63 |
*/
|
|
|
64 |
public boolean add(E o) {
|
|
|
65 |
final Random r = new Random(o.hashCode());
|
|
|
66 |
for (int x = 0; x < k; x++) {
|
|
|
67 |
bitSet.set(r.nextInt(bitArraySize), true);
|
|
|
68 |
}
|
|
|
69 |
return false;
|
|
|
70 |
}
|
|
|
71 |
|
|
|
72 |
/**
|
|
|
73 |
* @return This method will always return false
|
|
|
74 |
*/
|
|
|
75 |
public boolean addAll(Collection<? extends E> c) {
|
|
|
76 |
for (E o : c) {
|
|
|
77 |
add(o);
|
|
|
78 |
}
|
|
|
79 |
return false;
|
|
|
80 |
}
|
|
|
81 |
|
|
|
82 |
/**
|
|
|
83 |
* Clear the Bloom Filter
|
|
|
84 |
*/
|
|
|
85 |
public void clear() {
|
|
|
86 |
final int length = bitSet.length();
|
|
|
87 |
for (int x = 0; x < length; x++) {
|
|
|
88 |
bitSet.set(x, false);
|
|
|
89 |
}
|
|
|
90 |
}
|
|
|
91 |
|
|
|
92 |
/**
|
|
|
93 |
* @return False indicates that o was definitely not added to this Bloom Filter, true indicates
|
|
|
94 |
* that it probably was. The probability can be estimated using the
|
|
|
95 |
* expectedFalsePositiveProbability() method.
|
|
|
96 |
*/
|
|
|
97 |
public boolean contains(Object o) {
|
|
|
98 |
Random r = new Random(o.hashCode());
|
|
|
99 |
for (int x = 0; x < k; x++) {
|
|
|
100 |
if (!bitSet.get(r.nextInt(bitArraySize)))
|
|
|
101 |
return false;
|
|
|
102 |
}
|
|
|
103 |
return true;
|
|
|
104 |
}
|
|
|
105 |
|
|
|
106 |
public boolean containsAll(Collection<?> c) {
|
|
|
107 |
for (Object o : c) {
|
|
|
108 |
if (!contains(o))
|
|
|
109 |
return false;
|
|
|
110 |
}
|
|
|
111 |
return true;
|
|
|
112 |
}
|
|
|
113 |
|
|
|
114 |
public boolean isEmpty() {
|
|
|
115 |
throw new UnsupportedOperationException();
|
|
|
116 |
}
|
|
|
117 |
|
|
|
118 |
public Iterator<E> iterator() {
|
|
|
119 |
throw new UnsupportedOperationException();
|
|
|
120 |
}
|
|
|
121 |
|
|
|
122 |
public boolean remove(Object o) {
|
|
|
123 |
throw new UnsupportedOperationException();
|
|
|
124 |
}
|
|
|
125 |
|
|
|
126 |
public boolean removeAll(Collection<?> c) {
|
|
|
127 |
throw new UnsupportedOperationException();
|
|
|
128 |
}
|
|
|
129 |
|
|
|
130 |
public boolean retainAll(Collection<?> c) {
|
|
|
131 |
throw new UnsupportedOperationException();
|
|
|
132 |
}
|
|
|
133 |
|
|
|
134 |
public int size() {
|
|
|
135 |
throw new UnsupportedOperationException();
|
|
|
136 |
}
|
|
|
137 |
|
|
|
138 |
public Object[] toArray() {
|
|
|
139 |
throw new UnsupportedOperationException();
|
|
|
140 |
}
|
|
|
141 |
|
|
|
142 |
public <T> T[] toArray(T[] a) {
|
|
|
143 |
throw new UnsupportedOperationException();
|
|
|
144 |
}
|
|
|
145 |
|
|
|
146 |
public int getBitLength() {
|
|
|
147 |
return bitSet.length();
|
|
|
148 |
}
|
|
|
149 |
|
|
|
150 |
public static void main(String[] args) {
|
182 |
ilm |
151 |
BloomFilter<String> set = new BloomFilter<>(100, 5);
|
17 |
ilm |
152 |
|
|
|
153 |
// Add some things to the bloom filter
|
|
|
154 |
set.add("dog");
|
|
|
155 |
set.add("doq");
|
|
|
156 |
set.add("cat");
|
|
|
157 |
set.add("mouse");
|
|
|
158 |
set.add("dolphin");
|
|
|
159 |
|
|
|
160 |
// Test to see if the bloom filter remembers
|
|
|
161 |
String test = "dog";
|
|
|
162 |
if (set.contains(test)) {
|
|
|
163 |
System.out.println(test + " is in the set with probability " + (1 - set.expectedFalsePositiveProbability()));
|
|
|
164 |
} else {
|
|
|
165 |
System.out.println(test + " is definitely not in the set");
|
|
|
166 |
}
|
|
|
167 |
|
|
|
168 |
System.out.println("BloomFilter stored in " + set.getBitLength() / 8 + " bytes");
|
|
|
169 |
}
|
|
|
170 |
|
|
|
171 |
}
|