refactor: BloomFilter (#5325)

This commit is contained in:
Alex Klymenko 2024-08-15 10:30:53 +02:00 committed by GitHub
parent 777de1da99
commit 134b42c7ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 106 additions and 20 deletions

View File

@ -2,35 +2,61 @@ package com.thealgorithms.datastructures.bloomfilter;
import java.util.BitSet;
/**
 * A generic Bloom filter for probabilistic membership checking.
 *
 * <p>{@code contains} never returns {@code false} for an element that was inserted, but it may
 * return {@code true} for an element that was never inserted (a false positive).
 *
 * <p>Elements are hashed through their {@code String.valueOf} representation, so two elements
 * with the same string form are indistinguishable to the filter.
 *
 * @param <T> the type of elements stored in the Bloom filter
 */
public class BloomFilter<T> {

    private final int numberOfHashFunctions;
    private final BitSet bitArray;
    private final Hash<T>[] hashFunctions;

    /**
     * Constructs a BloomFilter with a specified number of hash functions and bit array size.
     *
     * <p>Bit positions are reduced modulo {@code BitSet.size()}, which reports the number of bits
     * the {@link BitSet} actually allocated and may therefore be larger than {@code bitArraySize}.
     *
     * @param numberOfHashFunctions the number of hash functions to use; must be at least 1
     * @param bitArraySize the requested size of the bit array; must be at least 1
     * @throws IllegalArgumentException if either argument is less than 1
     */
    @SuppressWarnings("unchecked")
    public BloomFilter(int numberOfHashFunctions, int bitArraySize) {
        if (numberOfHashFunctions < 1) {
            throw new IllegalArgumentException("numberOfHashFunctions must be at least 1, got " + numberOfHashFunctions);
        }
        // A zero-sized BitSet reports size() == 0, which would make the modulo in positionOf divide by zero.
        if (bitArraySize < 1) {
            throw new IllegalArgumentException("bitArraySize must be at least 1, got " + bitArraySize);
        }
        this.numberOfHashFunctions = numberOfHashFunctions;
        this.bitArray = new BitSet(bitArraySize);
        // Generic array creation is not allowed, hence the raw array and the unchecked suppression.
        this.hashFunctions = new Hash[numberOfHashFunctions];
        initializeHashFunctions();
    }

    /**
     * Initializes the hash functions with unique indices {@code 0 .. numberOfHashFunctions - 1}.
     */
    private void initializeHashFunctions() {
        for (int i = 0; i < numberOfHashFunctions; i++) {
            hashFunctions[i] = new Hash<>(i);
        }
    }

    /**
     * Inserts an element into the Bloom filter by setting one bit per hash function.
     *
     * @param key the element to insert
     */
    public void insert(T key) {
        for (Hash<T> hash : hashFunctions) {
            bitArray.set(positionOf(hash, key));
        }
    }

    /**
     * Checks if an element might be in the Bloom filter.
     *
     * @param key the element to check
     * @return {@code true} if the element might be in the Bloom filter, {@code false} if it is definitely not
     */
    public boolean contains(T key) {
        for (Hash<T> hash : hashFunctions) {
            if (!bitArray.get(positionOf(hash, key))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Maps a key to a bit position for the given hash function.
     *
     * @param hash the hash function to apply
     * @param key the element to locate
     * @return a non-negative index smaller than {@code bitArray.size()}
     */
    private int positionOf(Hash<T> hash, T key) {
        // The remainder keeps the sign of an overflowed hash; its magnitude is always below size(),
        // so Math.abs cannot hit the Integer.MIN_VALUE corner case here.
        return Math.abs(hash.compute(key) % bitArray.size());
    }

    /**
     * A hash function used by the Bloom filter, distinguished from its siblings by an index.
     *
     * @param <T> the type of elements to be hashed
     */
    private static class Hash<T> {

        private final int index;

        /**
         * Constructs a Hash function with a specified index.
         *
         * @param index the zero-based index of this hash function
         */
        Hash(int index) {
            this.index = index;
        }

        /**
         * Computes the hash of the given key from its {@code String.valueOf} representation.
         *
         * @param key the element to hash
         * @return the hash value; may be negative if the multiplication overflows
         */
        public int compute(T key) {
            // Multiply by index + 1 rather than index: with a zero multiplier the first hash
            // function would return 0 for every key and contribute nothing to the filter.
            return (index + 1) * asciiString(String.valueOf(key));
        }

        /**
         * Computes the sum of the character values of a string.
         *
         * <p>The sum ignores character order, so permutations of the same characters hash alike.
         *
         * @param word the string to compute
         * @return the sum of the {@code char} values of the string
         */
        private int asciiString(String word) {
            int sum = 0;
            for (char c : word.toCharArray()) {
                sum += c;
            }
            return sum;
        }
    }
}

View File

@ -1,12 +1,19 @@
package com.thealgorithms.datastructures.bloomfilter;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
public class BloomFilterTest {
private BloomFilter<String> bloomFilter;
/**
 * Replaces the shared String filter with a fresh one (3 hash functions, requested size 100)
 * so that bits set by one test cannot leak into another.
 */
@BeforeEach
void setUp() {
    this.bloomFilter = new BloomFilter<String>(3, 100);
}
@Test
public void test1() {
public void testIntegerContains() {
BloomFilter<Integer> bloomFilter = new BloomFilter<>(3, 10);
bloomFilter.insert(3);
bloomFilter.insert(17);
@ -16,12 +23,43 @@ public class BloomFilterTest {
}
@Test
public void test2() {
BloomFilter<String> bloomFilter = new BloomFilter<>(4, 20);
public void testStringContains() {
bloomFilter.insert("omar");
bloomFilter.insert("mahamid");
Assertions.assertTrue(bloomFilter.contains("omar"));
Assertions.assertTrue(bloomFilter.contains("mahamid"));
}
/**
 * Inserts two words and checks that both are found while a third, never-inserted word is not.
 */
@Test
void testInsertAndContains() {
    final String[] inserted = {"hello", "world"};

    for (String word : inserted) {
        bloomFilter.insert(word);
    }

    for (String word : inserted) {
        Assertions.assertTrue(bloomFilter.contains(word));
    }
    Assertions.assertFalse(bloomFilter.contains("java"));
}
/**
 * Checks that two keys that were never inserted are reported as absent, i.e. that this
 * particular input does not trigger a false positive.
 */
@Test
void testFalsePositive() {
    for (String fruit : new String[] {"apple", "banana"}) {
        bloomFilter.insert(fruit);
    }

    for (String absent : new String[] {"grape", "orange"}) {
        Assertions.assertFalse(bloomFilter.contains(absent));
    }
}
/**
 * Inserts the keys "key0" through "key99" and checks that every one of them is found afterwards.
 */
@Test
void testMultipleInsertions() {
    final int total = 100;

    int next = 0;
    while (next < total) {
        bloomFilter.insert("key" + next);
        next++;
    }

    int probe = 0;
    while (probe < total) {
        Assertions.assertTrue(bloomFilter.contains("key" + probe));
        probe++;
    }

    // NOTE(review): a Bloom filter may legitimately report a false positive, so this negative
    // check depends on the concrete hash positions of "key200" - confirm it is meant as a pin.
    Assertions.assertFalse(bloomFilter.contains("key" + 200));
}
}