Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
Browse files
Fixes #81 HyperLogLog.Builder improvements
We cannot make the constructors for the Builder match HyperLogLog
without risking behavior changes downstream, but we also have no
default constructor for either (and adding one could be contentious).

Therefore I have added static 'with' methods. This somewhat goes
against the expected builder pattern, but it's not as though
HyperLogLog.Builder was really following it anyway (there is only
one setting so it is closer to just being a supplier).

Additionally, it would have been a little silly to also implement
non-static 'with' methods since the code would look exactly the
same for both.

Lastly, the builder now performs eager validation of rsd/log2m.
This leads to extra validation calls, but I can live with that.
  • Loading branch information
tea-dragon committed Sep 11, 2014
1 parent f6cad60 commit 06bdb3f569a7fac50d5e1801359324e16929c270
Showing with 33 additions and 8 deletions.
  1. +33 −8 src/main/java/com/clearspring/analytics/stream/cardinality/HyperLogLog.java
@@ -97,6 +97,13 @@ private static int log2m(double rsd) {
return (int) (Math.log((1.106 / rsd) * (1.106 / rsd)) / Math.log(2));
}

/**
 * Rejects log2m values that fall outside the supported range.
 *
 * @param log2m the value to check
 * @throws IllegalArgumentException if log2m is negative or greater than 30
 */
private static void validateLog2m(int log2m) {
    final boolean withinRange = log2m >= 0 && log2m <= 30;
    if (withinRange) {
        return;
    }
    final String message = "log2m argument is "
            + log2m + " and is outside the range [0, 30]";
    throw new IllegalArgumentException(message);
}

/**
* Create a new HyperLogLog instance. The log2m parameter defines the accuracy of
* the counter. The larger the log2m the better the accuracy.
@@ -117,10 +124,7 @@ public HyperLogLog(int log2m) {
*/
@Deprecated
public HyperLogLog(int log2m, RegisterSet registerSet) {
if (log2m < 0 || log2m > 30) {
throw new IllegalArgumentException("log2m argument is "
+ log2m + " and is outside the range [0, 30]");
}
validateLog2m(log2m);
this.registerSet = registerSet;
this.log2m = log2m;
int m = 1 << this.log2m;
@@ -286,24 +290,45 @@ private Object readResolve() {

public static class Builder implements IBuilder<ICardinality>, Serializable {

private double rsd;
private final int log2m;

/**
* Uses the given RSD percentage to determine how many bytes the constructed HyperLogLog will use.
*
* @param rsd the RSD value; it is converted to a log2m via {@code log2m(rsd)}
* @deprecated Use {@link #withRsd(double)} instead. This builder's constructors do not match the
* constructors of the HyperLogLog class (which are themselves ambiguous), and there is no way to make
* them match without risking behavior changes downstream.
*/
@Deprecated
public Builder(double rsd) {
this.rsd = rsd;
this(log2m(rsd));
}

/**
* Creates a builder for the given log2m, validating it eagerly.
* <p>
* This constructor is private to prevent behavior change for ambiguous usages (legacy support):
* an int argument passed to the public {@code Builder(double)} constructor is widened to double,
* so exposing an int overload would change which constructor such existing calls bind to.
* Use {@link #withLog2m(int)} to reach this constructor from outside.
*
* @param log2m the value to validate and store in this builder
* @throws IllegalArgumentException if log2m is outside the range [0, 30]
*/
private Builder(int log2m) {
validateLog2m(log2m);
this.log2m = log2m;
}

/**
* Creates a new HyperLogLog from the setting held by this builder.
*
* @return a newly constructed HyperLogLog
*/
@Override
public HyperLogLog build() {
return new HyperLogLog(rsd);
return new HyperLogLog(log2m);
}

/**
* Computes the size reported for this builder's setting.
*
* @return {@code RegisterSet.getBits(k) * 4}, where {@code k = 1 << log2m}
*/
@Override
public int sizeof() {
int log2m = log2m(rsd);
// k = 2^log2m
int k = 1 << log2m;
return RegisterSet.getBits(k) * 4;
}

/**
 * Static factory for a builder configured with an explicit log2m.
 *
 * @param log2m forwarded to the private {@code Builder(int)} constructor, which validates it
 * @return a new Builder holding the given log2m
 * @throws IllegalArgumentException if log2m is outside the range [0, 30]
 */
public static Builder withLog2m(int log2m) {
    final Builder builder = new Builder(log2m);
    return builder;
}

/**
 * Static factory for a builder configured from an RSD value.
 *
 * @param rsd forwarded unchanged to the {@code Builder(double)} constructor
 * @return a new Builder created from the given rsd
 */
public static Builder withRsd(double rsd) {
    final Builder builder = new Builder(rsd);
    return builder;
}

public static HyperLogLog build(byte[] bytes) throws IOException {
ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
return build(new DataInputStream(bais));

0 comments on commit 06bdb3f

Please sign in to comment.