User-defined switch point between sparse-dense HLL encodings.

This commit is contained in:
antirez 2014-04-15 17:46:51 +02:00
parent d541f65d66
commit 402110f9fd
5 changed files with 32 additions and 6 deletions

View File

@ -682,6 +682,20 @@ set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
# HyperLogLog sparse representation bytes limit. The limit includes the
# 16 bytes header. When an HyperLogLog using the sparse representation crosses
# this limit, it is convereted into the dense representation.
#
# A value greater than 16000 is totally useless, since at that point the
# dense representation is more memory efficient.
#
# The suggested value is ~ 3000 in order to have the benefits of
# the space efficient encoding without slowing down too much PFADD,
# which is O(N) with the sparse encoding. Thev value can be raised to
# ~ 10000 when CPU is not a concern, but space is, and the data set is
# composed of many HyperLogLogs with cardinality in the 0 - 15000 range.
hll-sparse-max-bytes 3000
# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in
# order to help rehashing the main Redis hash table (the one mapping top-level
# keys to values). The hash table implementation Redis uses (see dict.c)

View File

@ -391,6 +391,8 @@ void loadServerConfigFromString(char *config) {
server.zset_max_ziplist_entries = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"zset-max-ziplist-value") && argc == 2) {
server.zset_max_ziplist_value = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"hll-sparse-max-bytes") && argc == 2) {
server.hll_sparse_max_bytes = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"rename-command") && argc == 3) {
struct redisCommand *cmd = lookupCommand(argv[1]);
int retval;
@ -765,6 +767,9 @@ void configSetCommand(redisClient *c) {
} else if (!strcasecmp(c->argv[2]->ptr,"zset-max-ziplist-value")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
server.zset_max_ziplist_value = ll;
} else if (!strcasecmp(c->argv[2]->ptr,"hll-sparse-max-bytes")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
server.hll_sparse_max_bytes = ll;
} else if (!strcasecmp(c->argv[2]->ptr,"lua-time-limit")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
server.lua_time_limit = ll;
@ -974,6 +979,8 @@ void configGetCommand(redisClient *c) {
server.zset_max_ziplist_entries);
config_get_numerical_field("zset-max-ziplist-value",
server.zset_max_ziplist_value);
config_get_numerical_field("hll-sparse-max-bytes",
server.hll_sparse_max_bytes);
config_get_numerical_field("lua-time-limit",server.lua_time_limit);
config_get_numerical_field("slowlog-log-slower-than",
server.slowlog_log_slower_than);
@ -1773,6 +1780,7 @@ int rewriteConfig(char *path) {
rewriteConfigNumericalOption(state,"set-max-intset-entries",server.set_max_intset_entries,REDIS_SET_MAX_INTSET_ENTRIES);
rewriteConfigNumericalOption(state,"zset-max-ziplist-entries",server.zset_max_ziplist_entries,REDIS_ZSET_MAX_ZIPLIST_ENTRIES);
rewriteConfigNumericalOption(state,"zset-max-ziplist-value",server.zset_max_ziplist_value,REDIS_ZSET_MAX_ZIPLIST_VALUE);
rewriteConfigNumericalOption(state,"hll-sparse-max-bytes",server.hll_sparse_max_bytes,REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES);
rewriteConfigYesNoOption(state,"activerehashing",server.activerehashing,REDIS_DEFAULT_ACTIVE_REHASHING);
rewriteConfigClientoutputbufferlimitOption(state);
rewriteConfigNumericalOption(state,"hz",server.hz,REDIS_DEFAULT_HZ);

View File

@ -176,7 +176,7 @@
* involved in updating the sparse representation is not justified by the
* memory savings. The exact maximum length of the sparse representation
* when this implementation switches to the dense representation is
* configured via the define HLL_SPARSE_MAX.
* configured via the define server.hll_sparse_max_bytes.
*/
struct hllhdr {
@ -202,8 +202,6 @@ struct hllhdr {
#define HLL_SPARSE 1 /* Sparse encoding */
#define HLL_MAX_ENCODING 1
#define HLL_SPARSE_MAX 3000
static char *invalid_hll_err = "Corrupted HLL object detected";
/* =========================== Low level bit macros ========================= */
@ -634,7 +632,7 @@ int hllSparseToDense(robj *o) {
* As a side effect the function may promote the HLL representation from
* sparse to dense: this happens when a register requires to be set to a value
* not representable with the sparse representation, or when the resulting
* size would be greater than HLL_SPARSE_MAX. */
* size would be greater than server.hll_sparse_max_bytes. */
int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) {
struct hllhdr *hdr;
uint8_t oldcount, count, *sparse, *end, *p, *prev, *next;
@ -815,7 +813,8 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) {
int oldlen = is_xzero ? 2 : 1;
int deltalen = seqlen-oldlen;
if (deltalen > 0 && sdslen(o->ptr)+deltalen > HLL_SPARSE_MAX) goto promote;
if (deltalen > 0 &&
sdslen(o->ptr)+deltalen > server.hll_sparse_max_bytes) goto promote;
if (deltalen && next) memmove(next+deltalen,next,end-next);
sdsIncrLen(o->ptr,deltalen);
memcpy(p,seq,seqlen);
@ -1312,7 +1311,7 @@ void pfselftestCommand(redisClient *c) {
/* Make sure that for small cardinalities we use sparse
* encoding. */
if (j == checkpoint && j < HLL_SPARSE_MAX/2) {
if (j == checkpoint && j < server.hll_sparse_max_bytes/2) {
hdr2 = o->ptr;
if (hdr2->encoding != HLL_SPARSE) {
addReplyError(c, "TESTFAILED sparse encoding not used");

View File

@ -1421,6 +1421,7 @@ void initServerConfig() {
server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES;
server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES;
server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE;
server.hll_sparse_max_bytes = REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES;
server.shutdown_asap = 0;
server.repl_ping_slave_period = REDIS_REPL_PING_SLAVE_PERIOD;
server.repl_timeout = REDIS_REPL_TIMEOUT;

View File

@ -312,6 +312,9 @@
#define REDIS_ZSET_MAX_ZIPLIST_ENTRIES 128
#define REDIS_ZSET_MAX_ZIPLIST_VALUE 64
/* HyperLogLog defines */
#define REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES 3000
/* Sets operations codes */
#define REDIS_OP_UNION 0
#define REDIS_OP_DIFF 1
@ -809,6 +812,7 @@ struct redisServer {
size_t set_max_intset_entries;
size_t zset_max_ziplist_entries;
size_t zset_max_ziplist_value;
size_t hll_sparse_max_bytes;
time_t unixtime; /* Unix time sampled every cron cycle. */
long long mstime; /* Like 'unixtime' but with milliseconds resolution. */
/* Pubsub */