Adjustments for active defrag defaults and tuning

Reduce default minimum effort, so that when fragmentation is just detected,
the impact on the latency will be minor.

Reduce the default maximum effort, mainly to prevent a case where sudden
massive deletions trigger an aggressive defrag that will cause latency.

When activedefrag is disabled mid-run, reset the 'running' info field, and
clear the scan cursor, so that when it'll be re-enabled, a new fresh scan will
start.

Clearing the 'running' variable is important since lowering the defragger
tunables mid-scan won't help, the defragger only considers new threshold when
a new scan starts, and during a scan it can only become more aggressive,
(when more severe fragmentation is detected), it'll never go less aggressive.
So by temporarily disabling activedefrag, one can lower the tunables.

Removing the experimental warning.
This commit is contained in:
Oran Agra 2019-11-10 09:38:50 +02:00
parent 0f026af185
commit 0bc3dab095
4 changed files with 41 additions and 27 deletions

View File

@ -1606,10 +1606,6 @@ rdb-save-incremental-fsync yes
########################### ACTIVE DEFRAGMENTATION ####################### ########################### ACTIVE DEFRAGMENTATION #######################
# #
# WARNING THIS FEATURE IS EXPERIMENTAL. However it was stress tested
# even in production and manually tested by multiple engineers for some
# time.
#
# What is active defragmentation? # What is active defragmentation?
# ------------------------------- # -------------------------------
# #
@ -1649,7 +1645,7 @@ rdb-save-incremental-fsync yes
# a good idea to leave the defaults untouched. # a good idea to leave the defaults untouched.
# Enabled active defragmentation # Enabled active defragmentation
# activedefrag yes # activedefrag no
# Minimum amount of fragmentation waste to start active defrag # Minimum amount of fragmentation waste to start active defrag
# active-defrag-ignore-bytes 100mb # active-defrag-ignore-bytes 100mb
@ -1660,11 +1656,13 @@ rdb-save-incremental-fsync yes
# Maximum percentage of fragmentation at which we use maximum effort # Maximum percentage of fragmentation at which we use maximum effort
# active-defrag-threshold-upper 100 # active-defrag-threshold-upper 100
# Minimal effort for defrag in CPU percentage # Minimal effort for defrag in CPU percentage, to be used when the lower
# active-defrag-cycle-min 5 # threshold is reached
# active-defrag-cycle-min 1
# Maximal effort for defrag in CPU percentage # Maximal effort for defrag in CPU percentage, to be used when the upper
# active-defrag-cycle-max 75 # threshold is reached
# active-defrag-cycle-max 25
# Maximum number of set/hash/zset/list fields that will be processed from # Maximum number of set/hash/zset/list fields that will be processed from
# the main dictionary scan # the main dictionary scan

View File

@ -919,10 +919,12 @@ int defragLaterItem(dictEntry *de, unsigned long *cursor, long long endtime) {
return 0; return 0;
} }
/* static variables serving defragLaterStep to continue scanning a key from where we stopped last time. */
static sds defrag_later_current_key = NULL;
static unsigned long defrag_later_cursor = 0;
/* returns 0 if no more work needs to be been done, and 1 if time is up and more work is needed. */ /* returns 0 if no more work needs to be been done, and 1 if time is up and more work is needed. */
int defragLaterStep(redisDb *db, long long endtime) { int defragLaterStep(redisDb *db, long long endtime) {
static sds current_key = NULL;
static unsigned long cursor = 0;
unsigned int iterations = 0; unsigned int iterations = 0;
unsigned long long prev_defragged = server.stat_active_defrag_hits; unsigned long long prev_defragged = server.stat_active_defrag_hits;
unsigned long long prev_scanned = server.stat_active_defrag_scanned; unsigned long long prev_scanned = server.stat_active_defrag_scanned;
@ -930,16 +932,15 @@ int defragLaterStep(redisDb *db, long long endtime) {
do { do {
/* if we're not continuing a scan from the last call or loop, start a new one */ /* if we're not continuing a scan from the last call or loop, start a new one */
if (!cursor) { if (!defrag_later_cursor) {
listNode *head = listFirst(db->defrag_later); listNode *head = listFirst(db->defrag_later);
/* Move on to next key */ /* Move on to next key */
if (current_key) { if (defrag_later_current_key) {
serverAssert(current_key == head->value); serverAssert(defrag_later_current_key == head->value);
sdsfree(head->value);
listDelNode(db->defrag_later, head); listDelNode(db->defrag_later, head);
cursor = 0; defrag_later_cursor = 0;
current_key = NULL; defrag_later_current_key = NULL;
} }
/* stop if we reached the last one. */ /* stop if we reached the last one. */
@ -948,21 +949,21 @@ int defragLaterStep(redisDb *db, long long endtime) {
return 0; return 0;
/* start a new key */ /* start a new key */
current_key = head->value; defrag_later_current_key = head->value;
cursor = 0; defrag_later_cursor = 0;
} }
/* each time we enter this function we need to fetch the key from the dict again (if it still exists) */ /* each time we enter this function we need to fetch the key from the dict again (if it still exists) */
dictEntry *de = dictFind(db->dict, current_key); dictEntry *de = dictFind(db->dict, defrag_later_current_key);
key_defragged = server.stat_active_defrag_hits; key_defragged = server.stat_active_defrag_hits;
do { do {
int quit = 0; int quit = 0;
if (defragLaterItem(de, &cursor, endtime)) if (defragLaterItem(de, &defrag_later_cursor, endtime))
quit = 1; /* time is up, we didn't finish all the work */ quit = 1; /* time is up, we didn't finish all the work */
/* Don't start a new BIG key in this loop, this is because the /* Don't start a new BIG key in this loop, this is because the
* next key can be a list, and scanLaterList must be done in once cycle */ * next key can be a list, and scanLaterList must be done in once cycle */
if (!cursor) if (!defrag_later_cursor)
quit = 1; quit = 1;
/* Once in 16 scan iterations, 512 pointer reallocations, or 64 fields /* Once in 16 scan iterations, 512 pointer reallocations, or 64 fields
@ -982,7 +983,7 @@ int defragLaterStep(redisDb *db, long long endtime) {
prev_defragged = server.stat_active_defrag_hits; prev_defragged = server.stat_active_defrag_hits;
prev_scanned = server.stat_active_defrag_scanned; prev_scanned = server.stat_active_defrag_scanned;
} }
} while(cursor); } while(defrag_later_cursor);
if(key_defragged != server.stat_active_defrag_hits) if(key_defragged != server.stat_active_defrag_hits)
server.stat_active_defrag_key_hits++; server.stat_active_defrag_key_hits++;
else else
@ -1039,6 +1040,21 @@ void activeDefragCycle(void) {
mstime_t latency; mstime_t latency;
int quit = 0; int quit = 0;
if (!server.active_defrag_enabled) {
if (server.active_defrag_running) {
/* if active defrag was disabled mid-run, start from fresh next time. */
server.active_defrag_running = 0;
if (db)
listEmpty(db->defrag_later);
defrag_later_current_key = NULL;
defrag_later_cursor = 0;
current_db = -1;
cursor = 0;
db = NULL;
}
return;
}
if (hasActiveChildProcess()) if (hasActiveChildProcess())
return; /* Defragging memory while there's a fork will just do damage. */ return; /* Defragging memory while there's a fork will just do damage. */

View File

@ -1691,8 +1691,7 @@ void databasesCron(void) {
} }
/* Defrag keys gradually. */ /* Defrag keys gradually. */
if (server.active_defrag_enabled) activeDefragCycle();
activeDefragCycle();
/* Perform hash tables rehashing if needed, but only if there are no /* Perform hash tables rehashing if needed, but only if there are no
* other processes saving the DB on disk. Otherwise rehashing is bad * other processes saving the DB on disk. Otherwise rehashing is bad
@ -2854,6 +2853,7 @@ void initServer(void) {
server.db[j].id = j; server.db[j].id = j;
server.db[j].avg_ttl = 0; server.db[j].avg_ttl = 0;
server.db[j].defrag_later = listCreate(); server.db[j].defrag_later = listCreate();
listSetFreeMethod(server.db[j].defrag_later,(void (*)(void*))sdsfree);
} }
evictionPoolAlloc(); /* Initialize the LRU keys pool. */ evictionPoolAlloc(); /* Initialize the LRU keys pool. */
server.pubsub_channels = dictCreate(&keylistDictType,NULL); server.pubsub_channels = dictCreate(&keylistDictType,NULL);

View File

@ -174,8 +174,8 @@ typedef long long ustime_t; /* microsecond time type. */
#define CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER 10 /* don't defrag when fragmentation is below 10% */ #define CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER 10 /* don't defrag when fragmentation is below 10% */
#define CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER 100 /* maximum defrag force at 100% fragmentation */ #define CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER 100 /* maximum defrag force at 100% fragmentation */
#define CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES (100<<20) /* don't defrag if frag overhead is below 100mb */ #define CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES (100<<20) /* don't defrag if frag overhead is below 100mb */
#define CONFIG_DEFAULT_DEFRAG_CYCLE_MIN 5 /* 5% CPU min (at lower threshold) */ #define CONFIG_DEFAULT_DEFRAG_CYCLE_MIN 1 /* 1% CPU min (at lower threshold) */
#define CONFIG_DEFAULT_DEFRAG_CYCLE_MAX 75 /* 75% CPU max (at upper threshold) */ #define CONFIG_DEFAULT_DEFRAG_CYCLE_MAX 25 /* 25% CPU max (at upper threshold) */
#define CONFIG_DEFAULT_DEFRAG_MAX_SCAN_FIELDS 1000 /* keys with more than 1000 fields will be processed separately */ #define CONFIG_DEFAULT_DEFRAG_MAX_SCAN_FIELDS 1000 /* keys with more than 1000 fields will be processed separately */
#define CONFIG_DEFAULT_PROTO_MAX_BULK_LEN (512ll*1024*1024) /* Bulk request max size */ #define CONFIG_DEFAULT_PROTO_MAX_BULK_LEN (512ll*1024*1024) /* Bulk request max size */
#define CONFIG_DEFAULT_TRACKING_TABLE_MAX_FILL 10 /* 10% tracking table max fill. */ #define CONFIG_DEFAULT_TRACKING_TABLE_MAX_FILL 10 /* 10% tracking table max fill. */