From 90b717e72340081ee87f0b85b4ef00b2a5bd2bf2 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Wed, 29 Jul 2020 17:05:14 +0300 Subject: [PATCH] Assertion and panic, print crash log without generating SIGSEGV This makes it possible to add tests that generate assertions, and run them with valgrind, making sure that there are no memory violations prior to the assertion. New config options: - crash-log-enabled - can be disabled for cleaner core dumps - crash-memcheck-enabled - useful for faster termination after a crash - use-exit-on-panic - to be used by the test suite so that valgrind can detect leaks and memory corruptions Other changes: - Crash log is printed even on system that dont HAVE_BACKTRACE, i.e. in both SIGSEGV and assert / panic - Assertion and panic won't print registers and code around EIP (which was useless), but will do fast memory test (which may still indicate that the assertion was due to memory corrpution) I had to reshuffle code in order to re-use it, so i extracted come code into function without actually doing any changes to the code: - logServerInfo - logModulesInfo - doFastMemoryTest (with the exception of it being conditional) - dumpCodeAroundEIP changes to the crash report on segfault: - logRegisters is called right after the stack trace (before info) done just in order to have more re-usable code - stack trace skips the first two items on the stack (the crash log and signal handler functions) --- redis.conf | 10 ++ src/config.c | 14 ++- src/debug.c | 261 ++++++++++++++++++++++++++++++++------------------- src/server.c | 21 +++-- src/server.h | 11 ++- 5 files changed, 205 insertions(+), 112 deletions(-) diff --git a/redis.conf b/redis.conf index d4e3e47f..8440b475 100644 --- a/redis.conf +++ b/redis.conf @@ -269,6 +269,16 @@ logfile "" # Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. # syslog-facility local0 +# To disable the built in crash log, which will possibly produce cleaner core +# dumps when they are needed, uncomment the following: +# +# crash-log-enabled no + +# To disable the fast memory check that's run as part of the crash log, which +# will possibly let redis terminate sooner, uncomment the following: +# +# crash-memcheck-enabled no + # Set the number of databases. The default database is DB 0, you can select # a different one on a per-connection basis using SELECT where # dbid is a number between 0 and 'databases'-1 diff --git a/src/config.c b/src/config.c index a68fddf1..52a859a7 100644 --- a/src/config.c +++ b/src/config.c @@ -2043,6 +2043,16 @@ static int updateAppendonly(int val, int prev, char **err) { return 1; } +static int updateSighandlerEnabled(int val, int prev, char **err) { + UNUSED(err); + UNUSED(prev); + if (val) + setupSignalHandlers(); + else + removeSignalHandlers(); + return 1; +} + static int updateMaxclients(long long val, long long prev, char **err) { /* Try to check if the OS is capable of supporting so many FDs. */ if (val > prev) { @@ -2132,7 +2142,9 @@ standardConfig configs[] = { createBoolConfig("cluster-enabled", NULL, IMMUTABLE_CONFIG, server.cluster_enabled, 0, NULL, NULL), createBoolConfig("appendonly", NULL, MODIFIABLE_CONFIG, server.aof_enabled, 0, NULL, updateAppendonly), createBoolConfig("cluster-allow-reads-when-down", NULL, MODIFIABLE_CONFIG, server.cluster_allow_reads_when_down, 0, NULL, NULL), - + createBoolConfig("crash-log-enabled", NULL, MODIFIABLE_CONFIG, server.crashlog_enabled, 1, NULL, updateSighandlerEnabled), + createBoolConfig("crash-memcheck-enabled", NULL, MODIFIABLE_CONFIG, server.memcheck_enabled, 1, NULL, NULL), + createBoolConfig("use-exit-on-panic", NULL, MODIFIABLE_CONFIG, server.use_exit_on_panic, 0, NULL, NULL), /* String Configs */ createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.acl_filename, "", NULL, NULL), diff --git a/src/debug.c b/src/debug.c index 0bea6987..3b40e685 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-2020, Salvatore Sanfilippo + * Copyright (c) 2020, Redis Labs, Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,10 +31,13 @@ #include "server.h" #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */ #include "crc64.h" +#include "bio.h" #include #include #include +#include +#include #ifdef HAVE_BACKTRACE #include @@ -42,9 +46,6 @@ #else typedef ucontext_t sigcontext_t; #endif -#include -#include "bio.h" -#include #endif /* HAVE_BACKTRACE */ #ifdef __CYGWIN__ @@ -53,6 +54,15 @@ typedef ucontext_t sigcontext_t; #endif #endif +/* Globals */ +static int bug_report_start = 0; /* True if bug report header was already logged. */ + +/* Forward declarations */ +void bugReportStart(void); +void printCrashReport(void); +void bugReportEnd(int killViaSignal, int sig); +void logStackTrace(void *eip, int uplevel); + /* ================================= Debugging ============================== */ /* Compute the sha1 of string at 's' with 'len' bytes long. @@ -814,13 +824,14 @@ void _serverAssert(const char *estr, const char *file, int line) { bugReportStart(); serverLog(LL_WARNING,"=== ASSERTION FAILED ==="); serverLog(LL_WARNING,"==> %s:%d '%s' is not true",file,line,estr); + + if (server.crashlog_enabled) { #ifdef HAVE_BACKTRACE - server.assert_failed = estr; - server.assert_file = file; - server.assert_line = line; - serverLog(LL_WARNING,"(forcing SIGSEGV to print the bug report.)"); + logStackTrace(NULL, 1); #endif - *((char*)-1) = 'x'; + printCrashReport(); + } + bugReportEnd(0, 0); } void _serverAssertPrintClientInfo(const client *c) { @@ -897,18 +908,21 @@ void _serverPanic(const char *file, int line, const char *msg, ...) { serverLog(LL_WARNING,"------------------------------------------------"); serverLog(LL_WARNING,"!!! Software Failure. Press left mouse button to continue"); serverLog(LL_WARNING,"Guru Meditation: %s #%s:%d",fmtmsg,file,line); + + if (server.crashlog_enabled) { #ifdef HAVE_BACKTRACE - serverLog(LL_WARNING,"(forcing SIGSEGV in order to print the stack trace)"); + logStackTrace(NULL, 1); #endif - serverLog(LL_WARNING,"------------------------------------------------"); - *((char*)-1) = 'x'; + printCrashReport(); + } + bugReportEnd(0, 0); } void bugReportStart(void) { - if (server.bug_report_start == 0) { + if (bug_report_start == 0) { serverLogRaw(LL_WARNING|LL_RAW, "\n\n=== REDIS BUG REPORT START: Cut & paste starting from here ===\n"); - server.bug_report_start = 1; + bug_report_start = 1; } } @@ -980,6 +994,7 @@ void logStackContent(void **sp) { } } +/* Log dump of processor registers */ void logRegisters(ucontext_t *uc) { serverLog(LL_WARNING|LL_RAW, "\n------ REGISTERS ------\n"); @@ -1352,6 +1367,8 @@ void logRegisters(ucontext_t *uc) { #endif } +#endif /* HAVE_BACKTRACE */ + /* Return a file descriptor to write directly to the Redis log with the * write(2) syscall, that can be used in critical sections of the code * where the rest of Redis can't be trusted (for example during the memory @@ -1372,33 +1389,66 @@ void closeDirectLogFiledes(int fd) { if (!log_to_stdout) close(fd); } +#ifdef HAVE_BACKTRACE + /* Logs the stack trace using the backtrace() call. This function is designed - * to be called from signal handlers safely. */ -void logStackTrace(ucontext_t *uc) { - void *trace[101]; + * to be called from signal handlers safely. + * The eip argument is optional (can take NULL). + * The uplevel argument indicates how many of the calling functions to skip. + */ +void logStackTrace(void *eip, int uplevel) { + void *trace[100]; int trace_size = 0, fd = openDirectLogFiledes(); + char *msg; + uplevel++; /* skip this function */ if (fd == -1) return; /* If we can't log there is anything to do. */ - /* Generate the stack trace */ - trace_size = backtrace(trace+1, 100); + /* Get the stack trace first! */ + trace_size = backtrace(trace, 100); - if (getMcontextEip(uc) != NULL) { - char *msg1 = "EIP:\n"; - char *msg2 = "\nBacktrace:\n"; - if (write(fd,msg1,strlen(msg1)) == -1) {/* Avoid warning. */}; - trace[0] = getMcontextEip(uc); - backtrace_symbols_fd(trace, 1, fd); - if (write(fd,msg2,strlen(msg2)) == -1) {/* Avoid warning. */}; + msg = "\n------ STACK TRACE ------\n"; + if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */}; + + if (eip) { + /* Write EIP to the log file*/ + msg = "EIP:\n"; + if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */}; + backtrace_symbols_fd(&eip, 1, fd); } /* Write symbols to log file */ - backtrace_symbols_fd(trace+1, trace_size, fd); + msg = "\nBacktrace:\n"; + if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */}; + backtrace_symbols_fd(trace+uplevel, trace_size-uplevel, fd); /* Cleanup */ closeDirectLogFiledes(fd); } +#endif /* HAVE_BACKTRACE */ + +/* Log global server info */ +void logServerInfo(void) { + sds infostring, clients; + serverLogRaw(LL_WARNING|LL_RAW, "\n------ INFO OUTPUT ------\n"); + infostring = genRedisInfoString("all"); + serverLogRaw(LL_WARNING|LL_RAW, infostring); + serverLogRaw(LL_WARNING|LL_RAW, "\n------ CLIENT LIST OUTPUT ------\n"); + clients = getAllClientsInfoString(-1); + serverLogRaw(LL_WARNING|LL_RAW, clients); + sdsfree(infostring); + sdsfree(clients); +} + +/* Log modules info. Something we wanna do last since we fear it may crash. */ +void logModulesInfo(void) { + serverLogRaw(LL_WARNING|LL_RAW, "\n------ MODULES INFO OUTPUT ------\n"); + sds infostring = modulesCollectInfo(sdsempty(), NULL, 1, 0); + serverLogRaw(LL_WARNING|LL_RAW, infostring); + sdsfree(infostring); +} + /* Log information about the "current" client, that is, the client that is * currently being served by Redis. May be NULL if Redis is not serving a * client right now. */ @@ -1505,6 +1555,23 @@ int memtest_test_linux_anonymous_maps(void) { } #endif +void doFastMemoryTest(void) { +#if defined(HAVE_PROC_MAPS) + if (server.memcheck_enabled) { + /* Test memory */ + serverLogRaw(LL_WARNING|LL_RAW, "\n------ FAST MEMORY TEST ------\n"); + bioKillThreads(); + if (memtest_test_linux_anonymous_maps()) { + serverLogRaw(LL_WARNING|LL_RAW, + "!!! MEMORY ERROR DETECTED! Check your memory ASAP !!!\n"); + } else { + serverLogRaw(LL_WARNING|LL_RAW, + "Fast memory test PASSED, however your memory can still be broken. Please run a memory test for several hours if possible.\n"); + } + } +#endif +} + /* Scans the (assumed) x86 code starting at addr, for a max of `len` * bytes, searching for E8 (callq) opcodes, and dumping the symbols * and the call offset if they appear to be valid. */ @@ -1531,95 +1598,86 @@ void dumpX86Calls(void *addr, size_t len) { } } +void dumpCodeAroundEIP(void *eip) { + Dl_info info; + if (dladdr(eip, &info) != 0) { + serverLog(LL_WARNING|LL_RAW, + "\n------ DUMPING CODE AROUND EIP ------\n" + "Symbol: %s (base: %p)\n" + "Module: %s (base %p)\n" + "$ xxd -r -p /tmp/dump.hex /tmp/dump.bin\n" + "$ objdump --adjust-vma=%p -D -b binary -m i386:x86-64 /tmp/dump.bin\n" + "------\n", + info.dli_sname, info.dli_saddr, info.dli_fname, info.dli_fbase, + info.dli_saddr); + size_t len = (long)eip - (long)info.dli_saddr; + unsigned long sz = sysconf(_SC_PAGESIZE); + if (len < 1<<13) { /* we don't have functions over 8k (verified) */ + /* Find the address of the next page, which is our "safety" + * limit when dumping. Then try to dump just 128 bytes more + * than EIP if there is room, or stop sooner. */ + unsigned long next = ((unsigned long)eip + sz) & ~(sz-1); + unsigned long end = (unsigned long)eip + 128; + if (end > next) end = next; + len = end - (unsigned long)info.dli_saddr; + serverLogHexDump(LL_WARNING, "dump of function", + info.dli_saddr ,len); + dumpX86Calls(info.dli_saddr,len); + } + } +} + void sigsegvHandler(int sig, siginfo_t *info, void *secret) { - ucontext_t *uc = (ucontext_t*) secret; - void *eip = getMcontextEip(uc); - sds infostring, clients; - struct sigaction act; + UNUSED(secret); UNUSED(info); bugReportStart(); serverLog(LL_WARNING, "Redis %s crashed by signal: %d", REDIS_VERSION, sig); - if (eip != NULL) { - serverLog(LL_WARNING, - "Crashed running the instruction at: %p", eip); - } if (sig == SIGSEGV || sig == SIGBUS) { serverLog(LL_WARNING, "Accessing address: %p", (void*)info->si_addr); } - serverLog(LL_WARNING, - "Failed assertion: %s (%s:%d)", server.assert_failed, - server.assert_file, server.assert_line); - /* Log the stack trace */ - serverLogRaw(LL_WARNING|LL_RAW, "\n------ STACK TRACE ------\n"); - logStackTrace(uc); +#ifdef HAVE_BACKTRACE + ucontext_t *uc = (ucontext_t*) secret; + void *eip = getMcontextEip(uc); + if (eip != NULL) { + serverLog(LL_WARNING, + "Crashed running the instruction at: %p", eip); + } + logStackTrace(getMcontextEip(uc), 1); + + logRegisters(uc); +#endif + + printCrashReport(); + +#ifdef HAVE_BACKTRACE + if (eip != NULL) + dumpCodeAroundEIP(eip); +#endif + + bugReportEnd(1, sig); +} + +void printCrashReport(void) { /* Log INFO and CLIENT LIST */ - serverLogRaw(LL_WARNING|LL_RAW, "\n------ INFO OUTPUT ------\n"); - infostring = genRedisInfoString("all"); - serverLogRaw(LL_WARNING|LL_RAW, infostring); - serverLogRaw(LL_WARNING|LL_RAW, "\n------ CLIENT LIST OUTPUT ------\n"); - clients = getAllClientsInfoString(-1); - serverLogRaw(LL_WARNING|LL_RAW, clients); - sdsfree(infostring); - sdsfree(clients); + logServerInfo(); /* Log the current client */ logCurrentClient(); - /* Log dump of processor registers */ - logRegisters(uc); + /* Log modules info. Something we wanna do last since we fear it may crash. */ + logModulesInfo(); - /* Log Modules INFO */ - serverLogRaw(LL_WARNING|LL_RAW, "\n------ MODULES INFO OUTPUT ------\n"); - infostring = modulesCollectInfo(sdsempty(), NULL, 1, 0); - serverLogRaw(LL_WARNING|LL_RAW, infostring); - sdsfree(infostring); + /* Run memory test in case the crash was triggered by memory corruption. */ + doFastMemoryTest(); +} -#if defined(HAVE_PROC_MAPS) - /* Test memory */ - serverLogRaw(LL_WARNING|LL_RAW, "\n------ FAST MEMORY TEST ------\n"); - bioKillThreads(); - if (memtest_test_linux_anonymous_maps()) { - serverLogRaw(LL_WARNING|LL_RAW, - "!!! MEMORY ERROR DETECTED! Check your memory ASAP !!!\n"); - } else { - serverLogRaw(LL_WARNING|LL_RAW, - "Fast memory test PASSED, however your memory can still be broken. Please run a memory test for several hours if possible.\n"); - } -#endif - - if (eip != NULL) { - Dl_info info; - if (dladdr(eip, &info) != 0) { - serverLog(LL_WARNING|LL_RAW, - "\n------ DUMPING CODE AROUND EIP ------\n" - "Symbol: %s (base: %p)\n" - "Module: %s (base %p)\n" - "$ xxd -r -p /tmp/dump.hex /tmp/dump.bin\n" - "$ objdump --adjust-vma=%p -D -b binary -m i386:x86-64 /tmp/dump.bin\n" - "------\n", - info.dli_sname, info.dli_saddr, info.dli_fname, info.dli_fbase, - info.dli_saddr); - size_t len = (long)eip - (long)info.dli_saddr; - unsigned long sz = sysconf(_SC_PAGESIZE); - if (len < 1<<13) { /* we don't have functions over 8k (verified) */ - /* Find the address of the next page, which is our "safety" - * limit when dumping. Then try to dump just 128 bytes more - * than EIP if there is room, or stop sooner. */ - unsigned long next = ((unsigned long)eip + sz) & ~(sz-1); - unsigned long end = (unsigned long)eip + 128; - if (end > next) end = next; - len = end - (unsigned long)info.dli_saddr; - serverLogHexDump(LL_WARNING, "dump of function", - info.dli_saddr ,len); - dumpX86Calls(info.dli_saddr,len); - } - } - } +void bugReportEnd(int killViaSignal, int sig) { + struct sigaction act; serverLogRaw(LL_WARNING|LL_RAW, "\n=== REDIS BUG REPORT END. Make sure to include from START to END. ===\n\n" @@ -1631,6 +1689,12 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) { /* free(messages); Don't call free() with possibly corrupted memory. */ if (server.daemonize && server.supervised == 0) unlink(server.pidfile); + if (!killViaSignal) { + if (server.use_exit_on_panic) + exit(1); + abort(); + } + /* Make sure we exit with the right signal at the end. So for instance * the core will be dumped if enabled. */ sigemptyset (&act.sa_mask); @@ -1639,7 +1703,6 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) { sigaction (sig, &act, NULL); kill(getpid(),sig); } -#endif /* HAVE_BACKTRACE */ /* ==================== Logging functions for debugging ===================== */ @@ -1679,7 +1742,7 @@ void watchdogSignalHandler(int sig, siginfo_t *info, void *secret) { serverLogFromHandler(LL_WARNING,"\n--- WATCHDOG TIMER EXPIRED ---"); #ifdef HAVE_BACKTRACE - logStackTrace(uc); + logStackTrace(getMcontextEip(uc), 1); #else serverLogFromHandler(LL_WARNING,"Sorry: no support for backtrace()."); #endif diff --git a/src/server.c b/src/server.c index 3b45bea5..45db1cbc 100644 --- a/src/server.c +++ b/src/server.c @@ -2442,10 +2442,6 @@ void initServerConfig(void) { server.rpoplpushCommand = lookupCommandByCString("rpoplpush"); /* Debugging */ - server.assert_failed = ""; - server.assert_file = ""; - server.assert_line = 0; - server.bug_report_start = 0; server.watchdog_period = 0; /* By default we want scripts to be always replicated by effects @@ -4812,16 +4808,27 @@ void setupSignalHandlers(void) { sigaction(SIGTERM, &act, NULL); sigaction(SIGINT, &act, NULL); -#ifdef HAVE_BACKTRACE sigemptyset(&act.sa_mask); act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO; act.sa_sigaction = sigsegvHandler; + if(server.crashlog_enabled) { + sigaction(SIGSEGV, &act, NULL); + sigaction(SIGBUS, &act, NULL); + sigaction(SIGFPE, &act, NULL); + sigaction(SIGILL, &act, NULL); + } + return; +} + +void removeSignalHandlers(void) { + struct sigaction act; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_NODEFER | SA_RESETHAND; + act.sa_handler = SIG_DFL; sigaction(SIGSEGV, &act, NULL); sigaction(SIGBUS, &act, NULL); sigaction(SIGFPE, &act, NULL); sigaction(SIGILL, &act, NULL); -#endif - return; } /* This is the signal handler for children process. It is currently useful diff --git a/src/server.h b/src/server.h index 018dd460..efb7d2ee 100644 --- a/src/server.h +++ b/src/server.h @@ -1274,6 +1274,11 @@ struct redisServer { int syslog_enabled; /* Is syslog enabled? */ char *syslog_ident; /* Syslog ident */ int syslog_facility; /* Syslog facility */ + int crashlog_enabled; /* Enable signal handler for crashlog. + * disable for clean core dumps. */ + int memcheck_enabled; /* Enable memory check on crash. */ + int use_exit_on_panic; /* Use exit() on panic and assert rather than + * abort(). useful for Valgrind. */ /* Replication (master) */ char replid[CONFIG_RUN_ID_SIZE+1]; /* My current replication ID. */ char replid2[CONFIG_RUN_ID_SIZE+1]; /* replid inherited from master*/ @@ -1439,10 +1444,6 @@ struct redisServer { old "requirepass" directive for backward compatibility with Redis <= 5. */ /* Assert & bug reporting */ - const char *assert_failed; - const char *assert_file; - int assert_line; - int bug_report_start; /* True if bug report header was already logged. */ int watchdog_period; /* Software watchdog period in ms. 0 = off */ /* System hardware info */ size_t system_memory_size; /* Total memory in system as reported by OS */ @@ -1975,6 +1976,7 @@ int freeMemoryIfNeeded(void); int freeMemoryIfNeededAndSafe(void); int processCommand(client *c); void setupSignalHandlers(void); +void removeSignalHandlers(void); struct redisCommand *lookupCommand(sds name); struct redisCommand *lookupCommandByCString(char *s); struct redisCommand *lookupCommandOrOriginal(sds name); @@ -2432,7 +2434,6 @@ void *realloc(void *ptr, size_t size) __attribute__ ((deprecated)); void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, const char *file, int line); void _serverAssert(const char *estr, const char *file, int line); void _serverPanic(const char *file, int line, const char *msg, ...); -void bugReportStart(void); void serverLogObjectDebugInfo(const robj *o); void sigsegvHandler(int sig, siginfo_t *info, void *secret); sds genRedisInfoString(const char *section);