Hello community, here is the log from the commit of package sbd for openSUSE:Factory checked in at 2019-04-04 14:13:40 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/sbd (Old) and /work/SRC/openSUSE:Factory/.sbd.new.3908 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "sbd" Thu Apr 4 14:13:40 2019 rev:28 rq:691441 version:1.4.0+20190326.c38c5e6 Changes: -------- --- /work/SRC/openSUSE:Factory/sbd/sbd.changes 2019-01-28 20:50:44.465759885 +0100 +++ /work/SRC/openSUSE:Factory/.sbd.new.3908/sbd.changes 2019-04-04 14:13:42.149336265 +0200 @@ -1,0 +2,19 @@ +Tue Apr 2 10:24:42 UTC 2019 - Yan Gao <ygao@suse.com> + +- Update to version 1.4.0+20190326.c38c5e6: +- sbd-pacemaker: bail out of status earlier +- sbd-pacemaker: make handling of cib-connection loss more robust + +------------------------------------------------------------------- +Mon Mar 11 16:43:07 UTC 2019 - Yan Gao <ygao@suse.com> + +- Update to version 1.4.0+20190311.0159a3c: +- sbd-cluster: finalize cmap connection if disconnected from cluster (bsc#1128059) + +------------------------------------------------------------------- +Tue Feb 12 14:05:52 UTC 2019 - ygao@suse.com + +- Update to version 1.4.0+20190201.f949aa8: +- fail earlier on invalid servants + +------------------------------------------------------------------- Old: ---- sbd-1.4.0+20190123.1829c40.tar.xz New: ---- sbd-1.4.0+20190326.c38c5e6.tar.xz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ sbd.spec ++++++ --- /var/tmp/diff_new_pack.kpg8m3/_old 2019-04-04 14:13:42.889336621 +0200 +++ /var/tmp/diff_new_pack.kpg8m3/_new 2019-04-04 14:13:42.889336621 +0200 @@ -23,7 +23,7 @@ %endif Name: sbd -Version: 1.4.0+20190123.1829c40 +Version: 1.4.0+20190326.c38c5e6 Release: 0 Summary: Storage-based death License: GPL-2.0-or-later ++++++ _servicedata ++++++ --- /var/tmp/diff_new_pack.kpg8m3/_old 2019-04-04 14:13:42.925336638 +0200 +++ /var/tmp/diff_new_pack.kpg8m3/_new 2019-04-04 14:13:42.929336640 +0200 @@ -1,6 +1,6 @@ <servicedata> <service name="tar_scm"> <param name="url">https://github.com/ClusterLabs/sbd.git</param> - <param name="changesrevision">091e10ae3f62239251b53bf7d81d47a57a9b82f2</param> + <param name="changesrevision">79b778debfee5b4ab2d099b2bfc7385f45597f70</param> </service> </servicedata> \ No newline at end of file ++++++ sbd-1.4.0+20190123.1829c40.tar.xz -> sbd-1.4.0+20190326.c38c5e6.tar.xz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sbd-1.4.0+20190123.1829c40/src/sbd-cluster.c new/sbd-1.4.0+20190326.c38c5e6/src/sbd-cluster.c --- old/sbd-1.4.0+20190123.1829c40/src/sbd-cluster.c 2019-01-23 19:26:40.000000000 +0100 +++ new/sbd-1.4.0+20190326.c38c5e6/src/sbd-cluster.c 2019-03-26 11:38:35.000000000 +0100 @@ -174,6 +174,25 @@ return TRUE; } +static void +cmap_destroy(void) +{ + if (cmap_source) { + g_source_destroy(cmap_source); + cmap_source = NULL; + } + + if (track_handle) { + cmap_track_delete(cmap_handle, track_handle); + track_handle = 0; + } + + if (cmap_handle) { + cmap_finalize(cmap_handle); + cmap_handle = 0; + } +} + static gboolean sbd_get_two_node(void) { @@ -217,18 +236,7 @@ return TRUE; out: - if (cmap_source) { - g_source_destroy(cmap_source); - cmap_source = NULL; - } - if (track_handle) { - cmap_track_delete(cmap_handle, track_handle); - track_handle = 0; - } - if (cmap_handle) { - cmap_finalize(cmap_handle); - cmap_handle = 0; - } + cmap_destroy(); return FALSE; } @@ -327,6 +335,12 @@ { cl_log(LOG_WARNING, "Lost connection to %s", name_for_cluster_type(get_cluster_type())); + if (get_cluster_type() != pcmk_cluster_unknown) { +#if SUPPORT_COROSYNC && CHECK_TWO_NODE + cmap_destroy(); +#endif + } + set_servant_health(pcmk_health_unclean, LOG_ERR, "Cluster connection terminated"); notify_parent(); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sbd-1.4.0+20190123.1829c40/src/sbd-inquisitor.c new/sbd-1.4.0+20190326.c38c5e6/src/sbd-inquisitor.c --- old/sbd-1.4.0+20190123.1829c40/src/sbd-inquisitor.c 2019-01-23 19:26:40.000000000 +0100 +++ new/sbd-1.4.0+20190326.c38c5e6/src/sbd-inquisitor.c 2019-03-26 11:38:35.000000000 +0100 @@ -42,19 +42,36 @@ struct servants_list_item *newbie; if (lookup_servant_by_dev(devname)) { - cl_log(LOG_DEBUG, "Servant %s already exists", devname); - return; + cl_log(LOG_DEBUG, "Servant %s already exists", devname); + return; } newbie = malloc(sizeof(*newbie)); - if (!newbie) { - fprintf(stderr, "malloc failed in recruit_servant.\n"); - exit(1); + if (newbie) { + memset(newbie, 0, sizeof(*newbie)); + newbie->devname = strdup(devname); + newbie->pid = pid; + newbie->first_start = 1; + } + if (!newbie || !newbie->devname) { + fprintf(stderr, "heap allocation failed in recruit_servant.\n"); + exit(1); + } + + /* some sanity-check on our newbie */ + if (sbd_is_disk(newbie)) { + cl_log(LOG_INFO, "Monitoring %s", devname); + disk_count++; + } else if (sbd_is_pcmk(newbie) || sbd_is_cluster(newbie)) { + /* alive just after pcmk and cluster servants have shown up */ + newbie->outdated = 1; + } else { + /* toss our newbie */ + cl_log(LOG_ERR, "Refusing to recruit unrecognized servant %s", devname); + free((void *) newbie->devname); + free(newbie); + return; } - memset(newbie, 0, sizeof(*newbie)); - newbie->devname = strdup(devname); - newbie->pid = pid; - newbie->first_start = 1; if (!s) { servants_leader = newbie; @@ -65,12 +82,6 @@ } servant_count++; - if(sbd_is_disk(newbie)) { - cl_log(LOG_INFO, "Monitoring %s", devname); - disk_count++; - } else { - newbie->outdated = 1; - } } int assign_servant(const char* devname, functionp_t functionp, int mode, const void* argp) @@ -148,7 +159,7 @@ if (sbd_is_disk(s)) { #if SUPPORT_SHARED_DISK DBGLOG(LOG_INFO, "Starting servant for device %s", s->devname); - s->pid = assign_servant(s->devname, servant, start_mode, s); + s->pid = assign_servant(s->devname, servant_md, start_mode, s); #else cl_log(LOG_ERR, "Shared disk functionality not supported"); return; @@ -479,19 +490,19 @@ if (sbd_is_disk(s)) { if (WIFEXITED(status)) { switch(WEXITSTATUS(status)) { - case EXIT_MD_IO_FAIL: + case EXIT_MD_SERVANT_IO_FAIL: DBGLOG(LOG_INFO, "Servant for %s requests to be disowned", s->devname); break; - case EXIT_MD_REQUEST_RESET: + case EXIT_MD_SERVANT_REQUEST_RESET: cl_log(LOG_WARNING, "%s requested a reset", s->devname); do_reset(); break; - case EXIT_MD_REQUEST_SHUTOFF: + case EXIT_MD_SERVANT_REQUEST_SHUTOFF: cl_log(LOG_WARNING, "%s requested a shutoff", s->devname); do_off(); break; - case EXIT_MD_REQUEST_CRASHDUMP: + case EXIT_MD_SERVANT_REQUEST_CRASHDUMP: cl_log(LOG_WARNING, "%s requested a crashdump", s->devname); do_crashdump(); break; @@ -499,6 +510,22 @@ break; } } + } else if (sbd_is_pcmk(s)) { + if (WIFEXITED(status)) { + switch(WEXITSTATUS(status)) { + case EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN: + DBGLOG(LOG_INFO, "PCMK-Servant has exited gracefully"); + /* revert to state prior to pacemaker-detection */ + s->restarts = 0; + s->restart_blocked = 0; + cluster_appeared = 0; + s->outdated = 1; + s->t_last.tv_sec = 0; + break; + default: + break; + } + } } cleanup_servant_by_pid(pid); } @@ -785,12 +812,14 @@ if (lpc > last) { entry = calloc(1, 1 + lpc - last); + if (!entry) { + fprintf(stderr, "heap allocation failed parsing device-line.\n"); + exit(1); + } rc = sscanf(line + last, "%[^;]", entry); } - if (entry == NULL) { - /* Skip */ - } else if (rc != 1) { + if (rc != 1) { cl_log(LOG_WARNING, "Could not parse (%d %d): %s", last, lpc, line + last); } else { cl_log(LOG_DEBUG, "Adding '%s'", entry); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sbd-1.4.0+20190123.1829c40/src/sbd-md.c new/sbd-1.4.0+20190326.c38c5e6/src/sbd-md.c --- old/sbd-1.4.0+20190123.1829c40/src/sbd-md.c 2019-01-23 19:26:40.000000000 +0100 +++ new/sbd-1.4.0+20190326.c38c5e6/src/sbd-md.c 2019-03-26 11:38:35.000000000 +0100 @@ -1031,7 +1031,7 @@ return 0; } -int servant(const char *diskname, int mode, const void* argp) +int servant_md(const char *diskname, int mode, const void* argp) { struct sector_mbox_s *s_mbox = NULL; struct sector_node_s *s_node = NULL; @@ -1046,11 +1046,6 @@ char uuid[37]; const struct servants_list_item *s = argp; - if (!diskname) { - cl_log(LOG_ERR, "Empty disk name %s.", diskname); - return -1; - } - cl_log(LOG_INFO, "Servant starting for device %s", diskname); /* Block most of the signals */ @@ -1066,19 +1061,19 @@ st = open_device(diskname, LOG_WARNING); if (!st) { - exit(EXIT_MD_IO_FAIL); + exit(EXIT_MD_SERVANT_IO_FAIL); } s_header = header_get(st); if (!s_header) { cl_log(LOG_ERR, "Not a valid header on %s", diskname); - exit(EXIT_MD_IO_FAIL); + exit(EXIT_MD_SERVANT_IO_FAIL); } if (servant_check_timeout_inconsistent(s_header) < 0) { cl_log(LOG_ERR, "Timeouts on %s do not match first device", diskname); - exit(EXIT_MD_IO_FAIL); + exit(EXIT_MD_SERVANT_IO_FAIL); } if (s_header->minor_version > 0) { @@ -1091,14 +1086,14 @@ cl_log(LOG_ERR, "No slot allocated, and automatic allocation failed for disk %s.", diskname); - rc = EXIT_MD_IO_FAIL; + rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } s_node = sector_alloc(); if (slot_read(st, mbox, s_node) < 0) { cl_log(LOG_ERR, "Unable to read node entry on %s", diskname); - exit(EXIT_MD_IO_FAIL); + exit(EXIT_MD_SERVANT_IO_FAIL); } cl_log(LOG_NOTICE, "Monitoring slot %d on disk %s", mbox, diskname); @@ -1114,7 +1109,7 @@ if (mode > 0) { if (mbox_read(st, mbox, s_mbox) < 0) { cl_log(LOG_ERR, "mbox read failed during start-up in servant."); - rc = EXIT_MD_IO_FAIL; + rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } if (s_mbox->cmd != SBD_MSG_EXIT && @@ -1130,7 +1125,7 @@ DBGLOG(LOG_INFO, "First servant start - zeroing inbox"); memset(s_mbox, 0, sizeof(*s_mbox)); if (mbox_write(st, mbox, s_mbox) < 0) { - rc = EXIT_MD_IO_FAIL; + rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } } @@ -1159,28 +1154,28 @@ s_header_retry = header_get(st); if (!s_header_retry) { cl_log(LOG_ERR, "No longer found a valid header on %s", diskname); - exit(EXIT_MD_IO_FAIL); + exit(EXIT_MD_SERVANT_IO_FAIL); } if (memcmp(s_header, s_header_retry, sizeof(*s_header)) != 0) { cl_log(LOG_ERR, "Header on %s changed since start-up!", diskname); - exit(EXIT_MD_IO_FAIL); + exit(EXIT_MD_SERVANT_IO_FAIL); } free(s_header_retry); s_node_retry = sector_alloc(); if (slot_read(st, mbox, s_node_retry) < 0) { cl_log(LOG_ERR, "slot read failed in servant."); - exit(EXIT_MD_IO_FAIL); + exit(EXIT_MD_SERVANT_IO_FAIL); } if (memcmp(s_node, s_node_retry, sizeof(*s_node)) != 0) { cl_log(LOG_ERR, "Node entry on %s changed since start-up!", diskname); - exit(EXIT_MD_IO_FAIL); + exit(EXIT_MD_SERVANT_IO_FAIL); } free(s_node_retry); if (mbox_read(st, mbox, s_mbox) < 0) { cl_log(LOG_ERR, "mbox read failed in servant."); - exit(EXIT_MD_IO_FAIL); + exit(EXIT_MD_SERVANT_IO_FAIL); } if (s_mbox->cmd > 0) { @@ -1195,14 +1190,14 @@ sigqueue(ppid, SIG_TEST, signal_value); break; case SBD_MSG_RESET: - exit(EXIT_MD_REQUEST_RESET); + exit(EXIT_MD_SERVANT_REQUEST_RESET); case SBD_MSG_OFF: - exit(EXIT_MD_REQUEST_SHUTOFF); + exit(EXIT_MD_SERVANT_REQUEST_SHUTOFF); case SBD_MSG_EXIT: sigqueue(ppid, SIG_EXITREQ, signal_value); break; case SBD_MSG_CRASHDUMP: - exit(EXIT_MD_REQUEST_CRASHDUMP); + exit(EXIT_MD_SERVANT_REQUEST_CRASHDUMP); default: /* FIXME: An "unknown" message might result diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sbd-1.4.0+20190123.1829c40/src/sbd-pacemaker.c new/sbd-1.4.0+20190326.c38c5e6/src/sbd-pacemaker.c --- old/sbd-1.4.0+20190123.1829c40/src/sbd-pacemaker.c 2019-01-23 19:26:40.000000000 +0100 +++ new/sbd-1.4.0+20190326.c38c5e6/src/sbd-pacemaker.c 2019-03-26 11:38:35.000000000 +0100 @@ -103,6 +103,9 @@ static long last_refresh = 0; +static int pcmk_clean_shutdown = 0; +static int pcmk_shutdown = 0; + static gboolean mon_timer_reconnect(gpointer data) { @@ -128,10 +131,26 @@ { if (cib) { cib->cmds->signoff(cib); + /* retrigger as last one might have been skipped */ + mon_refresh_state(NULL); + if (pcmk_clean_shutdown) { + /* assume a graceful pacemaker-shutdown */ + clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); + } + /* getting here we aren't sure about the pacemaker-state + so try to use the timeout to reconnect and get + everything sorted out again + */ + pcmk_shutdown = 0; set_servant_health(pcmk_health_transient, LOG_WARNING, "Disconnected from CIB"); timer_id_reconnect = g_timeout_add(reconnect_msec, mon_timer_reconnect, NULL); } cib_connected = 0; + /* no sense in looking into outdated cib, trying to apply patch, ... */ + if (current_cib) { + free_xml(current_cib); + current_cib = NULL; + } return; } @@ -171,7 +190,7 @@ mon_timer_notify(gpointer data) { static int counter = 0; - int counter_max = timeout_watchdog / timeout_loop; + int counter_max = timeout_watchdog / timeout_loop / 2; if (timer_id_notify > 0) { g_source_remove(timer_id_notify); @@ -257,7 +276,7 @@ static int updates = 0; static int ever_had_quorum = FALSE; - node_t *node = pe_find_node(data_set->nodes, local_uname); + node_t *node = NULL; updates++; @@ -267,11 +286,15 @@ return; } + node = pe_find_node(data_set->nodes, local_uname); - if (node == NULL) { + if ((node == NULL) || (node->details == NULL)) { set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: %s is UNKNOWN", local_uname); + notify_parent(); + return; + } - } else if (node->details->online == FALSE) { + if (node->details->online == FALSE) { set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: OFFLINE"); } else if (node->details->unclean) { @@ -280,11 +303,6 @@ } else if (node->details->pending) { set_servant_health(pcmk_health_pending, LOG_WARNING, "Node state: pending"); -#if 0 - } else if (node->details->shutdown) { - set_servant_health(pcmk_health_shutdown, LOG_WARNING, "Node state: shutting down"); -#endif - } else if (data_set->flags & pe_flag_have_quorum) { set_servant_health(pcmk_health_online, LOG_INFO, "Node state: online"); ever_had_quorum = TRUE; @@ -315,6 +333,12 @@ } } + if (node->details->shutdown) { + pcmk_shutdown = 1; + } + if (pcmk_shutdown && !(node->details->running_rsc)) { + pcmk_clean_shutdown = 1; + } notify_parent(); return; } @@ -339,7 +363,7 @@ static mainloop_timer_t *refresh_timer = NULL; if(refresh_timer == NULL) { - refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL); + refresh_timer = mainloop_timer_add("refresh", reconnect_msec, FALSE, mon_trigger_refresh, NULL); refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_state, refresh_timer); } @@ -369,9 +393,9 @@ } /* Refresh - * - immediately if the last update was more than 5s ago + * - immediately if the last update was more than 1s ago * - every 10 updates - * - at most 2s after the last update + * - at most 1s after the last update */ if (updates > 10 || (now - last_refresh) > (reconnect_msec / 1000)) { mon_refresh_state(refresh_timer); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sbd-1.4.0+20190123.1829c40/src/sbd.h new/sbd-1.4.0+20190326.c38c5e6/src/sbd.h --- old/sbd-1.4.0+20190123.1829c40/src/sbd.h 2019-01-23 19:26:40.000000000 +0100 +++ new/sbd-1.4.0+20190326.c38c5e6/src/sbd.h 2019-03-26 11:38:35.000000000 +0100 @@ -54,10 +54,13 @@ /* FIXME: should add dynamic check of SIG_XX >= SIGRTMAX */ /* exit status for disk-servant */ -#define EXIT_MD_IO_FAIL 20 -#define EXIT_MD_REQUEST_RESET 21 -#define EXIT_MD_REQUEST_SHUTOFF 22 -#define EXIT_MD_REQUEST_CRASHDUMP 23 +#define EXIT_MD_SERVANT_IO_FAIL 20 +#define EXIT_MD_SERVANT_REQUEST_RESET 21 +#define EXIT_MD_SERVANT_REQUEST_SHUTOFF 22 +#define EXIT_MD_SERVANT_REQUEST_CRASHDUMP 23 + +/* exit status for pcmk-servant */ +#define EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN 30 #define HOG_CHAR 0xff #define SECTOR_NAME_MAX 63 @@ -175,7 +178,7 @@ int dump_headers(struct servants_list_item *servants); unsigned long get_first_msgwait(struct servants_list_item *servants); int messenger(const char *name, const char *msg, struct servants_list_item *servants); -int servant(const char *diskname, int mode, const void* argp); +int servant_md(const char *diskname, int mode, const void* argp); #endif int servant_pcmk(const char *diskname, int mode, const void* argp);