commit openais for openSUSE:Factory
Hello community, here is the log from the commit of package openais for openSUSE:Factory checked in at Wed Jun 10 17:10:17 CEST 2009. -------- --- openais/openais.changes 2009-03-09 14:33:32.000000000 +0100 +++ /mounts/work_src_done/STABLE/openais/openais.changes 2009-06-09 01:57:56.000000000 +0200 @@ -1,0 +2,42 @@ +Tue Jun 9 01:47:55 CEST 2009 - ro@suse.de + +- fix build with gcc-4.4 (#elif -> #else) +- renamed dprintf to my_dprintf to avoid collision with function + from glibc + +------------------------------------------------------------------- +Thu Mar 19 12:50:00 CET 2009 - lmb@suse.de + +- aisexec tried logging to stderr which was closed, causing sporadic + start failures on fast nodes (bnc#486613). +- Change default timings for openais.conf to stabilize membership + (bnc#486147). + +------------------------------------------------------------------- +Mon Mar 16 13:15:00 CET 2009 - lmb@suse.de + +- bug-483878_ipcs-destroy-take4.patch: removed, is upstream. + +------------------------------------------------------------------- +Mon Mar 16 07:19:54 CET 2009 - abeekhof@suse.de + +- Removed the following patches that are now upstream + - openais-whitetank-ckpt-list-init.patch + - openais-whitetank-ipc-fix-race-take2.patch + - whitetank-cpg-empty-list-del.patch + - openais-whitetank-cpg-refcount-add.patch +- Update source tarball to r1733 + + changes structure of ipc system to allow reference counting in + lib_init_fn and lib_exit_fn for services (cpg) which record the + conn_info data structure there + + change totempg interface to allow for reserve and release instead of send_ok primitive + + Remove reserved count from totempg_mcast operations + + Delete process info from list parsing if ipc connection dies + + The ckpt service with new versions of openais would segfault with older + + CPGs with large node IDs greater than 0xffffff would not synchronize + + Prevent logging deadlocks due to fork() in service engines. 
+ + Patch to fix refcounting problem for checkpoints from leaving nodes. + + Remove pass by value in the ckpt service. + + Serialize ipc access so only one thread accesses a service engine at a + +------------------------------------------------------------------- calling whatdependson for head-i586 Old: ---- openais-whitetank-ckpt-list-init.patch openais-whitetank-cpg-refcount-add.patch openais-whitetank-ipc-fix-race-take2.patch whitetank-cpg-empty-list-del.patch New: ---- openais-dprintf.diff openais-else.diff ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ openais.spec ++++++ --- /var/tmp/diff_new_pack.R24708/_old 2009-06-10 17:09:36.000000000 +0200 +++ /var/tmp/diff_new_pack.R24708/_new 2009-06-10 17:09:36.000000000 +0200 @@ -31,17 +31,15 @@ Name: openais Summary: The OpenAIS Standards-Based Cluster Framework executive and APIs Version: 0.80.3 -Release: 23 +Release: 24 License: BSD 3-Clause Group: Productivity/Clustering/HA Url: http://www.openais.org/ Source: openais.tar.gz Patch1: suse.diff Patch2: openais-init.diff -Patch3: openais-whitetank-cpg-refcount-add.patch -Patch4: openais-whitetank-ckpt-list-init.patch -Patch5: openais-whitetank-ipc-fix-race-take2.patch -Patch7: whitetank-cpg-empty-list-del.patch +Patch3: openais-else.diff +Patch4: openais-dprintf.diff BuildRoot: %{_tmppath}/%{name}-%{version}-build Requires(pre): /usr/sbin/useradd Requires(post): /sbin/chkconfig @@ -144,10 +142,8 @@ %setup -n openais %patch1 -p1 %patch2 -p1 -%patch3 -p0 -%patch4 -p0 -%patch5 -p0 -%patch7 -p0 +%patch3 +%patch4 %build # -O3 required for performance reasons @@ -263,6 +259,36 @@ %{_mandir}/man3/evs_*.3* %changelog +* Tue Jun 09 2009 ro@suse.de +- fix build with gcc-4.4 (#elif -> #else) +- renamed dprintf to my_dprintf to avoid collision with function + from glibc +* Thu Mar 19 2009 lmb@suse.de +- aisexec tried logging to stderr which was closed, causing sporadic + start failures on fast nodes 
(bnc#486613). +- Change default timings for openais.conf to stabilize membership + (bnc#486147). +* Mon Mar 16 2009 lmb@suse.de +- bug-483878_ipcs-destroy-take4.patch: removed, is upstream. +* Mon Mar 16 2009 abeekhof@suse.de +- Removed the following patches that are now upstream + - openais-whitetank-ckpt-list-init.patch + - openais-whitetank-ipc-fix-race-take2.patch + - whitetank-cpg-empty-list-del.patch + - openais-whitetank-cpg-refcount-add.patch +- Update source tarball to r1733 + + changes structure of ipc system to allow reference counting in + lib_init_fn and lib_exit_fn for services (cpg) which record the + conn_info data structure there + + change totempg interface to allow for reserve and release instead of send_ok primitive + + Remove reserved count from totempg_mcast operations + + Delete process info from list parsing if ipc connection dies + + The ckpt service with new versions of openais would segfault with older + + CPGs with large node IDs greater than 0xffffff would not synchronize + + Prevent logging deadlocks due to fork() in service engines. + + Patch to fix refcounting problem for checkpoints from leaving nodes. + + Remove pass by value in the ckpt service. 
+ + Serialize ipc access so only one thread accesses a service engine at a * Mon Mar 09 2009 abeekhof@suse.de - HAE11 RC6 - Remove patch to MAX_INTERFACES since the use of multiple links ++++++ openais-dprintf.diff ++++++ --- exec/amf.c +++ exec/amf.c @@ -417,7 +417,7 @@ unsigned int *joined_list, int joined_list_entries, struct memb_ring_id *ring_id) { - dprintf ("amf_confchg_fn : type = %d,mnum = %d,jnum = %d,lnum = %d\n", + my_dprintf ("amf_confchg_fn : type = %d,mnum = %d,jnum = %d,lnum = %d\n", configuration_type,member_list_entries, joined_list_entries,left_list_entries); } @@ -431,7 +431,7 @@ comp = amf_pd->comp; assert (comp != NULL); comp->conn = NULL; - dprintf ("Lib exit from comp %s\n", getSaNameT (&comp->name)); + my_dprintf ("Lib exit from comp %s\n", getSaNameT (&comp->name)); return (0); } --- exec/amfcluster.c +++ exec/amfcluster.c @@ -107,7 +107,7 @@ struct amf_application *app; struct amf_cluster *cluster = _cluster; - dprintf("2nd Cluster start timer expired, assigning workload to application\n"); + my_dprintf("2nd Cluster start timer expired, assigning workload to application\n"); for (app = cluster->application_head; app != NULL; app = app->next) { amf_application_assign_workload (app, this_amf_node); --- exec/amfcomp.c +++ exec/amfcomp.c @@ -557,7 +557,7 @@ static int clc_csi_remove_callback (struct amf_comp *comp) { - dprintf ("clc_tcsi_remove_callback\n"); + my_dprintf ("clc_tcsi_remove_callback\n"); return (0); } @@ -597,7 +597,7 @@ struct clc_command_run_data *clc_command_run_data; - dprintf ("clc_cli_cleanup\n"); + my_dprintf ("clc_cli_cleanup\n"); clc_command_run_data = malloc (sizeof (struct clc_command_run_data)); if (clc_command_run_data == NULL) { openais_exit_error (AIS_DONE_OUT_OF_MEMORY); @@ -619,7 +619,7 @@ static int clc_cli_cleanup_local (struct amf_comp *comp) { - dprintf ("clc_cli_cleanup_local\n"); + my_dprintf ("clc_cli_cleanup_local\n"); return (0); } @@ -628,7 +628,7 @@ { int res; - dprintf ("clc terminate for comp 
%s\n", getSaNameT (&comp->name)); + my_dprintf ("clc terminate for comp %s\n", getSaNameT (&comp->name)); assert (0); operational_state_comp_set (comp, SA_AMF_OPERATIONAL_DISABLED); comp_presence_state_set (comp, SA_AMF_PRESENCE_TERMINATING); @@ -794,7 +794,7 @@ struct res_lib_amf_csiremovecallback res_lib_amf_csiremovecallback; struct csi_remove_callback_data *csi_remove_callback_data; - dprintf ("\t%s\n", getSaNameT (&comp->name)); + my_dprintf ("\t%s\n", getSaNameT (&comp->name)); res_lib_amf_csiremovecallback.header.id = MESSAGE_RES_AMF_CSIREMOVECALLBACK; res_lib_amf_csiremovecallback.header.size = sizeof (struct res_lib_amf_csiremovecallback); @@ -922,7 +922,7 @@ static void healthcheck_deactivate ( struct amf_healthcheck *healthcheck_active) { - dprintf ("deactivating healthcheck for component %s\n", + my_dprintf ("deactivating healthcheck for component %s\n", getSaNameT (&healthcheck_active->comp->name)); poll_timer_delete (aisexec_poll_handle, @@ -1229,7 +1229,7 @@ goto error_exit; } - dprintf ("Healthcheckstart: '%s', key '%s'", + my_dprintf ("Healthcheckstart: '%s', key '%s'", comp->name.value, healthcheckKey->key); /* @@ -1285,7 +1285,7 @@ struct amf_healthcheck *healthcheck; SaAisErrorT error = SA_AIS_OK; - dprintf ("Healthcheckstop: '%s', key '%s'", + my_dprintf ("Healthcheckstop: '%s', key '%s'", comp->name.value, healthcheckKey->key); if (healthcheckKey == NULL) { @@ -1397,7 +1397,7 @@ struct component_terminate_callback_data *component_terminate_callback_data; component_terminate_callback_data = data; - dprintf ("Lib component terminate callback response, error: %d", error); + my_dprintf ("Lib component terminate callback response, error: %d", error); amf_comp_healthcheck_deactivate (component_terminate_callback_data->comp); escalation_policy_restart (component_terminate_callback_data->comp); return 1; @@ -1442,7 +1442,7 @@ switch (interface) { case AMF_RESPONSE_CSISETCALLBACK: { struct amf_csi_assignment *csi_assignment = data; - dprintf ("CSI '%s' 
set callback response from '%s', error: %d", + my_dprintf ("CSI '%s' set callback response from '%s', error: %d", csi_assignment->csi->name.value, csi_assignment->comp->name.value, error); comp = csi_assignment->comp; @@ -1459,7 +1459,7 @@ } case AMF_RESPONSE_CSIREMOVECALLBACK: { struct amf_csi_assignment *csi_assignment = data; - dprintf ("Lib csi '%s' remove callback response from '%s', error: %d", + my_dprintf ("Lib csi '%s' remove callback response from '%s', error: %d", csi_assignment->csi->name.value, csi_assignment->comp->name.value, error); comp = csi_assignment->comp; @@ -1476,7 +1476,7 @@ } case AMF_RESPONSE_COMPONENTTERMINATECALLBACK: { struct component_terminate_callback_data *callback_data = data; - dprintf ("Lib comp '%s' terminate callback response, error: %d", + my_dprintf ("Lib comp '%s' terminate callback response, error: %d", callback_data->comp->name.value, error); comp_presence_state_set (callback_data->comp, SA_AMF_PRESENCE_UNINSTANTIATED); @@ -1521,7 +1521,7 @@ */ void amf_comp_terminate (struct amf_comp *comp) { - dprintf ("comp terminate '%s'\n", getSaNameT (&comp->name)); + my_dprintf ("comp terminate '%s'\n", getSaNameT (&comp->name)); amf_comp_healthcheck_stop (comp, NULL); comp_presence_state_set (comp, SA_AMF_PRESENCE_TERMINATING); @@ -1540,7 +1540,7 @@ */ void amf_comp_restart (struct amf_comp *comp) { - dprintf ("comp restart '%s'\n", getSaNameT (&comp->name)); + my_dprintf ("comp restart '%s'\n", getSaNameT (&comp->name)); comp_presence_state_set (comp, SA_AMF_PRESENCE_RESTARTING); comp->saAmfCompRestartCount += 1; amf_comp_healthcheck_stop (comp, NULL); @@ -1565,7 +1565,7 @@ assert (comp != NULL && csi_name != NULL && ha_state != NULL); - dprintf ("comp ha state get from comp '%s' CSI '%s'\n", + my_dprintf ("comp ha state get from comp '%s' CSI '%s'\n", getSaNameT (&comp->name), csi_name->value); assignment = csi_assignment_find_in (comp, csi_name); @@ -1593,7 +1593,7 @@ struct amf_healthcheck *healthcheck; SaAisErrorT error = 
SA_AIS_OK; - dprintf ("Healthcheckconfirm: '%s', key '%s'", + my_dprintf ("Healthcheckconfirm: '%s', key '%s'", comp->name.value, healthcheckKey->key); healthcheck = amf_comp_find_healthcheck (comp, healthcheckKey); --- exec/amfsg.c +++ exec/amfsg.c @@ -173,13 +173,13 @@ if (si->assigned_sis->su == su) { si_assignment = si->assigned_sis; si->assigned_sis = si_assignment->next; - dprintf ("first"); + my_dprintf ("first"); } else { si_assignment = si->assigned_sis->next; si->assigned_sis->next = NULL; - dprintf ("second"); + my_dprintf ("second"); } - dprintf ("%p, %d, %d", + my_dprintf ("%p, %d, %d", si_assignment, si_assignment->name.length, si->assigned_sis->name.length); assert (si_assignment != NULL); @@ -309,7 +309,7 @@ sg->avail_state = SG_AC_Idle; amf_application_sg_assigned (sg->application, sg); } else { - dprintf ("%d, %d", si_assignment_cnt, confirmed_assignments); + my_dprintf ("%d, %d", si_assignment_cnt, confirmed_assignments); } break; case SG_AC_AssigningStandBy: @@ -326,7 +326,7 @@ break; } default: - dprintf ("%d, %d, %d", sg->avail_state, si_assignment_cnt, + my_dprintf ("%d, %d, %d", sg->avail_state, si_assignment_cnt, confirmed_assignments); amf_runtime_attributes_print (amf_cluster); assert (0); @@ -463,7 +463,7 @@ } if (total_assigned == 0) { - dprintf ("Info: No SIs assigned!"); + my_dprintf ("Info: No SIs assigned!"); } } @@ -513,7 +513,7 @@ su = su->next; } if (total_assigned == 0) { - dprintf ("Info: No SIs assigned!"); + my_dprintf ("Info: No SIs assigned!"); } } @@ -594,14 +594,14 @@ * to assign based upon reduction procedure */ if ((inservice_count - active_sus_needed) < 0) { - dprintf ("assignment VI - partial assignment with SIs drop outs\n"); + my_dprintf ("assignment VI - partial assignment with SIs drop outs\n"); su_active_assign = active_sus_needed; su_standby_assign = 0; su_spare_assign = 0; } else if ((inservice_count - active_sus_needed - standby_sus_needed) < 0) { - dprintf ("assignment V - partial assignment with reduction " 
+ my_dprintf ("assignment V - partial assignment with reduction " "of standby units\n"); su_active_assign = active_sus_needed; @@ -615,7 +615,7 @@ if ((sg->saAmfSGMaxStandbySIsperSUs * units_for_standby) <= sg_si_count_get (sg)) { - dprintf ("IV: full assignment with reduction of active service units\n"); + my_dprintf ("IV: full assignment with reduction of active service units\n"); su_active_assign = inservice_count - standby_sus_needed; su_standby_assign = standby_sus_needed; su_spare_assign = 0; @@ -623,27 +623,27 @@ if ((sg->saAmfSGMaxActiveSIsperSUs * units_for_active) <= sg_si_count_get (sg)) { - dprintf ("III: full assignment with reduction of standby " + my_dprintf ("III: full assignment with reduction of standby " "service units\n"); su_active_assign = sg->saAmfSGNumPrefActiveSUs; su_standby_assign = units_for_standby; su_spare_assign = 0; } else if (ii_spare == 0) { - dprintf ("II: full assignment with spare reduction\n"); + my_dprintf ("II: full assignment with spare reduction\n"); su_active_assign = sg->saAmfSGNumPrefActiveSUs; su_standby_assign = sg->saAmfSGNumPrefStandbySUs; su_spare_assign = 0; } else { - dprintf ("I: full assignment with spares\n"); + my_dprintf ("I: full assignment with spares\n"); su_active_assign = sg->saAmfSGNumPrefActiveSUs; su_standby_assign = sg->saAmfSGNumPrefStandbySUs; su_spare_assign = ii_spare; } - dprintf ("(inservice=%d) (assigning active=%d) (assigning standby=%d)" + my_dprintf ("(inservice=%d) (assigning active=%d) (assigning standby=%d)" " (assigning spares=%d)\n", inservice_count, su_active_assign, su_standby_assign, su_spare_assign); sg_assign_nm_active (sg, su_active_assign); --- exec/amfsu.c +++ exec/amfsu.c @@ -201,7 +201,7 @@ { struct amf_csi_assignment *csi_assignment; - dprintf (" Creating CSI '%s' to comp '%s' with hastate %s\n", + my_dprintf (" Creating CSI '%s' to comp '%s' with hastate %s\n", getSaNameT (&csi->name), getSaNameT (&comp->name), amf_ha_state (ha_state)); @@ -274,7 +274,7 @@ { struct 
amf_si_assignment *si_assignment; - dprintf ("Creating SI '%s' to SU '%s' with hastate %s\n", + my_dprintf ("Creating SI '%s' to SU '%s' with hastate %s\n", getSaNameT (&si->name), getSaNameT (&su->name), amf_ha_state (ha_state)); @@ -425,7 +425,7 @@ } break; default: - dprintf ("state %d", su->restart_control_state); + my_dprintf ("state %d", su->restart_control_state); assert (0); } break; --- exec/print.h +++ exec/print.h @@ -128,7 +128,7 @@ } \ } while(0) -#define dprintf(format, args...) do { \ +#define my_dprintf(format, args...) do { \ if (LOG_LEVEL_DEBUG <= loggers[logger_identifier].level) { \ internal_log_printf2 (__FILE__, __LINE__, LOG_LEVEL_DEBUG, logger_identifier, format, ##args); \ } \ --- test/subscription.c +++ test/subscription.c @@ -218,7 +218,7 @@ return time_buf; } -#define dprintf(format, ...) \ +#define my_dprintf(format, ...) \ { \ if (did_dot) { \ printf("\n"); \ @@ -246,13 +246,13 @@ #endif if (!quiet) - dprintf("event_callback called\n"); + my_dprintf("event_callback called\n"); if (!quiet) - dprintf("sub ID: %x\n", subscription_id); + my_dprintf("sub ID: %x\n", subscription_id); if (!quiet) - dprintf("event_handle %llx\n", (unsigned long long)event_handle); + my_dprintf("event_handle %llx\n", (unsigned long long)event_handle); if (!quiet) - dprintf("event data size %llu\n", (unsigned long long)event_data_size); + my_dprintf("event data size %llu\n", (unsigned long long)event_data_size); evt_pat_get_array.patterns[0].patternSize = PAT_SIZE; evt_pat_get_array.patterns[1].patternSize = PAT_SIZE; @@ -269,35 +269,35 @@ ); if (result != SA_AIS_OK) { get_sa_error(result, result_buf, result_buf_len); - dprintf("event get attr result(2): %s\n", result_buf); + my_dprintf("event get attr result(2): %s\n", result_buf); goto evt_free; } if (!quiet) { - dprintf("pattern array count: %llu\n", + my_dprintf("pattern array count: %llu\n", (unsigned long long)evt_pat_get_array.patternsNumber); for (i = 0; i < evt_pat_get_array.patternsNumber; i++) { - 
dprintf( "pattern %d =\"%s\"\n", i, + my_dprintf( "pattern %d =\"%s\"\n", i, evt_pat_get_array.patterns[i].pattern); } - dprintf("priority: 0x%x\n", priority); - dprintf("retention: 0x%llx\n", (unsigned long long)retention_time); - dprintf("publisher name content: \"%s\"\n", + my_dprintf("priority: 0x%x\n", priority); + my_dprintf("retention: 0x%llx\n", (unsigned long long)retention_time); + my_dprintf("publisher name content: \"%s\"\n", publisher_name.value); } if (event_id == SA_EVT_EVENTID_LOST) { - dprintf("*** Events have been dropped at %s", + my_dprintf("*** Events have been dropped at %s", ais_time_str(publish_time)); if ((evt_pat_get_array.patternsNumber == 0)|| (strcmp((char *)evt_pat_get_array.patterns[0].pattern, SA_EVT_LOST_EVENT) != 0)) { - dprintf("*** Received SA_EVT_EVENTID_LOST but pattern is wrong: %s\n", + my_dprintf("*** Received SA_EVT_EVENTID_LOST but pattern is wrong: %s\n", evt_pat_get_array.patterns[0].pattern); } } if (quiet < 2) { - dprintf("event id: 0x%016llx\n", (unsigned long long)event_id); + my_dprintf("event id: 0x%016llx\n", (unsigned long long)event_id); } if (quiet == 2) { if ((++evt_count % EVT_FREQ) == 0) { @@ -319,7 +319,7 @@ if ((last_event_id[idx] >> 32) == (event_id >> 32)) { last_event_id[idx]++; if (last_event_id[idx] != event_id) { - dprintf("*** expected %016llx got %016llx event_id\n", + my_dprintf("*** expected %016llx got %016llx event_id\n", (unsigned long long)last_event_id[idx], (unsigned long long)event_id); last_event_id[idx] = event_id; @@ -329,13 +329,13 @@ } } if (idx == MAX_NODES) { - dprintf("*** Too many nodes in cluster\n"); + my_dprintf("*** Too many nodes in cluster\n"); exit(1); } #endif if (event_data_size != user_data_size) { - dprintf("unexpected data size: e=%d, a=%llu\n", + my_dprintf("unexpected data size: e=%d, a=%llu\n", user_data_size, (unsigned long long)event_data_size); goto evt_free; } @@ -345,21 +345,21 @@ &received_size); if (result != SA_AIS_OK) { get_sa_error(result, result_buf, 
result_buf_len); - dprintf("event get data result: %s\n", result_buf); + my_dprintf("event get data result: %s\n", result_buf); goto evt_free; } if (received_size != event_data_size) { - dprintf("event data mismatch e=%llu, a=%llu\n", + my_dprintf("event data mismatch e=%llu, a=%llu\n", (unsigned long long)event_data_size, (unsigned long long)received_size); goto evt_free; } if (memcmp(user_data, event_data, user_data_size) != 0 ) { - dprintf("event data doesn't match specified file data\n"); + my_dprintf("event data doesn't match specified file data\n"); goto evt_free; } if (!quiet) { - dprintf("Received %d bytes of data OK\n", + my_dprintf("Received %d bytes of data OK\n", user_data_size); } @@ -367,7 +367,7 @@ result = saEvtEventFree(event_handle); if (!quiet) { get_sa_error(result, result_buf, result_buf_len); - dprintf("event free result: %s\n", result_buf); + my_dprintf("event free result: %s\n", result_buf); } } ++++++ openais-else.diff ++++++ --- exec/crypto.c +++ exec/crypto.c @@ -37,7 +37,7 @@ #define ENDIAN_LITTLE #elif _BYTE_ORDER == _BIG_ENDIAN #define ENDIAN_BIG -#elif +#else #warning "cannot detect byte order" #endif ++++++ openais.tar.gz ++++++ diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/openais/exec/aispoll.c new/openais/exec/aispoll.c --- old/openais/exec/aispoll.c 2009-01-20 19:14:50.000000000 +0100 +++ new/openais/exec/aispoll.c 2009-03-13 19:59:02.000000000 +0100 @@ -57,8 +57,6 @@ struct pollfd *ufds; int poll_entry_count; struct timerlist timerlist; - void (*serialize_lock_fn) (void); - void (*serialize_unlock_fn) (void); int stop_requested; }; @@ -71,9 +69,7 @@ .iterator = 0 }; -poll_handle poll_create ( - void (*serialize_lock_fn) (void), - void (*serialize_unlock_fn) (void)) +poll_handle poll_create (void) { poll_handle handle; struct poll_instance *poll_instance; @@ -94,8 +90,6 @@ poll_instance->ufds = 0; poll_instance->poll_entry_count = 0; poll_instance->stop_requested = 0; - 
poll_instance->serialize_lock_fn = serialize_lock_fn; - poll_instance->serialize_unlock_fn = serialize_unlock_fn; timerlist_init (&poll_instance->timerlist); return (handle); @@ -405,13 +399,11 @@ if (poll_instance->ufds[i].fd != -1 && poll_instance->ufds[i].revents) { - poll_instance->serialize_lock_fn(); res = poll_instance->poll_entries[i].dispatch_fn (handle, poll_instance->ufds[i].fd, poll_instance->ufds[i].revents, poll_instance->poll_entries[i].data); - poll_instance->serialize_unlock_fn(); /* * Remove dispatch functions that return -1 */ @@ -420,9 +412,7 @@ } } } - poll_instance->serialize_lock_fn(); timerlist_expire (&poll_instance->timerlist); - poll_instance->serialize_unlock_fn(); } /* for (;;) */ hdb_handle_put (&poll_instance_database, handle); diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/openais/exec/aispoll.h new/openais/exec/aispoll.h --- old/openais/exec/aispoll.h 2007-06-25 04:22:54.000000000 +0200 +++ new/openais/exec/aispoll.h 2009-03-13 19:59:02.000000000 +0100 @@ -40,9 +40,7 @@ typedef void * poll_timer_handle; typedef unsigned int poll_handle; -poll_handle poll_create ( - void (*serialize_lock) (void), - void (*serialize_unlock) (void)); +poll_handle poll_create (void); int poll_destroy (poll_handle poll_handle); diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/openais/exec/ckpt.c new/openais/exec/ckpt.c --- old/openais/exec/ckpt.c 2009-02-13 10:11:05.000000000 +0100 +++ new/openais/exec/ckpt.c 2009-03-13 10:44:36.000000000 +0100 @@ -350,6 +350,8 @@ static int ckpt_sync_process (void); static void ckpt_sync_abort(void); +static int nodeid_in_membership (unsigned int nodeid); + static void sync_refcount_increment ( struct checkpoint *checkpoint, unsigned int nodeid); @@ -819,6 +821,7 @@ my_should_sync = 1; } + first_configuration = 0; } @@ -890,7 +893,7 @@ static void ckpt_checkpoint_remove_cleanup ( void *conn, - mar_name_t checkpoint_name, + mar_name_t 
*checkpoint_name, mar_uint32_t ckpt_id) { struct list_head *list; @@ -903,7 +906,7 @@ checkpoint_cleanup = list_entry (list, struct checkpoint_cleanup, list); if (mar_name_match (&checkpoint_cleanup->checkpoint_name, - &checkpoint_name) && + checkpoint_name) && (checkpoint_cleanup->ckpt_id == ckpt_id)) { list_del (&checkpoint_cleanup->list); @@ -995,6 +998,7 @@ openais_timer_delete (checkpoint->retention_timer); list_del (&checkpoint->expiry_list); + list_init (&checkpoint->expiry_list); /* * Release all checkpoint sections for this checkpoint @@ -1534,6 +1538,7 @@ } list_del (&checkpoint->expiry_list); + list_init (&checkpoint->expiry_list); list = my_checkpoint_expiry_list_head.next; } my_token_callback_active = 0; @@ -2526,7 +2531,7 @@ ckpt_checkpoint_remove_cleanup ( conn, - req_lib_ckpt_checkpointclose->checkpoint_name, + &req_lib_ckpt_checkpointclose->checkpoint_name, req_lib_ckpt_checkpointclose->ckpt_id); assert (totempg_groups_mcast_joined (openais_group_handle, &iovec, 1, TOTEMPG_AGREED) == 0); } @@ -3313,6 +3318,21 @@ /* * Recovery after network partition or merge */ +int nodeid_in_membership ( + unsigned int nodeid) +{ + unsigned int i; + int found = 0; + + for (i = 0; i < my_old_member_list_entries; i++) { + if (nodeid == my_old_member_list[i]) { + found = 1; + break; + } + } + return (found); +} + void sync_refcount_increment ( struct checkpoint *checkpoint, unsigned int nodeid) @@ -3853,6 +3873,22 @@ return; } + /* + * Discard checkpoints that are used to synchronize the global_ckpt_id + * also setting the global ckpt_id as well. 
+ */ + if (memcmp (&req_exec_ckpt_sync_checkpoint_section->checkpoint_name.value, + GLOBALID_CHECKPOINT_NAME, + req_exec_ckpt_sync_checkpoint_section->checkpoint_name.length) == 0) { + + if (req_exec_ckpt_sync_checkpoint_section->ckpt_id >= global_ckpt_id) { + global_ckpt_id = req_exec_ckpt_sync_checkpoint_section->ckpt_id + 1; + } + + LEAVE(); + return; + } + checkpoint = checkpoint_find_specific ( &sync_checkpoint_list_head, &req_exec_ckpt_sync_checkpoint_section->checkpoint_name, @@ -3967,6 +4003,23 @@ return; } + /* + * Discard checkpoints that are used to synchronize the global_ckpt_id + * also setting the global ckpt_id as well. + */ + if (memcmp (&req_exec_ckpt_sync_checkpoint_refcount->checkpoint_name.value, + GLOBALID_CHECKPOINT_NAME, + req_exec_ckpt_sync_checkpoint_refcount->checkpoint_name.length) == 0) { + + if (req_exec_ckpt_sync_checkpoint_refcount->ckpt_id >= global_ckpt_id) { + global_ckpt_id = req_exec_ckpt_sync_checkpoint_refcount->ckpt_id + 1; + } + + LEAVE(); + return; + } + + checkpoint = checkpoint_find_specific ( &sync_checkpoint_list_head, &req_exec_ckpt_sync_checkpoint_refcount->checkpoint_name, @@ -3975,11 +4028,25 @@ assert (checkpoint != NULL); for (i = 0; i < PROCESSOR_COUNT_MAX; i++) { + /* + * If nodeid is zero, done processing list + */ if (req_exec_ckpt_sync_checkpoint_refcount->refcount_set[i].nodeid == 0) { break; } + + /* + * if nodeid not in membership, check next one + */ + if (nodeid_in_membership (req_exec_ckpt_sync_checkpoint_refcount->refcount_set[i].nodeid) == 0) { + continue; + } for (j = 0; j < PROCESSOR_COUNT_MAX; j++) { + /* + * If new entry in the new checkpoint (nodeid=0) then add it + */ if (checkpoint->refcount_set[j].nodeid == 0) { + checkpoint->refcount_set[j].nodeid = req_exec_ckpt_sync_checkpoint_refcount->refcount_set[i].nodeid; checkpoint->refcount_set[j].refcount = @@ -3990,6 +4057,9 @@ break; } + /* + * If old entry in checkpoint equals messages nodeid, add it to reference count + */ if 
(req_exec_ckpt_sync_checkpoint_refcount->refcount_set[i].nodeid == checkpoint->refcount_set[j].nodeid) { checkpoint->refcount_set[j].refcount += req_exec_ckpt_sync_checkpoint_refcount->refcount_set[i].refcount; diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/openais/exec/cpg.c new/openais/exec/cpg.c --- old/openais/exec/cpg.c 2009-02-13 10:11:05.000000000 +0100 +++ new/openais/exec/cpg.c 2009-03-10 09:09:42.000000000 +0100 @@ -485,8 +485,9 @@ notify_info.nodeid = totempg_my_nodeid_get(); notify_info.reason = CONFCHG_CPG_REASON_PROCDOWN; cpg_node_joinleave_send(gi, pi, MESSAGE_REQ_EXEC_CPG_PROCLEAVE, CONFCHG_CPG_REASON_PROCDOWN); - list_del(&pi->list); } + list_del(&pi->list); + openais_conn_refcount_dec (conn); return (0); } @@ -615,7 +616,7 @@ struct memb_ring_id *ring_id) { int i; - uint32_t lowest_nodeid = 0xffffff; + uint32_t lowest_nodeid = 0xffffffff; struct iovec req_exec_cpg_iovec; /* We don't send the library joinlist in here because it can end up @@ -901,7 +902,6 @@ res_lib_cpg_mcast->pid = req_exec_cpg_mcast->pid; res_lib_cpg_mcast->nodeid = nodeid; if (message_source_is_local (&req_exec_cpg_mcast->source)) { - openais_conn_refcount_dec (req_exec_cpg_mcast->source.conn); process_info = (struct process_info *)openais_conn_private_data_get (req_exec_cpg_mcast->source.conn); } memcpy(&res_lib_cpg_mcast->group_name, &gi->group_name, @@ -992,7 +992,7 @@ struct process_info *pi = (struct process_info *)openais_conn_private_data_get (conn); pi->conn = conn; -// openais_conn_info_refcnt_inc (conn); + openais_conn_refcount_inc (conn); log_printf(LOG_LEVEL_DEBUG, "lib_init_fn: conn=%p, pi=%p\n", conn, pi); return (0); } @@ -1102,7 +1102,6 @@ req_exec_cpg_iovec[1].iov_len = msglen; // TODO: guarantee type... 
- openais_conn_refcount_inc (conn); result = totempg_groups_mcast_joined (openais_group_handle, req_exec_cpg_iovec, 2, TOTEMPG_AGREED); assert(result == 0); diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/openais/exec/evs.c new/openais/exec/evs.c --- old/openais/exec/evs.c 2009-02-13 10:11:05.000000000 +0100 +++ new/openais/exec/evs.c 2009-03-03 13:39:37.000000000 +0100 @@ -374,7 +374,6 @@ struct res_lib_evs_mcast_joined res_lib_evs_mcast_joined; struct iovec req_exec_evs_mcast_iovec[3]; struct req_exec_evs_mcast req_exec_evs_mcast; - int send_ok = 0; int res; struct evs_pd *evs_pd = (struct evs_pd *)openais_conn_private_data_get (conn); @@ -393,8 +392,6 @@ req_exec_evs_mcast_iovec[1].iov_len = evs_pd->group_entries * sizeof (struct evs_group); req_exec_evs_mcast_iovec[2].iov_base = &req_lib_evs_mcast_joined->msg; req_exec_evs_mcast_iovec[2].iov_len = req_lib_evs_mcast_joined->msg_len; -// TODO this doesn't seem to work for some reason - send_ok = totempg_groups_send_ok_joined (openais_group_handle, req_exec_evs_mcast_iovec, 3); res = totempg_groups_mcast_joined (openais_group_handle, req_exec_evs_mcast_iovec, 3, TOTEMPG_AGREED); // TODO @@ -420,7 +417,6 @@ struct iovec req_exec_evs_mcast_iovec[3]; struct req_exec_evs_mcast req_exec_evs_mcast; char *msg_addr; - int send_ok = 0; int res; req_exec_evs_mcast.header.size = sizeof (struct req_exec_evs_mcast) + @@ -443,8 +439,6 @@ req_exec_evs_mcast_iovec[2].iov_base = msg_addr; req_exec_evs_mcast_iovec[2].iov_len = req_lib_evs_mcast_groups->msg_len; -// TODO this is wacky - send_ok = totempg_groups_send_ok_joined (openais_group_handle, req_exec_evs_mcast_iovec, 3); res = totempg_groups_mcast_joined (openais_group_handle, req_exec_evs_mcast_iovec, 3, TOTEMPG_AGREED); if (res == 0) { error = EVS_OK; diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/openais/exec/ipc.c new/openais/exec/ipc.c --- old/openais/exec/ipc.c 2009-02-24 11:53:53.000000000 +0100 
+++ new/openais/exec/ipc.c 2009-03-13 19:59:02.000000000 +0100 @@ -99,6 +99,10 @@ static unsigned int g_gid_valid = 0; +static void (*ipc_serialize_lock_fn) (void); + +static void (*ipc_serialize_unlock_fn) (void); + DECLARE_LIST_INIT (conn_info_list_head); struct outq_item { @@ -116,13 +120,21 @@ }; #endif +enum conn_state { + CONN_STATE_THREAD_INACTIVE = 0, + CONN_STATE_THREAD_ACTIVE = 1, + CONN_STATE_THREAD_REQUEST_EXIT = 2, + CONN_STATE_THREAD_DESTROYED = 3, + CONN_STATE_LIB_EXIT_CALLED = 4, + CONN_STATE_DISCONNECT_INACTIVE = 5 +}; + struct conn_info { int fd; pthread_t thread; pthread_attr_t thread_attr; unsigned int service; - int destroyed; - int disconnect_requested; + enum conn_state state; int notify_flow_control_enabled; int refcount; key_t shmkey; @@ -148,44 +160,94 @@ static void ipc_disconnect (struct conn_info *conn_info); +static int ipc_thread_active (void *conn) +{ + struct conn_info *conn_info = (struct conn_info *)conn; + int retval = 0; + + pthread_mutex_lock (&conn_info->mutex); + if (conn_info->state == CONN_STATE_THREAD_ACTIVE) { + retval = 1; + } + pthread_mutex_unlock (&conn_info->mutex); + return (retval); +} + +static int ipc_thread_exiting (void *conn) +{ + struct conn_info *conn_info = (struct conn_info *)conn; + int retval = 1; + + pthread_mutex_lock (&conn_info->mutex); + if (conn_info->state == CONN_STATE_THREAD_INACTIVE) { + retval = 0; + } else + if (conn_info->state == CONN_STATE_THREAD_ACTIVE) { + retval = 0; + } + pthread_mutex_unlock (&conn_info->mutex); + return (retval); +} + +/* + * returns 0 if should be called again, -1 if finished + */ static inline int conn_info_destroy (struct conn_info *conn_info) { unsigned int res; + void *retval; list_del (&conn_info->list); list_init (&conn_info->list); - if (conn_info->service == SOCKET_SERVICE_INIT) { + if (conn_info->state == CONN_STATE_THREAD_REQUEST_EXIT) { + res = pthread_join (conn_info->thread, &retval); + conn_info->state = CONN_STATE_THREAD_DESTROYED; + return (0); + } + 
+ if (conn_info->state == CONN_STATE_THREAD_INACTIVE || + conn_info->state == CONN_STATE_DISCONNECT_INACTIVE) { list_del (&conn_info->list); close (conn_info->fd); free (conn_info); + return (-1); + } + + if (conn_info->state == CONN_STATE_THREAD_ACTIVE) { + pthread_kill (conn_info->thread, SIGUSR1); return (0); } + + ipc_serialize_lock_fn(); /* - * Destroy shared memory segment and semaphore + * Retry library exit function if busy */ - if (conn_info->destroyed == 0) { - openais_conn_refcount_dec (conn_info); - shmdt (conn_info->mem); - res = shmctl (conn_info->shmid, IPC_RMID, NULL); - semctl (conn_info->semid, 0, IPC_RMID); - conn_info->destroyed = 1; + if (conn_info->state == CONN_STATE_THREAD_DESTROYED) { + res = ais_service[conn_info->service]->lib_exit_fn (conn_info); + if (res == -1) { + ipc_serialize_unlock_fn(); + return (0); + } else { + conn_info->state = CONN_STATE_LIB_EXIT_CALLED; + } } pthread_mutex_lock (&conn_info->mutex); if (conn_info->refcount > 0) { pthread_mutex_unlock (&conn_info->mutex); - return (-1); + ipc_serialize_unlock_fn(); + return (0); } + list_del (&conn_info->list); pthread_mutex_unlock (&conn_info->mutex); /* - * Retry library exit function if busy + * Destroy shared memory segment and semaphore */ - res = ais_service[conn_info->service]->lib_exit_fn (conn_info); - if (res == -1) { - return (-1); - } + shmdt (conn_info->mem); + res = shmctl (conn_info->shmid, IPC_RMID, NULL); + semctl (conn_info->semid, 0, IPC_RMID); /* * Free allocated data needed to retry exiting library IPC connection @@ -194,9 +256,9 @@ free (conn_info->private_data); } close (conn_info->fd); - list_del (&conn_info->list); free (conn_info); - return (0); + ipc_serialize_unlock_fn(); + return (-1); } struct res_overlay { @@ -242,31 +304,35 @@ struct res_overlay res_overlay; struct iovec send_ok_joined_iovec; int send_ok = 0; + int reserved_msgs = 0; int flow_control = 0; - int send_ok_joined = 0; for (;;) { sop.sem_num = 0; sop.sem_op = -1; sop.sem_flg = 0; 
retry_semop: + if (ipc_thread_active (conn_info) == 0) { + openais_conn_refcount_dec (conn_info); + pthread_exit (0); + } res = semop (conn_info->semid, &sop, 1); if ((res == -1) && (errno == EINTR || errno == EAGAIN)) { goto retry_semop; } else if ((res == -1) && (errno == EINVAL || errno == EIDRM)) { - openais_conn_refcount_dec (conn); - return (0); - } - if (conn_info->destroyed || conn_info->disconnect_requested) { - break; + openais_conn_refcount_dec (conn_info); + pthread_exit (0); } + openais_conn_refcount_inc (conn_info); + header = (mar_req_header_t *)conn_info->mem->req_buffer; send_ok_joined_iovec.iov_base = (char *)header; send_ok_joined_iovec.iov_len = header->size; - send_ok_joined = totempg_groups_send_ok_joined (openais_group_handle, + reserved_msgs = totempg_groups_joined_reserve ( + openais_group_handle, &send_ok_joined_iovec, 1); /* Sanity check service and header.id */ @@ -292,12 +358,14 @@ } else if(send_ok && flow_control == OPENAIS_FLOW_CONTROL_REQUIRED - && (send_ok_joined == 0 || sync_in_process() != 0)) { + && (reserved_msgs == 0 || sync_in_process() != 0)) { send_ok = 0; } if (send_ok) { + ipc_serialize_lock_fn(); ais_service[conn_info->service]->lib_service[header->id].lib_handler_fn (conn_info, header); + ipc_serialize_unlock_fn(); } else { /* * Overload, tell library to retry @@ -310,9 +378,11 @@ openais_response_send (conn_info, &res_overlay, res_overlay.header.size); } + + totempg_groups_joined_release (reserved_msgs); + openais_conn_refcount_dec (conn); } - openais_conn_refcount_dec (conn); - return (NULL); + pthread_exit (0); } static int @@ -455,18 +525,6 @@ return (0); } -static int poll_handler_connection_destroy( - struct conn_info *conn_info) -{ - int res; - res = conn_info_destroy (conn_info); - if (res == -1) { - return (0); - } else { - return (-1); - } -} - static int poll_handler_connection ( poll_handle handle, int fd, @@ -479,11 +537,16 @@ char buf; + if (ipc_thread_exiting (conn_info)) { + return conn_info_destroy 
(conn_info); + } + /* - * If an error occurs, try to exit if possible + * If an error occurs, request exit */ - if ((conn_info->disconnect_requested) || (revent & (POLLERR|POLLHUP))) { - return poll_handler_connection_destroy (conn_info); + if (revent & (POLLERR|POLLHUP)) { + ipc_disconnect (conn_info); + return (0); } /* @@ -509,8 +572,6 @@ conn_info->shmkey = req_setup->shmkey; conn_info->semkey = req_setup->semkey; conn_info->service = req_setup->service; - conn_info->destroyed = 0; - conn_info->disconnect_requested = 0; conn_info->refcount = 0; conn_info->notify_flow_control_enabled = 0; conn_info->setup_bytes_read = 0; @@ -520,8 +581,12 @@ conn_info->mem = shmat (conn_info->shmid, NULL, 0); conn_info->semid = semget (conn_info->semkey, 3, 0600); conn_info->pending_semops = 0; - conn_info->refcount = 1; - openais_conn_refcount_inc (conn_info); + + /* + * ipc thread is the only reference at startup + */ + conn_info->refcount = 1; + conn_info->state = CONN_STATE_THREAD_ACTIVE; conn_info->private_data = malloc (ais_service[conn_info->service]->private_data_size); memset (conn_info->private_data, 0, @@ -539,7 +604,7 @@ pthread_attr_setstacksize (&conn_info->thread_attr, 200000); #endif - pthread_attr_setdetachstate (&conn_info->thread_attr, PTHREAD_CREATE_DETACHED); + pthread_attr_setdetachstate (&conn_info->thread_attr, PTHREAD_CREATE_JOINABLE); res = pthread_create (&conn_info->thread, &conn_info->thread_attr, pthread_ipc_consumer, @@ -554,6 +619,7 @@ } } else if (revent & POLLIN) { + openais_conn_refcount_inc (conn_info); res = recv (fd, &buf, 1, MSG_NOSIGNAL); if (res == 1) { switch (buf) { @@ -562,26 +628,29 @@ break; case MESSAGE_REQ_CHANGE_EUID: if (priv_change (conn_info) == -1) { - return poll_handler_connection_destroy (conn_info); + ipc_disconnect (conn_info); } break; default: res = 0; break; } + openais_conn_refcount_dec (conn_info); } #if defined(OPENAIS_SOLARIS) || defined(OPENAIS_BSD) || defined(OPENAIS_DARWIN) /* On many OS poll never return 
POLLHUP or POLLERR. * EOF is detected when recvmsg return 0. */ if (res == 0) { - return poll_handler_connection_destroy (conn_info); + ipc_disconnect (conn_info); + return (0); } #endif } + openais_conn_refcount_inc (conn_info); pthread_mutex_lock (&conn_info->mutex); - if ((conn_info->disconnect_requested == 0) && (revent & POLLOUT)) { + if ((conn_info->state == CONN_STATE_THREAD_ACTIVE) && (revent & POLLOUT)) { buf = !list_empty (&conn_info->outq_head); for (; conn_info->pending_semops;) { res = send (conn_info->fd, &buf, 1, MSG_NOSIGNAL); @@ -607,19 +676,25 @@ } } pthread_mutex_unlock (&conn_info->mutex); + openais_conn_refcount_dec (conn_info); return (0); } static void ipc_disconnect (struct conn_info *conn_info) { + if (conn_info->state == CONN_STATE_THREAD_INACTIVE) { + conn_info->state = CONN_STATE_DISCONNECT_INACTIVE; + return; + } + if (conn_info->state != CONN_STATE_THREAD_ACTIVE) { + return; + } pthread_mutex_lock (&conn_info->mutex); - conn_info->disconnect_requested = 1; + conn_info->state = CONN_STATE_THREAD_REQUEST_EXIT; pthread_mutex_unlock (&conn_info->mutex); - poll_dispatch_modify (aisexec_poll_handle, - conn_info->fd, POLLOUT|POLLNVAL, - poll_handler_connection); + pthread_kill (conn_info->thread, SIGUSR1); } static int conn_info_create (int fd) @@ -634,6 +709,7 @@ conn_info->fd = fd; conn_info->service = SOCKET_SERVICE_INIT; + conn_info->state = CONN_STATE_THREAD_INACTIVE; list_init (&conn_info->outq_head); list_init (&conn_info->list); list_add (&conn_info->list, &conn_info_list_head); @@ -737,12 +813,19 @@ source->conn = conn; } -void openais_ipc_init (unsigned int gid_valid) +void openais_ipc_init ( + unsigned int gid_valid, + void (*serialize_lock_fn) (void), + void (*serialize_unlock_fn) (void)) { int libais_server_fd; struct sockaddr_un un_addr; int res; + ipc_serialize_lock_fn = serialize_lock_fn; + + ipc_serialize_unlock_fn = serialize_unlock_fn; + /* * Create socket for libais clients, name socket, listen for connections */ @@ -802,7 
+885,6 @@ shmdt (conn_info->mem); shmctl (conn_info->shmid, IPC_RMID, NULL); semctl (conn_info->semid, 0, IPC_RMID); - conn_info->destroyed = 1; pthread_kill (conn_info->thread, SIGUSR1); } @@ -1039,12 +1121,9 @@ /* * Exit transmission if the connection is dead */ - pthread_mutex_lock (&conn_info->mutex); - if (conn_info->destroyed || conn_info->disconnect_requested) { - pthread_mutex_unlock (&conn_info->mutex); + if (ipc_thread_active (conn) == 0) { return; } - pthread_mutex_unlock (&conn_info->mutex); bytes_left = shared_mem_dispatch_bytes_left (conn_info); for (i = 0; i < iov_len; i++) { diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/openais/exec/ipc.h new/openais/exec/ipc.h --- old/openais/exec/ipc.h 2009-02-13 10:11:05.000000000 +0100 +++ new/openais/exec/ipc.h 2009-03-13 19:59:02.000000000 +0100 @@ -39,7 +39,10 @@ extern int message_source_is_local (mar_message_source_t *source); -extern void openais_ipc_init (unsigned int gid_valid); +extern void openais_ipc_init ( + unsigned int gid_valid, + void (*serialize_lock_fn) (void), + void (*serialize_unlock_fn) (void)); extern void *openais_conn_private_data_get (void *conn); diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/openais/exec/lck.c new/openais/exec/lck.c --- old/openais/exec/lck.c 2008-01-15 11:15:54.000000000 +0100 +++ new/openais/exec/lck.c 2009-03-03 13:39:37.000000000 +0100 @@ -504,11 +504,7 @@ iovec.iov_base = (char *)&req_exec_lck_resourceclose; iovec.iov_len = sizeof (req_exec_lck_resourceclose); - if (totempg_groups_send_ok_joined (openais_group_handle, &iovec, 1)) { - assert (totempg_groups_mcast_joined (openais_group_handle, &iovec, 1, TOTEMPG_AGREED) == 0); - return (0); - } - + assert (totempg_groups_mcast_joined (openais_group_handle, &iovec, 1, TOTEMPG_AGREED) == 0); return (-1); } @@ -1356,9 +1352,7 @@ iovecs[0].iov_base = (char *)&req_exec_lck_resourceclose; iovecs[0].iov_len = sizeof (req_exec_lck_resourceclose); 
- if (totempg_groups_send_ok_joined (openais_group_handle, iovecs, 1)) { - assert (totempg_groups_mcast_joined (openais_group_handle, iovecs, 1, TOTEMPG_AGREED) == 0); - } + assert (totempg_groups_mcast_joined (openais_group_handle, iovecs, 1, TOTEMPG_AGREED) == 0); } else { log_printf (LOG_LEVEL_ERROR, "#### LCK: Could Not Find the Checkpoint to close so Returning Error. ####\n"); diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/openais/exec/main.c new/openais/exec/main.c --- old/openais/exec/main.c 2009-02-24 11:57:34.000000000 +0100 +++ new/openais/exec/main.c 2009-03-13 20:47:05.000000000 +0100 @@ -242,6 +242,7 @@ /* * Call configuration change for all services */ + serialize_mutex_lock (); for (i = 0; i < service_count; i++) { if (ais_service[i] && ais_service[i]->confchg_fn) { ais_service[i]->confchg_fn (configuration_type, @@ -250,6 +251,7 @@ joined_list, joined_list_entries, ring_id); } } + serialize_mutex_unlock (); } static void aisexec_uid_determine (struct main_config *main_config) @@ -411,6 +413,8 @@ return; } + serialize_mutex_lock (); + if (endian_conversion_required) { ais_service[service]->exec_service[fn_id].exec_endian_convert_fn (header); @@ -418,6 +422,8 @@ ais_service[service]->exec_service[fn_id].exec_handler_fn (header, nodeid); + + serialize_mutex_unlock (); } int main (int argc, char **argv) @@ -472,9 +478,7 @@ log_printf (LOG_LEVEL_NOTICE, "AIS Executive Service: started and ready to provide service.\n"); - aisexec_poll_handle = poll_create ( - serialize_mutex_lock, - serialize_mutex_unlock); + aisexec_poll_handle = poll_create (); /* * Load the object database interface @@ -625,7 +629,9 @@ aisexec_mempool_init (); - openais_ipc_init (gid_valid); + openais_ipc_init (gid_valid, + serialize_mutex_lock, + serialize_mutex_unlock); /* * Start main processing loop diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/openais/exec/Makefile new/openais/exec/Makefile --- 
old/openais/exec/Makefile 2009-02-18 03:19:31.000000000 +0100 +++ new/openais/exec/Makefile 2009-03-11 03:25:10.000000000 +0100 @@ -166,7 +166,7 @@ endif aisexec: $(EXEC_OBJS) libtotem_pg.a - $(CC) $(LDFLAGS) $(EXEC_OBJS) $(EXEC_LIBS) -o aisexec + $(CC) $(LDFLAGS) $(EXEC_OBJS) $(EXEC_LIBS) -o aisexec -lpthread libtotem_pg.a: $(TOTEM_OBJS) $(AR) -rc libtotem_pg.a $(TOTEM_OBJS) diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/openais/exec/print.c new/openais/exec/print.c --- old/openais/exec/print.c 2007-06-24 08:33:09.000000000 +0200 +++ new/openais/exec/print.c 2009-03-11 03:25:10.000000000 +0100 @@ -276,11 +276,28 @@ } } +static void print_atfork_prepare(void) +{ + pthread_mutex_lock (&log_mode_mutex); +} + +static void print_atfork_done(void) +{ + pthread_mutex_unlock (&log_mode_mutex); +} + + int log_setup (char **error_string, struct main_config *config) { int i; static char error_string_response[512]; + /* + * Prevent deadlocks in the child if one of the parent threads happens + * to be logging when fork() is called + */ + pthread_atfork(print_atfork_prepare, print_atfork_done, print_atfork_done); + if (config->logmode & LOG_MODE_FILE) { log_file_fp = fopen (config->logfile, "a+"); if (log_file_fp == 0) { diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/openais/exec/totempg.c new/openais/exec/totempg.c --- old/openais/exec/totempg.c 2009-02-11 04:47:23.000000000 +0100 +++ new/openais/exec/totempg.c 2009-03-06 01:29:43.000000000 +0100 @@ -144,6 +144,8 @@ static int mcast_packed_msg_count = 0; +static int totempg_reserved = 0; + /* * Function and data used to log messages */ @@ -221,8 +223,6 @@ .mutex = PTHREAD_MUTEX_INITIALIZER }; -static int send_ok (int msg_size); - static unsigned char next_fragment = 1; static pthread_mutex_t totempg_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -234,6 +234,10 @@ #define log_printf(level, format, args...) 
\ totempg_log_printf (__FILE__, __LINE__, level, format, ##args) +static int msg_count_send_ok (int msg_count); + +static int byte_count_send_ok (int byte_count); + static struct assembly *assembly_ref (unsigned int nodeid) { struct assembly *assembly; @@ -757,7 +761,7 @@ total_size += iovec[i].iov_len; } - if (send_ok (total_size + sizeof(unsigned short) * + if (byte_count_send_ok (total_size + sizeof(unsigned short) * (mcast_packed_msg_count+1)) == 0) { pthread_mutex_unlock (&mcast_msg_mutex); @@ -880,23 +884,50 @@ /* * Determine if a message of msg_size could be queued */ -#define FUZZY_AVAIL_SUBTRACT 5 -static int send_ok ( - int msg_size) +static int msg_count_send_ok ( + int msg_count) { int avail = 0; - int total; - avail = totemmrp_avail () - FUZZY_AVAIL_SUBTRACT; + avail = totemmrp_avail () - totempg_reserved - 1; - /* - * msg size less then totempg_totem_config->net_mtu - 25 will take up - * a full message, so add +1 - * totempg_totem_config->net_mtu - 25 is for the totempg_mcast header - */ - total = (msg_size / (totempg_totem_config->net_mtu - 25)) + 1; + return (avail > msg_count); +} - return (avail >= total); +/* + * This function should only be used by multicasting operations to determine + * if there is sufficient room in the multicast queue for the message. Notice + * that totempg_reserved is not subtracted because the space is already + * reserved. 
+ */ +static int byte_count_send_ok ( + int byte_count) +{ + unsigned int msg_count = 0; + int avail = 0; + + avail = totemmrp_avail () - 1; + + msg_count = (byte_count / (totempg_totem_config->net_mtu - 25)) + 1; + + return (avail > msg_count); +} + +static int send_reserve ( + int msg_size) +{ + unsigned int msg_count = 0; + + msg_count = (msg_size / (totempg_totem_config->net_mtu - 25)) + 1; + totempg_reserved += msg_count; + + return (msg_count); +} + +static void send_release ( + int msg_count) +{ + totempg_reserved -= msg_count; } int totempg_callback_token_create ( @@ -1083,7 +1114,7 @@ return (res); } -int totempg_groups_send_ok_joined ( +int totempg_groups_joined_reserve ( totempg_groups_handle handle, struct iovec *iovec, int iov_len) @@ -1092,6 +1123,7 @@ unsigned int size = 0; unsigned int i; unsigned int res; + unsigned int reserved = 0; pthread_mutex_lock (&totempg_mutex); pthread_mutex_lock (&mcast_msg_mutex); @@ -1108,20 +1140,28 @@ size += iovec[i].iov_len; } - /* - * 2000 is a number chosen to represent the maximum size of - * the totempg header used in the transmission of messages - */ - size += 2000; - - res = send_ok (size); + reserved = send_reserve (size); + if (msg_count_send_ok (reserved) == 0) { + send_release (reserved); + reserved = 0; + } hdb_handle_put (&totempg_groups_instance_database, handle); error_exit: pthread_mutex_unlock (&mcast_msg_mutex); pthread_mutex_unlock (&totempg_mutex); - return (res); + return (reserved); +} + +void totempg_groups_joined_release (int msg_count) +{ + + pthread_mutex_lock (&totempg_mutex); + pthread_mutex_lock (&mcast_msg_mutex); + send_release (msg_count); + pthread_mutex_unlock (&mcast_msg_mutex); + pthread_mutex_unlock (&totempg_mutex); } int totempg_groups_mcast_groups ( @@ -1199,7 +1239,7 @@ size += iovec[i].iov_len; } - res = send_ok (size); + res = msg_count_send_ok (size); hdb_handle_put (&totempg_groups_instance_database, handle); error_exit: diff -urN --exclude=CVS --exclude=.cvsignore 
--exclude=.svn --exclude=.svnignore old/openais/exec/totempg.h new/openais/exec/totempg.h --- old/openais/exec/totempg.h 2007-12-10 22:50:09.000000000 +0100 +++ new/openais/exec/totempg.h 2009-03-03 13:39:37.000000000 +0100 @@ -111,10 +111,13 @@ int iov_len, int guarantee); -extern int totempg_groups_send_ok_joined ( +extern int totempg_groups_joined_reserve ( totempg_groups_handle handle, struct iovec *iovec, int iov_len); + +extern void totempg_groups_joined_release ( + int msg_count); extern int totempg_groups_mcast_groups ( totempg_groups_handle handle, diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/openais/test/cpgbench.c new/openais/test/cpgbench.c --- old/openais/test/cpgbench.c 2009-02-13 10:11:05.000000000 +0100 +++ new/openais/test/cpgbench.c 2009-03-13 19:55:40.000000000 +0100 @@ -107,13 +107,7 @@ */ cpg_flow_control_state_get (handle, &flow_control_state); if (flow_control_state == CPG_FLOW_CONTROL_DISABLED) { -retry: res = cpg_mcast_joined (handle, CPG_TYPE_AGREED, &iov, 1); - if (res == CPG_ERR_TRY_AGAIN) { - break; - } - } else { - break; } res = cpg_dispatch (handle, CPG_DISPATCH_ALL); if (res != CPG_OK) { ++++++ suse.diff ++++++ --- /var/tmp/diff_new_pack.R24708/_old 2009-06-10 17:09:37.000000000 +0200 +++ /var/tmp/diff_new_pack.R24708/_new 2009-06-10 17:09:37.000000000 +0200 @@ -1,7 +1,8 @@ -diff -r f3557238ec7d -r 56ecfd378ab4 conf/openais.conf ---- a/conf/openais.conf -+++ b/conf/openais.conf -@@ -1,22 +1,67 @@ +Index: openais/conf/openais.conf +=================================================================== +--- openais.orig/conf/openais.conf ++++ openais/conf/openais.conf +@@ -1,22 +1,70 @@ # Please read the openais.conf.5 manual page +aisexec { @@ -23,16 +24,16 @@ - threads: 0 + + # How long before declaring a token lost (ms) -+ token: 3000 ++ token: 5000 + + # How many token retransmits before forming a new configuration + token_retransmits_before_loss_const: 10 + + # How long to wait for join 
messages in the membership protocol (ms) -+ join: 60 ++ join: 2000 + + # How long to wait for consensus to be achieved before starting a new round of membership configuration (ms) -+ consensus: 1500 ++ consensus: 2500 + + # Turn off the virtual synchrony filter + vsftype: none @@ -40,6 +41,9 @@ + # Number of messages that may be sent by one processor on receipt of the token + max_messages: 20 + ++ # Stagger sending the node join messages by 1..send_join ms ++ send_join: 45 ++ + # Limit generated nodeids to 31-bits (positive signed integers) + clear_node_high_bit: yes + @@ -66,20 +70,21 @@ } logging { -+ debug: off -+ fileline: off -+ to_syslog: yes - to_stderr: yes +- to_stderr: yes - to_file: yes - logfile: /tmp/ais -- debug: off + debug: off ++ fileline: off ++ to_syslog: yes ++ to_stderr: off + syslog_facility: daemon timestamp: on } -diff -r f3557238ec7d -r 56ecfd378ab4 exec/clm.c ---- a/exec/clm.c -+++ b/exec/clm.c +Index: openais/exec/clm.c +=================================================================== +--- openais.orig/exec/clm.c ++++ openais/exec/clm.c @@ -277,8 +277,14 @@ static void my_cluster_node_load (void) iface_string = totemip_print (&interfaces[0]); @@ -97,24 +102,27 @@ my_cluster_node.node_address.length = strlen ((char *)my_cluster_node.node_address.value); if (totempg_my_family_get () == AF_INET) { -diff -r f3557238ec7d -r 56ecfd378ab4 exec/ipc.c ---- a/exec/ipc.c -+++ b/exec/ipc.c -@@ -321,8 +321,10 @@ req_setup_send ( +Index: openais/exec/ipc.c +=================================================================== +--- openais.orig/exec/ipc.c ++++ openais/exec/ipc.c +@@ -391,9 +391,11 @@ req_setup_send ( int error) { mar_res_setup_t res_setup; -+ unsigned int res; -+ -+ memset(&res_setup, 0, sizeof(mar_res_setup_t)); - res_setup.error = error; -- unsigned int res; +- res_setup.error = error; + unsigned int res; ++ memset(&res_setup, 0, sizeof(mar_res_setup_t)); ++ res_setup.error = error; ++ retry_send: res = send (conn_info->fd, 
&res_setup, sizeof (mar_res_setup_t), MSG_WAITALL); -diff -r f3557238ec7d -r 56ecfd378ab4 exec/main.c ---- a/exec/main.c -+++ b/exec/main.c + if (res == -1 && errno == EINTR) { +Index: openais/exec/main.c +=================================================================== +--- openais.orig/exec/main.c ++++ openais/exec/main.c @@ -113,14 +113,6 @@ static void sigsegv_handler (int num) raise (SIGSEGV); } @@ -130,7 +138,7 @@ #define LOCALHOST_IP inet_addr("127.0.0.1") totempg_groups_handle openais_group_handle; -@@ -463,7 +455,6 @@ int main (int argc, char **argv) +@@ -469,7 +461,6 @@ int main (int argc, char **argv) signal (SIGINT, sigintr_handler); signal (SIGUSR2, sigusr2_handler); signal (SIGSEGV, sigsegv_handler); @@ -138,9 +146,10 @@ signal (SIGQUIT, sigquit_handler); openais_timer_init ( -diff -r f3557238ec7d -r 56ecfd378ab4 exec/service.c ---- a/exec/service.c -+++ b/exec/service.c +Index: openais/exec/service.c +=================================================================== +--- openais.orig/exec/service.c ++++ openais/exec/service.c @@ -298,34 +298,41 @@ extern unsigned int openais_service_unli struct objdb_iface_ver0 *objdb) { @@ -162,7 +171,7 @@ - strlen ("name"), - (void *)&service_name, - NULL); - +- - objdb->object_key_get (object_service_handle, - "ver", - strlen ("ver"), @@ -171,13 +180,14 @@ - - openais_service_unlink_common( - objdb, object_service_handle, service_name, *service_ver); + +- objdb->object_destroy (object_service_handle); + int lpc = SERVICE_HANDLER_MAXIMUM_COUNT - 1; + for( ; lpc >= 0; lpc--) { + if(ais_service[lpc] != NULL) { + /* unload */ + log_printf(LOG_LEVEL_NOTICE, "Unloading slot %d: %s\n", ais_service[lpc]->id, ais_service[lpc]->name); - -- objdb->object_destroy (object_service_handle); ++ objdb->object_find_reset (OBJECT_PARENT_HANDLE); + while (objdb->object_find (OBJECT_PARENT_HANDLE, + "service", strlen ("service"), &object_service_handle) == 0) { @@ -204,9 +214,10 @@ } return (0); -diff -r f3557238ec7d -r 
56ecfd378ab4 exec/totemip.c ---- a/exec/totemip.c -+++ b/exec/totemip.c +Index: openais/exec/totemip.c +=================================================================== +--- openais.orig/exec/totemip.c ++++ openais/exec/totemip.c @@ -445,6 +445,8 @@ int totemip_iface_check(struct totem_ip_ struct totem_ip_address ipaddr; static char rcvbuf[NETLINK_BUFSIZE]; @@ -216,9 +227,10 @@ *interface_up = 0; *interface_num = 0; memset(&ipaddr, 0, sizeof(ipaddr)); -diff -r f3557238ec7d -r 56ecfd378ab4 exec/totemsrp.c ---- a/exec/totemsrp.c -+++ b/exec/totemsrp.c +Index: openais/exec/totemsrp.c +=================================================================== +--- openais.orig/exec/totemsrp.c ++++ openais/exec/totemsrp.c @@ -2569,9 +2569,12 @@ static int token_hold_cancel_send (struc token_hold_cancel.header.nodeid = instance->my_id.addr[0].nodeid; assert (token_hold_cancel.header.nodeid); @@ -232,9 +244,10 @@ iovec[1].iov_base = &instance->my_ring_id; iovec[1].iov_len = sizeof (struct memb_ring_id); -diff -r f3557238ec7d -r 56ecfd378ab4 lib/util.c ---- a/lib/util.c -+++ b/lib/util.c +Index: openais/lib/util.c +=================================================================== +--- openais.orig/lib/util.c ++++ openais/lib/util.c @@ -114,6 +114,9 @@ openais_send ( char *rbuf = (char *)msg; int processed = 0; ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Remember to have fun... -- To unsubscribe, e-mail: opensuse-commit+unsubscribe@opensuse.org For additional commands, e-mail: opensuse-commit+help@opensuse.org
participants (1)
-
root@Hilbert.suse.de