pacemaker  1.1.17-b36b869ca8
Scalable High-Availability cluster resource manager
membership.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 #include <crm_internal.h>
19 
20 #ifndef _GNU_SOURCE
21 # define _GNU_SOURCE
22 #endif
23 
24 #include <sys/param.h>
25 #include <sys/types.h>
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <string.h>
29 #include <glib.h>
30 #include <crm/common/ipc.h>
31 #include <crm/cluster/internal.h>
32 #include <crm/msg_xml.h>
33 #include <crm/stonith-ng.h>
34 
35 #define s_if_plural(i) (((i) == 1)? "" : "s")
36 
37 /* The peer cache remembers cluster nodes that have been seen.
38  * This is managed mostly automatically by libcluster, based on
39  * cluster membership events.
40  *
41  * Because cluster nodes can have conflicting names or UUIDs,
42  * the hash table key is a uniquely generated ID.
43  */
44 GHashTable *crm_peer_cache = NULL;
45 
46 /*
47  * The remote peer cache tracks pacemaker_remote nodes. While the
48  * value has the same type as the peer cache's, it is tracked separately for
49  * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
50  * so the name (which is also the UUID) is used as the hash table key; there
51  * is no equivalent of membership events, so management is not automatic; and
52  * most users of the peer cache need to exclude pacemaker_remote nodes.
53  *
54  * That said, using a single cache would be more logical and less error-prone,
55  * so it would be a good idea to merge them one day.
56  *
57  * libcluster provides two avenues for populating the cache:
58  * crm_remote_peer_get(), crm_remote_peer_cache_add() and
59  * crm_remote_peer_cache_remove() directly manage it,
60  * while crm_remote_peer_cache_refresh() populates it via the CIB.
61  */
62 GHashTable *crm_remote_peer_cache = NULL;
63 
64 unsigned long long crm_peer_seq = 0;
65 gboolean crm_have_quorum = FALSE;
66 static gboolean crm_autoreap = TRUE;
67 
68 int
70 {
71  if (crm_remote_peer_cache == NULL) {
72  return 0;
73  }
74  return g_hash_table_size(crm_remote_peer_cache);
75 }
76 
88 crm_node_t *
89 crm_remote_peer_get(const char *node_name)
90 {
91  crm_node_t *node;
92 
93  if (node_name == NULL) {
94  errno = -EINVAL;
95  return NULL;
96  }
97 
98  /* Return existing cache entry if one exists */
99  node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
100  if (node) {
101  return node;
102  }
103 
104  /* Allocate a new entry */
105  node = calloc(1, sizeof(crm_node_t));
106  if (node == NULL) {
107  return NULL;
108  }
109 
110  /* Populate the essential information */
111  node->flags = crm_remote_node;
112  node->uuid = strdup(node_name);
113  if (node->uuid == NULL) {
114  free(node);
115  errno = -ENOMEM;
116  return NULL;
117  }
118 
119  /* Add the new entry to the cache */
120  g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
121  crm_trace("added %s to remote cache", node_name);
122 
123  /* Update the entry's uname, ensuring peer status callbacks are called */
124  crm_update_peer_uname(node, node_name);
125  return node;
126 }
127 
136 void
137 crm_remote_peer_cache_add(const char *node_name)
138 {
139  CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
140 }
141 
142 void
143 crm_remote_peer_cache_remove(const char *node_name)
144 {
145  if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
146  crm_trace("removed %s from remote peer cache", node_name);
147  }
148 }
149 
161 static const char *
162 remote_state_from_cib(xmlNode *node_state)
163 {
164  const char *status;
165 
166  status = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
167  if (status && !crm_is_true(status)) {
168  status = CRM_NODE_LOST;
169  } else {
170  status = CRM_NODE_MEMBER;
171  }
172  return status;
173 }
174 
175 /* user data for looping through remote node xpath searches */
176 struct refresh_data {
177  const char *field; /* XML attribute to check for node name */
178  gboolean has_state; /* whether to update node state based on XML */
179 };
180 
188 static void
189 remote_cache_refresh_helper(xmlNode *result, void *user_data)
190 {
191  struct refresh_data *data = user_data;
192  const char *remote = crm_element_value(result, data->field);
193  const char *state = NULL;
194  crm_node_t *node;
195 
196  CRM_CHECK(remote != NULL, return);
197 
198  /* Determine node's state, if the result has it */
199  if (data->has_state) {
200  state = remote_state_from_cib(result);
201  }
202 
203  /* Check whether cache already has entry for node */
204  node = g_hash_table_lookup(crm_remote_peer_cache, remote);
205 
206  if (node == NULL) {
207  /* Node is not in cache, so add a new entry for it */
208  node = crm_remote_peer_get(remote);
209  CRM_ASSERT(node);
210  if (state) {
211  crm_update_peer_state(__FUNCTION__, node, state, 0);
212  }
213 
214  } else if (is_set(node->flags, crm_node_dirty)) {
215  /* Node is in cache and hasn't been updated already, so mark it clean */
217  if (state) {
218  crm_update_peer_state(__FUNCTION__, node, state, 0);
219  }
220  }
221 }
222 
223 static void
224 mark_dirty(gpointer key, gpointer value, gpointer user_data)
225 {
226  set_bit(((crm_node_t*)value)->flags, crm_node_dirty);
227 }
228 
229 static gboolean
230 is_dirty(gpointer key, gpointer value, gpointer user_data)
231 {
232  return is_set(((crm_node_t*)value)->flags, crm_node_dirty);
233 }
234 
235 /* search string to find CIB resources entries for guest nodes */
236 #define XPATH_GUEST_NODE_CONFIG \
237  "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
238  "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \
239  "[@name='" XML_RSC_ATTR_REMOTE_NODE "']"
240 
241 /* search string to find CIB resources entries for remote nodes */
242 #define XPATH_REMOTE_NODE_CONFIG \
243  "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
244  "[@type='remote'][@provider='pacemaker']"
245 
246 /* search string to find CIB node status entries for pacemaker_remote nodes */
247 #define XPATH_REMOTE_NODE_STATUS \
248  "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \
249  "[@" XML_NODE_IS_REMOTE "='true']"
250 
256 void
258 {
259  struct refresh_data data;
260 
261  crm_peer_init();
262 
263  /* First, we mark all existing cache entries as dirty,
264  * so that later we can remove any that weren't in the CIB.
265  * We don't empty the cache, because we need to detect changes in state.
266  */
267  g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
268 
269  /* Look for guest nodes and remote nodes in the status section */
270  data.field = "id";
271  data.has_state = TRUE;
273  remote_cache_refresh_helper, &data);
274 
275  /* Look for guest nodes and remote nodes in the configuration section,
276  * because they may have just been added and not have a status entry yet.
277  * In that case, the cached node state will be left NULL, so that the
278  * peer status callback isn't called until we're sure the node started
279  * successfully.
280  */
281  data.field = "value";
282  data.has_state = FALSE;
284  remote_cache_refresh_helper, &data);
285  data.field = "id";
286  data.has_state = FALSE;
288  remote_cache_refresh_helper, &data);
289 
290  /* Remove all old cache entries that weren't seen in the CIB */
291  g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
292 }
293 
294 gboolean
296 {
297  if(node == NULL) {
298  return FALSE;
299  }
300 
301  if (is_set(node->flags, crm_remote_node)) {
302  /* remote nodes are never considered active members. This
303  * guarantees they will never be considered for DC membership.*/
304  return FALSE;
305  }
306 #if SUPPORT_COROSYNC
307  if (is_openais_cluster()) {
308  return crm_is_corosync_peer_active(node);
309  }
310 #endif
311 #if SUPPORT_HEARTBEAT
312  if (is_heartbeat_cluster()) {
313  return crm_is_heartbeat_peer_active(node);
314  }
315 #endif
316  crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
317  return FALSE;
318 }
319 
320 static gboolean
321 crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
322 {
323  crm_node_t *node = value;
324  crm_node_t *search = user_data;
325 
326  if (search == NULL) {
327  return FALSE;
328 
329  } else if (search->id && node->id != search->id) {
330  return FALSE;
331 
332  } else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) {
333  return FALSE;
334 
335  } else if (crm_is_peer_active(value) == FALSE) {
336  crm_info("Removing node with name %s and id %u from membership cache",
337  (node->uname? node->uname : "unknown"), node->id);
338  return TRUE;
339  }
340  return FALSE;
341 }
342 
351 guint
352 reap_crm_member(uint32_t id, const char *name)
353 {
354  int matches = 0;
355  crm_node_t search;
356 
357  if (crm_peer_cache == NULL) {
358  crm_trace("Membership cache not initialized, ignoring purge request");
359  return 0;
360  }
361 
362  search.id = id;
363  search.uname = name ? strdup(name) : NULL;
364  matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
365  if(matches) {
366  crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache",
367  matches, s_if_plural(matches), search.id,
368  (search.uname? " and/or uname=" : ""),
369  (search.uname? search.uname : ""));
370 
371  } else {
372  crm_info("No peers with id=%u%s%s to purge from the membership cache",
373  search.id, (search.uname? " and/or uname=" : ""),
374  (search.uname? search.uname : ""));
375  }
376 
377  free(search.uname);
378  return matches;
379 }
380 
381 static void
382 crm_count_peer(gpointer key, gpointer value, gpointer user_data)
383 {
384  guint *count = user_data;
385  crm_node_t *node = value;
386 
387  if (crm_is_peer_active(node)) {
388  *count = *count + 1;
389  }
390 }
391 
392 guint
394 {
395  guint count = 0;
396 
397  if (crm_peer_cache) {
398  g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
399  }
400  return count;
401 }
402 
403 static void
404 destroy_crm_node(gpointer data)
405 {
406  crm_node_t *node = data;
407 
408  crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
409 
410  free(node->addr);
411  free(node->uname);
412  free(node->state);
413  free(node->uuid);
414  free(node->expected);
415  free(node);
416 }
417 
418 void
420 {
421  if (crm_peer_cache == NULL) {
422  crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
423  }
424 
425  if (crm_remote_peer_cache == NULL) {
426  crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
427  }
428 }
429 
430 void
432 {
433  if (crm_peer_cache != NULL) {
434  crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
435  g_hash_table_destroy(crm_peer_cache);
436  crm_peer_cache = NULL;
437  }
438 
439  if (crm_remote_peer_cache != NULL) {
440  crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
441  g_hash_table_destroy(crm_remote_peer_cache);
442  crm_remote_peer_cache = NULL;
443  }
444 }
445 
446 void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
447 
458 void
459 crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
460 {
461  crm_status_callback = dispatch;
462 }
463 
475 void
476 crm_set_autoreap(gboolean autoreap)
477 {
478  crm_autoreap = autoreap;
479 }
480 
481 static void crm_dump_peer_hash(int level, const char *caller)
482 {
483  GHashTableIter iter;
484  const char *id = NULL;
485  crm_node_t *node = NULL;
486 
487  g_hash_table_iter_init(&iter, crm_peer_cache);
488  while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
489  do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
490  }
491 }
492 
493 static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
494 {
495  if(value == user_data) {
496  return TRUE;
497  }
498  return FALSE;
499 }
500 
501 crm_node_t *
502 crm_find_peer_full(unsigned int id, const char *uname, int flags)
503 {
504  crm_node_t *node = NULL;
505 
506  CRM_ASSERT(id > 0 || uname != NULL);
507 
508  crm_peer_init();
509 
510  if (flags & CRM_GET_PEER_REMOTE) {
511  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
512  }
513 
514  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
515  node = crm_find_peer(id, uname);
516  }
517  return node;
518 }
519 
520 crm_node_t *
521 crm_get_peer_full(unsigned int id, const char *uname, int flags)
522 {
523  crm_node_t *node = NULL;
524 
525  CRM_ASSERT(id > 0 || uname != NULL);
526 
527  crm_peer_init();
528 
529  if (flags & CRM_GET_PEER_REMOTE) {
530  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
531  }
532 
533  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
534  node = crm_get_peer(id, uname);
535  }
536  return node;
537 }
538 
539 crm_node_t *
540 crm_find_peer(unsigned int id, const char *uname)
541 {
542  GHashTableIter iter;
543  crm_node_t *node = NULL;
544  crm_node_t *by_id = NULL;
545  crm_node_t *by_name = NULL;
546 
547  CRM_ASSERT(id > 0 || uname != NULL);
548 
549  crm_peer_init();
550 
551  if (uname != NULL) {
552  g_hash_table_iter_init(&iter, crm_peer_cache);
553  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
554  if(node->uname && strcasecmp(node->uname, uname) == 0) {
555  crm_trace("Name match: %s = %p", node->uname, node);
556  by_name = node;
557  break;
558  }
559  }
560  }
561 
562  if (id > 0) {
563  g_hash_table_iter_init(&iter, crm_peer_cache);
564  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
565  if(node->id == id) {
566  crm_trace("ID match: %u = %p", node->id, node);
567  by_id = node;
568  break;
569  }
570  }
571  }
572 
573  node = by_id; /* Good default */
574  if(by_id == by_name) {
575  /* Nothing to do if they match (both NULL counts) */
576  crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
577 
578  } else if(by_id == NULL && by_name) {
579  crm_trace("Only one: %p for %u/%s", by_name, id, uname);
580 
581  if(id && by_name->id) {
582  crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
583  crm_crit("Node %u and %u share the same name '%s'",
584  id, by_name->id, uname);
585  node = NULL; /* Create a new one */
586 
587  } else {
588  node = by_name;
589  }
590 
591  } else if(by_name == NULL && by_id) {
592  crm_trace("Only one: %p for %u/%s", by_id, id, uname);
593 
594  if(uname && by_id->uname) {
595  crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
596  crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
597  uname, by_id->uname, id, uname);
598  }
599 
600  } else if(uname && by_id->uname) {
601  if(safe_str_eq(uname, by_id->uname)) {
602  crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
603  g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
604 
605  } else {
606  crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
607  crm_dump_peer_hash(LOG_INFO, __FUNCTION__);
608  crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE);
609  }
610 
611  } else if(id && by_name->id) {
612  crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
613 
614  } else {
615  /* Simple merge */
616 
617  /* Only corosync based clusters use nodeid's
618  *
619  * The functions that call crm_update_peer_state() only know nodeid
620  * so 'by_id' is authorative when merging
621  *
622  * Same for crm_update_peer_proc()
623  */
624  crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__);
625 
626  crm_info("Merging %p into %p", by_name, by_id);
627  g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
628  }
629 
630  return node;
631 }
632 
#if SUPPORT_COROSYNC
/*!
 * \internal
 * \brief Purge inactive cache entries that share a node's name but not its ID
 *
 * \param[in] node  Node whose name may conflict with other cached entries
 *
 * \return Number of cache entries removed
 */
static guint
crm_remove_conflicting_peer(crm_node_t *node)
{
    int removed = 0;
    GHashTableIter iter;
    crm_node_t *other = NULL;

    /* Both an ID and a name are needed to detect a conflict */
    if (node->id == 0 || node->uname == NULL) {
        return 0;
    }

# if !SUPPORT_PLUGIN
    if (corosync_cmap_has_config("nodelist") != 0) {
        return 0;
    }
# endif

    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &other)) {
        /* Skip entries without an ID, and the node's own ID */
        if (other->id == 0 || other->id == node->id) {
            continue;
        }
        /* Skip entries whose name differs (or is unknown) */
        if (other->uname == NULL
            || strcasecmp(other->uname, node->uname) != 0) {
            continue;
        }
        /* Never remove a peer that is still active */
        if (crm_is_peer_active(other)) {
            continue;
        }

        crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
                 other->id, other->uname, node->id);

        g_hash_table_iter_remove(&iter);
        removed++;
    }

    return removed;
}
#endif
673 
674 /* coverity[-alloc] Memory is referenced in one or both hashtables */
675 crm_node_t *
676 crm_get_peer(unsigned int id, const char *uname)
677 {
678  crm_node_t *node = NULL;
679  char *uname_lookup = NULL;
680 
681  CRM_ASSERT(id > 0 || uname != NULL);
682 
683  crm_peer_init();
684 
685  node = crm_find_peer(id, uname);
686 
687  /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
688  * we need to do a lookup of the node name using the id in the cluster membership. */
689  if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
690  uname_lookup = get_node_name(id);
691  }
692 
693  if (uname_lookup) {
694  uname = uname_lookup;
695  crm_trace("Inferred a name of '%s' for node %u", uname, id);
696 
697  /* try to turn up the node one more time now that we know the uname. */
698  if (node == NULL) {
699  node = crm_find_peer(id, uname);
700  }
701  }
702 
703 
704  if (node == NULL) {
705  char *uniqueid = crm_generate_uuid();
706 
707  node = calloc(1, sizeof(crm_node_t));
708  CRM_ASSERT(node);
709 
710  crm_info("Created entry %s/%p for node %s/%u (%d total)",
711  uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
712  g_hash_table_replace(crm_peer_cache, uniqueid, node);
713  }
714 
715  if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
716  crm_info("Node %u is now known as %s", id, uname);
717  }
718 
719  if(id > 0 && node->id == 0) {
720  node->id = id;
721  }
722 
723  if (uname && (node->uname == NULL)) {
724  crm_update_peer_uname(node, uname);
725  }
726 
727  if(node->uuid == NULL) {
728  const char *uuid = crm_peer_uuid(node);
729 
730  if (uuid) {
731  crm_info("Node %u has uuid %s", id, uuid);
732 
733  } else {
734  crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
735  }
736  }
737 
738  free(uname_lookup);
739 
740  return node;
741 }
742 
754 crm_node_t *
755 crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes,
756  uint32_t children, const char *uuid, const char *uname, const char *addr,
757  const char *state)
758 {
759 #if SUPPORT_PLUGIN
760  gboolean addr_changed = FALSE;
761  gboolean votes_changed = FALSE;
762 #endif
763  crm_node_t *node = NULL;
764 
765  id = get_corosync_id(id, uuid);
766  node = crm_get_peer(id, uname);
767 
768  CRM_ASSERT(node != NULL);
769 
770  if (node->uuid == NULL) {
771  if (is_openais_cluster()) {
772  /* Yes, overrule whatever was passed in */
773  crm_peer_uuid(node);
774 
775  } else if (uuid != NULL) {
776  node->uuid = strdup(uuid);
777  }
778  }
779 
780  if (children > 0) {
781  if (crm_update_peer_proc(source, node, children, state) == NULL) {
782  return NULL;
783  }
784  }
785 
786  if (state != NULL) {
787  if (crm_update_peer_state(source, node, state, seen) == NULL) {
788  return NULL;
789  }
790  }
791 #if SUPPORT_HEARTBEAT
792  if (born != 0) {
793  node->born = born;
794  }
795 #endif
796 
797 #if SUPPORT_PLUGIN
798  /* These were only used by the plugin */
799  if (born != 0) {
800  node->born = born;
801  }
802 
803  if (votes > 0 && node->votes != votes) {
804  votes_changed = TRUE;
805  node->votes = votes;
806  }
807 
808  if (addr != NULL) {
809  if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) {
810  addr_changed = TRUE;
811  free(node->addr);
812  node->addr = strdup(addr);
813  }
814  }
815  if (addr_changed || votes_changed) {
816  crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T
817  " proc=%.32x", source, node->uname, node->id, node->state,
818  node->addr, addr_changed ? " (new)" : "", node->votes,
819  votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes);
820  }
821 #endif
822 
823  return node;
824 }
825 
837 void
839 {
840  int i, len = strlen(uname);
841 
842  for (i = 0; i < len; i++) {
843  if (uname[i] >= 'A' && uname[i] <= 'Z') {
844  crm_warn("Node names with capitals are discouraged, consider changing '%s'",
845  uname);
846  break;
847  }
848  }
849 
850  free(node->uname);
851  node->uname = strdup(uname);
852  if (crm_status_callback) {
854  }
855 
856 #if SUPPORT_COROSYNC
857  if (is_openais_cluster() && !is_set(node->flags, crm_remote_node)) {
858  crm_remove_conflicting_peer(node);
859  }
860 #endif
861 }
862 
879 crm_node_t *
880 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
881 {
882  uint32_t last = 0;
883  gboolean changed = FALSE;
884 
885  CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
886  source, peer2text(flag), status); return NULL);
887 
888  /* Pacemaker doesn't spawn processes on remote nodes */
889  if (is_set(node->flags, crm_remote_node)) {
890  return node;
891  }
892 
893  last = node->processes;
894  if (status == NULL) {
895  node->processes = flag;
896  if (node->processes != last) {
897  changed = TRUE;
898  }
899 
900  } else if (safe_str_eq(status, ONLINESTATUS)) {
901  if ((node->processes & flag) != flag) {
902  set_bit(node->processes, flag);
903  changed = TRUE;
904  }
905 #if SUPPORT_PLUGIN
906  } else if (safe_str_eq(status, CRM_NODE_MEMBER)) {
907  if (flag > 0 && node->processes != flag) {
908  node->processes = flag;
909  changed = TRUE;
910  }
911 #endif
912 
913  } else if (node->processes & flag) {
914  clear_bit(node->processes, flag);
915  changed = TRUE;
916  }
917 
918  if (changed) {
919  if (status == NULL && flag <= crm_proc_none) {
920  crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
921  node->id);
922  } else {
923  crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
924  peer2text(flag), status);
925  }
926 
927  /* Call the client callback first, then update the peer state,
928  * in case the node will be reaped
929  */
930  if (crm_status_callback) {
932  }
933 
934  /* The client callback shouldn't touch the peer caches,
935  * but as a safety net, bail if the peer cache was destroyed.
936  */
937  if (crm_peer_cache == NULL) {
938  return NULL;
939  }
940 
941  if (crm_autoreap) {
942  node = crm_update_peer_state(__FUNCTION__, node,
943  is_set(node->processes, crm_get_cluster_proc())?
945  }
946  } else {
947  crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
948  peer2text(flag), status);
949  }
950  return node;
951 }
952 
953 void
954 crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
955 {
956  char *last = NULL;
957  gboolean changed = FALSE;
958 
959  CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
960  return);
961 
962  /* Remote nodes don't participate in joins */
963  if (is_set(node->flags, crm_remote_node)) {
964  return;
965  }
966 
967  last = node->expected;
968  if (expected != NULL && safe_str_neq(node->expected, expected)) {
969  node->expected = strdup(expected);
970  changed = TRUE;
971  }
972 
973  if (changed) {
974  crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
975  expected, last);
976  free(last);
977  } else {
978  crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
979  node->id, expected);
980  }
981 }
982 
999 static crm_node_t *
1000 crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, int membership, GHashTableIter *iter)
1001 {
1002  gboolean is_member;
1003 
1004  CRM_CHECK(node != NULL,
1005  crm_err("Could not set state for unknown host to %s"
1006  CRM_XS " source=%s", state, source);
1007  return NULL);
1008 
1009  is_member = safe_str_eq(state, CRM_NODE_MEMBER);
1010  if (membership && is_member) {
1011  node->last_seen = membership;
1012  }
1013 
1014  if (state && safe_str_neq(node->state, state)) {
1015  char *last = node->state;
1016  enum crm_status_type status_type = is_set(node->flags, crm_remote_node)?
1018 
1019  node->state = strdup(state);
1020  crm_notice("Node %s state is now %s " CRM_XS
1021  " nodeid=%u previous=%s source=%s", node->uname, state,
1022  node->id, (last? last : "unknown"), source);
1023  if (crm_status_callback) {
1024  crm_status_callback(status_type, node, last);
1025  }
1026  free(last);
1027 
1028  if (crm_autoreap && !is_member && !is_set(node->flags, crm_remote_node)) {
1029  /* We only autoreap from the peer cache, not the remote peer cache,
1030  * because the latter should be managed only by
1031  * crm_remote_peer_cache_refresh().
1032  */
1033  if(iter) {
1034  crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
1035  g_hash_table_iter_remove(iter);
1036 
1037  } else {
1038  reap_crm_member(node->id, node->uname);
1039  }
1040  node = NULL;
1041  }
1042 
1043  } else {
1044  crm_trace("Node %s state is unchanged (%s) " CRM_XS
1045  " nodeid=%u source=%s", node->uname, state, node->id, source);
1046  }
1047  return node;
1048 }
1049 
1065 crm_node_t *
1066 crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership)
1067 {
1068  return crm_update_peer_state_iter(source, node, state, membership, NULL);
1069 }
1070 
1077 void
1078 crm_reap_unseen_nodes(uint64_t membership)
1079 {
1080  GHashTableIter iter;
1081  crm_node_t *node = NULL;
1082 
1083  crm_trace("Reaping unseen nodes...");
1084  g_hash_table_iter_init(&iter, crm_peer_cache);
1085  while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1086  if (node->last_seen != membership) {
1087  if (node->state) {
1088  /*
1089  * Calling crm_update_peer_state_iter() allows us to
1090  * remove the node from crm_peer_cache without
1091  * invalidating our iterator
1092  */
1093  crm_update_peer_state_iter(__FUNCTION__, node, CRM_NODE_LOST, membership, &iter);
1094 
1095  } else {
1096  crm_info("State of node %s[%u] is still unknown",
1097  node->uname, node->id);
1098  }
1099  }
1100  }
1101 }
1102 
1103 int
1104 crm_terminate_member(int nodeid, const char *uname, void *unused)
1105 {
1106  /* Always use the synchronous, non-mainloop version */
1107  return stonith_api_kick(nodeid, uname, 120, TRUE);
1108 }
1109 
1110 int
1111 crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
1112 {
1113  return stonith_api_kick(nodeid, uname, 120, TRUE);
1114 }
uint32_t votes
Definition: internal.h:50
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:164
void crm_remote_peer_cache_add(const char *node_name)
Add a node to the remote peer cache.
Definition: membership.c:137
void crm_reap_unseen_nodes(uint64_t membership)
Definition: membership.c:1078
#define crm_notice(fmt, args...)
Definition: logging.h:250
#define CRM_NODE_LOST
Definition: cluster.h:43
#define XPATH_REMOTE_NODE_STATUS
Definition: membership.c:247
GHashTable * crm_peer_cache
Definition: membership.c:44
gboolean is_openais_cluster(void)
Definition: cluster.c:630
#define crm_crit(fmt, args...)
Definition: logging.h:247
gboolean safe_str_neq(const char *a, const char *b)
Definition: strings.c:150
char * crm_generate_uuid(void)
Definition: utils.c:1952
uint64_t flags
Definition: cluster.h:76
void crm_peer_destroy(void)
Definition: membership.c:431
uint32_t id
Definition: cluster.h:73
gboolean is_heartbeat_cluster(void)
Definition: cluster.c:645
uint64_t born
Definition: cluster.h:74
char * uuid
Definition: cluster.h:83
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
Definition: st_client.c:2550
crm_node_t * crm_find_peer(unsigned int id, const char *uname)
Definition: membership.c:540
int get_corosync_id(int id, const char *uuid)
Definition: cluster.c:96
gboolean crm_have_quorum
Definition: membership.c:65
crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:502
GHashTable * crm_remote_peer_cache
Definition: membership.c:62
char * addr
Definition: cluster.h:87
#define clear_bit(word, bit)
Definition: crm_internal.h:193
unsigned long long crm_peer_seq
Definition: membership.c:64
char * get_node_name(uint32_t nodeid)
Definition: cluster.c:301
void crm_set_autoreap(gboolean autoreap)
Tell the library whether to automatically reap lost nodes.
Definition: membership.c:476
void crm_peer_init(void)
Definition: membership.c:419
void crm_remote_peer_cache_remove(const char *node_name)
Definition: membership.c:143
gboolean crm_is_corosync_peer_active(const crm_node_t *node)
Definition: corosync.c:468
char uname[MAX_NAME]
Definition: internal.h:53
int crm_remote_peer_cache_size(void)
Definition: membership.c:69
#define crm_warn(fmt, args...)
Definition: logging.h:249
#define set_bit(word, bit)
Definition: crm_internal.h:192
uint32_t processes
Definition: cluster.h:79
crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:521
crm_node_t * crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state)
Definition: membership.c:755
guint reap_crm_member(uint32_t id, const char *name)
Remove all peer cache entries matching a node ID and/or uname.
Definition: membership.c:352
gboolean crm_is_peer_active(const crm_node_t *node)
Definition: membership.c:295
uint32_t id
Definition: internal.h:48
guint crm_strcase_hash(gconstpointer v)
Definition: strings.c:280
#define XPATH_GUEST_NODE_CONFIG
Definition: membership.c:236
crm_status_type
Definition: cluster.h:198
void crm_update_peer_expected(const char *source, crm_node_t *node, const char *expected)
Definition: membership.c:954
#define crm_trace(fmt, args...)
Definition: logging.h:254
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:129
const char * crm_element_value(xmlNode *data, const char *name)
Definition: xml.c:5134
int corosync_cmap_has_config(const char *prefix)
Definition: corosync.c:594
#define CRM_NODE_MEMBER
Definition: cluster.h:44
void crm_update_peer_uname(crm_node_t *node, const char *uname)
Definition: membership.c:838
gboolean crm_str_eq(const char *a, const char *b, gboolean use_case)
Definition: strings.c:213
void crm_set_status_callback(void(*dispatch)(enum crm_status_type, crm_node_t *, const void *))
Set a client function that will be called after peer status changes.
Definition: membership.c:459
const char * name_for_cluster_type(enum cluster_type_e type)
Definition: cluster.c:468
int crm_terminate_member(int nodeid, const char *uname, void *unused)
Definition: membership.c:1104
char * expected
Definition: cluster.h:85
void(* crm_status_callback)(enum crm_status_type, crm_node_t *, const void *)
Definition: membership.c:446
#define CRM_XS
Definition: logging.h:42
void crm_remote_peer_cache_refresh(xmlNode *cib)
Repopulate the remote peer cache based on CIB XML.
Definition: membership.c:257
guint crm_active_peers(void)
Definition: membership.c:393
crm_node_t * crm_remote_peer_get(const char *node_name)
Get a remote node peer cache entry, creating it if necessary.
Definition: membership.c:89
#define crm_err(fmt, args...)
Definition: logging.h:248
Fencing aka. STONITH.
#define uint32_t
Definition: stdint.in.h:158
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
Definition: membership.c:1111
#define CRM_ASSERT(expr)
Definition: error.h:35
char data[0]
Definition: internal.h:58
char * state
Definition: cluster.h:84
void crm_foreach_xpath_result(xmlNode *xml, const char *xpath, void(*helper)(xmlNode *, void *), void *user_data)
Run a supplied function for each result of an xpath search.
Definition: xpath.c:179
#define U64T
Definition: config.h:753
Wrappers for and extensions to libqb IPC.
crm_node_t * crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status)
Definition: membership.c:880
int32_t votes
Definition: cluster.h:78
char * uname
Definition: cluster.h:82
uint64_t last_seen
Definition: cluster.h:75
#define XML_NODE_IN_CLUSTER
Definition: msg_xml.h:258
gboolean crm_is_true(const char *s)
Definition: strings.c:165
#define safe_str_eq(a, b)
Definition: util.h:64
#define ONLINESTATUS
Definition: util.h:49
void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork)
Definition: utils.c:970
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
Definition: membership.c:676
#define XPATH_REMOTE_NODE_CONFIG
Definition: membership.c:242
#define crm_info(fmt, args...)
Definition: logging.h:251
const char * crm_peer_uuid(crm_node_t *node)
Definition: cluster.c:135
uint64_t flags
Definition: remote.c:156
#define s_if_plural(i)
Definition: membership.c:35
#define int32_t
Definition: stdint.in.h:157
enum cluster_type_e get_cluster_type(void)
Definition: cluster.c:513
crm_node_t * crm_update_peer_state(const char *source, crm_node_t *node, const char *state, int membership)
Update a node's state and membership information.
Definition: membership.c:1066