diff --git a/ansible/roles/rabbitmq/defaults/main.yml b/ansible/roles/rabbitmq/defaults/main.yml index ee983d672c2dd2c999941fae63b4372d562970a4..4c059eadb00a80fc498c044f7a2b153f5600c7a5 100644 --- a/ansible/roles/rabbitmq/defaults/main.yml +++ b/ansible/roles/rabbitmq/defaults/main.yml @@ -89,10 +89,16 @@ rabbitmq_cluster_partition_handling: "pause_minority" # More details see: # https://www.rabbitmq.com/ha.html#promoting-unsynchronised-mirrors rabbitmq_ha_promote_on_shutdown: +# The number of RabbitMQ replicas should follow this advice: +# https://www.rabbitmq.com/ha.html#replication-factor +# This means, if you have three RabbitMQ nodes, we request two +# replicas of all queues and exchanges. +# Note: this assumes an odd number of RabbitMQ nodes. # If no replica count is specified, replicates across all nodes with definition # "ha-mode":"all". Otherwise, uses # "ha-mode":"exactly","ha-params":{{ rabbitmq_ha_replica_count | int }} -rabbitmq_ha_replica_count: +rabbitmq_server_count: "{{ groups[role_rabbitmq_groups] | length }}" +rabbitmq_ha_replica_count: "{{ (rabbitmq_server_count | int // 2 + 1) }}" rabbitmq_extra_config: {} #################### diff --git a/releasenotes/notes/rabbitmq-ha-improve-performance-8f29c7657d2999dd.yaml b/releasenotes/notes/rabbitmq-ha-improve-performance-8f29c7657d2999dd.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a21f7061b493173c6a07fe59bf1b27d86ee17749 --- /dev/null +++ b/releasenotes/notes/rabbitmq-ha-improve-performance-8f29c7657d2999dd.yaml @@ -0,0 +1,18 @@ +--- +upgrade: + - | + RabbitMQ replica count has changed from n to (n//2+1) where n is the number + of RabbitMQ nodes. That is, for a 3-node cluster, we request exactly 2 + replicas, for a 1-node cluster, we request 1 replica, and for a 5-node + cluster, we request 3 replicas. This only has an effect if + `om_enable_rabbitmq_high_availability` is set to `True`; otherwise queues + are not replicated. 
The number of mirrored queues is not changed + automatically; instead, the queues must be recreated (for + example, by restarting RabbitMQ). + This follows the good practice advice here: + https://www.rabbitmq.com/ha.html#replication-factor + A major motivation is to reduce the load on RabbitMQ in larger + deployments. It is hoped that the improved performance will also + help RabbitMQ recover more quickly from cluster issues. + Note that the contents of the RabbitMQ definitions.json are now changed, + meaning RabbitMQ containers will be restarted on the next deploy/upgrade.