From 6cf22b0cb1f2dc4d8910409284fa5757a7dd67a1 Mon Sep 17 00:00:00 2001 From: John Garbutt <john.garbutt@stackhpc.com> Date: Fri, 17 Dec 2021 17:34:44 +0000 Subject: [PATCH] Improve RabbitMQ performance by reducing ha replicas Currently we do not follow the RabbitMQ advice on replicas here: https://www.rabbitmq.com/ha.html#replication-factor Here we reduce the number of replicas to n // 2 + 1 as advised above. The hope is that this helps speed up recovery from rabbit issues. Related-Bug: #1954925 Change-Id: Ib6bcb26c499c9884faa4a0cd51abaec00cacb096 --- ansible/roles/rabbitmq/defaults/main.yml | 8 +++++++- ...a-improve-performance-8f29c7657d2999dd.yaml | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/rabbitmq-ha-improve-performance-8f29c7657d2999dd.yaml diff --git a/ansible/roles/rabbitmq/defaults/main.yml b/ansible/roles/rabbitmq/defaults/main.yml index ee983d672..4c059eadb 100644 --- a/ansible/roles/rabbitmq/defaults/main.yml +++ b/ansible/roles/rabbitmq/defaults/main.yml @@ -89,10 +89,16 @@ rabbitmq_cluster_partition_handling: "pause_minority" # More details see: # https://www.rabbitmq.com/ha.html#promoting-unsynchronised-mirrors rabbitmq_ha_promote_on_shutdown: +# The number of rabbitmq replicas should follow this advice: +# https://www.rabbitmq.com/ha.html#replication-factor +# This means, if you have three rabbit nodes, we request two +# replicas of all queues and exchanges. +# Note: this assumes an odd number of rabbitmq nodes. # If no replica count is specified, replicates across all nodes with definition # "ha-mode":"all". 
Otherwise, uses # "ha-mode":"exactly","ha-params":{{ rabbitmq_ha_replica_count | int }} -rabbitmq_ha_replica_count: +rabbitmq_server_count: "{{ groups[role_rabbitmq_groups] | length }}" +rabbitmq_ha_replica_count: "{{ (rabbitmq_server_count | int // 2 + 1) }}" rabbitmq_extra_config: {} #################### diff --git a/releasenotes/notes/rabbitmq-ha-improve-performance-8f29c7657d2999dd.yaml b/releasenotes/notes/rabbitmq-ha-improve-performance-8f29c7657d2999dd.yaml new file mode 100644 index 000000000..a21f7061b --- /dev/null +++ b/releasenotes/notes/rabbitmq-ha-improve-performance-8f29c7657d2999dd.yaml @@ -0,0 +1,18 @@ +--- +upgrade: + - | + RabbitMQ replica count has changed from n to (n//2+1) where n is the number + of RabbitMQ nodes. That is, for a 3 node cluster, we request exactly 2 + replicas, for a 1 node cluster, we request 1 replica, and for a 5 node + cluster, we request 3 replicas. This only has an effect if + `om_enable_rabbitmq_high_availability` is set to `True`, otherwise queues + are not replicated. The number of mirrored queues is not changed + automatically, and instead requires the queues to be recreated (for + example, by restarting RabbitMQ). + This follows the good practice advice here: + https://www.rabbitmq.com/ha.html#replication-factor + A major motivation is to reduce the load on RabbitMQ in larger + deployments. It is hoped that the improved performance will also + help rabbitmq recover more quickly from cluster issues. + Note that the contents of the RabbitMQ definitions.json are now changed, + meaning RabbitMQ containers will be restarted on next deploy/upgrade. -- GitLab