From df00ba22e7e9b7bbbd299e669d5219862f57cb08 Mon Sep 17 00:00:00 2001
From: Mark Goddard <mark@stackhpc.com>
Date: Fri, 26 Mar 2021 17:24:05 +0000
Subject: [PATCH] CI: increase Ansible Galaxy retries & add delay

We still see flakiness when downloading content from Ansible Galaxy,
often HTTP 520.

This change increases the retries from 3 to 10, and adds a 5 second
delay between attempts.

Change-Id: I0c46e5fcc6979027dc6f1bc5cc49e923a205f654
Related: https://github.com/ansible/galaxy/issues/2429
---
 dev/functions                   | 14 ++++++++++----
 tools/ansible-galaxy-retried.sh |  6 ++++--
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/dev/functions b/dev/functions
index 019c54fd..00377528 100644
--- a/dev/functions
+++ b/dev/functions
@@ -225,13 +225,16 @@ function run_kayobe {
 }
 
 function control_host_bootstrap {
+    attempts=10
+    interval=5
     echo "Bootstrapping the Ansible control host"
-    for i in $(seq 1 3); do
+    for i in $(seq 1 $attempts); do
         if run_kayobe control host bootstrap; then
             chb_success=1
             break
         fi
-        echo "Control host bootstrap failed - likely Ansible Galaxy flakiness. Retrying"
+        echo "Control host bootstrap failed - likely Ansible Galaxy flakiness. Sleeping $interval seconds before retrying"
+        sleep $interval
     done
     if [[ -z ${chb_success+x} ]]; then
         die $LINENO "Failed to bootstrap control host"
@@ -241,13 +244,16 @@ function control_host_bootstrap {
 }
 
 function control_host_upgrade {
+    attempts=10
+    interval=5
     echo "Upgrading the Ansible control host"
-    for i in $(seq 1 3); do
+    for i in $(seq 1 $attempts); do
         if run_kayobe control host upgrade; then
             chu_success=1
             break
         fi
-        echo "Control host upgrade failed - likely Ansible Galaxy flakiness. Retrying"
+        echo "Control host upgrade failed - likely Ansible Galaxy flakiness. Sleeping $interval seconds before retrying"
+        sleep $interval
     done
     if [[ -z ${chu_success+x} ]]; then
         die $LINENO "Failed to upgrade control host"
diff --git a/tools/ansible-galaxy-retried.sh b/tools/ansible-galaxy-retried.sh
index 4082f98e..846c132f 100755
--- a/tools/ansible-galaxy-retried.sh
+++ b/tools/ansible-galaxy-retried.sh
@@ -2,13 +2,15 @@
 
 set -e
 
-GALAXY_RETRIES=${GALAXY_RETRIES:-3}
+GALAXY_RETRIES=${GALAXY_RETRIES:-10}
+GALAXY_INTERVAL=${GALAXY_INTERVAL:-5}
 
 for i in $(seq 1 $GALAXY_RETRIES); do
     if ansible-galaxy "${@}"; then
         exit 0
     fi
-    echo "Ansible Galaxy command failed. Retrying"
+    echo "Ansible Galaxy command failed. Sleeping $GALAXY_INTERVAL seconds before retry"
+    sleep $GALAXY_INTERVAL
 done
 
 echo "Failed to execute: ansible-galaxy ${@}"
-- 
GitLab