From 6bdf202658e08bb9f43ca50334587b05dc4bac03 Mon Sep 17 00:00:00 2001
From: Mark Goddard <mark@stackhpc.com>
Date: Thu, 24 Oct 2019 15:01:42 +0100
Subject: [PATCH] Fix nova scheduler down after first docker restart
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Due to a Docker bug [1] we cannot use Docker to send
SIGHUP to the container, because Docker will then mark
the container as stopped.
This patch sends the signal directly to the process,
bypassing Docker.

'changed_when: false' is also removed from the
relevant task, as sending the signal does change state.
In the future we could perform the refresh only when
one is actually needed.

[1] https://github.com/moby/moby/issues/11065

Change-Id: Ief73bbd24568d6941384ea3330ab45f11aa42d37
Co-authored-by: Radosław Piliszek <radoslaw.piliszek@gmail.com>
Closes-Bug: #1845244
---
 ansible/roles/nova/tasks/refresh_scheduler_cell_cache.yml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/nova/tasks/refresh_scheduler_cell_cache.yml b/ansible/roles/nova/tasks/refresh_scheduler_cell_cache.yml
index 286b111c4..a8ece2360 100644
--- a/ansible/roles/nova/tasks/refresh_scheduler_cell_cache.yml
+++ b/ansible/roles/nova/tasks/refresh_scheduler_cell_cache.yml
@@ -3,7 +3,10 @@
 # in nova scheduler.
 - name: Refresh cell cache in nova scheduler
   become: true
-  command: docker kill --signal HUP nova_scheduler
-  changed_when: False
+  # NOTE(yoctozepto): Signal the process directly, not via 'docker kill'. Due
+  # to a Docker bug (https://github.com/moby/moby/issues/11065), signalling via
+  # Docker marks the container as stopped, so with the default 'unless-stopped'
+  # restart policy it may stay down after a Docker restart or a server reboot.
+  shell: "kill -HUP `docker inspect -f '{% raw %}{{.State.Pid}}{% endraw %}' nova_scheduler`"
   when:
     - inventory_hostname in groups['nova-scheduler']
-- 
GitLab