From 365bb5177ddc044e567f91c579aa2a85d6592bb9 Mon Sep 17 00:00:00 2001
From: Mark Goddard <mark@stackhpc.com>
Date: Fri, 14 Dec 2018 12:01:05 +0000
Subject: [PATCH] Create cells before starting nova services

Nova services may reasonably expect cell databases to exist when they
start. The current cell setup tasks in kolla run after the nova
containers have started, meaning that cells may or may not exist in the
database when they start, depending on timing. In particular, we are
seeing issues in kolla CI currently with jobs timing out waiting for
nova compute services to start. The following error is seen in the nova
logs of these jobs, which may or may not be relevant:

No cells are configured, unable to continue

This change creates the cell0 and cell1 databases prior to starting nova
services.

In order to do this, we must create new containers in which to run the
nova-manage commands, because the nova-api container may not yet exist.
This required adding support to the kolla_docker module for specifying a
command for the container to run that overrides the image's command.

We also add the standard output and error to the module's result when a
non-detached container is run. A secondary benefit of this is that the
output of bootstrap containers is now displayed in the Ansible output if
the bootstrapping command fails, which will help with debugging.

Change-Id: I2c1e991064f9f588f398ccbabda94f69dc285e61
Closes-Bug: #1808575
---
 ansible/library/kolla_docker.py               | 46 +++++++++++++----
 ansible/roles/nova/tasks/create_cells.yml     | 51 +++++++++++++++++++
 ansible/roles/nova/tasks/deploy.yml           |  4 +-
 ...e_cell_setup.yml => discover_computes.yml} | 27 ----------
 tests/test_kolla_docker.py                    | 32 +++++++++++-
 5 files changed, 122 insertions(+), 38 deletions(-)
 create mode 100644 ansible/roles/nova/tasks/create_cells.yml
 rename ansible/roles/nova/tasks/{simple_cell_setup.yml => discover_computes.yml} (62%)

diff --git a/ansible/library/kolla_docker.py b/ansible/library/kolla_docker.py
index 16092ed11..7705c8b9b 100644
--- a/ansible/library/kolla_docker.py
+++ b/ansible/library/kolla_docker.py
@@ -72,6 +72,11 @@ options:
       - The username used to authenticate
     required: False
     type: str
+  command:
+    description:
+      - The command to execute in the container
+    required: False
+    type: str
   detach:
     description:
       - Detach from the container after it is created
@@ -214,6 +219,7 @@ EXAMPLES = '''
 
 import json
 import os
+import shlex
 import traceback
 
 import docker
@@ -229,6 +235,8 @@ class DockerWorker(object):
         self.module = module
         self.params = self.module.params
         self.changed = False
+        # Use this to store arguments to pass to exit_json().
+        self.result = {}
 
         # TLS not fully implemented
         # tls_config = self.generate_tls()
@@ -315,7 +323,8 @@ class DockerWorker(object):
             self.compare_volumes_from(container_info) or
             self.compare_environment(container_info) or
             self.compare_container_state(container_info) or
-            self.compare_dimensions(container_info)
+            self.compare_dimensions(container_info) or
+            self.compare_command(container_info)
         )
 
     def compare_ipc_mode(self, container_info):
@@ -482,6 +491,16 @@ class DockerWorker(object):
                 # '' or 0 - both falsey.
                 return True
 
+    def compare_command(self, container_info):
+        new_command = self.params.get('command')
+        if new_command is not None:
+            new_command_split = shlex.split(new_command)
+            new_path = new_command_split[0]
+            new_args = new_command_split[1:]
+            if (new_path != container_info['Path'] or
+                    new_args != container_info['Args']):
+                return True
+
     def parse_image(self):
         full_image = self.params.get('image')
 
@@ -638,6 +657,7 @@ class DockerWorker(object):
     def build_container_options(self):
         volumes, binds = self.generate_volumes()
         return {
+            'command': self.params.get('command'),
             'detach': self.params.get('detach'),
             'environment': self._format_env_vars(),
             'host_config': self.build_host_config(binds),
@@ -697,15 +717,22 @@ class DockerWorker(object):
             # dict all the time.
             if isinstance(rc, dict):
                 rc = rc['StatusCode']
+            # Include container's return code, standard output and error in the
+            # result.
+            self.result['rc'] = rc
+            self.result['stdout'] = self.dc.logs(self.params.get('name'),
+                                                 stdout=True, stderr=False)
+            self.result['stderr'] = self.dc.logs(self.params.get('name'),
+                                                 stdout=False, stderr=True)
+            if self.params.get('remove_on_exit'):
+                self.stop_container()
+                self.remove_container()
             if rc != 0:
                 self.module.fail_json(
-                    failed=True,
                     changed=True,
-                    msg="Container exited with non-zero return code %s" % rc
+                    msg="Container exited with non-zero return code %s" % rc,
+                    **self.result
                 )
-            if self.params.get('remove_on_exit'):
-                self.stop_container()
-                self.remove_container()
 
     def get_container_env(self):
         name = self.params.get('name')
@@ -817,6 +844,7 @@ def generate_module():
         auth_password=dict(required=False, type='str', no_log=True),
         auth_registry=dict(required=False, type='str'),
         auth_username=dict(required=False, type='str'),
+        command=dict(required=False, type='str'),
         detach=dict(required=False, type='bool', default=True),
         labels=dict(required=False, type='dict', default=dict()),
         name=dict(required=False, type='str'),
@@ -905,10 +933,10 @@ def main():
         # types. If we ever add method that will have to return some
         # meaningful data, we need to refactor all methods to return dicts.
         result = bool(getattr(dw, module.params.get('action'))())
-        module.exit_json(changed=dw.changed, result=result)
+        module.exit_json(changed=dw.changed, result=result, **dw.result)
     except Exception:
-        module.exit_json(failed=True, changed=True,
-                         msg=repr(traceback.format_exc()))
+        module.fail_json(changed=True, msg=repr(traceback.format_exc()),
+                         **dw.result)
 
 # import module snippets
 from ansible.module_utils.basic import *  # noqa
diff --git a/ansible/roles/nova/tasks/create_cells.yml b/ansible/roles/nova/tasks/create_cells.yml
new file mode 100644
index 000000000..a5affb5e9
--- /dev/null
+++ b/ansible/roles/nova/tasks/create_cells.yml
@@ -0,0 +1,51 @@
+---
+- name: Create cell0 mappings
+  vars:
+    nova_api: "{{ nova_services['nova-api'] }}"
+  become: true
+  kolla_docker:
+    action: "start_container"
+    command: bash -c 'sudo -E kolla_set_configs && nova-manage cell_v2 map_cell0'
+    common_options: "{{ docker_common_options }}"
+    detach: False
+    image: "{{ nova_api.image }}"
+    labels:
+      BOOTSTRAP:
+    name: "create_cell0_nova"
+    restart_policy: "never"
+    volumes: "{{ nova_api.volumes|reject('equalto', '')|list }}"
+  register: map_cell0
+  changed_when:
+    - map_cell0 is success
+    - '"Cell0 is already setup" not in map_cell0.stdout'
+  failed_when:
+    - map_cell0.rc != 0
+  run_once: True
+  delegate_to: "{{ groups[nova_api.group][0] }}"
+
+- include_tasks: bootstrap_service.yml
+  when: map_cell0.changed
+
+- name: Create base cell for legacy instances
+  vars:
+    nova_api: "{{ nova_services['nova-api'] }}"
+  become: true
+  kolla_docker:
+    action: "start_container"
+    command: bash -c 'sudo -E kolla_set_configs && nova-manage cell_v2 create_cell'
+    common_options: "{{ docker_common_options }}"
+    detach: False
+    image: "{{ nova_api.image }}"
+    labels:
+      BOOTSTRAP:
+    name: "create_cell_nova"
+    restart_policy: "never"
+    volumes: "{{ nova_api.volumes|reject('equalto', '')|list }}"
+  register: base_cell
+  changed_when:
+    - base_cell is success
+  failed_when:
+    - base_cell.rc != 0
+    - '"already exists" not in base_cell.stdout'
+  run_once: True
+  delegate_to: "{{ groups[nova_api.group][0] }}"
diff --git a/ansible/roles/nova/tasks/deploy.yml b/ansible/roles/nova/tasks/deploy.yml
index 0ea76bd06..5a3989713 100644
--- a/ansible/roles/nova/tasks/deploy.yml
+++ b/ansible/roles/nova/tasks/deploy.yml
@@ -21,7 +21,9 @@
   when: inventory_hostname in groups['nova-api'] or
         inventory_hostname in groups['compute']
 
+- include_tasks: create_cells.yml
+
 - name: Flush handlers
   meta: flush_handlers
 
-- include_tasks: simple_cell_setup.yml
+- include_tasks: discover_computes.yml
diff --git a/ansible/roles/nova/tasks/simple_cell_setup.yml b/ansible/roles/nova/tasks/discover_computes.yml
similarity index 62%
rename from ansible/roles/nova/tasks/simple_cell_setup.yml
rename to ansible/roles/nova/tasks/discover_computes.yml
index 4c82cc768..3035020fb 100644
--- a/ansible/roles/nova/tasks/simple_cell_setup.yml
+++ b/ansible/roles/nova/tasks/discover_computes.yml
@@ -1,31 +1,4 @@
 ---
-- name: Create cell0 mappings
-  command: >
-    docker exec nova_api nova-manage cell_v2 map_cell0
-  register: map_cell0
-  changed_when:
-    - map_cell0 is success
-    - '"Cell0 is already setup" not in map_cell0.stdout'
-  failed_when:
-    - map_cell0.rc != 0
-  run_once: True
-  delegate_to: "{{ groups['nova-api'][0] }}"
-
-- include_tasks: bootstrap_service.yml
-  when: map_cell0.changed
-
-- name: Create base cell for legacy instances
-  command: >
-    docker exec nova_api nova-manage cell_v2 create_cell
-  register: base_cell
-  changed_when:
-    - base_cell is success
-  failed_when:
-    - base_cell.rc != 0
-    - '"already exists" not in base_cell.stdout'
-  run_once: True
-  delegate_to: "{{ groups['nova-api'][0] }}"
-
 - name: Waiting for nova-compute service up
   command: >
     docker exec kolla_toolbox openstack
diff --git a/tests/test_kolla_docker.py b/tests/test_kolla_docker.py
index 79a4ca31e..ff0bee9ee 100644
--- a/tests/test_kolla_docker.py
+++ b/tests/test_kolla_docker.py
@@ -52,6 +52,7 @@ class ModuleArgsTest(base.BaseTestCase):
             auth_password=dict(required=False, type='str', no_log=True),
             auth_registry=dict(required=False, type='str'),
             auth_username=dict(required=False, type='str'),
+            command=dict(required=False, type='str'),
             detach=dict(required=False, type='bool', default=True),
             labels=dict(required=False, type='dict', default=dict()),
             name=dict(required=False, type='str'),
@@ -114,6 +115,7 @@ class ModuleArgsTest(base.BaseTestCase):
 FAKE_DATA = {
 
     'params': {
+        'command': None,
         'detach': True,
         'environment': {},
         'host_config': {
@@ -133,6 +135,7 @@ FAKE_DATA = {
                    'vendor': 'ubuntuOS'},
         'image': 'myregistrydomain.com:5000/ubuntu:16.04',
         'name': 'test_container',
+        'remove_on_exit': True,
         'volumes': None,
         'tty': False,
     },
@@ -210,8 +213,10 @@ class TestContainer(base.BaseTestCase):
         self.assertTrue(self.dw.changed)
         self.fake_data['params'].pop('dimensions')
         self.fake_data['params']['host_config']['blkio_weight'] = '10'
+        expected_args = {'command', 'detach', 'environment', 'host_config',
+                         'image', 'labels', 'name', 'tty', 'volumes'}
         self.dw.dc.create_container.assert_called_once_with(
-            **self.fake_data['params'])
+            **{k: self.fake_data['params'][k] for k in expected_args})
         self.dw.dc.create_host_config.assert_called_with(
             cap_add=None, network_mode='host', ipc_mode=None,
             pid_mode=None, volumes_from=None, blkio_weight=10,
@@ -295,6 +300,31 @@ class TestContainer(base.BaseTestCase):
         self.dw.dc.start.assert_called_once_with(
             container=self.fake_data['params'].get('name'))
 
+    def test_start_container_no_detach(self):
+        self.fake_data['params'].update({'name': 'my_container',
+                                         'detach': False})
+        self.dw = get_DockerWorker(self.fake_data['params'])
+        self.dw.dc.images = mock.MagicMock(
+            return_value=self.fake_data['images'])
+        self.dw.dc.containers = mock.MagicMock(side_effect=[
+            [], self.fake_data['containers'], self.fake_data['containers'],
+            self.fake_data['containers']])
+        self.dw.dc.wait = mock.MagicMock(return_value={'StatusCode': 0})
+        self.dw.dc.logs = mock.MagicMock(
+            side_effect=['fake stdout', 'fake stderr'])
+        self.dw.start_container()
+        self.assertTrue(self.dw.changed)
+        name = self.fake_data['params'].get('name')
+        self.dw.dc.wait.assert_called_once_with(name)
+        self.dw.dc.logs.assert_has_calls([
+            mock.call(name, stdout=True, stderr=False),
+            mock.call(name, stdout=False, stderr=True)])
+        self.dw.dc.stop.assert_called_once_with(name, timeout=10)
+        self.dw.dc.remove_container.assert_called_once_with(
+            container=name, force=True)
+        expected = {'rc': 0, 'stdout': 'fake stdout', 'stderr': 'fake stderr'}
+        self.assertEqual(expected, self.dw.result)
+
     def test_stop_container(self):
         self.dw = get_DockerWorker({'name': 'my_container',
                                     'action': 'stop_container'})
-- 
GitLab