From 9c1d085d2e52396d05397afb0f658224bda0087c Mon Sep 17 00:00:00 2001
From: Mark Goddard <mark@stackhpc.com>
Date: Fri, 12 Jan 2018 18:29:20 +0000
Subject: [PATCH] Work around resolv.conf issue in CentOS cloud images

The CentOS cloud images from 7.2 (1511) onwards have a bogus name server
entry in /etc/resolv.conf, 10.0.2.3. Cloud-init only appends name server
entries to this file, and will not remove this bogus entry. Typically this
leads to a delay of around 30 seconds when connecting via SSH, due to a
timeout in NSS. The workaround employed here is to remove this bogus entry
from the image using virt-customize, if it exists. See
https://bugs.centos.org/view.php?id=14369.

Fixes: #112
---
 ansible/group_vars/all/overcloud              | 12 +++++++
 ...overcloud-host-image-workaround-resolv.yml | 36 +++++++++++++++++++
 doc/source/release-notes.rst                  |  7 ++++
 etc/kayobe/overcloud.yml                      | 12 +++++++
 kayobe/cli/commands.py                        |  5 ++-
 kayobe/tests/unit/cli/test_commands.py        |  1 +
 6 files changed, 72 insertions(+), 1 deletion(-)
 create mode 100644 ansible/overcloud-host-image-workaround-resolv.yml

diff --git a/ansible/group_vars/all/overcloud b/ansible/group_vars/all/overcloud
index 58923c3d..5c3cff20 100644
--- a/ansible/group_vars/all/overcloud
+++ b/ansible/group_vars/all/overcloud
@@ -15,3 +15,15 @@ overcloud_groups: >
 # As a special case, the group 'ignore' can be used to specify hosts that
 # should not be added to the inventory.
 overcloud_group_hosts_map: {}
+
+###############################################################################
+# Overcloud host image configuration.
+
+# Whether to work around a bogus name server entry, 10.0.2.3, in the
+# /etc/resolv.conf of CentOS cloud images from 7.2 (1511) onwards. Cloud-init
+# only appends name server entries to this file and will not remove the bogus
+# one, which typically delays SSH connections by around 30 seconds due to a
+# timeout in NSS. When enabled, the entry is removed, if present, from the
+# overcloud host image using virt-customize. See
+# https://bugs.centos.org/view.php?id=14369.
+overcloud_host_image_workaround_resolv_enabled: True
diff --git a/ansible/overcloud-host-image-workaround-resolv.yml b/ansible/overcloud-host-image-workaround-resolv.yml
new file mode 100644
index 00000000..66567951
--- /dev/null
+++ b/ansible/overcloud-host-image-workaround-resolv.yml
@@ -0,0 +1,36 @@
+---
+# CentOS cloud images from 7.2 (1511) onwards ship a bogus name server entry,
+# 10.0.2.3, in /etc/resolv.conf. Cloud-init only appends name server entries
+# to this file and never removes the bogus one, which typically delays SSH
+# connections by around 30 seconds while NSS times out. When
+# overcloud_host_image_workaround_resolv_enabled is true, this playbook strips
+# that entry from the deployment image with virt-customize, run inside the
+# bifrost_deploy container. See https://bugs.centos.org/view.php?id=14369.
+
+- name: Ensure the overcloud host image has bogus name server entries removed
+  hosts: seed
+  tags:
+    - overcloud-host-image-workaround
+  tasks:
+    - block:
+        - name: Ensure libguestfs-tools is installed
+          command: >
+            docker exec bifrost_deploy
+            bash -c '
+            ansible localhost
+            --connection local
+            --become
+            -m yum
+            -a "name=libguestfs-tools state=installed"'
+
+        - name: Ensure the overcloud host image has bogus name server entries removed
+          command: >
+            docker exec bifrost_deploy
+            bash -c '
+            export LIBGUESTFS_BACKEND=direct &&
+            ansible localhost
+            --connection local
+            --become
+            -m command
+            -a "virt-customize -a /httpboot/deployment_image.qcow2 --edit \"/etc/resolv.conf:s/^nameserver 10\.0\.2\.3\$//\""'
+      when: overcloud_host_image_workaround_resolv_enabled | bool
diff --git a/doc/source/release-notes.rst b/doc/source/release-notes.rst
index f3f868a2..74228aea 100644
--- a/doc/source/release-notes.rst
+++ b/doc/source/release-notes.rst
@@ -87,6 +87,13 @@ Upgrade Notes
 
   The previous behaviour of installing python dependencies directly to the host
   can be used by setting ``kolla_ansible_target_venv`` to ``None``.
+* Adds a workaround for CentOS cloud images from 7.2 (1511) onwards, which have
+  a bogus name server entry, ``10.0.2.3``, in ``/etc/resolv.conf``. Cloud-init
+  only appends to this file and will not remove the entry, which typically
+  delays SSH connections by around 30 seconds due to an NSS timeout. Unless
+  ``overcloud_host_image_workaround_resolv_enabled`` is set to ``False``, the
+  entry, if present, is removed from the overcloud host image using
+  ``virt-customize``. See https://bugs.centos.org/view.php?id=14369.
 
 Kayobe 3.0.0
 ============
diff --git a/etc/kayobe/overcloud.yml b/etc/kayobe/overcloud.yml
index 0d54d0cb..4b35737a 100644
--- a/etc/kayobe/overcloud.yml
+++ b/etc/kayobe/overcloud.yml
@@ -13,6 +13,18 @@
 # should not be added to the inventory.
 #overcloud_group_hosts_map:
 
+###############################################################################
+# Overcloud host image configuration.
+
+# Whether to work around a bogus name server entry, 10.0.2.3, in the
+# /etc/resolv.conf of CentOS cloud images from 7.2 (1511) onwards. Cloud-init
+# only appends name server entries to this file and will not remove the bogus
+# one, which typically delays SSH connections by around 30 seconds due to a
+# timeout in NSS. When enabled, the entry is removed, if present, from the
+# overcloud host image using virt-customize. See
+# https://bugs.centos.org/view.php?id=14369.
+#overcloud_host_image_workaround_resolv_enabled:
+
 ###############################################################################
 # Dummy variable to allow Ansible to accept this file.
 workaround_ansible_issue_8743: yes
diff --git a/kayobe/cli/commands.py b/kayobe/cli/commands.py
index 0f9b7fbe..0640f996 100644
--- a/kayobe/cli/commands.py
+++ b/kayobe/cli/commands.py
@@ -450,6 +450,7 @@ class SeedServiceDeploy(KollaAnsibleMixin, KayobeAnsibleMixin, VaultMixin,
     * Configures the bifrost service.
     * Deploys the bifrost container using kolla-ansible.
     * Builds disk images for the overcloud hosts using Diskimage Builder (DIB).
+    * Performs a workaround in the overcloud host image to fix resolv.conf.
     * Configures ironic inspector introspection rules in the bifrost inspector
       service.
     * When enabled, configures a Bare Metal Provisioning (BMP) environment for
@@ -465,7 +466,9 @@ class SeedServiceDeploy(KollaAnsibleMixin, KayobeAnsibleMixin, VaultMixin,
         self.run_kayobe_playbooks(parsed_args, playbooks)
         self.run_kolla_ansible_seed(parsed_args, "deploy-bifrost")
         playbooks = _build_playbook_list(
-            "seed-introspection-rules", "dell-switch-bmp")
+            "overcloud-host-image-workaround-resolv",
+            "seed-introspection-rules",
+            "dell-switch-bmp")
         self.run_kayobe_playbooks(parsed_args, playbooks)
 
 
diff --git a/kayobe/tests/unit/cli/test_commands.py b/kayobe/tests/unit/cli/test_commands.py
index bc528afa..ced4c27d 100644
--- a/kayobe/tests/unit/cli/test_commands.py
+++ b/kayobe/tests/unit/cli/test_commands.py
@@ -412,6 +412,7 @@ class TestCase(unittest.TestCase):
             mock.call(
                 mock.ANY,
                 [
+                    "ansible/overcloud-host-image-workaround-resolv.yml",
                     "ansible/seed-introspection-rules.yml",
                     "ansible/dell-switch-bmp.yml",
                 ],
-- 
GitLab