From 5250a00781a214911fec78718ef6dfb91154b0de Mon Sep 17 00:00:00 2001
From: SamYaple <sam@yaple.net>
Date: Fri, 18 Mar 2016 13:52:32 +0000
Subject: [PATCH] Allow external ceph journals and fix bootstrap

This allows us to specify external journals for OSDs, which can greatly
improve performance when the external journals are on solid-state
drives.

The new lookup and startup methods fix the previous races that
prevented OSDs from being created properly.

This retains the same functionality as before and is completely
compatible with the previous method and labels; however, it does set
new labels for all newly bootstrapped OSDs. This is due to a limit on
the length of a GPT partition name.

Closes-Bug: #1558853
DocImpact
Partially-Implements: blueprint ceph-improvements
Change-Id: I61fd10cb35c67dabc53bd82270f26909ef51fc38
---
 ansible/roles/ceph/tasks/bootstrap_osds.yml | 16 +++-
 ansible/roles/ceph/tasks/start_osds.yml     |  4 +-
 docker/ceph/ceph-osd/extend_start.sh        | 37 +++++----
 docker/kolla-toolbox/find_disks.py          | 84 +++++++++++++++------
 4 files changed, 93 insertions(+), 48 deletions(-)

diff --git a/ansible/roles/ceph/tasks/bootstrap_osds.yml b/ansible/roles/ceph/tasks/bootstrap_osds.yml
index 16599170e6..471d173c90 100644
--- a/ansible/roles/ceph/tasks/bootstrap_osds.yml
+++ b/ansible/roles/ceph/tasks/bootstrap_osds.yml
@@ -2,7 +2,7 @@
 - name: Looking up disks to bootstrap for Ceph
   command: docker exec -t kolla_toolbox /usr/bin/ansible localhost
     -m find_disks
-    -a "partition_name='KOLLA_CEPH_OSD_BOOTSTRAP'"
+    -a "partition_name='KOLLA_CEPH_OSD_BOOTSTRAP' match_mode='prefix'"
   register: osd_lookup
   changed_when: "{{ osd_lookup.stdout.find('localhost | SUCCESS => ') != -1 and (osd_lookup.stdout.split('localhost | SUCCESS => ')[1]|from_json).changed }}"
   failed_when: osd_lookup.stdout.split()[2] != 'SUCCESS'
@@ -14,7 +14,7 @@
 - name: Looking up disks to bootstrap for Ceph
   command: docker exec -t kolla_toolbox /usr/bin/ansible localhost
     -m find_disks
-    -a "partition_name='KOLLA_CEPH_OSD_CACHE_BOOTSTRAP'"
+    -a "partition_name='KOLLA_CEPH_OSD_CACHE_BOOTSTRAP' match_mode='prefix'"
   register: osd_cache_lookup
   changed_when: "{{ osd_cache_lookup.stdout.find('localhost | SUCCESS => ') != -1 and (osd_cache_lookup.stdout.split('localhost | SUCCESS => ')[1]|from_json).changed }}"
   failed_when: osd_cache_lookup.stdout.split()[2] != 'SUCCESS'
@@ -32,6 +32,12 @@
       KOLLA_BOOTSTRAP:
       KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
       OSD_DEV: "{{ item.1.device }}"
+      OSD_PARTITION: "{{ item.1.partition }}"
+      OSD_PARTITION_NUM: "{{ item.1.partition_num }}"
+      JOURNAL_DEV: "{{ item.1.journal_device }}"
+      JOURNAL_PARTITION: "{{ item.1.journal }}"
+      JOURNAL_PARTITION_NUM: "{{ item.1.journal_num }}"
+      USE_EXTERNAL_JOURNAL: "{{ item.1.external_journal | bool }}"
       OSD_FILESYSTEM: "{{ ceph_osd_filesystem }}"
       OSD_INITIAL_WEIGHT: "{{ osd_initial_weight }}"
       HOSTNAME: "{{ hostvars[inventory_hostname]['ansible_' + storage_interface]['ipv4']['address'] }}"
@@ -56,6 +62,12 @@
       KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
       CEPH_CACHE:
       OSD_DEV: "{{ item.1.device }}"
+      OSD_PARTITION: "{{ item.1.partition }}"
+      OSD_PARTITION_NUM: "{{ item.1.partition_num }}"
+      JOURNAL_DEV: "{{ item.1.journal_device }}"
+      JOURNAL_PARTITION: "{{ item.1.journal }}"
+      JOURNAL_PARTITION_NUM: "{{ item.1.journal_num }}"
+      USE_EXTERNAL_JOURNAL: "{{ item.1.external_journal | bool }}"
       OSD_FILESYSTEM: "{{ ceph_osd_filesystem }}"
       OSD_INITIAL_WEIGHT: "{{ osd_initial_weight }}"
       HOSTNAME: "{{ hostvars[inventory_hostname]['ansible_' + storage_interface]['ipv4']['address'] }}"
diff --git a/ansible/roles/ceph/tasks/start_osds.yml b/ansible/roles/ceph/tasks/start_osds.yml
index 3f2bdd4cdc..63787dd120 100644
--- a/ansible/roles/ceph/tasks/start_osds.yml
+++ b/ansible/roles/ceph/tasks/start_osds.yml
@@ -2,7 +2,7 @@
 - name: Looking up OSDs for Ceph
   command: docker exec -t kolla_toolbox /usr/bin/ansible localhost
     -m find_disks
-    -a "partition_name='KOLLA_CEPH_DATA'"
+    -a "partition_name='KOLLA_CEPH_DATA' match_mode='prefix'"
   register: osd_lookup
   changed_when: "{{ osd_lookup.stdout.find('localhost | SUCCESS => ') != -1 and (osd_lookup.stdout.split('localhost | SUCCESS => ')[1]|from_json).changed }}"
   failed_when: osd_lookup.stdout.split()[2] != 'SUCCESS'
@@ -34,7 +34,7 @@
     environment:
       KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
       OSD_ID: "{{ item.0.stdout }}"
-      OSD_DEV: "{{ item.1['device'] }}"
+      JOURNAL_PARTITION: "{{ item.1.journal }}"
     image: "{{ ceph_osd_image_full }}"
     name: "ceph_osd_{{ item.0.stdout }}"
     pid_mode: "host"
diff --git a/docker/ceph/ceph-osd/extend_start.sh b/docker/ceph/ceph-osd/extend_start.sh
index 5db6f61c93..97119578a7 100644
--- a/docker/ceph/ceph-osd/extend_start.sh
+++ b/docker/ceph/ceph-osd/extend_start.sh
@@ -3,28 +3,22 @@
 # Bootstrap and exit if KOLLA_BOOTSTRAP variable is set. This catches all cases
 # of the KOLLA_BOOTSTRAP variable being set, including empty.
 if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then
+    # NOTE(SamYaple): Static gpt partcodes
+    CEPH_JOURNAL_TYPE_CODE="45B0969E-9B03-4F30-B4C6-B4B80CEFF106"
+    CEPH_OSD_TYPE_CODE="4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D"
+
     # Wait for ceph quorum before proceeding
     ceph quorum_status
 
-    # Formatting disk for ceph
-    sgdisk --zap-all -- "${OSD_DEV}"
-    sgdisk --new=2:1M:5G --change-name=2:KOLLA_CEPH_JOURNAL --typecode=2:45B0969E-9B03-4F30-B4C6-B4B80CEFF106 --mbrtogpt -- "${OSD_DEV}"
-    sgdisk --largest-new=1 --change-name=1:KOLLA_CEPH_DATA --typecode=1:4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D -- "${OSD_DEV}"
-    # This command may throw errors that we can safely ignore
-    partprobe || true
+    if [[ "${USE_EXTERNAL_JOURNAL}" == "False" ]]; then
+        # Formatting disk for ceph
+        sgdisk --zap-all -- "${OSD_DEV}"
+        sgdisk --new=2:1M:5G -- "${JOURNAL_DEV}"
+        sgdisk --largest-new=1 -- "${OSD_DEV}"
+        # NOTE(SamYaple): This command may throw errors that we can safely ignore
+        partprobe || true
 
-    count=0
-    while [[ "${OSD_PARTITION}x" == "x" ]]; do
-        if [[ "${count}" -gt 5 ]]; then
-            echo "Could not find OSD Partition"
-            exit 1
-        fi
-        sleep 1
-        # We look up the appropriate device path with partition.
-        OSD_PARTITION=$(ls "${OSD_DEV}"* | egrep "${OSD_DEV}p?1")
-        count=$(( count + 1 ))
-    done
-    JOURNAL_PARTITION="${OSD_PARTITION%?}2"
+    fi
 
     OSD_ID=$(ceph osd create)
     OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
@@ -56,10 +50,13 @@ if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then
 
     # Adding osd to crush map
     ceph osd crush add "${OSD_ID}" "${OSD_INITIAL_WEIGHT}" host="${HOSTNAME}${CEPH_ROOT_NAME:+-${CEPH_ROOT_NAME}}"
+
+    # Setting partition name based on ${OSD_ID}
+    sgdisk "--change-name=${OSD_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}" "--typecode=${OSD_PARTITION_NUM}:${CEPH_OSD_TYPE_CODE}" -- "${OSD_DEV}"
+    sgdisk "--change-name=${JOURNAL_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}_J" "--typecode=${JOURNAL_PARTITION_NUM}:${CEPH_JOURNAL_TYPE_CODE}" -- "${JOURNAL_DEV}"
+
     exit 0
 fi
 
-# We look up the appropriate journal since we cannot rely on symlinks
-JOURNAL_PARTITION=$(ls "${OSD_DEV}"* | egrep "${OSD_DEV}p?2")
 OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
 ARGS="-i ${OSD_ID} --osd-journal ${JOURNAL_PARTITION} -k ${OSD_DIR}/keyring"
diff --git a/docker/kolla-toolbox/find_disks.py b/docker/kolla-toolbox/find_disks.py
index 6ebf2b6a27..e72dcc6fba 100644
--- a/docker/kolla-toolbox/find_disks.py
+++ b/docker/kolla-toolbox/find_disks.py
@@ -66,6 +66,61 @@ EXAMPLES = '''
 
 import json
 import pyudev
+import re
+
+
+def is_dev_matched_by_name(dev, name, mode):
+    if dev.get('DEVTYPE', '') == 'partition':
+        dev_name = dev.get('ID_PART_ENTRY_NAME', '')
+    else:
+        dev_name = dev.get('ID_FS_LABEL', '')
+
+    if mode == 'strict':
+        return dev_name == name
+    elif mode == 'prefix':
+        return dev_name.startswith(name)
+    else:
+        return False
+
+
+def find_disk(ct, name, match_mode):
+    for dev in ct.list_devices(subsystem='block'):
+        if is_dev_matched_by_name(dev, name, match_mode):
+            yield dev
+
+
+def extract_disk_info(ct, dev, name):
+    if not dev:
+        return
+    kwargs = dict()
+    kwargs['fs_uuid'] = dev.get('ID_FS_UUID', '')
+    kwargs['fs_label'] = dev.get('ID_FS_LABEL', '')
+    if dev.get('DEVTYPE', '') == 'partition':
+        kwargs['device'] = dev.find_parent('block').device_node
+        kwargs['partition'] = dev.device_node
+        kwargs['partition_num'] = \
+            re.sub(r'.*[^\d$]', '', dev.device_node)
+        if is_dev_matched_by_name(dev, name, 'strict'):
+            kwargs['external_journal'] = False
+            kwargs['journal'] = dev.device_node[:-1] + '2'
+            kwargs['journal_device'] = kwargs['device']
+            kwargs['journal_num'] = 2
+        else:
+            kwargs['external_journal'] = True
+            journal_name = dev.get('ID_PART_ENTRY_NAME', '') + '_J'
+            for journal in find_disk(ct, journal_name, 'strict'):
+                kwargs['journal'] = journal.device_node
+                kwargs['journal_device'] = \
+                    journal.find_parent('block').device_node
+                kwargs['journal_num'] = \
+                    re.sub(r'.*[^\d$]', '', journal.device_node)
+                break
+            if 'journal' not in kwargs:
+                # NOTE(SamYaple): Journal not found, not returning info
+                return
+    else:
+        kwargs['device'] = dev.device_node
+    yield kwargs
 
 
 def main():
@@ -78,33 +133,14 @@ def main():
     match_mode = module.params.get('match_mode')
     name = module.params.get('name')
 
-    def is_dev_matched_by_name(dev, name):
-        if dev.get('DEVTYPE', '') == 'partition':
-            dev_name = dev.get('ID_PART_ENTRY_NAME', '')
-        else:
-            dev_name = dev.get('ID_FS_LABEL', '')
-
-        if match_mode == 'strict':
-            return dev_name == name
-        elif match_mode == 'prefix':
-            return dev_name.startswith(name)
-        else:
-            return False
-
     try:
         ret = list()
         ct = pyudev.Context()
-        for dev in ct.list_devices(subsystem='block'):
-            if is_dev_matched_by_name(dev, name):
-                fs_uuid = dev.get('ID_FS_UUID', '')
-                fs_label = dev.get('ID_FS_LABEL', '')
-                if dev.get('DEVTYPE', '') == 'partition':
-                    device_node = dev.find_parent('block').device_node
-                else:
-                    device_node = dev.device_node
-                ret.append({'device': device_node,
-                            'fs_uuid': fs_uuid,
-                            'fs_label': fs_label})
+        for dev in find_disk(ct, name, match_mode):
+            for info in extract_disk_info(ct, dev, name):
+                if info:
+                    ret.append(info)
+
         module.exit_json(disks=json.dumps(ret))
     except Exception as e:
         module.exit_json(failed=True, msg=repr(e))
-- 
GitLab