diff --git a/ansible/roles/octavia/templates/octavia-interface.service.j2 b/ansible/roles/octavia/templates/octavia-interface.service.j2 index 7f04d9fb428c95f71d12098ba85974a6d2fb3f9f..532cdc72e5d4939d91d8361076fe7fad3b951933 100644 --- a/ansible/roles/octavia/templates/octavia-interface.service.j2 +++ b/ansible/roles/octavia/templates/octavia-interface.service.j2 @@ -7,6 +7,10 @@ After=docker.service Type=oneshot User=root Group=root +Restart=on-failure +{% if octavia_interface_wait_timeout is defined %} +TimeoutStartSec={{ octavia_interface_wait_timeout }} +{% endif %} RemainAfterExit=true ExecStartPre=/sbin/ip link set dev {{ octavia_network_interface }} address {{ port_info.port.mac_address }} ExecStart=/sbin/dhclient -v {{ octavia_network_interface }} -cf /etc/dhcp/octavia-dhclient.conf diff --git a/doc/source/reference/networking/octavia.rst b/doc/source/reference/networking/octavia.rst index 72d2a044324e9c72937fb6c367e0bdf0d0e7e720..53f266065fb34dafeadd4e5ed916ae7594a3d395 100644 --- a/doc/source/reference/networking/octavia.rst +++ b/doc/source/reference/networking/octavia.rst @@ -437,6 +437,24 @@ Add ``octavia_network_type`` to ``globals.yml`` and set the value to ``tenant`` Next,follow the deployment instructions as normal. +Failure handling +---------------- + +On large deployments, where the neutron-openvswitch-agent sync can take +more than 5 minutes, you can get an error from the octavia-interface.service +systemd unit, because it cannot wait long enough for either the o-hm0 +interface to be attached to br-int or the Octavia management VxLAN to be +configured on that host. In this case, add ``octavia_interface_wait_timeout`` +to ``globals.yml`` and set the value to the new timeout in seconds: + +.. code-block:: yaml + + octavia_interface_wait_timeout: 1800 + +On deployments with up to 2500 network ports per network node, the sync process +can take up to 30 minutes. Choose this value according +to your deployment size.
+ OVN provider ============ diff --git a/releasenotes/notes/fix-octavia-interface-timeout-5e87ea2501d5ab3c.yaml b/releasenotes/notes/fix-octavia-interface-timeout-5e87ea2501d5ab3c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1046e1ede5994e6903209ecf13c53d34f3deb650 --- /dev/null +++ b/releasenotes/notes/fix-octavia-interface-timeout-5e87ea2501d5ab3c.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + Fixes 2067036. + Added ``octavia_interface_wait_timeout`` to control the + octavia-interface.service timeout, so that the unit can wait until + the openvswitch agent sync has finished and + octavia-lb-net is reachable from the host. + Also set the restart policy for this unit to on-failure. + `LP#2067036 <https://launchpad.net/bugs/2067036>`__