Skip to content
Snippets Groups Projects
ovs-dpdkctl.sh 15.5 KiB
Newer Older
  • Learn to ignore specific revisions
  • Sean Mooney's avatar
    Sean Mooney committed
    #!/bin/bash

    # Save the command that restores the caller's xtrace state; it is replayed
    # on the last line of the script.
    _XTRACE_OVS_DPDK_CTL=$(set +o | grep xtrace)
    # Tracing is switched on only when OVS_DPDK_CTL_DEBUG is exactly "True".
    case "${OVS_DPDK_CTL_DEBUG}" in
        True) set -o xtrace ;;
    esac

    # Resolved path of this script.
    FULL_PATH=$(realpath "${BASH_SOURCE[0]}")
    # Configuration file; a CONFIG_FILE already present in the environment wins.
    CONFIG_FILE=${CONFIG_FILE:-"/etc/default/ovs-dpdk.conf"}

    # Locations of the two systemd unit files managed by this script.
    SERVICE_FILE="/etc/systemd/system/ovs-dpdkctl.service"
    BRIDGE_SERVICE_FILE="/etc/systemd/system/ovs-dpdk-bridge.service"
    
    Sean Mooney's avatar
    Sean Mooney committed
    
    # Read from the ovs-dpdkctl configuration file via `crudini --get`.
    # Globals:   CONFIG_FILE (read)
    # Arguments: forwarded verbatim to crudini after the file name (normally
    #            "<section> <key>"; gen_config also calls it with no arguments
    #            to enumerate section names).
    # Outputs:   whatever crudini prints.
    # Returns:   crudini's exit status.
    function get_value {
        # Quoted so that a path or argument containing whitespace or glob
        # characters reaches crudini as a single word.
        crudini --get "$CONFIG_FILE" "$@"
    }
    
    # Store a value in the ovs-dpdkctl configuration file via `crudini --set`.
    # Globals:   CONFIG_FILE (read)
    # Arguments: $1 - section, $2 - key, $3 - value (may be omitted or empty)
    # Returns:   crudini's exit status.
    function set_value {
        # Every word is quoted so section, key and value are each passed as
        # exactly one argument, even when empty or containing whitespace.
        crudini --set "$CONFIG_FILE" "$1" "$2" "$3"
    }
    
    # Remove a key (or section) from the configuration file via `crudini --del`.
    # Globals:   CONFIG_FILE (read)
    # Arguments: forwarded verbatim to crudini after the file name.
    # Returns:   crudini's exit status.
    function del_value {
        # Quoted to keep the path and each argument as a single word.
        crudini --del "$CONFIG_FILE" "$@"
    }
    
    # Report whether a configuration entry exists and is non-empty.
    # Globals:   CONFIG_FILE (read)
    # Arguments: forwarded to `crudini --get` (normally "<section> <key>")
    # Outputs:   prints "0" when the lookup succeeds with a non-empty value,
    #            otherwise a non-zero number (crudini's exit status, or 1 when
    #            the value is empty). Callers compare the printed text with 0.
    function is_set {
        local value status
        # A single lookup supplies both the exit status and the value.
        value=$(crudini --get "$CONFIG_FILE" "$@" 2> /dev/null)
        status=$?
        if [[ "$status" -eq 0 && -z "$value" ]]; then
            status=1
        fi
        echo "$status"
    }
    
    # Print the contents of the configuration file.
    # Globals: CONFIG_FILE (read)
    # Returns: cat's exit status.
    function show_config {
        # Quoted so a path with whitespace is not split into several files.
        cat -- "$CONFIG_FILE"
    }
    
    # Print the path of the configuration file in use.
    # Globals: CONFIG_FILE (read)
    function get_config {
        # printf with a quoted argument prints the path verbatim; an unquoted
        # echo would collapse whitespace and expand glob characters.
        printf '%s\n' "$CONFIG_FILE"
    }
    
    # Delete the configuration file; a missing file is not an error (rm -f).
    # Globals: CONFIG_FILE (read)
    function del_config {
        # Quoted, and preceded by `--`, so exactly the configured path is
        # removed even if it contains whitespace or starts with a dash.
        rm -f -- "$CONFIG_FILE"
    }
    
    
    # Print "0" when /etc/redhat-release exists and "1" otherwise.
    # The answer is delivered on stdout; callers compare the text with 0.
    function is_redhat_family {
        if [[ -e /etc/redhat-release ]]; then
            echo 0
        else
            echo 1
        fi
    }
    
    
    Sean Mooney's avatar
    Sean Mooney committed
    # Build the "-w <address>" flag list for every dpdk NIC.
    # Outputs: one line of the form "-w <addr1> -w <addr2> ..." (an empty line
    #          when there are no dpdk NICs).
    # Uses:    list_dpdk_nics for the NIC names and get_value for each address.
    function generate_pciwhitelist {
        local nic address
        local -a flags=()

        for nic in $(list_dpdk_nics); do
            address="$(get_value "$nic" address)"
            if [[ -z "$address" ]]; then
                # A bare "-w" with no address would swallow the next flag.
                echo "generate_pciwhitelist: no address configured for $nic" >&2
                continue
            fi
            flags+=("-w $address")
        done
        printf '%s\n' "${flags[*]}"
    }
    
    # Derive "nic:bridge" port mappings from the configured bridge mappings.
    # For every "<physnet>:<bridge>" entry of `ovs bridge_mappings` the NIC name
    # is the bridge name with its first "br-" removed.
    # Outputs: comma separated "nic:bridge" list (an empty line when there are
    #          no bridge mappings).
    function gen_port_mappings {
        # Everything is local so that callers' variables named nic or bridge
        # are left untouched.
        local raw_mappings net bridge nic result=""
        local -a entries=()

        raw_mappings=$(get_value ovs bridge_mappings)
        # Split on commas and blanks without exposing the entries to pathname
        # expansion.
        IFS=', ' read -r -a entries <<< "$raw_mappings"

        for net in "${entries[@]}"; do
            [[ -n "$net" ]] || continue
            bridge="${net##*:}"
            nic=${bridge/br-/}
            result+="${result:+,}$nic:$bridge"
        done
        echo "$result"
    }
    
    # Regenerate the configuration file from scratch.
    # Globals:   CONFIG_FILE (read); the optional install variables
    #            bridge_mappings, port_mappings, cidr_mappings, ovs_coremask,
    #            pmd_coremask, ovs_mem_channels, ovs_socket_mem,
    #            dpdk_interface_driver, hugepage_mountpoint,
    #            ovs_physical_port_policy and pci_whitelist (read).
    # Effects:   deletes and recreates CONFIG_FILE, writes the [ovs] section,
    #            appends one section per PCI network interface found under
    #            /sys/class/net, records each interface's driver, then sets the
    #            dpdk driver on the NICs returned by list_dpdk_nics.
    function gen_config {
        local nic
        local dpdk_driver=${dpdk_interface_driver:-"uio_pci_generic"}

        del_config
        touch "$CONFIG_FILE"

        # bridge_mappings is written first: the port_mappings default is
        # generated from the value stored in the file.
        set_value ovs bridge_mappings "${bridge_mappings:-}"
        set_value ovs port_mappings "${port_mappings:-$(gen_port_mappings)}"
        set_value ovs cidr_mappings "${cidr_mappings:-}"
        set_value ovs ovs_coremask "${ovs_coremask:-0x1}"
        set_value ovs pmd_coremask "${pmd_coremask:-0x2}"
        set_value ovs ovs_mem_channels "${ovs_mem_channels:-4}"
        set_value ovs ovs_socket_mem "${ovs_socket_mem:-512}"
        set_value ovs dpdk_interface_driver "$dpdk_driver"
        set_value ovs hugepage_mountpoint "${hugepage_mountpoint:-/dev/hugepages}"
        set_value ovs physical_port_policy "${ovs_physical_port_policy:-named}"

        # One "[<nic>]" section per interface whose sysfs link mentions "pci";
        # the address is taken from the link target two components above the
        # interface name.
        ls -al /sys/class/net/* \
            | awk '$0 ~ /pci/ {n=split($NF,a,"/"); print "\n[" a[n] "]\naddress = " a[n-2]  "\ndriver ="}' \
            >> "$CONFIG_FILE"

        # Record the driver currently bound to every non-ovs section.
        for nic in $(get_value | grep -v ovs); do
            set_value "$nic" driver "$(get_driver_by_address "$(get_value "$nic" address)")"
        done
        # NICs handed to dpdk get the dpdk driver as their target instead.
        for nic in $(list_dpdk_nics); do
            set_value "$nic" driver "$dpdk_driver"
        done

        set_value ovs pci_whitelist "${pci_whitelist:-$(generate_pciwhitelist)}"
    }
    
    # Bind a PCI device to a driver through sysfs.
    # Arguments: $1 - PCI address of the device, $2 - driver name
    # Effects:   writes the driver name to the device's driver_override file,
    #            then writes the address to the driver's bind file.
    function bind_nic {
        echo "$2" > "/sys/bus/pci/devices/$1/driver_override"
        echo "$1" > "/sys/bus/pci/drivers/$2/bind"
    }
    
    # Detach a PCI device from its driver through sysfs.
    # Arguments: $1 - PCI address of the device
    #            $2 - driver the device is currently bound to (may be empty)
    # Effects:   writes the address to the driver's unbind file and then
    #            clears the device's driver_override file.
    function unbind_nic {
        # A device with no driver bound has nothing to unbind from; writing to
        # ".../drivers//unbind" would only produce an error.
        if [[ -n "$2" ]]; then
            echo "$1" > "/sys/bus/pci/drivers/$2/unbind"
        fi
        echo > "/sys/bus/pci/devices/$1/driver_override"
    }
    
    # List the NICs assigned to dpdk, one name per line.
    # The names are the part before ":" of every comma separated entry in
    # `ovs port_mappings`.
    function list_dpdk_nics {
        local nic
        for nic in $(get_value ovs port_mappings | tr ',' '\n' | cut -d : -f 1); do
            echo "$nic"
        done
    }
    
    # Bind every dpdk NIC to the driver configured for it.
    # For each NIC from list_dpdk_nics whose current driver differs from the
    # configured `driver`, the current driver is saved as `old_driver`, the
    # device is unbound and then bound to the configured driver.
    function bind_nics {
        local nic device_address current_driver target_driver

        for nic in $(list_dpdk_nics); do
            device_address="$(get_value "$nic" address)"
            target_driver="$(get_value "$nic" driver)"
            # Without an address or a target driver there is nothing sensible
            # to bind; rebinding to an empty driver would leave the device
            # detached.
            if [[ -z "$device_address" || -z "$target_driver" ]]; then
                echo "bind_nics: skipping $nic: address or driver not configured" >&2
                continue
            fi
            current_driver="$(get_driver_by_address "$device_address")"
            if [[ "$current_driver" != "$target_driver" ]]; then
                set_value "$nic" old_driver "$current_driver"
                unbind_nic "$device_address" "$current_driver"
                bind_nic "$device_address" "$target_driver"
            fi
        done
    }
    
    # Restore every dpdk NIC to the driver it had before bind_nics ran.
    # Only NICs with an `old_driver` entry are touched. When the current
    # driver differs from `old_driver` the device is unbound, bound to
    # `old_driver`, and the `old_driver` entry is removed.
    function unbind_nics {
        local nic device_address current_driver target_driver

        for nic in $(list_dpdk_nics); do
            # is_set prints 0 when the entry exists and is non-empty.
            [[ "$(is_set "$nic" old_driver)" == 0 ]] || continue

            device_address="$(get_value "$nic" address)"
            current_driver="$(get_driver_by_address "$device_address")"
            target_driver="$(get_value "$nic" old_driver)"
            if [[ "$current_driver" != "$target_driver" ]]; then
                unbind_nic "$device_address" "$current_driver"
                bind_nic "$device_address" "$target_driver"
                del_value "$nic" old_driver
            fi
        done
    }
    
    # Print the PCI address of a network interface.
    # Arguments: $1 - interface name (an entry of /sys/class/net)
    # Outputs:   the path component two levels above the interface name in the
    #            sysfs link target, when that target mentions "pci"; nothing
    #            otherwise (missing interface, non-PCI device).
    # Returns:   0
    function get_address_by_name {
        local target
        # Read the link directly rather than parsing `ls -l` output.
        target=$(readlink "/sys/class/net/$1") || return 0
        [[ "$target" == *pci* ]] || return 0
        awk -F/ 'NF >= 3 { print $(NF-2) }' <<< "$target"
    }
    
    # Print the name of the driver bound to a PCI device.
    # Arguments: $1 - PCI address (an entry of /sys/bus/pci/devices)
    # Outputs:   the last path component of the device's `driver` link;
    #            nothing when the device does not exist or has no driver.
    # Returns:   0
    function get_driver_by_address {
        local target
        # Read the link directly rather than parsing `ls -l` output; a device
        # without a bound driver simply has no `driver` link.
        target=$(readlink "/sys/bus/pci/devices/$1/driver") || return 0
        printf '%s\n' "${target##*/}"
    }
    
    # Look up the bridge a port is mapped to in `ovs port_mappings`.
    # Arguments: $1 - port (NIC) name
    # Outputs:   the bridge name of the first entry whose port equals $1
    # Returns:   0 when a mapping was found, 1 otherwise
    function get_port_bridge {
        local pair port bridge

        for pair in $(get_value ovs port_mappings | tr ',' '\n'); do
            port=${pair%%:*}
            if [[ "$port" == "$1" ]]; then
                # Second ":"-separated field (the whole entry when it has no
                # colon).
                bridge=${pair#*:}
                bridge=${bridge%%:*}
                echo "$bridge"
                return 0
            fi
        done
        return 1
    }
    
    # Initialise the ovsdb and store the dpdk settings from the [ovs] section
    # of the configuration file in Open_vSwitch other_config.
    function init_ovs_db {
        local pmd_mask lcore_mask channels socket_mem hugepage_dir whitelist
        local -a settings

        ovs-vsctl init

        pmd_mask="$(get_value ovs pmd_coremask)"
        lcore_mask="$(get_value ovs ovs_coremask)"
        channels="$(get_value ovs ovs_mem_channels)"
        socket_mem="$(get_value ovs ovs_socket_mem)"
        hugepage_dir="$(get_value ovs hugepage_mountpoint)"
        whitelist="$(get_value ovs pci_whitelist)"

        settings=(
            "other_config:pmd-cpu-mask=${pmd_mask}"
            "other_config:dpdk-init=True"
            "other_config:dpdk-lcore-mask=${lcore_mask}"
            "other_config:dpdk-mem-channels=${channels}"
            "other_config:dpdk-socket-mem=${socket_mem}"
            "other_config:dpdk-hugepage-dir=${hugepage_dir}"
            "other_config:dpdk-extra= --proc-type primary ${whitelist} "
        )
        ovs-vsctl --no-wait set Open_vSwitch . "${settings[@]}"
    }
    
    # Create a netdev-datapath bridge for every entry of `ovs bridge_mappings`.
    # Each comma separated entry has the form "<physnet>:<bridge>".
    function init_ovs_bridges {
        # Locals only: in particular the global `bridge_mappings` (the
        # user-supplied install variable) must not be overwritten.
        local raw_bridge_mappings pair bridge
        local -a mappings=()

        raw_bridge_mappings=$(get_value ovs bridge_mappings)
        IFS=', ' read -r -a mappings <<< "$raw_bridge_mappings"

        for pair in "${mappings[@]}"; do
            [[ -n "$pair" ]] || continue
            # Second ":"-separated field of the entry.
            bridge=${pair#*:}
            bridge=${bridge%%:*}
            sudo ovs-vsctl --no-wait -- --may-exist add-br "$bridge" \
                -- set Bridge "$bridge" datapath_type=netdev
        done
    }
    
    # Add one dpdk port per dpdk NIC to the bridge it is mapped to.
    # With `ovs physical_port_policy` set to "indexed" the ports are named
    # dpdk0, dpdk1, ... in ascending PCI address order; with any other policy
    # the port carries the NIC name and is tied to its PCI address through
    # dpdk-devargs.
    function init_ovs_interfaces {
        local nic address pair addr bridge policy
        local dpdk_port_number=0
        local -a pci_port_pairs=()

        for nic in $(list_dpdk_nics); do
            address="$(get_value "$nic" address)"
            pci_port_pairs+=("$address,$nic")
        done

        # The policy does not change between ports, so read it once.
        policy="$(get_value ovs physical_port_policy)"

        # One pair per line so that sort really orders the pairs by PCI
        # address; sorting a single space separated line is a no-op.
        for pair in $(printf '%s\n' "${pci_port_pairs[@]}" | LC_ALL=C sort); do
            addr=${pair%%,*}
            nic=${pair#*,}
            nic=${nic%%,*}
            bridge="$(get_port_bridge "$nic")"
            # ovs 2.6 and older requires dpdkX names, ovs 2.7+ requires dpdk-devargs instead.
            if [[ "$policy" == "indexed" ]]; then
                ovs-vsctl --no-wait --may-exist add-port "$bridge" "dpdk${dpdk_port_number}" \
                    -- set Interface "dpdk${dpdk_port_number}" type=dpdk
            else
                ovs-vsctl --may-exist add-port "$bridge" "$nic" \
                    -- set Interface "$nic" type=dpdk "options:dpdk-devargs=$addr"
            fi
            dpdk_port_number=$((dpdk_port_number + 1))
        done
    }
    
    # Run the three ovs initialisation stages in order: database settings,
    # bridges, then dpdk interfaces. The exit status is that of the last stage.
    function init {
        local stage
        for stage in init_ovs_db init_ovs_bridges init_ovs_interfaces; do
            "$stage"
        done
    }
    
    
    # Convert an IPv4 prefix length (0-32) into a dotted-quad netmask.
    function _prefix_to_netmask {
        local prefix=$1 octet bits mask=""
        for octet in 0 1 2 3; do
            # Number of network bits that fall into this octet.
            bits=$(( prefix - octet * 8 ))
            if (( bits >= 8 )); then
                mask+=255
            elif (( bits > 0 )); then
                mask+=$(( 256 - (1 << (8 - bits)) ))
            else
                mask+=0
            fi
            if (( octet < 3 )); then
                mask+=.
            fi
        done
        printf '%s\n' "$mask"
    }

    # Write a static IP configuration for the bridge named in
    # `ovs cidr_mappings` ("<bridge>:<ip>/<prefix>").
    # On RedHat-family hosts an ifcfg-<bridge> file is written and the bridge
    # service is installed; elsewhere a /etc/network/interfaces.d/<bridge>.cfg
    # file is written. The generated text is also echoed to stdout by tee.
    # Only the first entry of a comma separated list is used, which matches
    # uninstall_network_manager_conf.
    # Returns: 0 when nothing is configured or the file was written,
    #          1 when the mapping is malformed.
    function install_network_manager_conf {
        local mappings pair bridge ip prefix mask
        local pattern='^([^:]+):([^/]+)/([0-9]+)$'

        mappings=$(get_value ovs cidr_mappings)
        pair=${mappings%%,*}
        # The default cidr_mappings is empty: nothing to configure.
        if [[ -z "$pair" ]]; then
            return 0
        fi
        if [[ ! "$pair" =~ $pattern ]]; then
            echo "install_network_manager_conf: invalid cidr mapping '$pair'" >&2
            return 1
        fi
        bridge=${BASH_REMATCH[1]}
        ip=${BASH_REMATCH[2]}
        # Force base 10 so a prefix such as "08" is not read as octal.
        prefix=$(( 10#${BASH_REMATCH[3]} ))
        if (( prefix > 32 )); then
            echo "install_network_manager_conf: invalid prefix length in '$pair'" >&2
            return 1
        fi
        mask=$(_prefix_to_netmask "$prefix")

        if [[ $(is_redhat_family) == 0 ]]; then
            printf '%s\n' \
                "DEVICE=$bridge" \
                "BOOTPROTO=static" \
                "IPADDR=$ip" \
                "NETMASK=$mask" \
                "HOTPLUG=yes" \
                "ONBOOT=yes" \
                "" \
                | tee "/etc/sysconfig/network-scripts/ifcfg-$bridge"
            install_redhat_bridge_service "$bridge"
        else
            printf '%s\n' \
                "    auto $bridge" \
                "    iface $bridge inet static" \
                "        address $ip" \
                "        netmask $mask" \
                | tee "/etc/network/interfaces.d/$bridge.cfg"
        fi
    }
    
    # Remove the static IP configuration file of the bridge named first in
    # `ovs cidr_mappings`: ifcfg-<bridge> on RedHat-family hosts,
    # /etc/network/interfaces.d/<bridge>.cfg elsewhere.
    function uninstall_network_manager_conf {
        local pair bridge

        pair=$(get_value ovs cidr_mappings)
        bridge=${pair%%:*}
        # No mapping configured: there is no file to remove, and an empty
        # name would target ".../ifcfg-" or ".../.cfg".
        if [[ -z "$bridge" ]]; then
            return 0
        fi

        if [[ $(is_redhat_family) == 0 ]]; then
            rm -f -- "/etc/sysconfig/network-scripts/ifcfg-$bridge"
        else
            rm -f -- "/etc/network/interfaces.d/$bridge.cfg"
        fi
    }
    
    
    Sean Mooney's avatar
    Sean Mooney committed
    # Write the ovs-dpdkctl systemd unit to $SERVICE_FILE, reload systemd and
    # enable the unit.
    # Globals: SERVICE_FILE (read), CONFIG_FILE (read)
    # Outputs: tee also copies the generated unit text to stdout.
    # The here-document delimiter is unquoted, so $CONFIG_FILE is expanded at
    # the time the unit is written. The unit runs `bind_nics` on start and
    # `unbind_nics` on stop.
    function install_service {
        cat << EOF | tee "$SERVICE_FILE"
    
    [Unit]
    Description=configuration service for ovs-dpdk nics.
    Before=network-pre.target
    After=syslog.target
    
    [Service]
    # Uncomment to enable debug logging.
    # Environment=OVS_DPDK_CTL_DEBUG=True
    Environment=CONFIG_FILE=$CONFIG_FILE
    Type=oneshot
    RemainAfterExit=yes
    ExecStart=/bin/ovs-dpdkctl bind_nics
    ExecStop=/bin/ovs-dpdkctl unbind_nics
    
    [Install]
    WantedBy=multi-user.target
    
    EOF
        # Make systemd pick up the new unit file before enabling it.
        systemctl daemon-reload
        systemctl enable ovs-dpdkctl
    }
    
    
    # Write the ovs-dpdk-bridge systemd unit to $BRIDGE_SERVICE_FILE, reload
    # systemd and enable the unit.
    # Globals:   BRIDGE_SERVICE_FILE (read)
    # Arguments: $1 - bridge name; it is expanded into the unit text when the
    #            here-document is written (unquoted delimiter).
    # Outputs:   tee also copies the generated unit text to stdout.
    # The unit checks that /sys/class/net/<bridge> exists, then runs ifup on
    # start and ifdown on stop.
    function install_redhat_bridge_service {
        cat << EOF | tee "$BRIDGE_SERVICE_FILE"
    [Unit]
    Description=configuration service for ovs-dpdk bridge.
    After=docker.service
    After=network.target
    After=ovs-dpdkctl.service
    
    [Service]
    Type=simple
    RemainAfterExit=yes
    ExecStartPre=/bin/bash -c "[[ -e /sys/class/net/$1 ]]"
    ExecStart=/usr/sbin/ifup $1
    ExecStop=/usr/sbin/ifdown $1
    Restart=on-failure
    RestartSec=5
    
    [Install]
    WantedBy=multi-user.target
    
    EOF
        # Make systemd pick up the new unit file before enabling it.
        systemctl daemon-reload
        systemctl enable ovs-dpdk-bridge
    }
    
    
    Sean Mooney's avatar
    Sean Mooney committed
    # Disable and delete the systemd units installed by this script.
    # Globals: SERVICE_FILE (read), BRIDGE_SERVICE_FILE (read)
    # The bridge unit is handled only when its unit file exists. systemd is
    # reloaded once at the end.
    function uninstall_service {
        systemctl disable ovs-dpdkctl
        rm -f -- "$SERVICE_FILE"

        if [[ -e "$BRIDGE_SERVICE_FILE" ]]; then
            systemctl disable ovs-dpdk-bridge
            rm -f -- "$BRIDGE_SERVICE_FILE"
        fi

        systemctl daemon-reload
    }
    
    
    # Load the dpdk interface driver now and register it for loading at boot.
    # The driver name comes from `ovs dpdk_interface_driver`. On RedHat-family
    # hosts it is written to /etc/modules-load.d/<driver>.conf (when that file
    # is absent); elsewhere it is appended to /etc/modules unless already
    # listed.
    # Returns: 1 when no driver is configured.
    function configure_kernel_modules {
        local driver module conf

        driver="$(get_value ovs dpdk_interface_driver)"
        if [[ -z "$driver" ]]; then
            echo "configure_kernel_modules: dpdk_interface_driver is not set" >&2
            return 1
        fi

        # lsmod prints module names with underscores (vfio-pci is listed as
        # vfio_pci), so compare the normalised name against the first column.
        module=${driver//-/_}
        if ! lsmod | awk -v m="$module" '$1 == m { found = 1 } END { exit !found }'; then
            modprobe "$driver"
        fi

        if [[ $(is_redhat_family) == 0 ]]; then
            conf="/etc/modules-load.d/${driver}.conf"
            if [[ ! -e "$conf" ]]; then
                echo "$driver" | tee "$conf"
            fi
        else
            grep -qws -- "$driver" /etc/modules || echo "$driver" | tee -a /etc/modules
        fi
    }
    
    # Unload the dpdk interface driver and remove its load-at-boot entry.
    # The driver name comes from `ovs dpdk_interface_driver`. On RedHat-family
    # hosts /etc/modules-load.d/<driver>.conf is removed; elsewhere the
    # driver's line is deleted from /etc/modules.
    # Returns: 1 when no driver is configured.
    function unconfigure_kernel_modules {
        local driver module

        driver="$(get_value ovs dpdk_interface_driver)"
        if [[ -z "$driver" ]]; then
            echo "unconfigure_kernel_modules: dpdk_interface_driver is not set" >&2
            return 1
        fi

        # lsmod prints module names with underscores (vfio-pci is listed as
        # vfio_pci), so compare the normalised name against the first column.
        module=${driver//-/_}
        if lsmod | awk -v m="$module" '$1 == m { found = 1 } END { exit !found }'; then
            rmmod "$driver"
        fi

        if [[ $(is_redhat_family) == 0 ]]; then
            rm -f -- "/etc/modules-load.d/${driver}.conf"
        elif [[ -f /etc/modules ]]; then
            # Delete only lines consisting of the driver name; a plain
            # substitution would also cut the name out of longer entries and
            # leave an empty line behind. The name comes from the
            # configuration file and is used as a regular expression.
            sed -i -e "/^[[:space:]]*${driver}[[:space:]]*\$/d" /etc/modules
        fi
    }
    
    
    Sean Mooney's avatar
    Sean Mooney committed
    # Install ovs-dpdkctl on this host.
    # Steps: generate the configuration file when missing, load and register
    # the dpdk kernel module, install the systemd unit when missing, copy this
    # script to /bin/ovs-dpdkctl when missing, start the ovs-dpdkctl service,
    # write the bridge network configuration and, on RedHat-family hosts,
    # start the ovs-dpdk-bridge service.
    # Globals: CONFIG_FILE, SERVICE_FILE, FULL_PATH (read)
    function install {
        # The configuration must exist before configure_kernel_modules runs:
        # that step reads the driver name from it.
        if [[ ! -e "$CONFIG_FILE" ]]; then
            gen_config
        fi

        configure_kernel_modules

        if [[ ! -e "$SERVICE_FILE" ]]; then
            install_service
        fi
        if [[ ! -e /bin/ovs-dpdkctl ]]; then
            cp "$FULL_PATH" /bin/ovs-dpdkctl
            chmod +x /bin/ovs-dpdkctl
        fi
        systemctl start ovs-dpdkctl

        install_network_manager_conf

        if [[ $(is_redhat_family) == 0 ]]; then
            systemctl start ovs-dpdk-bridge
        fi
    }
    
    # Remove ovs-dpdkctl from this host.
    # Steps: stop the ovs-dpdkctl service, uninstall the systemd units when the
    # unit file exists, remove the bridge network configuration, unload and
    # unregister the kernel module, then delete /bin/ovs-dpdkctl and the
    # configuration file.
    # Globals: SERVICE_FILE, CONFIG_FILE (read)
    function uninstall {
        systemctl stop ovs-dpdkctl
        if [[ -e "$SERVICE_FILE" ]]; then
            uninstall_service
        fi

        # Both helpers read the configuration file, so they run before it is
        # deleted below.
        uninstall_network_manager_conf
        unconfigure_kernel_modules

        rm -f -- /bin/ovs-dpdkctl
        rm -f -- "$CONFIG_FILE"
    }
    
    # Print the help text describing the commands and install-time variables.
    # The text is kept as a list of literal lines and written with printf, so
    # nothing in it is expanded by the shell and the output does not depend on
    # the column a here-document terminator would sit at.
    function usage {
        local -a lines=(
            'ovs-dpdkctl.sh: A tool to configure ovs with dpdk.'
            ''
            '- This tool automate the process of binding host insterfacesto a dpdk'
            '  compaible driver (uio_pci_generic | vfio-pci) at boot.'
            '- This tool automate bootstraping ovs so that it can use the'
            '  dpdk accelerated netdev datapath.'
            ''
            'commands:'
            '  - install:'
            '    - installs ovs-dpdkctl as a systemd service.'
            '    - installs ovs-dpdkctl binary.'
            '    - generates ovs-dpdkctl configuration file.'
            '    - starts ovs-dpdkctl service.'
            '  - uninstall:'
            '    - stops ovs-dpdkctl service.'
            '    - uninstalls ovs-dpdkctl systemd service.'
            '    - uninstalls ovs-dpdkctl binary.'
            '    - removes ovs-dpdkctl configuration file.'
            '  - bind_nics:'
            '    - iterates over all dpdk interfaces defined in ovs-dpdkctl config'
            '      and binds the interface to the target driver specifed in the config'
            '      if current driver does not equal target.'
            '  - unbind_nics:'
            '    - iterates over all dpdk interfaces defined in ovs-dpdkctl config'
            '      and restores the interface to its original non dpdk driver.'
            '  - init:'
            '    - defines dpdk specific configuration paramater in the ovsdb.'
            '    - creates bridges as spcified by ovs bridge_mappings in'
            '      ovs-dpdkctl config.'
            '    - creates dpdk ports as defined by ovs port_mappings in'
            '      ovs-dpdkctl config.'
            '  - usage:'
            '    - prints this message'
            ''
            'options:'
            '  - debuging:'
            '    - To enable debuging export OVS_DPDK_CTL_DEBUG=True'
            '  - install:'
            '    - The varibles described below can be defined to customise'
            '      installation of ovs-dpdkctl.'
            '      <variable>=<value> ovs-dpdkctl.sh install'
            '    - bridge_mappings:'
            '      - A comma separated list of physnet to bridge mappings.'
            '      - Example: bridge_mappings=physnet1:br-ex1,physnet2:br-ex2'
            '      - Default: ""'
            '    - port_mappings:'
            '      - A comma separated list of port to bridge mappings.'
            '      - Example: port_mappings=eth1:br-ex1,eth2:br-ex2'
            '      - Default: generated form bridge_mappings assuming bridge names'
            '                 are constructed by appending br- to port name.'
            '    - cidr_mappings:'
            '      - A comma separated list of bridge to cidr mappings.'
            '      - Example: cidr_mappings=br-ex1:192.168.1.1/24,br-ex2:192.168.2.1/24'
            '      - Default: ""'
            '    - ovs_coremask:'
            '      - A hex encoded string container a bitmask of what cpu core'
            '        to pin the non dataplane treads of the ovs-vswitchd to.'
            '      - Node that only the first core of the bit mask is currently'
            '        used by ovs.'
            '      - Example: ovs_coremask=0x1'
            '      - Default: "0x1"'
            '    - pmd_coremask:'
            '      - A hex encoded string container a bitmask of what cpu cores'
            '        to pin the dataplane pool mode driver treads of the ovs-vswitchd to.'
            '      - Each bit set in the bitmask will result in the creating of a pmd.'
            '      - For best performance it is recommended to allocate at least 1 pmd per'
            '        numa node. On systems with HyperThreading enabled it is recommended to also'
            '        allocate the HT sibling core in the pmd_coremask.cores allocated'
            '        to ovs with dpdk via the pmd_coremask should be removed from the'
            '        nova vcpu_pin_set and isolated from the kernel scheduler.'
            '      - Note it is not recommended to isolate cores in the nova vcpu_pin_set'
            '        unless the host will be dedicated for vms that request cpu pinning.'
            '      - Example: pmd_coremask=0x4'
            '      - Default: "0x2"'
            '    - ovs_mem_channels:'
            '      - The number of memory channels supported by the plathforms.'
            '      - Example: ovs_mem_channels=2'
            '      - Default: "4"'
            '    - ovs_socket_mem:'
            '      - A comma separated list of hugepage memory, specifed in MBs per numa node,'
            '        allocated to the ovs-vswitchd to use for the dpdk dataplane.'
            '      - For best performance memory should be allocated evenly across all numa node'
            '        that will run a pmd.'
            '      - Example: ovs_socket_mem=512,512'
            '      - Default: "512"'
            '    - hugepage_mountpoint:'
            '      - The hugepage mountpoint to use when allocating memory for dpdk.'
            '      - Example: hugepage_mountpoint=/dev/my_custom_mountpoint'
            '      - Default: "/dev/hugepages"'
            '    - dpdk_interface_driver:'
            '      - The dpdk compatible userspace driver to use when binding host interfaces.'
            '      - Example: dpdk_interface_driver=vfio_pci'
            '      - Default: "uio_pci_generic"'
            '    - pci_whitelist:'
            '      - A repeated space separated list of pci whitelist flags'
            '        for allowed ovs-dpdk ports.'
            '      - The pci_whitelist allows multiple dpdk primary process to'
            '        utilise different pci devices without resulting in a conflict'
            '        of ownership.'
            '      - Example: pci_whitelist="-w <pci address 1> -w <pci address 2>"'
            '      - Default: auto generated form port_mappings.'
        )
        printf '%s\n' "${lines[@]}"
    }
    
    # Run one sub-command of this script.
    # Arguments: $1 - name of a function defined in this file (defaults to
    #            "usage"); remaining arguments are handed to it unchanged.
    # Returns:   the sub-command's exit status, or 127 for an unknown name.
    function _ovs_dpdkctl_dispatch {
        local func=${1:-usage}
        if (( $# > 0 )); then
            shift
        fi
        # Only functions defined by this script may be invoked, and the
        # arguments are passed as words rather than re-parsed by eval, so
        # input such as "a b" or "x; rm ..." cannot be split or executed.
        if ! declare -F -- "$func" > /dev/null; then
            echo "ovs-dpdkctl: unknown command '$func'" >&2
            return 127
        fi
        "$func" "$@"
    }

    _ovs_dpdkctl_dispatch "$@"

    # Put the xtrace option back to the state saved at the top of the script.
    ${_XTRACE_OVS_DPDK_CTL}