Changeset b148e3b


Ignore:
Timestamp:
11/25/16 10:13:11 (4 years ago)
Author:
Richard Sanger <rsanger@…>
Branches:
4.0.1-hotfixes, cachetimestamps, develop, dpdk-ndag, etsilive, master, ndag_format, rc-4.0.1, rc-4.0.2, rc-4.0.3, rc-4.0.4, ringdecrementfix, ringperformance, ringtimestampfixes
Children:
caf7841
Parents:
571e2f9
git-author:
Richard Sanger <rsanger@…> (11/25/16 00:22:43)
git-committer:
Richard Sanger <rsanger@…> (11/25/16 10:13:11)
Message:

Updates DPDK to latest release and improves performance

Thanks to Richard Cziva for supplying an initial patch for this.

We now recommend using the latest release of DPDK, ideally 16.04 or newer

To support newer releases

  • Fixes RSS hashing renames
  • Fixes deprecated rte_mempool_count
  • Fixes ETH_LINK_SPEED_X rename
  • Fixes TX minimum memory requirement
  • Fixes dropped vs errored counting in recent versions (for best results use 16.04 or newer)

Tuned to allow DPDK's SSE vector mode in supporting drivers for better performance.
Bumps default internal batch size up to 32 to match DPDK in SSE vector mode.

Files:
4 edited

Legend:

Unmodified
Added
Removed
  • lib/format_dpdk.c

    ree6e802 rb148e3b  
    2323 *
    2424 *
    25  */
     25 *
    2626 * Kit capture format.
    2727 *
     
    6969 * code (that we still attempt to support).
    7070 *
    71  * DPDK v1.7.1 is recommended.
    72  * However 1.5 to 1.8 are likely supported.
     71 * DPDK 16.04 or newer is recommended.
     72 * However 1.6 and newer are still likely supported.
    7373 */
    7474#include <rte_eal.h>
     
    145145#else
    146146#       define DPDK_USE_NULL_QUEUE_CONFIG 0
     147#endif
     148
     149/* 2.0.0-rc1
     150 * Unifies RSS hash between cards
     151 */
     152#if RTE_VERSION >= RTE_VERSION_NUM(2, 0, 0, 1)
     153#       define RX_RSS_FLAGS (ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP | \
     154                             ETH_RSS_SCTP)
     155#else
     156#       define RX_RSS_FLAGS (ETH_RSS_IPV4_UDP | ETH_RSS_IPV6 | ETH_RSS_IPV4 | \
     157                             ETH_RSS_IPV4_TCP | ETH_RSS_IPV6_TCP |\
     158                             ETH_RSS_IPV6_UDP)
     159#endif
     160
     161/* v16.07-rc1 - deprecated
     162 * rte_mempool_avail_count to replace rte_mempool_count
     163 * rte_mempool_in_use_count to replace rte_mempool_free_count
     164 */
     165#if RTE_VERSION < RTE_VERSION_NUM(16, 7, 0, 1)
     166#define rte_mempool_avail_count rte_mempool_count
     167#define rte_mempool_in_use_count rte_mempool_free_count
    147168#endif
    148169
     
    170191#endif
    171192
     193/* 16.04-rc3 ETH_LINK_SPEED_X are replaced with ETH_SPEED_NUM_X.
     194 * ETH_LINK_SPEED_ are reused as flags, ugly.
     195 * We use the new way in this code.
     196 */
     197#ifndef ETH_SPEED_NUM_1G
     198        #define ETH_SPEED_NUM_1G ETH_LINK_SPEED_1000
     199        #define ETH_SPEED_NUM_10G ETH_LINK_SPEED_10G
     200        #define ETH_SPEED_NUM_20G ETH_LINK_SPEED_20G
     201        #define ETH_SPEED_NUM_40G ETH_LINK_SPEED_40G
     202#endif
    172203
    173204/* The default size of memory buffers to use - This is the max size of standard
     
    175206#define RX_MBUF_SIZE 1514
    176207
    177 /* The minimum number of memory buffers per queue tx or rx. Search for
    178  * _MIN_RING_DESC in DPDK. The largest minimum is 64 for 10GBit cards.
    179  */
    180 #define MIN_NB_BUF 64
     208/* The minimum number of memory buffers per queue tx or rx. Based on
     209 * the requirement of the memory pool with 128 per thread buffers, needing
     210 * at least 128*1.5 = 192 buffers. Our code allocates 128*2 to be safe.
     211 */
     212#define MIN_NB_BUF 128
    181213
    182214/* Number of receive memory buffers to use
     
    185217 * This can be increased in the driver and here.
    186218 * Should be at least MIN_NB_BUF.
    187  */
    188 #define NB_RX_MBUF 4096
     219 * We choose 2K rather than 4K because it enables the usage of sse vector
     220 * drivers which are significantly faster than using the larger buffer.
     221 */
     222#define NB_RX_MBUF (4096/2)
    189223
    190224/* Number of send memory buffers to use.
     
    229263 *
    230264 * Make sure you understand what these are doing before enabling them.
    231  * They might make traces incompatable with other builds etc.
     265 * They might make traces incompatible with other builds etc.
    232266 *
    233267 * These are also included to show how to do somethings which aren't
     
    236270
    237271/* Print verbose messages to stderr */
    238 #define DEBUG 0
     272#define DEBUG 1
    239273
    240274/* Use clock_gettime() for nanosecond resolution rather than gettimeofday()
    241275 * only turn on if you know clock_gettime is a vsyscall on your system
    242  * overwise could be a large overhead. Again gettimeofday() should be
     276 * otherwise could be a large overhead. Again gettimeofday() should be
    243277 * vsyscall also if it's not you should seriously consider updating your
    244278 * kernel.
     
    297331        int lcore;
    298332#if HAS_HW_TIMESTAMPS_82580
    299         /* Timestamping only relevent to RX */
     333        /* Timestamping only relevant to RX */
    300334        uint64_t ts_first_sys; /* Sytem timestamp of the first packet in nanoseconds */
    301335        uint32_t wrap_count; /* Number of times the NIC clock has wrapped around completely */
     
    959993                .rss_conf = {
    960994                        // .rss_key = &rss_key, // We set this per format
    961                         .rss_hf = ETH_RSS_IPV4_UDP | ETH_RSS_IPV6 | ETH_RSS_IPV4 | ETH_RSS_IPV4_TCP | ETH_RSS_IPV6_TCP | ETH_RSS_IPV6_UDP,
     995                        .rss_hf = RX_RSS_FLAGS,
    962996                },
    963997        },
     
    10871121        int i;
    10881122        struct rte_config *cfg = rte_eal_get_configuration();
     1123        (void) socket;
    10891124
    10901125        pthread_mutex_lock(&dpdk_lock);
     
    12461281                fprintf(stderr, "DPDK memory pool not empty %d of %d, please "
    12471282                        "free all packets before finishing a trace\n",
    1248                         rte_mempool_count(mempool), mempool->size);
     1283                        rte_mempool_avail_count(mempool), mempool->size);
    12491284        }
    12501285
     
    18421877retry_calc_wiretime:
    18431878        switch (format_data->link_speed) {
    1844         case ETH_LINK_SPEED_40G:
    1845                 wire_time /=  ETH_LINK_SPEED_40G;
     1879        case ETH_SPEED_NUM_40G:
     1880                wire_time /=  ETH_SPEED_NUM_40G;
    18461881                break;
    1847         case ETH_LINK_SPEED_20G:
    1848                 wire_time /= ETH_LINK_SPEED_20G;
     1882        case ETH_SPEED_NUM_20G:
     1883                wire_time /= ETH_SPEED_NUM_20G;
    18491884                break;
    1850         case ETH_LINK_SPEED_10G:
    1851                 wire_time /= ETH_LINK_SPEED_10G;
     1885        case ETH_SPEED_NUM_10G:
     1886                wire_time /= ETH_SPEED_NUM_10G;
    18521887                break;
    1853         case ETH_LINK_SPEED_1000:
    1854                 wire_time /= ETH_LINK_SPEED_1000;
     1888        case ETH_SPEED_NUM_1G:
     1889                wire_time /= ETH_SPEED_NUM_1G;
    18551890                break;
    18561891        case 0:
     
    21942229        stats->captured = dev_stats.ipackets;
    21952230
    2196         /* Not that we support adding filters but if we did this
    2197          * would work */
    2198         stats->filtered += dev_stats.fdirmiss;
    2199 
    22002231        stats->dropped_valid = true;
    22012232        stats->dropped = dev_stats.imissed;
    22022233
     2234#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 2)
     2235        /* DPDK commit 86057c fixes ensures missed does not get counted as
     2236         * errors */
     2237        stats->errors_valid = true;
     2238        stats->errors = dev_stats.ierrors;
     2239#else
    22032240        /* DPDK errors includes drops */
    22042241        stats->errors_valid = true;
    22052242        stats->errors = dev_stats.ierrors - dev_stats.imissed;
    2206 
     2243#endif
    22072244        stats->received_valid = true;
    22082245        stats->received = dev_stats.ipackets + dev_stats.imissed;
  • lib/libtrace.h.in

    r0cdd231 rb148e3b  
    260260/** If the packet has allocated its own memory the buffer_control should be
    261261 * set to TRACE_CTRL_PACKET, so that the memory will be freed when the packet
    262  * is destroyed. If the packet has been zerocopied out of memory owned by
     262 * is destroyed. If the packet has been zero-copied out of memory owned by
    263263 * something else, e.g. a DAG card, it should be TRACE_CTRL_EXTERNAL.
    264264 *
     
    282282/** Enumeration of error codes */
    283283enum {
    284         /** No Error has occured.... yet. */
     284        /** No Error has occurred.... yet. */
    285285        TRACE_ERR_NOERROR       = 0,
    286286        /** The URI passed to trace_create() is unsupported, or badly formed */
     
    336336#endif
    337337        TRACE_DLT_PPP_SERIAL = 50,
    338         TRACE_DLT_LINKTYPE_RAW = 101, /**< See TRACE_DLT_RAW for explainations of pain. */
     338        TRACE_DLT_LINKTYPE_RAW = 101, /**< See TRACE_DLT_RAW for explanations of pain. */
    339339        TRACE_DLT_C_HDLC = 104,
    340340        TRACE_DLT_IEEE802_11 = 105,
     
    548548        uint64_t hash; /**< A hash of the packet as supplied by the user */
    549549        int error; /**< The error status of pread_packet */
    550         uint64_t internalid;            /** Internal indentifier for the pkt */
     550        uint64_t internalid;            /** Internal identifier for the pkt */
    551551        void *srcbucket;
    552552} libtrace_packet_t;
     
    576576    TRACE_RADIOTAP_LOCK_QUALITY = 7, /**< Barker Code lock quality (uint16) */
    577577    TRACE_RADIOTAP_TX_ATTENUATION = 8, /**< TX attenuation as unitless distance from max power (uint16) */
    578     TRACE_RADIOTAP_DB_TX_ATTENUATION = 9, /**< TX attenutation as dB from max power (uint16) */
     578    TRACE_RADIOTAP_DB_TX_ATTENUATION = 9, /**< TX attenuation as dB from max power (uint16) */
    579579    TRACE_RADIOTAP_DBM_TX_POWER = 10, /**< TX Power in dBm (int8) */
    580580    TRACE_RADIOTAP_ANTENNA = 11, /**< Antenna frame was rx'd or tx'd on (uint8) */
     
    11821182 * @param [out] format  A pointer that will be updated to point to an allocated
    11831183 *                      string holding the format component of the URI
    1184  * @return NULL if an error occured, otherwise return a pointer to the uridata
     1184 * @return NULL if an error occurred, otherwise return a pointer to the uridata
    11851185 * component
    11861186 *
     
    12071207 *  - rt:hostname:port
    12081208 *
    1209  *  If an error occured when attempting to open the trace file, a
     1209 *  If an error occurred when attempting to open the trace file, a
    12101210 *  trace is still returned so trace_is_err() should be called to find out
    1211  *  if an error occured. The trace is created in the configuration state, you
     1211 *  if an error occurred. The trace is created in the configuration state, you
    12121212 *  must call trace_start before attempting to read packets from the trace.
    12131213 */
     
    12381238 *  - pcap:/path/to/pcap/file
    12391239 *
    1240  *  If an error occured when attempting to open the output trace, a trace is
     1240 *  If an error occurred when attempting to open the output trace, a trace is
    12411241 *  still returned but trace_errno will be set. Use trace_is_err_out() and
    12421242 *  trace_perror_output() to get more information.
     
    13771377} trace_option_output_t;
    13781378
    1379 /* To add a new stat field update this list, and the relevent places in
     1379/* To add a new stat field update this list, and the relevant places in
    13801380 * libtrace_stat_t structure.
    13811381 */
     
    14301430        uint64_t filtered;
    14311431
    1432         /** The total number of good packets which have been recevied. Including
     1432        /** The total number of good packets which have been received. Including
    14331433         * those which are dropped and filtered. This does not include errors.
    14341434         *
     
    16101610 * Returns statistic counters for a trace, for a parallel trace this is a
    16111611 * combined total.
    1612  * Where possible these are retrived atomically, however this behaviour depends
     1612 * Where possible these are retrieved atomically, however this behaviour depends
    16131613 * on the underlying trace format.
    16141614 *
     
    16311631/**
    16321632 * Returns statistic counters for a single thread of a trace.
    1633  * Where possible these are retrived atomically, however this behaviour depends
     1633 * Where possible these are retrieved atomically, however this behaviour depends
    16341634 * on the underlying trace format.
    16351635 *
    16361636 * @param trace The input trace to examine.
    1637  * @param t An optional thread to received stats for or NULL to retrive stats
     1637 * @param t An optional thread to received stats for or NULL to retrieve stats
    16381638 *          for the current thread
    16391639 * @param stats Filled upon return with statistics about the trace, check the
     
    17341734 * function should be avoided where possible.
    17351735 *
    1736  * @par The reason you would want to use this function is that a zerocopied
     1736 * @par The reason you would want to use this function is that a zero-copied
    17371737 * packet from a device will be stored using memory owned by the device which
    17381738 * may be a limited resource. Copying the packet will ensure that the packet
     
    26932693 *
    26942694 * @note This function only works for OSPF version 2 packets.
    2695  * @note trace_get_next_ospf_lsa_v2() should be subequently used to process the LSAs
     2695 * @note trace_get_next_ospf_lsa_v2() should be subsequently used to process the LSAs
    26962696 */
    26972697DLLEXPORT SIMPLE_FUNCTION
     
    27162716 *
    27172717 * @note This function only works for OSPF version 2 packets.
    2718  * @note trace_get_next_ospf_lsa_header_v2() should be subequently used to process the LSA headers
     2718 * @note trace_get_next_ospf_lsa_header_v2() should be subsequently used to process the LSA headers
    27192719 */
    27202720DLLEXPORT SIMPLE_FUNCTION
     
    27392739 *
    27402740 * @note This function only works for OSPF version 2 packets.
    2741  * @note trace_get_next_ospf_link_v2() should be subequently used to process
     2741 * @note trace_get_next_ospf_link_v2() should be subsequently used to process
    27422742 * the links
    27432743 */
  • lib/trace_parallel.c

    ree6e802 rb148e3b  
    14711471                libtrace->config.reporter_thold = 100;
    14721472        if (libtrace->config.burst_size <= 0)
    1473                 libtrace->config.burst_size = 10;
     1473                libtrace->config.burst_size = 32;
    14741474        if (libtrace->config.thread_cache_size <= 0)
    1475                 libtrace->config.thread_cache_size = 20;
     1475                libtrace->config.thread_cache_size = 64;
    14761476        if (libtrace->config.cache_size <= 0)
    14771477                libtrace->config.cache_size = (libtrace->config.hasher_queue_size + 1) * libtrace->perpkt_thread_count;
  • test/test-tracetime-parallel.c

    r8decff7 rb148e3b  
    232232
    233233        trace_set_reporter_thold(trace, 1);
     234        trace_set_burst_size(trace, 10);
    234235
    235236        // Start it
Note: See TracChangeset for help on using the changeset viewer.