source: lib/format_dpdk.h @ 49f8ceb

cachetimestamps develop etsilive rc-4.0.4 ringdecrementfix ringperformance
Last change on this file since 49f8ceb was 6d17620, checked in by Richard Sanger <rsanger@…>, 3 years ago

Updates to DPDK to build with 18.02.1

  • Updates to improve backwards compatibility
  • Update to use a new portid_t as the size has changed between releases
  • Fix bug with variable order in tx, both values were zero so this didn't matter
  • Update build script to test newer dpdk versions, remove older versions no longer supported by new kernels
  • Property mode set to 100644
File size: 9.9 KB
Line 
1#ifndef LIBTRACE_FORMAT_DPDK_H_
2#define LIBTRACE_FORMAT_DPDK_H_
3
4#include <libtrace.h>
5#include "libtrace_int.h"
6
7/* We can deal with any minor differences by checking the RTE VERSION
8 * Typically DPDK backports some fixes (typically for building against
9 * newer kernels) to the older version of DPDK.
10 *
 * These get released with the rX suffix. The following macros were added
 * in these new releases.
13 *
14 * Below this is a log of version that required changes to the libtrace
15 * code (that we still attempt to support).
16 *
17 * DPDK 16.04 or newer is recommended.
18 * However 1.6 and newer are still likely supported.
19 */
20#include <rte_eal.h>
21#include <rte_version.h>
/* Fallbacks for DPDK headers that do not provide these version macros.
 *
 * RTE_VERSION_NUM packs four version components into one integer, one
 * byte each, most significant component first, so that versions can be
 * compared with the ordinary relational operators in #if.
 */
#if !defined(RTE_VERSION_NUM)
#define RTE_VERSION_NUM(a, b, c, d) \
        (((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
#endif

/* Treat a missing patch release number as 0 */
#if !defined(RTE_VER_PATCH_RELEASE)
#define RTE_VER_PATCH_RELEASE 0
#endif

/* Build RTE_VERSION from its components when DPDK does not define it */
#if !defined(RTE_VERSION)
#define RTE_VERSION \
        RTE_VERSION_NUM(RTE_VER_MAJOR, RTE_VER_MINOR, \
                        RTE_VER_PATCH_LEVEL, RTE_VER_PATCH_RELEASE)
#endif
32
33/* 1.6.0r2 :
34 *      rte_eal_pci_set_blacklist() is removed
35 *      device_list is renamed to pci_device_list
36 *      In the 1.7.0 release rte_eal_pci_probe is called by rte_eal_init
37 *      as such we do apply the whitelist before rte_eal_init.
38 *      This also works correctly with DPDK 1.6.0r2.
39 *
40 * Replaced by:
41 *      rte_devargs (we can simply whitelist)
42 */
43#if RTE_VERSION <= RTE_VERSION_NUM(1, 6, 0, 1)
44#       define DPDK_USE_BLACKLIST 1
45#else
46#       define DPDK_USE_BLACKLIST 0
47#endif
48
49/*
50 * 1.7.0 :
51 *      rte_pmd_init_all is removed
52 *
53 * Replaced by:
54 *      Nothing, no longer needed
55 */
56#if RTE_VERSION < RTE_VERSION_NUM(1, 7, 0, 0)
57#       define DPDK_USE_PMD_INIT 1
58#else
59#       define DPDK_USE_PMD_INIT 0
60#endif
61
62/* 1.7.0-rc3 :
63 *
64 * Since 1.7.0-rc3 rte_eal_pci_probe is called as part of rte_eal_init.
65 * Somewhere between 1.7 and 1.8 calling it twice broke so we should not call
66 * it twice.
67 */
68#if RTE_VERSION < RTE_VERSION_NUM(1, 7, 0, 3)
69#       define DPDK_USE_PCI_PROBE 1
70#else
71#       define DPDK_USE_PCI_PROBE 0
72#endif
73
74/* 1.8.0-rc1 :
75 * LOG LEVEL is a command line option which overrides what
76 * we previously set it to.
77 */
78#if RTE_VERSION >= RTE_VERSION_NUM(1, 8, 0, 1)
79#       define DPDK_USE_LOG_LEVEL 1
80#else
81#       define DPDK_USE_LOG_LEVEL 0
82#endif
83
84/* 1.8.0-rc2
85 * rx/tx_conf thresholds can be set to NULL in rte_eth_rx/tx_queue_setup
86 * this uses the default values, which are better tuned per device
87 * See issue #26
88 */
89#if RTE_VERSION >= RTE_VERSION_NUM(1, 8, 0, 2)
90#       define DPDK_USE_NULL_QUEUE_CONFIG 1
91#else
92#       define DPDK_USE_NULL_QUEUE_CONFIG 0
93#endif
94
95/* 2.0.0-rc1
96 * Unifies RSS hash between cards
97 */
98#if RTE_VERSION >= RTE_VERSION_NUM(2, 0, 0, 1)
99#       define RX_RSS_FLAGS (ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP | \
100                             ETH_RSS_SCTP)
101#else
102#       define RX_RSS_FLAGS (ETH_RSS_IPV4_UDP | ETH_RSS_IPV6 | ETH_RSS_IPV4 | \
103                             ETH_RSS_IPV4_TCP | ETH_RSS_IPV6_TCP |\
104                             ETH_RSS_IPV6_UDP)
105#endif
106
107/* v16.07-rc1 - deprecated
108 * rte_mempool_avail_count to replace rte_mempool_count
109 * rte_mempool_in_use_count to replace rte_mempool_free_count
110 */
111#if RTE_VERSION < RTE_VERSION_NUM(16, 7, 0, 1)
112#define rte_mempool_avail_count rte_mempool_count
113#define rte_mempool_in_use_count rte_mempool_free_count
114#endif
115
116/* 17.05-rc1 deprecated, 17.08 removed
117 * rte_set_log_level -> rte_log_set_global_level
118 */
119#if RTE_VERSION < RTE_VERSION_NUM(17, 5, 0, 1)
120#define rte_log_set_global_level rte_set_log_level
121#endif
122
123/* 17.11-rc1 increases port size from 8 to 16bits
124 */
125#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 1)
126typedef uint16_t portid_t;
127#else
128typedef uint8_t portid_t;
129#endif
130
131
132#include <rte_per_lcore.h>
133#include <rte_debug.h>
134#include <rte_errno.h>
135#include <rte_common.h>
136#include <rte_log.h>
137#include <rte_memcpy.h>
138#include <rte_prefetch.h>
139#include <rte_branch_prediction.h>
140#include <rte_pci.h>
141#include <rte_ether.h>
142#include <rte_ethdev.h>
143#include <rte_ring.h>
144#include <rte_mempool.h>
145#include <rte_mbuf.h>
146#include <rte_launch.h>
147#include <rte_lcore.h>
148#include <rte_per_lcore.h>
149#include <rte_cycles.h>
150#include <pthread.h>
151#ifdef __FreeBSD__
152#include <pthread_np.h>
153#endif
154
155
/* DPDK 16.04-rc3 replaces ETH_LINK_SPEED_X with ETH_SPEED_NUM_X.
 * The ETH_LINK_SPEED_ names are reused as flags, which is ugly.
 * This code uses the new names; when they are missing, map them onto the
 * old ones.
 */
#if !defined(ETH_SPEED_NUM_1G)
#define ETH_SPEED_NUM_1G  ETH_LINK_SPEED_1000
#define ETH_SPEED_NUM_10G ETH_LINK_SPEED_10G
#define ETH_SPEED_NUM_20G ETH_LINK_SPEED_20G
#define ETH_SPEED_NUM_40G ETH_LINK_SPEED_40G
#endif
166
/* Default size of a memory buffer: the maximum size of a standard ethernet
 * packet less the size of the MAC checksum, rounded up to the next power
 * of 2, plus RTE_PKTMBUF_HEADROOM.
 */
#define RX_MBUF_SIZE (RTE_PKTMBUF_HEADROOM + 2048)

/* Minimum number of memory buffers per queue, tx or rx.
 * Based on the requirement of the memory pool with 128 per-thread buffers,
 * needing at least 128*1.5 = 192 buffers. Our code allocates 128*2 to be
 * safe.
 */
#define MIN_NB_BUF 128

/* Number of receive memory buffers to use.
 * By default the driver limits this to 4k and it must be a multiple of
 * 128. The driver can be modified to remove this limit, in which case the
 * value can be increased both in the driver and here.
 * Should be at least MIN_NB_BUF.
 * 2K (half of the 4K limit) is chosen rather than 4K because it enables
 * the sse vector drivers, which are significantly faster than using the
 * larger buffer.
 */
#define NB_RX_MBUF 2048

/* Number of send memory buffers to use.
 * The same limits apply as for NB_RX_MBUF.
 */
#define NB_TX_MBUF 1024

/* Size of the PCI blacklist. It needs to be big enough to contain every
 * PCI device address (every bus:device.function tuple listed by lspci).
 */
#define BLACK_LIST_SIZE 50

/* Maximum number of characters in a mempool name */
#define MEMPOOL_NAME_LEN 20

/* Single threaded libtrace reads packets as a batch/burst; this is the
 * maximum size of such a burst.
 */
#define BURST_SIZE 32
204
205
/* ~~~~~~~~~~~~~~~~~~~~~~ Advanced settings ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * THESE MAY REQUIRE MODIFICATIONS TO INTEL DPDK
 *
 * Make sure you understand what these are doing before enabling them.
 * They might make traces incompatible with other builds etc.
 *
 * These are also included to show how to do some things which aren't
 * obvious in the DPDK documentation.
 */
215
/* Set to 1 to print verbose messages to stderr */
#define DEBUG 0

/* Prefer clock_gettime() (nanosecond resolution) over gettimeofday().
 * Only turn this on if you know clock_gettime is a vsyscall on your
 * system, otherwise it could be a large overhead. gettimeofday() should
 * also be a vsyscall; if it is not, seriously consider updating your
 * kernel.
 */
#ifndef HAVE_CLOCK_GETTIME
/* clock_gettime is not available - DON'T CHANGE THIS !!! */
#define USE_CLOCK_GETTIME 0
#else
/* Set to 1 to prefer clock_gettime */
#define USE_CLOCK_GETTIME 1
#endif

/* Fairly safe to turn on. There currently appears to be a 'bug' in DPDK
 * that removes the checksum by making the packet appear 4 bytes smaller
 * than it really is. Most formats do not include the checksum, so writing
 * to an output such as int: ring: or dpdk: assumes no checksum is present
 * and would attempt to write the checksum as part of the packet.
 */
#define GET_MAC_CRC_CHECKSUM 0
241
/* Hardware timestamp support for the Intel 82580.
 * This requires a modification of the pmd drivers (inside Intel DPDK).
 * TODO this requires updating (packet sizes are wrong, timestamps most
 * likely also).
 */
#define HAS_HW_TIMESTAMPS_82580 0

#if HAS_HW_TIMESTAMPS_82580
/* Number of valid bits in the 82580 timestamp */
# define TS_NBITS_82580     40

/* The maximum we can be out on the +ve or -ve side: half of the timestamp
 * range. Computed with an integer shift so no floating point is involved.
 */
# define MAXSKEW_82580 ((uint64_t) 1 << (TS_NBITS_82580 - 1))

/* True when v2 lies strictly between v1 - var and v1 + var.
 * Expressed as an absolute difference so that unsigned operands cannot
 * wrap around (v1 - var when v1 < var, or v1 + var near the type's
 * maximum), which would wrongly reject values that are in range.
 * Arguments are evaluated more than once; do not pass expressions with
 * side effects.
 */
# define WITHIN_VARIANCE(v1,v2,var) \
        ((((v1) > (v2)) ? ((v1) - (v2)) : ((v2) - (v1))) < (var))
#endif
253
/* Timestamp record as per the Intel 82580 specification.
 * Note the mismatch with the 82580 datasheet: it states the timestamp is
 * stored big endian, however it is actually little endian.
 */
struct hw_timestamp_82580 {
        /* Reserved */
        uint64_t reserved;
        /* Little endian; only the lower 40 bits are valid */
        uint64_t timestamp;
};
260
/* Run state values: never started, running, or paused.
 * The numeric values are spelled out; they match the implicit numbering.
 */
enum paused_state {
        DPDK_NEVER_STARTED = 0,
        DPDK_RUNNING = 1,
        DPDK_PAUSED = 2
};
266
/* Per-stream state, aligned to CACHE_LINE_SIZE.
 *
 * The field order must stay in step with the positional initialiser
 * DPDK_EMPTY_STREAM below.
 */
struct dpdk_per_stream_t {
        uint16_t queue_id;
        /* System timestamp of our most recent packet in nanoseconds */
        uint64_t ts_last_sys;
        struct rte_mempool *mempool;
        int lcore;
#if HAS_HW_TIMESTAMPS_82580
        /* Timestamping is only relevant to RX */
        /* System timestamp of the first packet in nanoseconds */
        uint64_t ts_first_sys;
        /* Number of times the NIC clock has wrapped around completely */
        uint32_t wrap_count;
#endif
} ALIGN_STRUCT(CACHE_LINE_SIZE);

/* Initialiser for an unused stream: queue_id and lcore are set to -1
 * (queue_id is unsigned, so it holds the all-ones value), the timestamps
 * and wrap count to 0 and the mempool to NULL.
 */
#if !HAS_HW_TIMESTAMPS_82580
#define DPDK_EMPTY_STREAM {(uint16_t)-1, 0, NULL, -1}
#else
#define DPDK_EMPTY_STREAM {(uint16_t)-1, 0, NULL, -1, 0, 0}
#endif

typedef struct dpdk_per_stream_t dpdk_per_stream_t;
287
288
289libtrace_eventobj_t dpdk_trace_event(libtrace_t *trace,
290                libtrace_packet_t *packet);
291int dpdk_pstart_input (libtrace_t *libtrace);
292int dpdk_start_input (libtrace_t *libtrace);
293int dpdk_config_input (libtrace_t *libtrace,
294                trace_option_t option, void *data);
295int dpdk_init_input (libtrace_t *libtrace);
296int dpdk_pause_input(libtrace_t * libtrace);
297int dpdk_fin_input(libtrace_t * libtrace);
298int dpdk_read_packet (libtrace_t *libtrace, libtrace_packet_t *packet);
299int dpdk_pregister_thread(libtrace_t *libtrace, libtrace_thread_t *t,
300                bool reading);
301void dpdk_punregister_thread(libtrace_t *libtrace, libtrace_thread_t *t);
302void dpdk_get_stats(libtrace_t *trace, libtrace_stat_t *stats);
303int dpdk_get_framing_length (const libtrace_packet_t *packet) ;
304int dpdk_read_packet_stream (libtrace_t *libtrace,
305                dpdk_per_stream_t *stream,
306                libtrace_message_queue_t *mesg,
307                struct rte_mbuf* pkts_burst[],
308                size_t nb_packets);
309int dpdk_prepare_packet(libtrace_t *libtrace,
310                libtrace_packet_t *packet, void *buffer,
311                libtrace_rt_types_t rt_type, uint32_t flags);
312#endif
Note: See TracBrowser for help on using the repository browser.