source: lib/format_dpdk.h @ 8efa986

Branches/tags: cachetimestamps, develop, dpdk-ndag, etsilive, rc-4.0.3, rc-4.0.4, ringdecrementfix, ringperformance
Last change on this file since 8efa986 was 8efa986, checked in by Shane Alcock <salcock@…>, 3 years ago

Ensure dpdk snap length is rounded up to next power of 2

Otherwise, we run the risk of the mbufs being too small for our
anticipated packets.

Changed dpdk_get_framing_length and dpdk_get_stats to be accessible
via format_dpdk.h

  • Property mode set to 100644
File size: 9.6 KB
Line 
1#ifndef LIBTRACE_FORMAT_DPDK_H_
2#define LIBTRACE_FORMAT_DPDK_H_
3
4#include <libtrace.h>
5#include "libtrace_int.h"
6
7/* We can deal with any minor differences by checking the RTE VERSION
8 * Typically DPDK backports some fixes (typically for building against
9 * newer kernels) to the older version of DPDK.
10 *
11 * These get released with the rX suffix. The following macros were added
12 * in these new releases.
13 *
14 * Below this is a log of version that required changes to the libtrace
15 * code (that we still attempt to support).
16 *
17 * DPDK 16.04 or newer is recommended.
18 * However 1.6 and newer are still likely supported.
19 */
20#include <rte_eal.h>
21#include <rte_version.h>
/* Fallbacks used only when rte_version.h does not already provide them.
 * RTE_VERSION_NUM packs a.b.c.d into a single comparable integer, one byte
 * per component, and RTE_VERSION is that value for the DPDK being built
 * against.
 */
#ifndef RTE_VERSION_NUM
#       define RTE_VERSION_NUM(a,b,c,d) ((a) << 24 | (b) << 16 | (c) << 8 | (d))
#endif
#ifndef RTE_VER_PATCH_RELEASE
#       define RTE_VER_PATCH_RELEASE 0
#endif
#ifndef RTE_VERSION
#       define RTE_VERSION RTE_VERSION_NUM(RTE_VER_MAJOR,RTE_VER_MINOR, \
        RTE_VER_PATCH_LEVEL, RTE_VER_PATCH_RELEASE)
#endif

/* 1.6.0r2 :
 *      rte_eal_pci_set_blacklist() is removed
 *      device_list is renamed to pci_device_list
 *      In the 1.7.0 release rte_eal_pci_probe is called by rte_eal_init,
 *      as such we apply the whitelist before rte_eal_init.
 *      This also works correctly with DPDK 1.6.0r2.
 *
 * Replaced by:
 *      rte_devargs (we can simply whitelist)
 */
#if RTE_VERSION <= RTE_VERSION_NUM(1, 6, 0, 1)
#       define DPDK_USE_BLACKLIST 1
#else
#       define DPDK_USE_BLACKLIST 0
#endif

/*
 * 1.7.0 :
 *      rte_pmd_init_all is removed
 *
 * Replaced by:
 *      Nothing, no longer needed
 */
#if RTE_VERSION < RTE_VERSION_NUM(1, 7, 0, 0)
#       define DPDK_USE_PMD_INIT 1
#else
#       define DPDK_USE_PMD_INIT 0
#endif

/* 1.7.0-rc3 :
 *
 * Since 1.7.0-rc3 rte_eal_pci_probe is called as part of rte_eal_init.
 * Somewhere between 1.7 and 1.8 calling it twice broke, so we should not
 * call it twice.
 */
#if RTE_VERSION < RTE_VERSION_NUM(1, 7, 0, 3)
#       define DPDK_USE_PCI_PROBE 1
#else
#       define DPDK_USE_PCI_PROBE 0
#endif

/* 1.8.0-rc1 :
 * LOG LEVEL is a command line option which overrides what
 * we previously set it to.
 */
#if RTE_VERSION >= RTE_VERSION_NUM(1, 8, 0, 1)
#       define DPDK_USE_LOG_LEVEL 1
#else
#       define DPDK_USE_LOG_LEVEL 0
#endif

/* 1.8.0-rc2 :
 * rx/tx_conf thresholds can be set to NULL in rte_eth_rx/tx_queue_setup;
 * this uses the default values, which are better tuned per device.
 * See issue #26
 */
#if RTE_VERSION >= RTE_VERSION_NUM(1, 8, 0, 2)
#       define DPDK_USE_NULL_QUEUE_CONFIG 1
#else
#       define DPDK_USE_NULL_QUEUE_CONFIG 0
#endif

/* 2.0.0-rc1 :
 * Unifies RSS hash between cards
 */
#if RTE_VERSION >= RTE_VERSION_NUM(2, 0, 0, 1)
#       define RX_RSS_FLAGS (ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP | \
                             ETH_RSS_SCTP)
#else
#       define RX_RSS_FLAGS (ETH_RSS_IPV4_UDP | ETH_RSS_IPV6 | ETH_RSS_IPV4 | \
                             ETH_RSS_IPV4_TCP | ETH_RSS_IPV6_TCP |\
                             ETH_RSS_IPV6_UDP)
#endif

/* v16.07-rc1 - deprecated
 * rte_mempool_avail_count to replace rte_mempool_count
 * rte_mempool_in_use_count to replace rte_mempool_free_count
 *
 * The code uses the new names; on older releases map them to the old ones.
 */
#if RTE_VERSION < RTE_VERSION_NUM(16, 7, 0, 1)
#define rte_mempool_avail_count rte_mempool_count
#define rte_mempool_in_use_count rte_mempool_free_count
#endif
115
116#include <rte_per_lcore.h>
117#include <rte_debug.h>
118#include <rte_errno.h>
119#include <rte_common.h>
120#include <rte_log.h>
121#include <rte_memcpy.h>
122#include <rte_prefetch.h>
123#include <rte_branch_prediction.h>
124#include <rte_pci.h>
125#include <rte_ether.h>
126#include <rte_ethdev.h>
127#include <rte_ring.h>
128#include <rte_mempool.h>
129#include <rte_mbuf.h>
130#include <rte_launch.h>
131#include <rte_lcore.h>
132#include <rte_per_lcore.h>
133#include <rte_cycles.h>
134#include <pthread.h>
135#ifdef __FreeBSD__
136#include <pthread_np.h>
137#endif
138
139
/* 16.04-rc3: ETH_LINK_SPEED_X are replaced with ETH_SPEED_NUM_X.
 * ETH_LINK_SPEED_ are reused as flags, ugly.
 * We use the new way in this code, so when the new names are missing
 * map them onto the old ones.
 */
#ifndef ETH_SPEED_NUM_1G
        #define ETH_SPEED_NUM_1G ETH_LINK_SPEED_1000
        #define ETH_SPEED_NUM_10G ETH_LINK_SPEED_10G
        #define ETH_SPEED_NUM_20G ETH_LINK_SPEED_20G
        #define ETH_SPEED_NUM_40G ETH_LINK_SPEED_40G
#endif
150
/* The default size of memory buffers to use - This is the max size of standard
 * ethernet packet less the size of the MAC CHECKSUM, rounded up to the
 * next power of 2, plus the RTE_PKTMBUF_HEADROOM. */
#define RX_MBUF_SIZE (2048 + RTE_PKTMBUF_HEADROOM)

/* The minimum number of memory buffers per queue tx or rx. Based on
 * the requirement of the memory pool with 128 per thread buffers, needing
 * at least 128*1.5 = 192 buffers. Our code allocates 128*2 to be safe.
 */
#define MIN_NB_BUF 128

/* Number of receive memory buffers to use.
 * By default this is limited by driver to 4k and must be a multiple of 128.
 * A modification can be made to the driver to remove this limit.
 * This can be increased in the driver and here.
 * Should be at least MIN_NB_BUF.
 * We choose 2K rather than 4K because it enables the usage of sse vector
 * drivers which are significantly faster than using the larger buffer.
 */
#define NB_RX_MBUF (4096/2)

/* Number of send memory buffers to use.
 * Same limits apply as those to NB_RX_MBUF.
 */
#define NB_TX_MBUF 1024

/* The size of the PCI blacklist needs to be big enough to contain
 * every PCI device address (listed by lspci every bus:device.function tuple).
 */
#define BLACK_LIST_SIZE 50

/* The maximum number of characters the mempool name can be */
#define MEMPOOL_NAME_LEN 20

/* For single threaded libtrace we read packets as a batch/burst;
 * this is the maximum size of said burst */
#define BURST_SIZE 32
188
189
/* ~~~~~~~~~~~~~~~~~~~~~~ Advanced settings ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * THESE MAY REQUIRE MODIFICATIONS TO INTEL DPDK
 *
 * Make sure you understand what these are doing before enabling them.
 * They might make traces incompatible with other builds etc.
 *
 * These are also included to show how to do some things which aren't
 * obvious in the DPDK documentation.
 */

/* Print verbose messages to stderr */
#define DEBUG 0

/* Use clock_gettime() for nanosecond resolution rather than gettimeofday().
 * Only turn on if you know clock_gettime is a vsyscall on your system,
 * otherwise it could be a large overhead. Again gettimeofday() should be
 * a vsyscall also; if it's not you should seriously consider updating your
 * kernel.
 */
#ifdef HAVE_CLOCK_GETTIME
/* You can turn this on (set to 1) to prefer clock_gettime */
#define USE_CLOCK_GETTIME 1
#else
/* DON'T CHANGE THIS !!! */
#define USE_CLOCK_GETTIME 0
#endif

/* This is fairly safe to turn on - currently there appears to be a 'bug'
 * in DPDK that will remove the checksum by making the packet appear 4 bytes
 * smaller than what it really is. Most formats don't include the checksum,
 * hence writing out a port such as int: ring: and dpdk: assumes there
 * is no checksum and will attempt to write the checksum as part of the
 * packet.
 */
#define GET_MAC_CRC_CHECKSUM 0

/* This requires a modification of the pmd drivers (inside Intel DPDK)
 * TODO this requires updating (packet sizes are wrong, TS most likely also)
 */
#define HAS_HW_TIMESTAMPS_82580 0

#if HAS_HW_TIMESTAMPS_82580
# define TS_NBITS_82580     40
/* The maximum on the +ve or -ve side that we can be, make it half way.
 * Half of 2^TS_NBITS_82580, computed as an integer shift. */
# define MAXSKEW_82580 ((uint64_t) 1 << (TS_NBITS_82580 - 1))
/* True when v2 lies strictly inside (v1 - var, v1 + var). The lower bound
 * is written as v1 < v2 + var so unsigned operands cannot wrap below zero
 * when v1 < var. */
#define WITHIN_VARIANCE(v1,v2,var) (((v1) < (v2) + (var)) && ((v1) + (var) > (v2)))
#endif
237
/* As per Intel 82580 specification - mismatch in 82580 datasheet:
 * it states ts is stored in Big Endian, however it's actually Little. */
struct hw_timestamp_82580 {
        uint64_t reserved;
        uint64_t timestamp; /* Little Endian, only lower 40 bits are valid */
};
244
/* Run state tracked for a DPDK input: never started, running or paused. */
enum paused_state {
        DPDK_NEVER_STARTED,
        DPDK_RUNNING,
        DPDK_PAUSED,
};
250
/* State kept for a single DPDK stream.
 * Member order matters to anything that initialises this struct
 * positionally, so new members should be appended.
 */
struct dpdk_per_stream_t
{
        uint16_t queue_id;
        uint64_t ts_last_sys; /* System timestamp of our most recent packet in nanoseconds */
        struct rte_mempool *mempool;
        int lcore;
#if HAS_HW_TIMESTAMPS_82580
        /* Timestamping only relevant to RX */
        uint64_t ts_first_sys; /* System timestamp of the first packet in nanoseconds */
        uint32_t wrap_count; /* Number of times the NIC clock has wrapped around completely */
#endif
} ALIGN_STRUCT(CACHE_LINE_SIZE); /* project macro; presumably cache-line alignment - confirm */
263
/* Initialiser for a dpdk_per_stream_t that is not yet bound to anything:
 * queue_id and lcore are -1 (queue_id is unsigned, so it holds the wrapped
 * value), mempool is NULL and every other member is zero.
 * Designated initialisers make this independent of member order and of
 * whether the HAS_HW_TIMESTAMPS_82580 members are compiled in; members not
 * named here are zero-initialised.
 */
#define DPDK_EMPTY_STREAM { .queue_id = -1, .ts_last_sys = 0, \
                            .mempool = NULL, .lcore = -1 }
269
/* Shorthand so the per-stream state can be named without the struct keyword. */
typedef struct dpdk_per_stream_t dpdk_per_stream_t;
271
272
273libtrace_eventobj_t dpdk_trace_event(libtrace_t *trace,
274                libtrace_packet_t *packet);
275int dpdk_pstart_input (libtrace_t *libtrace);
276int dpdk_start_input (libtrace_t *libtrace);
277int dpdk_config_input (libtrace_t *libtrace,
278                trace_option_t option, void *data);
279int dpdk_init_input (libtrace_t *libtrace);
280int dpdk_pause_input(libtrace_t * libtrace);
281int dpdk_fin_input(libtrace_t * libtrace);
282int dpdk_read_packet (libtrace_t *libtrace, libtrace_packet_t *packet);
283int dpdk_pregister_thread(libtrace_t *libtrace, libtrace_thread_t *t,
284                bool reading);
285void dpdk_punregister_thread(libtrace_t *libtrace, libtrace_thread_t *t);
286void dpdk_get_stats(libtrace_t *trace, libtrace_stat_t *stats);
287int dpdk_get_framing_length (const libtrace_packet_t *packet) ;
288int dpdk_read_packet_stream (libtrace_t *libtrace,
289                dpdk_per_stream_t *stream,
290                libtrace_message_queue_t *mesg,
291                struct rte_mbuf* pkts_burst[],
292                size_t nb_packets);
293int dpdk_prepare_packet(libtrace_t *libtrace,
294                libtrace_packet_t *packet, void *buffer,
295                libtrace_rt_types_t rt_type, uint32_t flags);
296#endif
Note: See TracBrowser for help on using the repository browser.