- Timestamp:
- 12/05/17 14:56:48 (3 years ago)
- Branches:
- cachetimestamps, develop, dpdk-ndag, etsilive, master, rc-4.0.3, rc-4.0.4, ringdecrementfix, ringperformance
- Children:
- d83ba86a
- Parents:
- 18c908d
- Location:
- lib
- Files:
-
- 2 added
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
lib/Makefile.am
--- rea75ec2
+++ rc7e547e
@@ -34,5 +34,5 @@

 if HAVE_DPDK
-NATIVEFORMATS+= format_dpdk.c
+NATIVEFORMATS+= format_dpdk.c format_dpdkndag.c
 # So we also make libtrace.mk in dpdk otherwise automake tries to expand
 # it too early which I cannot seem to stop unless we use a path that
-
lib/format_dpdk.c
--- rdb84bb2
+++ rc7e547e
@@ -42,4 +42,5 @@
 #include "libtrace_arphrd.h"
 #include "hash_toeplitz.h"
+#include "format_dpdk.h"

 #ifdef HAVE_INTTYPES_H
@@ -59,184 +60,4 @@
 #endif

-/* We can deal with any minor differences by checking the RTE VERSION
- * Typically DPDK backports some fixes (typically for building against
- * newer kernels) to the older version of DPDK.
- *
- * These get released with the rX suffix. The following macros where added
- * in these new releases.
- *
- * Below this is a log of version that required changes to the libtrace
- * code (that we still attempt to support).
- *
- * DPDK 16.04 or newer is recommended.
- * However 1.6 and newer are still likely supported.
- */
-#include <rte_eal.h>
-#include <rte_version.h>
-#ifndef RTE_VERSION_NUM
-# define RTE_VERSION_NUM(a,b,c,d) ((a) << 24 | (b) << 16 | (c) << 8 | (d))
-#endif
-#ifndef RTE_VER_PATCH_RELEASE
-# define RTE_VER_PATCH_RELEASE 0
-#endif
-#ifndef RTE_VERSION
-# define RTE_VERSION RTE_VERSION_NUM(RTE_VER_MAJOR,RTE_VER_MINOR, \
-	RTE_VER_PATCH_LEVEL, RTE_VER_PATCH_RELEASE)
-#endif
-
-/* 1.6.0r2 :
- *	rte_eal_pci_set_blacklist() is removed
- *	device_list is renamed to pci_device_list
- *	In the 1.7.0 release rte_eal_pci_probe is called by rte_eal_init
- *	as such we do apply the whitelist before rte_eal_init.
- *	This also works correctly with DPDK 1.6.0r2.
- *
- * Replaced by:
- *	rte_devargs (we can simply whitelist)
- */
-#if RTE_VERSION <= RTE_VERSION_NUM(1, 6, 0, 1)
-# define DPDK_USE_BLACKLIST 1
-#else
-# define DPDK_USE_BLACKLIST 0
-#endif
-
-/*
- * 1.7.0 :
- *	rte_pmd_init_all is removed
- *
- * Replaced by:
- *	Nothing, no longer needed
- */
-#if RTE_VERSION < RTE_VERSION_NUM(1, 7, 0, 0)
-# define DPDK_USE_PMD_INIT 1
-#else
-# define DPDK_USE_PMD_INIT 0
-#endif
-
-/* 1.7.0-rc3 :
- *
- * Since 1.7.0-rc3 rte_eal_pci_probe is called as part of rte_eal_init.
- * Somewhere between 1.7 and 1.8 calling it twice broke so we should not call
- * it twice.
- */
-#if RTE_VERSION < RTE_VERSION_NUM(1, 7, 0, 3)
-# define DPDK_USE_PCI_PROBE 1
-#else
-# define DPDK_USE_PCI_PROBE 0
-#endif
-
-/* 1.8.0-rc1 :
- * LOG LEVEL is a command line option which overrides what
- * we previously set it to.
- */
-#if RTE_VERSION >= RTE_VERSION_NUM(1, 8, 0, 1)
-# define DPDK_USE_LOG_LEVEL 1
-#else
-# define DPDK_USE_LOG_LEVEL 0
-#endif
-
-/* 1.8.0-rc2
- * rx/tx_conf thresholds can be set to NULL in rte_eth_rx/tx_queue_setup
- * this uses the default values, which are better tuned per device
- * See issue #26
- */
-#if RTE_VERSION >= RTE_VERSION_NUM(1, 8, 0, 2)
-# define DPDK_USE_NULL_QUEUE_CONFIG 1
-#else
-# define DPDK_USE_NULL_QUEUE_CONFIG 0
-#endif
-
-/* 2.0.0-rc1
- * Unifies RSS hash between cards
- */
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 0, 0, 1)
-# define RX_RSS_FLAGS (ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP | \
-	ETH_RSS_SCTP)
-#else
-# define RX_RSS_FLAGS (ETH_RSS_IPV4_UDP | ETH_RSS_IPV6 | ETH_RSS_IPV4 | \
-	ETH_RSS_IPV4_TCP | ETH_RSS_IPV6_TCP |\
-	ETH_RSS_IPV6_UDP)
-#endif
-
-/* v16.07-rc1 - deprecated
- * rte_mempool_avail_count to replace rte_mempool_count
- * rte_mempool_in_use_count to replace rte_mempool_free_count
- */
-#if RTE_VERSION < RTE_VERSION_NUM(16, 7, 0, 1)
-#define rte_mempool_avail_count rte_mempool_count
-#define rte_mempool_in_use_count rte_mempool_free_count
-#endif
-
-#include <rte_per_lcore.h>
-#include <rte_debug.h>
-#include <rte_errno.h>
-#include <rte_common.h>
-#include <rte_log.h>
-#include <rte_memcpy.h>
-#include <rte_prefetch.h>
-#include <rte_branch_prediction.h>
-#include <rte_pci.h>
-#include <rte_ether.h>
-#include <rte_ethdev.h>
-#include <rte_ring.h>
-#include <rte_mempool.h>
-#include <rte_mbuf.h>
-#include <rte_launch.h>
-#include <rte_lcore.h>
-#include <rte_per_lcore.h>
-#include <rte_cycles.h>
-#include <pthread.h>
-#ifdef __FreeBSD__
-#include <pthread_np.h>
-#endif
-
-/* 16.04-rc3 ETH_LINK_SPEED_X are replaced with ETH_SPEED_NUM_X.
- * ETH_LINK_SPEED_ are reused as flags, ugly.
- * We use the new way in this code.
- */
-#ifndef ETH_SPEED_NUM_1G
-#define ETH_SPEED_NUM_1G ETH_LINK_SPEED_1000
-#define ETH_SPEED_NUM_10G ETH_LINK_SPEED_10G
-#define ETH_SPEED_NUM_20G ETH_LINK_SPEED_20G
-#define ETH_SPEED_NUM_40G ETH_LINK_SPEED_40G
-#endif
-
-/* The default size of memory buffers to use - This is the max size of standard
- * ethernet packet less the size of the MAC CHECKSUM */
-#define RX_MBUF_SIZE 1514
-
-/* The minimum number of memory buffers per queue tx or rx. Based on
- * the requirement of the memory pool with 128 per thread buffers, needing
- * at least 128*1.5 = 192 buffers. Our code allocates 128*2 to be safe.
- */
-#define MIN_NB_BUF 128
-
-/* Number of receive memory buffers to use
- * By default this is limited by driver to 4k and must be a multiple of 128.
- * A modification can be made to the driver to remove this limit.
- * This can be increased in the driver and here.
- * Should be at least MIN_NB_BUF.
- * We choose 2K rather than 4K because it enables the usage of sse vector
- * drivers which are significantly faster than using the larger buffer.
- */
-#define NB_RX_MBUF (4096/2)
-
-/* Number of send memory buffers to use.
- * Same limits apply as those to NB_TX_MBUF.
- */
-#define NB_TX_MBUF 1024
-
-/* The size of the PCI blacklist needs to be big enough to contain
- * every PCI device address (listed by lspci every bus:device.function tuple).
- */
-#define BLACK_LIST_SIZE 50
-
-/* The maximum number of characters the mempool name can be */
-#define MEMPOOL_NAME_LEN 20
-
-/* For single threaded libtrace we read packets as a batch/burst
- * this is the maximum size of said burst */
-#define BURST_SIZE 32
-
 #define MBUF(x) ((struct rte_mbuf *) x)
 /* Get the original placement of the packet data */
@@ -259,89 +80,8 @@
 #endif

-/* ~~~~~~~~~~~~~~~~~~~~~~ Advance settings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * THESE MAY REQUIRE MODIFICATIONS TO INTEL DPDK
- *
- * Make sure you understand what these are doing before enabling them.
- * They might make traces incompatible with other builds etc.
- *
- * These are also included to show how to do somethings which aren't
- * obvious in the DPDK documentation.
- */
-
-/* Print verbose messages to stderr */
-#define DEBUG 0
-
-/* Use clock_gettime() for nanosecond resolution rather than gettimeofday()
- * only turn on if you know clock_gettime is a vsyscall on your system
- * otherwise could be a large overhead. Again gettimeofday() should be
- * vsyscall also if it's not you should seriously consider updating your
- * kernel.
- */
-#ifdef HAVE_CLOCK_GETTIME
-/* You can turn this on (set to 1) to prefer clock_gettime */
-#define USE_CLOCK_GETTIME 1
-#else
-/* DON'T CHANGE THIS !!! */
-#define USE_CLOCK_GETTIME 0
-#endif
-
-/* This is fairly safe to turn on - currently there appears to be a 'bug'
- * in DPDK that will remove the checksum by making the packet appear 4bytes
- * smaller than what it really is. Most formats don't include the checksum
- * hence writing out a port such as int: ring: and dpdk: assumes there
- * is no checksum and will attempt to write the checksum as part of the
- * packet
- */
-#define GET_MAC_CRC_CHECKSUM 0
-
-/* This requires a modification of the pmd drivers (inside Intel DPDK)
- * TODO this requires updating (packet sizes are wrong TS most likely also)
- */
-#define HAS_HW_TIMESTAMPS_82580 0
-
-#if HAS_HW_TIMESTAMPS_82580
-# define TS_NBITS_82580 40
-/* The maximum on the +ve or -ve side that we can be, make it half way */
-# define MAXSKEW_82580 ((uint64_t) (.5 * (double)(1ull<<TS_NBITS_82580)))
-#define WITHIN_VARIANCE(v1,v2,var) (((v1) - (var) < (v2)) && ((v1) + (var) > (v2)))
-#endif

 static pthread_mutex_t dpdk_lock = PTHREAD_MUTEX_INITIALIZER;
 /* Memory pools Per NUMA node */
 static struct rte_mempool * mem_pools[4][RTE_MAX_LCORE] = {{0}};
-
-/* As per Intel 82580 specification - mismatch in 82580 datasheet
- * it states ts is stored in Big Endian, however its actually Little */
-struct hw_timestamp_82580 {
-	uint64_t reserved;
-	uint64_t timestamp; /* Little Endian only lower 40 bits are valid */
-};
-
-enum paused_state {
-	DPDK_NEVER_STARTED,
-	DPDK_RUNNING,
-	DPDK_PAUSED,
-};
-
-struct dpdk_per_stream_t
-{
-	uint16_t queue_id;
-	uint64_t ts_last_sys; /* System timestamp of our most recent packet in nanoseconds */
-	struct rte_mempool *mempool;
-	int lcore;
-#if HAS_HW_TIMESTAMPS_82580
-	/* Timestamping only relevant to RX */
-	uint64_t ts_first_sys; /* Sytem timestamp of the first packet in nanoseconds */
-	uint32_t wrap_count; /* Number of times the NIC clock has wrapped around completely */
-#endif
-} ALIGN_STRUCT(CACHE_LINE_SIZE);
-
-#if HAS_HW_TIMESTAMPS_82580
-#define DPDK_EMPTY_STREAM {-1, 0, NULL, -1, 0, 0}
-#else
-#define DPDK_EMPTY_STREAM {-1, 0, NULL, -1}
-#endif
-
-typedef struct dpdk_per_stream_t dpdk_per_stream_t;

 /* Used by both input and output however some fields are not used
@@ -833,5 +573,5 @@
 }

-static int dpdk_init_input (libtrace_t *libtrace) {
+int dpdk_init_input (libtrace_t *libtrace) {
 	dpdk_per_stream_t stream = DPDK_EMPTY_STREAM;
 	char err[500];
@@ -916,5 +656,5 @@
  *
  */
-static int dpdk_config_input (libtrace_t *libtrace,
+int dpdk_config_input (libtrace_t *libtrace,
 		trace_option_t option,
 		void *data) {
@@ -1512,5 +1252,5 @@
 }

-static int dpdk_start_input (libtrace_t *libtrace) {
+int dpdk_start_input (libtrace_t *libtrace) {
 	char err[500];
 	err[0] = 0;
@@ -1542,5 +1282,5 @@
 }

-static int dpdk_pstart_input (libtrace_t *libtrace) {
+int dpdk_pstart_input (libtrace_t *libtrace) {
 	char err[500];
 	int i=0, phys_cores=0;
@@ -1615,5 +1355,5 @@
  * in any other manner including statistics functions.
  */
-static int dpdk_pregister_thread(libtrace_t *libtrace, libtrace_thread_t *t, bool reading)
+int dpdk_pregister_thread(libtrace_t *libtrace, libtrace_thread_t *t, bool reading)
 {
 #if DEBUG
@@ -1665,5 +1405,5 @@
  * they are destroyed.
  */
-static void dpdk_punregister_thread(libtrace_t *libtrace UNUSED, libtrace_thread_t *t UNUSED)
+void dpdk_punregister_thread(libtrace_t *libtrace UNUSED, libtrace_thread_t *t UNUSED)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
@@ -1701,5 +1441,5 @@
 }

-static int dpdk_pause_input(libtrace_t * libtrace) {
+int dpdk_pause_input(libtrace_t * libtrace) {
 	libtrace_list_node_t *tmp = FORMAT_DATA_HEAD(libtrace);
 	/* This stops the device, but can be restarted using rte_eth_dev_start() */
@@ -1756,5 +1496,5 @@
 }

-static int dpdk_fin_input(libtrace_t * libtrace) {
+int dpdk_fin_input(libtrace_t * libtrace) {
 	libtrace_list_node_t * n;
 	/* Free our memory structures */
@@ -1858,5 +1598,5 @@
 }

-static int dpdk_prepare_packet(libtrace_t *libtrace UNUSED,
+int dpdk_prepare_packet(libtrace_t *libtrace UNUSED,
 		libtrace_packet_t *packet, void *buffer,
 		libtrace_rt_types_t rt_type, uint32_t flags) {
@@ -2116,5 +1856,5 @@
 /** Reads at least one packet or returns an error
  */
-static inline int dpdk_read_packet_stream (libtrace_t *libtrace,
+int dpdk_read_packet_stream (libtrace_t *libtrace,
 		dpdk_per_stream_t *stream,
 		libtrace_message_queue_t *mesg,
@@ -2181,5 +1921,5 @@
 }

-static int dpdk_read_packet (libtrace_t *libtrace, libtrace_packet_t *packet) {
+int dpdk_read_packet (libtrace_t *libtrace, libtrace_packet_t *packet) {
 	int nb_rx; /* Number of rx packets we've received */
 	dpdk_per_stream_t *stream = FORMAT_DATA_FIRST(libtrace);
@@ -2199,4 +1939,5 @@
 	if (FORMAT(libtrace)->burst_size != FORMAT(libtrace)->burst_offset) {
 		packet->buffer = FORMAT(libtrace)->burst_pkts[FORMAT(libtrace)->burst_offset++];
+		packet->trace = libtrace;
 		dpdk_prepare_packet(libtrace, packet, packet->buffer, packet->type, 0);
 		return 1; // TODO should be bytes read, which essentially useless anyway
@@ -2210,4 +1951,5 @@
 		FORMAT(libtrace)->burst_offset = 1;
 		packet->buffer = FORMAT(libtrace)->burst_pkts[0];
+		packet->trace = libtrace;
 		dpdk_prepare_packet(libtrace, packet, packet->buffer, packet->type, 0);
 		return 1;
@@ -2283,5 +2025,5 @@
  * create a select()able file descriptor in DPDK.
  */
-static libtrace_eventobj_t dpdk_trace_event(libtrace_t *trace,
+libtrace_eventobj_t dpdk_trace_event(libtrace_t *trace,
 		libtrace_packet_t *packet) {
 	libtrace_eventobj_t event = {0,0,0.0,0};
-
lib/libtrace.h.in
--- ra9d0e40
+++ rc7e547e
@@ -396,4 +396,5 @@
 	TRACE_FORMAT_PCAPNG =18, /**< PCAP-NG trace file */
 	TRACE_FORMAT_NDAG =19, /**< DAG multicast over a network */
+	TRACE_FORMAT_DPDK_NDAG =20, /**< DAG multicast over a network, received via DPDK */
 };

-
lib/libtrace_int.h
--- ra9d0e40
+++ rc7e547e
@@ -1243,4 +1243,8 @@
 /** Constructor for Intels DPDK format module */
 void dpdk_constructor(void);
+
+/** Constructor for receiving network DAG via Intels DPDK format module */
+void dpdkndag_constructor(void);
+
 #endif

-
lib/trace.c
--- rea75ec2
+++ rc7e547e
@@ -152,5 +152,6 @@
 #endif
 #ifdef HAVE_DPDK
-	dpdk_constructor();
+	dpdk_constructor();
+	dpdkndag_constructor();
 #endif
 }
Note: See TracChangeset for help on using the changeset viewer.