Changeset e940b4f
- Timestamp:
- 12/14/17 13:11:45 (4 years ago)
- Branches:
- cachetimestamps, develop, etsilive, master, rc-4.0.3, rc-4.0.4, ringdecrementfix, ringperformance
- Children:
- 49969f2
- Parents:
- 18c908d (diff), a8cfe71 (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
- Files:
-
- 2 added
- 9 edited
Legend:
- Unmodified
- Added
- Removed
-
lib/Makefile.am
rea75ec2 rc7e547e 34 34 35 35 if HAVE_DPDK 36 NATIVEFORMATS+= format_dpdk.c 36 NATIVEFORMATS+= format_dpdk.c format_dpdkndag.c 37 37 # So we also make libtrace.mk in dpdk otherwise automake tries to expand 38 38 # it too early which I cannot seem to stop unless we use a path that -
lib/format_dpdk.c
rdb84bb2 ra6f2d1d 42 42 #include "libtrace_arphrd.h" 43 43 #include "hash_toeplitz.h" 44 #include "format_dpdk.h" 44 45 45 46 #ifdef HAVE_INTTYPES_H … … 54 55 #include <endian.h> 55 56 #include <string.h> 57 #include <math.h> 56 58 57 59 #if HAVE_LIBNUMA 58 60 #include <numa.h> 59 61 #endif 60 61 /* We can deal with any minor differences by checking the RTE VERSION62 * Typically DPDK backports some fixes (typically for building against63 * newer kernels) to the older version of DPDK.64 *65 * These get released with the rX suffix. The following macros where added66 * in these new releases.67 *68 * Below this is a log of version that required changes to the libtrace69 * code (that we still attempt to support).70 *71 * DPDK 16.04 or newer is recommended.72 * However 1.6 and newer are still likely supported.73 */74 #include <rte_eal.h>75 #include <rte_version.h>76 #ifndef RTE_VERSION_NUM77 # define RTE_VERSION_NUM(a,b,c,d) ((a) << 24 | (b) << 16 | (c) << 8 | (d))78 #endif79 #ifndef RTE_VER_PATCH_RELEASE80 # define RTE_VER_PATCH_RELEASE 081 #endif82 #ifndef RTE_VERSION83 # define RTE_VERSION RTE_VERSION_NUM(RTE_VER_MAJOR,RTE_VER_MINOR, \84 RTE_VER_PATCH_LEVEL, RTE_VER_PATCH_RELEASE)85 #endif86 87 /* 1.6.0r2 :88 * rte_eal_pci_set_blacklist() is removed89 * device_list is renamed to pci_device_list90 * In the 1.7.0 release rte_eal_pci_probe is called by rte_eal_init91 * as such we do apply the whitelist before rte_eal_init.92 * This also works correctly with DPDK 1.6.0r2.93 *94 * Replaced by:95 * rte_devargs (we can simply whitelist)96 */97 #if RTE_VERSION <= RTE_VERSION_NUM(1, 6, 0, 1)98 # define DPDK_USE_BLACKLIST 199 #else100 # define DPDK_USE_BLACKLIST 0101 #endif102 103 /*104 * 1.7.0 :105 * rte_pmd_init_all is removed106 *107 * Replaced by:108 * Nothing, no longer needed109 */110 #if RTE_VERSION < RTE_VERSION_NUM(1, 7, 0, 0)111 # define DPDK_USE_PMD_INIT 1112 #else113 # define DPDK_USE_PMD_INIT 0114 #endif115 116 /* 1.7.0-rc3 :117 *118 * Since 1.7.0-rc3 
rte_eal_pci_probe is called as part of rte_eal_init.119 * Somewhere between 1.7 and 1.8 calling it twice broke so we should not call120 * it twice.121 */122 #if RTE_VERSION < RTE_VERSION_NUM(1, 7, 0, 3)123 # define DPDK_USE_PCI_PROBE 1124 #else125 # define DPDK_USE_PCI_PROBE 0126 #endif127 128 /* 1.8.0-rc1 :129 * LOG LEVEL is a command line option which overrides what130 * we previously set it to.131 */132 #if RTE_VERSION >= RTE_VERSION_NUM(1, 8, 0, 1)133 # define DPDK_USE_LOG_LEVEL 1134 #else135 # define DPDK_USE_LOG_LEVEL 0136 #endif137 138 /* 1.8.0-rc2139 * rx/tx_conf thresholds can be set to NULL in rte_eth_rx/tx_queue_setup140 * this uses the default values, which are better tuned per device141 * See issue #26142 */143 #if RTE_VERSION >= RTE_VERSION_NUM(1, 8, 0, 2)144 # define DPDK_USE_NULL_QUEUE_CONFIG 1145 #else146 # define DPDK_USE_NULL_QUEUE_CONFIG 0147 #endif148 149 /* 2.0.0-rc1150 * Unifies RSS hash between cards151 */152 #if RTE_VERSION >= RTE_VERSION_NUM(2, 0, 0, 1)153 # define RX_RSS_FLAGS (ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP | \154 ETH_RSS_SCTP)155 #else156 # define RX_RSS_FLAGS (ETH_RSS_IPV4_UDP | ETH_RSS_IPV6 | ETH_RSS_IPV4 | \157 ETH_RSS_IPV4_TCP | ETH_RSS_IPV6_TCP |\158 ETH_RSS_IPV6_UDP)159 #endif160 161 /* v16.07-rc1 - deprecated162 * rte_mempool_avail_count to replace rte_mempool_count163 * rte_mempool_in_use_count to replace rte_mempool_free_count164 */165 #if RTE_VERSION < RTE_VERSION_NUM(16, 7, 0, 1)166 #define rte_mempool_avail_count rte_mempool_count167 #define rte_mempool_in_use_count rte_mempool_free_count168 #endif169 170 #include <rte_per_lcore.h>171 #include <rte_debug.h>172 #include <rte_errno.h>173 #include <rte_common.h>174 #include <rte_log.h>175 #include <rte_memcpy.h>176 #include <rte_prefetch.h>177 #include <rte_branch_prediction.h>178 #include <rte_pci.h>179 #include <rte_ether.h>180 #include <rte_ethdev.h>181 #include <rte_ring.h>182 #include <rte_mempool.h>183 #include <rte_mbuf.h>184 #include <rte_launch.h>185 #include 
<rte_lcore.h>186 #include <rte_per_lcore.h>187 #include <rte_cycles.h>188 #include <pthread.h>189 #ifdef __FreeBSD__190 #include <pthread_np.h>191 #endif192 193 /* 16.04-rc3 ETH_LINK_SPEED_X are replaced with ETH_SPEED_NUM_X.194 * ETH_LINK_SPEED_ are reused as flags, ugly.195 * We use the new way in this code.196 */197 #ifndef ETH_SPEED_NUM_1G198 #define ETH_SPEED_NUM_1G ETH_LINK_SPEED_1000199 #define ETH_SPEED_NUM_10G ETH_LINK_SPEED_10G200 #define ETH_SPEED_NUM_20G ETH_LINK_SPEED_20G201 #define ETH_SPEED_NUM_40G ETH_LINK_SPEED_40G202 #endif203 204 /* The default size of memory buffers to use - This is the max size of standard205 * ethernet packet less the size of the MAC CHECKSUM */206 #define RX_MBUF_SIZE 1514207 208 /* The minimum number of memory buffers per queue tx or rx. Based on209 * the requirement of the memory pool with 128 per thread buffers, needing210 * at least 128*1.5 = 192 buffers. Our code allocates 128*2 to be safe.211 */212 #define MIN_NB_BUF 128213 214 /* Number of receive memory buffers to use215 * By default this is limited by driver to 4k and must be a multiple of 128.216 * A modification can be made to the driver to remove this limit.217 * This can be increased in the driver and here.218 * Should be at least MIN_NB_BUF.219 * We choose 2K rather than 4K because it enables the usage of sse vector220 * drivers which are significantly faster than using the larger buffer.221 */222 #define NB_RX_MBUF (4096/2)223 224 /* Number of send memory buffers to use.225 * Same limits apply as those to NB_TX_MBUF.226 */227 #define NB_TX_MBUF 1024228 229 /* The size of the PCI blacklist needs to be big enough to contain230 * every PCI device address (listed by lspci every bus:device.function tuple).231 */232 #define BLACK_LIST_SIZE 50233 234 /* The maximum number of characters the mempool name can be */235 #define MEMPOOL_NAME_LEN 20236 237 /* For single threaded libtrace we read packets as a batch/burst238 * this is the maximum size of said burst */239 
#define BURST_SIZE 32240 62 241 63 #define MBUF(x) ((struct rte_mbuf *) x) … … 259 81 #endif 260 82 261 /* ~~~~~~~~~~~~~~~~~~~~~~ Advance settings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~262 * THESE MAY REQUIRE MODIFICATIONS TO INTEL DPDK263 *264 * Make sure you understand what these are doing before enabling them.265 * They might make traces incompatible with other builds etc.266 *267 * These are also included to show how to do somethings which aren't268 * obvious in the DPDK documentation.269 */270 271 /* Print verbose messages to stderr */272 #define DEBUG 0273 274 /* Use clock_gettime() for nanosecond resolution rather than gettimeofday()275 * only turn on if you know clock_gettime is a vsyscall on your system276 * otherwise could be a large overhead. Again gettimeofday() should be277 * vsyscall also if it's not you should seriously consider updating your278 * kernel.279 */280 #ifdef HAVE_CLOCK_GETTIME281 /* You can turn this on (set to 1) to prefer clock_gettime */282 #define USE_CLOCK_GETTIME 1283 #else284 /* DON'T CHANGE THIS !!! */285 #define USE_CLOCK_GETTIME 0286 #endif287 288 /* This is fairly safe to turn on - currently there appears to be a 'bug'289 * in DPDK that will remove the checksum by making the packet appear 4bytes290 * smaller than what it really is. 
Most formats don't include the checksum291 * hence writing out a port such as int: ring: and dpdk: assumes there292 * is no checksum and will attempt to write the checksum as part of the293 * packet294 */295 #define GET_MAC_CRC_CHECKSUM 0296 297 /* This requires a modification of the pmd drivers (inside Intel DPDK)298 * TODO this requires updating (packet sizes are wrong TS most likely also)299 */300 #define HAS_HW_TIMESTAMPS_82580 0301 302 #if HAS_HW_TIMESTAMPS_82580303 # define TS_NBITS_82580 40304 /* The maximum on the +ve or -ve side that we can be, make it half way */305 # define MAXSKEW_82580 ((uint64_t) (.5 * (double)(1ull<<TS_NBITS_82580)))306 #define WITHIN_VARIANCE(v1,v2,var) (((v1) - (var) < (v2)) && ((v1) + (var) > (v2)))307 #endif308 83 309 84 static pthread_mutex_t dpdk_lock = PTHREAD_MUTEX_INITIALIZER; 310 85 /* Memory pools Per NUMA node */ 311 86 static struct rte_mempool * mem_pools[4][RTE_MAX_LCORE] = {{0}}; 312 313 /* As per Intel 82580 specification - mismatch in 82580 datasheet314 * it states ts is stored in Big Endian, however its actually Little */315 struct hw_timestamp_82580 {316 uint64_t reserved;317 uint64_t timestamp; /* Little Endian only lower 40 bits are valid */318 };319 320 enum paused_state {321 DPDK_NEVER_STARTED,322 DPDK_RUNNING,323 DPDK_PAUSED,324 };325 326 struct dpdk_per_stream_t327 {328 uint16_t queue_id;329 uint64_t ts_last_sys; /* System timestamp of our most recent packet in nanoseconds */330 struct rte_mempool *mempool;331 int lcore;332 #if HAS_HW_TIMESTAMPS_82580333 /* Timestamping only relevant to RX */334 uint64_t ts_first_sys; /* Sytem timestamp of the first packet in nanoseconds */335 uint32_t wrap_count; /* Number of times the NIC clock has wrapped around completely */336 #endif337 } ALIGN_STRUCT(CACHE_LINE_SIZE);338 339 #if HAS_HW_TIMESTAMPS_82580340 #define DPDK_EMPTY_STREAM {-1, 0, NULL, -1, 0, 0}341 #else342 #define DPDK_EMPTY_STREAM {-1, 0, NULL, -1}343 #endif344 345 typedef struct dpdk_per_stream_t 
dpdk_per_stream_t;346 87 347 88 /* Used by both input and output however some fields are not used … … 833 574 } 834 575 835 staticint dpdk_init_input (libtrace_t *libtrace) {576 int dpdk_init_input (libtrace_t *libtrace) { 836 577 dpdk_per_stream_t stream = DPDK_EMPTY_STREAM; 837 578 char err[500]; … … 875 616 static int dpdk_init_output(libtrace_out_t *libtrace) 876 617 { 618 dpdk_per_stream_t stream = DPDK_EMPTY_STREAM; 877 619 char err[500]; 878 620 err[0] = 0; … … 897 639 FORMAT(libtrace)->burst_offset = 0; 898 640 641 FORMAT(libtrace)->per_stream = libtrace_list_init(sizeof(struct dpdk_per_stream_t)); 642 libtrace_list_push_back(FORMAT(libtrace)->per_stream, &stream); 643 899 644 if (dpdk_init_environment(libtrace->uridata, FORMAT(libtrace), err, sizeof(err)) != 0) { 900 645 trace_set_err_out(libtrace, TRACE_ERR_INIT_FAILED, "%s", err); … … 916 661 * 917 662 */ 918 staticint dpdk_config_input (libtrace_t *libtrace,663 int dpdk_config_input (libtrace_t *libtrace, 919 664 trace_option_t option, 920 665 void *data) { … … 1319 1064 port_conf.rxmode.max_rx_pkt_len = 0; 1320 1065 } else { 1066 double expn; 1067 1321 1068 /* Use jumbo frames */ 1322 1069 port_conf.rxmode.jumbo_frame = 1; 1323 1070 port_conf.rxmode.max_rx_pkt_len = format_data->snaplen; 1071 1072 /* Use less buffers if we're supporting jumbo frames 1073 * otherwise we won't be able to allocate memory. 
1074 */ 1075 if (format_data->snaplen > 1500) { 1076 format_data->nb_rx_buf /= 2; 1077 } 1078 1079 /* snaplen should be rounded up to next power of two 1080 * to ensure enough memory is allocated for each 1081 * mbuf :( 1082 */ 1083 expn = ceil(log2((double)(format_data->snaplen))); 1084 format_data->snaplen = pow(2, (int)expn); 1324 1085 } 1325 1086 … … 1512 1273 } 1513 1274 1514 staticint dpdk_start_input (libtrace_t *libtrace) {1275 int dpdk_start_input (libtrace_t *libtrace) { 1515 1276 char err[500]; 1516 1277 err[0] = 0; … … 1542 1303 } 1543 1304 1544 staticint dpdk_pstart_input (libtrace_t *libtrace) {1305 int dpdk_pstart_input (libtrace_t *libtrace) { 1545 1306 char err[500]; 1546 1307 int i=0, phys_cores=0; … … 1615 1376 * in any other manner including statistics functions. 1616 1377 */ 1617 staticint dpdk_pregister_thread(libtrace_t *libtrace, libtrace_thread_t *t, bool reading)1378 int dpdk_pregister_thread(libtrace_t *libtrace, libtrace_thread_t *t, bool reading) 1618 1379 { 1619 1380 #if DEBUG … … 1665 1426 * they are destroyed. 
1666 1427 */ 1667 staticvoid dpdk_punregister_thread(libtrace_t *libtrace UNUSED, libtrace_thread_t *t UNUSED)1428 void dpdk_punregister_thread(libtrace_t *libtrace UNUSED, libtrace_thread_t *t UNUSED) 1668 1429 { 1669 1430 struct rte_config *cfg = rte_eal_get_configuration(); … … 1701 1462 } 1702 1463 1703 staticint dpdk_pause_input(libtrace_t * libtrace) {1464 int dpdk_pause_input(libtrace_t * libtrace) { 1704 1465 libtrace_list_node_t *tmp = FORMAT_DATA_HEAD(libtrace); 1705 1466 /* This stops the device, but can be restarted using rte_eth_dev_start() */ … … 1756 1517 } 1757 1518 1758 staticint dpdk_fin_input(libtrace_t * libtrace) {1519 int dpdk_fin_input(libtrace_t * libtrace) { 1759 1520 libtrace_list_node_t * n; 1760 1521 /* Free our memory structures */ … … 1849 1610 } 1850 1611 1851 staticint dpdk_get_framing_length (const libtrace_packet_t *packet) {1612 int dpdk_get_framing_length (const libtrace_packet_t *packet) { 1852 1613 struct dpdk_addt_hdr * hdr = get_addt_hdr(packet); 1853 1614 if (hdr->flags & INCLUDES_HW_TIMESTAMP) … … 1858 1619 } 1859 1620 1860 staticint dpdk_prepare_packet(libtrace_t *libtrace UNUSED,1621 int dpdk_prepare_packet(libtrace_t *libtrace UNUSED, 1861 1622 libtrace_packet_t *packet, void *buffer, 1862 1623 libtrace_rt_types_t rt_type, uint32_t flags) { … … 2116 1877 /** Reads at least one packet or returns an error 2117 1878 */ 2118 static inlineint dpdk_read_packet_stream (libtrace_t *libtrace,1879 int dpdk_read_packet_stream (libtrace_t *libtrace, 2119 1880 dpdk_per_stream_t *stream, 2120 1881 libtrace_message_queue_t *mesg, … … 2181 1942 } 2182 1943 2183 staticint dpdk_read_packet (libtrace_t *libtrace, libtrace_packet_t *packet) {1944 int dpdk_read_packet (libtrace_t *libtrace, libtrace_packet_t *packet) { 2184 1945 int nb_rx; /* Number of rx packets we've received */ 2185 1946 dpdk_per_stream_t *stream = FORMAT_DATA_FIRST(libtrace); … … 2199 1960 if (FORMAT(libtrace)->burst_size != FORMAT(libtrace)->burst_offset) { 2200 1961 
packet->buffer = FORMAT(libtrace)->burst_pkts[FORMAT(libtrace)->burst_offset++]; 1962 packet->trace = libtrace; 2201 1963 dpdk_prepare_packet(libtrace, packet, packet->buffer, packet->type, 0); 2202 1964 return 1; // TODO should be bytes read, which essentially useless anyway … … 2210 1972 FORMAT(libtrace)->burst_offset = 1; 2211 1973 packet->buffer = FORMAT(libtrace)->burst_pkts[0]; 1974 packet->trace = libtrace; 2212 1975 dpdk_prepare_packet(libtrace, packet, packet->buffer, packet->type, 0); 2213 1976 return 1; … … 2249 2012 } 2250 2013 2251 static void dpdk_get_stats(libtrace_t *trace, libtrace_stat_t *stats) { 2014 void dpdk_get_stats(libtrace_t *trace, libtrace_stat_t *stats) { 2252 2015 struct rte_eth_stats dev_stats = {0}; 2253 2016 … … 2283 2046 * create a select()able file descriptor in DPDK. 2284 2047 */ 2285 static libtrace_eventobj_t dpdk_trace_event(libtrace_t *trace, 2048 libtrace_eventobj_t dpdk_trace_event(libtrace_t *trace, 2286 2049 libtrace_packet_t *packet) { 2287 2050 libtrace_eventobj_t event = {0,0,0.0,0}; -
lib/format_erf.c
r0317e3c r7ff881a 124 124 * so that the IP header is aligned on a 32 bit boundary. 125 125 */ 126 static int erf_get_padding(const libtrace_packet_t *packet) 126 static inline int erf_get_padding(const libtrace_packet_t *packet) 127 127 { 128 128 if (packet->trace->format->type==TRACE_FORMAT_ERF || 129 129 packet->trace->format->type == TRACE_FORMAT_NDAG || 130 packet->trace->format->type == TRACE_FORMAT_RAWERF) { 130 packet->trace->format->type == TRACE_FORMAT_RAWERF || 131 packet->trace->format->type == TRACE_FORMAT_DPDK_NDAG) { 131 132 dag_record_t *erfptr = (dag_record_t *)packet->header; 132 133 switch((erfptr->type & 0x7f)) { -
lib/format_ndag.c
r6452c2c r4f0f93f 231 231 "Failed to bind to multicast socket %s:%s -- %s\n", 232 232 groupaddr, portstr, strerror(errno)); 233 close(sock);234 233 sock = -1; 235 234 goto sockcreateover; … … 406 405 "Unable to join multicast group for nDAG control channel"); 407 406 trace_interrupt(); 407 pthread_exit(NULL); 408 408 } 409 409 -
lib/libtrace.h.in
ra9d0e40 rc7e547e 396 396 TRACE_FORMAT_PCAPNG =18, /**< PCAP-NG trace file */ 397 397 TRACE_FORMAT_NDAG =19, /**< DAG multicast over a network */ 398 TRACE_FORMAT_DPDK_NDAG =20, /**< DAG multicast over a network, received via DPDK */ 398 399 }; 399 400 -
lib/libtrace_int.h
ra9d0e40 rc7e547e 1243 1243 /** Constructor for Intels DPDK format module */ 1244 1244 void dpdk_constructor(void); 1245 1246 /** Constructor for receiving network DAG via Intels DPDK format module */ 1247 void dpdkndag_constructor(void); 1248 1245 1249 #endif 1246 1250 -
lib/trace.c
rea75ec2 rc7e547e 152 152 #endif 153 153 #ifdef HAVE_DPDK 154 dpdk_constructor(); 154 dpdk_constructor(); 155 dpdkndag_constructor(); 155 156 #endif 156 157 } -
tools/tracertstats/tracertstats.c
r8e11beb ra8cfe71 47 47 #include <getopt.h> 48 48 #include <inttypes.h> 49 #include <signal.h> 50 49 51 #include <lt_inttypes.h> 50 51 52 #include "libtrace_parallel.h" 52 53 #include "output.h" … … 80 81 uint64_t count; 81 82 uint64_t bytes; 83 84 struct libtrace_t *currenttrace; 85 86 static void cleanup_signal(int signal UNUSED) { 87 if (currenttrace) { 88 trace_pstop(currenttrace); 89 } 90 } 82 91 83 92 static void report_results(double ts,uint64_t count,uint64_t bytes) … … 281 290 trace_set_result_cb(repcbs, cb_result); 282 291 292 currenttrace = trace; 283 293 if (trace_pstart(trace, NULL, pktcbs, repcbs)==-1) { 284 294 trace_perror(trace,"Failed to start trace"); … … 329 339 330 340 int i; 341 struct sigaction sigact; 331 342 332 343 while(1) { … … 414 425 return 0; 415 426 } 416 427 428 sigact.sa_handler = cleanup_signal; 429 sigemptyset(&sigact.sa_mask); 430 sigact.sa_flags = SA_RESTART; 431 432 sigaction(SIGINT, &sigact, NULL); 433 sigaction(SIGTERM, &sigact, NULL); 434 435 417 436 for(i=optind;i<argc;++i) { 418 437 run_trace(argv[i]); -
tools/tracesplit/tracesplit.c
r8e11beb r92cf299 294 294 } 295 295 296 if (trace_config_output(output, 297 TRACE_OPTION_OUTPUT_COMPRESSTYPE, 298 &compress_type) == -1) { 299 trace_perror_output(output, "Unable to set compression type"); 300 } 296 if (compress_type != TRACE_OPTION_COMPRESSTYPE_NONE) { 297 if (trace_config_output(output, 298 TRACE_OPTION_OUTPUT_COMPRESSTYPE, 299 &compress_type) == -1) { 300 trace_perror_output(output, "Unable to set compression type"); 301 } 302 } 301 303 302 304 trace_start_output(output);
Note: See TracChangeset for help on using the changeset viewer.