source: lib/format_linux_common.h @ ed6304c5

4.0.1-hotfixescachetimestampsdevelopdpdk-ndagetsilivelibtrace4ndag_formatpfringrc-4.0.1rc-4.0.2rc-4.0.3rc-4.0.4ringdecrementfixringperformanceringtimestampfixes
Last change on this file since ed6304c5 was ed6304c5, checked in by Richard Sanger <rsangerarj@…>, 7 years ago

For linux formats int/ring also use dev stats from /proc/net/dev to report
dropped packets.

This accounts for packets that don't even reach the kernel. The drop count
now matches 100% with sent packets.

There could be some slight discrepancies caused by packets received between
opening/closing the socket and when we make our call to /proc/net/dev.
However this is a lot more realistic than previous numbers.

We might also be able to calculate filtered packets from these numbers.

  • Property mode set to 100644
File size: 11.0 KB
Line 
1/* Various definitions required for the linux format. They were moved here,
2 * because format_linux.c had a lot of header information before the actual
3 * code. The linux headers have been copied into here rather than included to
4 * support RT on machines that don't have the linux headers (like a mac, for
5 * example).
6 */
7
8#ifndef FORMAT_LINUX_COMMON_H
9#define FORMAT_LINUX_COMMON_H
10
11#include "libtrace.h"
12#include "libtrace_int.h"
13
14#ifdef HAVE_NETPACKET_PACKET_H
15
16#include <sys/socket.h>
17#include <netpacket/packet.h>
18#include <net/ethernet.h>
19#include <net/if_arp.h>
20
21#include <net/if.h>
22#include <sys/ioctl.h>
23#include <poll.h>
24#include <sys/mman.h>
25
26#include <fcntl.h>
27
/* Largest page order tried when sizing ring-buffer blocks. MAX_ORDER comes
 * from linux/mmzone.h, where 11 is the default for 3.0 kernels. max_order
 * is decreased by one whenever the ring buffer fails to allocate, so that
 * correctly sized buffers end up being requested from the kernel.
 */
#define MAX_ORDER 11

/* Frame count shared by the TX and RX rings. A larger ring should mean
 * less packet loss, particularly when traffic arrives in bursts.
 */
#define CONF_RING_FRAMES        0x100

/* Upper bound on frames left waiting in the TX_RING before the kernel is
 * told to write them out. This must stay below CONF_RING_FRAMES; values
 * above 10 have not been seen to improve performance any further.
 */
#define TX_MAX_QUEUE            10
43
44#else   /* HAVE_NETPACKET_PACKET_H */
45
/* Local copy of the link-layer socket address layout, used when
 * <netpacket/packet.h> is not available. Field order and widths must not
 * change: frames and encapsulated packets are decoded through this layout.
 */
struct sockaddr_ll {
        uint16_t sll_family;    /* Address family */
        uint16_t sll_protocol;  /* Link-layer protocol */
        int32_t  sll_ifindex;   /* Interface number */
        uint16_t sll_hatype;    /* Hardware (ARP) type */
        uint8_t  sll_pkttype;   /* One of the PACKET_* packet types */
        uint8_t  sll_halen;     /* Number of bytes used in sll_addr */
        uint8_t  sll_addr[8];   /* Link-layer address */
};
56
/* Packet types */
#define PACKET_HOST             0       /* Addressed to us */
#define PACKET_BROADCAST        1       /* Addressed to everyone */
#define PACKET_MULTICAST        2       /* Addressed to a group */
#define PACKET_OTHERHOST        3       /* Addressed to someone else */
#define PACKET_OUTGOING         4       /* Originated by us */
#define PACKET_LOOPBACK         5
#define PACKET_FASTROUTE        6

/* Packet socket options */
#define PACKET_ADD_MEMBERSHIP   1
#define PACKET_DROP_MEMBERSHIP  2
#define PACKET_RECV_OUTPUT      3
#define PACKET_RX_RING          5
#define PACKET_STATISTICS       6
73
74#endif /* HAVE_NETPACKET_PACKET_H */
75
/* Counters for a packet socket; stored in linux_format_data_t as the
 * statistics for the capture process. */
struct tpacket_stats {
        unsigned int tp_packets;        /* Packet count */
        unsigned int tp_drops;          /* Dropped packet count */
};
80
/* The timestamp format used by a capture. The numeric values matter:
 * libtrace_linuxnative_header stores one of them in a uint8_t field. */
typedef enum {
        TS_NONE,        /* presumably: no timestamp recorded */
        TS_TIMEVAL,     /* timeval style: seconds + microseconds */
        TS_TIMESPEC     /* timespec style: seconds + nanoseconds */
} timestamptype_t;
82
/* Definitions taken from linux/if_packet.h. They are reproduced here,
 * rather than pulled in by including that header, so that a ring frame can
 * still be interpreted on a kernel that doesn't support the format directly.
 */

/* Packet socket option names */
#define PACKET_RX_RING  5
#define PACKET_VERSION  10
#define PACKET_HDRLEN   11
#define PACKET_TX_RING  13
#define PACKET_FANOUT   18

/* Values found in the tp_status field of a ring frame */
#define TP_STATUS_USER  0x1
#define TP_STATUS_SEND_REQUEST  0x1
#define TP_STATUS_AVAILABLE     0x0

/* View the start of a ring frame as a version 2 / version 3 header */
#define TO_TP_HDR2(x)   ((struct tpacket2_hdr *) (x))
#define TO_TP_HDR3(x)   ((struct tpacket3_hdr *) (x))

/* Round x up to the next multiple of TPACKET_ALIGNMENT */
#define TPACKET_ALIGNMENT       16
#define TPACKET_ALIGN(x)        (((x) + TPACKET_ALIGNMENT - 1) & ~(TPACKET_ALIGNMENT - 1))

/* Aligned frame header followed by the sockaddr_ll */
#define TPACKET2_HDRLEN \
        (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
#define TPACKET3_HDRLEN \
        (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll))
101
102
/* Packet fanout modes and flags; fanout is supported since the 3.1 kernel */

/* Schedule to a socket by the skb's rxhash - the implementation is
 * bi-directional */
#define PACKET_FANOUT_HASH              0
/* Schedule round robin */
#define PACKET_FANOUT_LB                1
/* Schedule to the same socket that received the packet */
#define PACKET_FANOUT_CPU               2
/* Something to do with fragmented packets and hashing problems.
 * TODO: figure out if this needs to be on */
#define PACKET_FANOUT_FLAG_DEFRAG       0x8000

/* Present since Linux 3.10, but unused by libtrace */
/* If one socket is full, roll over to the next */
#define PACKET_FANOUT_ROLLOVER          3
/* This flag makes any other mode roll over */
#define PACKET_FANOUT_FLAG_ROLLOVER     0x1000

/* Present since Linux 3.12, but unused by libtrace */
/* Schedule randomly */
#define PACKET_FANOUT_RND               4
120
121
/* Ring frame header versions. The enumerators rely on the implicit
 * numbering 0, 1, 2, so their order must be kept. */
enum tpacket_versions {
        TPACKET_V1,     /* 0 */
        TPACKET_V2,     /* 1: frames start with struct tpacket2_hdr */
        TPACKET_V3      /* 2: frames start with struct tpacket3_hdr */
};
127
/* Header found at the start of each version 2 ring frame. The layout is
 * shared with the kernel, so fields must not be reordered or resized. */
struct tpacket2_hdr {
        uint32_t        tp_status;      /* Frame status - in use by kernel or
                                         * libtrace etc. */
        uint32_t        tp_len;         /* Wire length */
        uint32_t        tp_snaplen;     /* Captured length */
        uint16_t        tp_mac;         /* Byte offset from frame start to the
                                         * mac (link layer) header */
        uint16_t        tp_net;         /* Byte offset from frame start to the
                                         * net (network layer) header */
        uint32_t        tp_sec;         /* Timestamp, seconds part */
        uint32_t        tp_nsec;        /* Timestamp, nanoseconds part */
        uint16_t        tp_vlan_tci;    /* VLAN tag control information,
                                         * not used */
        uint16_t        tp_padding;
};
146
/* First (and here only) variant of the trailing part of tpacket3_hdr */
struct tpacket_hdr_variant1 {
        uint32_t        tp_rxhash;
        uint32_t        tp_vlan_tci;
};

/* Header found at the start of each version 3 ring frame. Note that the
 * field order differs from tpacket2_hdr. The layout is shared with the
 * kernel, so fields must not be reordered or resized. */
struct tpacket3_hdr {
        uint32_t                tp_next_offset;
        uint32_t                tp_sec;         /* Timestamp, seconds */
        uint32_t                tp_nsec;        /* Timestamp, nanoseconds */
        uint32_t                tp_snaplen;
        uint32_t                tp_len;
        uint32_t                tp_status;
        uint16_t                tp_mac;
        uint16_t                tp_net;
        /* Anonymous union holding the pkt_hdr variants */
        union {
                struct tpacket_hdr_variant1 hv1;
        };
};
166
/* Describes a ring buffer layout; kept in the format data as "the current
 * ring buffer layout". */
struct tpacket_req {
        /* Minimal size of contiguous block */
        unsigned int tp_block_size;
        /* Number of blocks */
        unsigned int tp_block_nr;
        /* Size of frame */
        unsigned int tp_frame_size;
        /* Total number of frames */
        unsigned int tp_frame_nr;
};
173
174
/* IF_NAMESIZE normally comes from <net/if.h>, but this header only includes
 * that file when HAVE_NETPACKET_PACKET_H is defined, while the structure
 * below is declared unconditionally. Provide the usual value as a fallback
 * so the header still compiles in the other configuration.
 */
#ifndef IF_NAMESIZE
#define IF_NAMESIZE 16
#endif

/* A structure we use to hold statistic counters from the network cards
 * as accessed via /proc/net/dev. The rx_* and tx_* members are declared in
 * the same order as the receive and transmit columns of that file.
 */
struct linux_dev_stats {
        /* Name of the interface the counters belong to */
        char if_name[IF_NAMESIZE];
        /* Receive counters */
        uint64_t rx_bytes;
        uint64_t rx_packets;
        uint64_t rx_errors;
        uint64_t rx_drops;
        uint64_t rx_fifo;
        uint64_t rx_frame;
        uint64_t rx_compressed;
        uint64_t rx_multicast;
        /* Transmit counters */
        uint64_t tx_bytes;
        uint64_t tx_packets;
        uint64_t tx_errors;
        uint64_t tx_drops;
        uint64_t tx_fifo;
        uint64_t tx_colls;
        uint64_t tx_carrier;
        uint64_t tx_compressed;
};
197
198/* Note that this structure is passed over the wire in rt encapsulation, and
199 * thus we need to be careful with data sizes.  timeval's and timespec's
200 * can also change their size on 32/64 machines.
201 */
202struct linux_format_data_t {
203        /* The snap length for the capture */
204        int snaplen;
205        /* Flag indicating whether the interface should be placed in
206         * promiscuous mode */
207        int promisc;
208        /* The timestamp format used by the capture */
209        timestamptype_t timestamptype;
210        /* A BPF filter that is applied to every captured packet */
211        libtrace_filter_t *filter;
212        /* Statistics for the capture process, e.g. dropped packet counts */
213        struct tpacket_stats stats;
214        /* Statistics for the NIC rather than the socket */
215        struct linux_dev_stats dev_stats;
216        /* Flag indicating whether the statistics are current or not */
217        int stats_valid;
218        /* The current ring buffer layout */
219        struct tpacket_req req;
220        /* Used to determine buffer size for the ring buffer */
221        uint32_t max_order;
222        /* Used for the parallel case, fanout is the mode */
223        uint16_t fanout_flags;
224        /* The group lets Linux know which sockets to group together
225         * so we use a random here to try avoid collisions */
226        uint16_t fanout_group;
227        /* When running in parallel mode this is malloc'd with an array
228         * file descriptors from packet fanout will use, here we assume/hope
229         * that every ring can get setup the same */
230        libtrace_list_t *per_stream;
231};
232
233struct linux_format_data_out_t {
234        /* The file descriptor used to write the packets */
235        int fd;
236        /* The tx ring mmap location */
237        char * tx_ring;
238        /* The current frame number within the tx ring */
239        int txring_offset;
240        /* The current ring buffer layout */
241        struct tpacket_req req;
242        /* Our sockaddr structure, here so we can cache the interface number */
243        struct sockaddr_ll sock_hdr;
244        /* The (maximum) number of packets that haven't been written */
245        int queue;
246        /* The format this trace is using linuxring or linuxnative */
247        libtrace_rt_types_t format;
248        /* Used to determine buffer size for the ring buffer */
249        uint32_t max_order;
250};
251
/* State kept for each input stream; aligned via
 * ALIGN_STRUCT(CACHE_LINE_SIZE). */
struct linux_per_stream_t {
        int fd;                 /* File descriptor for the memory mapped
                                 * stream */
        char *rx_ring;          /* Memory mapped buffer */
        int rxring_offset;      /* Offset within the mapped buffer */
} ALIGN_STRUCT(CACHE_LINE_SIZE);

/* Initialiser for a stream with nothing opened or mapped yet; the values
 * follow the member order fd, rx_ring, rxring_offset. */
#define ZERO_LINUX_STREAM { -1, MAP_FAILED, 0 }
262
263
/* Format header for encapsulating packets captured using linux native.
 * Fixed-width integers are used instead of struct timeval / struct
 * timespec so that the time fields have the same size on every machine.
 */
struct libtrace_linuxnative_header {
        struct {
                uint32_t tv_sec;
                uint32_t tv_usec;
        } tv;                   /* Timestamp of the packet, as a timeval */
        struct {
                uint32_t tv_sec;
                uint32_t tv_nsec;
        } ts;                   /* Timestamp of the packet, as a timespec */
        uint8_t timestamptype;  /* The timestamp format used by the process
                                 * that captured this packet */
        uint32_t wirelen;       /* Wire length */
        uint32_t caplen;        /* Capture length */
        struct sockaddr_ll hdr; /* The linux native header itself */
};
285
/* Helper macros to make addressing data in the above structures easier.
 * The argument is parenthesised so that an expression such as `t + 1` can
 * be passed as well as a plain identifier.
 */

/* Format data of an input trace x, read from x->format_data */
#define DATA(x) ((struct linux_format_data_t *)(x)->format_data)
/* Format data of an output trace x, read from x->format_data */
#define DATA_OUT(x) ((struct linux_format_data_out_t *)(x)->format_data)
/* Per-stream state stored in x->data */
#define STREAM_DATA(x) ((struct linux_per_stream_t *)(x)->data)

/* Shorthands that expect a variable named `libtrace` to be in scope */
#define FORMAT_DATA DATA(libtrace)
#define FORMAT_DATA_OUT DATA_OUT(libtrace)

/* Head node of the per-stream list, and the stream state stored in it */
#define FORMAT_DATA_HEAD (FORMAT_DATA->per_stream->head)
#define FORMAT_DATA_FIRST ((struct linux_per_stream_t *)FORMAT_DATA_HEAD->data)

/* Get the sockaddr_ll structure from a frame: it sits directly after the
 * aligned tpacket2_hdr at the start of the frame x */
#define GET_SOCKADDR_HDR(x)  ((struct sockaddr_ll *) (((char *) (x))\
        + TPACKET_ALIGN(sizeof(struct tpacket2_hdr))))
300
/* Common functions. These prototypes are only visible when
 * <netpacket/packet.h> is available (HAVE_NETPACKET_PACKET_H). */
#ifdef HAVE_NETPACKET_PACKET_H
int linuxcommon_init_input(libtrace_t *libtrace);
int linuxcommon_init_output(libtrace_out_t *libtrace);
int linuxcommon_probe_filename(const char *filename);
int linuxcommon_config_input(libtrace_t *libtrace, trace_option_t option,
                             void *data);
void linuxcommon_close_input_stream(libtrace_t *libtrace,
                                    struct linux_per_stream_t *stream);
int linuxcommon_start_input_stream(libtrace_t *libtrace,
                                   struct linux_per_stream_t *stream);
/* Not declared `inline`: ISO C requires a function with external linkage
 * that is declared inline to be defined in the same translation unit, and
 * this header carries no definition. A plain prototype also guarantees
 * that the defining file emits an external definition to link against. */
int linuxcommon_to_packet_fanout(libtrace_t *libtrace,
                                 struct linux_per_stream_t *stream);
int linuxcommon_pause_input(libtrace_t *libtrace);
int linuxcommon_get_fd(const libtrace_t *libtrace);
int linuxcommon_fin_input(libtrace_t *libtrace);
int linuxcommon_pconfig_input(libtrace_t *libtrace,
                              trace_parallel_option_t option,
                              void *data);
int linuxcommon_pregister_thread(libtrace_t *libtrace,
                                 libtrace_thread_t *t,
                                 bool reading);
/* start_stream is a callback taking the trace and one per-stream state */
int linuxcommon_pstart_input(libtrace_t *libtrace,
                             int (*start_stream)(libtrace_t *, struct linux_per_stream_t*));
#endif /* HAVE_NETPACKET_PACKET_H */
326
327uint64_t linuxcommon_get_captured_packets(libtrace_t *libtrace);
328uint64_t linuxcommon_get_filtered_packets(libtrace_t *libtrace);
329uint64_t linuxcommon_get_dropped_packets(libtrace_t *libtrace);
330inline libtrace_direction_t linuxcommon_get_direction(uint8_t pkttype);
331inline libtrace_direction_t linuxcommon_set_direction(struct sockaddr_ll * skadr,
332                                                 libtrace_direction_t direction);
333inline libtrace_linktype_t linuxcommon_get_link_type(uint16_t linktype);
334
335
336#endif /* FORMAT_LINUX_COMMON_H */
Note: See TracBrowser for help on using the repository browser.