source: lib/format_linux.h @ 1871afc

4.0.1-hotfixescachetimestampsdevelopdpdk-ndagetsilivelibtrace4ndag_formatpfringrc-4.0.1rc-4.0.2rc-4.0.3rc-4.0.4ringdecrementfixringperformanceringtimestampfixes
Last change on this file since 1871afc was 1871afc, checked in by Richard Sanger <rsangerarj@…>, 6 years ago

Add missing files

  • Property mode set to 100644
File size: 10.8 KB
Line 
/* Various definitions required for the linux format. They were moved here,
 * because format_linux.c had a lot of header information before the actual
 * code. The linux headers have been copied into here rather than included to
 * support RT on machines that don't have the linux headers (like a mac, for
 * example).
 */
7
8#include "libtrace.h"
9#include "libtrace_int.h"
10
11#ifdef HAVE_NETPACKET_PACKET_H
12
13#include <sys/socket.h>
14#include <netpacket/packet.h>
15#include <net/ethernet.h>
16#include <net/if_arp.h>
17
18#include <net/if.h>
19#include <sys/ioctl.h>
20#include <poll.h>
21#include <sys/mman.h>
22
23#include <fcntl.h>
24
/* Upper bound on the allocation order used when sizing ring buffer blocks.
 * MAX_ORDER is defined in linux/mmzone.h; 10 is the default for 2.4 kernels.
 * max_order is decreased by one if the ring buffer fails to allocate, so that
 * correctly sized buffers are requested from the kernel.
 *
 * TODO: This is set to 11 in at least the 3.x kernels. We should investigate
 * whether a higher value improves performance; if not, we can leave it.
 */
#define MAX_ORDER 10

/* Cached page size; the page size is not expected to change. */
static int pagesize = 0;

/* Number of frames in the ring, used by both the TX and RX rings. More frames
 * hopefully means less packet loss, especially if traffic comes in bursts.
 */
#define CONF_RING_FRAMES        0x100

/* Maximum number of frames allowed to wait in the TX ring before the kernel
 * is notified to write them out. Must be less than CONF_RING_FRAMES.
 * Performance doesn't seem to increase when this is set above 10.
 */
#define TX_MAX_QUEUE            10
48
49#else   /* HAVE_NETPACKET_PACKET_H */
50
/* Local copy of the link-layer socket address, for builds where
 * <netpacket/packet.h> is unavailable. Field meanings follow packet(7).
 */
struct sockaddr_ll {
        uint16_t sll_family;    /* Address family */
        uint16_t sll_protocol;  /* Link-layer protocol */
        int32_t  sll_ifindex;   /* Interface index */
        uint16_t sll_hatype;    /* ARPHRD hardware type */
        uint8_t  sll_pkttype;   /* One of the PACKET_* packet types */
        uint8_t  sll_halen;     /* Length of the address in sll_addr */
        uint8_t  sll_addr[8];   /* Link-layer address */
};
61
/* Packet types, as stored in sockaddr_ll.sll_pkttype. */
#define PACKET_HOST             0       /* To us. */
#define PACKET_BROADCAST        1       /* To all. */
#define PACKET_MULTICAST        2       /* To group. */
#define PACKET_OTHERHOST        3       /* To someone else. */
#define PACKET_OUTGOING         4       /* Originated by us. */
#define PACKET_LOOPBACK         5
#define PACKET_FASTROUTE        6

/* Packet socket options. */
#define PACKET_ADD_MEMBERSHIP   1
#define PACKET_DROP_MEMBERSHIP  2
#define PACKET_RECV_OUTPUT      3
#define PACKET_RX_RING          5
#define PACKET_STATISTICS       6
78
79#endif /* HAVE_NETPACKET_PACKET_H */
80
/* Capture counters; used as the `stats` member of linux_format_data_t. */
struct tpacket_stats {
        unsigned int tp_packets;        /* Packet count */
        unsigned int tp_drops;          /* Dropped packet count */
};
85
/* Which kind of timestamp (if any) accompanies a captured packet. */
typedef enum {
        TS_NONE,        /* No timestamp */
        TS_TIMEVAL,     /* Timestamp stored as a timeval */
        TS_TIMESPEC     /* Timestamp stored as a timespec */
} timestamptype_t;
87
/* linux/if_packet.h defines. They are defined here rather than by including
 * the header; this means that we can interpret a ring frame on a kernel that
 * doesn't support the format directly.
 */
/* Packet socket option numbers */
#define PACKET_RX_RING          5
#define PACKET_VERSION          10
#define PACKET_HDRLEN           11
#define PACKET_TX_RING          13
#define PACKET_FANOUT           18

/* Frame status values */
#define TP_STATUS_USER          0x1
#define TP_STATUS_SEND_REQUEST  0x1
#define TP_STATUS_AVAILABLE     0x0

/* Reinterpret a frame pointer as a TPACKET_V2 / TPACKET_V3 header */
#define TO_TP_HDR2(x)           ((struct tpacket2_hdr *) (x))
#define TO_TP_HDR3(x)           ((struct tpacket3_hdr *) (x))

/* Round x up to the next multiple of TPACKET_ALIGNMENT */
#define TPACKET_ALIGNMENT       16
#define TPACKET_ALIGN(x)        (((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))

/* Aligned frame header followed by the sockaddr_ll */
#define TPACKET2_HDRLEN         (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
#define TPACKET3_HDRLEN         (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll))
106
107
/* Packet fanout support (available since the 3.1 kernel) */

/* Schedule to a socket by the skb's rxhash - the implementation is
 * bi-directional */
#define PACKET_FANOUT_HASH              0
/* Schedule round robin */
#define PACKET_FANOUT_LB                1
/* Schedule to the same socket that received the packet */
#define PACKET_FANOUT_CPU               2
/* Something to do with fragmented packets and hashing problems.
 * TODO: figure out if this needs to be on */
#define PACKET_FANOUT_FLAG_DEFRAG       0x8000

/* Since Linux 3.10: included here but unused by libtrace */
/* If one socket is full, roll over to the next */
#define PACKET_FANOUT_ROLLOVER          3
/* This flag makes any other system roll over */
#define PACKET_FANOUT_FLAG_ROLLOVER     0x1000

/* Since Linux 3.12: included here but unused by libtrace */
/* Schedule random */
#define PACKET_FANOUT_RND               4
125
126
/* Ring frame header versions; TO_TP_HDR2/TO_TP_HDR3 cast to the V2 and V3
 * layouts respectively. */
enum tpacket_versions {
        TPACKET_V1,     /* == 0 */
        TPACKET_V2,     /* == 1 */
        TPACKET_V3      /* == 2 */
};
132
/* Per-frame header for a TPACKET_V2 ring (see TO_TP_HDR2). */
struct tpacket2_hdr {
        uint32_t        tp_status;      /* Frame status - in use by kernel or
                                         * libtrace etc. */
        uint32_t        tp_len;         /* Wire length */
        uint32_t        tp_snaplen;     /* Captured length */
        uint16_t        tp_mac;         /* Byte offset from frame start to the
                                         * mac (link layer) header */
        uint16_t        tp_net;         /* Byte offset from frame start to the
                                         * net (network layer) header */
        uint32_t        tp_sec;         /* Timestamp: seconds */
        uint32_t        tp_nsec;        /* Timestamp: nanoseconds */
        uint16_t        tp_vlan_tci;    /* VLAN tag control information
                                         * (not used) */
        uint16_t        tp_padding;
};
151
/* Variant-specific tail of struct tpacket3_hdr (its `hv1` union member). */
struct tpacket_hdr_variant1 {
        uint32_t        tp_rxhash;
        uint32_t        tp_vlan_tci;
};
156
157struct tpacket3_hdr {
158        uint32_t                tp_next_offset;
159        uint32_t                tp_sec;
160        uint32_t                tp_nsec;
161        uint32_t                tp_snaplen;
162        uint32_t                tp_len;
163        uint32_t                tp_status;
164        uint16_t                tp_mac;
165        uint16_t                tp_net;
166        /* pkt_hdr variants */
167        union {
168                struct tpacket_hdr_variant1 hv1;
169        };
170};
171
/* Ring buffer layout description; stored as the `req` member of the format
 * data structures. */
struct tpacket_req {
        /* Minimal size of contiguous block */
        unsigned int tp_block_size;
        /* Number of blocks */
        unsigned int tp_block_nr;
        /* Size of frame */
        unsigned int tp_frame_size;
        /* Total number of frames */
        unsigned int tp_frame_nr;
};
178
179/* Note that this structure is passed over the wire in rt encapsulation, and
180 * thus we need to be careful with data sizes.  timeval's and timespec's
181 * can also change their size on 32/64 machines.
182 */
183struct linux_format_data_t {
184        /* The snap length for the capture */
185        int snaplen;
186        /* Flag indicating whether the interface should be placed in
187         * promiscuous mode */
188        int promisc;
189        /* The timestamp format used by the capture */
190        timestamptype_t timestamptype;
191        /* A BPF filter that is applied to every captured packet */
192        libtrace_filter_t *filter;
193        /* Statistics for the capture process, e.g. dropped packet counts */
194        struct tpacket_stats stats;
195        /* Flag indicating whether the statistics are current or not */
196        int stats_valid;
197        /* The actual format being used - ring vs int */
198        libtrace_rt_types_t format;
199        /* The current ring buffer layout */
200        struct tpacket_req req;
201        /* Used to determine buffer size for the ring buffer */
202        uint32_t max_order;
203        /* Used for the parallel case, fanout is the mode */
204        uint16_t fanout_flags;
205        /* The group lets Linux know which sockets to group together
206         * so we use a random here to try avoid collisions */
207        uint16_t fanout_group;
208        /* Parent format so we can call parent functions */
209        struct libtrace_format_t *parent_format;
210        /* When running in parallel mode this is malloc'd with an array
211         * file descriptors from packet fanout will use, here we assume/hope
212         * that every ring can get setup the same */
213        libtrace_list_t *per_stream;
214};
215
216struct linux_format_data_out_t {
217        /* The file descriptor used to write the packets */
218        int fd;
219        /* The tx ring mmap location */
220        char * tx_ring;
221        /* The current frame number within the tx ring */
222        int txring_offset;
223        /* The current ring buffer layout */
224        struct tpacket_req req;
225        /* Our sockaddr structure, here so we can cache the interface number */
226        struct sockaddr_ll sock_hdr;
227        /* The (maximum) number of packets that haven't been written */
228        int queue;
229        /* The format this trace is using linuxring or linuxnative */
230        libtrace_rt_types_t format;
231        /* Used to determine buffer size for the ring buffer */
232        uint32_t max_order;
233        /* Parent format so we can call parent functions */
234        struct libtrace_format_t *parent_format;
235};
236
/* State for one input stream (accessed through STREAM_DATA). The structure
 * is declared with ALIGN_STRUCT(CACHE_LINE_SIZE). */
struct linux_per_stream_t {
        /* File descriptor for the memory mapped stream */
        int fd;
        /* The memory mapped buffer */
        char *rx_ring;
        /* Offset within the mapped buffer */
        int rxring_offset;
} ALIGN_STRUCT(CACHE_LINE_SIZE);
245
246/* Format header for encapsulating packets captured using linux native */
247struct libtrace_linuxnative_header {
248        /* Timestamp of the packet, as a timeval */
249        struct {
250                uint32_t tv_sec;
251                uint32_t tv_usec;
252        } tv;
253        /* Timestamp of the packet, as a timespec */
254        struct {
255                uint32_t tv_sec;
256                uint32_t tv_nsec;
257        } ts;
258        /* The timestamp format used by the process that captured this packet */
259        uint8_t timestamptype;
260        /* Wire length */
261        uint32_t wirelen;
262        /* Capture length */
263        uint32_t caplen;
264        /* The linux native header itself */
265        struct sockaddr_ll hdr;
266};
267
/* Helper macros to make addressing data in the above structures easier.
 *
 * DATA(x), DATA_OUT(x) and STREAM_DATA(x) take a pointer expression and cast
 * the pointed-to object's format_data (or data) member to the matching
 * structure type. The argument is parenthesised so that expressions such as
 * `&obj` or `p + 1` bind correctly before the member access.
 */
#define DATA(x) ((struct linux_format_data_t *)((x)->format_data))
#define DATA_OUT(x) ((struct linux_format_data_out_t *)((x)->format_data))
#define STREAM_DATA(x) ((struct linux_per_stream_t *)((x)->data))

/* Shorthand that relies on a variable named `libtrace` being in scope at the
 * point of use. */
#define FORMAT_DATA DATA(libtrace)
#define FORMAT_DATA_OUT DATA_OUT(libtrace)

/* The parent format of the trace in scope (input and output flavours) */
#define PARENT (FORMAT_DATA->parent_format)
#define PARENT_OUT (FORMAT_DATA_OUT->parent_format)

/* First node of the per-stream list, and the stream data stored in it */
#define FORMAT_DATA_HEAD (FORMAT_DATA->per_stream->head)
#define FORMAT_DATA_FIRST ((struct linux_per_stream_t *)FORMAT_DATA_HEAD->data)
281
/* Locate the sockaddr_ll structure inside a frame: it starts
 * TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) bytes past the frame start. */
#define GET_SOCKADDR_HDR(x) \
        ((struct sockaddr_ll *) (((char *) (x)) \
                + TPACKET_ALIGN(sizeof(struct tpacket2_hdr))))
285
286/* TODO: Decide if inheritance is how we want to do this. Basically, ring is
287 * a subclass of native and so it makes sense to reuse the native code where
288 * possible. Moving ring into a new file really helps with readability, and
289 * also helps us not carry so much ring data around in the native format. */
290struct libtrace_format_t *get_native_format(void);
291
292static inline libtrace_linktype_t get_libtrace_link_type(uint16_t linktype)
293{
294        /* Convert the ARPHRD type into an appropriate libtrace link type */
295        switch (linktype) {
296                case LIBTRACE_ARPHRD_ETHER:
297                case LIBTRACE_ARPHRD_LOOPBACK:
298                        return TRACE_TYPE_ETH;
299                case LIBTRACE_ARPHRD_PPP:
300                        return TRACE_TYPE_NONE;
301                case LIBTRACE_ARPHRD_IEEE80211_RADIOTAP:
302                        return TRACE_TYPE_80211_RADIO;
303                case LIBTRACE_ARPHRD_IEEE80211:
304                        return TRACE_TYPE_80211;
305                case LIBTRACE_ARPHRD_SIT:
306                case LIBTRACE_ARPHRD_NONE:
307                        return TRACE_TYPE_NONE;
308                default: /* shrug, beyond me! */
309                        printf("unknown Linux ARPHRD type 0x%04x\n",linktype);
310                        return (libtrace_linktype_t)~0U;
311        }
312}
313
314static inline libtrace_direction_t get_libtrace_direction(uint8_t pkttype)
315{
316        switch (pkttype) {
317                case PACKET_OUTGOING:
318                case PACKET_LOOPBACK:
319                        return TRACE_DIR_OUTGOING;
320                case PACKET_OTHERHOST:
321                        return TRACE_DIR_OTHER;
322                default:
323                        return TRACE_DIR_INCOMING;
324        }
325}
326
327static libtrace_direction_t set_direction(struct sockaddr_ll * skadr,
328                                          libtrace_direction_t direction)
329{
330        switch (direction) {
331                case TRACE_DIR_OUTGOING:
332                        skadr->sll_pkttype = PACKET_OUTGOING;
333                        return TRACE_DIR_OUTGOING;
334                case TRACE_DIR_INCOMING:
335                        skadr->sll_pkttype = PACKET_HOST;
336                        return TRACE_DIR_INCOMING;
337                case TRACE_DIR_OTHER:
338                        skadr->sll_pkttype = PACKET_OTHERHOST;
339                        return TRACE_DIR_OTHER;
340                default:
341                        return -1;
342        }
343}
Note: See TracBrowser for help on using the repository browser.