source: lib/format_linux_common.h @ 6cf3ca0

Branches: 4.0.1-hotfixes, cachetimestamps, develop, dpdk-ndag, getsilive, libtrace4, ndag_format, pfring, rc-4.0.1, rc-4.0.2, rc-4.0.3, rc-4.0.4, ringdecrementfix, ringperformance, ringtimestampfixes
Last change on this file since 6cf3ca0 was 6cf3ca0, checked in by Richard Sanger <rsangerarj@…>, 7 years ago

Gets the ring format back to a working state, the bulk of the refactoring
is now done.

I've opted to remove the inheritance-style way of grabbing shared functions
and replaced it with a file containing the common functions. Hopefully
this makes it more obvious that both int and ring depend on these.

I've also reworked the formats to be stream orientated, which removed
duplicates of heaps of functions. And allows the parallel and single
thread code to be almost identical.

After doing this many of the places where we had differences in
functions between ring and int disappeared.

I've also upped MAX_ORDER, which is used when allocating memory from the
kernel for the ring format, to 11, since this seems to work on the testing
machines. We'll continue to fall back to smaller values if needed anyway.

  • Property mode set to 100644
File size: 10.4 KB
Line 
/* Various definitions required for the linux format. They were moved here
 * because format_linux.c had a lot of header information before the actual
 * code. The linux headers have been copied into here rather than included, to
 * support RT on machines that don't have the linux headers (like a Mac, for
 * example).
 */
7
8#ifndef FORMAT_LINUX_COMMON_H
9#define FORMAT_LINUX_COMMON_H
10
11#include "libtrace.h"
12#include "libtrace_int.h"
13
14#ifdef HAVE_NETPACKET_PACKET_H
15
16#include <sys/socket.h>
17#include <netpacket/packet.h>
18#include <net/ethernet.h>
19#include <net/if_arp.h>
20
21#include <net/if.h>
22#include <sys/ioctl.h>
23#include <poll.h>
24#include <sys/mman.h>
25
26#include <fcntl.h>
27
/* MAX_ORDER is defined in linux/mmzone.h; 11 is the default for 3.0 kernels.
 * max_order will be decreased by one if the ring buffer fails to allocate.
 * Used to get the correct sized buffers from the kernel.
 */
#define MAX_ORDER 11

/* Number of frames in the ring, used by both the TX and RX rings. More frames
 * hopefully means less packet loss, especially if traffic comes in bursts.
 */
#define CONF_RING_FRAMES        0x100

/* The maximum frames allowed to be waiting in the TX_RING before the kernel is
 * notified to write them out. This must be less than CONF_RING_FRAMES.
 * Performance doesn't seem to increase any more when setting this above 10.
 */
#define TX_MAX_QUEUE            10

/* Enforce the relationship documented above at compile time. */
#if TX_MAX_QUEUE >= CONF_RING_FRAMES
#error "TX_MAX_QUEUE must be less than CONF_RING_FRAMES"
#endif
43
44#else   /* HAVE_NETPACKET_PACKET_H */
45
/* Need to know what a sockaddr_ll looks like. This fallback definition is
 * only used when <netpacket/packet.h> is not available.
 */
struct sockaddr_ll {
        uint16_t sll_family;    /* Address family */
        uint16_t sll_protocol;  /* Link layer protocol */
        int32_t  sll_ifindex;   /* Interface index */
        uint16_t sll_hatype;    /* Hardware address type */
        uint8_t  sll_pkttype;   /* Packet type, one of the PACKET_* values */
        uint8_t  sll_halen;     /* Length of the address in sll_addr */
        uint8_t  sll_addr[8];   /* Link layer address */
};
56
/* Packet types.  */
#define PACKET_HOST             0               /* To us.  */
#define PACKET_BROADCAST        1               /* To all.  */
#define PACKET_MULTICAST        2               /* To group.  */
#define PACKET_OTHERHOST        3               /* To someone else.  */
#define PACKET_OUTGOING         4               /* Originated by us.  */
#define PACKET_LOOPBACK         5
#define PACKET_FASTROUTE        6

/* Packet socket options.  */

#define PACKET_ADD_MEMBERSHIP           1
#define PACKET_DROP_MEMBERSHIP          2
#define PACKET_RECV_OUTPUT              3
#define PACKET_RX_RING                  5
#define PACKET_STATISTICS               6
73
74#endif /* HAVE_NETPACKET_PACKET_H */
75
/* Capture statistics counters.
 * NOTE(review): presumably mirrors the kernel's struct tpacket_stats as
 * returned for the PACKET_STATISTICS socket option -- confirm against
 * linux/if_packet.h.
 */
struct tpacket_stats {
        unsigned int tp_packets;        /* Packet counter */
        unsigned int tp_drops;          /* Dropped packet counter */
};
80
/* The timestamp format used by a capture */
typedef enum {
        TS_NONE,        /* No timestamp format */
        TS_TIMEVAL,     /* Timestamp is a timeval */
        TS_TIMESPEC     /* Timestamp is a timespec */
} timestamptype_t;
82
/* linux/if_packet.h defines. They are reproduced here rather than including
 * the header, which means that we can interpret a ring frame on a kernel that
 * doesn't support the format directly.
 */
#define PACKET_RX_RING  5
#define PACKET_VERSION  10
#define PACKET_HDRLEN   11
#define PACKET_TX_RING  13
#define PACKET_FANOUT   18

/* Frame status values */
#define TP_STATUS_USER  0x1
#define TP_STATUS_SEND_REQUEST  0x1
#define TP_STATUS_AVAILABLE     0x0

/* Treat a pointer to a frame as a pointer to a V2 / V3 frame header */
#define TO_TP_HDR2(x)   ((struct tpacket2_hdr *) (x))
#define TO_TP_HDR3(x)   ((struct tpacket3_hdr *) (x))

/* Round x up to the next multiple of TPACKET_ALIGNMENT */
#define TPACKET_ALIGNMENT       16
#define TPACKET_ALIGN(x)        (((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))

/* Aligned frame header size plus the size of the sockaddr_ll */
#define TPACKET2_HDRLEN         (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
#define TPACKET3_HDRLEN         (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll))
101
102
/* Since the 3.1 kernel we have packet_fanout support */
/* Schedule to socket by skb's rxhash - the implementation is bi-directional */
#define PACKET_FANOUT_HASH              0
/* Schedule round robin */
#define PACKET_FANOUT_LB                1
/* Schedule to the same socket that received the packet */
#define PACKET_FANOUT_CPU               2
/* Something to do with fragmented packets and hashing problems
 * !! TODO figure out if this needs to be on */
#define PACKET_FANOUT_FLAG_DEFRAG       0x8000

/* Included but unused by libtrace since Linux 3.10 */
/* If one socket is full, roll over to the next */
#define PACKET_FANOUT_ROLLOVER          3
/* This flag makes any other system roll over */
#define PACKET_FANOUT_FLAG_ROLLOVER     0x1000

/* Included but unused by libtrace since Linux 3.12 */
/* Schedule random */
#define PACKET_FANOUT_RND               4
120
121
/* Versions of the ring frame header format */
enum tpacket_versions {
        TPACKET_V1,
        TPACKET_V2,     /* Frames use struct tpacket2_hdr */
        TPACKET_V3      /* Frames use struct tpacket3_hdr */
};
127
/* Header found at the start of each frame in a TPACKET_V2 ring */
struct tpacket2_hdr {
        /* Frame status - in use by kernel or libtrace etc. */
        uint32_t        tp_status;
        /* Wire length */
        uint32_t        tp_len;
        /* Captured length */
        uint32_t        tp_snaplen;
        /* Offset in bytes from frame start to the mac (link layer) header */
        uint16_t        tp_mac;
        /* Offset in bytes from frame start to the net (network layer) header */
        uint16_t        tp_net;
        /* Timestamp: seconds and nanoseconds */
        uint32_t        tp_sec;
        uint32_t        tp_nsec;
        /* VLAN tag control information (not used) */
        uint16_t        tp_vlan_tci;
        uint16_t        tp_padding;
};
146
/* Variant-specific part of a tpacket3_hdr */
struct tpacket_hdr_variant1 {
        uint32_t        tp_rxhash;
        uint32_t        tp_vlan_tci;
};

/* Header found at the start of each frame in a TPACKET_V3 ring */
struct tpacket3_hdr {
        /* NOTE(review): presumably the offset to the next frame -- confirm */
        uint32_t                tp_next_offset;
        /* Timestamp: seconds and nanoseconds */
        uint32_t                tp_sec;
        uint32_t                tp_nsec;
        /* Captured length */
        uint32_t                tp_snaplen;
        /* Wire length */
        uint32_t                tp_len;
        /* Frame status */
        uint32_t                tp_status;
        /* Offsets to the mac (link layer) and net (network layer) headers */
        uint16_t                tp_mac;
        uint16_t                tp_net;
        /* pkt_hdr variants */
        union {
                struct tpacket_hdr_variant1 hv1;
        };
};
166
/* Describes the layout of a ring buffer: its blocks and its frames */
struct tpacket_req {
        unsigned int tp_block_size;  /* Minimal size of contiguous block */
        unsigned int tp_block_nr;    /* Number of blocks */
        unsigned int tp_frame_size;  /* Size of frame */
        unsigned int tp_frame_nr;    /* Total number of frames */
};
173
174/* Note that this structure is passed over the wire in rt encapsulation, and
175 * thus we need to be careful with data sizes.  timeval's and timespec's
176 * can also change their size on 32/64 machines.
177 */
178struct linux_format_data_t {
179        /* The snap length for the capture */
180        int snaplen;
181        /* Flag indicating whether the interface should be placed in
182         * promiscuous mode */
183        int promisc;
184        /* The timestamp format used by the capture */
185        timestamptype_t timestamptype;
186        /* A BPF filter that is applied to every captured packet */
187        libtrace_filter_t *filter;
188        /* Statistics for the capture process, e.g. dropped packet counts */
189        struct tpacket_stats stats;
190        /* Flag indicating whether the statistics are current or not */
191        int stats_valid;
192        /* The current ring buffer layout */
193        struct tpacket_req req;
194        /* Used to determine buffer size for the ring buffer */
195        uint32_t max_order;
196        /* Used for the parallel case, fanout is the mode */
197        uint16_t fanout_flags;
198        /* The group lets Linux know which sockets to group together
199         * so we use a random here to try avoid collisions */
200        uint16_t fanout_group;
201        /* When running in parallel mode this is malloc'd with an array
202         * file descriptors from packet fanout will use, here we assume/hope
203         * that every ring can get setup the same */
204        libtrace_list_t *per_stream;
205};
206
207struct linux_format_data_out_t {
208        /* The file descriptor used to write the packets */
209        int fd;
210        /* The tx ring mmap location */
211        char * tx_ring;
212        /* The current frame number within the tx ring */
213        int txring_offset;
214        /* The current ring buffer layout */
215        struct tpacket_req req;
216        /* Our sockaddr structure, here so we can cache the interface number */
217        struct sockaddr_ll sock_hdr;
218        /* The (maximum) number of packets that haven't been written */
219        int queue;
220        /* The format this trace is using linuxring or linuxnative */
221        libtrace_rt_types_t format;
222        /* Used to determine buffer size for the ring buffer */
223        uint32_t max_order;
224};
225
/* Per-stream state; the struct is aligned to CACHE_LINE_SIZE */
struct linux_per_stream_t {
        /* File descriptor for the memory mapped stream */
        int fd;
        /* Memory mapped buffer */
        char *rx_ring;
        /* Offset within the mapped buffer */
        int rxring_offset;
} ALIGN_STRUCT(CACHE_LINE_SIZE);

/* Initialiser for a stream that is not yet set up: fd is -1, rx_ring is
 * MAP_FAILED and rxring_offset is 0 */
#define ZERO_LINUX_STREAM {-1, MAP_FAILED, 0}
236
237
238/* Format header for encapsulating packets captured using linux native */
239struct libtrace_linuxnative_header {
240        /* Timestamp of the packet, as a timeval */
241        struct {
242                uint32_t tv_sec;
243                uint32_t tv_usec;
244        } tv;
245        /* Timestamp of the packet, as a timespec */
246        struct {
247                uint32_t tv_sec;
248                uint32_t tv_nsec;
249        } ts;
250        /* The timestamp format used by the process that captured this packet */
251        uint8_t timestamptype;
252        /* Wire length */
253        uint32_t wirelen;
254        /* Capture length */
255        uint32_t caplen;
256        /* The linux native header itself */
257        struct sockaddr_ll hdr;
258};
259
/* Helper macros to make addressing data in the above structures easier.
 * The argument is parenthesised so that expressions such as `trace + 1` or
 * `&traces[i]` can be passed safely.
 */
#define DATA(x) ((struct linux_format_data_t *)(x)->format_data)
#define DATA_OUT(x) ((struct linux_format_data_out_t *)(x)->format_data)
#define STREAM_DATA(x) ((struct linux_per_stream_t *)(x)->data)

/* Shorthands that expect a variable named `libtrace` to be in scope */
#define FORMAT_DATA DATA(libtrace)
#define FORMAT_DATA_OUT DATA_OUT(libtrace)

/* The head node of the per-stream list, and the stream data it holds */
#define FORMAT_DATA_HEAD (FORMAT_DATA->per_stream->head)
#define FORMAT_DATA_FIRST ((struct linux_per_stream_t *)FORMAT_DATA_HEAD->data)

/* Get the sockaddr_ll structure from a frame; it is located at the aligned
 * size of a tpacket2_hdr past the start of the frame */
#define GET_SOCKADDR_HDR(x)  ((struct sockaddr_ll *) (((char *) (x))\
        + TPACKET_ALIGN(sizeof(struct tpacket2_hdr))))
274
/* Common functions shared by the linux formats. These are only declared
 * when <netpacket/packet.h> is available.
 *
 * None of these prototypes is declared `inline`: the functions are not
 * defined in this header, and C99 requires a function with external linkage
 * that is declared inline to be defined in the same translation unit.
 */
#ifdef HAVE_NETPACKET_PACKET_H
int linuxcommon_init_input(libtrace_t *libtrace);
int linuxcommon_init_output(libtrace_out_t *libtrace);
int linuxcommon_probe_filename(const char *filename);
int linuxcommon_config_input(libtrace_t *libtrace, trace_option_t option,
                             void *data);
void linuxcommon_close_input_stream(libtrace_t *libtrace,
                                    struct linux_per_stream_t *stream);
int linuxcommon_start_input_stream(libtrace_t *libtrace,
                                   struct linux_per_stream_t *stream);
int linuxcommon_to_packet_fanout(libtrace_t *libtrace,
                                 struct linux_per_stream_t *stream);
int linuxcommon_pause_input(libtrace_t *libtrace);
int linuxcommon_get_fd(const libtrace_t *libtrace);
int linuxcommon_fin_input(libtrace_t *libtrace);
int linuxcommon_pconfig_input(libtrace_t *libtrace,
                              trace_parallel_option_t option,
                              void *data);
int linuxcommon_pregister_thread(libtrace_t *libtrace,
                                 libtrace_thread_t *t,
                                 bool reading);
/* start_stream is a callback taking the trace and a per-stream structure */
int linuxcommon_pstart_input(libtrace_t *libtrace,
                             int (*start_stream)(libtrace_t *, struct linux_per_stream_t*));
#endif /* HAVE_NETPACKET_PACKET_H */
300
301uint64_t linuxcommon_get_captured_packets(libtrace_t *libtrace);
302uint64_t linuxcommon_get_filtered_packets(libtrace_t *libtrace);
303uint64_t linuxcommon_get_dropped_packets(libtrace_t *libtrace);
304inline libtrace_direction_t linuxcommon_get_direction(uint8_t pkttype);
305inline libtrace_direction_t linuxcommon_set_direction(struct sockaddr_ll * skadr,
306                                                 libtrace_direction_t direction);
307inline libtrace_linktype_t linuxcommon_get_link_type(uint16_t linktype);
308
309
310#endif /* FORMAT_LINUX_COMMON_H */
Note: See TracBrowser for help on using the repository browser.