source: lib/format_linux_common.h @ 89e2ff7

4.0.1-hotfixescachetimestampsdevelopdpdk-ndagetsilivendag_formatrc-4.0.1rc-4.0.2rc-4.0.3rc-4.0.4ringdecrementfixringperformanceringtimestampfixes
Last change on this file since 89e2ff7 was ee6e802, checked in by Shane Alcock <salcock@…>, 5 years ago

Updated copyright blurb on all source files

In some cases, this meant adding copyright blurbs to files that
had never had them before.

  • Property mode set to 100644
File size: 13.7 KB
Line 
1/*
2 *
3 * Copyright (c) 2007-2016 The University of Waikato, Hamilton, New Zealand.
4 * All rights reserved.
5 *
6 * This file is part of libtrace.
7 *
8 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/
10 *
11 * libtrace is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
15 *
16 * libtrace is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 * GNU Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public License
22 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
23 *
24 *
25 */
26
27
/* Various definitions required for the linux format. They were moved here
 * because format_linux.c had a lot of header information before the actual
 * code. The linux headers have been copied into here rather than included, to
 * support RT on machines that don't have the linux headers (a Mac, for
 * example).
 */
34
35#ifndef FORMAT_LINUX_COMMON_H
36#define FORMAT_LINUX_COMMON_H
37
38#include "libtrace.h"
39#include "libtrace_int.h"
40
41#ifdef HAVE_NETPACKET_PACKET_H
42
43#include <sys/socket.h>
44#include <netpacket/packet.h>
45#include <net/ethernet.h>
46#include <net/if_arp.h>
47
48#include <net/if.h>
49#include <sys/ioctl.h>
50#include <poll.h>
51#include <sys/mman.h>
52
53#include <fcntl.h>
54
55/* MAX_ORDER is defined in linux/mmzone.h. 11 is default for 3.0 kernels.
56 * max_order will be decreased by one if the ring buffer fails to allocate.
57 * Used to get the correct sized buffers from the kernel.
58 */
59#define MAX_ORDER 11
/* Number of frames in the ring, used by both the TX and RX rings. More frames
 * hopefully means less packet loss, especially if traffic comes in bursts.
 */
63#define CONF_RING_FRAMES        0x100
64
65/* The maximum frames allowed to be waiting in the TX_RING before the kernel is
66 * notified to write them out. Make sure this is less than CONF_RING_FRAMES.
67 * Performance doesn't seem to increase any more when setting this above 10.
68 */
69#define TX_MAX_QUEUE            10
70
71#else   /* HAVE_NETPACKET_PACKET_H */
72
/* Local copy of the link-layer socket address layout, used when the system
 * does not provide <netpacket/packet.h>. */
struct sockaddr_ll {
        uint16_t sll_family;
        uint16_t sll_protocol;
        int32_t sll_ifindex;
        uint16_t sll_hatype;
        uint8_t sll_pkttype;
        uint8_t sll_halen;
        uint8_t sll_addr[8];
};
83
84/* Packet types.  */
85#define PACKET_HOST             0               /* To us.  */
86#define PACKET_BROADCAST        1               /* To all.  */
87#define PACKET_MULTICAST        2               /* To group.  */
88#define PACKET_OTHERHOST        3               /* To someone else.  */
89#define PACKET_OUTGOING         4               /* Originated by us . */
90#define PACKET_LOOPBACK         5
91#define PACKET_FASTROUTE        6
92
93/* Packet socket options.  */
94
95#define PACKET_ADD_MEMBERSHIP           1
96#define PACKET_DROP_MEMBERSHIP          2
97#define PACKET_RECV_OUTPUT              3
98#define PACKET_RX_RING                  5
99#define PACKET_STATISTICS               6
100
101#endif /* HAVE_NETPACKET_PACKET_H */
102
/* Pair of counters: tp_packets and tp_drops. */
struct tpacket_stats {
        unsigned int tp_packets, tp_drops;
};
107
/* The timestamp formats a capture can use. */
typedef enum {
        TS_NONE,
        TS_TIMEVAL,
        TS_TIMESPEC
} timestamptype_t;
109
/* Definitions copied from linux/if_packet.h. They are reproduced here instead
 * of including that header, so that a ring frame can still be interpreted on
 * a kernel that doesn't support the format directly.
 */

/* Packet socket option numbers */
#define PACKET_RX_RING                  5
#define PACKET_VERSION                  10
#define PACKET_HDRLEN                   11
#define PACKET_TX_RING                  13
#define PACKET_FANOUT                   18

/* Frame status values */
#define TP_STATUS_USER                  0x1
#define TP_STATUS_SEND_REQUEST          0x1
#define TP_STATUS_AVAILABLE             0x0

/* Reinterpret a frame pointer as a version 2 / version 3 frame header */
#define TO_TP_HDR2(x)                   ((struct tpacket2_hdr *) (x))
#define TO_TP_HDR3(x)                   ((struct tpacket3_hdr *) (x))

/* Round x up to the next multiple of TPACKET_ALIGNMENT */
#define TPACKET_ALIGNMENT               16
#define TPACKET_ALIGN(x) \
        (((x) + TPACKET_ALIGNMENT - 1) & ~(TPACKET_ALIGNMENT - 1))

/* Aligned frame header size plus the sockaddr_ll that is counted after it */
#define TPACKET2_HDRLEN \
        (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
#define TPACKET3_HDRLEN \
        (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll))
128
129
130/* Since 3.1 kernel we have packet_fanout support */
131// schedule to socket by skb's rxhash - the implementation is bi-directional
132#define PACKET_FANOUT_HASH              0
133// schedule round robin
134#define PACKET_FANOUT_LB                1
135// schedule to the same socket that received the packet
136#define PACKET_FANOUT_CPU               2
137// Something to do with fragmented packets and hashing problems !! TODO figure out if this needs to be on
138#define PACKET_FANOUT_FLAG_DEFRAG       0x8000
139/* Included but unused by libtrace since Linux 3.10 */
// if one socket is full, roll over to the next
141#define PACKET_FANOUT_ROLLOVER          3
142// This flag makes any other system roll over
143#define PACKET_FANOUT_FLAG_ROLLOVER     0x1000
144/* Included but unused by libtrace since Linux 3.12 */
145// schedule random
146#define PACKET_FANOUT_RND               4
147
148
/* Ring frame header versions, numbered from zero */
enum tpacket_versions { TPACKET_V1, TPACKET_V2, TPACKET_V3 };
154
/* Header found at the start of each version 2 ring frame */
struct tpacket2_hdr {
        /* Status of the frame, e.g. in use by the kernel or by libtrace */
        uint32_t tp_status;
        /* Length of the packet on the wire */
        uint32_t tp_len;
        /* Length of the packet as captured */
        uint32_t tp_snaplen;
        /* Byte offset from the frame start to the mac (link layer) header */
        uint16_t tp_mac;
        /* Byte offset from the frame start to the net (network layer)
         * header */
        uint16_t tp_net;
        /* Timestamp: seconds and nanoseconds */
        uint32_t tp_sec;
        uint32_t tp_nsec;
        /* VLAN tag control information (not used) */
        uint16_t tp_vlan_tci;
        uint16_t tp_padding;
};
173
/* Variant-specific tail of a version 3 frame header */
struct tpacket_hdr_variant1 {
        uint32_t tp_rxhash;
        uint32_t tp_vlan_tci;
};

/* Header found at the start of each version 3 ring frame */
struct tpacket3_hdr {
        uint32_t tp_next_offset;
        uint32_t tp_sec;
        uint32_t tp_nsec;
        uint32_t tp_snaplen;
        uint32_t tp_len;
        uint32_t tp_status;
        uint16_t tp_mac;
        uint16_t tp_net;
        /* pkt_hdr variants; only one variant is defined here */
        union {
                struct tpacket_hdr_variant1 hv1;
        };
};
193
/* Ring buffer layout: block size/count and frame size/count */
struct tpacket_req {
        /* Minimal size of contiguous block */
        unsigned int tp_block_size;
        /* Number of blocks */
        unsigned int tp_block_nr;
        /* Size of frame */
        unsigned int tp_frame_size;
        /* Total number of frames */
        unsigned int tp_frame_nr;
};
200
/* Fall back to 16 when no included header has defined IF_NAMESIZE */
#ifndef IF_NAMESIZE
#define IF_NAMESIZE 16
#endif

/* Holds the statistic counters of a network card, as accessed via
 * /proc/net/dev: the interface name, then eight receive counters, then
 * eight transmit counters.
 */
struct linux_dev_stats {
        char if_name[IF_NAMESIZE];

        /* Receive side */
        uint64_t rx_bytes, rx_packets, rx_errors, rx_drops;
        uint64_t rx_fifo, rx_frame, rx_compressed, rx_multicast;

        /* Transmit side */
        uint64_t tx_bytes, tx_packets, tx_errors, tx_drops;
        uint64_t tx_fifo, tx_colls, tx_carrier, tx_compressed;
};
227
228/* Note that this structure is passed over the wire in rt encapsulation, and
229 * thus we need to be careful with data sizes.  timeval's and timespec's
230 * can also change their size on 32/64 machines.
231 */
232struct linux_format_data_t {
233        /* The snap length for the capture */
234        int snaplen;
235        /* Flag indicating whether the interface should be placed in
236         * promiscuous mode */
237        int promisc;
238        /* The timestamp format used by the capture */
239        timestamptype_t timestamptype;
240        /* A BPF filter that is applied to every captured packet */
241        libtrace_filter_t *filter;
242        /* Statistics for the capture process, e.g. dropped packet counts */
243        struct tpacket_stats stats;
244        /* Statistics for the NIC rather than the socket */
245        struct linux_dev_stats dev_stats;
246        /* Flag indicating whether the statistics are current or not */
247        int stats_valid;
248        /* Used to determine buffer size for the ring buffer */
249        uint32_t max_order;
250        /* Used for the parallel case, fanout is the mode */
251        uint16_t fanout_flags;
252        /* The group lets Linux know which sockets to group together
253         * so we use a random here to try avoid collisions */
254        uint16_t fanout_group;
255        /* When running in parallel mode this is malloc'd with an array
256         * file descriptors from packet fanout will use, here we assume/hope
257         * that every ring can get setup the same */
258        libtrace_list_t *per_stream;
259};
260
261struct linux_format_data_out_t {
262        /* The file descriptor used to write the packets */
263        int fd;
264        /* The tx ring mmap location */
265        char * tx_ring;
266        /* The current frame number within the tx ring */
267        int txring_offset;
268        /* The current ring buffer layout */
269        struct tpacket_req req;
270        /* Our sockaddr structure, here so we can cache the interface number */
271        struct sockaddr_ll sock_hdr;
272        /* The (maximum) number of packets that haven't been written */
273        int queue;
274        /* The format this trace is using linuxring or linuxnative */
275        libtrace_rt_types_t format;
276        /* Used to determine buffer size for the ring buffer */
277        uint32_t max_order;
278};
279
280struct linux_per_stream_t {
281        /* File descriptor for the memory mapped stream */
282        int fd;
283        /* Memory mapped buffer */
284        char *rx_ring;
285        /* Offset within the mapped buffer */
286        int rxring_offset;
287        /* The ring buffer layout */
288        struct tpacket_req req;
289} ALIGN_STRUCT(CACHE_LINE_SIZE);
290
291#define ZERO_LINUX_STREAM {-1, MAP_FAILED, 0, {0,0,0,0}}
292
293
294/* Format header for encapsulating packets captured using linux native */
295struct libtrace_linuxnative_header {
296        /* Timestamp of the packet, as a timeval */
297        struct {
298                uint32_t tv_sec;
299                uint32_t tv_usec;
300        } tv;
301        /* Timestamp of the packet, as a timespec */
302        struct {
303                uint32_t tv_sec;
304                uint32_t tv_nsec;
305        } ts;
306        /* The timestamp format used by the process that captured this packet */
307        uint8_t timestamptype;
308        /* Wire length */
309        uint32_t wirelen;
310        /* Capture length */
311        uint32_t caplen;
312        /* The linux native header itself */
313        struct sockaddr_ll hdr;
314};
315
/* Helper macros to make addressing data in the above structures easier.
 * The argument is parenthesised so that any pointer expression (e.g. &obj
 * or p + 1) can be passed, not just a plain identifier. */
#define DATA(x) ((struct linux_format_data_t *)(x)->format_data)
#define DATA_OUT(x) ((struct linux_format_data_out_t *)(x)->format_data)
#define STREAM_DATA(x) ((struct linux_per_stream_t *)(x)->data)

/* Shorthands that rely on a variable named `libtrace` being in scope */
#define FORMAT_DATA DATA(libtrace)
#define FORMAT_DATA_OUT DATA_OUT(libtrace)

/* First node of the per-stream list, and the stream data it holds */
#define FORMAT_DATA_HEAD FORMAT_DATA->per_stream->head
#define FORMAT_DATA_FIRST ((struct linux_per_stream_t *)FORMAT_DATA_HEAD->data)

/* Get the sockaddr_ll structure from a frame; it sits at the aligned size
 * of a tpacket2_hdr past the frame start */
#define GET_SOCKADDR_HDR(x)  ((struct sockaddr_ll *) (((char *) (x))\
        + TPACKET_ALIGN(sizeof(struct tpacket2_hdr))))
330
331/* Common functions */
332#ifdef HAVE_NETPACKET_PACKET_H
333int linuxcommon_init_input(libtrace_t *libtrace);
334int linuxcommon_init_output(libtrace_out_t *libtrace);
335int linuxcommon_probe_filename(const char *filename);
336int linuxcommon_config_input(libtrace_t *libtrace, trace_option_t option,
337                             void *data);
338void linuxcommon_close_input_stream(libtrace_t *libtrace,
339                                    struct linux_per_stream_t *stream);
340int linuxcommon_start_input_stream(libtrace_t *libtrace,
341                                   struct linux_per_stream_t *stream);
342int linuxcommon_pause_input(libtrace_t *libtrace);
343int linuxcommon_get_fd(const libtrace_t *libtrace);
344int linuxcommon_fin_input(libtrace_t *libtrace);
345int linuxcommon_pregister_thread(libtrace_t *libtrace,
346                                 libtrace_thread_t *t,
347                                 bool reading);
348int linuxcommon_pstart_input(libtrace_t *libtrace,
349                             int (*start_stream)(libtrace_t *, struct linux_per_stream_t*));
350#endif /* HAVE_NETPACKET_PACKET_H */
351
352void linuxcommon_get_statistics(libtrace_t *libtrace, libtrace_stat_t *stat);
353
354static inline libtrace_direction_t linuxcommon_get_direction(uint8_t pkttype)
355{
356        switch (pkttype) {
357                case PACKET_OUTGOING:
358                case PACKET_LOOPBACK:
359                        return TRACE_DIR_OUTGOING;
360                case PACKET_OTHERHOST:
361                        return TRACE_DIR_OTHER;
362                default:
363                        return TRACE_DIR_INCOMING;
364        }
365}
366
367static inline libtrace_direction_t
368linuxcommon_set_direction(struct sockaddr_ll * skadr,
369                          libtrace_direction_t direction)
370{
371        switch (direction) {
372                case TRACE_DIR_OUTGOING:
373                        skadr->sll_pkttype = PACKET_OUTGOING;
374                        return TRACE_DIR_OUTGOING;
375                case TRACE_DIR_INCOMING:
376                        skadr->sll_pkttype = PACKET_HOST;
377                        return TRACE_DIR_INCOMING;
378                case TRACE_DIR_OTHER:
379                        skadr->sll_pkttype = PACKET_OTHERHOST;
380                        return TRACE_DIR_OTHER;
381                default:
382                        return -1;
383        }
384}
385
386static inline libtrace_linktype_t linuxcommon_get_link_type(uint16_t linktype)
387{
388        /* Convert the ARPHRD type into an appropriate libtrace link type */
389        switch (linktype) {
390                case LIBTRACE_ARPHRD_ETHER:
391                case LIBTRACE_ARPHRD_LOOPBACK:
392                        return TRACE_TYPE_ETH;
393                case LIBTRACE_ARPHRD_PPP:
394                        return TRACE_TYPE_NONE;
395                case LIBTRACE_ARPHRD_IEEE80211_RADIOTAP:
396                        return TRACE_TYPE_80211_RADIO;
397                case LIBTRACE_ARPHRD_IEEE80211:
398                        return TRACE_TYPE_80211;
399                case LIBTRACE_ARPHRD_SIT:
400                case LIBTRACE_ARPHRD_NONE:
401                        return TRACE_TYPE_NONE;
402                default: /* shrug, beyond me! */
403                        printf("unknown Linux ARPHRD type 0x%04x\n",linktype);
404                        return (libtrace_linktype_t)~0U;
405        }
406}
407
408#ifdef HAVE_NETPACKET_PACKET_H
409/**
410 * Converts a socket, either packet_mmap or standard raw socket into a
411 * fanout socket.
412 * NOTE: This means we can read from the socket with multiple queues,
413 * each must be setup (identically) and then this called upon them
414 *
415 * @return 0 success, -1 error
416 */
417static inline int linuxcommon_to_packet_fanout(libtrace_t *libtrace,
418                                        struct linux_per_stream_t *stream)
419{
420        int fanout_opt = ((int)FORMAT_DATA->fanout_flags << 16) |
421                         (int)FORMAT_DATA->fanout_group;
422        if (setsockopt(stream->fd, SOL_PACKET, PACKET_FANOUT,
423                        &fanout_opt, sizeof(fanout_opt)) == -1) {
424                trace_set_err(libtrace, TRACE_ERR_INIT_FAILED,
425                              "Converting the fd to a socket fanout failed %s",
426                              libtrace->uridata);
427                return -1;
428        }
429        return 0;
430}
431#endif /* HAVE_NETPACKET_PACKET_H */
432
433
434#endif /* FORMAT_LINUX_COMMON_H */
Note: See TracBrowser for help on using the repository browser.