source: lib/format_linux_common.h @ 4697684

cachetimestampsdevelopdpdk-ndagetsiliverc-4.0.3rc-4.0.4ringdecrementfixringperformance
Last change on this file since 4697684 was 4697684, checked in by Shane Alcock <salcock@…>, 3 years ago

Add support for ARPHRD_IPGRE

Captures from GRE tunnel interfaces should now work correctly.

  • Property mode set to 100644
File size: 13.7 KB
Line 
1/*
2 *
3 * Copyright (c) 2007-2016 The University of Waikato, Hamilton, New Zealand.
4 * All rights reserved.
5 *
6 * This file is part of libtrace.
7 *
8 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/
10 *
11 * libtrace is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
15 *
16 * libtrace is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 * GNU Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public License
22 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
23 *
24 *
25 */
26
27
/* Various definitions required for the linux format. They were moved here
 * because format_linux.c had a lot of header information before the actual
 * code. The linux headers have been copied into here rather than included, to
 * support RT on machines that don't have the linux headers (a Mac, for
 * example).
 */
34
35#ifndef FORMAT_LINUX_COMMON_H
36#define FORMAT_LINUX_COMMON_H
37
38#include "libtrace.h"
39#include "libtrace_int.h"
40
41#ifdef HAVE_NETPACKET_PACKET_H
42
43#include <sys/socket.h>
44#include <netpacket/packet.h>
45#include <net/ethernet.h>
46#include <net/if_arp.h>
47
48#include <net/if.h>
49#include <sys/ioctl.h>
50#include <poll.h>
51#include <sys/mman.h>
52
53#include <fcntl.h>
54
/* MAX_ORDER is defined in linux/mmzone.h. 11 is the default for 3.0 kernels.
 * max_order will be decreased by one if the ring buffer fails to allocate.
 * Used to get the correct sized buffers from the kernel.
 */
#define MAX_ORDER 11

/* Number of frames in the ring, used by both the TX and RX rings. More frames
 * hopefully means less packet loss, especially if traffic comes in bursts.
 */
#define CONF_RING_FRAMES        0x100

/* The maximum frames allowed to be waiting in the TX_RING before the kernel is
 * notified to write them out. Must be less than CONF_RING_FRAMES (checked at
 * compile time below). Performance doesn't seem to increase any more when
 * setting this above 10.
 */
#define TX_MAX_QUEUE            10

#if TX_MAX_QUEUE >= CONF_RING_FRAMES
#error "TX_MAX_QUEUE must be less than CONF_RING_FRAMES"
#endif
70
71#else   /* HAVE_NETPACKET_PACKET_H */
72
/* Need to know what a sockaddr_ll looks like. This is a local copy of the
 * Linux link-layer socket address, used when <netpacket/packet.h> is not
 * available. Field widths are fixed so the layout is the same everywhere.
 */
struct sockaddr_ll {
        /* Address family */
        uint16_t sll_family;
        /* Link-layer protocol */
        uint16_t sll_protocol;
        /* Interface index */
        int32_t  sll_ifindex;
        /* ARPHRD_* hardware type (see linuxcommon_get_link_type) */
        uint16_t sll_hatype;
        /* One of the PACKET_* packet types (see linuxcommon_get_direction) */
        uint8_t  sll_pkttype;
        /* Number of valid bytes in sll_addr */
        uint8_t  sll_halen;
        /* Link-layer address */
        uint8_t  sll_addr[8];
};
83
/* Packet types, as stored in sockaddr_ll.sll_pkttype. */
#define PACKET_HOST             0               /* To us. */
#define PACKET_BROADCAST        1               /* To all. */
#define PACKET_MULTICAST        2               /* To group. */
#define PACKET_OTHERHOST        3               /* To someone else. */
#define PACKET_OUTGOING         4               /* Originated by us. */
#define PACKET_LOOPBACK         5
#define PACKET_FASTROUTE        6

/* Packet socket options. */
#define PACKET_ADD_MEMBERSHIP           1
#define PACKET_DROP_MEMBERSHIP          2
#define PACKET_RECV_OUTPUT              3
#define PACKET_RX_RING                  5
#define PACKET_STATISTICS               6
100
101#endif /* HAVE_NETPACKET_PACKET_H */
102
/* Capture statistics for a packet socket; stored in
 * struct linux_format_data_t as the statistics for the capture process. */
struct tpacket_stats {
        /* Packet counter */
        unsigned int tp_packets;
        /* Dropped packet counter */
        unsigned int tp_drops;
};
107
/* The timestamp format used by a capture. */
typedef enum {
        TS_NONE = 0,    /* No timestamp format selected */
        TS_TIMEVAL,     /* Timestamps are timevals */
        TS_TIMESPEC     /* Timestamps are timespecs */
} timestamptype_t;
109
/* Definitions taken from linux/if_packet.h. They are copied here rather than
 * including the header, which means that we can interpret a ring frame on a
 * kernel that doesn't support the format directly.
 */
#define PACKET_RX_RING  5
#define PACKET_VERSION  10
#define PACKET_HDRLEN   11
#define PACKET_TX_RING  13
#define PACKET_FANOUT   18

/* Frame status values */
#define TP_STATUS_USER  0x1
#define TP_STATUS_SEND_REQUEST  0x1
#define TP_STATUS_AVAILABLE     0x0

/* View a raw frame pointer as a TPACKET_V2 / TPACKET_V3 frame header */
#define TO_TP_HDR2(x)   ((struct tpacket2_hdr *) (x))
#define TO_TP_HDR3(x)   ((struct tpacket3_hdr *) (x))

/* Round x up to the next multiple of TPACKET_ALIGNMENT */
#define TPACKET_ALIGNMENT       16
#define TPACKET_ALIGN(x)        (((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))

/* Aligned frame header followed by a sockaddr_ll */
#define TPACKET2_HDRLEN         (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
#define TPACKET3_HDRLEN         (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll))
128
129
/* Since 3.1 kernel we have packet_fanout support */
/* Schedule to socket by skb's rxhash - the implementation is bi-directional */
#define PACKET_FANOUT_HASH              0
/* Schedule round robin */
#define PACKET_FANOUT_LB                1
/* Schedule to the same socket that received the packet */
#define PACKET_FANOUT_CPU               2
/* Something to do with fragmented packets and hashing problems.
 * TODO figure out if this needs to be on */
#define PACKET_FANOUT_FLAG_DEFRAG       0x8000

/* Included but unused by libtrace since Linux 3.10 */
/* If one socket is full, roll over to the next */
#define PACKET_FANOUT_ROLLOVER          3
/* This flag makes any other system roll over */
#define PACKET_FANOUT_FLAG_ROLLOVER     0x1000

/* Included but unused by libtrace since Linux 3.12 */
/* Schedule random */
#define PACKET_FANOUT_RND               4
147
148
/* Ring frame header versions. The values are spelled out because they are
 * meaningful numbers rather than arbitrary labels. */
enum tpacket_versions {
        TPACKET_V1 = 0,
        TPACKET_V2 = 1,
        TPACKET_V3 = 2
};
154
/* Header found at the start of each TPACKET_V2 ring frame. All fields are
 * fixed width so the layout does not depend on the host. */
struct tpacket2_hdr {
        /* Frame status - in use by kernel or libtrace etc. */
        uint32_t        tp_status;
        /* Wire length */
        uint32_t        tp_len;
        /* Captured length */
        uint32_t        tp_snaplen;
        /* Offset in bytes from frame start to the mac (link layer) header */
        uint16_t        tp_mac;
        /* Offset in bytes from frame start to the net (network layer) header */
        uint16_t        tp_net;
        /* Timestamp: seconds and nanoseconds */
        uint32_t        tp_sec;
        uint32_t        tp_nsec;
        /* VLAN tag control information (not used) */
        uint16_t        tp_vlan_tci;
        uint16_t        tp_padding;
};
173
/* Variant-specific tail of a TPACKET_V3 frame header */
struct tpacket_hdr_variant1 {
        uint32_t        tp_rxhash;
        uint32_t        tp_vlan_tci;
};

/* Header found at the start of each TPACKET_V3 ring frame */
struct tpacket3_hdr {
        /* Offset to the next frame header */
        uint32_t                tp_next_offset;
        /* Timestamp: seconds and nanoseconds */
        uint32_t                tp_sec;
        uint32_t                tp_nsec;
        /* Captured length */
        uint32_t                tp_snaplen;
        /* Wire length */
        uint32_t                tp_len;
        /* Frame status */
        uint32_t                tp_status;
        /* Offsets in bytes to the mac and net headers */
        uint16_t                tp_mac;
        uint16_t                tp_net;
        /* pkt_hdr variants */
        union {
                struct tpacket_hdr_variant1 hv1;
        };
};
193
/* Ring buffer layout */
struct tpacket_req {
        unsigned int tp_block_size;  /* Minimal size of contiguous block */
        unsigned int tp_block_nr;    /* Number of blocks */
        unsigned int tp_frame_size;  /* Size of frame */
        unsigned int tp_frame_nr;    /* Total number of frames */
};
200
/* Fallback for platforms whose headers do not define IF_NAMESIZE */
#ifndef IF_NAMESIZE
#define IF_NAMESIZE 16
#endif

/* A structure we use to hold statistic counters from the network cards,
 * as accessed via /proc/net/dev.
 */
struct linux_dev_stats {
        /* Name of the interface the counters belong to */
        char if_name[IF_NAMESIZE];
        /* Receive counters */
        uint64_t rx_bytes;
        uint64_t rx_packets;
        uint64_t rx_errors;
        uint64_t rx_drops;
        uint64_t rx_fifo;
        uint64_t rx_frame;
        uint64_t rx_compressed;
        uint64_t rx_multicast;
        /* Transmit counters */
        uint64_t tx_bytes;
        uint64_t tx_packets;
        uint64_t tx_errors;
        uint64_t tx_drops;
        uint64_t tx_fifo;
        uint64_t tx_colls;
        uint64_t tx_carrier;
        uint64_t tx_compressed;
};
227
228/* Note that this structure is passed over the wire in rt encapsulation, and
229 * thus we need to be careful with data sizes.  timeval's and timespec's
230 * can also change their size on 32/64 machines.
231 */
232struct linux_format_data_t {
233        /* The snap length for the capture */
234        int snaplen;
235        /* Flag indicating whether the interface should be placed in
236         * promiscuous mode */
237        int promisc;
238        /* The timestamp format used by the capture */
239        timestamptype_t timestamptype;
240        /* A BPF filter that is applied to every captured packet */
241        libtrace_filter_t *filter;
242        /* Statistics for the capture process, e.g. dropped packet counts */
243        struct tpacket_stats stats;
244        /* Statistics for the NIC rather than the socket */
245        struct linux_dev_stats dev_stats;
246        /* Flag indicating whether the statistics are current or not */
247        int stats_valid;
248        /* Used to determine buffer size for the ring buffer */
249        uint32_t max_order;
250        /* Used for the parallel case, fanout is the mode */
251        uint16_t fanout_flags;
252        /* The group lets Linux know which sockets to group together
253         * so we use a random here to try avoid collisions */
254        uint16_t fanout_group;
255        /* When running in parallel mode this is malloc'd with an array
256         * file descriptors from packet fanout will use, here we assume/hope
257         * that every ring can get setup the same */
258        libtrace_list_t *per_stream;
259
260};
261
262struct linux_format_data_out_t {
263        /* The file descriptor used to write the packets */
264        int fd;
265        /* The tx ring mmap location */
266        char * tx_ring;
267        /* The current frame number within the tx ring */
268        int txring_offset;
269        /* The current ring buffer layout */
270        struct tpacket_req req;
271        /* Our sockaddr structure, here so we can cache the interface number */
272        struct sockaddr_ll sock_hdr;
273        /* The (maximum) number of packets that haven't been written */
274        int queue;
275        /* The format this trace is using linuxring or linuxnative */
276        libtrace_rt_types_t format;
277        /* Used to determine buffer size for the ring buffer */
278        uint32_t max_order;
279};
280
281struct linux_per_stream_t {
282        /* File descriptor for the memory mapped stream */
283        int fd;
284        /* Memory mapped buffer */
285        char *rx_ring;
286        /* Offset within the mapped buffer */
287        int rxring_offset;
288        /* The ring buffer layout */
289        struct tpacket_req req;
290        uint64_t last_timestamp;
291} ALIGN_STRUCT(CACHE_LINE_SIZE);
292
293#define ZERO_LINUX_STREAM {-1, MAP_FAILED, 0, {0,0,0,0}, 0}
294
295
296/* Format header for encapsulating packets captured using linux native */
297struct libtrace_linuxnative_header {
298        /* Timestamp of the packet, as a timeval */
299        struct {
300                uint32_t tv_sec;
301                uint32_t tv_usec;
302        } tv;
303        /* Timestamp of the packet, as a timespec */
304        struct {
305                uint32_t tv_sec;
306                uint32_t tv_nsec;
307        } ts;
308        /* The timestamp format used by the process that captured this packet */
309        uint8_t timestamptype;
310        /* Wire length */
311        uint32_t wirelen;
312        /* Capture length */
313        uint32_t caplen;
314        /* The linux native header itself */
315        struct sockaddr_ll hdr;
316};
317
/* Helper macros to make addressing data in the above structures easier.
 * The argument is parenthesised so that expressions such as `p + 1` or `*pp`
 * expand correctly. */
#define DATA(x) ((struct linux_format_data_t *)(x)->format_data)
#define DATA_OUT(x) ((struct linux_format_data_out_t *)(x)->format_data)
#define STREAM_DATA(x) ((struct linux_per_stream_t *)(x)->data)

/* These two expect a variable named `libtrace` to be in scope */
#define FORMAT_DATA DATA(libtrace)
#define FORMAT_DATA_OUT DATA_OUT(libtrace)

/* Head node of the per-stream list, and the stream stored in that node */
#define FORMAT_DATA_HEAD FORMAT_DATA->per_stream->head
#define FORMAT_DATA_FIRST ((struct linux_per_stream_t *)FORMAT_DATA_HEAD->data)

/* Get the sockaddr_ll structure from a frame: it sits immediately after the
 * aligned tpacket2_hdr */
#define GET_SOCKADDR_HDR(x)  ((struct sockaddr_ll *) (((char *) (x))\
        + TPACKET_ALIGN(sizeof(struct tpacket2_hdr))))
332
333/* Common functions */
334#ifdef HAVE_NETPACKET_PACKET_H
335int linuxcommon_init_input(libtrace_t *libtrace);
336int linuxcommon_init_output(libtrace_out_t *libtrace);
337int linuxcommon_probe_filename(const char *filename);
338int linuxcommon_config_input(libtrace_t *libtrace, trace_option_t option,
339                             void *data);
340void linuxcommon_close_input_stream(libtrace_t *libtrace,
341                                    struct linux_per_stream_t *stream);
342int linuxcommon_start_input_stream(libtrace_t *libtrace,
343                                   struct linux_per_stream_t *stream);
344int linuxcommon_pause_input(libtrace_t *libtrace);
345int linuxcommon_get_fd(const libtrace_t *libtrace);
346int linuxcommon_fin_input(libtrace_t *libtrace);
347int linuxcommon_pregister_thread(libtrace_t *libtrace,
348                                 libtrace_thread_t *t,
349                                 bool reading);
350int linuxcommon_pstart_input(libtrace_t *libtrace,
351                             int (*start_stream)(libtrace_t *, struct linux_per_stream_t*));
352#endif /* HAVE_NETPACKET_PACKET_H */
353
354void linuxcommon_get_statistics(libtrace_t *libtrace, libtrace_stat_t *stat);
355
356static inline libtrace_direction_t linuxcommon_get_direction(uint8_t pkttype)
357{
358        switch (pkttype) {
359                case PACKET_OUTGOING:
360                case PACKET_LOOPBACK:
361                        return TRACE_DIR_OUTGOING;
362                case PACKET_OTHERHOST:
363                        return TRACE_DIR_OTHER;
364                default:
365                        return TRACE_DIR_INCOMING;
366        }
367}
368
369static inline libtrace_direction_t
370linuxcommon_set_direction(struct sockaddr_ll * skadr,
371                          libtrace_direction_t direction)
372{
373        switch (direction) {
374                case TRACE_DIR_OUTGOING:
375                        skadr->sll_pkttype = PACKET_OUTGOING;
376                        return TRACE_DIR_OUTGOING;
377                case TRACE_DIR_INCOMING:
378                        skadr->sll_pkttype = PACKET_HOST;
379                        return TRACE_DIR_INCOMING;
380                case TRACE_DIR_OTHER:
381                        skadr->sll_pkttype = PACKET_OTHERHOST;
382                        return TRACE_DIR_OTHER;
383                default:
384                        return -1;
385        }
386}
387
388static inline libtrace_linktype_t linuxcommon_get_link_type(uint16_t linktype)
389{
390        /* Convert the ARPHRD type into an appropriate libtrace link type */
391        switch (linktype) {
392                case LIBTRACE_ARPHRD_ETHER:
393                case LIBTRACE_ARPHRD_LOOPBACK:
394                        return TRACE_TYPE_ETH;
395                case LIBTRACE_ARPHRD_PPP:
396                case LIBTRACE_ARPHRD_IPGRE:
397                        return TRACE_TYPE_NONE;
398                case LIBTRACE_ARPHRD_IEEE80211_RADIOTAP:
399                        return TRACE_TYPE_80211_RADIO;
400                case LIBTRACE_ARPHRD_IEEE80211:
401                        return TRACE_TYPE_80211;
402                case LIBTRACE_ARPHRD_SIT:
403                case LIBTRACE_ARPHRD_NONE:
404                        return TRACE_TYPE_NONE;
405                default: /* shrug, beyond me! */
406                        printf("unknown Linux ARPHRD type 0x%04x\n",linktype);
407                        return (libtrace_linktype_t)~0U;
408        }
409}
410
411#ifdef HAVE_NETPACKET_PACKET_H
412/**
413 * Converts a socket, either packet_mmap or standard raw socket into a
414 * fanout socket.
415 * NOTE: This means we can read from the socket with multiple queues,
416 * each must be setup (identically) and then this called upon them
417 *
418 * @return 0 success, -1 error
419 */
420static inline int linuxcommon_to_packet_fanout(libtrace_t *libtrace,
421                                        struct linux_per_stream_t *stream)
422{
423        int fanout_opt = ((int)FORMAT_DATA->fanout_flags << 16) |
424                         (int)FORMAT_DATA->fanout_group;
425        if (setsockopt(stream->fd, SOL_PACKET, PACKET_FANOUT,
426                        &fanout_opt, sizeof(fanout_opt)) == -1) {
427                trace_set_err(libtrace, TRACE_ERR_INIT_FAILED,
428                              "Converting the fd to a socket fanout failed %s",
429                              libtrace->uridata);
430                return -1;
431        }
432        return 0;
433}
434#endif /* HAVE_NETPACKET_PACKET_H */
435
436
437#endif /* FORMAT_LINUX_COMMON_H */
Note: See TracBrowser for help on using the repository browser.