1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
|
/*
 * Attempt to set up memory-mapped access.
 *
 * On success, returns 1, and sets *status to 0 if there are no warnings
 * or to a PCAP_WARNING_ code if there is a warning.
 *
 * On failure due to lack of support for memory-mapped capture, returns
 * 0.
 *
 * On error, returns -1, and sets *status to the appropriate error code;
 * if that is PCAP_ERROR, sets handle->errbuf to the appropriate message.
 *
 * On success the following fields of the handle are filled in:
 * md.mmapbuf/md.mmapbuflen (the mapped ring), buffer (an array with one
 * frame pointer per ring frame), cc (the number of frames), bufsize
 * (the frame size) and offset (reset to 0).
 */
static int
create_ring(pcap_t *handle, int *status)
{
	unsigned i, j, frames_per_block;
	struct tpacket_req req;
	socklen_t len;
	unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff;
	unsigned int frame_size;

	/*
	 * Start out assuming no warnings or errors.
	 */
	*status = 0;

	/* Note that with large snapshot length (say 64K, which is the default
	 * for recent versions of tcpdump, the value that "-s 0" has given
	 * for a long time with tcpdump, and the default in Wireshark/TShark),
	 * if we use the snapshot length to calculate the frame length,
	 * only a few frames will be available in the ring even with pretty
	 * large ring size (and a lot of memory will be unused).
	 *
	 * Ideally, we should choose a frame length based on the
	 * minimum of the specified snapshot length and the maximum
	 * packet size.  That's not as easy as it sounds; consider, for
	 * example, an 802.11 interface in monitor mode, where the
	 * frame would include a radiotap header, where the maximum
	 * radiotap header length is device-dependent.
	 *
	 * So, for now, we just do this for Ethernet devices, where
	 * there's no metadata header, and the link-layer header is
	 * fixed length.  We can get the maximum packet size by
	 * adding 18, the Ethernet header length plus the CRC length
	 * (just in case we happen to get the CRC in the packet), to
	 * the MTU of the interface; we fetch the MTU in the hopes
	 * that it reflects support for jumbo frames.  (Even if the
	 * interface is just being used for passive snooping, the driver
	 * might set the size of buffers in the receive ring based on
	 * the MTU, so that the MTU limits the maximum size of packets
	 * that we can receive.)
	 *
	 * We don't do that if segmentation/fragmentation or receive
	 * offload are enabled, so we don't get rudely surprised by
	 * "packets" bigger than the MTU. */
	frame_size = handle->snapshot;
	if (handle->linktype == DLT_EN10MB) {
		int mtu;
		int offload;

		offload = iface_get_offload(handle);
		if (offload == -1) {
			*status = PCAP_ERROR;
			return -1;
		}
		if (!offload) {
			mtu = iface_get_mtu(handle->fd, handle->opt.source,
			    handle->errbuf);
			if (mtu == -1) {
				*status = PCAP_ERROR;
				return -1;
			}
			if (frame_size > mtu + 18)
				frame_size = mtu + 18;
		}
	}

	/* NOTE: calculations matching those in tpacket_rcv()
	 * in linux-2.6/net/packet/af_packet.c
	 */
	len = sizeof(sk_type);
	if (getsockopt(handle->fd, SOL_SOCKET, SO_TYPE, &sk_type, &len) < 0) {
		snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "getsockopt: %s", pcap_strerror(errno));
		*status = PCAP_ERROR;
		return -1;
	}

#ifdef PACKET_RESERVE
	len = sizeof(tp_reserve);
	if (getsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE, &tp_reserve, &len) < 0) {
		if (errno != ENOPROTOOPT) {
			/*
			 * ENOPROTOOPT means "kernel doesn't support
			 * PACKET_RESERVE", in which case we fall back
			 * as best we can; any other errno is a real
			 * error.
			 */
			snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "getsockopt: %s", pcap_strerror(errno));
			*status = PCAP_ERROR;
			return -1;
		}
		tp_reserve = 0;	/* older kernel, reserve not supported */
	}
#else
	tp_reserve = 0;	/* older kernel, reserve not supported */
#endif

	maclen = (sk_type == SOCK_DGRAM) ? 0 : MAX_LINKHEADER_SIZE;
	/* XXX: in the kernel maclen is calculated from
	 * LL_ALLOCATED_SPACE(dev) and vnet_hdr.hdr_len
	 * in: packet_snd() in linux-2.6/net/packet/af_packet.c
	 * then packet_alloc_skb() in linux-2.6/net/packet/af_packet.c
	 * then sock_alloc_send_pskb() in linux-2.6/net/core/sock.c
	 * but I see no way to get those sizes in userspace,
	 * like for instance with an ifreq ioctl();
	 * the best thing I've found so far is MAX_HEADER in the kernel
	 * part of linux-2.6/include/linux/netdevice.h
	 * which goes up to 128+48=176; since pcap-linux.c defines
	 * a MAX_LINKHEADER_SIZE of 256 which is greater than that,
	 * let's use it.. maybe is it even large enough to directly
	 * replace macoff..
	 */
	tp_hdrlen = TPACKET_ALIGN(handle->md.tp_hdrlen) + sizeof(struct sockaddr_ll) ;
	netoff = TPACKET_ALIGN(tp_hdrlen + (maclen < 16 ? 16 : maclen)) + tp_reserve;
	/* NOTE: AFAICS tp_reserve may break the TPACKET_ALIGN of
	 * netoff, which contradicts
	 * linux-2.6/Documentation/networking/packet_mmap.txt
	 * documenting that:
	 * "- Gap, chosen so that packet data (Start+tp_net)
	 * aligns to TPACKET_ALIGNMENT=16"
	 */
	/* NOTE: in linux-2.6/include/linux/skbuff.h:
	 * "CPUs often take a performance hit
	 * when accessing unaligned memory locations"
	 */
	macoff = netoff - maclen;
	req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size);
	req.tp_frame_nr = handle->opt.buffer_size/req.tp_frame_size;

	/* compute the minimum block size that will handle this frame.
	 * The block has to be page size aligned.
	 * The max block size allowed by the kernel is arch-dependent and
	 * it's not explicitly checked here. */
	req.tp_block_size = getpagesize();
	while (req.tp_block_size < req.tp_frame_size)
		req.tp_block_size <<= 1;

	frames_per_block = req.tp_block_size/req.tp_frame_size;

	/*
	 * PACKET_TIMESTAMP was added after linux/net_tstamp.h was,
	 * so we check for PACKET_TIMESTAMP.  We check for
	 * linux/net_tstamp.h just in case a system somehow has
	 * PACKET_TIMESTAMP but not linux/net_tstamp.h; that might
	 * be unnecessary.
	 *
	 * SIOCSHWTSTAMP was introduced in the patch that introduced
	 * linux/net_tstamp.h, so we don't bother checking whether
	 * SIOCSHWTSTAMP is defined (if your Linux system has
	 * linux/net_tstamp.h but doesn't define SIOCSHWTSTAMP, your
	 * Linux system is badly broken).
	 */
#if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP)
	/*
	 * If we were told to do so, ask the kernel and the driver
	 * to use hardware timestamps.
	 *
	 * Hardware timestamps are only supported with mmapped
	 * captures.
	 */
	if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER ||
	    handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER_UNSYNCED) {
		struct hwtstamp_config hwconfig;
		struct ifreq ifr;
		int timesource;

		/*
		 * Ask for hardware time stamps on all packets,
		 * including transmitted packets.
		 */
		memset(&hwconfig, 0, sizeof(hwconfig));
		hwconfig.tx_type = HWTSTAMP_TX_ON;
		hwconfig.rx_filter = HWTSTAMP_FILTER_ALL;

		memset(&ifr, 0, sizeof(ifr));
		/*
		 * Bounded copy: the device name comes from the caller
		 * and ifr_name is a fixed-size array, so never write
		 * more than it can hold.  snprintf() always
		 * NUL-terminates; an over-long name is truncated rather
		 * than overflowing the buffer.
		 */
		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s",
		    handle->opt.source);
		ifr.ifr_data = (void *)&hwconfig;

		if (ioctl(handle->fd, SIOCSHWTSTAMP, &ifr) < 0) {
			switch (errno) {

			case EPERM:
				/*
				 * Treat this as an error, as the
				 * user should try to run this
				 * with the appropriate privileges -
				 * and, if they can't, shouldn't
				 * try requesting hardware time stamps.
				 */
				*status = PCAP_ERROR_PERM_DENIED;
				return -1;

			case EOPNOTSUPP:
				/*
				 * Treat this as a warning, as the
				 * only way to fix the warning is to
				 * get an adapter that supports hardware
				 * time stamps.  We'll just fall back
				 * on the standard host time stamps.
				 */
				*status = PCAP_WARNING_TSTAMP_TYPE_NOTSUP;
				break;

			default:
				snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
				    "SIOCSHWTSTAMP failed: %s",
				    pcap_strerror(errno));
				*status = PCAP_ERROR;
				return -1;
			}
		} else {
			/*
			 * Well, that worked.  Now specify the type of
			 * hardware time stamp we want for this
			 * socket.
			 */
			if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER) {
				/*
				 * Hardware timestamp, synchronized
				 * with the system clock.
				 */
				timesource = SOF_TIMESTAMPING_SYS_HARDWARE;
			} else {
				/*
				 * PCAP_TSTAMP_ADAPTER_UNSYNCED - hardware
				 * timestamp, not synchronized with the
				 * system clock.
				 */
				timesource = SOF_TIMESTAMPING_RAW_HARDWARE;
			}
			if (setsockopt(handle->fd, SOL_PACKET, PACKET_TIMESTAMP,
			    (void *)&timesource, sizeof(timesource))) {
				snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
				    "can't set PACKET_TIMESTAMP: %s",
				    pcap_strerror(errno));
				*status = PCAP_ERROR;
				return -1;
			}
		}
	}
#endif /* HAVE_LINUX_NET_TSTAMP_H && PACKET_TIMESTAMP */

	/* ask the kernel to create the ring */
retry:
	req.tp_block_nr = req.tp_frame_nr / frames_per_block;

	/* req.tp_frame_nr is requested to match frames_per_block*req.tp_block_nr */
	req.tp_frame_nr = req.tp_block_nr * frames_per_block;

	if (setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,
	    (void *) &req, sizeof(req))) {
		if ((errno == ENOMEM) && (req.tp_block_nr > 1)) {
			/*
			 * Memory failure; try to reduce the requested ring
			 * size.
			 *
			 * We used to reduce this by half -- do 5% instead.
			 * That may result in more iterations and a longer
			 * startup, but the user will be much happier with
			 * the resulting buffer size.
			 */
			if (req.tp_frame_nr < 20)
				req.tp_frame_nr -= 1;
			else
				req.tp_frame_nr -= req.tp_frame_nr/20;
			goto retry;
		}
		if (errno == ENOPROTOOPT) {
			/*
			 * We don't have ring buffer support in this kernel.
			 */
			return 0;
		}
		snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
		    "can't create rx ring on packet socket: %s",
		    pcap_strerror(errno));
		*status = PCAP_ERROR;
		return -1;
	}

	/* memory map the rx ring */
	handle->md.mmapbuflen = req.tp_block_nr * req.tp_block_size;
	handle->md.mmapbuf = mmap(0, handle->md.mmapbuflen,
	    PROT_READ|PROT_WRITE, MAP_SHARED, handle->fd, 0);
	if (handle->md.mmapbuf == MAP_FAILED) {
		snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
		    "can't mmap rx ring: %s", pcap_strerror(errno));

		/* clear the allocated ring on error*/
		destroy_ring(handle);
		*status = PCAP_ERROR;
		return -1;
	}

	/* allocate a ring for each frame header pointer*/
	handle->cc = req.tp_frame_nr;
	handle->buffer = malloc(handle->cc * sizeof(union thdr *));
	if (!handle->buffer) {
		snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
		    "can't allocate ring of frame headers: %s",
		    pcap_strerror(errno));

		destroy_ring(handle);
		*status = PCAP_ERROR;
		return -1;
	}

	/*
	 * fill the header ring with proper frame ptr; handle->offset is
	 * used as the running index into the header ring while filling
	 * (RING_GET_FRAME is an assignable macro defined elsewhere in the
	 * file, presumably selecting the slot at handle->offset), and is
	 * reset to 0 afterwards.
	 */
	handle->offset = 0;
	for (i=0; i<req.tp_block_nr; ++i) {
		/*
		 * Walk the block with a char pointer: arithmetic on
		 * "void *" is a GNU extension, not standard C.
		 */
		char *base = (char *)&handle->md.mmapbuf[i*req.tp_block_size];
		for (j=0; j<frames_per_block; ++j, ++handle->offset) {
			RING_GET_FRAME(handle) = (void *)base;
			base += req.tp_frame_size;
		}
	}

	handle->bufsize = req.tp_frame_size;
	handle->offset = 0;
	return 1;
}
Partager