add pf_ring patches for kernel and libpcap

SVN-Revision: 2266
This commit is contained in:
Felix Fietkau 2005-10-22 22:03:56 +00:00
parent 29d9cfcf65
commit c2520f5416
4 changed files with 12357 additions and 1 deletions

View file

@ -57,7 +57,7 @@ $(PKG_BUILD_DIR)/.built:
rm -rf $(PKG_INSTALL_DIR)
mkdir -p $(PKG_INSTALL_DIR)
$(MAKE) -C $(PKG_BUILD_DIR) \
CCOPT="$(TARGET_CFLAGS)" \
CCOPT="$(TARGET_CFLAGS) -I$(BUILD_DIR)/linux/include" \
DESTDIR="$(PKG_INSTALL_DIR)" \
all install
touch $@

View file

@ -0,0 +1,613 @@
diff -urN libpcap.old/pcap-int.h libpcap.dev/pcap-int.h
--- libpcap.old/pcap-int.h 2003-12-15 02:42:24.000000000 +0100
+++ libpcap.dev/pcap-int.h 2005-10-22 23:20:12.220060500 +0200
@@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#) $Header: /tcpdump/master/libpcap/pcap-int.h,v 1.55.2.4 2003/12/15 01:42:24 guy Exp $ (LBL)
+ * @(#) $Header: /export/home/ntop/PF_RING/userland/libpcap-0.8.1-ring/pcap-int.h,v 1.2 2004/11/25 09:58:00 deri Exp $ (LBL)
*/
#ifndef pcap_int_h
@@ -46,6 +46,8 @@
#include <packet32.h>
#endif /* WIN32 */
+#define RING /* L.Deri */
+
/*
* Savefile
*/
@@ -93,6 +95,57 @@
#endif
};
+/* **************************** */
+
+#ifdef RING
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <sys/poll.h>
+
+#define PAGE_SIZE 4096
+
+#define HAVE_PCAP
+#include <linux/ring.h>
+#endif
+
+#ifdef RING
+
+#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */
+
+struct e1000_rx_desc { /* Receive-side counterpart of struct e1000_tx_desc; the fields add up to 16 bytes */
+ u_int64_t buffer_addr; /* Address of the descriptor's data buffer */
+ u_int16_t length; /* Length of data DMAed into data buffer */
+ u_int16_t csum; /* Packet checksum */
+ u_int8_t status; /* Descriptor status; presumably tested against E1000_RXD_STAT_DD (0x01, "Descriptor Done") - TODO confirm */
+ u_int8_t errors; /* Descriptor Errors */
+ u_int16_t special; /* NOTE(review): meaning not documented in this patch */
+};
+
+/* Transmit Descriptor: a 64-bit buffer address followed by two 32-bit words, each readable whole or by field */
+struct e1000_tx_desc {
+ u_int64_t buffer_addr; /* Address of the descriptor's data buffer */
+ union {
+ u_int32_t data; /* The whole lower word at once */
+ struct {
+ u_int16_t length; /* Data buffer length */
+ u_int8_t cso; /* Checksum offset */
+ u_int8_t cmd; /* Descriptor control */
+ } flags; /* The same 32 bits split into length/cso/cmd */
+ } lower;
+ union {
+ u_int32_t data; /* The whole upper word at once */
+ struct {
+ u_int8_t status; /* Descriptor status */
+ u_int8_t css; /* Checksum start */
+ u_int16_t special; /* NOTE(review): meaning not documented in this patch */
+ } fields; /* The same 32 bits split into status/css/special */
+ } upper;
+};
+
+#endif
+
struct pcap {
#ifdef WIN32
ADAPTER *adapter;
@@ -121,6 +174,14 @@
u_char *bp;
int cc;
+#ifdef RING
+ /* PF_RING */
+ char *ring_buffer, *ring_slots;
+ int ring_fd;
+ FlowSlotInfo *slots_info;
+ u_int page_id, slot_id, pkts_per_page;
+ u_int poll_sleep;
+#endif
/*
* Place holder for pcap_next().
*/
diff -urN libpcap.old/pcap-linux.c libpcap.dev/pcap-linux.c
--- libpcap.old/pcap-linux.c 2003-11-21 11:20:46.000000000 +0100
+++ libpcap.dev/pcap-linux.c 2005-10-22 23:43:59.726120250 +0200
@@ -27,7 +27,7 @@
#ifndef lint
static const char rcsid[] _U_ =
- "@(#) $Header: /tcpdump/master/libpcap/pcap-linux.c,v 1.98.2.4 2003/11/21 10:20:46 guy Exp $ (LBL)";
+ "@(#) $Header: /export/home/ntop/PF_RING/userland/libpcap-0.8.1-ring/pcap-linux.c,v 1.2 2004/11/25 09:58:00 deri Exp $ (LBL)";
#endif
/*
@@ -83,7 +83,7 @@
#ifdef HAVE_DAG_API
#include "pcap-dag.h"
#endif /* HAVE_DAG_API */
-
+
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
@@ -217,6 +217,83 @@
= { 1, &total_insn };
#endif
+#define RING /* L.Deri */
+#define SAFE_RING_MODE /*
+ Copy the bucket in order to avoid kernel
+ crash if the application faults
+ */
+
+#ifdef RING
+unsigned char *write_register; /* NOTE(review): not referenced in the visible hunks of this patch */
+static struct pcap_stat ringStats; /* tot_read/tot_lost sampled when the ring is opened; subtracted again when stats are reported */
+u_long numPollCalls = 0, numReadCalls = 0; /* numPollCalls: bumped after each poll() in the ring read path; numReadCalls: never updated in the visible hunks */
+
+#define POLL_SLEEP_STEP 10 /* NOTE(review): the original note read "ns = 0.1 ms", which cannot both hold - confirm the unit */
+#define POLL_SLEEP_MIN POLL_SLEEP_STEP
+#define POLL_SLEEP_MAX 1000 /* ns per the original note; same unit caveat as POLL_SLEEP_STEP */
+#define POLL_QUEUE_MIN_LEN 500 /* # packets */
+
+#ifdef SAFE_RING_MODE
+static char staticBucket[2048]; /* private copy of the packet bytes handed to the callback; longer captures are cut to this size */
+#endif
+
+
+/* Issue SO_ADD_TO_CLUSTER with clusterId on the ring socket; yields -1 without calling setsockopt() when ring_fd is 0 */
+int pcap_set_cluster(pcap_t *handle, u_int clusterId) {
+  if(!handle->ring_fd) return(-1);
+
+  return(setsockopt(handle->ring_fd, 0, SO_ADD_TO_CLUSTER, &clusterId, sizeof(clusterId)));
+}
+
+/* Issue SO_REMOVE_FROM_CLUSTER (no payload) on the ring socket; yields -1 without calling setsockopt() when ring_fd is 0 */
+int pcap_remove_from_cluster(pcap_t *handle) {
+  if(handle->ring_fd == 0) return(-1);
+
+  return(setsockopt(handle->ring_fd, 0, SO_REMOVE_FROM_CLUSTER, NULL, 0));
+}
+
+/* Issue SO_SET_REFLECTOR with the given device name on the ring socket.
+ * Returns setsockopt()'s result, or -1 when ring_fd is 0 or the name is NULL. */
+int pcap_set_reflector(pcap_t *handle, char *reflectorDevice) {
+  if(!handle->ring_fd || (reflectorDevice == NULL)) return(-1);
+  /* optval must point at the name's characters, not at the local pointer variable */
+  return(setsockopt(handle->ring_fd, 0, SO_SET_REFLECTOR, reflectorDevice, strlen(reflectorDevice)));
+}
+
+/*
+ * Set (set_promisc != 0) or clear the IFF_PROMISC flag of 'device'.
+ * Returns 0 on success, -3 for a NULL device, -2 when the current
+ * flags cannot be read, -1 when the socket cannot be created or the
+ * new flags cannot be written.
+ */
+static int set_if_promisc(const char *device, int set_promisc) {
+  int sock_fd, rc = 0;
+  struct ifreq ifr;
+
+  if(device == NULL) return(-3);
+
+  sock_fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+  if(sock_fd < 0) return(-1); /* 0 is a valid descriptor; only negatives are errors */
+
+  memset(&ifr, 0, sizeof(ifr));
+  /* Leave the last byte zeroed so an over-long name stays NUL-terminated */
+  strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)-1);
+
+  if(ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1)
+    rc = -2;
+  else {
+    if(set_promisc) ifr.ifr_flags |= IFF_PROMISC;
+    else            ifr.ifr_flags &= ~IFF_PROMISC;
+
+    if(ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1) rc = -1;
+  }
+
+  close(sock_fd); /* single exit: the socket is released on every path */
+  return(rc);
+}
+
+#endif
+
/*
* Get a handle for a live capture from the given device. You can
* pass NULL as device to get all packages (without link level
@@ -258,6 +335,138 @@
handle->snapshot = snaplen;
handle->md.timeout = to_ms;
+#ifdef RING
+ handle->ring_fd = handle->fd = socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));
+
+ printf("Open RING [fd=%d]\n", handle->ring_fd);
+
+ if(handle->ring_fd > 0) {
+ struct sockaddr sa;
+ int rc;
+ u_int memSlotsLen;
+
+ err = 0;
+ sa.sa_family = PF_RING;
+ snprintf(sa.sa_data, sizeof(sa.sa_data), "%s", device);
+ rc = bind(handle->ring_fd, (struct sockaddr *)&sa, sizeof(sa));
+
+ if(rc == 0) {
+
+
+ handle->md.device = strdup(device);
+ handle->ring_buffer = (char *)mmap(NULL, PAGE_SIZE,
+ PROT_READ|PROT_WRITE,
+ MAP_SHARED,
+ handle->ring_fd, 0);
+
+ if(handle->ring_buffer == MAP_FAILED) {
+ sprintf(ebuf, "mmap() failed");
+ return (NULL);
+ }
+
+ handle->slots_info = (FlowSlotInfo *)handle->ring_buffer;
+ if(handle->slots_info->version != RING_FLOWSLOT_VERSION) {
+ snprintf(ebuf, PCAP_ERRBUF_SIZE, "Wrong RING version: "
+ "kernel is %i, libpcap was compiled with %i\n",
+ handle->slots_info->version, RING_FLOWSLOT_VERSION);
+ return (NULL);
+ }
+ memSlotsLen = handle->slots_info->tot_mem;
+ munmap(handle->ring_buffer, PAGE_SIZE);
+
+ handle->ring_buffer = (char *)mmap(NULL, memSlotsLen,
+ PROT_READ|PROT_WRITE,
+ MAP_SHARED, handle->ring_fd, 0);
+
+ if(handle->ring_buffer == MAP_FAILED) {
+ sprintf(ebuf, "mmap() failed");
+ return (NULL);
+ }
+
+ handle->slots_info = (FlowSlotInfo *)handle->ring_buffer;
+ handle->ring_slots = (char *)(handle->ring_buffer+sizeof(FlowSlotInfo));
+
+ /* Safety check */
+ if(handle->slots_info->remove_idx >= handle->slots_info->tot_slots)
+ handle->slots_info->remove_idx = 0;
+
+ handle->page_id = PAGE_SIZE, handle->slot_id = 0,
+ handle->pkts_per_page = 0;
+
+ if(0) {
+ int i;
+
+ for(i=0; i<handle->slots_info->tot_slots; i++) {
+ unsigned long idx = i*handle->slots_info->slot_len;
+ FlowSlot *slot = (FlowSlot*)&handle->ring_slots[idx];
+
+ printf("RING: Setting RING_MAGIC_VALUE into slot %d [displacement=%lu]\n", i, idx);
+ slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0;
+ printf("RING: slot[%d]: magic=%d, slot_state=%d\n",
+ slot->magic, slot->slot_state);
+ }
+ }
+
+
+ /* Set defaults */
+ handle->linktype = DLT_EN10MB;
+ handle->offset = 2;
+
+ printf("RING (%s): tot_slots=%d/slot_len=%d/"
+ "insertIdx=%d/remove_idx=%d/dropped=%d\n",
+ device,
+ handle->slots_info->tot_slots,
+ handle->slots_info->slot_len,
+ handle->slots_info->insert_idx,
+ handle->slots_info->remove_idx,
+ handle->slots_info->tot_lost);
+
+ ringStats.ps_recv = handle->slots_info->tot_read;
+ ringStats.ps_drop = handle->slots_info->tot_lost;
+
+ if(promisc) {
+ struct ifreq ifr;
+
+ err = 0;
+ memset(&ifr, 0, sizeof(ifr));
+ strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
+ if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) {
+ snprintf(ebuf, PCAP_ERRBUF_SIZE,
+ "ioctl: %s", pcap_strerror(errno));
+ err = 1;
+ }
+
+ if(err == 0) {
+ if ((ifr.ifr_flags & IFF_PROMISC) == 0) {
+ /*
+ * Promiscuous mode isn't currently on,
+ * so turn it on, and remember that
+ * we should turn it off when the
+ * pcap_t is closed.
+ */
+
+ ifr.ifr_flags |= IFF_PROMISC;
+ if (ioctl(handle->fd, SIOCSIFFLAGS, &ifr) == -1) {
+ snprintf(ebuf, PCAP_ERRBUF_SIZE,
+ "ioctl: %s", pcap_strerror(errno));
+ err = 1;
+ }
+ }
+
+ if(err == 0)
+ handle->md.clear_promisc = 1;
+ }
+ }
+
+ if(err == 0)
+ goto open_open_live_final;
+ }
+
+ /* Don't put 'else' above... */
+ close(handle->ring_fd);
+ /* Continue without ring support */
+ }
+#endif
/*
* NULL and "any" are special devices which give us the hint to
* monitor all devices.
@@ -397,6 +606,9 @@
return NULL;
}
+#ifdef RING
+ open_open_live_final:
+#endif
/*
* "handle->fd" is a socket, so "select()" and "poll()"
* should work on it.
@@ -449,6 +661,120 @@
int packet_len, caplen;
struct pcap_pkthdr pcap_header;
+#ifdef RING
+ if(handle->ring_buffer != NULL) {
+ u_int idx, numRuns = 0, ptrAddr;
+ FlowSlot *slot;
+
+ slot = (FlowSlot*)&handle->ring_slots[handle->slots_info->remove_idx*handle->slots_info->slot_len];
+
+ while(1) {
+ u_int32_t queuedPkts;
+
+ if(handle->slots_info->tot_insert >= handle->slots_info->tot_read)
+ queuedPkts = handle->slots_info->tot_insert - handle->slots_info->tot_read;
+ else
+ queuedPkts = handle->slots_info->tot_slots + handle->slots_info->tot_insert - handle->slots_info->tot_read;
+
+ if(queuedPkts && (slot->slot_state == 1)) {
+ char *bucket = &slot->bucket;
+
+#ifdef RING_MAGIC
+ if(slot->magic != RING_MAGIC_VALUE) {
+ printf("==>> Bad Magic [remove_idx=%u][insert_idx=%u][ptrAddr=%u]\n",
+ handle->slots_info->remove_idx,
+ handle->slots_info->insert_idx,
+ ptrAddr);
+ slot->magic = RING_MAGIC_VALUE;
+ }
+#endif
+
+
+ handle->md.stat.ps_recv++;
+
+#ifdef SAFE_RING_MODE
+ {
+ struct pcap_pkthdr *hdr = (struct pcap_pkthdr*)bucket;
+ int bktLen = hdr->caplen;
+
+ if(bktLen > sizeof(staticBucket))
+ bktLen = sizeof(staticBucket);
+
+ memcpy(staticBucket, &bucket[sizeof(struct pcap_pkthdr)], bktLen);
+
+#ifdef RING_DEBUG
+ printf("==>> [remove_idx=%u][insert_idx=%u][ptrAddr=%u]\n",
+ handle->slots_info->remove_idx,
+ handle->slots_info->insert_idx,
+ ptrAddr);
+#endif
+
+ callback(userdata, hdr, staticBucket);
+ }
+#else
+ callback(userdata,
+ (const struct pcap_pkthdr*)bucket,
+ (const u_char*)&bucket[sizeof(struct pcap_pkthdr)]);
+#endif
+
+ if(handle->slots_info->remove_idx >= (handle->slots_info->tot_slots-1)) {
+ handle->slots_info->remove_idx = 0;
+ handle->page_id = PAGE_SIZE, handle->slot_id = 0, handle->pkts_per_page = 0;
+ } else {
+ handle->slots_info->remove_idx++;
+ handle->pkts_per_page++, handle->slot_id += handle->slots_info->slot_len;
+ }
+
+ handle->slots_info->tot_read++;
+ slot->slot_state = 0;
+
+ return(1);
+ } else {
+ struct pollfd pfd;
+ int rc;
+
+ /* Sleep when nothing is happening */
+ pfd.fd = handle->ring_fd;
+ pfd.events = POLLIN|POLLERR;
+ pfd.revents = 0;
+
+#ifdef RING_DEBUG
+ printf("==>> poll [remove_idx=%u][insert_idx=%u][loss=%d][queuedPkts=%u]"
+ "[slot_state=%d][tot_insert=%u][tot_read=%u]\n",
+ handle->slots_info->remove_idx,
+ handle->slots_info->insert_idx,
+ handle->slots_info->tot_lost,
+ queuedPkts, slot->slot_state,
+ handle->slots_info->tot_insert,
+ handle->slots_info->tot_read);
+ #endif
+
+#ifdef RING_DEBUG
+ printf("==>> poll @ [remove_idx=%u][slot_id=%u]\n", handle->slots_info->remove_idx, handle->slot_id);
+#endif
+ errno = 0;
+ rc = poll(&pfd, 1, -1);
+#ifdef RING_DEBUG
+ printf("==>> poll returned %d [%s][errno=%d][break_loop=%d]\n",
+ rc, strerror(errno), errno, handle->break_loop);
+#endif
+ numPollCalls++;
+
+ if(rc == -1) {
+ if(errno == EINTR) {
+ if(handle->break_loop) {
+ handle->break_loop = 0;
+ return(-2);
+ } else
+ return(0);
+ } else
+ return(-1);
+ }
+ }
+ } /* while() */
+ }
+#endif
+
#ifdef HAVE_PF_PACKET_SOCKETS
/*
* If this is a cooked device, leave extra room for a
@@ -688,6 +1014,22 @@
socklen_t len = sizeof (struct tpacket_stats);
#endif
+#ifdef RING
+ if(handle->ring_fd > 0) {
+ stats->ps_recv = handle->slots_info->tot_read-ringStats.ps_recv;
+ stats->ps_drop = handle->slots_info->tot_lost-ringStats.ps_drop;
+
+ printf("RING: numPollCalls=%d [%.1f packets/call]\n",
+ numPollCalls, (float)stats->ps_recv/(float)numPollCalls);
+ printf("RING: [tot_pkts=%u][tot_read=%u][tot_lost=%u]\n",
+ handle->slots_info->tot_pkts,
+ handle->slots_info->tot_read,
+ handle->slots_info->tot_lost);
+
+ return(0);
+ }
+#endif
+
#ifdef HAVE_TPACKET_STATS
/*
* Try to get the packet counts from the kernel.
@@ -879,6 +1221,11 @@
}
}
+
+#ifdef RING
+ if(handle->ring_fd <= 0) can_filter_in_kernel = 0;
+#endif
+
if (can_filter_in_kernel) {
if ((err = set_kernel_filter(handle, &fcode)) == 0)
{
@@ -1348,7 +1695,7 @@
memset(&mr, 0, sizeof(mr));
mr.mr_ifindex = device_id;
mr.mr_type = PACKET_MR_PROMISC;
- if (setsockopt(sock_fd, SOL_PACKET,
+ if (setsockopt(sock_fd, 0 /* SOL_PACKET */,
PACKET_ADD_MEMBERSHIP, &mr, sizeof(mr)) == -1)
{
snprintf(ebuf, PCAP_ERRBUF_SIZE,
@@ -1425,10 +1772,11 @@
/* Any pending errors, e.g., network is down? */
- if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) {
- snprintf(ebuf, PCAP_ERRBUF_SIZE,
- "getsockopt: %s", pcap_strerror(errno));
- return -2;
+ if ((getsockopt(fd, PF_RING, SO_ERROR, &err, &errlen) == -1)
+ && (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1)) {
+ snprintf(ebuf, PCAP_ERRBUF_SIZE,
+ "getsockopt: %s", pcap_strerror(errno));
+ return -2;
}
if (err > 0) {
@@ -1482,6 +1830,13 @@
struct pcap *p, *prevp;
struct ifreq ifr;
+#ifdef RING
+ if(handle->ring_buffer != NULL) {
+ munmap(handle->ring_buffer, handle->slots_info->tot_mem);
+ handle->ring_buffer = NULL;
+ }
+#endif
+
if (handle->md.clear_promisc) {
/*
* We put the interface into promiscuous mode; take
@@ -1698,11 +2053,11 @@
}
/* Any pending errors, e.g., network is down? */
-
- if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) {
- snprintf(ebuf, PCAP_ERRBUF_SIZE,
- "getsockopt: %s", pcap_strerror(errno));
- return -1;
+ if((getsockopt(fd, PF_RING, SO_ERROR, &err, &errlen) == -1)
+ && (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1)) {
+ snprintf(ebuf, PCAP_ERRBUF_SIZE,
+ "getsockopt: %s", pcap_strerror(errno));
+ return -1;
}
if (err > 0) {
@@ -1924,8 +2279,11 @@
* the filtering done in userland even if it could have been
* done in the kernel.
*/
- if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
- &total_fcode, sizeof(total_fcode)) == 0) {
+ printf("pcap[setsockopt(%d)]\n", 0);
+ if (setsockopt(handle->fd, 0 /* SOL_SOCKET */,
+ SO_ATTACH_FILTER,
+ &total_fcode,
+ sizeof(total_fcode)) == 0) {
char drain[1];
/*
@@ -1933,6 +2291,9 @@
*/
total_filter_on = 1;
+#ifdef RING
+ if(!handle->ring_fd) {
+#endif
/*
* Save the socket's current mode, and put it in
* non-blocking mode; we drain it by reading packets
@@ -1955,12 +2316,15 @@
return -2;
}
}
- }
+#ifdef RING
+ }
+#endif
+}
/*
* Now attach the new filter.
*/
- ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
+ ret = setsockopt(handle->fd, 0 /* SOL_SOCKET */, SO_ATTACH_FILTER,
fcode, sizeof(*fcode));
if (ret == -1 && total_filter_on) {
/*
@@ -1993,7 +2357,8 @@
/* setsockopt() barfs unless it get a dummy parameter */
int dummy;
- return setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER,
- &dummy, sizeof(dummy));
+ return setsockopt(handle->fd, handle->ring_fd > 0 ? PF_RING : SOL_SOCKET,
+ SO_DETACH_FILTER,
+ &dummy, sizeof(dummy));
}
#endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff