Bug #5744 » netmap-patch.v4.diff

Bill Meeks, 01/24/2023 12:33 AM


src/runmode-netmap.c
void RunModeIdsNetmapRegister(void)
{
#if HAVE_NETMAP
SCLogInfo("Using netmap version %d ["
#ifdef HAVE_NETMAP
#if USE_NEW_NETMAP_API
"new"
SCLogInfo("Using netmap API version %d with multiple host rings support", NETMAP_API);
#else
"legacy"
#endif
" API interfaces]",
NETMAP_API);
SCLogInfo("Using netmap API version %d with single host ring support", NETMAP_API);
#endif /* USE_NEW_NETMAP_API */
#endif /* HAVE_NETMAP */
RunModeRegisterNewRunMode(RUNMODE_NETMAP, "single",
"Single threaded netmap mode",
RunModeIdsNetmapSingle);
......
"thread.",
RunModeIdsNetmapAutoFp);
return;
#endif
}
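
The registration code above logs differently depending on USE_NEW_NETMAP_API. A minimal sketch of how such a guard can be derived from the installed netmap headers; this is an assumption for illustration only, since the patch's actual definition is not shown in this hunk:

/* Sketch only: select the libnetmap (v14+) API when the installed netmap
 * headers are new enough. NETMAP_API is provided via <net/netmap_user.h>;
 * the real USE_NEW_NETMAP_API definition lives elsewhere in the tree. */
#include <net/netmap_user.h>

#ifndef USE_NEW_NETMAP_API
#define USE_NEW_NETMAP_API (NETMAP_API >= 14)
#endif
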
#ifdef HAVE_NETMAP
......
}
}
#if USE_NEW_NETMAP_API
/* we will need the base interface name for later */
char base_name[IFNAMSIZ];
strlcpy(base_name, ns->iface, sizeof(base_name));
if (strlen(base_name) > 0 &&
(base_name[strlen(base_name) - 1] == '^' || base_name[strlen(base_name) - 1] == '*')) {
base_name[strlen(base_name) - 1] = '\0';
}
#else
char *base_name = ns->iface;
#endif /* USE_NEW_NETMAP_API */
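
The block above recovers the base NIC name from interface specs carrying netmap's software-ring suffixes ('^' or '*'). A standalone illustration of the same stripping, with hypothetical interface names:

/* Illustration: drop a trailing '^' or '*' software-ring marker to get
 * the base interface name, mirroring the logic above. */
#include <net/if.h>
#include <stdio.h>
#include <string.h>

static void strip_sw_ring_suffix(char *name)
{
    size_t len = strlen(name);
    if (len > 0 && (name[len - 1] == '^' || name[len - 1] == '*')) {
        name[len - 1] = '\0';
    }
}

int main(void)
{
    const char *specs[] = { "em0^", "igb1*", "eth0" };
    for (int i = 0; i < 3; i++) {
        char base_name[IFNAMSIZ];
        snprintf(base_name, sizeof(base_name), "%s", specs[i]);
        strip_sw_ring_suffix(base_name);
        printf("%s -> %s\n", specs[i], base_name); /* e.g. "em0^ -> em0" */
    }
    return 0;
}
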
/* prefixed with netmap or vale means it's not a real interface
* and we don't check offloading. */
......
/* just one thread per interface supported */
ns->threads = 1;
} else {
#endif
#endif /* !USE_NEW_NETMAP_API */
if (ns->threads_auto) {
/* As NetmapGetRSSCount used to be broken on Linux,
* fall back to GetIfaceRSSQueuesNum if needed. */
......
}
#if !USE_NEW_NETMAP_API
}
#endif
#endif /* !USE_NEW_NETMAP_API */
if (ns->threads <= 0) {
ns->threads = 1;
}
......
} else if (aconf->in.sw_ring && aconf->out.threads_auto) {
aconf->out.threads = aconf->in.threads = 1;
}
#endif
#endif /* !USE_NEW_NETMAP_API */
}
}
......
LiveRegisterDevice(live_buf);
}
#endif
/* netmap needs all offloading to be disabled */
#endif /* USE_NEW_NETMAP_API */
/* netmap needs all offloading to be disabled on RX side */
if (aconf->in.real) {
char base_name[sizeof(aconf->in.iface)];
strlcpy(base_name, aconf->in.iface, sizeof(base_name));
......
#if USE_NEW_NETMAP_API
LiveDeviceHasNoStats();
#endif
#endif /* USE_NEW_NETMAP_API */
return aconf;
}
......
return has_ips;
}
#endif // #ifdef HAVE_NETMAP
#endif /* HAVE_NETMAP */
int RunModeIdsNetmapAutoFp(void)
{
src/source-netmap.c
#define NETMAP_WITH_LIBS
#ifdef DEBUG
#define DEBUG_NETMAP_USER
#endif
#endif /* DEBUG */
#include <net/netmap_user.h>
#if USE_NEW_NETMAP_API
#include <libnetmap.h>
#endif
#endif /* USE_NEW_NETMAP_API */
#endif /* HAVE_NETMAP */
......
int flags;
struct bpf_program bpf_prog;
struct nm_pkthdr pkt_hdr;
/* suricata internals */
TmSlot *slot;
......
*/
int NetmapGetRSSCount(const char *ifname)
{
#if USE_NEW_NETMAP_API
struct nmreq_port_info_get req;
struct nmreq_header hdr;
#else
struct nmreq nm_req;
#endif
int rx_rings = 0;
/* we need the base interface name to query queues */
#if USE_NEW_NETMAP_API
/* we need the base interface name to query queues count */
char base_name[IFNAMSIZ];
strlcpy(base_name, ifname, sizeof(base_name));
if (strlen(base_name) > 0 &&
(base_name[strlen(base_name) - 1] == '^' || base_name[strlen(base_name) - 1] == '*')) {
base_name[strlen(base_name) - 1] = '\0';
}
#endif
SCMutexLock(&netmap_devlist_lock);
/* open netmap device */
......
}
/* query netmap interface info */
#if USE_NEW_NETMAP_API
memset(&req, 0, sizeof(req));
memset(&hdr, 0, sizeof(hdr));
hdr.nr_version = NETMAP_API;
hdr.nr_reqtype = NETMAP_REQ_PORT_INFO_GET;
hdr.nr_body = (uintptr_t)&req;
strlcpy(hdr.nr_name, base_name, sizeof(hdr.nr_name));
#else
memset(&nm_req, 0, sizeof(nm_req));
strlcpy(nm_req.nr_name, ifname, sizeof(nm_req.nr_name));
nm_req.nr_version = NETMAP_API;
#endif
#if USE_NEW_NETMAP_API
if (ioctl(fd, NIOCCTRL, &hdr) != 0) {
#else
if (ioctl(fd, NIOCGINFO, &nm_req) != 0) {
#endif
SCLogError(SC_ERR_NETMAP_CREATE, "Couldn't query netmap for info about %s, error %s",
ifname, strerror(errno));
goto error_fd;
};
#if USE_NEW_NETMAP_API
/* return RX rings count if it equals TX rings count */
if (req.nr_rx_rings == req.nr_tx_rings) {
if (req.nr_rx_rings == req.nr_tx_rings)
rx_rings = req.nr_rx_rings;
}
#else
rx_rings = nm_req.nr_rx_rings;
#endif
error_fd:
close(fd);
......
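
Condensed, the USE_NEW_NETMAP_API branch of NetmapGetRSSCount() above issues a control request against /dev/netmap. A self-contained sketch of that query, assuming headers new enough to provide NETMAP_REQ_PORT_INFO_GET and with error handling trimmed:

/* Sketch: query an interface's hardware RX ring count through the
 * newer netmap control interface (NIOCCTRL + NETMAP_REQ_PORT_INFO_GET),
 * roughly what the new-API branch above does. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <net/netmap_user.h>

static int query_rx_rings(const char *base_name)
{
    int fd = open("/dev/netmap", O_RDWR);
    if (fd < 0)
        return -1;

    struct nmreq_port_info_get req;
    struct nmreq_header hdr;
    memset(&req, 0, sizeof(req));
    memset(&hdr, 0, sizeof(hdr));
    hdr.nr_version = NETMAP_API;
    hdr.nr_reqtype = NETMAP_REQ_PORT_INFO_GET;
    hdr.nr_body = (uintptr_t)&req;
    strncpy(hdr.nr_name, base_name, sizeof(hdr.nr_name) - 1);

    int rings = -1;
    if (ioctl(fd, NIOCCTRL, &hdr) == 0)
        rings = (int)req.nr_rx_rings; /* req.nr_tx_rings holds the TX count */

    close(fd);
    return rings;
}

int main(int argc, char **argv)
{
    const char *ifname = (argc > 1) ? argv[1] : "eth0";
    printf("%s: %d RX rings\n", ifname, query_rx_rings(ifname));
    return 0;
}
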
/**
* \brief Open interface in netmap mode.
* \param ifname Interface name.
* \param promisc Enable promiscuous mode.
* \param ns Pointer to Netmap Interface conf settings.
* \param dev Pointer to requested netmap device instance.
* \param verbose Verbose error logging.
* \param read Indicates direction: RX or TX
* \param zerocopy 1 if zerocopy access requested
* \param ntv Pointer to NetmapThreadVars structure.
* \param read Indicates direction: 1 = RX or 0 = TX
* \param soft Use Host stack (software) interface
* \return Zero on success.
*/
static int NetmapOpen(NetmapIfaceSettings *ns, NetmapDevice **pdevice, int verbose, int read,
bool zerocopy, bool soft)
static int NetmapOpen(NetmapIfaceSettings *ns, NetmapDevice **pdevice, NetmapThreadVars *ntv, int read, bool soft)
{
SCEnter();
SCLogDebug("ifname %s", ns->iface);
......
/* check interface is up */
int if_flags = GetIfaceFlags(base_name);
if (if_flags == -1) {
if (verbose) {
SCLogError(SC_ERR_NETMAP_CREATE, "Cannot access network interface '%s' (%s)",
base_name, ns->iface);
}
SCLogError(SC_ERR_NETMAP_CREATE, "Cannot access network interface '%s' (%s) to query for option flags",
base_name, ns->iface);
goto error;
}
......
* When using multiple rings/threads, the open of the initial Ring 0 MUST
* instruct netmap to open multiple Host Stack rings (as the default is to open only a single
* pair). This is also critical for the HW NIC endpoint. This is done by adding
* "@conf:host-rings=x" suffix option (where "x" is the number of host rings desired)
* to BOTH endpoint nmport_open_desc() calls for ring 0 (hardware and host stack).
* For subsequent additional ring open calls, omit the suffix option specifying host ring count.
*
......
ns->iface, ring, strlen(optstr) ? "/" : "", optstr);
} else if (strlen(ns->iface) > 5 && strncmp(ns->iface, "vale", 4) == 0 && isdigit(ns->iface[4])) {
snprintf(devname, sizeof(devname), "%s", ns->iface);
#if NETMAP_API < 14 || !USET_NET_NETMAP_API
#if NETMAP_API < 14 || !USE_NEW_NETMAP_API
} else if (ns->iface[strlen(ns->iface)-1] == '*' ||
ns->iface[strlen(ns->iface)-1] == '^') {
SCLogDebug("device with SW-ring enabled (ns->iface): %s",ns->iface);
......
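
As the host-rings comment above explains, the ring-0 open of each endpoint must request the desired number of host rings. A sketch of how such a port name could be assembled with the "@conf:host-rings=x" option; the interface name, ring-0 spelling, and count here are hypothetical, and the patch's actual format string may differ:

/* Hypothetical sketch: ring-0 port name carrying "@conf:host-rings=N".
 * Per the comment above, the same option is added to BOTH the NIC and
 * host-stack endpoint opens for ring 0; later ring opens omit it. */
#include <stdio.h>

int main(void)
{
    const char *iface = "em0"; /* hypothetical base interface */
    int host_rings = 4;        /* e.g. one host ring per worker thread */
    char devname[128];

    snprintf(devname, sizeof(devname),
             "netmap:%s-0@conf:host-rings=%d", iface, host_rings);
    printf("%s\n", devname);   /* netmap:em0-0@conf:host-rings=4 */
    return 0;
}
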
strlcpy(pdev->ifname, ns->iface, sizeof(pdev->ifname));
#if USE_NEW_NETMAP_API
/* have the netmap API parse device name and prepare the port descriptor for us */
pdev->nmd = nmport_prepare(devname);
if (pdev->nmd != NULL) {
/* For RX devices, set the nr_mode flag we need on the netmap port TX rings prior to opening
*/
if (read) {
pdev->nmd->reg.nr_flags |= NR_NO_TX_POLL;
}
/* Now attempt to actually open the netmap port descriptor */
if (nmport_open_desc(pdev->nmd) < 0) {
/* the open failed, so clean-up the descriptor and fall through to error handler */
nmport_close(pdev->nmd);
pdev->nmd = NULL;
}
}
pdev->nmd = nmport_open(devname);
#else
pdev->nmd = nm_open(devname, NULL, 0, NULL);
#endif
......
FatalError(SC_ERR_FATAL, "opening devname %s failed: %s", devname, strerror(errno));
}
#if USE_NEW_NETMAP_API
/* Work around a bug in the libnetmap library where the "cur_{r,t}x_ring" values are not initialized */
SCLogDebug("%s -- cur rings: [%d, %d] first rings: [%d, %d]", devname, pdev->nmd->cur_rx_ring,
pdev->nmd->cur_tx_ring, pdev->nmd->first_rx_ring, pdev->nmd->first_tx_ring);
pdev->nmd->cur_rx_ring = pdev->nmd->first_rx_ring;
pdev->nmd->cur_tx_ring = pdev->nmd->first_tx_ring;
#endif
SCLogInfo("devname [fd: %d] %s %s opened", pdev->nmd->fd, devname, ns->iface);
......
ntv->flags |= NETMAP_FLAG_EXCL_RING_ACCESS;
}
/* Need to ensure the open of ring 0 conveys the requested ring count */
/* set flag if either side of interface pair is host stack */
bool soft = aconf->in.sw_ring || aconf->out.sw_ring;
if (NetmapOpen(&aconf->in, &ntv->ifsrc, 1, 1, (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0,
soft) != 0) {
if (NetmapOpen(&aconf->in, &ntv->ifsrc, ntv, 1, soft) != 0) {
goto error_ntv;
}
#if !USE_NEW_NETMAP_API
if (unlikely(aconf->in.sw_ring && aconf->in.threads > 1)) {
SCLogError(SC_ERR_INVALID_VALUE,
"Interface '%s^'. "
"Source Interface '%s^'. "
"Thread count can't be greater than 1 for SW ring.",
aconf->iface_name);
goto error_src;
......
#endif
if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) {
if (NetmapOpen(&aconf->out, &ntv->ifdst, 1, 0, (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0,
soft) != 0) {
goto error_src;
if (NetmapOpen(&aconf->out, &ntv->ifdst, ntv, 0, soft) != 0) {
goto error_dst;
}
}
......
if (aconf->in.bpf_filter) {
SCLogConfig("Using BPF '%s' on iface '%s'",
aconf->in.bpf_filter, ntv->ifsrc->ifname);
char errbuf[PCAP_ERRBUF_SIZE];
if (SCBPFCompile(default_packet_size, /* snaplen_arg */
LINKTYPE_ETHERNET, /* linktype_arg */
&ntv->bpf_prog, /* program */
aconf->in.bpf_filter, /* const char *buf */
1, /* optimize */
PCAP_NETMASK_UNKNOWN, /* mask */
errbuf,
sizeof(errbuf)) == -1)
{
SCLogError(SC_ERR_NETMAP_CREATE, "Failed to compile BPF \"%s\": %s",
aconf->in.bpf_filter,
errbuf);
goto error_dst;
}
}
SCLogNotice("thread: %s polling on fd: %d", tv->name, ntv->ifsrc->nmd->fd);
int rx_rings = ntv->ifsrc->nmd->last_rx_ring - ntv->ifsrc->nmd->first_rx_ring + 1;
SCLogNotice("thread: %s polling on fd: %d using %d RX ring%s", tv->name, ntv->ifsrc->nmd->fd,
rx_rings, rx_rings == 1 ? "" : "s");
*data = (void *)ntv;
aconf->DerefFunc(aconf);
......
}
DEBUG_VALIDATE_BUG_ON(ntv->ifdst == NULL);
/* Lock the destination netmap ring while writing to it */
/* Lock destination netmap TX ring while writing when autofp runmode */
if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) {
SCMutexLock(&ntv->ifdst->netmap_dev_lock);
}
/* attempt to write the packet into the netmap ring buffer(s) */
/* write the Packet's data into available ring slot(s) */
#if USE_NEW_NETMAP_API
if (nmport_inject(ntv->ifdst->nmd, GET_PKT_DATA(p), GET_PKT_LEN(p)) == 0) {
if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) {
SCMutexUnlock(&ntv->ifdst->netmap_dev_lock);
}
#else
if (nm_inject(ntv->ifdst->nmd, GET_PKT_DATA(p), GET_PKT_LEN(p)) == 0) {
#endif
/* an error occurred sending to netmap destination port,
* so release our mutex if used and log a dropped packet.
*/
if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) {
SCMutexUnlock(&ntv->ifdst->netmap_dev_lock);
}
SCLogDebug("failed to send %s -> %s", ntv->ifsrc->ifname, ntv->ifdst->ifname);
ntv->drops++;
return TM_ECODE_FAILED;
}
SCLogDebug("sent successfully: %s(%d)->%s(%d) (%u)", ntv->ifsrc->ifname, ntv->ifsrc->ring,
ntv->ifdst->ifname, ntv->ifdst->ring, GET_PKT_LEN(p));
/* Sync TX rings with kernel using NIOCTXSYNC */
if (ioctl(ntv->ifdst->nmd->fd, NIOCTXSYNC, 0) == -1) {
SCLogWarning(SC_ERR_SYSCALL, "An error occurred syncing TX ring #%d on %s.",
ntv->ifdst->ring, ntv->ifdst->ifname);
} else {
SCLogDebug("sent successfully: %s(%d)->%s(%d) (%u)", ntv->ifsrc->ifname, ntv->ifsrc->ring,
ntv->ifdst->ifname, ntv->ifdst->ring, GET_PKT_LEN(p));
}
ioctl(ntv->ifdst->nmd->fd, NIOCTXSYNC, 0);
/* release netmap write mutex if runmode = autofp */
if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) {
SCMutexUnlock(&ntv->ifdst->netmap_dev_lock);
}
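
Under the new API, the transmit path above reduces to nmport_inject() followed by an NIOCTXSYNC ioctl to flush the TX ring. A trimmed sketch of that pattern, assuming 'dst' is a port already opened with nmport_open_desc() as earlier in the patch:

/* Sketch of the new-API send path: nmport_inject() copies the frame into
 * an available TX slot, NIOCTXSYNC asks the kernel to flush the ring. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <net/netmap_user.h>
#include <libnetmap.h>

static int send_frame(struct nmport_d *dst, const uint8_t *data, unsigned int len)
{
    if (nmport_inject(dst, data, len) == 0) {
        /* no TX slot available (or other failure): caller counts a drop */
        return -1;
    }
    if (ioctl(dst->fd, NIOCTXSYNC, 0) == -1) {
        /* ring not flushed now; it will be on a later sync */
        return -1;
    }
    return 0;
}
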
......
PacketFreeOrRelease(p);
}
static void NetmapProcessPacket(NetmapThreadVars *ntv, const struct nm_pkthdr *ph)
static void NetmapCallback(u_char *user, const struct nm_pkthdr *ph, const u_char *d)
{
NetmapThreadVars *ntv = (NetmapThreadVars *)user;
if (ntv->bpf_prog.bf_len) {
struct pcap_pkthdr pkthdr = { {0, 0}, ph->len, ph->len };
if (pcap_offline_filter(&ntv->bpf_prog, &pkthdr, ph->buf) == 0) {
if (pcap_offline_filter(&ntv->bpf_prog, &pkthdr, d) == 0) {
return;
}
}
......
ntv->bytes += ph->len;
if (ntv->flags & NETMAP_FLAG_ZERO_COPY) {
if (PacketSetData(p, (uint8_t *)ph->buf, ph->len) == -1) {
if (PacketSetData(p, (uint8_t *)d, ph->len) == -1) {
TmqhOutputPacketpool(ntv->tv, p);
return;
}
} else {
if (PacketCopyData(p, (uint8_t *)ph->buf, ph->len) == -1) {
if (PacketCopyData(p, (uint8_t *)d, ph->len) == -1) {
TmqhOutputPacketpool(ntv->tv, p);
return;
}
......
/**
* \brief Copy netmap rings data into Packet structures.
* \param *d nmport_d (or nm_desc) netmap if structure.
* \param *d nmport_d netmap port structure.
* \param cnt int count of packets to read (-1 = all).
* \param *ntv NetmapThreadVars.
*/
#if USE_NEW_NETMAP_API
static TmEcode NetmapReadPackets(struct nmport_d *d, int cnt, NetmapThreadVars *ntv)
#else
static TmEcode NetmapReadPackets(struct nm_desc *d, int cnt, NetmapThreadVars *ntv)
#endif
static int NetmapDispatchPackets(struct nmport_d *d, int cnt, NetmapThreadVars *ntv)
{
struct nm_pkthdr hdr;
int last_ring = d->last_rx_ring - d->first_rx_ring + 1;
int cur_ring, got = 0, cur_rx_ring = d->cur_rx_ring;
int c, got = 0, ri = d->cur_rx_ring;
memset(&hdr, 0, sizeof(hdr));
hdr.flags = NM_MORE_PKTS;
/* compute number of rings opened for this netmap port */
int n = d->last_rx_ring - d->first_rx_ring + 1;
ntv->pkt_hdr.buf = NULL;
ntv->pkt_hdr.flags = NM_MORE_PKTS;
if (cnt == 0)
cnt = -1;
for (cur_ring = 0; cur_ring < last_ring && cnt != got; cur_ring++, cur_rx_ring++) {
/* Loop through the rings in the netmap port */
for (c = 0; c < n && cnt != got; c++, ri++) {
struct netmap_ring *ring;
if (cur_rx_ring > d->last_rx_ring)
cur_rx_ring = d->first_rx_ring;
if (ri > d->last_rx_ring)
ri = d->first_rx_ring;
ring = NETMAP_RXRING(d->nifp, cur_rx_ring);
ring = NETMAP_RXRING(d->nifp, ri);
/* cycle through the non-empty ring slots to fetch their data */
for (; !nm_ring_empty(ring) && cnt != got; got++) {
......
u_char *oldbuf;
struct netmap_slot *slot;
if (hdr.buf) { /* from previous round */
NetmapProcessPacket(ntv, &hdr);
if (ntv->pkt_hdr.buf) { /* data from previous pass */
NetmapCallback((u_char *)ntv, &ntv->pkt_hdr, ntv->pkt_hdr.buf);
}
i = ring->cur;
slot = &ring->slot[i];
idx = slot->buf_idx;
d->cur_rx_ring = cur_rx_ring;
hdr.slot = slot;
oldbuf = hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
hdr.len = hdr.caplen = slot->len;
d->cur_rx_ring = ri;
ntv->pkt_hdr.slot = slot;
oldbuf = ntv->pkt_hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
ntv->pkt_hdr.len = ntv->pkt_hdr.caplen = slot->len;
/* loop through the ring slots to get packet data */
while (slot->flags & NS_MOREFRAG) {
/* packet can be fragmented across multiple slots, */
/* so loop until we find the slot with the flag */
/* cleared, signalling the end of the packet data. */
/* packet may be fragmented across multiple slots,
* so loop until we find a slot with the flag
* cleared, signalling the end of packet's data.
*/
u_char *nbuf;
u_int oldlen = slot->len;
i = nm_ring_next(ring, i);
i = nm_ring_next(ring, i); /* advance to next slot */
slot = &ring->slot[i];
hdr.len += slot->len;
ntv->pkt_hdr.len += slot->len;
nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx);
if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size &&
oldlen == ring->nr_buf_size) {
hdr.caplen += slot->len;
ntv->pkt_hdr.caplen += slot->len;
oldbuf = nbuf;
} else {
oldbuf = NULL;
}
}
hdr.ts = ring->ts;
ntv->pkt_hdr.ts = ring->ts;
/* update cur & head so kernel knows we read the data on ring */
ring->head = ring->cur = nm_ring_next(ring, i);
}
}
if (hdr.buf) { /* from previous round */
hdr.flags = 0;
NetmapProcessPacket(ntv, &hdr);
/* finished checking all the rings, copy any remaining data */
if (ntv->pkt_hdr.buf) {
ntv->pkt_hdr.flags = 0;
NetmapCallback((u_char *)ntv, &ntv->pkt_hdr, ntv->pkt_hdr.buf);
}
/* return the count of Packet structs we filled */
return got;
}
#endif
/**
* \brief Main netmap reading loop function
......
}
/* make sure we have at least one packet in the packet pool,
* to prevent us from alloc'ing packets at line rate */
* to prevent us from alloc'ing packets at line rate
*/
PacketPoolWait();
int r = poll(&fds, 1, POLL_TIMEOUT);
......
continue;
} else if (r == 0) {
/* no events, timeout */
// SCLogDebug("(%s:%d-%d) Poll timeout", ntv->ifsrc->ifname,
// ntv->src_ring_from, ntv->src_ring_to);
/* sync counters */
NetmapDumpCounters(ntv);
StatsSyncCountersIfSignalled(tv);
......
SCLogError(SC_ERR_NETMAP_READ,
"Error reading netmap data via polling from iface '%s': (%d" PRIu32 ") %s",
ntv->ifsrc->ifname, errno, strerror(errno));
// SCLogError(SC_ERR_NETMAP_READ,
// "Error reading data from iface '%s': (%d" PRIu32 ") %s",
// ntv->ifsrc->ifname, errno, strerror(errno));
} else if (fds.revents & POLLNVAL) {
SCLogError(SC_ERR_NETMAP_READ, "Invalid polling request");
}
......
}
if (likely(fds.revents & POLLIN)) {
/* have data on RX ring, so copy to Packet for processing */
NetmapReadPackets(ntv->ifsrc->nmd, -1, ntv);
#if USE_NEW_NETMAP_API
/* have RX rings data, so copy into Packet structs for analysis */
NetmapDispatchPackets(ntv->ifsrc->nmd, -1, ntv);
#else
nm_dispatch(ntv->ifsrc->nmd, -1, NetmapCallback, (void *)ntv);
#endif
}
NetmapDumpCounters(ntv);
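
Taken together, the read-loop fragments above follow the usual netmap receive pattern: poll the port's file descriptor, treat a timeout as a chance to sync counters, and drain the rings on POLLIN. A compact sketch of that skeleton; dispatch_fn stands in for NetmapDispatchPackets()/nm_dispatch() and the names are illustrative:

/* Sketch of the capture loop's shape (simplified; the real loop also
 * updates stats and logs the individual poll error conditions). */
#include <errno.h>
#include <poll.h>
#include <net/netmap_user.h>
#include <libnetmap.h>

typedef int (*dispatch_fn)(struct nmport_d *d, int cnt);

static void capture_loop(struct nmport_d *src, dispatch_fn dispatch, volatile int *stop)
{
    struct pollfd fds = { .fd = src->fd, .events = POLLIN };
    while (!*stop) {
        int r = poll(&fds, 1, 100 /* ms */);
        if (r < 0) {
            if (errno == EINTR)
                continue;      /* interrupted by a signal: retry */
            break;             /* hard poll failure */
        }
        if (r == 0)
            continue;          /* timeout: nothing to read this round */
        if (fds.revents & (POLLERR | POLLNVAL))
            break;             /* device vanished or fd invalid */
        if (fds.revents & POLLIN)
            dispatch(src, -1); /* drain all pending packets */
    }
}
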