Project

General

Profile

Bug #8667 » reproduce.sh

repro script - Shane Dugan, 06/17/2026 04:15 PM

 
#!/usr/bin/env bash
# Reproduction script for AF-PACKET IPS startup race (ENOTSOCK on socket 0)
#
# Requirements:
# - Linux (kernel >= 4.x)
# - Suricata 8.0.x built from source, binary at $SURICATA or on $PATH
# - Root / sudo
# - iproute2 (ip command)
# - tcpreplay (for traffic generation)
# - python3 (used to generate the pcap that tcpreplay loops)
# - Optionally: WIDEN_WINDOW=1 env var to enable the deterministic widener
# (requires Suricata built with the widener patch - see widener.patch)
#
# Usage:
# sudo ./reproduce.sh [/path/to/suricata]
#
# The script will:
# 1. Create 6 veth pairs (SFE_0_TX/SFE_0_RX ... SFE_5_TX/SFE_5_RX)
# 2. Start continuous traffic on all pairs
# 3. Perform 10 cold restart cycles (SIGTERM + relaunch)
# 4. Check each restart for the ENOTSOCK signature
# 5. Report pass/fail
#
# Expected result WITHOUT fix: ENOTSOCK lines appear, RX counters stay at 0
# Expected result WITH fix: No ENOTSOCK lines, RX counters grow

# Strict mode: abort on failing commands, unset variables and failing
# pipeline stages.
set -euo pipefail

# Binary under test: first positional argument if given, otherwise whatever
# `suricata` resolves to on $PATH (empty string when neither is available).
SURICATA="${1:-$(command -v suricata 2>/dev/null || echo '')}"
if [[ -z "$SURICATA" || ! -x "$SURICATA" ]]; then
  # Diagnostics belong on stderr so they never mix with captured stdout.
  echo "ERROR: suricata binary not found. Pass path as argument or ensure it's on \$PATH." >&2
  echo "Usage: sudo $0 /path/to/suricata" >&2
  exit 1
fi

# Fail fast on missing helpers. Without this check a missing tcpreplay only
# fails in the background with its output discarded, and the run would end
# with a misleading "no ENOTSOCK lines" verdict.
for required_tool in ip tcpreplay python3; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    echo "ERROR: required tool '$required_tool' not found on \$PATH." >&2
    exit 1
  fi
done

# Directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Scratch directory for the generated pcap, suricata.yaml and all logs.
LOG_DIR="/tmp/suri-race-repro"
# Number of veth pairs (SFE_<n>_TX / SFE_<n>_RX) to create.
NUM_PAIRS=6
# Number of start/stop cycles to run.
RESTART_CYCLES=10
# "1" enables the widened race window; defaults to "0" when unset.
WIDEN_WINDOW="${WIDEN_WINDOW:-0}"

# Cleanup on exit
#
# Best-effort teardown: stops Suricata and the traffic generators, then removes
# the veth pairs. Every step tolerates "nothing to do", so it is safe to run
# even if setup failed half-way.
# Globals: LOG_DIR (read), NUM_PAIRS (read)
cleanup() {
  local i
  echo "[*] Cleaning up..."
  # Anything started by this script carries the log directory in its command
  # line (config path / pcap path).
  pkill -f "suri-race-repro" 2>/dev/null || true
  # tcpreplay is started with --intf1=SFE_<n>_TX, so match on the interface
  # prefix; the former "veth" pattern never appeared in its command line.
  pkill -f "tcpreplay.*SFE_" 2>/dev/null || true
  kill "$(cat "$LOG_DIR/suricata.pid" 2>/dev/null)" 2>/dev/null || true
  for i in $(seq 0 $((NUM_PAIRS - 1))); do
    ip link del "SFE_${i}_TX" 2>/dev/null || true
    # SFE_N_RX is deleted automatically as the veth peer
  done
  echo "[*] Done. Logs in $LOG_DIR/"
}
# Run cleanup exactly once, on exit. Signals are converted into an exit with
# the conventional 128+signo status; trapping them directly with `cleanup`
# would run the teardown and then either resume the script or run it again
# from the EXIT trap.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Make sure the scratch directory exists before anything writes into it.
mkdir -p "$LOG_DIR"

# Print the run parameters as a single banner, followed by one blank line.
cat <<BANNER_EOF
========================================
 AF-PACKET IPS Startup Race Reproducer
========================================
 Suricata: $SURICATA
 Log dir: $LOG_DIR
 Pairs: $NUM_PAIRS
 Cycles: $RESTART_CYCLES
 Widener: $WIDEN_WINDOW (set WIDEN_WINDOW=1 to enable)
========================================

BANNER_EOF

# ── 1. Create veth pairs ─────────────────────────────────────────────────────
echo "[*] Creating $NUM_PAIRS veth pairs..."
for ((i = 0; i < NUM_PAIRS; i++)); do
  tx_if="SFE_${i}_TX"
  rx_if="SFE_${i}_RX"
  # A pair left over from an earlier run makes `add` fail; that is tolerated.
  ip link add "$tx_if" type veth peer name "$rx_if" 2>/dev/null || true
  for link_name in "$tx_if" "$rx_if"; do
    ip link set "$link_name" up
  done
  # Set promisc so AF_PACKET sees all frames
  for link_name in "$tx_if" "$rx_if"; do
    ip link set "$link_name" promisc on
  done
done
echo "[*] Interfaces up:"
for ((i = 0; i < NUM_PAIRS; i++)); do
  echo " SFE_${i}_TX <-> SFE_${i}_RX"
done
echo ""

# ── 2. Generate a tiny pcap for tcpreplay ────────────────────────────────────
# Writes 100 small UDP/IPv4 Ethernet frames (10.0.0.1 -> 10.0.0.2, dst port 80,
# src ports 1024..1123) spaced 1 ms apart into $PCAP using only the Python
# standard library.
PCAP="$LOG_DIR/traffic.pcap"
python3 - "$PCAP" <<'PYEOF'
import struct, sys, socket

def pcap_hdr():
    # Little-endian classic pcap global header: magic, version 2.4,
    # thiszone=0, sigfigs=0, snaplen=65535, linktype=1.
    return struct.pack('<IHHiIII', 0xA1B2C3D4, 2, 4, 0, 0, 65535, 1)

def eth_ip_udp(src_ip, dst_ip, sport, dport, payload=b'hello'):
    """Build one Ethernet + IPv4 + UDP frame carrying `payload`."""
    src_mac = b'\x02\x00\x00\x00\x00\x01'
    dst_mac = b'\x02\x00\x00\x00\x00\x02'
    eth = dst_mac + src_mac + b'\x08\x00'
    udp_len = 8 + len(payload)
    # UDP checksum is left at 0.
    udp = struct.pack('!HHHH', sport, dport, udp_len, 0) + payload
    ip_len = 20 + len(udp)
    # The header checksum field is packed as 0 and patched in below.
    ip = struct.pack('!BBHHHBBH4s4s',
                     0x45, 0, ip_len, 0x1234, 0x4000, 64, 17, 0,
                     socket.inet_aton(src_ip), socket.inet_aton(dst_ip))

    # simple checksum
    def cksum(b):
        if len(b) % 2:
            b += b'\x00'
        s = sum(struct.unpack('!%dH' % (len(b) // 2), b))
        s = (s >> 16) + (s & 0xffff)
        s += (s >> 16)
        return ~s & 0xffff

    ip = ip[:10] + struct.pack('!H', cksum(ip)) + ip[12:]
    return eth + ip + udp

out = sys.argv[1]
with open(out, 'wb') as f:
    f.write(pcap_hdr())
    for i in range(100):
        pkt = eth_ip_udp('10.0.0.1', '10.0.0.2', 1024 + i, 80)
        # Integer arithmetic keeps the 1 ms spacing exact; subtracting floats
        # near 1.7e9 can truncate the microsecond field by one.
        extra_sec, usec = divmod(i * 1000, 1000000)
        sec = 1700000000 + extra_sec
        f.write(struct.pack('<IIII', sec, usec, len(pkt), len(pkt)) + pkt)
print(f"Generated {out} (100 UDP packets)")
PYEOF

echo "[*] Generated traffic pcap: $PCAP"
echo ""

# ── 3. Start continuous traffic on all pairs ─────────────────────────────────
# One endlessly looping tcpreplay per TX interface, each in the background.
echo "[*] Starting tcpreplay loops on all TX interfaces..."
for i in $(seq 0 $((NUM_PAIRS - 1))); do
  # Keep each replay's output in the log directory instead of discarding it,
  # so a replay that fails to start can be diagnosed after the run.
  tcpreplay --loop=0 --mbps=1 --intf1="SFE_${i}_TX" "$PCAP" \
    > "$LOG_DIR/tcpreplay_${i}.log" 2>&1 &
done
echo "[*] Traffic running."
echo ""

# ── 4. Write suricata.yaml ───────────────────────────────────────────────────
# The config is assembled in three parts: a fixed preamble, one pair of
# af-packet entries per veth pair (each side names the other as copy-iface),
# and a fixed trailer. YAML nesting is significant, so the heredoc bodies are
# indented explicitly.
YAML="$LOG_DIR/suricata.yaml"
cat > "$YAML" <<YAML_EOF
%YAML 1.1
---
vars:
  address-groups:
    HOME_NET: "[10.0.0.0/8]"
    EXTERNAL_NET: "!\$HOME_NET"

default-log-dir: $LOG_DIR/
default-packet-size: 1514

logging:
  default-log-level: info
  outputs:
    # Console output must stay enabled: the restart loop greps the captured
    # stdout/stderr of each run (run_<cycle>.log) for engine messages.
    - console:
        enabled: true
    - file:
        enabled: true
        filename: suricata.log
        level: info

af-packet:
YAML_EOF

for i in $(seq 0 $((NUM_PAIRS - 1))); do
  # cluster-ids are unique per interface: 10,11 for pair 0, 12,13 for pair 1, ...
  cat >> "$YAML" <<YAML_EOF
  - interface: SFE_${i}_TX
    cluster-id: $((10 + i * 2))
    cluster-type: cluster_flow
    copy-iface: SFE_${i}_RX
    copy-mode: ips
    threads: 2
    use-mmap: true
    checksum-checks: false
    defrag: false
  - interface: SFE_${i}_RX
    cluster-id: $((11 + i * 2))
    cluster-type: cluster_flow
    copy-iface: SFE_${i}_TX
    copy-mode: ips
    threads: 2
    disable-read: 1
    use-mmap: true
    checksum-checks: false
    defrag: false
YAML_EOF
done

cat >> "$YAML" <<YAML_EOF

outputs:
  - eve-log:
      enabled: false
  - stats:
      enabled: false

stream:
  checksum-validation: false
  memcap: 64mb
  reassembly:
    memcap: 128mb

detect:
  profile: medium

runmode: workers

threading:
  detect-thread-ratio: 1.0
  # Intentionally NOT pinning all workers to one core —
  # pinning serializes setup and hides the race
  set-cpu-affinity: no

rule-files: []
unix-command:
  enabled: false
pid-file: $LOG_DIR/suricata.pid
YAML_EOF

echo "[*] Wrote $YAML"
echo ""

# ── 5. Restart loop ──────────────────────────────────────────────────────────
# Running tallies updated by the restart loop.
FAILED_CYCLES=0   # cycles in which at least one ENOTSOCK line was seen
ENOTSOCK_TOTAL=0  # ENOTSOCK lines summed over all cycles

#######################################
# Launch Suricata in the background for one restart cycle.
# Globals:   SURICATA, YAML, LOG_DIR, WIDEN_WINDOW (all read)
# Arguments: $1 - cycle label; stdout/stderr of the process are captured in
#                 $LOG_DIR/run_<label>.log
# Outputs:   PID of the background process on stdout
#######################################
start_suricata() {
  local run_log="$LOG_DIR/run_${1}.log"
  local -a launch=(
    "$SURICATA" -c "$YAML" -S /dev/null
    -l "$LOG_DIR" --runmode workers -k none
    --simulate-ips --af-packet
  )

  if [[ "$WIDEN_WINDOW" == "1" ]]; then
    # Only this launch sees the delay variable (presumably consumed by a
    # widener-patched build — confirm against widener.patch). `env` execs the
    # target, so the reported PID is still the Suricata process.
    launch=(env SURICATA_RING_SETUP_DELAY_US=500000 "${launch[@]}")
  fi

  "${launch[@]}" > "$run_log" 2>&1 &
  echo $!
}

# Each cycle: launch Suricata, wait for it to come up, let traffic flow for a
# few seconds, scan the captured run log for the ENOTSOCK signature, then stop
# the process and wait for it to be gone before the next launch.
echo "[*] Starting $RESTART_CYCLES cold-restart cycles..."
echo ""

for cycle in $(seq 1 "$RESTART_CYCLES"); do
  echo "── Cycle $cycle/$RESTART_CYCLES ──"
  run_log="$LOG_DIR/run_${cycle}.log"

  SURI_PID=$(start_suricata "$cycle")
  echo " Started PID $SURI_PID"

  # Wait for engine to start (up to 30s)
  started=0
  for _ in $(seq 1 30); do
    if grep -q "Engine started" "$run_log" 2>/dev/null; then
      started=1
      break
    fi
    sleep 1
  done

  if [[ $started -eq 0 ]]; then
    echo " WARNING: Engine did not report 'Engine started' within 30s"
  fi

  # Let it run for 3 seconds under traffic
  sleep 3

  # Check for ENOTSOCK signature BEFORE killing
  # (grep -c exits 1 on zero matches; `|| true` keeps set -e from aborting.)
  ENOTSOCK_COUNT=$(grep -c "sending packet failed on socket 0: Socket operation on non-socket" \
    "$run_log" 2>/dev/null || true)

  if [[ $ENOTSOCK_COUNT -gt 0 ]]; then
    echo " *** RACE DETECTED: $ENOTSOCK_COUNT ENOTSOCK line(s) ***"
    # `grep -m 5` stops after five matches by itself. Piping an unbounded grep
    # into `head -5` can kill grep with SIGPIPE, which under `set -eo pipefail`
    # aborts the whole script exactly when the race produces many lines.
    grep -m 5 "sending packet failed on socket 0" "$run_log" | sed 's/^/ /' || true
    ENOTSOCK_TOTAL=$((ENOTSOCK_TOTAL + ENOTSOCK_COUNT))
    FAILED_CYCLES=$((FAILED_CYCLES + 1))
  else
    echo " No ENOTSOCK lines — clean start"
  fi

  # Cold restart: SIGTERM and wait
  kill "$SURI_PID" 2>/dev/null || true
  # The process was launched inside the $(...) subshell of start_suricata, so
  # it is not a child of this shell and the `wait` builtin cannot wait for it.
  # Poll for its disappearance instead (up to ~30s), then escalate.
  for _ in $(seq 1 300); do
    kill -0 "$SURI_PID" 2>/dev/null || break
    sleep 0.1
  done
  if kill -0 "$SURI_PID" 2>/dev/null; then
    echo " WARNING: PID $SURI_PID still alive after SIGTERM; sending SIGKILL"
    kill -KILL "$SURI_PID" 2>/dev/null || true
  fi
  echo " Stopped."

  # Show RX/TX asymmetry if stats are present. Checked after shutdown so that
  # any summary lines written on exit are included.
  if grep -q "packets:" "$run_log" 2>/dev/null; then
    echo " Interface stats:"
    # `|| true`: the stricter pattern may match nothing even though the loose
    # check above did; that must not abort the run under pipefail.
    grep -E "SFE_[0-9]+_(TX|RX): packets:" "$run_log" \
      | tail -n "$((NUM_PAIRS * 2))" | sed 's/^/ /' || true
  fi
  echo ""

  # Small gap between restarts
  sleep 1
done

# ── 6. Report ────────────────────────────────────────────────────────────────
# Summary first, then the verdict. Exit status is 1 when the ENOTSOCK
# signature was seen in at least one cycle and 0 otherwise.
cat <<SUMMARY_EOF
========================================
 Results
========================================
 Restart cycles: $RESTART_CYCLES
 Cycles with ENOTSOCK: $FAILED_CYCLES
 Total ENOTSOCK lines: $ENOTSOCK_TOTAL

SUMMARY_EOF

if (( FAILED_CYCLES > 0 )); then
  cat <<VERDICT_EOF
 RESULT: BUG REPRODUCED
 The startup race fired on $FAILED_CYCLES/$RESTART_CYCLES restarts.
 Fix: add 'if (SC_ATOMIC_GET(p->afp_v.peer->state) != AFP_STATE_UP) return;'
 at the top of AFPWritePacket() in src/source-af-packet.c
VERDICT_EOF
  exit 1
fi

cat <<VERDICT_EOF
 RESULT: No ENOTSOCK lines detected in $RESTART_CYCLES cycles.
 This could mean:
 a) The fix is applied and working, OR
 b) The race did not fire (timing-dependent without widener)
 For deterministic reproduction, build with widener.patch and set WIDEN_WINDOW=1
VERDICT_EOF
exit 0
(3-3/4)