jenkins-recon/subfinder.groovy
2026-05-10 05:35:24 +02:00

313 lines
No EOL
10 KiB
Groovy
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// ═══════════════════════════════════════════════════════════════════════════
// recon-subfinder
// Subdomain enumeration + dnsx pre-resolve + diff.
// Writes all-resolved-latest.txt consumed by recon-httpx.
// Recommended schedule: once or twice daily (e.g. 01:00 + 13:00).
// Note: the cron trigger in this file is currently '30 23 * * *', i.e. once daily at 23:30.
// ═══════════════════════════════════════════════════════════════════════════
pipeline {
agent any
// Job-wide options:
//   timestamps()              - prefix console log lines with timestamps
//   disableConcurrentBuilds() - never run two builds of this job at once, so the
//                               shared state under STATE_BASE is not written in parallel
//                               by this job (other jobs are not covered by this option)
//   timeout(...)              - abort the whole build after 2 hours
options {
timestamps()
disableConcurrentBuilds()
timeout(time: 2, unit: 'HOURS')
}
// Scheduled trigger. Cron fields are: minute hour day-of-month month day-of-week,
// so '30 23 * * *' starts one build per day at 23:30.
triggers {
cron('30 23 * * *')
}
// Build parameters. The shell steps in this pipeline read each of them as an
// environment variable of the same name ($DOMAINS, $RESOLVERS, ...).
parameters {
// DOMAINS: multi-line list of root domains, one per line. The 'Prepare workspace'
// stage trims each line and drops blank lines and lines starting with '#'.
text(
name: 'DOMAINS',
defaultValue: '''\
gellert-innovation.com
privsec.ch
twilio.com
hubspot.com
deere.com
ford.com''',
description: 'One root domain per line. Only domains you are authorised to test.'
)
// RESOLVERS: passed unchanged to the -r flag of both subfinder and dnsx.
string(
name: 'RESOLVERS',
defaultValue: '1.1.1.1,1.0.0.1,8.8.8.8,8.8.4.4,9.9.9.9,149.112.112.112',
description: 'Comma-separated DNS resolvers'
)
// DNSX_THREADS: passed to dnsx as -threads. Declared as a string parameter;
// no numeric validation is performed in this file.
string(
name: 'DNSX_THREADS',
defaultValue: '50',
description: 'dnsx thread count (safe for VPS: 50)'
)
// DNSX_RATE_LIMIT: passed to dnsx as -rl. Declared as a string parameter;
// no numeric validation is performed in this file.
string(
name: 'DNSX_RATE_LIMIT',
defaultValue: '500',
description: 'dnsx max queries/sec (safe for VPS: 500)'
)
}
// Pipeline-wide environment variables, visible to every sh step.
//   STATE_BASE - absolute directory holding state that persists across builds
//                (per-domain baselines, latest/history lists, httpx chunk pointer).
//   RUN_DIR    - relative directory holding this build's scratch output; it is
//                deleted and recreated by the 'Prepare workspace' stage.
//                NOTE: the 'Archive results' stage hard-codes the same value
//                ('current-run') in its artifact pattern - keep the two in sync.
environment {
STATE_BASE = '/var/jenkins_home/recon-state'
RUN_DIR = 'current-run'
}
stages {
// ── 1. Prepare ────────────────────────────────────────────────────
stage('Prepare workspace') {
    steps {
        // Recreates $RUN_DIR and normalises the DOMAINS parameter into
        // $RUN_DIR/domains.clean.txt: one lowercase root domain per line,
        // sorted and de-duplicated. Fails the build if no domain remains.
        sh '''#!/usr/bin/env bash
            set -euo pipefail

            echo "[*] Preparing clean workspace..."
            rm -rf "$RUN_DIR"
            mkdir -p "$RUN_DIR/results"

            CLEAN="$RUN_DIR/domains.clean.txt"

            # sed trims surrounding whitespace (which also removes a trailing CR
            # from CRLF input), then deletes blank lines and "#" comment lines.
            # Unlike "grep -v", sed exits 0 when nothing survives the filter, so
            # with "set -o pipefail" an empty DOMAINS value reaches the explicit
            # check below instead of killing the script without a message.
            # Entries are lowercased because later stages compare hostnames
            # case-insensitively and derive per-domain state directories from them.
            printf '%s\n' "${DOMAINS}" |
                sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e '/^$/d' -e '/^#/d' |
                tr '[:upper:]' '[:lower:]' |
                sort -u > "$CLEAN"

            if [ ! -s "$CLEAN" ]; then
                echo "[!] No domains. Aborting."
                exit 1
            fi

            echo "[*] Domains for this build:"
            cat "$CLEAN"
        '''
    }
}
// ── 2. Subfinder ──────────────────────────────────────────────────
stage('Subfinder enumerate subdomains') {
    steps {
        // For every domain in $RUN_DIR/domains.clean.txt:
        //   1. run subfinder and keep only the domain itself or names ending in ".<domain>",
        //   2. diff the result against the per-domain baseline stored under
        //      $STATE_BASE/subfinder/domains/<domain>/subdomains-last.txt,
        //   3. update the baseline.
        // Finally merge every per-domain list into $RUN_DIR/all-subdomains.txt.
        sh '''#!/usr/bin/env bash
            set -euo pipefail

            command -v subfinder >/dev/null 2>&1 || { echo "[!] subfinder not found"; exit 1; }

            RESULT_BASE="$RUN_DIR/results"
            SF_STATE="$STATE_BASE/subfinder/domains"
            mkdir -p "$SF_STATE"

            while IFS= read -r DOMAIN; do
                [ -z "$DOMAIN" ] && continue

                # Directory-safe form of the domain: anything outside [a-zA-Z0-9._-] becomes "_".
                SAFE="$(echo "$DOMAIN" | sed 's/[^a-zA-Z0-9._-]/_/g')"
                RDIR="$RESULT_BASE/$SAFE"
                SDIR="$SF_STATE/$SAFE"
                mkdir -p "$RDIR" "$SDIR"

                RAW="$RDIR/subdomains-raw.txt"
                CURRENT="$RDIR/subdomains-current.txt"
                OLD="$SDIR/subdomains-last.txt"
                NEW="$RDIR/subdomains-new.txt"
                REMOVED="$RDIR/subdomains-removed.txt"
                DIFF="$RDIR/subdomains-diff.txt"

                echo
                echo "=================================================="
                echo "[*] subfinder: $DOMAIN"
                echo "=================================================="

                subfinder -d "$DOMAIN" -silent -r "$RESOLVERS" | sort -u > "$RAW"

                # Safety filter: keep only exact domain or *.domain
                awk -v d="$DOMAIN" '
                    BEGIN { d = tolower(d) }
                    {
                        x = tolower($0)
                        suffix = "." d
                        if (x == d || substr(x, length(x) - length(suffix) + 1) == suffix) print $0
                    }
                ' "$RAW" | sort -u > "$CURRENT"

                echo "[*] Raw: $(wc -l < "$RAW") | Filtered: $(wc -l < "$CURRENT")"
                if [ ! -s "$CURRENT" ]; then
                    # The redirect above always creates the file, so nothing to truncate here.
                    echo "[!] No valid subdomains for $DOMAIN — continuing."
                fi

                # Diff against previous baseline
                if [ ! -f "$OLD" ]; then
                    echo "[*] First run — creating baseline."
                    cp "$CURRENT" "$OLD"
                    cp "$CURRENT" "$NEW"
                    : > "$REMOVED"
                    {
                        echo "Initial baseline for $DOMAIN"
                        echo "Job: ${JOB_NAME} Build: ${BUILD_NUMBER}"
                        echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
                        echo "---"
                        cat "$CURRENT"
                    } > "$DIFF"
                else
                    # Both files were written by "sort -u", which is what comm requires.
                    comm -13 "$OLD" "$CURRENT" > "$NEW"
                    comm -23 "$OLD" "$CURRENT" > "$REMOVED"
                    {
                        echo "Subdomain diff for $DOMAIN"
                        echo "Job: ${JOB_NAME} Build: ${BUILD_NUMBER}"
                        echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
                        echo ""
                        echo "=== NEW ($(wc -l < "$NEW")) ==="
                        cat "$NEW"
                        echo ""
                        echo "=== REMOVED ($(wc -l < "$REMOVED")) ==="
                        cat "$REMOVED"
                    } > "$DIFF"

                    # Do not replace a populated baseline with an empty result: an
                    # enumeration that returned nothing would otherwise wipe the
                    # baseline and make the next successful run report every
                    # subdomain as NEW. The diff above still lists them as REMOVED.
                    if [ -s "$CURRENT" ] || [ ! -s "$OLD" ]; then
                        cp "$CURRENT" "$OLD"
                    else
                        echo "[!] Empty result for $DOMAIN — keeping previous baseline ($(wc -l < "$OLD") entries)."
                    fi
                fi

                echo "[*] current=$(wc -l < "$CURRENT") new=$(wc -l < "$NEW") removed=$(wc -l < "$REMOVED")"
            done < "$RUN_DIR/domains.clean.txt"

            # ── Aggregate all subdomains ──────────────────────────────────────────
            find "$RESULT_BASE" -name "subdomains-current.txt" -print0 |
                xargs -0 cat |
                sort -u > "$RUN_DIR/all-subdomains.txt"

            echo
            echo "[*] Total unique subdomains: $(wc -l < "$RUN_DIR/all-subdomains.txt")"
        '''
    }
}
// ── 3. Scope check ────────────────────────────────────────────────
stage('Scope check') {
    steps {
        // Defence-in-depth guard: every line of $RUN_DIR/all-subdomains.txt must be
        // one of the domains in $RUN_DIR/domains.clean.txt or end in ".<domain>".
        // Offending lines are written to $RUN_DIR/out-of-scope.txt and fail the build.
        sh '''#!/usr/bin/env bash
            set -euo pipefail

            SCOPE="$RUN_DIR/domains.clean.txt"
            HOSTS="$RUN_DIR/all-subdomains.txt"
            BAD="$RUN_DIR/out-of-scope.txt"

            # Plain string suffix comparison instead of a regex assembled from the
            # domain names: in a regex the dots of a domain match any character, and
            # a domain containing a regex metacharacter would make grep exit with an
            # error that a "|| : > file" fallback turns into a passing check.
            #
            # Both the scope entry and the host get a leading "." so that a single
            # suffix test covers the exact domain and any subdomain of it, while a
            # name that merely ends in the same letters is rejected.
            # If awk itself fails, "set -e" aborts the stage (fail closed).
            awk '
                FILENAME == ARGV[1] { scope["." tolower($0)] = 1; next }
                {
                    host = "." tolower($0)
                    for (suffix in scope) {
                        start = length(host) - length(suffix) + 1
                        if (start >= 1 && substr(host, start) == suffix) next
                    }
                    print $0
                }
            ' "$SCOPE" "$HOSTS" > "$BAD"

            if [ -s "$BAD" ]; then
                echo "[!] Out-of-scope entries detected:"
                cat "$BAD"
                echo "[!] Aborting."
                exit 1
            fi
            echo "[*] Scope check passed."
        '''
    }
}
// ── 4. dnsx pre-resolve entire list ────────────────────────────
// Runs ONCE here so recon-httpx only ever chunks live-resolvable
// hosts. 50 threads / 500 qps is safe on a mid-range VPS and
// processes ~47k subdomains in roughly 2 minutes.
// Tune DNSX_THREADS / DNSX_RATE_LIMIT if you see timeouts.
stage('dnsx pre-resolve all') {
    steps {
        // Resolves $RUN_DIR/all-subdomains.txt with dnsx (A and AAAA) and writes the
        // hosts dnsx reports to $RUN_DIR/all-resolved.txt. Whenever dnsx is missing,
        // exits non-zero, or returns an implausibly small result, the unfiltered
        // subdomain list is used instead so that no host is silently lost.
        sh '''#!/usr/bin/env bash
            set -euo pipefail

            INPUT="$RUN_DIR/all-subdomains.txt"
            RESOLVED="$RUN_DIR/all-resolved.txt"

            if ! command -v dnsx >/dev/null 2>&1; then
                echo "[!] dnsx not installed — falling back to raw subdomain list."
                echo "[!] Install dnsx in the Containerfile for best results."
                cp "$INPUT" "$RESOLVED"
                exit 0
            fi

            TOTAL=$(wc -l < "$INPUT")
            echo "[*] Resolving $TOTAL subdomains..."
            echo "[*] Threads: ${DNSX_THREADS} Rate-limit: ${DNSX_RATE_LIMIT} qps"

            # With pipefail the pipeline status reflects a dnsx failure. Record it
            # instead of discarding it, so a crashed or misconfigured run cannot
            # publish an empty or truncated list as if it were a valid result.
            DNSX_OK=1
            dnsx -l "$INPUT" -silent -r "$RESOLVERS" -a -aaaa \
                -threads "${DNSX_THREADS}" -rl "${DNSX_RATE_LIMIT}" -retry 3 |
                sort -u > "$RESOLVED" || DNSX_OK=0

            AFTER=$(wc -l < "$RESOLVED")
            if [ "$DNSX_OK" -ne 1 ]; then
                echo "[!] WARNING: dnsx exited with an error after writing $AFTER hosts."
                echo "[!] Falling back to raw subdomain list to avoid data loss."
                cp "$INPUT" "$RESOLVED"
            elif [ "$AFTER" -lt 10 ] && [ "$TOTAL" -gt 100 ]; then
                # Sanity check — if output is suspiciously small something went wrong
                echo "[!] WARNING: Only $AFTER hosts resolved from $TOTAL — possible dnsx flag error."
                echo "[!] Falling back to raw subdomain list to avoid data loss."
                cp "$INPUT" "$RESOLVED"
            fi

            AFTER=$(wc -l < "$RESOLVED")
            DROPPED=$(( TOTAL - AFTER ))
            # Guard the division explicitly rather than skewing the result with TOTAL + 1.
            if [ "$TOTAL" -gt 0 ]; then
                REDUCTION=$(( DROPPED * 100 / TOTAL ))
            else
                REDUCTION=0
            fi
            echo "[*] Resolved: $AFTER | Dropped (NXDOMAIN/timeout): $DROPPED"
            echo "[*] Reduction: ${REDUCTION}% — httpx will only probe these $AFTER hosts"
        '''
    }
}
// ── 5. Publish state ──────────────────────────────────────────────
stage('Publish state') {
    steps {
        // Copies this build's raw and resolved lists into $STATE_BASE/subfinder
        // (a "latest" copy plus a per-build history copy), resets the httpx chunk
        // pointer to 0 and writes a metadata.txt summary of the run.
        sh '''#!/usr/bin/env bash
            set -euo pipefail

            SUBFINDER_STATE="$STATE_BASE/subfinder"
            HISTORY_DIR="$SUBFINDER_STATE/history"
            HTTPX_STATE="$STATE_BASE/httpx"

            RAW_LATEST="$SUBFINDER_STATE/all-subdomains-latest.txt"
            RESOLVED_LATEST="$SUBFINDER_STATE/all-resolved-latest.txt"

            # publish SRC LATEST SNAPSHOT: copy SRC to the "latest" path, then to the history path.
            publish() {
                cp "$1" "$2"
                cp "$1" "$3"
            }

            mkdir -p "$HISTORY_DIR"

            # Raw subdomain list (auditing / reference)
            publish "$RUN_DIR/all-subdomains.txt" "$RAW_LATEST" \
                "$HISTORY_DIR/subdomains-build-${BUILD_NUMBER}.txt"

            # Resolved list — recon-httpx chunks from this
            publish "$RUN_DIR/all-resolved.txt" "$RESOLVED_LATEST" \
                "$HISTORY_DIR/resolved-build-${BUILD_NUMBER}.txt"

            # Reset httpx pointer — next httpx run starts from top of fresh resolved list
            mkdir -p "$HTTPX_STATE/history"
            echo "0" > "$HTTPX_STATE/chunk-pointer.txt"

            RAW_COUNT="$(wc -l < "$RAW_LATEST")"
            RESOLVED_COUNT="$(wc -l < "$RESOLVED_LATEST")"

            {
                echo "job=${JOB_NAME}"
                echo "build=${BUILD_NUMBER}"
                echo "timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
                echo "subdomains_total=${RAW_COUNT}"
                echo "resolved_total=${RESOLVED_COUNT}"
                echo "resolvers=${RESOLVERS}"
                echo "dnsx_threads=${DNSX_THREADS}"
                echo "dnsx_rate_limit=${DNSX_RATE_LIMIT}"
                echo ""
                echo "domains:"
                cat "$RUN_DIR/domains.clean.txt"
            } > "$SUBFINDER_STATE/metadata.txt"

            echo "[*] Raw subdomains: ${RAW_COUNT}"
            echo "[*] Resolved hosts: ${RESOLVED_COUNT}"
            echo "[*] httpx pointer reset to 0"
        '''
    }
}
// ── 6. Archive ────────────────────────────────────────────────────
stage('Archive results') {
    steps {
        // Attach every .txt file under current-run/ to the build and record fingerprints.
        archiveArtifacts(
            artifacts: 'current-run/**/*.txt',
            fingerprint: true
        )
    }
}
}
post {
    // Runs after all stages, whatever the outcome.
    always {
        echo "[*] recon-subfinder finished."
    }
    // Runs only when the build result is a failure.
    failure {
        echo "[!] recon-subfinder FAILED."
    }
}
}