// jenkins-recon/nuclei.groovy

// ═══════════════════════════════════════════════════════════════════════════
//  recon-nuclei
//  Lightweight API/endpoint checks on httpx results.
//  Triggered automatically after recon-httpx (new services only),
//  or on schedule for full cumulative scan.
//
//  SCAN_NEW_ONLY=true  → only new services from last httpx diff
//  SCAN_NEW_ONLY=false → all cumulative live services
//
//  Conservative by design: no bruteforce, no exploits — known-path probes
//  (exposures, misconfigurations) on every target, plus default-credential
//  checks restricted to hosts matching LOGIN_PATTERNS.
// ═══════════════════════════════════════════════════════════════════════════

pipeline {
    agent any

    options {
        // Prefix every console log line with a timestamp.
        timestamps()
        // Never run two builds of this job at once: all builds read and
        // write the same state files under STATE_BASE.
        disableConcurrentBuilds()
        // Abort the whole build if it runs longer than two hours.
        timeout(time: 2, unit: 'HOURS')
    }


    // Build parameters. The shell steps of this pipeline read them as
    // environment variables of the same name.
    parameters {
        // ── Target selection ──────────────────────────────────────────────
        booleanParam(name: 'SCAN_NEW_ONLY', defaultValue: true,
                     description: 'true = only new services from last httpx diff | false = all cumulative live services')

        // ── nuclei throttling ─────────────────────────────────────────────
        string(name: 'NUCLEI_CONCURRENCY', defaultValue: '10',
               description: 'Nuclei parallel template execution (keep low on VPS: 10-25)')
        string(name: 'NUCLEI_RATE_LIMIT', defaultValue: '50',
               description: 'Max HTTP requests/sec (keep conservative: 50-100)')
        string(name: 'NUCLEI_TIMEOUT', defaultValue: '10',
               description: 'HTTP timeout in seconds')

        // ── Template / host filters ───────────────────────────────────────
        string(name: 'NUCLEI_TEMPLATES', defaultValue: 'http/exposures/,http/misconfiguration/',
               description: 'Templates for all new hosts (technologies/ removed — httpx already does this)')
        string(name: 'LOGIN_PATTERNS',
               defaultValue: 'login|signin|admin|portal|dashboard|console|panel|wp-admin|phpmyadmin|grafana|jenkins|kibana|adminer|manage|management|cpanel',
               description: 'Pipe-separated patterns to filter hosts for default-login scan (Scan 2)')

        // ── Severity ──────────────────────────────────────────────────────
        string(name: 'NUCLEI_SEVERITY', defaultValue: 'low,medium,high,critical',
               description: 'Severity filter')
        booleanParam(name: 'INCLUDE_INFO', defaultValue: false,
                     description: 'Include info-severity findings (very noisy — off by default)')
    }

    environment {
        // Persistent state directory that survives across builds. This job
        // reads target lists from its httpx/ subdirectory and keeps its own
        // cumulative findings, history and metadata under nuclei/.
        STATE_BASE = '/var/jenkins_home/recon-state'
        // Per-build scratch directory (relative to the workspace); it is
        // deleted and recreated by the 'Prepare workspace' stage.
        RUN_DIR    = 'current-run'
    }

    stages {

        // ── 1. Prepare ────────────────────────────────────────────────────
        //  Recreates the per-build scratch directory and fails fast when the
        //  nuclei binary is not available on the agent.
        stage('Prepare workspace') {
            steps {
                sh '''#!/usr/bin/env bash
set -euo pipefail

echo "[*] Preparing clean workspace..."
rm -rf "$RUN_DIR"
mkdir -p "$RUN_DIR/results"

# "command -v" is a shell builtin, so the check also works in slim images
# that do not ship the external "which" utility.
if ! command -v nuclei >/dev/null 2>&1; then
    echo "[!] nuclei not found in PATH."
    echo "[!] Add to Containerfile and rebuild: go install github.com/projectdiscovery/nuclei/v3/cmd/nuclei@latest"
    exit 1
fi

echo "[*] nuclei version: $(nuclei -version 2>&1 | head -1)"
'''
            }
        }

        // ── 2. Update nuclei templates ────────────────────────────────────
        //  Best effort: a failed update (e.g. no network) must not abort the
        //  build, but it is reported instead of being silently ignored.
        stage('Update templates') {
            steps {
                sh '''#!/usr/bin/env bash
set -euo pipefail

echo "[*] Updating nuclei templates..."
if nuclei -update-templates -silent; then
    echo "[*] Templates up to date."
else
    echo "[!] Template update failed — continuing with the templates already on disk."
fi
'''
            }
        }

        // ── 3. Select targets ─────────────────────────────────────────────
        //  Two target lists are written into RUN_DIR:
        //  targets.txt         → hosts for the main scan (new-only or cumulative,
        //                        depending on SCAN_NEW_ONLY)
        //  targets-login.txt   → URLs from the newest grep-interesting.txt that
        //                        match LOGIN_PATTERNS → default-logins scan
        //  Both files always exist afterwards; an empty file means "skip".
        stage('Select targets') {
            steps {
                sh '''#!/usr/bin/env bash
set -euo pipefail

HDIR="$STATE_BASE/httpx"
TARGET_FILE="$RUN_DIR/targets.txt"
TARGET_LOGIN="$RUN_DIR/targets-login.txt"

# Create/truncate both lists so later stages can rely on their existence.
: > "$TARGET_FILE"
: > "$TARGET_LOGIN"

if [ "${SCAN_NEW_ONLY}" = "true" ]; then
    LAST_NEW="$HDIR/httpx-last-new.txt"
    if [ -s "$LAST_NEW" ]; then
        cp "$LAST_NEW" "$TARGET_FILE"
        echo "[*] Mode: NEW only — $(wc -l < "$TARGET_FILE") hosts"
    else
        # Not an error: the scan stage sees the empty target list and skips.
        echo "[!] No new services — nothing to scan."
        exit 0
    fi
else
    CUMULATIVE="$HDIR/httpx-state-cumulative.txt"
    if [ ! -s "$CUMULATIVE" ]; then
        echo "[!] No cumulative state. Run recon-httpx first."
        exit 1
    fi
    # First tab-separated column of the cumulative state is used as the target.
    cut -f1 "$CUMULATIVE" | sort -u > "$TARGET_FILE"
    echo "[*] Mode: ALL cumulative — $(wc -l < "$TARGET_FILE") hosts"
fi

if [ ! -s "$TARGET_FILE" ]; then
    echo "[*] No targets — skipping."
    exit 0
fi

# Build login targets from the most recently modified, non-empty
# grep-interesting.txt found in any workspace.
#  -print0 / -0 : keep workspace paths containing spaces intact
#  -r           : do not run "ls" at all when find matched nothing (without
#                 it GNU xargs runs a bare "ls -t", which lists the current
#                 directory and yields a bogus file name)
GREP_FILE="$(find /var/jenkins_home/workspace -name "grep-interesting.txt" -type f -not -empty -print0 2>/dev/null |
    xargs -0 -r ls -t 2>/dev/null |
    head -1 || true)"

if [ -n "$GREP_FILE" ] && [ -s "$GREP_FILE" ]; then
    # -e keeps a pattern that starts with "-" from being parsed as an option.
    # "|| true": no matching line is a valid outcome, not a failure.
    grep -iE -e "${LOGIN_PATTERNS}" "$GREP_FILE" |
        grep -oE "https?://[^ ]+" |
        sort -u > "$TARGET_LOGIN" || true
    echo "[*] Login targets (filtered by LOGIN_PATTERNS): $(wc -l < "$TARGET_LOGIN") hosts"
else
    echo "[*] No grep-interesting file found — skipping login scan"
fi

echo "[*] First 5 targets:"
head -5 "$TARGET_FILE"
'''
            }
        }

        // ── 4. nuclei scan ────────────────────────────────────────────────
        //  Scan 1: all selected hosts → templates from NUCLEI_TEMPLATES
        //          (default: exposures + misconfiguration)
        //  Scan 2: grep-interesting hosts only → default-logins
        //  No technologies/ — httpx already does tech detection
        //  Each scan is a single nuclei run that writes text (-o) and JSONL
        //  (-jsonl-export) side by side. -o and -output are the same flag, so
        //  they cannot be used to address two different files.
        stage('nuclei – scan') {
            steps {
                sh '''#!/usr/bin/env bash
set -euo pipefail

TARGET_FILE="$RUN_DIR/targets.txt"
TARGET_LOGIN="$RUN_DIR/targets-login.txt"
RDIR="$RUN_DIR/results"
TMPL_ROOT="/var/jenkins_home/nuclei-templates"
USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"

FINDINGS_TXT="$RDIR/nuclei-findings.txt"
FINDINGS_JSONL="$RDIR/nuclei-findings.jsonl"

# Later stages expect both result files to exist even when nothing is found.
touch "$FINDINGS_TXT"
touch "$FINDINGS_JSONL"

if [ ! -s "$TARGET_FILE" ]; then
    echo "[*] No targets — skipping scan."
    exit 0
fi

SEV="${NUCLEI_SEVERITY}"
if [ "${INCLUDE_INFO}" = "true" ]; then
    SEV="info,${SEV}"
fi

# Turn the comma-separated NUCLEI_TEMPLATES parameter into "-t <path>" pairs.
# Relative entries are resolved against TMPL_ROOT, absolute ones are used
# as-is. Falls back to the historical hard-coded set when the parameter is
# empty or unset.
TEMPLATES="${NUCLEI_TEMPLATES:-http/exposures/,http/misconfiguration/}"
TEMPLATE_ARGS=()
IFS=',' read -r -a TEMPLATE_LIST <<< "$TEMPLATES"
# The ${arr[@]+...} form keeps "set -u" happy on old bash when the array is empty.
for tmpl in ${TEMPLATE_LIST[@]+"${TEMPLATE_LIST[@]}"}; do
    tmpl="${tmpl//[[:space:]]/}"
    [ -n "$tmpl" ] || continue
    case "$tmpl" in
        /*) TEMPLATE_ARGS+=(-t "$tmpl") ;;
        *)  TEMPLATE_ARGS+=(-t "$TMPL_ROOT/$tmpl") ;;
    esac
done
if [ "${#TEMPLATE_ARGS[@]}" -eq 0 ]; then
    echo "[!] NUCLEI_TEMPLATES contains no usable entries."
    exit 1
fi

# Options shared by both scans.
COMMON_ARGS=(
    -severity "$SEV"
    -c "${NUCLEI_CONCURRENCY}"
    -rl "${NUCLEI_RATE_LIMIT}"
    -timeout "${NUCLEI_TIMEOUT}"
    -H "User-Agent: $USER_AGENT"
    -silent
)

TOTAL=$(wc -l < "$TARGET_FILE")
echo "[*] Scan 1: $TOTAL hosts — templates: $TEMPLATES"

# Scan 1: selected templates on all targets.
# A non-zero nuclei exit is tolerated (best effort) but reported.
nuclei -l "$TARGET_FILE" "${TEMPLATE_ARGS[@]}" "${COMMON_ARGS[@]}" \
    -o "$FINDINGS_TXT" \
    -jsonl-export "$FINDINGS_JSONL" \
    || echo "[!] Scan 1: nuclei exited with a non-zero status — continuing with partial results."

# Scan 2: default-logins only on grep-interesting hosts
if [ -s "$TARGET_LOGIN" ]; then
    LOGIN_TOTAL=$(wc -l < "$TARGET_LOGIN")
    echo "[*] Scan 2: $LOGIN_TOTAL interesting hosts — default-logins"

    # Written to separate files first (same format as scan 1), then merged
    # into the main result files and removed again.
    LOGIN_TXT="$RDIR/nuclei-findings-login.txt"
    LOGIN_JSONL="$RDIR/nuclei-findings-login.jsonl"
    touch "$LOGIN_TXT" "$LOGIN_JSONL"

    nuclei -l "$TARGET_LOGIN" -t "${TMPL_ROOT}/http/default-logins/" "${COMMON_ARGS[@]}" \
        -o "$LOGIN_TXT" \
        -jsonl-export "$LOGIN_JSONL" \
        || echo "[!] Scan 2: nuclei exited with a non-zero status — continuing with partial results."

    cat "$LOGIN_TXT" >> "$FINDINGS_TXT"
    cat "$LOGIN_JSONL" >> "$FINDINGS_JSONL"
    rm -f "$LOGIN_TXT" "$LOGIN_JSONL"
else
    echo "[*] No interesting hosts for login scan — skipping."
fi

FINDINGS=$(wc -l < "$FINDINGS_TXT")
echo
echo "[*] nuclei scan complete — $FINDINGS findings"
'''
            }
        }

        // ── 5. Diff – new findings only ───────────────────────────────────
        //  Compares this run's findings with the cumulative baseline kept in
        //  $STATE_BASE/nuclei and writes:
        //    results/nuclei-new-findings.txt  → lines not present in the baseline
        //    results/nuclei-diff.txt          → human-readable report
        //  It then updates the baseline, a per-build history snapshot and
        //  metadata.txt. When the run produced no findings, only the two
        //  (empty) result files are created and persistent state is untouched.
        stage('nuclei – diff') {
            steps {
                sh '''#!/usr/bin/env bash
set -euo pipefail

RDIR="$RUN_DIR/results"
NDIR="$STATE_BASE/nuclei"
mkdir -p "$NDIR/history"

FINDINGS="$RDIR/nuclei-findings.txt"
OLD="$NDIR/nuclei-findings-cumulative.txt"
NEW_FINDINGS="$RDIR/nuclei-new-findings.txt"
DIFF="$RDIR/nuclei-diff.txt"

if [ ! -s "$FINDINGS" ]; then
    echo "[*] No findings this run."
    touch "$NEW_FINDINGS" "$DIFF"
    exit 0
fi

if [ ! -f "$OLD" ]; then
    echo "[*] No baseline — all findings are new."
    cp "$FINDINGS" "$OLD"
    cp "$FINDINGS" "$NEW_FINDINGS"
    {
        echo "========================================"
        echo "  recon-nuclei — Initial Findings"
        echo "========================================"
        echo "Job:       ${JOB_NAME}  #${BUILD_NUMBER}"
        echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
        echo "Mode:      SCAN_NEW_ONLY=${SCAN_NEW_ONLY}"
        echo ""
        echo "=== ALL FINDINGS ($(wc -l < "$FINDINGS")) ==="
        cat "$FINDINGS"
    } > "$DIFF"
else
    # New = complete finding line not present in the baseline.
    # sort -u on both sides: comm compares line multisets, so without
    # deduplication a line that occurs more often in this run than in the
    # baseline would be reported as new although it is already known.
    comm -13 <(sort -u "$OLD") <(sort -u "$FINDINGS") > "$NEW_FINDINGS"

    {
        echo "========================================"
        echo "  recon-nuclei — Diff Report"
        echo "========================================"
        echo "Job:       ${JOB_NAME}  #${BUILD_NUMBER}"
        echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
        echo "Mode:      SCAN_NEW_ONLY=${SCAN_NEW_ONLY}"
        echo "Total findings this run: $(wc -l < "$FINDINGS")"
        echo ""
        echo "=== NEW FINDINGS ($(wc -l < "$NEW_FINDINGS")) ==="
        cat "$NEW_FINDINGS"
    } > "$DIFF"

    # Update cumulative: write to a sibling file first, then rename, so the
    # baseline is never left half-written.
    { cat "$OLD"; cat "$NEW_FINDINGS"; } | sort -u > "$NDIR/nuclei-findings-cumulative-new.txt"
    mv "$NDIR/nuclei-findings-cumulative-new.txt" "$OLD"
fi

# History snapshot
cp "$FINDINGS" "$NDIR/history/build-${BUILD_NUMBER}.txt"

# Key=value summary of the latest run that produced findings.
{
    echo "job=${JOB_NAME}"
    echo "build=${BUILD_NUMBER}"
    echo "timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    echo "mode=SCAN_NEW_ONLY=${SCAN_NEW_ONLY}"
    echo "findings=$(wc -l < "$FINDINGS")"
    echo "new_findings=$(wc -l < "$NEW_FINDINGS")"
} > "$NDIR/metadata.txt"

echo "[*] Total findings:  $(wc -l < "$FINDINGS")"
echo "[*] New findings:    $(wc -l < "$NEW_FINDINGS")"

if [ -s "$NEW_FINDINGS" ]; then
    echo ""
    echo "[!] NEW FINDINGS:"
    cat "$NEW_FINDINGS"
fi
'''
            }
        }

        // ── 6. Archive ────────────────────────────────────────────────────
        //  Stores every .txt and .jsonl file below current-run/ as a build
        //  artifact and records a fingerprint for each of them.
        stage('Archive results') {
            steps {
                archiveArtifacts(
                    artifacts: 'current-run/**/*.txt,current-run/**/*.jsonl',
                    fingerprint: true
                )
            }
        }
    }

    post {
        always {
            echo "[*] recon-nuclei finished."

            // ── Matrix notification ───────────────────────────────────────
            // Sends a summary (plus up to 20 new findings) to a Matrix room
            // whenever the run produced at least one finding. Best effort: a
            // failed delivery is logged but does not fail the build.
            //
            // Requires two Jenkins credentials:
            //   MATRIX_TOKEN   → Secret text: your Matrix access token
            //   MATRIX_ROOM_ID → Secret text: your room ID (!xxx:matrix.org)
            //
            // NOTE: Groovy processes backslash escapes inside
            // triple-single-quoted strings before the shell sees the script.
            // Every backslash the shell/sed/awk/tr must receive is therefore
            // written twice in the script below.
            withCredentials([
                string(credentialsId: 'MATRIX_TOKEN',   variable: 'TOKEN'),
                string(credentialsId: 'MATRIX_ROOM_ID', variable: 'ROOM_ID')
            ]) {
                sh '''#!/usr/bin/env bash
set -euo pipefail

FINDINGS="$RUN_DIR/results/nuclei-findings.txt"
NEW_FINDINGS="$RUN_DIR/results/nuclei-new-findings.txt"

# Only send if there are any findings at all
if [ ! -s "$FINDINGS" ]; then
    echo "[*] No findings — skipping Matrix notification."
    exit 0
fi

TOTAL=$(wc -l < "$FINDINGS")
# The diff stage may not have run (earlier failure), so the file can be missing.
NEW=0
if [ -f "$NEW_FINDINGS" ]; then
    NEW=$(wc -l < "$NEW_FINDINGS")
fi
TIMESTAMP="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
MODE="SCAN_NEW_ONLY=${SCAN_NEW_ONLY}"

# ── Summary line ──────────────────────────────────────────────────────
SUMMARY="🔍 *recon-nuclei* — Build #${BUILD_NUMBER}
📅 ${TIMESTAMP}
🎯 Mode: ${MODE}
📊 Total findings: ${TOTAL}  |  New findings: ${NEW}"

# ── Detail block — top 20 new findings ───────────────────────────────
if [ -s "$NEW_FINDINGS" ]; then
    DETAIL="$(head -20 "$NEW_FINDINGS")"
    if [ "$NEW" -gt 20 ]; then
        DETAIL="${DETAIL}
... and $(( NEW - 20 )) more"
    fi
    FULL_MSG="${SUMMARY}

New Findings:
---
${DETAIL}
---"
else
    FULL_MSG="${SUMMARY}

✅ No new findings — all already known."
fi

# ── Send to Matrix ────────────────────────────────────────────────────
# JSON-escape the message with tr/sed/awk only — no python3 or jq needed:
#   1. drop control characters other than tab and newline (e.g. CR, ESC)
#   2. turn tabs into spaces
#   3. escape backslashes, then double quotes
#   4. join the lines with a literal backslash-n sequence
JSON_MSG="$(printf '%s' "$FULL_MSG" |
    tr -d '\\000-\\010\\013-\\037' |
    tr '\\t' ' ' |
    sed -e 's/\\\\/\\\\\\\\/g' -e 's/"/\\\\"/g' |
    awk 'NR > 1 { printf "%s", "\\\\n" } { printf "%s", $0 }')"

# Single-quoted printf format: no quote escaping needed for the JSON frame.
PAYLOAD="$(printf '{"msgtype":"m.text","body":"%s"}' "$JSON_MSG")"

TX_ID="recon-nuclei-$(date +%s%N)"

# Private temp file for the response body, removed when the script exits.
RESPONSE_FILE="$(mktemp)"
trap 'rm -f "$RESPONSE_FILE"' EXIT

CURL_ARGS=(
    -s
    --max-time 30
    -o "$RESPONSE_FILE"
    -w "%{http_code}"
    -X PUT
    -H "Authorization: Bearer ${TOKEN}"
    -H "Content-Type: application/json"
    -d "$PAYLOAD"
    "https://matrix.org/_matrix/client/v3/rooms/${ROOM_ID}/send/m.room.message/${TX_ID}"
)

# "|| true": a transport error (curl prints 000) must not fail the build.
HTTP_CODE="$(curl "${CURL_ARGS[@]}")" || true

if [ "$HTTP_CODE" = "200" ]; then
    echo "[*] Matrix notification sent successfully."
else
    echo "[!] Matrix notification failed (HTTP $HTTP_CODE):"
    cat "$RESPONSE_FILE"
fi
'''
            }
        }
        failure { echo "[!] recon-nuclei FAILED." }
    }
}