// ═══════════════════════════════════════════════════════════════════════════
//  recon-nuclei
//  Eigenständiger Job — liest aus nuclei-queue.txt (geschrieben von httpx).
//  Nimmt max. QUEUE_CHUNK_SIZE Hosts pro Run, scannt sie, entfernt sie aus Queue.
//  Läuft komplett unabhängig von httpx — kein Timeout-Problem mehr.
//
//  Queue-Datei: /var/jenkins_home/recon-state/nuclei/nuclei-queue.txt
//  Queue bleibt über Tage erhalten — nuclei macht immer da weiter wo er war.
//
//  Recommended schedule: every hour (H * * * *)
// ═══════════════════════════════════════════════════════════════════════════

pipeline {
    agent any

    options {
        // Prefix console log lines with timestamps.
        timestamps()
        // Do not allow two builds of this job to execute at the same time.
        disableConcurrentBuilds()
        // Abort the build if it has not finished after two hours.
        timeout(time: 2, unit: 'HOURS')
    }

    triggers {
        // Hourly schedule. 'H' lets Jenkins pick a stable, job-specific minute instead
        // of starting every hourly job at minute 0, matching the schedule recommended
        // in the header of this file.
        cron('H * * * *')
    }

    // Build parameters. The shell steps below read each of them as an environment
    // variable with the same name (e.g. "${QUEUE_CHUNK_SIZE}").
    parameters {
        // Upper bound on the number of scan targets taken from the queue per build.
        string(
            name: 'QUEUE_CHUNK_SIZE',
            defaultValue: '50',
            description: 'Max hosts to take from queue per run'
        )
        // Passed to nuclei as -c.
        string(
            name: 'NUCLEI_CONCURRENCY',
            defaultValue: '10',
            description: 'Nuclei parallel template execution (keep low on VPS: 10-25)'
        )
        // Passed to nuclei as -rl.
        string(
            name: 'NUCLEI_RATE_LIMIT',
            defaultValue: '50',
            description: 'Max HTTP requests/sec (keep conservative: 50-100)'
        )
        // Passed to nuclei as -timeout.
        string(
            name: 'NUCLEI_TIMEOUT',
            defaultValue: '10',
            description: 'HTTP timeout in seconds'
        )
        // Passed to nuclei as -severity (comma-separated list).
        string(
            name: 'NUCLEI_SEVERITY',
            defaultValue: 'low,medium,high,critical',
            description: 'Severity filter'
        )
        // When true, the scan stage prepends "info," to the severity list.
        booleanParam(
            name: 'INCLUDE_INFO',
            defaultValue: false,
            description: 'Include info-severity findings (very noisy — off by default)'
        )
        // Used as a case-insensitive extended regex (grep -iE) against the target URLs
        // to select the hosts that also get the default-logins scan.
        string(
            name: 'LOGIN_PATTERNS',
            defaultValue: 'login|signin|admin|portal|dashboard|console|panel|wp-admin|phpmyadmin|grafana|jenkins|kibana|adminer|manage|management|cpanel',
            description: 'Pipe-separated patterns to filter hosts for default-login scan'
        )
        // One rule per line; the backslash after the opening quotes suppresses the
        // leading newline so the first rule starts on the first line of the value.
        text(
            name: 'NUCLEI_BLACKLIST',
            defaultValue: '''\
*.hubspot.com
*.twilio.com''',
            description: 'Domains/wildcards to skip. One per line. Wildcards: *.example.com'
        )
    }

    environment {
        // Root directory for state that is kept between builds (baseline, history, metadata).
        STATE_BASE = '/var/jenkins_home/recon-state'
        // Per-build scratch directory; deleted and recreated by the 'Prepare workspace' stage.
        RUN_DIR    = 'current-run'
        // Queue of URLs waiting to be scanned, one per line; this job removes what it takes.
        QUEUE_FILE = '/var/jenkins_home/recon-state/nuclei/nuclei-queue.txt'
    }

    stages {

        // ── 1. Prepare ────────────────────────────────────────────────────
        stage('Prepare workspace') {
            steps {
                // Recreates the per-run directory, verifies that nuclei is installed and
                // that QUEUE_CHUNK_SIZE is a usable number, then reports the queue size.
                // NOTE: backslashes inside this ''' string are consumed by Groovy first;
                // the script below deliberately contains none.
                sh '''#!/usr/bin/env bash
set -euo pipefail

echo "[*] Preparing clean workspace..."
rm -rf "$RUN_DIR"
mkdir -p "$RUN_DIR/results"

# command -v is a shell builtin, so the check does not depend on `which` being installed.
if ! command -v nuclei > /dev/null 2>&1; then
    echo "[!] nuclei not found in PATH."
    echo "[!] Add to Containerfile: go install github.com/projectdiscovery/nuclei/v3/cmd/nuclei@latest"
    exit 1
fi

echo "[*] nuclei version: $(nuclei -version 2>&1 | head -1)"

# Fail early with a clear message instead of a cryptic arithmetic error later on.
CHUNK_SIZE="${QUEUE_CHUNK_SIZE:-50}"
case "$CHUNK_SIZE" in
    ""|*[!0-9]*)
        echo "[!] QUEUE_CHUNK_SIZE must be a positive integer (got: '$CHUNK_SIZE')."
        exit 1
        ;;
esac
# Force base 10 so a value such as 08 is not parsed as invalid octal.
CHUNK_SIZE=$((10#$CHUNK_SIZE))
if [ "$CHUNK_SIZE" -lt 1 ]; then
    echo "[!] QUEUE_CHUNK_SIZE must be at least 1."
    exit 1
fi

# The queue file must exist.
if [ ! -f "$QUEUE_FILE" ]; then
    echo "[*] Queue file does not exist yet — nothing to scan."
    exit 0
fi

# Count non-empty lines. Unlike wc -l this also counts a final line that has no
# trailing newline, and it ignores blank lines (which the next stage skips too).
QUEUE_SIZE=$(grep -c . "$QUEUE_FILE" || true)
QUEUE_SIZE="${QUEUE_SIZE:-0}"
if [ "$QUEUE_SIZE" -eq 0 ]; then
    echo "[*] Queue is empty — nothing to scan."
    exit 0
fi

echo "[*] Queue size: $QUEUE_SIZE hosts"
echo "[*] Will process: $(( QUEUE_SIZE < CHUNK_SIZE ? QUEUE_SIZE : CHUNK_SIZE )) hosts this run"
'''
            }
        }

        // ── 2. Update templates ───────────────────────────────────────────
        stage('Update templates') {
            steps {
                // Best-effort template refresh: a failed update must not abort the run,
                // but it is reported instead of being logged as a success.
                sh '''#!/usr/bin/env bash
set -euo pipefail

echo "[*] Updating nuclei templates..."
if nuclei -update-templates -silent; then
    echo "[*] Templates up to date."
else
    # $? here is the exit status of the nuclei call tested by the if above.
    echo "[!] Template update failed (exit $?) — continuing with the templates already on disk."
fi
'''
            }
        }

        // ── 3. Take chunk from queue ──────────────────────────────────────
        //  Nimmt die ersten QUEUE_CHUNK_SIZE Einträge aus der Queue.
        //  Wendet nochmals Blacklist an (defensive — httpx filtert bereits).
        //  Entfernt die genommenen Einträge sofort aus der Queue-Datei
        //  damit parallele Runs (falls disableConcurrentBuilds deaktiviert)
        //  nicht dieselben Hosts nehmen.
        stage('Take chunk from queue') {
            steps {
                // Reads the queue from the top, drops blacklisted entries, collects up to
                // QUEUE_CHUNK_SIZE scan targets into $RUN_DIR/targets.txt and removes
                // everything it consumed from the queue file.
                // NOTE: Groovy processes backslash escapes inside this ''' string before
                // bash sees it, so every backslash meant for bash is written doubled.
                sh '''#!/usr/bin/env bash
set -euo pipefail

if [ ! -s "$QUEUE_FILE" ]; then
    echo "[*] Queue is empty — skipping."
    touch "$RUN_DIR/targets.txt"
    exit 0
fi

# A non-numeric chunk size would make the loop limit test below error out (which
# an if treats as false), so the whole queue would be consumed in one run.
CHUNK_SIZE="${QUEUE_CHUNK_SIZE:-50}"
case "$CHUNK_SIZE" in
    ""|*[!0-9]*)
        echo "[!] QUEUE_CHUNK_SIZE must be a positive integer (got: '$CHUNK_SIZE')."
        exit 1
        ;;
esac
CHUNK_SIZE=$((10#$CHUNK_SIZE))
if [ "$CHUNK_SIZE" -lt 1 ]; then
    echo "[!] QUEUE_CHUNK_SIZE must be at least 1."
    exit 1
fi

# ── Blacklist filter (applied again here, defensively) ───────────────
# Normalise once: strip CRs, lower-case, trim, drop blank lines and # comments.
BLACKLIST_FILE="$RUN_DIR/blacklist.txt"
printf '%s\\n' "${NUCLEI_BLACKLIST:-}" \
    | tr -d '\\r' \
    | tr '[:upper:]' '[:lower:]' \
    | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' \
    | grep -vE '^(#|$)' \
    | sort -u > "$BLACKLIST_FILE" || true

# is_blacklisted URL
#   Returns 0 when the host part of URL matches a blacklist rule, 1 otherwise.
#   Rule "*.example.com" matches example.com and every subdomain of it.
#   Any other rule matches only the exact host name.
is_blacklisted() {
    local host rule suffix
    host="${1#http://}"; host="${host#https://}"
    host="${host%%/*}"; host="${host%%:*}"
    host="$(printf '%s' "$host" | tr '[:upper:]' '[:lower:]')"
    if [ ! -s "$BLACKLIST_FILE" ]; then return 1; fi
    while IFS= read -r rule; do
        [ -n "$rule" ] || continue
        case "$rule" in
            # Quoted, so this is the literal prefix "*." and not a glob: a plain
            # rule such as example.com must not be turned into the suffix "com".
            "*."*)
                suffix="${rule#"*."}"
                [ -n "$suffix" ] || continue
                if [ "$host" = "$suffix" ]; then return 0; fi
                case "$host" in
                    *."$suffix") return 0 ;;
                esac
                ;;
            *)
                if [ "$host" = "$rule" ]; then return 0; fi
                ;;
        esac
    done < "$BLACKLIST_FILE"
    return 1
}

# Take the first CHUNK_SIZE queue entries that are not blacklisted.
TARGETS="$RUN_DIR/targets.txt"
TAKEN="$RUN_DIR/taken.txt"
: > "$TARGETS"
: > "$TAKEN"

target_count=0
# The second test keeps a final line that has no trailing newline.
while IFS= read -r url || [ -n "$url" ]; do
    [ -n "$url" ] || continue
    # Every consumed entry is removed from the queue, blacklisted or not.
    echo "$url" >> "$TAKEN"
    if is_blacklisted "$url"; then
        echo "[*] Blacklisted (removing from queue): $url"
        continue
    fi
    echo "$url" >> "$TARGETS"
    target_count=$((target_count + 1))
    if [ "$target_count" -ge "$CHUNK_SIZE" ]; then
        break
    fi
done < "$QUEUE_FILE"

# Remove the consumed entries from the queue.
# -x matches whole lines only: without it, taking https://a.com would also drop
# https://a.com:8443 or https://a.com/path from the queue without scanning them.
# NOTE(review): the rewrite is not locked against another writer; if the job that
# fills the queue appends between the grep and the mv, those lines are lost —
# confirm whether the two jobs can run at the same time.
if [ -s "$TAKEN" ]; then
    rc=0
    grep -vxFf "$TAKEN" "$QUEUE_FILE" > "${QUEUE_FILE}.tmp" || rc=$?
    # Exit 1 only means nothing is left; anything above is a real error and the
    # queue must not be replaced with a partial file.
    if [ "$rc" -gt 1 ]; then
        rm -f "${QUEUE_FILE}.tmp"
        echo "[!] Failed to rewrite queue file (grep exit $rc) — queue left untouched."
        exit 1
    fi
    mv "${QUEUE_FILE}.tmp" "$QUEUE_FILE"
fi

TAKEN_COUNT=$(wc -l < "$TAKEN")
TARGET_COUNT=$(wc -l < "$TARGETS")
REMAINING=$(wc -l < "$QUEUE_FILE")

echo "[*] Taken from queue:  $TAKEN_COUNT"
echo "[*] Targets to scan:   $TARGET_COUNT"
echo "[*] Remaining in queue: $REMAINING"

if [ ! -s "$TARGETS" ]; then
    echo "[*] No valid targets after blacklist filter — done."
fi
'''
            }
        }

        // ── 4. Build login targets ────────────────────────────────────────
        stage('Build login targets') {
            steps {
                // Selects the targets whose URL matches LOGIN_PATTERNS (case-insensitive
                // extended regex) into $RUN_DIR/targets-login.txt for the default-logins scan.
                sh '''#!/usr/bin/env bash
set -euo pipefail

TARGETS="$RUN_DIR/targets.txt"
TARGET_LOGIN="$RUN_DIR/targets-login.txt"
: > "$TARGET_LOGIN"

if [ ! -s "$TARGETS" ]; then
    echo "[*] No targets — skipping login target build."
    exit 0
fi

# An empty pattern matches every line, which would send all targets to the login scan.
if [ -z "${LOGIN_PATTERNS:-}" ]; then
    echo "[*] LOGIN_PATTERNS is empty — no login targets selected."
    exit 0
fi

# -e keeps a pattern that starts with a dash from being parsed as an option.
RAW="$TARGET_LOGIN.raw"
rc=0
grep -iE -e "${LOGIN_PATTERNS}" -- "$TARGETS" > "$RAW" || rc=$?
# Exit 1 means no match; anything above is an error such as an invalid regex.
if [ "$rc" -gt 1 ]; then
    echo "[!] grep failed on LOGIN_PATTERNS (exit $rc) — no login targets selected."
    : > "$RAW"
fi
sort -u "$RAW" > "$TARGET_LOGIN"
rm -f "$RAW"

echo "[*] Login targets: $(wc -l < "$TARGET_LOGIN")"
'''
            }
        }

        // ── 5. nuclei scan ────────────────────────────────────────────────
        //  Scan 1: alle targets → exposures + misconfiguration
        //  Scan 2: login-interessante targets → default-logins
        stage('nuclei – scan') {
            steps {
                // Scan 1 runs the exposures + misconfiguration templates against all targets.
                // Scan 2 runs the default-logins templates against the login targets only.
                // Both scans write plain-text and JSONL results that end up in
                // $RUN_DIR/results/nuclei-findings.txt and nuclei-findings.jsonl.
                sh '''#!/usr/bin/env bash
set -euo pipefail

TARGET_FILE="$RUN_DIR/targets.txt"
TARGET_LOGIN="$RUN_DIR/targets-login.txt"
RDIR="$RUN_DIR/results"
# Optional override; defaults to the previously hard-coded location.
TMPL_ROOT="${NUCLEI_TEMPLATES_DIR:-/var/jenkins_home/nuclei-templates}"
USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"

touch "$RDIR/nuclei-findings.txt"
touch "$RDIR/nuclei-findings.jsonl"

if [ ! -s "$TARGET_FILE" ]; then
    echo "[*] No targets — skipping scan."
    exit 0
fi

SEV="${NUCLEI_SEVERITY}"
if [ "${INCLUDE_INFO}" = "true" ]; then
    SEV="info,${SEV}"
fi

# Flags shared by both scans.
COMMON_ARGS=(
    -severity "$SEV"
    -c "${NUCLEI_CONCURRENCY}"
    -rl "${NUCLEI_RATE_LIMIT}"
    -timeout "${NUCLEI_TIMEOUT}"
    -H "User-Agent: ${USER_AGENT}"
    -silent
)

TOTAL=$(wc -l < "$TARGET_FILE")
echo "[*] Scan 1: $TOTAL hosts — exposures + misconfiguration"

# -jle exports JSON Lines; -je would write a single JSON document into the .jsonl file.
# A failing scan stays non-fatal (the targets have already left the queue) but is reported.
RC=0
nuclei \
    -l "$TARGET_FILE" \
    -t "${TMPL_ROOT}/http/exposures/" \
    -t "${TMPL_ROOT}/http/misconfiguration/" \
    "${COMMON_ARGS[@]}" \
    -o "$RDIR/nuclei-findings.txt" \
    -jle "$RDIR/nuclei-findings.jsonl" || RC=$?
if [ "$RC" -ne 0 ]; then
    echo "[!] Scan 1 exited with status $RC — results may be incomplete."
fi

if [ -s "$TARGET_LOGIN" ]; then
    LOGIN_TOTAL=$(wc -l < "$TARGET_LOGIN")
    echo "[*] Scan 2: $LOGIN_TOTAL hosts — default-logins"

    # Write through -o / -jle into scratch files and append them afterwards, so that
    # scan 2 lines have the same file format as scan 1 lines (captured stdout may
    # carry terminal colour codes, which would break the line-based diff) and so
    # that scan 2 findings are present in the JSONL export as well.
    LOGIN_TXT="$RDIR/nuclei-findings-login.txt"
    LOGIN_JSONL="$RDIR/nuclei-findings-login.jsonl"
    : > "$LOGIN_TXT"
    : > "$LOGIN_JSONL"

    RC=0
    nuclei \
        -l "$TARGET_LOGIN" \
        -t "${TMPL_ROOT}/http/default-logins/" \
        "${COMMON_ARGS[@]}" \
        -o "$LOGIN_TXT" \
        -jle "$LOGIN_JSONL" || RC=$?
    if [ "$RC" -ne 0 ]; then
        echo "[!] Scan 2 exited with status $RC — results may be incomplete."
    fi

    cat "$LOGIN_TXT" >> "$RDIR/nuclei-findings.txt"
    cat "$LOGIN_JSONL" >> "$RDIR/nuclei-findings.jsonl"
    rm -f "$LOGIN_TXT" "$LOGIN_JSONL"
else
    echo "[*] No login targets — skipping scan 2."
fi

FINDINGS=$(wc -l < "$RDIR/nuclei-findings.txt")
echo
echo "[*] nuclei scan complete — $FINDINGS findings"
'''
            }
        }

        // ── 6. Diff ───────────────────────────────────────────────────────
        stage('nuclei – diff') {
            steps {
                // Compares this run's findings with the cumulative baseline kept under
                // $STATE_BASE/nuclei, writes the new findings and a human-readable report
                // into the run directory, then updates baseline, history and metadata.
                sh '''#!/usr/bin/env bash
set -euo pipefail

RDIR="$RUN_DIR/results"
NDIR="$STATE_BASE/nuclei"
mkdir -p "$NDIR/history"

FINDINGS="$RDIR/nuclei-findings.txt"
OLD="$NDIR/nuclei-findings-cumulative.txt"
NEW_FINDINGS="$RDIR/nuclei-new-findings.txt"
DIFF="$RDIR/nuclei-diff.txt"

# Line count of a file, or 0 when the file does not exist.
count_lines() {
    if [ -f "$1" ]; then wc -l < "$1"; else echo 0; fi
}

if [ ! -s "$FINDINGS" ]; then
    echo "[*] No findings this run."
    touch "$NEW_FINDINGS" "$DIFF"
    exit 0
fi

NOW="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
QUEUE_LEFT="$(count_lines "$QUEUE_FILE")"
TOTAL="$(count_lines "$FINDINGS")"

if [ ! -f "$OLD" ]; then
    echo "[*] No baseline — all findings are new."
    # The baseline is always stored sorted and de-duplicated.
    sort -u "$FINDINGS" > "$OLD"
    cp "$OLD" "$NEW_FINDINGS"
    {
        echo "========================================"
        echo "  recon-nuclei — Initial Findings"
        echo "========================================"
        echo "Job:       ${JOB_NAME}  #${BUILD_NUMBER}"
        echo "Timestamp: ${NOW}"
        echo "Queue remaining: ${QUEUE_LEFT}"
        echo ""
        echo "=== ALL FINDINGS (${TOTAL}) ==="
        cat "$FINDINGS"
    } > "$DIFF"
else
    # Both inputs must be de-duplicated: comm pairs lines one-to-one, so a known
    # finding that occurs twice in this run would otherwise be reported as new.
    comm -13 <(sort -u "$OLD") <(sort -u "$FINDINGS") > "$NEW_FINDINGS"

    {
        echo "========================================"
        echo "  recon-nuclei — Diff Report"
        echo "========================================"
        echo "Job:       ${JOB_NAME}  #${BUILD_NUMBER}"
        echo "Timestamp: ${NOW}"
        echo "Queue remaining: ${QUEUE_LEFT}"
        echo "Total findings this run: ${TOTAL}"
        echo ""
        echo "=== NEW FINDINGS ($(count_lines "$NEW_FINDINGS")) ==="
        cat "$NEW_FINDINGS"
    } > "$DIFF"

    # Build the merged baseline next to the old one and swap it in with a rename.
    cat "$OLD" "$NEW_FINDINGS" | sort -u > "$NDIR/nuclei-findings-cumulative-new.txt"
    mv "$NDIR/nuclei-findings-cumulative-new.txt" "$OLD"
fi

NEW_COUNT="$(count_lines "$NEW_FINDINGS")"

cp "$FINDINGS" "$NDIR/history/build-${BUILD_NUMBER}.txt"

{
    echo "job=${JOB_NAME}"
    echo "build=${BUILD_NUMBER}"
    echo "timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    echo "targets=$(count_lines "$RUN_DIR/targets.txt")"
    echo "findings=${TOTAL}"
    echo "new_findings=${NEW_COUNT}"
    echo "queue_remaining=${QUEUE_LEFT}"
} > "$NDIR/metadata.txt"

echo "[*] Total findings:  ${TOTAL}"
echo "[*] New findings:    ${NEW_COUNT}"
echo "[*] Queue remaining: ${QUEUE_LEFT}"

if [ -s "$NEW_FINDINGS" ]; then
    echo ""
    echo "[!] NEW FINDINGS:"
    cat "$NEW_FINDINGS"
fi
'''
            }
        }

        // ── 7. Archive ────────────────────────────────────────────────────
        stage('Archive results') {
            steps {
                // Archive every .txt and .jsonl file below the run directory and
                // record fingerprints for them.
                archiveArtifacts(
                    artifacts: 'current-run/**/*.txt,current-run/**/*.jsonl',
                    fingerprint: true
                )
            }
        }
    }

    post {
        always {
            echo "[*] recon-nuclei finished."

            // Sends a summary of this run's findings to a Matrix room. Skipped when the
            // run produced no findings. A failed notification is logged, never fatal.
            withCredentials([
                string(credentialsId: 'MATRIX_TOKEN',   variable: 'TOKEN'),
                string(credentialsId: 'MATRIX_ROOM_ID', variable: 'ROOM_ID')
            ]) {
                // IMPORTANT: Groovy processes backslash escapes inside this ''' string
                // before bash sees it. Every backslash meant for bash, sed, tr or awk
                // is therefore written doubled below; the JSON payload is assembled
                // with printf so that it needs no escaped quotes at all.
                sh '''#!/usr/bin/env bash
set -euo pipefail

FINDINGS="$RUN_DIR/results/nuclei-findings.txt"
NEW_FINDINGS="$RUN_DIR/results/nuclei-new-findings.txt"

# Line count of a file, or 0 when the file does not exist.
count_lines() {
    if [ -f "$1" ]; then wc -l < "$1"; else echo 0; fi
}

# Reads text on stdin and writes it as the inside of a JSON string: backslashes and
# double quotes are escaped, tabs become spaces, other control characters are
# dropped, and lines are joined with the two-character sequence backslash-n.
json_escape() {
    sed -e 's/\\\\/\\\\\\\\/g' -e 's/"/\\\\"/g' \
        | tr '\\t' ' ' \
        | tr -d '\\000-\\011\\013-\\037' \
        | awk 'NR > 1 { printf "%s", "\\\\n" } { printf "%s", $0 }'
}

if [ ! -s "$FINDINGS" ]; then
    echo "[*] No findings — skipping Matrix notification."
    exit 0
fi

QUEUE_REMAINING=$(count_lines "$QUEUE_FILE")
TOTAL=$(count_lines "$FINDINGS")
NEW=$(count_lines "$NEW_FINDINGS")
TIMESTAMP="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

SUMMARY="🔍 *recon-nuclei* — Build #${BUILD_NUMBER}
📅 ${TIMESTAMP}
📋 Queue remaining: ${QUEUE_REMAINING}
📊 Total findings: ${TOTAL}  |  New findings: ${NEW}"

if [ -s "$NEW_FINDINGS" ]; then
    # Cap the detail section at 20 lines to keep the message readable.
    DETAIL="$(head -20 "$NEW_FINDINGS")"
    if [ "$NEW" -gt 20 ]; then
        DETAIL="${DETAIL}
... and $(( NEW - 20 )) more"
    fi
    FULL_MSG="${SUMMARY}

New Findings:
---
${DETAIL}
---"
else
    FULL_MSG="${SUMMARY}

No new findings — all already known."
fi

JSON_MSG="$(printf '%s' "$FULL_MSG" | json_escape)"
# printf only substitutes %s and does not reinterpret the argument.
PAYLOAD="$(printf '{"msgtype":"m.text","body":"%s"}' "$JSON_MSG")"

TX_ID="recon-nuclei-$(date +%s%N)"

# Private response file instead of a fixed, shared /tmp path.
RESPONSE_FILE="$(mktemp)"
trap 'rm -f "$RESPONSE_FILE"' EXIT

# A transport error must not abort the post step; curl still prints 000 as the code.
HTTP_CODE="$(curl -s --max-time 30 -o "$RESPONSE_FILE" -w '%{http_code}' \
    -X PUT \
    -H "Authorization: Bearer ${TOKEN}" \
    -H "Content-Type: application/json" \
    -d "$PAYLOAD" \
    "https://matrix.org/_matrix/client/v3/rooms/${ROOM_ID}/send/m.room.message/${TX_ID}")" || true
HTTP_CODE="${HTTP_CODE:-000}"

if [ "$HTTP_CODE" = "200" ]; then
    echo "[*] Matrix notification sent."
else
    echo "[!] Matrix notification failed (HTTP $HTTP_CODE):"
    cat "$RESPONSE_FILE"
fi
'''
            }
        }
        failure { echo "[!] recon-nuclei FAILED." }
    }
}
