// recon-pipeline/jenkins/Jenkinsfile-recon-httpx

// ═══════════════════════════════════════════════════════════════════════════
//  recon-httpx
//  Probes ONE chunk of resolved hosts per run (pointer-file rotation).
//  Reads from all-resolved-latest.txt written by recon-subfinder.
//
//  CHANGES vs previous version:
//  - parse_state() state fix: all hosts of the current chunk are rewritten
//  - NEW and CHANGED URLs are accumulated in nuclei-queue.txt
//  - Blacklist filter is applied before anything is added to the queue
//  - nuclei is no longer triggered directly
//  - HTTPX_BLACKLIST: hosts are filtered out before the httpx probe
//
//  Recommended schedule: every 30 min (H/30 * * * *).
//  The cron trigger below runs at minutes 20 and 50 of hours 0-22.
// ═══════════════════════════════════════════════════════════════════════════

pipeline {
    agent any

    // Build-level behaviour for this job.
    options {
        // Prefix console output with timestamps.
        timestamps()
        // Never run two builds at once: the stages read and rewrite shared
        // files under STATE_BASE (chunk pointer, cumulative state, queue).
        disableConcurrentBuilds()
        // Abort a build that runs longer than one hour.
        timeout(time: 1, unit: 'HOURS')
    }

    // Scheduled start: minute 20 and minute 50 of every hour from 00 to 22
    // (no runs are scheduled during hour 23).
    triggers {
        cron('20,50 0-22 * * *')
    }

    // Build parameters. Each one is read by the shell steps below as an
    // environment variable of the same name.
    parameters {
        // Number of lines taken from the host list per run ('Select chunk').
        string(
            name: 'CHUNK_SIZE',
            defaultValue: '300',
            description: 'Max hosts to probe per run'
        )
        // Passed to httpx via -r.
        string(
            name: 'RESOLVERS',
            defaultValue: '1.1.1.1,1.0.0.1,8.8.8.8,8.8.4.4,9.9.9.9,149.112.112.112',
            description: 'Comma-separated DNS resolvers'
        )
        // Passed to httpx via -t.
        string(
            name: 'HTTPX_THREADS',
            defaultValue: '25',
            description: 'httpx thread count'
        )
        // Passed to httpx via -timeout.
        string(
            name: 'HTTPX_TIMEOUT',
            defaultValue: '10',
            description: 'httpx timeout in seconds'
        )
        // Used as one case-insensitive regular expression (grep -iE and jq
        // test) in the 'grep – interesting findings' stage.
        // NOTE(review): because this is a regex, the '.' in '.env' matches any
        // character, and short alternatives such as 'dev', 'qa' or 'api' match
        // as substrings — confirm this breadth is intended.
        string(
            name: 'GREP_PATTERNS',
            defaultValue: 'admin|administrator|adminpanel|admin-panel|admin_panel|admincp|cpanel|webadmin|superadmin|siteadmin|login|signin|sign-in|sign_in|logon|log-in|log_in|sso|oauth|openid|saml|ldap|kerberos|portal|dashboard|control-panel|controlpanel|manage|management|manager|console|panel|wp-admin|wp-login|phpmyadmin|adminer|dbadmin|phpinfo|server-status|server-info|auth|authenticate|authentication|authorize|authorization|access-control|rbac|acl|staging|stage|preprod|pre-prod|pre_prod|uat|sit|qa|qe|integration|testing-env|dev|develop|development|sandbox|local|localhost|internal|intranet|corp|corpnet|test|testing|testenv|test-env|demo|poc|pilot|beta|alpha|canary|nightly|feature|hotfix|release|deploy|build|preview|review-app|draft|api|api-v1|api-v2|api-v3|rest|restapi|graphql|grpc|rpc|soap|xmlrpc|endpoint|gateway|proxy|backend|service|microservice|webhook|callback|listener|swagger|openapi|api-docs|jenkins|grafana|kibana|prometheus|elasticsearch|gitlab|github|bitbucket|sonarqube|nexus|artifactory|harbor|docker|kubernetes|rancher|portainer|splunk|datadog|sentry|s3|ec2|lambda|cloudfront|terraform|ansible|vpn|bastion|jump|firewall|loadbalancer|.env|config|secret|secrets|password|passwd|credentials|backup|dump|token|jwt|private|id_rsa|mysql|postgres|redis|mongo|ftp|sftp|smtp|rdp|vnc|ssh|actuator|health|healthcheck|metrics|monitoring',
            description: 'Pipe-separated grep patterns for interesting findings'
        )
        // Hosts matching these rules are removed from the chunk before httpx
        // runs ('Select chunk' stage). Empty by default.
        text(
            name: 'HTTPX_BLACKLIST',
            defaultValue: '',
            description: 'Domains/wildcards to skip entirely — not probed by httpx. One per line. Wildcards: *.example.com'
        )
        // URLs whose host matches these rules are kept out of nuclei-queue.txt
        // ('Queue – feed nuclei' stage). The leading backslash after the
        // opening quotes suppresses the first newline of the default value.
        text(
            name: 'NUCLEI_BLACKLIST',
            defaultValue: '''\
*.hubspot.com
*.twilio.com''',
            description: 'Domains/wildcards to exclude from nuclei queue. One per line. Wildcards: *.example.com'
        )
    }

    // Paths shared by all stages.
    environment {
        // Root of the persistent state (pointer, cumulative state, queue).
        STATE_BASE   = '/var/jenkins_home/recon-state'
        // Per-run scratch directory, relative to the workspace; it is wiped
        // at the start of every run and archived at the end.
        RUN_DIR      = 'current-run'
        // Host list to probe (per the file header, written by recon-subfinder).
        RESOLVED_SRC = '/var/jenkins_home/recon-state/subfinder/all-resolved-latest.txt'
    }

    stages {

        // ── 1. Prepare ────────────────────────────────────────────────────
        stage('Prepare workspace') {
            steps {
                sh '''#!/usr/bin/env bash
set -euo pipefail

# Start every run from an empty run directory.
echo "[*] Preparing clean workspace..."
rm -rf "$RUN_DIR"
mkdir -p "$RUN_DIR/results"

# Prefer the resolved host list. If it is missing or empty, use the raw
# subdomain list instead; if that is unusable too, fail the build.
if [ ! -s "$RESOLVED_SRC" ]; then
    FALLBACK_LIST="$STATE_BASE/subfinder/all-subdomains-latest.txt"
    if [ ! -s "$FALLBACK_LIST" ]; then
        echo "[!] No subfinder state found. Run recon-subfinder first."
        exit 1
    fi
    echo "[!] No resolved list found — falling back to raw subdomain list."
    RESOLVED_SRC="$FALLBACK_LIST"
fi

# Informational summary only; the chunk itself is cut in the next stage.
HOST_COUNT=$(wc -l < "$RESOLVED_SRC")
echo "[*] Input list:  $RESOLVED_SRC"
echo "[*] Total hosts: $HOST_COUNT"
echo "[*] Chunk size:  ${CHUNK_SIZE}"
'''
            }
        }

        // ── 2. Select chunk ───────────────────────────────────────────────
        stage('Select chunk') {
            steps {
                sh '''#!/usr/bin/env bash
set -euo pipefail

# Cuts the next CHUNK_SIZE lines out of the host list, starting at the offset
# stored in the pointer file, advances the pointer (wrapping to 0 at the end
# of the list) and then removes every host matched by HTTPX_BLACKLIST.
#
# Files written under $RUN_DIR:
#   chunk.txt           hosts to probe (after blacklist filtering)
#   httpx-blacklist.txt normalised blacklist rules
#   chunk-offset.txt    0-based offset the chunk started at
#   chunk-actual.txt    number of hosts left in chunk.txt
#   host-total.txt      number of lines in the input list

INPUT="$RESOLVED_SRC"
[ ! -s "$INPUT" ] && INPUT="$STATE_BASE/subfinder/all-subdomains-latest.txt"

POINTER="$STATE_BASE/httpx/chunk-pointer.txt"
mkdir -p "$STATE_BASE/httpx"

TOTAL=$(wc -l < "$INPUT")
CHUNK_SIZE="${CHUNK_SIZE:-300}"

# Read the stored offset; anything non-numeric or past the end restarts at 0.
OFFSET=0
[ -f "$POINTER" ] && OFFSET=$(cat "$POINTER" | tr -d '[:space:]')
[[ "$OFFSET" =~ ^[0-9]+$ ]] || OFFSET=0
[ "$OFFSET" -ge "$TOTAL" ] && OFFSET=0

echo "[*] Total: $TOTAL | Chunk size: $CHUNK_SIZE | Offset: $OFFSET"

# tail is 1-based. "|| true" absorbs the pipefail status tail reports when
# head closes the pipe early.
START=$(( OFFSET + 1 ))
{ tail -n "+${START}" "$INPUT" | head -n "$CHUNK_SIZE" \
    > "$RUN_DIR/chunk.txt"; } || true

ACTUAL=$(wc -l < "$RUN_DIR/chunk.txt")
echo "[*] Chunk: $ACTUAL hosts (lines $START to $(( OFFSET + ACTUAL )))"

# The pointer moves by the unfiltered chunk length so blacklisted hosts are
# skipped instead of being re-selected on the next run.
NEW_OFFSET=$(( OFFSET + ACTUAL ))
[ "$NEW_OFFSET" -ge "$TOTAL" ] && NEW_OFFSET=0
echo "$NEW_OFFSET" > "$POINTER"
echo "[*] Pointer advanced to $NEW_OFFSET"

# ── HTTPX_BLACKLIST filter — remove hosts before probing ─────────────
# Normalise the rules: strip CRs, drop blank/comment lines, trim, lowercase.
# "|| true" covers grep returning 1 when no rule survives.
HTTPX_BL_FILE="$RUN_DIR/httpx-blacklist.txt"
printf '%s\n' "${HTTPX_BLACKLIST}" \
    | sed 's/\r$//' \
    | grep -vE '^[[:space:]]*($|#)' \
    | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' \
    | tr '[:upper:]' '[:lower:]' \
    | sort -u > "$HTTPX_BL_FILE" || true

if [ -s "$HTTPX_BL_FILE" ]; then
    BEFORE=$(wc -l < "$RUN_DIR/chunk.txt")
    FILTERED_CHUNK="$RUN_DIR/chunk-filtered.txt"
    : > "$FILTERED_CHUNK"
    while IFS= read -r host; do
        [ -z "$host" ] && continue
        # Lowercase once per host; the rules are already lowercase.
        host_lower="$(printf '%s' "$host" | tr '[:upper:]' '[:lower:]')"
        blocked=false
        while IFS= read -r rule; do
            [ -z "$rule" ] && continue
            case "$rule" in
                "*."*)
                    # Wildcard rule (literal "*." prefix): blocks the domain
                    # itself and every subdomain of it.
                    suffix="${rule#"*."}"
                    case "$host_lower" in
                        "$suffix"|*."$suffix") blocked=true; break ;;
                    esac
                    ;;
                *)
                    # Exact rule: blocks only this host. An unquoted *.*
                    # pattern here would match every rule containing a dot
                    # and turn exact rules into wildcards on their parent
                    # domain.
                    if [ "$host_lower" = "$rule" ]; then blocked=true; break; fi
                    ;;
            esac
        done < "$HTTPX_BL_FILE"
        if [ "$blocked" = "false" ]; then
            echo "$host" >> "$FILTERED_CHUNK"
        fi
    done < "$RUN_DIR/chunk.txt"
    mv "$FILTERED_CHUNK" "$RUN_DIR/chunk.txt"
    AFTER=$(wc -l < "$RUN_DIR/chunk.txt")
    echo "[*] HTTPX_BLACKLIST: removed $(( BEFORE - AFTER )) hosts, $AFTER remaining"
else
    echo "[*] HTTPX_BLACKLIST: empty — no hosts filtered"
fi

# Hand the chunk geometry to the later stages.
ACTUAL=$(wc -l < "$RUN_DIR/chunk.txt")
echo "$OFFSET" > "$RUN_DIR/chunk-offset.txt"
echo "$ACTUAL" > "$RUN_DIR/chunk-actual.txt"
echo "$TOTAL"  > "$RUN_DIR/host-total.txt"
'''
            }
        }

        // ── 3. httpx probe ────────────────────────────────────────────────
        stage('httpx – probe chunk') {
            steps {
                sh '''#!/usr/bin/env bash
set -euo pipefail

# Probes the hosts in chunk.txt with httpx, once for plain-text output and
# once for JSON lines. Both output files always exist after this stage, even
# when nothing was probed.

CHUNK="$RUN_DIR/chunk.txt"
RDIR="$RUN_DIR/results"
TEXT_OUT="$RDIR/httpx-live.txt"
JSON_OUT="$RDIR/httpx-live.jsonl"

# Missing tool: leave empty results and let the build continue.
if ! command -v httpx >/dev/null 2>&1; then
    echo "[!] httpx not found. Skipping probe."
    touch "$TEXT_OUT"
    touch "$JSON_OUT"
    exit 0
fi

# Nothing selected (or everything blacklisted): same empty-result outcome.
if [ ! -s "$CHUNK" ]; then
    echo "[!] Chunk is empty — nothing to probe."
    touch "$TEXT_OUT" "$JSON_OUT"
    exit 0
fi

echo "[*] Probing $(wc -l < "$CHUNK") hosts..."

# Options common to both invocations.
PROBE_ARGS=(
    -sc -title -td -location -cl -rt -fr
    -r "$RESOLVERS"
    -t "$HTTPX_THREADS"
    -timeout "$HTTPX_TIMEOUT"
    -retries 1
    -H "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
    -rl 50
)

# Pass 1: uncoloured text lines, de-duplicated. A failing httpx is tolerated.
httpx -l "$CHUNK" -silent "${PROBE_ARGS[@]}" -nc \
    | sort -u > "$TEXT_OUT" || true

# Pass 2: the same probe as JSON lines. A failing httpx is tolerated.
httpx -l "$CHUNK" -silent -json "${PROBE_ARGS[@]}" \
    > "$JSON_OUT" || true

echo "[*] Live services found: $(wc -l < "$TEXT_OUT")"
'''
            }
        }

        // ── 4. Diff ───────────────────────────────────────────────────────
        //  STATE-FIX: every host of the current chunk is removed from the old
        //  state and rewritten from the fresh live state.
        //  This keeps broken entries (e.g. a content length stored as the
        //  title) from staying in the state forever.
        stage('httpx – diff') {
            steps {
                sh '''#!/usr/bin/env bash
set -euo pipefail

# Compares this run's live results with the cumulative state, writes the
# NEW / CHANGED / REMOVED lists and a diff report, refreshes the cumulative
# state for the hosts of this chunk, and updates the daily digest and
# metadata files.

RDIR="$RUN_DIR/results"
HDIR="$STATE_BASE/httpx"
mkdir -p "$HDIR/history"

LIVE="$RDIR/httpx-live.txt"
OLD_STATE="$HDIR/httpx-state-cumulative.txt"
NEW_TXT="$RDIR/httpx-new.txt"
NEW_URLS="$RDIR/httpx-new-urls.txt"
CHANGED_TXT="$RDIR/httpx-changed.txt"
CHANGED_URLS="$RDIR/httpx-changed-urls.txt"
REM_TXT="$RDIR/httpx-removed.txt"
DIFF="$RDIR/httpx-diff.txt"

# Field separator of the state files, kept in a variable so no invisible
# literal TAB has to live inside a grep pattern.
TAB="$(printf '\t')"

: > "$NEW_TXT"
: > "$NEW_URLS"
: > "$CHANGED_TXT"
: > "$CHANGED_URLS"
: > "$REM_TXT"

OFFSET=$(cat "$RUN_DIR/chunk-offset.txt")
ACTUAL=$(cat "$RUN_DIR/chunk-actual.txt")
TOTAL=$(cat "$RUN_DIR/host-total.txt")

# ── Parse httpx text line → TAB-separated: URL status title tech ──────
# parse_state INFILE OUTFILE
#   url    = first space-separated token of the line
#   status = first non-empty [bracketed] value
#   Later bracketed values are skipped when they end in "ms" or in a digit
#   followed by "s", or consist only of digits and commas. Of the remaining
#   values the first becomes the title and the last the tech field; if both
#   are the same value the title is left empty.
#   OUTFILE is truncated, then filled with the sorted, de-duplicated rows.
parse_state() {
    local infile="$1"
    local outfile="$2"
    : > "$outfile"
    while IFS= read -r ln; do
        [ -z "$ln" ] && continue
        url="$(echo "$ln" | cut -d' ' -f1)"
        rest="${ln#"$url"}"
        status=""; title=""; tech=""
        count=0
        tmp="$rest"
        while true; do
            # Advance to the text after the next "[", or stop if none is left.
            case "$tmp" in
                *"["*) tmp="${tmp#*[}" ;;
                *) break ;;
            esac
            val="${tmp%%]*}"
            tmp="${tmp#"$val]"}"
            [ -z "$val" ] && continue
            count=$(( count + 1 ))
            if [ "$count" -eq 1 ]; then
                status="$val"
                continue
            fi
            case "$val" in
                *ms) continue ;;
                *[0-9]s) continue ;;
            esac
            # Keep only values containing something other than digits/commas.
            case "$val" in
                *[!0-9,]*) : ;;
                *) continue ;;
            esac
            if [ -z "$title" ]; then
                title="$val"
            fi
            tech="$val"
        done
        if [ "$title" = "$tech" ]; then
            title=""
        fi
        printf '%s\t%s\t%s\t%s\n' "$url" "$status" "$title" "$tech"
    done < "$infile" | sort -u >> "$outfile"
}

LIVE_STATE="$RDIR/httpx-live-state.txt"
parse_state "$LIVE" "$LIVE_STATE"

LIVE_URLS="$RDIR/httpx-live-urls.txt"
cut -f1 "$LIVE_STATE" | sort -u > "$LIVE_URLS"

if [ ! -f "$OLD_STATE" ]; then
    echo "[*] No cumulative baseline — creating one."
    cp "$LIVE_STATE" "$OLD_STATE"
    cp "$LIVE" "$NEW_TXT"
    # The baseline hosts are reported as NEW, so also record their URLs;
    # the queue stage reads httpx-new-urls.txt and would otherwise never
    # see the hosts of the very first run.
    cp "$LIVE_URLS" "$NEW_URLS"
    : > "$CHANGED_TXT"
    : > "$REM_TXT"
    {
        echo "========================================"
        echo "  recon-httpx — Initial Baseline"
        echo "========================================"
        echo "Job:       ${JOB_NAME}  #${BUILD_NUMBER}"
        echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
        echo "Chunk:     lines ${OFFSET}-$(( OFFSET + ACTUAL - 1 )) / $TOTAL"
        echo ""
        echo "Live services ($(wc -l < "$LIVE_URLS")):"
        cat "$LIVE"
    } > "$DIFF"
else
    OLD_URLS="$RDIR/httpx-old-urls.txt"
    cut -f1 "$OLD_STATE" | sort -u > "$OLD_URLS"

    # ── NEW ───────────────────────────────────────────────────────────
    # URLs that are live now but absent from the cumulative state.
    comm -13 "$OLD_URLS" "$LIVE_URLS" > "$NEW_URLS"

    if [ -s "$NEW_URLS" ]; then
        # Select live lines whose first token IS a new URL. A fixed-string
        # substring match would also pick up lines of other hosts that merely
        # start with the same text (https://a.com vs https://a.com.br).
        awk -v urls="$NEW_URLS" '
            BEGIN { while ((getline u < urls) > 0) want[u] = 1 }
            ($1 in want)
        ' "$LIVE" | sort -u > "$NEW_TXT" || : > "$NEW_TXT"
    else
        : > "$NEW_TXT"
    fi

    # ── CHANGED ───────────────────────────────────────────────────────
    # URLs present in both states whose status/title/tech row differs.
    KNOWN_URLS="$RDIR/httpx-known-urls.txt"
    comm -12 "$OLD_URLS" "$LIVE_URLS" > "$KNOWN_URLS"

    : > "$CHANGED_TXT"
    : > "$CHANGED_URLS"

    if [ -s "$KNOWN_URLS" ]; then
        while IFS= read -r url; do
            OLD_LINE="$(grep -F "${url}${TAB}" "$OLD_STATE" | head -1 || true)"
            NEW_LINE="$(grep -F "${url}${TAB}" "$LIVE_STATE" | head -1 || true)"
            if [ -n "$OLD_LINE" ] && [ -n "$NEW_LINE" ] && [ "$OLD_LINE" != "$NEW_LINE" ]; then
                OS="$(echo "$OLD_LINE" | cut -f2)"
                NS="$(echo "$NEW_LINE" | cut -f2)"
                OT="$(echo "$OLD_LINE" | cut -f3)"
                NT="$(echo "$NEW_LINE" | cut -f3)"
                OK="$(echo "$OLD_LINE" | cut -f4)"
                NK="$(echo "$NEW_LINE" | cut -f4)"
                echo "$url" >> "$CHANGED_TXT"
                echo "$url" >> "$CHANGED_URLS"
                [ "$OS" != "$NS" ] && echo "  status: $OS -> $NS" >> "$CHANGED_TXT"
                [ "$OT" != "$NT" ] && echo "  title:  $OT -> $NT" >> "$CHANGED_TXT"
                [ "$OK" != "$NK" ] && echo "  tech:   $OK -> $NK" >> "$CHANGED_TXT"
            fi
        done < "$KNOWN_URLS"
    fi

    # ── REMOVED ───────────────────────────────────────────────────────
    # URLs of this chunk's hosts that were in the state but are not live now.
    PREV_CHUNK_LIVE="$RDIR/httpx-prev-chunk-live-urls.txt"
    : > "$PREV_CHUNK_LIVE"
    while IFS= read -r host; do
        grep -xF "https://${host}" "$OLD_URLS" >> "$PREV_CHUNK_LIVE" || true
        grep -xF "http://${host}"  "$OLD_URLS" >> "$PREV_CHUNK_LIVE" || true
    done < "$RUN_DIR/chunk.txt"
    sort -u "$PREV_CHUNK_LIVE" -o "$PREV_CHUNK_LIVE"
    comm -23 "$PREV_CHUNK_LIVE" "$LIVE_URLS" > "$REM_TXT" || : > "$REM_TXT"

    NEW_COUNT=$(wc -l < "$NEW_URLS")
    CHANGED_COUNT=$(wc -l < "$CHANGED_URLS")
    REM_COUNT=$(wc -l < "$REM_TXT")

    {
        echo "========================================"
        echo "  recon-httpx — Diff Report"
        echo "========================================"
        echo "Job:       ${JOB_NAME}  #${BUILD_NUMBER}"
        echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
        echo "Chunk:     lines ${OFFSET}-$(( OFFSET + ACTUAL - 1 )) / $TOTAL"
        echo "Probed:    $ACTUAL  |  Live: $(wc -l < "$LIVE_URLS")"
        echo ""
        echo "=== NEW hosts (${NEW_COUNT}) ==="
        cat "$NEW_TXT"
        echo ""
        echo "=== CHANGED hosts (${CHANGED_COUNT}) ==="
        cat "$CHANGED_TXT"
        echo ""
        echo "=== REMOVED from this chunk (${REM_COUNT}) ==="
        cat "$REM_TXT"
    } > "$DIFF"

    # ── STATE-FIX: drop every chunk host from the state and rewrite it ──
    # All URLs of the current chunk (not only NEW + CHANGED + REMOVED) are
    # removed from the old state and replaced by the fresh rows, so broken
    # entries heal themselves.
    CHUNK_URLS="$RDIR/httpx-chunk-urls.txt"
    : > "$CHUNK_URLS"
    while IFS= read -r host; do
        echo "https://${host}" >> "$CHUNK_URLS"
        echo "http://${host}"  >> "$CHUNK_URLS"
    done < "$RUN_DIR/chunk.txt"
    sort -u "$CHUNK_URLS" -o "$CHUNK_URLS"

    # Drop a row only when its URL field equals a chunk URL exactly. A
    # fixed-string substring filter would also delete rows of unrelated hosts
    # sharing the prefix, which would then come back as false NEW findings.
    # The list is loaded in BEGIN so an empty chunk drops nothing. There is no
    # "|| true" on purpose: a failure here must not end in an emptied state.
    awk -F'\t' -v drop="$CHUNK_URLS" '
        BEGIN { while ((getline u < drop) > 0) gone[u] = 1 }
        !($1 in gone)
    ' "$OLD_STATE" > "$RDIR/httpx-state-kept.txt"

    # Build the new state next to the old one and swap it in with mv, so an
    # interrupted run cannot leave a truncated state file behind.
    {
        cat "$RDIR/httpx-state-kept.txt"
        cat "$LIVE_STATE"
    } | sort -u > "${OLD_STATE}.tmp"
    mv "${OLD_STATE}.tmp" "$OLD_STATE"
fi

# Keep the raw live output of every build.
cp "$LIVE" "$HDIR/history/build-${BUILD_NUMBER}.txt"

# ── Daily digest ──────────────────────────────────────────────────────
# The digest is reset when the UTC date changes and otherwise appended to.
TODAY="$(date -u +%Y-%m-%d)"
DIGEST="$HDIR/daily-digest.txt"
DIGEST_DATE="$HDIR/daily-digest-date.txt"

if [ -f "$DIGEST_DATE" ]; then
    LAST_DATE="$(cat "$DIGEST_DATE")"
    if [ "$LAST_DATE" != "$TODAY" ]; then
        : > "$DIGEST"
        echo "$TODAY" > "$DIGEST_DATE"
    fi
else
    : > "$DIGEST"
    echo "$TODAY" > "$DIGEST_DATE"
fi

if [ -s "$NEW_TXT" ] || [ -s "$CHANGED_TXT" ]; then
    {
        echo ""
        echo "--- Build #${BUILD_NUMBER} $(date -u +%H:%M:%SZ) ---"
        if [ -s "$NEW_TXT" ]; then
            echo "[NEW]"
            cat "$NEW_TXT"
        fi
        if [ -s "$CHANGED_TXT" ]; then
            echo "[CHANGED]"
            cat "$CHANGED_TXT"
        fi
    } >> "$DIGEST"
fi

NEW_COUNT=$(wc -l < "$NEW_TXT")
CHANGED_COUNT=$(wc -l < "$CHANGED_URLS" 2>/dev/null || echo 0)
REM_COUNT=$(wc -l < "$REM_TXT")

# Key=value summary of the latest run.
{
    echo "job=${JOB_NAME}"
    echo "build=${BUILD_NUMBER}"
    echo "timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    echo "offset=${OFFSET}"
    echo "chunk_size=${ACTUAL}"
    echo "total_hosts=${TOTAL}"
    echo "live=$(wc -l < "$LIVE_URLS")"
    echo "new=${NEW_COUNT}"
    echo "changed=${CHANGED_COUNT}"
    echo "removed=${REM_COUNT}"
} > "$HDIR/metadata.txt"

echo
echo "[*] httpx diff complete."
echo "    Live:    $(wc -l < "$LIVE_URLS")"
echo "    New:     ${NEW_COUNT}"
echo "    Changed: ${CHANGED_COUNT}"
echo "    Removed: ${REM_COUNT}"
'''
            }
        }

        // ── 5. Grep – interesting findings ───────────────────────────────
        stage('grep – interesting findings') {
            steps {
                sh '''#!/usr/bin/env bash
set -euo pipefail

# Collects results that match GREP_PATTERNS (case-insensitive extended regex)
# into three files: matching text lines, matching JSON records and matching
# chunk hostnames.

RDIR="$RUN_DIR/results"
LIVE="$RUN_DIR/results/httpx-live.txt"
JSON="$RUN_DIR/results/httpx-live.jsonl"
PATTERNS="${GREP_PATTERNS:-admin|login|staging|dev|test|api}"

echo "[*] Scanning for interesting patterns..."

# Text output: start empty, fill only when there is something to scan.
TEXT_HITS="$RDIR/grep-interesting.txt"
: > "$TEXT_HITS"
if [ -s "$LIVE" ]; then
    grep -iE "$PATTERNS" "$LIVE" | sort -u > "$TEXT_HITS" || : > "$TEXT_HITS"
fi

# JSON output: with jq, test url, title and the joined tech list; without jq
# (or with empty JSON) fall back to grepping the raw lines.
JSON_HITS="$RDIR/grep-interesting.jsonl"
if [ ! -s "$JSON" ] || ! command -v jq >/dev/null 2>&1; then
    grep -iE "$PATTERNS" "$JSON" 2>/dev/null | sort -u > "$JSON_HITS" \
        || : > "$JSON_HITS"
else
    jq -c --arg p "$PATTERNS" \
        'select(
            (.url         // "" | test($p; "i")) or
            (.title       // "" | test($p; "i")) or
            ((.tech // []) | map(.) | join(" ") | test($p; "i"))
        )' "$JSON" | sort -u > "$JSON_HITS" || : > "$JSON_HITS"
fi

# Hostnames of the chunk itself, whether or not they answered.
HOST_HITS="$RDIR/grep-interesting-subdomains.txt"
grep -iE "$PATTERNS" "$RUN_DIR/chunk.txt" | sort -u > "$HOST_HITS" \
    || : > "$HOST_HITS"

echo "[*] Text hits:      $(wc -l < "$TEXT_HITS")"
echo "[*] JSON hits:      $(wc -l < "$JSON_HITS")"
echo "[*] Subdomain hits: $(wc -l < "$HOST_HITS")"

# Preview in the console log, only when there are text hits.
[ -s "$TEXT_HITS" ] || exit 0
echo ""
echo "[*] Top 30 interesting (text):"
head -n 30 "$TEXT_HITS"
'''
            }
        }

        // ── 6. Queue – write NEW and CHANGED URLs to nuclei-queue.txt ────
        //  - Blacklist filter (wildcards via *.domain.com)
        //  - sort -u prevents duplicates
        //  - the nuclei job reads the queue on its own via cron
        stage('Queue – feed nuclei') {
            steps {
                sh '''#!/usr/bin/env bash
set -euo pipefail

# Appends this run's NEW and CHANGED URLs to the persistent nuclei queue,
# skipping every URL whose host matches NUCLEI_BLACKLIST. The queue file is
# kept sorted and free of duplicates.

RDIR="$RUN_DIR/results"
NDIR="$STATE_BASE/nuclei"
mkdir -p "$NDIR"

QUEUE="$NDIR/nuclei-queue.txt"
NEW_URLS="$RDIR/httpx-new-urls.txt"
CHANGED_URLS="$RDIR/httpx-changed-urls.txt"

touch "$QUEUE"

# ── Candidates: NEW + CHANGED ────────────────────────────────────────
# Either input file may be missing; that is not an error.
CANDIDATES="$RDIR/nuclei-candidates.txt"
{
    cat "$NEW_URLS"    2>/dev/null || true
    cat "$CHANGED_URLS" 2>/dev/null || true
} | sort -u > "$CANDIDATES"

if [ ! -s "$CANDIDATES" ]; then
    echo "[*] No new/changed hosts — nothing to queue."
    exit 0
fi

echo "[*] Candidates before blacklist: $(wc -l < "$CANDIDATES")"

# ── Blacklist filter ──────────────────────────────────────────────────
# NUCLEI_BLACKLIST holds one rule per line.
#   Wildcard: *.example.com   → example.com and all of its subdomains
#   Exact:    sub.example.com → only this host
# Normalise the rules: strip CRs, drop blank/comment lines, trim, lowercase.
# "|| true" covers grep returning 1 when no rule survives.
BLACKLIST_FILE="$RDIR/nuclei-blacklist.txt"
printf '%s\n' "${NUCLEI_BLACKLIST}" \
    | sed 's/\r$//' \
    | grep -vE '^[[:space:]]*($|#)' \
    | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' \
    | tr '[:upper:]' '[:lower:]' \
    | sort -u > "$BLACKLIST_FILE" || true

FILTERED="$RDIR/nuclei-filtered.txt"
: > "$FILTERED"

if [ -s "$BLACKLIST_FILE" ]; then
    while IFS= read -r candidate; do
        [ -z "$candidate" ] && continue
        # Reduce the URL to its bare host: strip scheme, path and port.
        host="${candidate#http://}"; host="${host#https://}"; host="${host%%/*}"; host="${host%%:*}"
        # Lowercase once per candidate; the rules are already lowercase.
        host_lower="$(printf '%s' "$host" | tr '[:upper:]' '[:lower:]')"
        blocked=false
        while IFS= read -r rule; do
            [ -z "$rule" ] && continue
            case "$rule" in
                "*."*)
                    # Wildcard rule (literal "*." prefix).
                    suffix="${rule#"*."}"
                    case "$host_lower" in
                        "$suffix"|*."$suffix") blocked=true; break ;;
                    esac
                    ;;
                *)
                    # Exact rule. An unquoted *.* pattern here would match
                    # every rule containing a dot and make an exact rule
                    # block its whole parent domain.
                    if [ "$host_lower" = "$rule" ]; then blocked=true; break; fi
                    ;;
            esac
        done < "$BLACKLIST_FILE"
        if [ "$blocked" = "false" ]; then
            echo "$candidate" >> "$FILTERED"
        fi
    done < "$CANDIDATES"
else
    cp "$CANDIDATES" "$FILTERED"
fi

echo "[*] Candidates after blacklist:  $(wc -l < "$FILTERED")"

if [ ! -s "$FILTERED" ]; then
    echo "[*] All candidates blacklisted — nothing to queue."
    exit 0
fi

# ── Write to the queue without duplicates ─────────────────────────────
# Merge into a temp file and move it into place so the queue is replaced
# in a single step.
ADDED_BEFORE=$(wc -l < "$QUEUE")
{ cat "$QUEUE"; cat "$FILTERED"; } | sort -u > "${QUEUE}.tmp"
mv "${QUEUE}.tmp" "$QUEUE"
ADDED_AFTER=$(wc -l < "$QUEUE")
ADDED=$(( ADDED_AFTER - ADDED_BEFORE ))

echo "[*] Added to queue: $ADDED"
echo "[*] Queue total:    $(wc -l < "$QUEUE")"
'''
            }
        }

        // ── 7. Archive ────────────────────────────────────────────────────
        stage('Archive results') {
            steps {
                // Archive and fingerprint every .txt and .jsonl file found
                // anywhere under the run directory.
                archiveArtifacts(
                    artifacts: 'current-run/**/*.txt,current-run/**/*.jsonl',
                    fingerprint: true
                )
            }
        }
    }

    // Console notices emitted after the stages have run.
    post {
        // Printed after every build, whatever its result.
        always { echo "[*] recon-httpx finished." }
        // Printed in addition when the build failed.
        failure { echo "[!] recon-httpx FAILED." }
    }
}