commit for version used in evaluation of thesis

2026-03-29 10:03:18 +02:00 · 2026-03-29 10:03:18 +02:00 · 72635dc7b9
commit 72635dc7b9
27 changed files with 6084 additions and 0 deletions
--- a/50
+++ b/50
@ -0,0 +1,50 @@
+# Build configuration. Simple (:=) assignment is used throughout so every
+# value is expanded exactly once, when the Makefile is parsed.
+BINARY     := guenther
+BUILD_DIR  := build
+CMD        := ./cmd/pipeline/main.go
+CONFIG     := configs/default.yaml
+
+# Container image and Go build settings shared by the build targets.
+GO_IMAGE   := golang:bookworm
+BUILD_TAGS := duckdb_arrow
+LDFLAGS    := -s -w
+
+GO_BUILD_FLAGS := -tags=$(BUILD_TAGS) -buildvcs=false -ldflags='$(LDFLAGS)'
+
+# ── Targets ───────────────────────────────────────────────────────────────────
+
+.PHONY: all build build-local test clean run help
+
+all: build
+
+## build: Build the binary inside a Docker container (no local toolchain needed)
+# The project root is bind-mounted at /app. $(CURDIR) is set by make itself
+# (also after `make -C <dir>`), whereas $(PWD) is only inherited from the
+# caller's environment and can point at the wrong directory. The path is
+# quoted so that checkouts in directories containing spaces still mount.
+build:
+	@mkdir -p $(BUILD_DIR)
+	docker run --rm \
+		-v "$(CURDIR)":/app:Z \
+		-w /app \
+		$(GO_IMAGE) \
+		sh -c "apt-get update -qq && \
+		       apt-get install -y -qq gcc libc6-dev && \
+		       CGO_ENABLED=1 go build $(GO_BUILD_FLAGS) -o $(BUILD_DIR)/$(BINARY) $(CMD) && \
+		       echo BUILD_OK" \
+		2>&1
+
+## build-local: Build the binary using the local Go toolchain (requires gcc)
+build-local:
+	@mkdir -p $(BUILD_DIR)
+	CGO_ENABLED=1 go build $(GO_BUILD_FLAGS) -o $(BUILD_DIR)/$(BINARY) $(CMD)
+
+## test: Run all tests (requires local Go toolchain with gcc)
+test:
+	CGO_ENABLED=1 go test -v -tags=$(BUILD_TAGS) ./...
+
+## run: Run the pipeline with the default config (binary must be built first)
+# There is deliberately no rule for the binary itself: if it is missing, make
+# stops with "No rule to make target" instead of guessing which build to run.
+run: $(BUILD_DIR)/$(BINARY)
+	./$(BUILD_DIR)/$(BINARY) -config $(CONFIG)
+
+## clean: Remove build artefacts
+clean:
+	rm -rf $(BUILD_DIR)
+
+## help: Show this help message
+# Prints every line of this Makefile that starts with "## ".
+help:
+	@grep -E '^## ' $(MAKEFILE_LIST) | sed 's/^## /  /'
--- a/README.md
+++ b/README.md
@ -0,0 +1,212 @@
+# guenther
+
+A streaming anomaly detection pipeline for Managed-File-Transfer (MFT) infrastructure.
+guenther ingests system metrics and application logs in real time, extracts structured
+feature vectors per time window, and scores them with an ensemble of unsupervised
+detectors — without any labelled training data.
+
+---
+
+## How it works
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│  Ingestion                                                  │
+│  MetricCollector (/proc)  LogCollector (inotify + Drain3)  │
+│  SystemctlCollector (service states)                        │
+└────────────────────┬────────────────────────────────────────┘
+                     │ channels (backpressure)
+┌────────────────────▼────────────────────────────────────────┐
+│  Transformation                                             │
+│  TransformEngine  –  30 s tumbling windows via DuckDB       │
+│  45 base features + N Drain3 parameter aggregates           │
+└────────────────────┬────────────────────────────────────────┘
+                     │
+┌────────────────────▼────────────────────────────────────────┐
+│  Detection                                                  │
+│  EnsembleDetector  (RRCF fast/mid/slow · COPOD · MAD)       │
+│  SEAD online weight adaptation · auto-scaling (3 stages)   │
+└────────────────────┬────────────────────────────────────────┘
+                     │
+              anomalies.jsonl
+```
+
+### Packages
+
+| Path                 | Responsibility                                                                   |
+| -------------------- | -------------------------------------------------------------------------------- |
+| `cmd/pipeline`       | Entry point, wiring, graceful shutdown                                           |
+| `internal/collector` | `MetricCollector` (`/proc`), `LogCollector` (inotify), `SystemctlCollector`      |
+| `internal/transform` | `TransformEngine` — DuckDB windowed aggregation                                  |
+| `internal/detect`    | `EnsembleDetector`, RRCF, COPOD, MAD, IsolationForest, SEAD, `ScalingController` |
+| `internal/drain3`    | Masking / parameter extraction wrapper around Drain3                             |
+| `internal/config`    | YAML config loading and regex compilation                                        |
+| `internal/health`    | `HealthMonitor` — per-stage counters                                             |
+| `pkg/types`          | Shared types: `LogEvent`, `MetricSnapshot`, `FeatureVector`, `AnomalyResult`     |
+
+---
+
+## Requirements
+
+| Dependency      | Notes                                                        |
+| --------------- | ------------------------------------------------------------ |
+| Docker          | Required for the containerised build (recommended)           |
+| Go ≥ 1.25       | Only needed for local builds                                 |
+| gcc / libc6-dev | CGO is required by `go-duckdb`                               |
+| Linux           | Metric collection reads `/proc`; not supported on other OSes |
+
+---
+
+## Building
+
+### Docker (recommended — no local toolchain needed)
+
+```bash
+make build
+```
+
+The binary is written to `build/guenther`.
+
+### Local (requires Go + gcc)
+
+```bash
+make build-local
+```
+
+---
+
+## Running
+
+```bash
+./build/guenther -config configs/default.yaml
+```
+
+guenther shuts down cleanly on `SIGINT` or `SIGTERM`.
+
+---
+
+## Testing
+
+```bash
+make test
+```
+
+---
+
+## Configuration
+
+guenther is configured via a single YAML file (default: `configs/default.yaml`).
+
+```yaml
+ingestion:
+  log_path: "/path/to/log/file/transfer.log" # file to tail
+  net_interface: "ens4" # interface for /proc/net/dev
+  disk_device: "vda1" # device for /proc/diskstats
+  systemctl_services:
+    - service1.service
+    - service2.service
+
+transformation:
+  window_size: "30s" # tumbling window length
+  db_path: "data/pipeline.duckdb" # DuckDB file (use :memory: for ephemeral)
+
+drain:
+  depth: 4
+  sim_threshold: 0.4
+  max_children: 100
+  max_clusters: 1000
+  masking_patterns: # applied in order before template mining
+    - name: "uuid"
+      pattern: '\b[0-9a-fA-F]{8}-...\b'
+      replace: "<UUID>"
+      type: "string"
+    # ... see configs/default.yaml for the full set
+
+detector:
+  method: "ensemble" # only read when ensemble.enabled = false: "copod" | "rrcf"; any other value selects isolation_forest
+  ensemble:
+    enabled: true
+    method: "sead" # avg | max | median | sead
+    contamination: 0.15
+    sead:
+      eta: 0.1
+      lambda: 0.01
+  auto_scaling:
+    enabled: true
+    high_threshold: 75.0 # CPU % → switch to mid detector
+    critical_threshold: 90.0 # CPU % → switch to fast detector
+    down_threshold: 50.0
+    high_duration: 90.0 # seconds load must persist before scaling
+    critical_duration: 120.0
+    down_duration: 120.0
+  rrcf_variants:
+    fast: { num_trees: 50, tree_size: 32, threshold_percentile: 0.85 }
+    mid: { num_trees: 150, tree_size: 64, threshold_percentile: 0.85 }
+    slow: { num_trees: 200, tree_size: 128, threshold_percentile: 0.85 }
+  copod:
+    buffer_size: 50
+    threshold: 0.3
+  mad:
+    threshold: 3.5
+    calibration_size: 50
+
+output:
+  feature_log_path: "logs/features.jsonl"
+  anomaly_log_path: "logs/anomalies.jsonl"
+```
+
+### Masking pattern types
+
+Patterns with `type: float` extract a named parameter into `FeatureVector.ParamAvg`;
+patterns with `type: string` replace the match in-place before template mining.
+Named patterns (`name != ""`) are aggregated as features per window.
+
+---
+
+## Output
+
+**`logs/anomalies.jsonl`** — one JSON object per scored window:
+
+```json
+{
+  "timestamp": "2026-01-15T14:32:00Z",
+  "score": 0.8721,
+  "is_anomaly": true,
+  "confidence": 0.91,
+  "method": "sead_ensemble",
+  "details": "rrcf_slow=0.91 copod=0.83 mad=0.78"
+}
+```
+
+**`logs/features.jsonl`** — raw feature vectors for offline analysis (optional).
+
+---
+
+## Project layout
+
+```
+guenther/
+├── cmd/
+│   └── pipeline/
+│       └── main.go
+├── internal/
+│   ├── collector/
+│   ├── config/
+│   ├── detect/
+│   ├── drain3/
+│   ├── health/
+│   └── transform/
+├── pkg/
+│   └── types/
+├── configs/
+│   └── default.yaml
+├── build/              # created by `make build`
+├── Makefile
+└── README.md
+```
+
+---
+
+## License
+
+This project was developed as part of a Bachelor's thesis.
--- a/cmd/pipeline/main.go
+++ b/cmd/pipeline/main.go
@ -0,0 +1,294 @@
+// Command pipeline is the entry point for the MFT anomaly detection pipeline.
+//
+// Startup order:
+//  1. Load and compile config (masking patterns → *regexp.Regexp).
+//  2. Allocate channels with fixed capacities to enable backpressure.
+//  3. Start HealthMonitor.
+//  4. Start collectors (MetricCollector, LogCollector, SystemctlCollector).
+//  5. Start TransformEngine (DuckDB, schema, pre-compiled query).
+//  6. Start DetectionLayer.
+//  7. Start anomaly sink goroutine.
+//  8. Wait for SIGINT / SIGTERM.
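+//  9. Graceful shutdown, upstream to downstream: wait for the collectors and
+//     the TransformEngine, then drain the DetectionLayer and the anomaly
+//     sink, and finally wait for the HealthMonitor.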
+package main
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"sync"
+	"syscall"
+	"time"
+
+	"codeberg.org/pata1704/guenther/internal/collector"
+	"codeberg.org/pata1704/guenther/internal/config"
+	"codeberg.org/pata1704/guenther/internal/detect"
+	"codeberg.org/pata1704/guenther/internal/health"
+	"codeberg.org/pata1704/guenther/internal/transform"
+	"codeberg.org/pata1704/guenther/pkg/types"
+)
+
+func main() { // parses flags, wires and starts every stage, blocks until SIGINT/SIGTERM, then shuts down
+	cfgPath := flag.String("config", "configs/default.yaml", "path to config file")
+	flag.Parse()
+	// Load the YAML config and compile its masking patterns; either failure is fatal.
+	cfg, err := config.LoadConfig(*cfgPath)
+	if err != nil {
+		log.Fatalf("load config %q: %v", *cfgPath, err)
+	}
+	if err := cfg.Compile(); err != nil {
+		log.Fatalf("compile masking patterns: %v", err)
+	}
+	// ctx is cancelled on SIGINT/SIGTERM; every Start call below receives it.
+	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
+	defer cancel()
+	// Fixed-capacity channels that connect the stages to each other.
+	logChan := make(chan types.LogEvent, 1_000)
+	metricChan := make(chan types.MetricSnapshot, 100)
+	serviceStatusChan := make(chan types.ServiceStatus, 100)
+	featureChan := make(chan types.FeatureVector, 10)
+	anomalyChan := make(chan types.AnomalyResult, 50)
+	// Health monitor; the 5 s argument is presumably its reporting interval — confirm in internal/health.
+	hm := health.NewHealthMonitor()
+	hm.Start(ctx, 5*time.Second)
+	// Collectors: each gets its output channel plus hm.Chan().
+	metricColl := collector.NewMetricCollector(
+		metricChan, hm.Chan(),
+		time.Second,
+		cfg.Ingestion.NetInterface,
+		cfg.Ingestion.DiskDevice,
+	)
+	logColl := collector.NewLogCollector(cfg, logChan, hm.Chan())
+	sysColl := collector.NewSystemctlCollector(
+		cfg.Ingestion.SystemctlServices,
+		5*time.Second,
+		serviceStatusChan,
+		hm.Chan(),
+	)
+	// Only LogCollector.Start returns an error here; that error aborts the program.
+	metricColl.Start(ctx)
+	if err := logColl.Start(ctx); err != nil {
+		log.Fatalf("start log collector: %v", err)
+	}
+	sysColl.Start(ctx)
+	// The transform engine is wired to the three collector channels and to featureChan.
+	engine, err := transform.NewTransformEngine(cfg, logChan, metricChan, serviceStatusChan, featureChan, hm.Chan())
+	if err != nil {
+		log.Fatalf("create transform engine: %v", err)
+	}
+	engine.Start(ctx)
+	// Detector and detection layer (wired to featureChan and anomalyChan).
+	detector, err := buildDetector(cfg)
+	if err != nil {
+		log.Fatalf("build detector: %v", err)
+	}
+	detLayer := detect.NewDetectionLayer(detector, featureChan, anomalyChan, hm.Chan())
+	// Auto-scaling is attached only to a *detect.SwitchableDetector; otherwise a warning is logged.
+	if cfg.Detection.AutoScaling.Enabled {
+		if sd, ok := detector.(*detect.SwitchableDetector); ok {
+			sc := detect.NewScalingController(
+				sd,
+				cfg.Detection.AutoScaling.HighThreshold,
+				cfg.Detection.AutoScaling.CritThreshold,
+				cfg.Detection.AutoScaling.DownThreshold,
+				cfg.Detection.AutoScaling.HighDuration,
+				cfg.Detection.AutoScaling.CritDuration,
+				cfg.Detection.AutoScaling.DownDuration,
+			)
+			detLayer.SetScalingController(sc)
+			log.Println("detector: auto-scaling enabled")
+		} else {
+			log.Println("warning: auto-scaling requested but detector is not switchable (requires SEAD ensemble)")
+		}
+	}
+
+	detLayer.Start(ctx)
+	// Anomaly sink: the log file is optional; with a nil writer writeJSON does nothing.
+	anomalyLog := openLog(cfg.Output.AnomalyLogPath, "anomaly log")
+	if anomalyLog != nil {
+		defer anomalyLog.Close()
+	}
+	anomalyWriter := maybeWriter(anomalyLog)
+	// The sink goroutine runs until anomalyChan is closed during shutdown.
+	var sinkWg sync.WaitGroup
+	sinkWg.Add(1)
+	go func() {
+		defer sinkWg.Done()
+		for res := range anomalyChan {
+			writeJSON(anomalyWriter, res)
+			if res.IsAnomaly {
+				log.Printf("[ANOMALY] time=%s score=%.4f method=%s details=%s",
+					res.Timestamp.Format(time.RFC3339), res.Score, res.Method, res.Details)
+			}
+		}
+	}()
+	// NOTE(review): for the SEAD method buildDetector returns a SwitchableDetector, so the assertion below looks like it cannot match in that case — confirm.
+	// Optionally log SEAD weights periodically (when using SEAD ensemble).
+	if ens, ok := detector.(*detect.EnsembleDetector); ok {
+		go func() {
+			t := time.NewTicker(60 * time.Second)
+			defer t.Stop()
+			for {
+				select {
+				case <-ctx.Done():
+					return
+				case <-t.C:
+					if ws := ens.WeightSummary(); ws != "" {
+						log.Printf("[SEAD weights] %s", ws)
+					}
+				}
+			}
+		}()
+	}
+
+	log.Println("pipeline started – waiting for SIGINT / SIGTERM")
+	<-ctx.Done()
+	log.Println("shutting down…")
+	// Shutdown: wait for the upstream stages before closing the channels the downstream stages read.
+	metricColl.Wait()
+	logColl.Wait()
+	engine.Wait()
+	// NOTE(review): sysColl is started above but never waited on here — confirm this is intentional.
+	close(featureChan)
+	detLayer.Wait()
+	// Closing anomalyChan ends the sink goroutine's range loop.
+	close(anomalyChan)
+	sinkWg.Wait()
+
+	hm.Wait()
+	log.Println("pipeline stopped")
+}
+
+// buildDetector turns the detector section of cfg into a ready-to-use
+// detect.AnomalyDetector.
+//
+// Selection rules:
+//   - detector.ensemble.enabled = false: detector.method chooses a single
+//     detector ("copod" or "rrcf"); every other value yields the isolation
+//     forest with its built-in parameters.
+//   - detector.ensemble.enabled = true: an EnsembleDetector is created with
+//     detector.ensemble.method (an empty method falls back to
+//     detect.EnsembleAVG). For the SEAD method the ensemble's SEAD component,
+//     when it is non-nil, is returned wrapped in a SwitchableDetector.
+func buildDetector(cfg *config.Config) (detect.AnomalyDetector, error) {
+	// Single-detector path.
+	if !cfg.Detection.Ensemble.Enabled {
+		switch cfg.Detection.Method {
+		case "copod":
+			return detect.NewCOPODDetector(
+				cfg.Detection.COPOD.BufferSize,
+				cfg.Detection.COPOD.Threshold,
+			)
+		case "rrcf":
+			return detect.NewRRCFDetector(
+				cfg.Detection.RRCF.NumTrees,
+				cfg.Detection.RRCF.TreeSize,
+				0,
+				cfg.Detection.RRCF.ThresholdPercentile,
+			), nil
+		default: // "isolation_forest"
+			return detect.NewIsolationForestDetector(
+				5_000, 100, 100, 256, 0.05, 10.0,
+			), nil
+		}
+	}
+
+	// Ensemble path.
+	ensembleMethod := detect.EnsembleMethod(cfg.Detection.Ensemble.Method)
+	if ensembleMethod == "" {
+		ensembleMethod = detect.EnsembleAVG // backward-compat default
+	}
+	contamination := cfg.Detection.Ensemble.Contamination
+
+	// SEAD settings copied from the YAML config; zero values of Eta,
+	// QuantileWindow and MinDataPoints are replaced by defaults.
+	seadOpts := detect.SEADConfig{
+		Eta:            cfg.Detection.Ensemble.SEAD.Eta,
+		Lambda:         cfg.Detection.Ensemble.SEAD.Lambda,
+		QuantileWindow: cfg.Detection.Ensemble.SEAD.QuantileWindow,
+		MinDataPoints:  cfg.Detection.Ensemble.SEAD.MinDataPoints,
+		Contamination:  contamination,
+	}
+	if seadOpts.Eta == 0 {
+		seadOpts.Eta = 0.10
+	}
+	if seadOpts.QuantileWindow == 0 {
+		seadOpts.QuantileWindow = 300
+	}
+	if seadOpts.MinDataPoints == 0 {
+		seadOpts.MinDataPoints = 20
+	}
+
+	variants := detect.RRCFVariantsConfig{
+		Fast: detect.RRCFVariantConfig{
+			NumTrees:            cfg.Detection.RRCFVariants.Fast.NumTrees,
+			TreeSize:            cfg.Detection.RRCFVariants.Fast.TreeSize,
+			ThresholdPercentile: cfg.Detection.RRCFVariants.Fast.ThresholdPercentile,
+		},
+		Mid: detect.RRCFVariantConfig{
+			NumTrees:            cfg.Detection.RRCFVariants.Mid.NumTrees,
+			TreeSize:            cfg.Detection.RRCFVariants.Mid.TreeSize,
+			ThresholdPercentile: cfg.Detection.RRCFVariants.Mid.ThresholdPercentile,
+		},
+		Slow: detect.RRCFVariantConfig{
+			NumTrees:            cfg.Detection.RRCFVariants.Slow.NumTrees,
+			TreeSize:            cfg.Detection.RRCFVariants.Slow.TreeSize,
+			ThresholdPercentile: cfg.Detection.RRCFVariants.Slow.ThresholdPercentile,
+		},
+	}
+
+	ensemble, err := detect.NewEnsembleDetector(
+		ensembleMethod,
+		cfg.Detection.COPOD.BufferSize,
+		cfg.Detection.COPOD.Threshold,
+		variants,
+		contamination,
+		seadOpts,
+	)
+	if err != nil {
+		return nil, fmt.Errorf("build ensemble detector (%s): %w", ensembleMethod, err)
+	}
+	log.Printf("detector: Ensemble method=%s contamination=%.2f", ensembleMethod, contamination)
+
+	if ensembleMethod != detect.EnsembleSEAD {
+		return ensemble, nil
+	}
+	log.Printf("detector: SEAD η=%.3f λ=%.3f quantile_window=%d",
+		seadOpts.Eta, seadOpts.Lambda, seadOpts.QuantileWindow)
+
+	// SEAD is handed out inside a SwitchableDetector (required for 3-stage
+	// scaling); without a SEAD component the plain ensemble is returned.
+	core := ensemble.SEAD()
+	if core == nil {
+		return ensemble, nil
+	}
+	return detect.NewSwitchableDetector(core), nil
+}
+
+// openLog opens path in append mode, creating the file and any missing
+// parent directories. label is only used in warning messages.
+//
+// It returns nil when path is empty or when the directory or file cannot be
+// created; in the failure case a warning is logged, so callers must treat a
+// nil result as "logging disabled".
+func openLog(path, label string) *os.File {
+	if path == "" {
+		return nil
+	}
+	// Configured paths such as logs/anomalies.jsonl point into a directory
+	// that may not exist yet; create it so the log is not silently disabled.
+	if dir := filepath.Dir(path); dir != "." {
+		if err := os.MkdirAll(dir, 0o755); err != nil {
+			log.Printf("warning: cannot create directory for %s %q: %v", label, path, err)
+			return nil
+		}
+	}
+	f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
+	if err != nil {
+		log.Printf("warning: cannot open %s %q: %v", label, path, err)
+		return nil
+	}
+	return f
+}
+
+func maybeWriter(f *os.File) *bufio.Writer {
+	if f == nil {
+		return nil
+	}
+	return bufio.NewWriterSize(f, 64*1024)
+}
+
+// writeJSON serialises v as a single JSON line on w and flushes right away.
+// A nil writer makes the call a no-op; marshal, write and flush failures are
+// logged and otherwise ignored.
+func writeJSON(w *bufio.Writer, v any) {
+	if w == nil {
+		return
+	}
+
+	encoded, marshalErr := json.Marshal(v)
+	if marshalErr != nil {
+		log.Printf("marshal: %v", marshalErr)
+		return
+	}
+
+	line := append(encoded, '\n')
+	if _, writeErr := w.Write(line); writeErr != nil {
+		log.Printf("write log: %v", writeErr)
+		return
+	}
+
+	flushErr := w.Flush()
+	if flushErr != nil {
+		log.Printf("flush log: %v", flushErr)
+	}
+}
--- a/configs/default.yaml
+++ b/configs/default.yaml
@ -0,0 +1,123 @@
+ingestion:
+  log_path: "/path/to/log/file/transfer.log"
+  net_interface: "ens4"
+  disk_device: "vda1"
+  systemctl_services:
+    - service1.service
+    - service2.service
+
+transformation:
+  window_size: "30s"
+  db_path: "data/pipeline_test.duckdb"
+
+drain:
+  depth: 4
+  sim_threshold: 0.4
+  max_children: 100
+  max_clusters: 1000
+  masking_patterns:
+    - name: "loglevel"
+      pattern: '^(\S+)'
+      replace: "<LOGLEVEL>"
+      type: "string"
+
+    - name: ""
+      pattern: '(\d{4}-\d{2}-\d{2})'
+      replace: "<DATE>"
+      type: "string"
+
+    - name: ""
+      pattern: '(\d{2}:\d{2}:\d{2}\.\d{6})'
+      replace: "<TIME>"
+      type: "string"
+
+    - name: "uuid"
+      pattern: '\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b'
+      replace: "<UUID>"
+      type: "string"
+
+    - name: ""
+      pattern: '\+\]'
+      replace: "<SESSION>"
+      type: "string"
+
+    - name: ""
+      pattern: "(/[a-zA-Z0-9._-]+)+"
+      replace: "<PATH>"
+      type: "string"
+
+    - name: ""
+      pattern: '(sync-file-reader|checksum|xp-network-(?:sender|receiver)|aes-crypt)-\d+:'
+      replace: "<MODULE>:"
+      type: "string"
+
+    - name: "datarate"
+      pattern: 'datarate=\s*(\d+(?:\.\d+)?)'
+      replace: "<datarate>"
+      type: "float"
+
+    - name: "duration"
+      pattern: 'duration=\s*(\d+(?:\.\d+)?)'
+      replace: "<duration>"
+      type: "float"
+
+    - name: "throughput"
+      pattern: 'throughput=\s*(\d+(?:\.\d+)?)'
+      replace: "<throughput>"
+      type: "float"
+
+    - name: "filesize"
+      pattern: '(\d+(?:\.\d+)?)\s*(?:MByte|GByte|MiB|GiB|GB|MB|KB)'
+      replace: "<filesize>"
+      type: "float"
+
+    - name: "hostport"
+      pattern: '([a-zA-Z0-9.-]+:\d+)'
+      replace: "<HOSTPORT>"
+      type: "string"
+
+    - name: ""
+      pattern: '\b(\d+(?:\.\d+)?)\b'
+      replace: "<NUM>"
+      type: "float"
+
+detector:
+  method: "ensemble"
+  ensemble:
+    enabled: true
+    method: "sead"
+    contamination: 0.15
+    sead:
+      eta: 0.1
+      lambda: 0.01
+  auto_scaling:
+    enabled: true
+    high_threshold: 75.0
+    critical_threshold: 90.0
+    high_duration: 90.0
+    critical_duration: 120.0
+    down_threshold: 50.0
+    down_duration: 120.0
+  rrcf_variants:
+    fast:
+      num_trees: 50
+      tree_size: 32
+      threshold_percentile: 0.85
+    mid:
+      num_trees: 150
+      tree_size: 64
+      threshold_percentile: 0.85
+    slow:
+      num_trees: 200
+      tree_size: 128
+      threshold_percentile: 0.85
+  copod:
+    buffer_size: 50
+    threshold: 0.3
+  mad:
+    threshold: 3.5
+    calibration_size: 50
+
+output:
+  feature_log_path: "logs/features.jsonl"
+  anomaly_log_path: "logs/anomalies.jsonl"
--- a/go.mod
+++ b/go.mod
@ -0,0 +1,49 @@
+module codeberg.org/pata1704/guenther
+
+go 1.25.5
+
+require (
+	codeberg.org/pata1704/copod v0.0.0-20260308082005-aded842ae0c1
+	codeberg.org/pata1704/drain3 v1.0.0
+	codeberg.org/pata1704/rrcf v0.0.0-20260305123746-25e149fa69ba
+	github.com/apache/arrow-go/v18 v18.5.1
+	github.com/duckdb/duckdb-go/v2 v2.5.5
+	github.com/e-XpertSolutions/go-iforest v1.0.0
+	github.com/fsnotify/fsnotify v1.9.0
+	github.com/stretchr/testify v1.11.1
+	gopkg.in/yaml.v3 v3.0.1
+)
+
+require (
+	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
+	github.com/duckdb/duckdb-go-bindings v0.3.3 // indirect
+	github.com/duckdb/duckdb-go-bindings/lib/darwin-amd64 v0.3.3 // indirect
+	github.com/duckdb/duckdb-go-bindings/lib/darwin-arm64 v0.3.3 // indirect
+	github.com/duckdb/duckdb-go-bindings/lib/linux-amd64 v0.3.3 // indirect
+	github.com/duckdb/duckdb-go-bindings/lib/linux-arm64 v0.3.3 // indirect
+	github.com/duckdb/duckdb-go-bindings/lib/windows-amd64 v0.3.3 // indirect
+	github.com/dustin/go-humanize v1.0.1 // indirect
+	github.com/go-viper/mapstructure/v2 v2.5.0 // indirect
+	github.com/goccy/go-json v0.10.5 // indirect
+	github.com/google/flatbuffers v25.12.19+incompatible // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/klauspost/compress v1.18.3 // indirect
+	github.com/klauspost/cpuid/v2 v2.3.0 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/ncruces/go-strftime v1.0.0 // indirect
+	github.com/pierrec/lz4/v4 v4.1.25 // indirect
+	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
+	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
+	github.com/zeebo/xxh3 v1.1.0 // indirect
+	golang.org/x/exp v0.0.0-20260112195511-716be5621a96 // indirect
+	golang.org/x/mod v0.32.0 // indirect
+	golang.org/x/sync v0.19.0 // indirect
+	golang.org/x/sys v0.40.0 // indirect
+	golang.org/x/telemetry v0.0.0-20260116145544-c6413dc483f5 // indirect
+	golang.org/x/tools v0.41.0 // indirect
+	golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
+	modernc.org/libc v1.67.6 // indirect
+	modernc.org/mathutil v1.7.1 // indirect
+	modernc.org/memory v1.11.0 // indirect
+	modernc.org/sqlite v1.44.1 // indirect
+)
--- a/go.sum
+++ b/go.sum
@ -0,0 +1,125 @@
+codeberg.org/pata1704/copod v0.0.0-20260308082005-aded842ae0c1 h1:DoXV7m58nWibyIvVaUj4AVyVM/FN1SSpHuiuae+2Pa0=
+codeberg.org/pata1704/copod v0.0.0-20260308082005-aded842ae0c1/go.mod h1:IchgVmiksba/DP7BjHiAYKoSrKTe3zrNrFO9QZWNxx0=
+codeberg.org/pata1704/drain3 v1.0.0 h1:X66fn+lnzOMU+PFFSkNBF89z1ghbqihE1I4A6x/OJIM=
+codeberg.org/pata1704/drain3 v1.0.0/go.mod h1:+K1hIYh3hNSPiXRxUin6ZiC2CC9FDGqQKNNR+7ZIx9s=
+codeberg.org/pata1704/rrcf v0.0.0-20260305123746-25e149fa69ba h1:szOyiRopNELsHg9v/Tvif2292MGpgz+Hw9QqTMgildg=
+codeberg.org/pata1704/rrcf v0.0.0-20260305123746-25e149fa69ba/go.mod h1:BmI1vkwcwL5tlRVfn3wEDZV+MXQbPMj8w7IsUhelrkA=
+github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
+github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
+github.com/apache/arrow-go/v18 v18.5.1 h1:yaQ6zxMGgf9YCYw4/oaeOU3AULySDlAYDOcnr4LdHdI=
+github.com/apache/arrow-go/v18 v18.5.1/go.mod h1:OCCJsmdq8AsRm8FkBSSmYTwL/s4zHW9CqxeBxEytkNE=
+github.com/apache/thrift v0.22.0 h1:r7mTJdj51TMDe6RtcmNdQxgn9XcyfGDOzegMDRg47uc=
+github.com/apache/thrift v0.22.0/go.mod h1:1e7J/O1Ae6ZQMTYdy9xa3w9k+XHWPfRvdPyJeynQ+/g=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/duckdb/duckdb-go-bindings v0.3.3 h1:lXogtCY8hiGLQvTfK55HcgvaA3K2MrwKeZGqhIin35U=
+github.com/duckdb/duckdb-go-bindings v0.3.3/go.mod h1:zS7OpBP8zwVlP38OljRZOnqWYlNd4KLcVfMoA1JFzpk=
+github.com/duckdb/duckdb-go-bindings/lib/darwin-amd64 v0.3.3 h1:ue8BtIOSt+2Bt2fEfTAvBcQLxzBFhgfCcyzPtqQWTRA=
+github.com/duckdb/duckdb-go-bindings/lib/darwin-amd64 v0.3.3/go.mod h1:EnAvZh1kNJHp5yF+M1ZHNEvapnmt6anq1xXHVrAGqMo=
+github.com/duckdb/duckdb-go-bindings/lib/darwin-arm64 v0.3.3 h1:2TrSeTgtwi3WIvub9ba0mny+AClSNo1w0Ghszc2B8lQ=
+github.com/duckdb/duckdb-go-bindings/lib/darwin-arm64 v0.3.3/go.mod h1:IGLSeEcFhNeZF16aVjQCULD7TsFZKG5G7SyKJAXKp5c=
+github.com/duckdb/duckdb-go-bindings/lib/linux-amd64 v0.3.3 h1:GN0cexhfE7uLb7qgDmsYG324wKF15nW+O7v5+NGalS4=
+github.com/duckdb/duckdb-go-bindings/lib/linux-amd64 v0.3.3/go.mod h1:KAIynZ0GHCS7X5fRyuFnQMg/SZBPK/bS9OCOVojClxw=
+github.com/duckdb/duckdb-go-bindings/lib/linux-arm64 v0.3.3 h1:bIJV+ct6yvMXjy+N3bfILFd0fkTK50AUhUTerkY40/8=
+github.com/duckdb/duckdb-go-bindings/lib/linux-arm64 v0.3.3/go.mod h1:81SGOYoEUs8qaAfSk1wRfM5oobrIJ5KI7AzYhK6/bvQ=
+github.com/duckdb/duckdb-go-bindings/lib/windows-amd64 v0.3.3 h1:SK2sunA/MPb2T3113iFzHv6DWeu+qrsw0DizTFrvM+Q=
+github.com/duckdb/duckdb-go-bindings/lib/windows-amd64 v0.3.3/go.mod h1:K25pJL26ARblGDeuAkrdblFvUen92+CwksLtPEHRqqQ=
+github.com/duckdb/duckdb-go/v2 v2.5.5 h1:TlK8ipnzoKW2aNrjGqRkFWLCDpJDxR/VwH8ezEcvVhw=
+github.com/duckdb/duckdb-go/v2 v2.5.5/go.mod h1:6uIbC3gz36NCEygECzboygOo/Z9TeVwox/puG+ohWV0=
+github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
+github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
+github.com/e-XpertSolutions/go-iforest v1.0.0 h1:x8IN5xsmugc9VsVyHlBtR7EY9tEacBX7A5dwXXh1y94=
+github.com/e-XpertSolutions/go-iforest v1.0.0/go.mod h1:t3C4RgLJcVtm2sOOXB+UTbwGiT+TPQAeP9daEWR4C8c=
+github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
+github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
+github.com/go-viper/mapstructure/v2 v2.5.0 h1:vM5IJoUAy3d7zRSVtIwQgBj7BiWtMPfmPEgAXnvj1Ro=
+github.com/go-viper/mapstructure/v2 v2.5.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
+github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
+github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
+github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs=
+github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/google/flatbuffers v25.12.19+incompatible h1:haMV2JRRJCe1998HeW/p0X9UaMTK6SDo0ffLn2+DbLs=
+github.com/google/flatbuffers v25.12.19+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
+github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
+github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4=
+github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE=
+github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw=
+github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
+github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
+github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
+github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY=
+github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI=
+github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE=
+github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
+github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
+github.com/pierrec/lz4/v4 v4.1.25 h1:kocOqRffaIbU5djlIBr7Wh+cx82C0vtFb0fOurZHqD0=
+github.com/pierrec/lz4/v4 v4.1.25/go.mod h1:EoQMVJgeeEOMsCqCzqFm2O0cJvljX2nGZjcRIPL34O4=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
+github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
+github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs=
+github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s=
+golang.org/x/exp v0.0.0-20260112195511-716be5621a96 h1:Z/6YuSHTLOHfNFdb8zVZomZr7cqNgTJvA8+Qz75D8gU=
+golang.org/x/exp v0.0.0-20260112195511-716be5621a96/go.mod h1:nzimsREAkjBCIEFtHiYkrJyT+2uy9YZJB7H1k68CXZU=
+golang.org/x/mod v0.32.0 h1:9F4d3PHLljb6x//jOyokMv3eX+YDeepZSEo3mFJy93c=
+golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU=
+golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
+golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
+golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/telemetry v0.0.0-20260116145544-c6413dc483f5 h1:i0p03B68+xC1kD2QUO8JzDTPXCzhN56OLJ+IhHY8U3A=
+golang.org/x/telemetry v0.0.0-20260116145544-c6413dc483f5/go.mod h1:b7fPSJ0pKZ3ccUh8gnTONJxhn3c/PS6tyzQvyqw4iA8=
+golang.org/x/tools v0.41.0 h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc=
+golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg=
+golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY=
+golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
+gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
+gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis=
+modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
+modernc.org/ccgo/v4 v4.30.1 h1:4r4U1J6Fhj98NKfSjnPUN7Ze2c6MnAdL0hWw6+LrJpc=
+modernc.org/ccgo/v4 v4.30.1/go.mod h1:bIOeI1JL54Utlxn+LwrFyjCx2n2RDiYEaJVSrgdrRfM=
+modernc.org/fileutil v1.3.40 h1:ZGMswMNc9JOCrcrakF1HrvmergNLAmxOPjizirpfqBA=
+modernc.org/fileutil v1.3.40/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc=
+modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
+modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
+modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE=
+modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
+modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
+modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
+modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI=
+modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE=
+modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
+modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
+modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
+modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
+modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
+modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
+modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
+modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
+modernc.org/sqlite v1.44.1 h1:qybx/rNpfQipX/t47OxbHmkkJuv2JWifCMH8SVUiDas=
+modernc.org/sqlite v1.44.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA=
+modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
+modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
+modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
+modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
--- a/internal/collector/log.go
+++ b/internal/collector/log.go
@ -0,0 +1,250 @@
+package collector
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	drain3go "codeberg.org/pata1704/drain3"
+	"codeberg.org/pata1704/guenther/internal/config"
+	idrain3 "codeberg.org/pata1704/guenther/internal/drain3"
+	"codeberg.org/pata1704/guenther/pkg/types"
+	"github.com/fsnotify/fsnotify"
+)
+
+// linePool hands out reusable *strings.Builder values so that per-line string
+// assembly can avoid a fresh allocation each time.
+//
+// NOTE(review): nothing in this file calls linePool.Get or linePool.Put –
+// confirm whether the pool is still needed or is left over from an earlier
+// version of the read loop.
+var linePool = sync.Pool{
+	New: func() any {
+		return &strings.Builder{}
+	},
+}
+
+// LogCollector tails a log file using inotify (fsnotify) and emits a
+// types.LogEvent for every non-empty line.
+//
+// Processing pipeline per line:
+//  1. ApplyMasking  – extracts named parameters and masks the line.
+//  2. AddLogMessage – mines a Drain3 template ID from the masked line.
+//  3. Severity      – classified from the raw line.
+//  4. Emit          – non-blocking channel send with drop counter.
+//
+// The collector uses a single goroutine per file and a WaitGroup for clean
+// shutdown.
+type LogCollector struct {
+	// cfg supplies the log path (Ingestion.LogPath) and the Drain settings,
+	// including the masking patterns applied to every line.
+	cfg *config.Config
+	// miner is the Drain3 template miner fed with masked lines.
+	miner *drain3go.TemplateMiner
+	// outputChan receives one LogEvent per processed line (send-only).
+	outputChan chan<- types.LogEvent
+	// healthChan receives periodic StageHealth snapshots (send-only).
+	healthChan chan<- types.StageHealth
+
+	// wg tracks the tailing goroutine started by Start; Wait blocks on it.
+	wg sync.WaitGroup
+
+	// processed counts events accepted by outputChan; dropped counts events
+	// discarded because outputChan was full. Neither is ever reset.
+	processed atomic.Uint64
+	dropped   atomic.Uint64
+}
+
+// NewLogCollector builds a LogCollector that publishes log events on output
+// and health snapshots on health.
+//
+// The Drain3 miner starts from drain3go.DefaultConfig with SimTh, Depth and
+// MaxChildren overridden from cfg.Drain. It is backed by
+// drain3go.NewMemoryPersistence, so the template tree is rebuilt from scratch
+// on restart (state persistence can be added via FilePersistence if needed).
+func NewLogCollector(
+	cfg *config.Config,
+	output chan<- types.LogEvent,
+	health chan<- types.StageHealth,
+) *LogCollector {
+	minerCfg := drain3go.DefaultConfig()
+	minerCfg.SimTh = cfg.Drain.SimThreshold
+	minerCfg.Depth = cfg.Drain.Depth
+	minerCfg.MaxChildren = cfg.Drain.MaxChildren
+
+	collector := &LogCollector{
+		cfg:        cfg,
+		outputChan: output,
+		healthChan: health,
+	}
+	collector.miner = drain3go.NewTemplateMiner(minerCfg, drain3go.NewMemoryPersistence())
+	return collector
+}
+
+// Start begins tailing cfg.Ingestion.LogPath in a background goroutine.
+//
+// The file is opened and positioned at its end, so only content appended
+// after Start is emitted. An error is returned if the file cannot be opened
+// or seeked, or if the fsnotify watcher cannot be created or attached.
+// Errors that occur later, while tailing, are logged and not propagated.
+//
+// The goroutine reacts to:
+//   - Write events: all available lines are drained from the reader.
+//   - Remove/Rename events (log rotation): the path is reopened from its
+//     beginning, re-watched, and drained once so that lines written before
+//     the new watch was in place are not delayed until the next write.
+//   - A 5-second ticker: a health snapshot is emitted.
+//   - ctx cancellation or closed watcher channels: the goroutine exits.
+//
+// If reopening after a rotation fails, the goroutine logs and exits.
+func (c *LogCollector) Start(ctx context.Context) error {
+	f, err := os.Open(c.cfg.Ingestion.LogPath)
+	if err != nil {
+		return fmt.Errorf("log collector: open %q: %w", c.cfg.Ingestion.LogPath, err)
+	}
+
+	// Seek to end: only tail new content, not existing content.
+	if _, err := f.Seek(0, io.SeekEnd); err != nil {
+		f.Close()
+		return fmt.Errorf("log collector: seek %q: %w", c.cfg.Ingestion.LogPath, err)
+	}
+
+	watcher, err := fsnotify.NewWatcher()
+	if err != nil {
+		f.Close()
+		return fmt.Errorf("log collector: create fsnotify watcher: %w", err)
+	}
+	if err := watcher.Add(c.cfg.Ingestion.LogPath); err != nil {
+		f.Close()
+		watcher.Close()
+		return fmt.Errorf("log collector: watch %q: %w", c.cfg.Ingestion.LogPath, err)
+	}
+
+	reader := bufio.NewReaderSize(f, 64*1024)
+	reportTicker := time.NewTicker(5 * time.Second)
+
+	c.wg.Go(func() {
+		// Close through a closure so that the handle that is current at exit
+		// is the one released. A plain `defer f.Close()` would bind the
+		// original handle and leak every file reopened after a rotation.
+		defer func() { f.Close() }()
+		defer watcher.Close()
+		defer reportTicker.Stop()
+
+		for {
+			select {
+			case event, ok := <-watcher.Events:
+				if !ok {
+					return
+				}
+				if event.Has(fsnotify.Write) {
+					c.drainReader(reader)
+				}
+				if event.Has(fsnotify.Remove) || event.Has(fsnotify.Rename) {
+					// Log rotation: reopen the file.
+					log.Printf("log collector: file %q rotated – reopening", c.cfg.Ingestion.LogPath)
+					f.Close()
+					newF, err := c.reopenFile()
+					if err != nil {
+						log.Printf("log collector: reopen after rotation: %v", err)
+						return
+					}
+					f = newF
+					reader = bufio.NewReaderSize(f, 64*1024)
+					if err := watcher.Add(c.cfg.Ingestion.LogPath); err != nil {
+						log.Printf("log collector: re-watch after rotation: %v", err)
+					}
+					// Catch up on anything written to the new file before the
+					// watch above was registered; no Write event exists for it.
+					c.drainReader(reader)
+				}
+
+			case err, ok := <-watcher.Errors:
+				if !ok {
+					return
+				}
+				log.Printf("log collector: watcher error: %v", err)
+
+			case <-reportTicker.C:
+				c.emitHealth()
+
+			case <-ctx.Done():
+				return
+			}
+		}
+	})
+
+	return nil
+}
+
+// Wait blocks until the goroutine launched by Start has returned. That
+// goroutine exits on context cancellation, when one of the watcher's channels
+// is closed, or when reopening the file after a rotation fails.
+func (c *LogCollector) Wait() {
+	c.wg.Wait()
+}
+
+// drainReader processes every complete, newline-terminated line currently
+// readable from r. An unterminated tail is left in the bufio buffer, so a
+// line that the writer flushes in two pieces is emitted once, in full, after
+// its newline arrives.
+//
+// Peek/Discard is used rather than ReadString because ReadString consumes and
+// returns the partial tail at io.EOF, which would emit the two halves of a
+// split line as separate events.
+//
+// A line longer than the reader's buffer cannot be held back without
+// stalling the reader, so such a line is emitted in buffer-sized fragments.
+// Read errors other than io.EOF are logged; any error ends the drain.
+func (c *LogCollector) drainReader(r *bufio.Reader) {
+	for {
+		// Peek fills the buffer as far as possible without consuming it. A nil
+		// error means the buffer is completely full.
+		window, err := r.Peek(r.Size())
+		// Copy once: the slice returned by Peek is invalidated by Discard.
+		text := string(window)
+
+		consumed := 0
+		for {
+			nl := strings.IndexByte(text[consumed:], '\n')
+			if nl < 0 {
+				break
+			}
+			c.processLine(strings.TrimRight(text[consumed:consumed+nl], "\r\n"))
+			consumed += nl + 1
+		}
+
+		if consumed == 0 && len(text) == r.Size() {
+			// Oversized line: flush the full buffer so reading can progress.
+			c.processLine(strings.TrimRight(text, "\r\n"))
+			consumed = len(text)
+		}
+
+		if consumed > 0 {
+			// Cannot fail: consumed never exceeds the number of buffered bytes.
+			_, _ = r.Discard(consumed)
+		}
+
+		if err != nil {
+			// io.EOF means no more complete lines; any other error is logged.
+			if err != io.EOF {
+				log.Printf("log collector: read error: %v", err)
+			}
+			return
+		}
+	}
+}
+
+// processLine turns one raw log line into a types.LogEvent and offers it to
+// the output channel.
+//
+// Steps: mask the line and extract parameters with idrain3.ApplyMasking, feed
+// the masked text to the Drain3 miner, classify severity from the unmasked
+// text, then attempt a non-blocking send. Empty lines and lines for which the
+// miner returns nil are skipped without touching the counters. A successful
+// send increments processed; a full channel increments dropped.
+func (c *LogCollector) processLine(line string) {
+	if len(line) == 0 {
+		return
+	}
+
+	maskedText, extracted := idrain3.ApplyMasking(line, c.cfg.Drain.MaskingPatterns)
+
+	mined := c.miner.AddLogMessage(maskedText)
+	if mined == nil {
+		return
+	}
+
+	ev := types.LogEvent{
+		Timestamp:  time.Now(),
+		TemplateID: mined.ClusterID,
+		Params:     extracted,
+		Severity:   classifySeverity(line),
+		RawLine:    line,
+	}
+
+	select {
+	case c.outputChan <- ev:
+		c.processed.Add(1)
+	default:
+		// Receiver is behind: shed the event instead of stalling the tailer.
+		c.dropped.Add(1)
+	}
+}
+
+// reopenFile opens cfg.Ingestion.LogPath again after a log rotation. The
+// returned handle is positioned at the start of the file because no seek is
+// performed. A failure is returned wrapped with an "open: " prefix.
+func (c *LogCollector) reopenFile() (*os.File, error) {
+	reopened, err := os.Open(c.cfg.Ingestion.LogPath)
+	if err == nil {
+		return reopened, nil
+	}
+	return nil, fmt.Errorf("open: %w", err)
+}
+
+// emitHealth publishes a StageHealth snapshot for the "log_collector" stage.
+// The send is non-blocking: if healthChan is full the snapshot is discarded.
+//
+// NOTE(review): Throughput is the cumulative processed count divided by 5.
+// The counter is never reset, so the value grows over the collector's
+// lifetime instead of describing the last 5-second window – confirm whether a
+// per-interval rate was intended.
+func (c *LogCollector) emitHealth() {
+	processedTotal := c.processed.Load()
+	droppedTotal := c.dropped.Load()
+
+	snapshot := types.StageHealth{
+		StageName:       "log_collector",
+		EventsProcessed: processedTotal,
+		EventsDropped:   droppedTotal,
+		Throughput:      float64(processedTotal) / 5.0,
+		LastUpdate:      time.Now(),
+	}
+
+	select {
+	case c.healthChan <- snapshot:
+	default:
+	}
+}
+
+// classifySeverity derives a severity label from a raw log line by looking
+// for well-known keywords, case-insensitively.
+//
+// Precedence is ERROR > WARN > DEBUG; a line with no keyword is "INFO".
+//   - "ERROR": the line contains ERROR, FATAL or CRITICAL anywhere, or the
+//     abbreviation ERR as a standalone word.
+//   - "WARN":  the line contains WARN (which also covers WARNING).
+//   - "DEBUG": the line contains DEBUG.
+//
+// ERR is matched only as a whole word because as a bare substring it also
+// occurs inside ordinary words such as "transferred" or "interrupted", which
+// would otherwise be reported as errors.
+func classifySeverity(line string) string {
+	upper := strings.ToUpper(line)
+	switch {
+	case strings.Contains(upper, "ERROR"),
+		strings.Contains(upper, "FATAL"),
+		strings.Contains(upper, "CRITICAL"),
+		containsSeverityWord(upper, "ERR"):
+		return "ERROR"
+	case strings.Contains(upper, "WARN"):
+		return "WARN"
+	case strings.Contains(upper, "DEBUG"):
+		return "DEBUG"
+	default:
+		return "INFO"
+	}
+}
+
+// containsSeverityWord reports whether word occurs in upper without an ASCII
+// letter directly before or after it. upper must already be upper-cased.
+func containsSeverityWord(upper, word string) bool {
+	isLetter := func(b byte) bool { return b >= 'A' && b <= 'Z' }
+	for from := 0; from <= len(upper)-len(word); {
+		rel := strings.Index(upper[from:], word)
+		if rel < 0 {
+			return false
+		}
+		start := from + rel
+		end := start + len(word)
+		leftOK := start == 0 || !isLetter(upper[start-1])
+		rightOK := end == len(upper) || !isLetter(upper[end])
+		if leftOK && rightOK {
+			return true
+		}
+		from = start + 1
+	}
+	return false
+}
--- a/internal/collector/log_test.go
+++ b/internal/collector/log_test.go
@ -0,0 +1,45 @@
+package collector
+
+import (
+	"os"
+	"testing"
+	"time"
+
+	"codeberg.org/pata1704/guenther/internal/config"
+	"codeberg.org/pata1704/guenther/pkg/types"
+	"github.com/stretchr/testify/assert"
+)
+
+// TestLogCollector_ProcessLine feeds a single line straight into processLine
+// and checks the LogEvent that arrives on the output channel. Start is never
+// called, so the temporary file only provides a valid LogPath.
+func TestLogCollector_ProcessLine(t *testing.T) {
+	// 1. Create temporary log file
+	tmpFile, err := os.CreateTemp("", "test_log_*.log")
+	if !assert.NoError(t, err) {
+		// tmpFile is nil on failure; continuing would dereference it.
+		return
+	}
+	defer os.Remove(tmpFile.Name())
+	defer tmpFile.Close()
+
+	outputChan := make(chan types.LogEvent, 10)
+	healthChan := make(chan types.StageHealth, 10)
+
+	cfg := &config.Config{}
+	cfg.Ingestion.LogPath = tmpFile.Name()
+	cfg.Drain.Depth = 4
+	cfg.Drain.SimThreshold = 0.5
+	cfg.Drain.MaxChildren = 100
+	collector := NewLogCollector(cfg, outputChan, healthChan)
+
+	// 2. Test line processing with specific regex patterns
+	testLine := "2026-02-26 13:00:00.123456 INFO Transfer from 192.168.1.1:8080 completed (duration=1.23)"
+	collector.processLine(testLine)
+
+	select {
+	case ev := <-outputChan:
+		assert.Equal(t, "INFO", ev.Severity)
+		assert.Greater(t, ev.TemplateID, 0)
+		// processLine copies the unmasked input into RawLine.
+		assert.Equal(t, testLine, ev.RawLine)
+		assert.False(t, ev.Timestamp.IsZero())
+
+		// No masking patterns are configured here, so the extracted
+		// parameters are only logged for inspection, not asserted on.
+		t.Logf("Extracted parameters: %v", ev.Params)
+	case <-time.After(1 * time.Second):
+		t.Fatal("Timeout waiting for LogEvent")
+	}
+}
--- a/internal/collector/metric.go
+++ b/internal/collector/metric.go
@ -0,0 +1,542 @@
+package collector
+
+import (
+	"bufio"
+	"context"
+	"log"
+	"os"
+	"strconv"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"codeberg.org/pata1704/guenther/pkg/types"
+)
+
+// MetricCollector samples Linux system metrics from /proc at a fixed interval
+// and emits a types.MetricSnapshot for each sample.
+//
+// All /proc reads happen in the single collector goroutine, so no locking is
+// required for the delta-state fields. The output channel uses a non-blocking
+// send; overflows are counted in the dropped counter via load-shedding.
+type MetricCollector struct {
+	// outputChan receives one MetricSnapshot per sample; healthChan receives
+	// periodic StageHealth snapshots. Both are send-only.
+	outputChan chan<- types.MetricSnapshot
+	healthChan chan<- types.StageHealth
+
+	// interval is the sampling period. netInterface is the interface name
+	// looked up in /proc/net/dev and diskDevice the device name looked up in
+	// /proc/diskstats.
+	interval     time.Duration
+	netInterface string
+	diskDevice   string
+
+	// wg tracks the sampling goroutine started by Start; Wait blocks on it.
+	wg sync.WaitGroup
+
+	// Delta state – only accessed from the single collector goroutine.
+	// Each prev* field holds the cumulative counter value read in the
+	// previous sample; collect subtracts it from the current reading.
+	prevSoftnetDropped  uint64
+	prevSoftnetSqueeze  uint64
+	prevNetPacketsIn    uint64
+	prevNetPacketsOut   uint64
+	prevDiskReadsComp   uint64
+	prevDiskWritesComp  uint64
+	prevDiskRead        uint64
+	prevDiskWrite       uint64
+	prevDiskReadTimeMs  uint64
+	prevDiskWriteTimeMs uint64
+	prevDiskIOTicks     uint64
+	prevCPUTotal        uint64
+	prevCPUIdle         uint64
+	prevCPUIoWait       uint64
+	prevCPUSoftIrq      uint64
+	prevCtxt            uint64
+	prevIntr            uint64
+	prevNetIn           uint64
+	prevNetOut          uint64
+	prevNetErrs         uint64
+	prevNetDrops        uint64
+	prevTCPRetrans      uint64
+	prevTCPTimeouts     uint64
+	prevTCPLostRetrans  uint64
+	prevTCPFastRetrans  uint64
+	// prevTime is the wall-clock time of the previous sample.
+	prevTime time.Time
+	// firstSample is true until the first reading has been stored as the
+	// baseline; that first reading produces no snapshot.
+	firstSample bool
+
+	// processed counts snapshots accepted by outputChan; dropped counts
+	// snapshots discarded because outputChan was full.
+	processed atomic.Uint64
+	dropped   atomic.Uint64
+}
+
+// NewMetricCollector builds a MetricCollector that samples every interval,
+// reads network counters for netIntf and disk counters for diskDev, publishes
+// snapshots on output and health reports on health. The collector starts in
+// the "first sample" state, so the first reading only establishes a baseline.
+func NewMetricCollector(
+	output chan<- types.MetricSnapshot,
+	health chan<- types.StageHealth,
+	interval time.Duration,
+	netIntf, diskDev string,
+) *MetricCollector {
+	mc := new(MetricCollector)
+	mc.outputChan, mc.healthChan = output, health
+	mc.interval = interval
+	mc.netInterface, mc.diskDevice = netIntf, diskDev
+	mc.firstSample = true
+	return mc
+}
+
+// Start launches the sampling goroutine and returns immediately.
+//
+// Every c.interval the goroutine calls collect and, when a snapshot is
+// produced, offers it to outputChan without blocking (a full channel
+// increments dropped). Every 5 seconds it emits a health snapshot. The
+// goroutine stops when ctx is cancelled.
+func (c *MetricCollector) Start(ctx context.Context) {
+	sampleTicker := time.NewTicker(c.interval)
+	healthTicker := time.NewTicker(5 * time.Second)
+	c.prevTime = time.Now()
+
+	c.wg.Go(func() {
+		defer sampleTicker.Stop()
+		defer healthTicker.Stop()
+
+		for {
+			select {
+			case <-ctx.Done():
+				return
+
+			case <-healthTicker.C:
+				c.emitHealth()
+
+			case <-sampleTicker.C:
+				// collect yields nil for the baseline sample.
+				if sample := c.collect(); sample != nil {
+					select {
+					case c.outputChan <- *sample:
+						c.processed.Add(1)
+					default:
+						c.dropped.Add(1)
+					}
+				}
+			}
+		}
+	})
+}
+
+// Wait blocks until the sampling goroutine launched by Start has returned,
+// which happens once the context passed to Start is cancelled.
+func (c *MetricCollector) Wait() {
+	c.wg.Wait()
+}
+
+// ── collection ────────────────────────────────────────────────────────────────
+
+// collect reads all /proc sources once and converts the cumulative counters
+// into rates relative to the previous sample.
+//
+// The first call only stores the readings as a baseline and returns nil.
+// Later calls return a snapshot in which
+//   - CPU fields are percentages of the total jiffies elapsed,
+//   - memory fields are the absolute values returned by readMemInfo,
+//   - every other field is a counter delta divided by the elapsed seconds
+//     (byte counters are additionally divided by 1 048 576).
+//
+// All deltas use saturatingSub, so a counter that moves backwards yields 0
+// rather than a wrapped value. That includes the busy-jiffies term: if the
+// idle delta ever exceeds the total delta, CPUPercent is 0 instead of an
+// astronomically large number from unsigned wrap-around.
+func (c *MetricCollector) collect() *types.MetricSnapshot {
+	now := time.Now()
+	duration := now.Sub(c.prevTime).Seconds()
+
+	cpuTotal, cpuIdle, cpuIowait, cpuSoftirq, ctxt, intr := c.readSystemStats()
+	memUsed, memCached, memDirty := c.readMemInfo()
+	netIn, netOut, netErrs, netDrops, rxPackets, txPackets := c.readNetDev()
+	retrans := c.readSNMPStats()
+	timeouts, lostRetrans, fastRetrans := c.readNetstat()
+	softDropped, softSqueeze := c.readSoftnetStat()
+	diskRead, diskWrite, diskReadTime, diskWriteTime, diskIOTicks, readsComp, writesComp := c.readDiskStats()
+
+	if c.firstSample {
+		c.storePrev(now,
+			cpuTotal, cpuIdle, cpuIowait, cpuSoftirq, ctxt, intr,
+			netIn, netOut, netErrs, netDrops, rxPackets, txPackets,
+			retrans, timeouts, lostRetrans, fastRetrans,
+			softDropped, softSqueeze,
+			diskRead, diskWrite, diskReadTime, diskWriteTime, diskIOTicks, readsComp, writesComp)
+		c.firstSample = false
+		return nil
+	}
+
+	// Guard against a zero or negative interval before dividing by it.
+	if duration < 1e-6 {
+		duration = 1e-6
+	}
+
+	// perSec converts a cumulative counter pair into a per-second rate.
+	perSec := func(cur, prev uint64) float64 {
+		return float64(saturatingSub(cur, prev)) / duration
+	}
+	// mibPerSec does the same for byte counters, scaled to MiB.
+	mibPerSec := func(cur, prev uint64) float64 {
+		return perSec(cur, prev) / 1_048_576
+	}
+
+	cpuDelta := saturatingSub(cpuTotal, c.prevCPUTotal)
+	cpuIdleDelta := saturatingSub(cpuIdle, c.prevCPUIdle)
+	cpuPercent, cpuIowaitPercent, cpuSoftirqPercent := 0.0, 0.0, 0.0
+	if cpuDelta > 0 {
+		cpuPercent = float64(saturatingSub(cpuDelta, cpuIdleDelta)) / float64(cpuDelta) * 100.0
+		cpuIowaitPercent = float64(saturatingSub(cpuIowait, c.prevCPUIoWait)) / float64(cpuDelta) * 100.0
+		cpuSoftirqPercent = float64(saturatingSub(cpuSoftirq, c.prevCPUSoftIrq)) / float64(cpuDelta) * 100.0
+	}
+
+	snap := &types.MetricSnapshot{
+		Timestamp:               now,
+		CPUPercent:              cpuPercent,
+		CPUIoWaitPercent:        cpuIowaitPercent,
+		CPUSoftIrqPercent:       cpuSoftirqPercent,
+		ContextSwitchesPerS:     perSec(ctxt, c.prevCtxt),
+		InterruptsPerS:          perSec(intr, c.prevIntr),
+		MemoryUsedMB:            float64(memUsed),
+		MemoryCachedMB:          float64(memCached),
+		MemoryDirtyMB:           float64(memDirty),
+		NetworkInMBps:           mibPerSec(netIn, c.prevNetIn),
+		NetworkOutMBps:          mibPerSec(netOut, c.prevNetOut),
+		NetErrorsPerS:           perSec(netErrs, c.prevNetErrs),
+		NetDropsPerS:            perSec(netDrops, c.prevNetDrops),
+		TCPRetransPerS:          perSec(retrans, c.prevTCPRetrans),
+		TCPTimeoutsPerS:         perSec(timeouts, c.prevTCPTimeouts),
+		TCPLostRetransmitPerS:   perSec(lostRetrans, c.prevTCPLostRetrans),
+		TCPFastRetransPerS:      perSec(fastRetrans, c.prevTCPFastRetrans),
+		SoftnetDroppedPerS:      perSec(softDropped, c.prevSoftnetDropped),
+		SoftnetTimeSqueezePerS:  perSec(softSqueeze, c.prevSoftnetSqueeze),
+		DiskReadMBps:            mibPerSec(diskRead, c.prevDiskRead),
+		DiskWriteMBps:           mibPerSec(diskWrite, c.prevDiskWrite),
+		DiskReadTimeMsPerS:      perSec(diskReadTime, c.prevDiskReadTimeMs),
+		DiskWriteTimeMsPerS:     perSec(diskWriteTime, c.prevDiskWriteTimeMs),
+		DiskIOTicksPerS:         perSec(diskIOTicks, c.prevDiskIOTicks),
+		NetPacketsInPerS:        perSec(rxPackets, c.prevNetPacketsIn),
+		NetPacketsOutPerS:       perSec(txPackets, c.prevNetPacketsOut),
+		DiskReadsCompletedPerS:  perSec(readsComp, c.prevDiskReadsComp),
+		DiskWritesCompletedPerS: perSec(writesComp, c.prevDiskWritesComp),
+	}
+
+	c.storePrev(now,
+		cpuTotal, cpuIdle, cpuIowait, cpuSoftirq, ctxt, intr,
+		netIn, netOut, netErrs, netDrops, rxPackets, txPackets,
+		retrans, timeouts, lostRetrans, fastRetrans,
+		softDropped, softSqueeze,
+		diskRead, diskWrite, diskReadTime, diskWriteTime, diskIOTicks, readsComp, writesComp)
+	return snap
+}
+
+// storePrev records the current cumulative readings and their timestamp as
+// the baseline for the next call to collect. Parameters are grouped by the
+// /proc source they were read from, in the order collect passes them.
+func (c *MetricCollector) storePrev(
+	now time.Time,
+	cpuTotal, cpuIdle, cpuIowait, cpuSoftirq, ctxt, intr,
+	netIn, netOut, netErrs, netDrops, rxPackets, txPackets,
+	retrans, timeouts, lostRetrans, fastRetrans,
+	softDropped, softSqueeze,
+	diskRead, diskWrite, diskReadTime, diskWriteTime, diskIOTicks, readsComp, writesComp uint64,
+) {
+	c.prevTime = now
+
+	// /proc/stat
+	c.prevCPUTotal, c.prevCPUIdle = cpuTotal, cpuIdle
+	c.prevCPUIoWait, c.prevCPUSoftIrq = cpuIowait, cpuSoftirq
+	c.prevCtxt, c.prevIntr = ctxt, intr
+
+	// /proc/net/dev
+	c.prevNetIn, c.prevNetOut = netIn, netOut
+	c.prevNetErrs, c.prevNetDrops = netErrs, netDrops
+	c.prevNetPacketsIn, c.prevNetPacketsOut = rxPackets, txPackets
+
+	// /proc/net/snmp and /proc/net/netstat
+	c.prevTCPRetrans = retrans
+	c.prevTCPTimeouts = timeouts
+	c.prevTCPLostRetrans, c.prevTCPFastRetrans = lostRetrans, fastRetrans
+
+	// /proc/net/softnet_stat
+	c.prevSoftnetDropped, c.prevSoftnetSqueeze = softDropped, softSqueeze
+
+	// /proc/diskstats
+	c.prevDiskRead, c.prevDiskWrite = diskRead, diskWrite
+	c.prevDiskReadTimeMs, c.prevDiskWriteTimeMs = diskReadTime, diskWriteTime
+	c.prevDiskIOTicks = diskIOTicks
+	c.prevDiskReadsComp, c.prevDiskWritesComp = readsComp, writesComp
+}
+
+// ── /proc readers ─────────────────────────────────────────────────────────────
+
+// readSystemStats reads /proc/stat and returns cumulative CPU jiffies
+// (total, idle, iowait, softirq) plus cumulative context-switches and
+// interrupt counts.
+//
+// /proc/stat CPU column layout:
+//
+//	col 1=user 2=nice 3=system 4=idle 5=iowait 6=irq 7=softirq
+func (c *MetricCollector) readSystemStats() (total, idle, iowait, softirq, ctxt, intr uint64) {
+	f, err := os.Open("/proc/stat")
+	if err != nil {
+		log.Printf("metric: open /proc/stat: %v", err)
+		return
+	}
+	defer f.Close()
+
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		fields := strings.Fields(scanner.Text())
+		if len(fields) == 0 {
+			continue
+		}
+		switch fields[0] {
+		case "cpu":
+			for i := 1; i < len(fields); i++ {
+				v, _ := strconv.ParseUint(fields[i], 10, 64)
+				total += v
+				switch i {
+				case 4:
+					idle = v
+				case 5:
+					iowait = v
+				case 7:
+					softirq = v
+				}
+			}
+		case "ctxt":
+			if len(fields) > 1 {
+				ctxt, _ = strconv.ParseUint(fields[1], 10, 64)
+			}
+		case "intr":
+			if len(fields) > 1 {
+				intr, _ = strconv.ParseUint(fields[1], 10, 64)
+			}
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		log.Printf("metric: scan /proc/stat: %v", err)
+	}
+	return
+}
+
+// readMemInfo reads /proc/meminfo and returns used, cached and dirty memory.
+// Each value is the file's kB figure integer-divided by 1024. used is
+// MemTotal − MemAvailable, left at 0 if MemAvailable exceeds MemTotal.
+// On an open failure the error is logged and all results are zero.
+func (c *MetricCollector) readMemInfo() (used, cached, dirty uint64) {
+	file, err := os.Open("/proc/meminfo")
+	if err != nil {
+		log.Printf("metric: open /proc/meminfo: %v", err)
+		return
+	}
+	defer file.Close()
+
+	var memTotal, memAvailable uint64
+	sc := bufio.NewScanner(file)
+	for sc.Scan() {
+		cols := strings.Fields(sc.Text())
+		if len(cols) < 2 {
+			continue
+		}
+		kb, _ := strconv.ParseUint(cols[1], 10, 64)
+		if key := cols[0]; key == "MemTotal:" {
+			memTotal = kb
+		} else if key == "MemAvailable:" {
+			memAvailable = kb
+		} else if key == "Cached:" {
+			cached = kb / 1024 // kB → MB
+		} else if key == "Dirty:" {
+			dirty = kb / 1024 // kB → MB
+		}
+	}
+	if scanErr := sc.Err(); scanErr != nil {
+		log.Printf("metric: scan /proc/meminfo: %v", scanErr)
+	}
+	if memAvailable <= memTotal {
+		used = (memTotal - memAvailable) / 1024
+	}
+	return
+}
+
+// readNetDev reads the cumulative counters of the configured interface from
+// /proc/net/dev. errs and drops are the sums of the receive and transmit
+// values. All results are zero if the file cannot be opened, the interface
+// line is not present, or the line has fewer than 12 counters.
+//
+// /proc/net/dev column layout (after stripping "iface:"):
+//
+//	0=rx_bytes 1=rx_packets 2=rx_errs 3=rx_drop
+//	4=rx_fifo  5=rx_frame   6=rx_compressed 7=rx_multicast
+//	8=tx_bytes 9=tx_packets 10=tx_errs 11=tx_drop ...
+func (c *MetricCollector) readNetDev() (rxBytes, txBytes, errs, drops, rxPackets, txPackets uint64) {
+	file, err := os.Open("/proc/net/dev")
+	if err != nil {
+		return 0, 0, 0, 0, 0, 0
+	}
+	defer file.Close()
+
+	wanted := c.netInterface + ":"
+	sc := bufio.NewScanner(file)
+	for sc.Scan() {
+		counters, found := strings.CutPrefix(strings.TrimSpace(sc.Text()), wanted)
+		if !found {
+			continue
+		}
+		cols := strings.Fields(counters)
+		if len(cols) < 12 {
+			log.Printf("metric: unexpected /proc/net/dev format for %q", c.netInterface)
+			return 0, 0, 0, 0, 0, 0
+		}
+		// col parses one counter; unparseable text counts as 0.
+		col := func(i int) uint64 {
+			v, _ := strconv.ParseUint(cols[i], 10, 64)
+			return v
+		}
+		return col(0), col(8), col(2) + col(10), col(3) + col(11), col(1), col(9)
+	}
+	if scanErr := sc.Err(); scanErr != nil {
+		log.Printf("metric: scan /proc/net/dev: %v", scanErr)
+	}
+	return 0, 0, 0, 0, 0, 0
+}
+
+// readSNMPStats returns the cumulative RetransSegs counter from the Tcp
+// section of /proc/net/snmp, or 0 if the file cannot be opened or the
+// counter is not found.
+//
+// The file holds a header row and a value row per protocol, both starting
+// with the protocol label. The first "Tcp:" row is taken as the header; the
+// value is then picked from the following "Tcp:" row at the position where
+// the header says "RetransSegs", so the lookup does not depend on a fixed
+// column number.
+func (c *MetricCollector) readSNMPStats() uint64 {
+	file, err := os.Open("/proc/net/snmp")
+	if err != nil {
+		return 0
+	}
+	defer file.Close()
+
+	var header []string
+	sc := bufio.NewScanner(file)
+	for sc.Scan() {
+		row := sc.Text()
+		if !strings.HasPrefix(row, "Tcp:") {
+			continue
+		}
+		cols := strings.Fields(row)
+		if header == nil {
+			header = cols // first Tcp: line carries the column names
+			continue
+		}
+
+		// Value row: locate the RetransSegs column by name.
+		pos := -1
+		for i, name := range header {
+			if name == "RetransSegs" {
+				pos = i
+				break
+			}
+		}
+		if pos >= 0 && pos < len(cols) {
+			v, _ := strconv.ParseUint(cols[pos], 10, 64)
+			return v
+		}
+	}
+	if scanErr := sc.Err(); scanErr != nil {
+		log.Printf("metric: scan /proc/net/snmp: %v", scanErr)
+	}
+	return 0
+}
+
+// readNetstat returns the cumulative TCPTimeouts, TCPLostRetransmit and
+// TCPFastRetrans counters from the TcpExt section of /proc/net/netstat.
+// The first "TcpExt:" row is taken as the header and later "TcpExt:" rows as
+// values; counters are matched to values by column name. All results are
+// zero if the file cannot be opened.
+func (c *MetricCollector) readNetstat() (timeouts, lostRetrans, fastRetrans uint64) {
+	file, err := os.Open("/proc/net/netstat")
+	if err != nil {
+		return 0, 0, 0
+	}
+	defer file.Close()
+
+	// Column name → result variable it fills.
+	wanted := map[string]*uint64{
+		"TCPTimeouts":       &timeouts,
+		"TCPLostRetransmit": &lostRetrans,
+		"TCPFastRetrans":    &fastRetrans,
+	}
+
+	var names []string
+	sc := bufio.NewScanner(file)
+	for sc.Scan() {
+		row := sc.Text()
+		if !strings.HasPrefix(row, "TcpExt:") {
+			continue
+		}
+		cols := strings.Fields(row)
+		if names == nil {
+			names = cols
+			continue
+		}
+		// Value row: only walk positions present in both rows.
+		for i := 0; i < len(names) && i < len(cols); i++ {
+			if dst, ok := wanted[names[i]]; ok {
+				*dst, _ = strconv.ParseUint(cols[i], 10, 64)
+			}
+		}
+	}
+	if scanErr := sc.Err(); scanErr != nil {
+		log.Printf("metric: scan /proc/net/netstat: %v", scanErr)
+	}
+	return
+}
+
+// readSoftnetStat reads /proc/net/softnet_stat and returns the dropped and
+// time_squeeze counters summed over every row of the file. The values are
+// hexadecimal; rows with fewer than three fields are ignored. Both results
+// are zero if the file cannot be opened.
+func (c *MetricCollector) readSoftnetStat() (dropped, timeSqueeze uint64) {
+	file, err := os.Open("/proc/net/softnet_stat")
+	if err != nil {
+		return 0, 0
+	}
+	defer file.Close()
+
+	sc := bufio.NewScanner(file)
+	for sc.Scan() {
+		cols := strings.Fields(sc.Text())
+		if len(cols) < 3 {
+			continue
+		}
+		// col 0 = total, col 1 = dropped, col 2 = time_squeeze
+		rowDropped, _ := strconv.ParseUint(cols[1], 16, 64)
+		rowSqueeze, _ := strconv.ParseUint(cols[2], 16, 64)
+		dropped += rowDropped
+		timeSqueeze += rowSqueeze
+	}
+	if scanErr := sc.Err(); scanErr != nil {
+		log.Printf("metric: scan /proc/net/softnet_stat: %v", scanErr)
+	}
+	return
+}
+
+// readDiskStats reads the cumulative counters of the configured device from
+// /proc/diskstats. Sector counts are multiplied by 512 to obtain bytes. All
+// results are zero if the file cannot be opened (which is logged) or no line
+// with at least 14 fields names the device.
+//
+// /proc/diskstats column layout (kernel ≥ 4.18):
+//
+//	0=major 1=minor 2=name
+//	3=reads_completed 4=reads_merged 5=sectors_read    6=read_time_ms
+//	7=writes_completed 8=writes_merged 9=sectors_written 10=write_time_ms
+//	11=io_in_progress 12=io_ticks_ms 13=weighted_io_ticks
+func (c *MetricCollector) readDiskStats() (readBytes, writeBytes, readTimeMs, writeTimeMs, ioTicks, readsComp, writesComp uint64) {
+	file, err := os.Open("/proc/diskstats")
+	if err != nil {
+		log.Printf("metric: open /proc/diskstats: %v", err)
+		return
+	}
+	defer file.Close()
+
+	sc := bufio.NewScanner(file)
+	for sc.Scan() {
+		cols := strings.Fields(sc.Text())
+		if len(cols) < 14 {
+			continue
+		}
+		if cols[2] != c.diskDevice {
+			continue
+		}
+		// col parses one counter; unparseable text counts as 0.
+		col := func(i int) uint64 {
+			v, _ := strconv.ParseUint(cols[i], 10, 64)
+			return v
+		}
+		return col(5) * 512, col(9) * 512, col(6), col(10), col(12), col(3), col(7)
+	}
+	if scanErr := sc.Err(); scanErr != nil {
+		log.Printf("metric: scan /proc/diskstats: %v", scanErr)
+	}
+	return
+}
+
+// ── health ────────────────────────────────────────────────────────────────────
+
+// emitHealth publishes a StageHealth snapshot for the "metric_collector"
+// stage. The send is non-blocking: if healthChan is full the snapshot is
+// discarded.
+//
+// NOTE(review): Throughput is the cumulative processed count divided by 5.
+// The counter is never reset, so the value grows over the collector's
+// lifetime instead of describing the last 5-second window – confirm whether a
+// per-interval rate was intended.
+func (c *MetricCollector) emitHealth() {
+	processedTotal := c.processed.Load()
+	droppedTotal := c.dropped.Load()
+
+	snapshot := types.StageHealth{
+		StageName:       "metric_collector",
+		EventsProcessed: processedTotal,
+		EventsDropped:   droppedTotal,
+		Throughput:      float64(processedTotal) / 5.0,
+		LastUpdate:      time.Now(),
+	}
+
+	select {
+	case c.healthChan <- snapshot:
+	default:
+	}
+}
+
+// ── helpers ───────────────────────────────────────────────────────────────────
+
+// saturatingSub computes a − b for unsigned operands and yields 0 instead of
+// wrapping around when b is larger than a. It keeps rates non-negative when a
+// cumulative /proc counter reads lower than in the previous sample.
+func saturatingSub(a, b uint64) uint64 {
+	if b > a {
+		return 0
+	}
+	return a - b
+}
--- a/internal/collector/systemctl.go
+++ b/internal/collector/systemctl.go
@ -0,0 +1,140 @@
+package collector
+
+import (
+	"bytes"
+	"context"
+	"log"
+	"os/exec"
+	"strings"
+	"sync"
+	"time"
+
+	"codeberg.org/pata1704/guenther/pkg/types"
+)
+
+// SystemctlCollector periodically checks the status of systemd services.
+type SystemctlCollector struct {
+	// services lists the unit names queried on every collection round.
+	services []string
+	// interval is the time between collection rounds.
+	interval time.Duration
+	// outputChan receives one ServiceStatus per service and round;
+	// healthChan receives periodic StageHealth snapshots. Both are send-only.
+	outputChan chan<- types.ServiceStatus
+	healthChan chan<- types.StageHealth
+
+	// wg tracks the collection goroutine started by Start.
+	wg sync.WaitGroup
+
+	// mu guards processed, the number of statuses accepted by outputChan.
+	mu        sync.Mutex
+	processed uint64
+}
+
+// NewSystemctlCollector builds a collector that queries the given services
+// every interval, publishing statuses on output and health snapshots on
+// health. Nothing runs until Start is called.
+func NewSystemctlCollector(
+	services []string,
+	interval time.Duration,
+	output chan<- types.ServiceStatus,
+	health chan<- types.StageHealth,
+) *SystemctlCollector {
+	sc := new(SystemctlCollector)
+	sc.services = services
+	sc.interval = interval
+	sc.outputChan, sc.healthChan = output, health
+	return sc
+}
+
+// Start launches the collection goroutine and returns immediately. With no
+// services configured it only logs a message and starts nothing.
+//
+// The goroutine collects once right away, then again every c.interval, and
+// emits a health snapshot every 5 seconds until ctx is cancelled.
+func (c *SystemctlCollector) Start(ctx context.Context) {
+	if len(c.services) == 0 {
+		log.Println("systemctl: no services configured for monitoring")
+		return
+	}
+
+	run := func() {
+		collectTicker := time.NewTicker(c.interval)
+		healthTicker := time.NewTicker(5 * time.Second)
+		defer collectTicker.Stop()
+		defer healthTicker.Stop()
+
+		// Do not wait a full interval for the first reading.
+		c.collect()
+
+		for {
+			select {
+			case <-collectTicker.C:
+				c.collect()
+			case <-healthTicker.C:
+				c.emitHealth()
+			case <-ctx.Done():
+				return
+			}
+		}
+	}
+	c.wg.Go(run)
+}
+
+// Wait blocks until the goroutine launched by Start has returned. It returns
+// immediately if Start launched nothing because no services were configured.
+func (c *SystemctlCollector) Wait() {
+	c.wg.Wait()
+}
+
+// collect queries every configured service once and offers each status to
+// outputChan without blocking. A failed query is logged and skipped. A full
+// channel is logged and the status discarded; only accepted statuses
+// increment processed.
+func (c *SystemctlCollector) collect() {
+	// markProcessed bumps the counter under the mutex.
+	markProcessed := func() {
+		c.mu.Lock()
+		c.processed++
+		c.mu.Unlock()
+	}
+
+	for _, unit := range c.services {
+		current, err := c.getServiceStatus(unit)
+		if err != nil {
+			log.Printf("systemctl: error getting status for %s: %v", unit, err)
+			continue
+		}
+
+		select {
+		case c.outputChan <- current:
+			markProcessed()
+		default:
+			log.Printf("systemctl: output channel full – dropping status for %s", unit)
+		}
+	}
+}
+
+// getServiceStatus runs `systemctl show -p ActiveState,SubState <service>`
+// and parses the KEY=VALUE lines it prints into a types.ServiceStatus stamped
+// with the current time. Lines without "=" and unknown keys are ignored. An
+// error from running the command is returned with a zero ServiceStatus.
+//
+// NOTE(review): the command runs without a context or timeout, so a hung
+// systemctl blocks the collection goroutine – consider exec.CommandContext.
+func (c *SystemctlCollector) getServiceStatus(service string) (types.ServiceStatus, error) {
+	var stdout bytes.Buffer
+	query := exec.Command("systemctl", "show", "-p", "ActiveState,SubState", service)
+	query.Stdout = &stdout
+	if err := query.Run(); err != nil {
+		return types.ServiceStatus{}, err
+	}
+
+	result := types.ServiceStatus{
+		Timestamp:   time.Now(),
+		ServiceName: service,
+	}
+
+	for _, property := range strings.Split(strings.TrimSpace(stdout.String()), "\n") {
+		key, value, hasValue := strings.Cut(property, "=")
+		if !hasValue {
+			continue
+		}
+		if key == "ActiveState" {
+			result.ActiveState = value
+		} else if key == "SubState" {
+			result.SubState = value
+		}
+	}
+
+	return result, nil
+}
+
+// emitHealth publishes a StageHealth snapshot for the "systemctl_collector"
+// stage, carrying the number of statuses delivered so far. The send is
+// non-blocking: if healthChan is full the snapshot is discarded.
+func (c *SystemctlCollector) emitHealth() {
+	c.mu.Lock()
+	delivered := c.processed
+	c.mu.Unlock()
+
+	snapshot := types.StageHealth{
+		StageName:       "systemctl_collector",
+		EventsProcessed: delivered,
+		LastUpdate:      time.Now(),
+	}
+
+	select {
+	case c.healthChan <- snapshot:
+	default:
+	}
+}
--- a/internal/config/config.go
+++ b/internal/config/config.go
@ -0,0 +1,203 @@
+// Package config provides the pipeline configuration loaded from YAML.
+package config
+
+import (
+	"fmt"
+	"os"
+	"regexp"
+	"time"
+
+	"gopkg.in/yaml.v3"
+)
+
+// MaskingPattern is a single entry in drain.masking_patterns.
+//
+// Name, Pattern, Replace and Type carry yaml tags. Re carries no yaml tag;
+// Config.Compile assigns it the compiled form of Pattern.
+type MaskingPattern struct {
+	// Name labels the pattern. It appears in Compile's error messages and is
+	// what NumericPatternNames returns.
+	Name    string `yaml:"name"`
+	// Pattern is the regular-expression source handed to regexp.Compile.
+	Pattern string `yaml:"pattern"`
+	// Replace is presumably the substitution text for matches – TODO confirm
+	// against the masking code, which is not in this file.
+	Replace string `yaml:"replace"`
+	// Type classifies the captured value; "float" and "int" are the values
+	// NumericPatternNames treats as numeric.
+	Type    string `yaml:"type"`
+	// Re is nil until Config.Compile has run successfully for this entry.
+	Re      *regexp.Regexp
+}
+
+// MADConfig defines parameters for the MAD detector.
+// It is decoded from the `mad` key of the detector section (see
+// DetectorConfig.MAD).
+type MADConfig struct {
+	// Threshold is the modified Z-score cutoff for IsAnomaly.
+	// Recommended: 3.0–4.0. Default: 3.5.
+	// NOTE(review): no default is applied in this package; confirm where the
+	// 3.5 fallback is implemented.
+	Threshold float64 `yaml:"threshold"`
+	// CalibrationSize is the number of NormalizedVectors to buffer before
+	// automatic per-feature median/MAD calibration runs.
+	// Default (if 0): 100.
+	CalibrationSize int `yaml:"calibration_size"`
+}
+
+// COPODConfig defines the parameters for the Copula-Based Outlier detector.
+// It is decoded from the `copod` key of the detector section (see
+// DetectorConfig.COPOD).
+type COPODConfig struct {
+	// Threshold is presumably the score cutoff passed to the COPOD detector –
+	// TODO confirm at the construction site.
+	Threshold  float64 `yaml:"threshold"`
+	// BufferSize is presumably the detector's sliding-window capacity –
+	// TODO confirm at the construction site.
+	BufferSize int     `yaml:"buffer_size"`
+}
+
+// RRCFConfig defines the parameters for the Robust Random Cut Forest detector.
+// Used for the standalone RRCF detector and the classic AVG/MAX/MEDIAN ensemble paths.
+// It is decoded from the `rrcf` key of the detector section (see
+// DetectorConfig.RRCF).
+type RRCFConfig struct {
+	// NumTrees is the number of trees in the forest.
+	NumTrees            int     `yaml:"num_trees"`
+	// TreeSize is the per-tree capacity.
+	TreeSize            int     `yaml:"tree_size"`
+	// ThresholdPercentile is the decision threshold expressed as a percentile.
+	// NOTE(review): the expected scale (0–1 vs 0–100) is not visible here.
+	ThresholdPercentile float64 `yaml:"threshold_percentile"`
+}
+
+// RRCFVariantConfig holds parameters for a single named RRCF variant inside
+// the SEAD multi-horizon ensemble.
+//
+// It has the same three fields as RRCFConfig; it is used as the element type
+// of RRCFVariantsConfig (fast / mid / slow).
+type RRCFVariantConfig struct {
+	// NumTrees controls score stability: more trees → smoother/conservative.
+	NumTrees int `yaml:"num_trees"`
+	// TreeSize sets the sliding-window capacity per tree.
+	TreeSize int `yaml:"tree_size"`
+	// ThresholdPercentile is the per-model decision threshold (standalone use).
+	ThresholdPercentile float64 `yaml:"threshold_percentile"`
+}
+
+// RRCFVariantsConfig groups the three RRCF variants used by the SEAD ensemble.
+// Each variant captures anomalies at a different time-horizon:
+//   - Fast:  short memory, reactive to transient spikes
+//   - Mid:   medium memory, balanced sensitivity
+//   - Slow:  long memory, detects sustained / slow-drift events
+//
+// It is decoded from the `rrcf_variants` key of the detector section (see
+// DetectorConfig.RRCFVariants), with one sub-key per variant.
+type RRCFVariantsConfig struct {
+	Fast RRCFVariantConfig `yaml:"fast"`
+	Mid  RRCFVariantConfig `yaml:"mid"`
+	Slow RRCFVariantConfig `yaml:"slow"`
+}
+
+// SEADConfig holds tunable parameters for the SEAD ensemble.
+// Only used when EnsembleConfig.Method == "sead".
+//
+// Several fields document a "default if 0". This package only decodes the
+// values; NOTE(review): confirm where those defaults are actually applied.
+type SEADConfig struct {
+	// Eta is the MWU learning rate η ∈ (0, 1].
+	// Higher values react faster to distribution shifts but are noisier.
+	// Recommended: 0.05–0.20. Default (if 0): 0.10.
+	Eta float64 `yaml:"eta"`
+
+	// Lambda is the KL-divergence regularisation strength.
+	// 0 = pure MWU (uniform prior). Recommended: 0.0–0.05. Default: 0.01.
+	// NOTE(review): 0 is described both as a meaningful value and, by the
+	// stated default, as possibly "unset" – confirm how 0 is interpreted.
+	Lambda float64 `yaml:"lambda"`
+
+	// QuantileWindow is the number of past scores retained per detector for
+	// streaming quantile normalisation. Default (if 0): 300.
+	QuantileWindow int `yaml:"quantile_window"`
+
+	// MinDataPoints is the cold-start guard: no anomaly is flagged until at
+	// least this many windows have been scored. Default (if 0): 20.
+	MinDataPoints int `yaml:"min_data_points"`
+}
+
+// EnsembleConfig manages the routing for the multi-model detector.
+// It is decoded from the `ensemble` key of the detector section (see
+// DetectorConfig.Ensemble).
+type EnsembleConfig struct {
+	// Enabled presumably switches the ensemble on or off – TODO confirm how
+	// it interacts with DetectorConfig.Method.
+	Enabled bool `yaml:"enabled"`
+
+	// Method selects the score-aggregation strategy.
+	// Allowed values: "avg" (default), "max", "median", "sead".
+	// "sead": adaptive Multiplicative Weights Update ensemble (Shah et al., ICML 2025).
+	Method string `yaml:"method"`
+
+	// Contamination is the expected fraction of anomalous windows ∈ [0, 0.5).
+	// Determines the decision threshold as quantile(1-contamination) of
+	// the rolling combined score history.
+	// The range is not validated when the configuration is loaded.
+	Contamination float64 `yaml:"contamination"`
+
+	// SEAD tuning parameters (only applied when Method == "sead").
+	SEAD SEADConfig `yaml:"sead"`
+}
+
+// AutoScalingConfig holds thresholds and durations for dynamic detector switching.
+//
+// Durations are plain float64 values; per the field comments they are
+// expressed in seconds (not time.Duration). It is decoded from the
+// `auto_scaling` key of the detector section (see DetectorConfig.AutoScaling).
+type AutoScalingConfig struct {
+	Enabled       bool    `yaml:"enabled"`
+	HighThreshold float64 `yaml:"high_threshold"`     // e.g. 0.75 (Normal -> High)
+	CritThreshold float64 `yaml:"critical_threshold"` // e.g. 0.90 (High -> Critical)
+	HighDuration  float64 `yaml:"high_duration"`      // e.g. 30.0 (seconds)
+	CritDuration  float64 `yaml:"critical_duration"`  // e.g. 15.0 (seconds)
+	DownThreshold float64 `yaml:"down_threshold"`     // e.g. 0.50 (back to Normal)
+	DownDuration  float64 `yaml:"down_duration"`      // e.g. 60.0 (seconds)
+}
+
+// DetectorConfig groups all anomaly detection configurations.
+// It is decoded from the top-level `detector` key (see Config.Detection).
+type DetectorConfig struct {
+	// Method names the detector to run.
+	// NOTE(review): the accepted values are not visible in this file.
+	Method   string         `yaml:"method"`
+	Ensemble EnsembleConfig `yaml:"ensemble"`
+	MAD      MADConfig      `yaml:"mad"`
+	COPOD    COPODConfig    `yaml:"copod"`
+	// RRCF is used by the standalone detector and the AVG/MAX/MEDIAN ensemble paths.
+	RRCF RRCFConfig `yaml:"rrcf"`
+	// RRCFVariants configures the three-horizon RRCF instances for the SEAD ensemble.
+	// Defaults are applied automatically when fields are zero.
+	// (The defaulting is not done in this package.)
+	RRCFVariants RRCFVariantsConfig `yaml:"rrcf_variants"`
+	AutoScaling  AutoScalingConfig  `yaml:"auto_scaling"`
+}
+
+// Config is the top-level pipeline configuration.
+//
+// Each field maps to one top-level YAML key: ingestion, transformation, drain,
+// detector and output. Note that the Detection field is read from the key
+// `detector`, not `detection`.
+type Config struct {
+	// Ingestion lists the data sources: a log path, a network interface, a
+	// disk device and the systemd services to watch.
+	Ingestion struct {
+		LogPath           string   `yaml:"log_path"`
+		NetInterface      string   `yaml:"net_interface"`
+		DiskDevice        string   `yaml:"disk_device"`
+		SystemctlServices []string `yaml:"systemctl_services"`
+	} `yaml:"ingestion"`
+
+	// Transformation holds the aggregation window length and a database path.
+	Transformation struct {
+		WindowSize time.Duration `yaml:"window_size"`
+		DbPath     string        `yaml:"db_path"`
+	} `yaml:"transformation"`
+
+	// Drain holds the log-template parser parameters. MaskingPatterns must be
+	// compiled with Compile before their Re fields are usable.
+	Drain struct {
+		Depth           int              `yaml:"depth"`
+		SimThreshold    float64          `yaml:"sim_threshold"`
+		MaxChildren     int              `yaml:"max_children"`
+		MaxClusters     int              `yaml:"max_clusters"`
+		MaskingPatterns []MaskingPattern `yaml:"masking_patterns"`
+	} `yaml:"drain"`
+
+	// Detection groups every detector-related setting (YAML key: detector).
+	Detection DetectorConfig `yaml:"detector"`
+
+	// Output names the files for feature and anomaly logs.
+	Output struct {
+		FeatureLogPath string `yaml:"feature_log_path"`
+		AnomalyLogPath string `yaml:"anomaly_log_path"`
+	} `yaml:"output"`
+}
+
+// LoadConfig reads and decodes the YAML file at path.
+func LoadConfig(path string) (*Config, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, fmt.Errorf("config: open %q: %w", path, err)
+	}
+	defer f.Close()
+
+	var cfg Config
+	dec := yaml.NewDecoder(f)
+	dec.KnownFields(false)
+	if err := dec.Decode(&cfg); err != nil {
+		return nil, fmt.Errorf("config: decode %q: %w", path, err)
+	}
+	return &cfg, nil
+}
+
+// Compile turns every MaskingPattern.Pattern in c.Drain.MaskingPatterns into a
+// *regexp.Regexp and stores it in the entry's Re field.
+//
+// Compilation stops at the first invalid pattern, whose Name is included in
+// the returned error; entries before it keep their freshly compiled Re.
+func (c *Config) Compile() error {
+	// The slice header is copied, but it shares its backing array with the
+	// config, so writes through patterns[idx] update c in place.
+	patterns := c.Drain.MaskingPatterns
+	for idx := range patterns {
+		compiled, err := regexp.Compile(patterns[idx].Pattern)
+		if err != nil {
+			return fmt.Errorf("config: compile pattern %q: %w", patterns[idx].Name, err)
+		}
+		patterns[idx].Re = compiled
+	}
+	return nil
+}
+
+// NumericPatternNames lists, in configuration order, the names of all masking
+// patterns whose Type is "float" or "int". Patterns with an empty Name are
+// left out. The result is never nil.
+func (c *Config) NumericPatternNames() []string {
+	patterns := c.Drain.MaskingPatterns
+	numeric := make([]string, 0, len(patterns))
+	for i := range patterns {
+		p := &patterns[i]
+		if p.Name == "" {
+			continue
+		}
+		switch p.Type {
+		case "float", "int":
+			numeric = append(numeric, p.Name)
+		}
+	}
+	return numeric
+}
--- a/internal/detect/copod.go
+++ b/internal/detect/copod.go
@ -0,0 +1,98 @@
+// Package detect provides anomaly detection algorithms and ensemble logic.
+package detect
+
+import (
+	"fmt"
+	"log"
+
+	"codeberg.org/pata1704/copod"
+	"codeberg.org/pata1704/guenther/pkg/types"
+)
+
+// COPODDetector implements the AnomalyDetector interface by wrapping the
+// external codeberg.org/pata1704/copod package.
+//
+// Streaming mode: Score calls Update internally, so the sliding-window buffer
+// stays current without requiring a separate Update call. Callers (like SEAD)
+// only need to call Score per time step.
+//
+// Fit seeds the buffer with a batch of normal vectors. If Fit is not called
+// the detector starts cold and returns score=0 until the buffer has enough
+// points (controlled by bufferSize in the underlying library).
+// NOTE(review): the cold-start behaviour is a property of the wrapped library
+// and is not visible in this file – confirm against the copod package.
+type COPODDetector struct {
+	// detector is the wrapped library instance; every method forwards to it.
+	detector *copod.Detector
+}
+
+// NewCOPODDetector builds a COPODDetector around a freshly created
+// copod.Detector.
+//
+//   - bufferSize: sliding-window capacity. Recommended: 100–200.
+//   - threshold:  score cutoff for standalone IsAnomaly. When used inside
+//     SEAD the threshold is ignored (SEAD applies its own adaptive threshold).
+//
+// Both arguments are passed to copod.NewDetector unchanged; its error, if any,
+// is returned wrapped.
+func NewCOPODDetector(bufferSize int, threshold float64) (*COPODDetector, error) {
+	inner, err := copod.NewDetector(bufferSize, threshold)
+	if err != nil {
+		return nil, fmt.Errorf("copod: initialize wrapped detector: %w", err)
+	}
+
+	wrapper := new(COPODDetector)
+	wrapper.detector = inner
+	return wrapper, nil
+}
+
+// Fit feeds vectors, in order, into the COPOD sliding window.
+// It stops at and returns the first update error; vectors before the failing
+// one remain in the window.
+func (c *COPODDetector) Fit(vectors []types.FeatureVector) error {
+	for i := range vectors {
+		if err := c.update(vectors[i]); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Update adds a single observation to the sliding window by delegating to
+// the internal update helper.
+// NOTE(review): this wrapper adds no locking of its own; whether it is safe
+// to call concurrently with Score depends on copod.Detector – confirm.
+func (c *COPODDetector) Update(vector types.FeatureVector) error {
+	return c.update(vector)
+}
+
+// Score computes the COPOD anomaly score for the given vector and
+// simultaneously updates the internal sliding window with the scored vector.
+//
+// The self-update ensures COPOD's buffer reflects the current data stream
+// without requiring a separate Update call after every Score. This is
+// consistent with the RRCF and IsolationForest detectors which also
+// update themselves inside Score.
+func (c *COPODDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
+	vec := copod.FeatureVector{
+		NormalizedVector: vector.NormalizedVector,
+		Timestamp:        vector.Timestamp,
+	}
+
+	// Score first, then append to the buffer so the scored point does not
+	// bias its own copula calculation (score-then-insert, same as RRCF).
+	res, err := c.detector.Score(vec)
+	if err != nil {
+		return types.AnomalyResult{}, fmt.Errorf("copod: score: %w", err)
+	}
+
+	if err := c.update(vector); err != nil {
+		// Log but don't fail: the score is already computed.
+		log.Printf("copod: update after score: %v", err)
+	}
+
+	return types.AnomalyResult{
+		Timestamp:  res.Timestamp,
+		Score:      res.Score,
+		IsAnomaly:  res.IsAnomaly,
+		Confidence: res.Confidence,
+		Method:     res.Method,
+	}, nil
+}
+
+// update converts vector to the library's FeatureVector (normalised values
+// and timestamp only) and appends it to the copod sliding window.
+func (c *COPODDetector) update(vector types.FeatureVector) error {
+	return c.detector.Update(copod.FeatureVector{
+		NormalizedVector: vector.NormalizedVector,
+		Timestamp:        vector.Timestamp,
+	})
+}
--- a/internal/detect/ensemble.go
+++ b/internal/detect/ensemble.go
@ -0,0 +1,325 @@
+// Package detect provides anomaly detection algorithms and ensemble logic.
+package detect
+
+import (
+	"fmt"
+	"math"
+	"sort"
+	"strings"
+	"sync"
+
+	"codeberg.org/pata1704/guenther/pkg/types"
+)
+
+// EnsembleMethod selects the score-aggregation strategy used by EnsembleDetector.
+// Any value other than the constants below is treated like EnsembleAVG by
+// EnsembleDetector.Score (it falls into the default branch).
+type EnsembleMethod string
+
+const (
+	// EnsembleAVG combines normalised sub-scores by arithmetic mean.
+	EnsembleAVG EnsembleMethod = "avg"
+	// EnsembleMAX takes the maximum of the normalised sub-scores (aggressive).
+	EnsembleMAX EnsembleMethod = "max"
+	// EnsembleMEDIAN uses the median of normalised sub-scores (robust to outliers).
+	EnsembleMEDIAN EnsembleMethod = "median"
+	// EnsembleSEAD delegates to an embedded SEADDetector (adaptive MWU weights).
+	// This method is selected by setting detector.ensemble.method = "sead" in
+	// the config. The base detectors are created by NewSEADWithAllDetectors and
+	// the SEAD wrapper handles the online weight updates automatically.
+	// NOTE(review): this comment used to name four base detectors (MAD, RRCF,
+	// COPOD, IForest) while NewEnsembleDetector speaks of six (three RRCF
+	// horizons) – confirm the actual set in NewSEADWithAllDetectors.
+	EnsembleSEAD EnsembleMethod = "sead"
+)
+
+// RRCFVariantConfig holds parameters for a single named RRCF instance in the
+// SEAD multi-horizon ensemble.
+//
+// In NewEnsembleDetector a zero value in any field of the Mid variant means
+// "use the built-in default" (150 trees, size 64, percentile 0.85) on the
+// classic AVG/MAX/MEDIAN path.
+type RRCFVariantConfig struct {
+	// NumTrees controls score stability: more trees → smoother / more conservative.
+	NumTrees int
+	// TreeSize is the sliding-window capacity per tree.
+	TreeSize int
+	// ThresholdPercentile is the per-model decision threshold for standalone use.
+	ThresholdPercentile float64
+}
+
+// RRCFVariantsConfig groups the three RRCF horizon variants used by the SEAD ensemble.
+//   - Fast: short memory, reactive to transient spikes
+//   - Mid:  medium memory, balanced sensitivity
+//   - Slow: long memory, detects sustained / slow-drift events
+//
+// NewEnsembleDetector passes the whole struct to the SEAD constructor and
+// reads only Mid on the classic AVG/MAX/MEDIAN path.
+type RRCFVariantsConfig struct {
+	Fast RRCFVariantConfig
+	Mid  RRCFVariantConfig
+	Slow RRCFVariantConfig
+}
+
+// EnsembleDetector implements the AnomalyDetector interface by combining
+// COPOD and RRCF scores using min-max normalisation.
+//
+// Scoring strategy (AVG / MAX / MEDIAN methods):
+//  1. Each model produces a raw score on its own scale.
+//  2. Both scores are normalised to [0, 1] using a rolling min/max window.
+//  3. The combined score is the result of the selected aggregation function.
+//  4. A window is flagged anomalous when combinedScore > threshold where
+//     threshold = quantile(combinedHistory, 1-contamination).
+//
+// SEAD method:
+//
+//	When method == EnsembleSEAD the detector delegates entirely to an embedded
+//	SEADDetector, which adapts its detector weights online using
+//	Multiplicative Weights Update (MWU/FTRL). In that mode
+//	NewEnsembleDetector does not create the copod and rrcf sub-detectors;
+//	both fields stay nil.
+type EnsembleDetector struct {
+	// method is fixed at construction and selects the code path in
+	// Fit / Update / Score.
+	method EnsembleMethod
+
+	// sub-detectors for AVG/MAX/MEDIAN methods
+	copod AnomalyDetector
+	rrcf  AnomalyDetector
+
+	// SEAD method: fully adaptive ensemble (replaces copod+rrcf when active)
+	sead *SEADDetector
+
+	// contamination is the expected anomaly fraction; Score uses
+	// 1-contamination as the quantile for the decision threshold.
+	contamination float64
+
+	// mu guards the three history slices below.
+	mu              sync.Mutex
+	copodHistory    []float64 // raw COPOD scores, newest last
+	rrcfHistory     []float64 // raw RRCF scores, newest last
+	combinedHistory []float64 // aggregated scores, newest last
+	// historySize caps each history slice (set to 1000 by the constructor).
+	historySize     int
+}
+
+// NewEnsembleDetector builds the multi-model ensemble for the given method.
+//
+//   - method:          "avg" | "max" | "median" | "sead"
+//   - copodBufferSize: sliding-window capacity for COPOD (≥ 100 recommended).
+//   - copodThreshold:  per-model threshold passed to COPODDetector.
+//   - rrcfVariants:    three-horizon RRCF config (fast/mid/slow). Used by SEAD;
+//     the Mid variant is also used for the classic AVG/MAX/MEDIAN path.
+//   - contamination:   expected fraction of anomalies ∈ [0, 0.5).
+//   - seadCfg:         SEAD parameters (only used when method == "sead").
+//     Pass detect.DefaultSEADConfig() when method != "sead".
+//
+// Exactly one of the two back-ends is created: the SEAD detector for
+// EnsembleSEAD, otherwise a COPOD + RRCF pair. The score history is capped at
+// 1000 entries. Construction errors are returned wrapped.
+func NewEnsembleDetector(
+	method EnsembleMethod,
+	copodBufferSize int, copodThreshold float64,
+	rrcfVariants RRCFVariantsConfig,
+	contamination float64,
+	seadCfg SEADConfig,
+) (*EnsembleDetector, error) {
+	ens := &EnsembleDetector{
+		method:        method,
+		contamination: contamination,
+		historySize:   1000,
+	}
+
+	if method != EnsembleSEAD {
+		// Classic AVG/MAX/MEDIAN path: COPOD plus a single RRCF.
+		copodDet, err := NewCOPODDetector(copodBufferSize, copodThreshold)
+		if err != nil {
+			return nil, fmt.Errorf("ensemble: %w", err)
+		}
+		ens.copod = copodDet
+
+		// mid is a copy; zero fields are replaced by the Mid-variant defaults
+		// without touching the caller's struct.
+		mid := rrcfVariants.Mid
+		if mid.NumTrees == 0 {
+			mid.NumTrees = 150
+		}
+		if mid.TreeSize == 0 {
+			mid.TreeSize = 64
+		}
+		if mid.ThresholdPercentile == 0 {
+			mid.ThresholdPercentile = 0.85
+		}
+		ens.rrcf = NewRRCFDetector(mid.NumTrees, mid.TreeSize, 0, mid.ThresholdPercentile)
+		return ens, nil
+	}
+
+	// SEAD path: hand everything to the adaptive ensemble. The MAD detector
+	// inside it is configured with threshold 3.5 and calibration size 0
+	// (0 selects the default of 100 vectors).
+	seadDet, err := NewSEADWithAllDetectors(
+		copodBufferSize, copodThreshold,
+		rrcfVariants,
+		3.5, 0,
+		seadCfg,
+	)
+	if err != nil {
+		return nil, fmt.Errorf("ensemble: sead: %w", err)
+	}
+	ens.sead = seadDet
+	return ens, nil
+}
+
+// SEAD returns the underlying SEADDetector if the ensemble is in SEAD mode.
+// NewEnsembleDetector assigns the field only for EnsembleSEAD, so the result
+// is nil for every other method. The read is performed while holding e.mu.
+func (e *EnsembleDetector) SEAD() *SEADDetector {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	return e.sead
+}
+
+// Fit seeds the active back-end with vectors.
+//
+// In SEAD mode the call is forwarded to the SEAD detector. Otherwise COPOD is
+// fitted first, then RRCF; the first failure is returned wrapped and the
+// remaining model is not fitted.
+func (e *EnsembleDetector) Fit(vectors []types.FeatureVector) error {
+	switch e.method {
+	case EnsembleSEAD:
+		return e.sead.Fit(vectors)
+	default:
+		err := e.copod.Fit(vectors)
+		if err != nil {
+			return fmt.Errorf("ensemble: fit copod: %w", err)
+		}
+		err = e.rrcf.Fit(vectors)
+		if err != nil {
+			return fmt.Errorf("ensemble: fit rrcf: %w", err)
+		}
+		return nil
+	}
+}
+
+// Update forwards vector to the active back-end.
+//
+// In SEAD mode the call goes to the SEAD detector. Otherwise COPOD is updated
+// first, then RRCF; the first failure is returned wrapped and the remaining
+// model is not updated.
+func (e *EnsembleDetector) Update(vector types.FeatureVector) error {
+	switch e.method {
+	case EnsembleSEAD:
+		return e.sead.Update(vector)
+	default:
+		err := e.copod.Update(vector)
+		if err != nil {
+			return fmt.Errorf("ensemble: update copod: %w", err)
+		}
+		err = e.rrcf.Update(vector)
+		if err != nil {
+			return fmt.Errorf("ensemble: update rrcf: %w", err)
+		}
+		return nil
+	}
+}
+
+// Score evaluates the feature vector.
+//
+// For SEAD method: delegates entirely to the embedded SEADDetector.
+// For AVG/MAX/MEDIAN: min-max normalises COPOD and RRCF scores and aggregates.
+//
+// Classic path, step by step:
+//  1. Score the vector with COPOD, then RRCF; either error aborts the call.
+//  2. Under e.mu, append both raw scores to their histories and normalise
+//     each against its own history (which already includes the new score).
+//  3. Aggregate with the configured method. Unknown methods fall back to the
+//     arithmetic mean. MEDIAN is a true median: for an even number of
+//     sub-scores it is the mean of the two middle values, so with the current
+//     two sub-detectors it coincides with AVG.
+//  4. Append the combined score to its history and flag an anomaly when it
+//     exceeds quantile(combinedHistory, 1-contamination) and more than 10
+//     combined scores have been recorded.
+//
+// Confidence is combined/threshold capped at 1. Method names the aggregation
+// and the sub-detectors that flagged the vector on their own.
+func (e *EnsembleDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
+	if e.method == EnsembleSEAD {
+		res, err := e.sead.Score(vector)
+		if err != nil {
+			return types.AnomalyResult{}, fmt.Errorf("ensemble: sead score: %w", err)
+		}
+		return res, nil
+	}
+
+	resCOPOD, err := e.copod.Score(vector)
+	if err != nil {
+		return types.AnomalyResult{}, fmt.Errorf("ensemble: score copod: %w", err)
+	}
+
+	resRRCF, err := e.rrcf.Score(vector)
+	if err != nil {
+		return types.AnomalyResult{}, fmt.Errorf("ensemble: score rrcf: %w", err)
+	}
+
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
+	e.appendHistory(&e.copodHistory, resCOPOD.Score)
+	e.appendHistory(&e.rrcfHistory, resRRCF.Score)
+
+	normCOPOD := minMaxNorm(resCOPOD.Score, e.copodHistory)
+	normRRCF := minMaxNorm(resRRCF.Score, e.rrcfHistory)
+
+	var combined float64
+	switch e.method {
+	case EnsembleMAX:
+		combined = math.Max(normCOPOD, normRRCF)
+	case EnsembleMEDIAN:
+		// Written for N sub-scores so it stays correct if more are added.
+		// Picking vals[len/2] alone would return the larger of two values,
+		// i.e. silently behave like EnsembleMAX.
+		vals := []float64{normCOPOD, normRRCF}
+		sort.Float64s(vals)
+		mid := len(vals) / 2
+		if len(vals)%2 == 0 {
+			combined = (vals[mid-1] + vals[mid]) / 2.0
+		} else {
+			combined = vals[mid]
+		}
+	default: // EnsembleAVG
+		combined = (normCOPOD + normRRCF) / 2.0
+	}
+
+	e.appendHistory(&e.combinedHistory, combined)
+
+	// Cold-start guard: do not flag anything until the rolling threshold is
+	// backed by more than minDataPoints combined scores.
+	const minDataPoints = 10
+	threshold := quantile(e.combinedHistory, 1.0-e.contamination)
+	isAnomaly := len(e.combinedHistory) > minDataPoints && combined > threshold
+
+	return types.AnomalyResult{
+		Timestamp: vector.Timestamp,
+		Score:     combined,
+		IsAnomaly: isAnomaly,
+		// The 1e-9 floor avoids a division by zero when the threshold is 0.
+		Confidence: math.Min(combined/math.Max(threshold, 1e-9), 1.0),
+		Method:     e.methodString(string(e.method), resCOPOD.IsAnomaly, resRRCF.IsAnomaly),
+	}, nil
+}
+
+// WeightSummary reports the SEAD detector's current weights in
+// human-readable form. It yields "" whenever the ensemble is not in SEAD mode
+// or has no SEAD detector attached.
+func (e *EnsembleDetector) WeightSummary() string {
+	if e.method == EnsembleSEAD && e.sead != nil {
+		return e.sead.WeightSummary()
+	}
+	return ""
+}
+
+// appendHistory pushes v onto *h and, if that makes the slice longer than
+// e.historySize, drops the oldest (first) entry.
+// Caller must hold e.mu.
+func (e *EnsembleDetector) appendHistory(h *[]float64, v float64) {
+	grown := append(*h, v)
+	if len(grown) > e.historySize {
+		grown = grown[1:]
+	}
+	*h = grown
+}
+
+// methodString builds a concise label for AnomalyResult.Method.
+func (e *EnsembleDetector) methodString(method string, copodAnomaly, rrcfAnomaly bool) string {
+	var active []string
+	if copodAnomaly {
+		active = append(active, "COPOD")
+	}
+	if rrcfAnomaly {
+		active = append(active, "RRCF")
+	}
+	if len(active) > 0 {
+		return fmt.Sprintf("Ensemble-%s(%s)", strings.ToUpper(method), strings.Join(active, "+"))
+	}
+	return fmt.Sprintf("Ensemble-%s(none)", strings.ToUpper(method))
+}
+
+// ── score helpers ─────────────────────────────────────────────────────────────
+
+// minMaxNorm rescales v into [0, 1] relative to the smallest and largest
+// value found in history.
+//
+// Special cases: an empty history yields 0; a history whose spread is below
+// 1e-12 (effectively constant) yields 0.5. Values outside the observed range
+// are clamped to 0 or 1.
+func minMaxNorm(v float64, history []float64) float64 {
+	if len(history) == 0 {
+		return 0
+	}
+
+	lo, hi := history[0], history[0]
+	for i := 1; i < len(history); i++ {
+		sample := history[i]
+		if sample < lo {
+			lo = sample
+		}
+		if sample > hi {
+			hi = sample
+		}
+	}
+
+	width := hi - lo
+	if width < 1e-12 {
+		return 0.5
+	}
+
+	scaled := (v - lo) / width
+	switch {
+	case scaled < 0:
+		return 0
+	case scaled > 1:
+		return 1
+	default:
+		return scaled
+	}
+}
+
+// quantile returns the p-th quantile of data without modifying the slice.
+//
+// The result is the element at index floor(n*p) of the sorted copy (no
+// interpolation). p is clamped to [0, 1]: p <= 0 or NaN yields the minimum
+// and p >= 1 (including +Inf) yields the maximum. Without the clamp a
+// negative p produced a negative index and a panic, and NaN/Inf made the
+// float-to-int conversion implementation-defined. An empty slice yields 0.
+func quantile(data []float64, p float64) float64 {
+	n := len(data)
+	if n == 0 {
+		return 0
+	}
+	sorted := make([]float64, n)
+	copy(sorted, data)
+	sort.Float64s(sorted)
+
+	// Resolve out-of-range p before converting to an index.
+	switch {
+	case math.IsNaN(p), p <= 0:
+		return sorted[0]
+	case p >= 1:
+		return sorted[n-1]
+	}
+
+	idx := int(float64(n) * p)
+	if idx >= n {
+		idx = n - 1
+	}
+	return sorted[idx]
+}
--- a/internal/detect/iforest.go
+++ b/internal/detect/iforest.go
@ -0,0 +1,200 @@
+package detect
+
+import (
+	"log"
+	"sync"
+
+	"codeberg.org/pata1704/guenther/pkg/types"
+	"github.com/e-XpertSolutions/go-iforest/iforest"
+)
+
+// IsolationForestDetector wraps go-iforest with thread-safe access and
+// continuous background retraining on non-anomalous data to handle concept drift.
+//
+// During the warmup phase (model == nil) incoming vectors are buffered.
+// Once warmupSize vectors have accumulated, the first training run executes
+// synchronously so that the detector is never in an undefined trained state
+// after the first window tick.
+//
+// Subsequent retraining is asynchronous: when trainingBuffer reaches
+// bufferSize the buffer is swapped out under the lock, and training runs in
+// a detached goroutine. The current model remains active during retraining,
+// so scoring never blocks.
+type IsolationForestDetector struct {
+	// mu guards model and trainingBuffer.
+	mu             sync.RWMutex
+	// model is nil until the first successful Fit; Fit replaces it wholesale.
+	model          *iforest.Forest
+	// trainingBuffer collects vectors for the next (re)training run and is
+	// reset to nil each time it is handed to Fit.
+	trainingBuffer []types.FeatureVector
+
+	// Tuning knobs – set via constructor.
+	numTrees      int     // passed to iforest.NewForest
+	subSample     int     // passed to iforest.NewForest
+	contamination float64 // passed to iforest.NewForest
+	bufferSize    int     // buffer length that triggers a background retrain
+	warmupSize    int     // buffer length that triggers the first, synchronous fit
+	threshold     float64 // Score flags a vector when its score exceeds this
+}
+
+// NewIsolationForestDetector returns an untrained detector configured with
+// the given parameters.
+//
+//   - bufferSize:    number of non-anomalous vectors to accumulate before
+//     triggering background retraining.
+//   - warmupSize:    number of vectors to accumulate before the first (sync)
+//     training run. Values ≤ 0 or greater than bufferSize are replaced by
+//     bufferSize.
+//   - numTrees:      number of isolation trees (typically 100).
+//   - subSample:     subsample size per tree (typically 256).
+//   - contamination: expected fraction of anomalies (0 < c < 0.5).
+//   - threshold:     score cutoff for IsAnomaly.
+func NewIsolationForestDetector(
+	bufferSize, warmupSize, numTrees, subSample int,
+	contamination, threshold float64,
+) *IsolationForestDetector {
+	effectiveWarmup := warmupSize
+	if effectiveWarmup <= 0 || effectiveWarmup > bufferSize {
+		effectiveWarmup = bufferSize
+	}
+
+	det := new(IsolationForestDetector)
+	det.bufferSize = bufferSize
+	det.warmupSize = effectiveWarmup
+	det.numTrees = numTrees
+	det.subSample = subSample
+	det.contamination = contamination
+	det.threshold = threshold
+	return det
+}
+
+// Fit trains a fresh Isolation Forest on vectors and installs it as the
+// active model.
+//
+// An empty input is a no-op. Training and the subsequent Test pass run
+// outside the lock; the write lock is held only while the model pointer is
+// swapped, so a previously installed model stays usable during training.
+// Fit always returns nil.
+func (d *IsolationForestDetector) Fit(vectors []types.FeatureVector) error {
+	sampleCount := len(vectors)
+	if sampleCount == 0 {
+		return nil
+	}
+
+	matrix := convertToMatrix(vectors)
+	trained := iforest.NewForest(d.numTrees, d.subSample, d.contamination)
+	trained.Train(matrix)
+	trained.Test(matrix)
+
+	d.mu.Lock()
+	d.model = trained
+	d.mu.Unlock()
+
+	log.Printf("iforest: trained on %d samples (trees=%d, subsample=%d, contamination=%.3f)",
+		sampleCount, d.numTrees, d.subSample, d.contamination)
+	return nil
+}
+
+// Score returns an AnomalyResult for vector.
+//
+// Pre-model (warmup) behaviour:
+//   - Vector is appended to trainingBuffer.
+//   - Once warmupSize is reached the first training run executes synchronously
+//     on the calling goroutine so subsequent Score calls have a model.
+//   - Returns score=0, IsAnomaly=false while warming up.
+//   - The call that completes the warmup continues to inference and returns
+//     a real score for its own vector.
+//
+// Post-model behaviour:
+//   - Score is computed via the active model (read-lock only).
+//   - Non-anomalous vectors are appended to trainingBuffer.
+//   - When trainingBuffer reaches bufferSize, a background retrain fires.
+//
+// On a prediction error the warmup result is returned together with the error.
+func (d *IsolationForestDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
+	// warmup doubles as the neutral result for every "no score available" exit.
+	warmup := types.AnomalyResult{
+		Timestamp: vector.Timestamp,
+		Score:     0,
+		IsAnomaly: false,
+		Method:    "IF",
+	}
+
+	// ── warmup phase ──────────────────────────────────────────────────────
+	// Snapshot the model pointer; the rest of the call works on this copy so
+	// a concurrent retrain cannot swap it mid-inference.
+	d.mu.RLock()
+	model := d.model
+	d.mu.RUnlock()
+
+	if model == nil {
+		d.mu.Lock()
+		d.trainingBuffer = append(d.trainingBuffer, vector)
+		bufLen := len(d.trainingBuffer)
+		d.mu.Unlock()
+
+		if bufLen < d.warmupSize {
+			return warmup, nil
+		}
+
+		// Synchronous first fit to eliminate the cold-start gap.
+		// The buffer is taken in a second critical section; if another
+		// goroutine took it first, buf is nil and Fit is a no-op.
+		d.mu.Lock()
+		buf := d.trainingBuffer
+		d.trainingBuffer = nil
+		d.mu.Unlock()
+
+		if err := d.Fit(buf); err != nil {
+			return warmup, err
+		}
+
+		d.mu.RLock()
+		model = d.model
+		d.mu.RUnlock()
+
+		if model == nil {
+			return warmup, nil // no model installed yet (e.g. empty buf) – defensive
+		}
+	}
+
+	// ── inference ─────────────────────────────────────────────────────────
+	// Predict takes a batch; a single-row batch is passed and the first
+	// returned score is used.
+	_, scores, err := model.Predict([][]float64{vector.NormalizedVector})
+	if err != nil {
+		return warmup, err
+	}
+	if len(scores) == 0 {
+		return warmup, nil
+	}
+	score := scores[0]
+
+	res := types.AnomalyResult{
+		Timestamp:  vector.Timestamp,
+		Score:      score,
+		IsAnomaly:  score > d.threshold,
+		Confidence: score,
+		Method:     "IF",
+	}
+
+	// Buffer non-anomalous vectors for background retraining.
+	// Flagged vectors are excluded so they do not enter the next training set.
+	if !res.IsAnomaly {
+		if err := d.Update(vector); err != nil {
+			log.Printf("iforest: update buffer: %v", err)
+		}
+	}
+	return res, nil
+}
+
+// Update appends a non-anomalous vector to the training buffer.
+// If the buffer is full it is swapped atomically and a background goroutine
+// retrains the model on the captured data.
+func (d *IsolationForestDetector) Update(vector types.FeatureVector) error {
+	d.mu.Lock()
+	d.trainingBuffer = append(d.trainingBuffer, vector)
+
+	if len(d.trainingBuffer) < d.bufferSize {
+		d.mu.Unlock()
+		return nil
+	}
+
+	buf := make([]types.FeatureVector, len(d.trainingBuffer))
+	copy(buf, d.trainingBuffer)
+	d.trainingBuffer = nil
+	d.mu.Unlock()
+
+	go func() {
+		if err := d.Fit(buf); err != nil {
+			log.Printf("iforest: background retrain: %v", err)
+		}
+	}()
+	return nil
+}
+
+// ── helpers ───────────────────────────────────────────────────────────────────
+
+// convertToMatrix collects the NormalizedVector of every feature vector into
+// a row-per-vector matrix. Rows alias the input slices; nothing is copied.
+func convertToMatrix(vectors []types.FeatureVector) [][]float64 {
+	rows := make([][]float64, 0, len(vectors))
+	for i := range vectors {
+		rows = append(rows, vectors[i].NormalizedVector)
+	}
+	return rows
+}
--- a/internal/detect/interface.go
+++ b/internal/detect/interface.go
@ -0,0 +1,148 @@
+package detect
+
+import (
+	"context"
+	"log"
+	"sync"
+	"time"
+
+	"codeberg.org/pata1704/guenther/pkg/types"
+)
+
+// AnomalyDetector is the common interface for all detection algorithms.
+// Implementations must be safe for concurrent use.
+//
+// DetectionLayer calls only Score on its detector; Fit and Update are for
+// callers that manage training explicitly.
+type AnomalyDetector interface {
+	// Fit trains the model on the supplied slice of labelled-normal vectors.
+	Fit(vectors []types.FeatureVector) error
+	// Score returns an anomaly assessment for vector. It must not block.
+	// It is invoked on the DetectionLayer's single event-loop goroutine, so a
+	// slow Score delays every following vector.
+	Score(vector types.FeatureVector) (types.AnomalyResult, error)
+	// Update buffers vector for incremental model updates.
+	Update(vector types.FeatureVector) error
+}
+
+// DetectionLayer reads FeatureVectors from inputChan, scores them with the
+// configured AnomalyDetector, and forwards AnomalyResults to outputChan.
+//
+// The layer runs a single event-loop goroutine (no additional worker pool is
+// needed because detection is CPU-bound in a single model, not I/O-bound).
+// Health metrics are emitted to healthChan every 5 seconds.
+//
+// Backpressure: if outputChan is full the result is dropped and a warning is
+// logged. Dropping instead of blocking keeps the detection goroutine reading
+// from inputChan, so a slow consumer cannot stall the upstream producer.
+type DetectionLayer struct {
+	detector   AnomalyDetector
+	inputChan  <-chan types.FeatureVector
+	outputChan chan<- types.AnomalyResult
+	healthChan chan<- types.StageHealth
+
+	// scalingController, when set, is fed the AvgCPUPercent of every vector.
+	scalingController *ScalingController // optional
+
+	// wg tracks the event-loop goroutine started by Start.
+	wg sync.WaitGroup
+
+	// mu guards the counters below.
+	mu         sync.Mutex
+	processed  uint64  // vectors passed to Score (including failed ones)
+	dropped    uint64  // results discarded because outputChan was full
+	avgLatency float64 // moving average of Score latency in ms (new sample weight 0.2)
+}
+
+// NewDetectionLayer returns a DetectionLayer that scores vectors from input
+// with detector, publishes results on output and health reports on health.
+// No goroutine is started and no scaling controller is attached; see Start
+// and SetScalingController.
+func NewDetectionLayer(
+	detector AnomalyDetector,
+	input <-chan types.FeatureVector,
+	output chan<- types.AnomalyResult,
+	health chan<- types.StageHealth,
+) *DetectionLayer {
+	layer := new(DetectionLayer)
+	layer.detector = detector
+	layer.inputChan = input
+	layer.outputChan = output
+	layer.healthChan = health
+	return layer
+}
+
+// SetScalingController attaches an auto-scaling controller to the layer.
+// Once set, handle reports each vector's AvgCPUPercent to it.
+// The assignment is not synchronised with the event loop, so call this
+// before Start.
+func (l *DetectionLayer) SetScalingController(sc *ScalingController) {
+	l.scalingController = sc
+}
+
+// Start launches the detection event loop in a background goroutine tracked
+// by the layer's WaitGroup (see Wait).
+//
+// The loop scores every vector received on inputChan, emits a health report
+// every 5 seconds, and exits when ctx is cancelled or inputChan is closed.
+//
+// Start is NOT idempotent: each call launches an additional loop that
+// competes for the same channels. Call it once per layer.
+func (l *DetectionLayer) Start(ctx context.Context) {
+	l.wg.Go(func() {
+		reportTicker := time.NewTicker(5 * time.Second)
+		defer reportTicker.Stop()
+
+		for {
+			select {
+			case fv, ok := <-l.inputChan:
+				if !ok {
+					// A closed channel is always ready and yields zero
+					// values; without this check the loop would spin and
+					// score empty vectors until ctx is cancelled.
+					return
+				}
+				l.handle(fv)
+
+			case <-reportTicker.C:
+				l.emitHealth()
+
+			case <-ctx.Done():
+				return
+			}
+		}
+	})
+}
+
+// Wait blocks until the event loop started by Start has returned. The loop
+// returns when the context passed to Start is cancelled; Wait itself does not
+// cancel anything.
+func (l *DetectionLayer) Wait() {
+	l.wg.Wait()
+}
+
+// handle processes one feature vector: it feeds the optional scaling
+// controller, scores the vector, updates the layer's counters and forwards
+// the result.
+//
+// Every call counts as processed and contributes to the latency average,
+// even when scoring fails. A scoring error is logged and nothing is
+// forwarded. If outputChan is not ready the result is dropped, counted in
+// l.dropped and logged.
+func (l *DetectionLayer) handle(fv types.FeatureVector) {
+	if sc := l.scalingController; sc != nil {
+		sc.ObserveCPU(fv.AvgCPUPercent)
+	}
+
+	begin := time.Now()
+	result, scoreErr := l.detector.Score(fv)
+	elapsedMs := time.Since(begin).Seconds() * 1e3
+
+	l.mu.Lock()
+	l.processed++
+	if l.avgLatency != 0 {
+		// Exponential moving average: 80 % history, 20 % newest sample.
+		l.avgLatency = l.avgLatency*0.8 + elapsedMs*0.2
+	} else {
+		// First sample (or average still exactly zero): seed directly.
+		l.avgLatency = elapsedMs
+	}
+	l.mu.Unlock()
+
+	if scoreErr != nil {
+		log.Printf("detection: score error: %v", scoreErr)
+		return
+	}
+
+	// Non-blocking hand-off; a successful send ends the call.
+	select {
+	case l.outputChan <- result:
+		return
+	default:
+	}
+
+	l.mu.Lock()
+	l.dropped++
+	l.mu.Unlock()
+	log.Printf("detection: output channel full – dropping result (score=%.4f)", result.Score)
+}
+
+// emitHealth publishes the layer's current counters as a StageHealth report.
+// The counters are copied under l.mu; the send itself is non-blocking and the
+// report is discarded when healthChan is not ready.
+func (l *DetectionLayer) emitHealth() {
+	l.mu.Lock()
+	processed, droppedCount, latency := l.processed, l.dropped, l.avgLatency
+	l.mu.Unlock()
+
+	report := types.StageHealth{
+		StageName:       "detection_layer",
+		EventsProcessed: processed,
+		EventsDropped:   droppedCount,
+		AvgLatencyMs:    latency,
+		LastUpdate:      time.Now(),
+	}
+
+	select {
+	case l.healthChan <- report:
+	default:
+	}
+}
--- a/internal/detect/mad.go
+++ b/internal/detect/mad.go
@ -0,0 +1,254 @@
+// Package detect provides anomaly detection algorithms and ensemble logic.
+package detect
+
+import (
+	"log"
+	"math"
+	"sort"
+	"sync"
+
+	"codeberg.org/pata1704/guenther/pkg/types"
+)
+
+// MADDetector scores feature vectors using per-feature Median Absolute
+// Deviation (MAD) with pre-calibrated or automatically derived statistics.
+//
+// # Auto-calibration
+//
+// A detector built with NewMADDetectorAutoCalibrate starts without
+// statistics. It buffers the first calibrationSize non-empty
+// NormalizedVectors passed to Score, computes per-feature statistics once the
+// buffer is full, and scores against them from that very call onwards.
+// During the warmup phase Score falls back to an identity prior (see
+// scoreIdentity) and labels the result "MAD (warmup)"; warmup scores are
+// therefore not necessarily zero and may be flagged as anomalies.
+//
+//	detector := NewMADDetectorAutoCalibrate(3.5, 100)
+//
+// NOTE(review): an earlier version of this comment stated that SEAD
+// down-weights MAD during warmup because all scores are zero. With the
+// identity prior that premise no longer holds – confirm the intended
+// interaction with SEAD.
+//
+// # Calibration contract
+//
+// The medians and mads slices must be computed from the SAME representation
+// that arrives in vector.NormalizedVector – i.e. from the RobustScaler-scaled
+// feature vectors, NOT from raw window aggregates.
+//
+// # Scoring
+//
+// For each feature i the modified Z-score is:
+//
+//	score_i = |x_i - median_i| / (1.4826 * max(MAD_i, 0.01))
+//
+// The constant 1.4826 ≈ 1/(Φ⁻¹(3/4)) makes MAD a consistent estimator of σ
+// under normality (Rousseeuw & Croux, 1993). The 0.01 floor keeps features
+// with (near-)zero spread from producing unbounded scores. The anomaly score
+// is the maximum modified Z-score across all features.
+//
+// # Fit / Update
+//
+// Fit replaces the current statistics with values derived from the supplied
+// vectors and ends any warmup in progress. Update delegates to Score and
+// discards the result, so it only changes state while the detector is still
+// filling its calibration buffer.
+type MADDetector struct {
+	mu        sync.Mutex
+	threshold float64
+	medians   []float64 // per-feature median of NormalizedVector in baseline
+	mads      []float64 // per-feature MAD  of NormalizedVector in baseline
+
+	// Auto-calibration state. calibrationSize == 0 means disabled.
+	calibrationSize int
+	calibrationBuf  [][]float64 // collected NormalizedVectors during warmup
+	calibrated      bool
+}
+
+// NewMADDetector creates a MADDetector with pre-calibrated baseline statistics.
+//
+//   - threshold: anomaly score cutoff (modified Z-score). Typical: 2.5–4.0.
+//   - medians:   per-feature median computed from NormalizedVector in baseline.
+//   - mads:      per-feature MAD computed from NormalizedVector in baseline.
+//     Entries are stored unchanged; Score floors each MAD at 0.01 to avoid
+//     division by zero.
+//
+// The slices are retained as passed (not copied). The detector counts as
+// calibrated whenever medians is non-empty. With empty medians and no
+// auto-calibration (see NewMADDetectorAutoCalibrate) every score is zero.
+func NewMADDetector(threshold float64, medians, mads []float64) *MADDetector {
+	det := new(MADDetector)
+	det.threshold = threshold
+	det.medians = medians
+	det.mads = mads
+	det.calibrated = len(medians) > 0
+	return det
+}
+
+// NewMADDetectorAutoCalibrate creates a MADDetector that derives its own
+// per-feature statistics from the first calibrationSize NormalizedVectors
+// it encounters in Score.
+//
+//   - threshold:       modified Z-score cutoff after calibration. Typical: 3.5.
+//   - calibrationSize: number of vectors to buffer before first calibration.
+//     Recommended: 60–200
+func NewMADDetectorAutoCalibrate(threshold float64, calibrationSize int) *MADDetector {
+	if calibrationSize <= 0 {
+		calibrationSize = 100
+	}
+	// Initialise with "Identity" stats (median=0, mad=1) so the detector is
+	// operational immediately with a global sensitivity of 1.0 (baseline IQR).
+	// Features are already RobustScaled by DuckDB, so this is a sane prior.
+	// Automatic calibration will refine these once the buffer is full.
+	return &MADDetector{
+		threshold:       threshold,
+		calibrationSize: calibrationSize,
+		medians:         nil, // will be Lazy-init or from buffer
+		mads:            nil,
+	}
+}
+
+// Fit recomputes per-feature median and MAD from the NormalizedVector of each
+// supplied vector and installs them as the detector's statistics, replacing
+// any prior calibration and discarding a warmup buffer in progress.
+// An empty input is ignored. Fit always returns nil.
+//
+// The statistics are computed before the mutex is taken and swapped in under
+// it, so Fit may run concurrently with Score.
+func (m *MADDetector) Fit(vectors []types.FeatureVector) error {
+	n := len(vectors)
+	if n == 0 {
+		return nil
+	}
+
+	rows := make([][]float64, 0, n)
+	for _, fv := range vectors {
+		rows = append(rows, fv.NormalizedVector)
+	}
+	newMedians, newMADs := computeMADStats(rows)
+
+	m.mu.Lock()
+	m.medians, m.mads = newMedians, newMADs
+	m.calibrated, m.calibrationBuf = true, nil
+	m.mu.Unlock()
+
+	log.Printf("mad: fitted on %d vectors (%d features)", n, len(newMedians))
+	return nil
+}
+
+// Update feeds v through Score and discards the result; it always returns nil.
+// Once the detector is calibrated, Score no longer modifies any state, so
+// Update has no lasting effect. While auto-calibration is still collecting
+// vectors, the call adds v to the calibration buffer exactly as Score would.
+func (m *MADDetector) Update(v types.FeatureVector) error {
+	_, _ = m.Score(v)
+	return nil
+}
+
+// Score computes the maximum modified Z-score across all features of vector.
+//
+// While auto-calibration is warming up, a non-empty NormalizedVector is
+// copied into the calibration buffer and the result comes from scoreIdentity
+// (method "MAD (warmup)"). The call that fills the buffer derives the
+// statistics and is itself scored against them.
+//
+// Features beyond the length of the medians or mads slices are ignored, and
+// each MAD is floored at 0.01 before dividing. Score never returns an error.
+//
+// vector.NormalizedVector must contain values on the same scale as the
+// medians and mads slices (i.e. RobustScaler-scaled values from DuckDB).
+func (m *MADDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
+	features := vector.NormalizedVector
+
+	// All access to the calibration state happens under the mutex; the
+	// arithmetic below works on the slice headers captured here.
+	medians, mads, warming := func() ([]float64, []float64, bool) {
+		m.mu.Lock()
+		defer m.mu.Unlock()
+
+		if m.calibrated || m.calibrationSize <= 0 {
+			return m.medians, m.mads, false
+		}
+
+		// ── Auto-calibration warmup ───────────────────────────────────────
+		if len(features) > 0 {
+			m.calibrationBuf = append(m.calibrationBuf, append([]float64(nil), features...))
+		}
+		if len(m.calibrationBuf) < m.calibrationSize {
+			return nil, nil, true
+		}
+		m.medians, m.mads = computeMADStats(m.calibrationBuf)
+		m.calibrated = true
+		m.calibrationBuf = nil
+		log.Printf("mad: auto-calibrated on %d vectors (%d features)",
+			m.calibrationSize, len(m.medians))
+		return m.medians, m.mads, false
+	}()
+	if warming {
+		return m.scoreIdentity(vector), nil
+	}
+
+	// ── Scoring ───────────────────────────────────────────────────────────
+	peak := 0.0
+	for i := 0; i < len(features) && i < len(medians) && i < len(mads); i++ {
+		// Stability floor: 1e-2 corresponds to 1% of the original baseline
+		// IQR and keeps near-constant features from producing explosive scores.
+		spread := math.Max(mads[i], 0.01)
+
+		// 1.4826 converts MAD to an estimator of standard deviation.
+		z := math.Abs(features[i]-medians[i]) / (1.4826 * spread)
+		if z > peak {
+			peak = z
+		}
+	}
+
+	return types.AnomalyResult{
+		Timestamp:  vector.Timestamp,
+		Score:      peak,
+		IsAnomaly:  peak > m.threshold,
+		Confidence: math.Min(peak/math.Max(m.threshold, 1e-9), 1.0),
+		Method:     "MAD",
+	}, nil
+}
+
+// scoreIdentity provides a sane fallback (median=0, mad=1) for pre-scaled data.
+func (m *MADDetector) scoreIdentity(vector types.FeatureVector) types.AnomalyResult {
+	maxScore := 0.0
+	for _, val := range vector.NormalizedVector {
+		score := math.Abs(val) / 0.6745 // 1/1.4826
+		if score > maxScore {
+			maxScore = score
+		}
+	}
+	res := types.AnomalyResult{
+		Timestamp:  vector.Timestamp,
+		Score:      maxScore,
+		IsAnomaly:  maxScore > m.threshold,
+		Confidence: math.Min(maxScore/math.Max(m.threshold, 1e-9), 1.0),
+		Method:     "MAD (warmup)",
+	}
+	if res.IsAnomaly {
+		res.Details = "Detected during MAD auto-calibration warmup period (using identity prior)."
+	}
+	return res
+}
+
+// ── calibration helper ────────────────────────────────────────────────────────
+
+// computeMADStats returns per-feature median and MAD for a matrix of row vectors.
+// Both slices have length equal to the number of features, which is taken
+// from the first row. It returns (nil, nil) when rows is empty.
+//
+// Rows shorter than the first row simply do not contribute to the features
+// they lack; columns beyond the first row's length are ignored. The input
+// rows are only read, never modified.
+func computeMADStats(rows [][]float64) (medians, mads []float64) {
+	if len(rows) == 0 {
+		return nil, nil
+	}
+	nFeatures := len(rows[0])
+	medians = make([]float64, nFeatures)
+	mads = make([]float64, nFeatures)
+
+	// Scratch buffers reused across features. They are rebuilt from scratch
+	// for every feature because median sorts its argument in place: reusing
+	// positions from a previous feature would mix stale, reordered values
+	// into the column whenever a row is too short.
+	col := make([]float64, 0, len(rows))
+	devs := make([]float64, 0, len(rows))
+	for f := range nFeatures {
+		col = col[:0]
+		for _, row := range rows {
+			if f < len(row) {
+				col = append(col, row[f])
+			}
+		}
+		med := median(col)
+		medians[f] = med
+
+		devs = devs[:0]
+		for _, v := range col {
+			devs = append(devs, math.Abs(v-med))
+		}
+		mads[f] = median(devs)
+	}
+	return medians, mads
+}
+
+// median returns the median of xs, or 0 for an empty slice. For an even
+// number of elements it is the mean of the two middle values.
+// xs is sorted in place as a side effect.
+func median(xs []float64) float64 {
+	if len(xs) == 0 {
+		return 0
+	}
+	sort.Float64s(xs)
+
+	mid := len(xs) / 2
+	if len(xs)%2 == 0 {
+		return (xs[mid-1] + xs[mid]) / 2.0
+	}
+	return xs[mid]
+}
--- a/internal/detect/mad_test.go
+++ b/internal/detect/mad_test.go
@ -0,0 +1,114 @@
+package detect
+
+import (
+	"testing"
+	"time"
+
+	"codeberg.org/pata1704/guenther/pkg/types"
+	"github.com/stretchr/testify/assert"
+)
+
+// TestMADDetector_Score checks a pre-calibrated single-feature detector
+// (median 10, MAD 1, cutoff 3.0) against one inlier and one extreme outlier.
+func TestMADDetector_Score(t *testing.T) {
+	detector := NewMADDetector(3.0, []float64{10.0}, []float64{1.0})
+
+	scoreOf := func(x float64) (types.AnomalyResult, error) {
+		return detector.Score(types.FeatureVector{
+			Timestamp:        time.Now(),
+			NormalizedVector: []float64{x},
+		})
+	}
+
+	// One unit from the median: 1 / 1.4826 ≈ 0.67, well under the cutoff.
+	normal, err := scoreOf(11)
+	assert.NoError(t, err)
+	assert.False(t, normal.IsAnomaly, "Value 11 should not be an anomaly")
+
+	// Ninety units from the median: 90 / 1.4826 ≈ 60.7, far above the cutoff.
+	outlier, err := scoreOf(100)
+	assert.NoError(t, err)
+	assert.True(t, outlier.IsAnomaly, "Value 100 should be an anomaly")
+	assert.Greater(t, outlier.Score, 3.0)
+}
+
+// TestMADDetector_CalibrationStability verifies that a detector calibrated on
+// a constant (zero-variance) baseline keeps its scores bounded through the
+// 0.01 MAD floor applied in Score.
+func TestMADDetector_CalibrationStability(t *testing.T) {
+	// 1. Create a detector that auto-calibrates on 100 idle vectors.
+	detector := NewMADDetectorAutoCalibrate(3.5, 100)
+	now := time.Now()
+
+	// 2. Feed 99 perfectly idle vectors.
+	// Each one is answered by the identity-prior fallback; because every
+	// value is 0, the score is exactly 0 and the method is tagged "warmup".
+	for i := 0; i < 99; i++ {
+		fv := types.FeatureVector{
+			Timestamp:        now.Add(time.Duration(i) * time.Second),
+			NormalizedVector: []float64{0.0, 0.0},
+		}
+		res, err := detector.Score(fv)
+		assert.NoError(t, err)
+		assert.Equal(t, 0.0, res.Score)
+		assert.Contains(t, res.Method, "warmup")
+	}
+
+	// 3. Feed the 100th vector. This triggers calibration.
+	// Since all 100 vectors were 0, the learned medians and MADs are all 0.
+	fv100 := types.FeatureVector{
+		Timestamp:        now.Add(100 * time.Second),
+		NormalizedVector: []float64{0.0, 0.0},
+	}
+	res100, err := detector.Score(fv100)
+	assert.NoError(t, err)
+	assert.Equal(t, 0.0, res100.Score)
+	// From here on the stored MADs are [0.0, 0.0]; Score floors them at 0.01.
+
+	// 4. Feed the 101st vector: a deviation of 1.0 (one baseline IQR).
+	// Without the floor this would divide by zero (1.0 / (1.4826 * 0)).
+	// With the floor (0.01), it should be 1.0 / (1.4826 * 0.01) ≈ 67.45.
+	fv101 := types.FeatureVector{
+		Timestamp:        now.Add(101 * time.Second),
+		NormalizedVector: []float64{1.0, 0.0},
+	}
+	res101, err := detector.Score(fv101)
+	assert.NoError(t, err)
+	
+	// Check that the score is bounded by the floor.
+	// 1.0 / (1.4826 * 0.01) = 67.449
+	assert.InDelta(t, 67.449, res101.Score, 0.1)
+	assert.True(t, res101.IsAnomaly)
+	assert.Equal(t, "MAD", res101.Method) // No longer "warmup"
+
+	// 5. Test with a very small but non-zero MAD.
+	// With a learned MAD of 0.0001 the unfloored score for val=1.0 would be
+	// 1.0 / (1.4826 * 0.0001) ≈ 6745. The 0.01 floor must clamp it to 67.45.
+	detector.mu.Lock()
+	detector.mads = []float64{0.0001, 0.0}
+	detector.medians = []float64{0.0, 0.0}
+	detector.mu.Unlock()
+
+	resSmall, err := detector.Score(fv101)
+	assert.NoError(t, err)
+	assert.InDelta(t, 67.449, resSmall.Score, 0.1)
+}
+
+// TestMADDetector_IdentityPrior pins the warmup behaviour of an
+// auto-calibrating detector: before calibration completes, scores come from
+// scoreIdentity, which computes |x| / 0.6745 per feature.
+func TestMADDetector_IdentityPrior(t *testing.T) {
+	detector := NewMADDetectorAutoCalibrate(3.5, 10)
+
+	// Deviation of 2.0: 2.0 / 0.6745 ≈ 2.965, below the 3.5 cutoff.
+	fv := types.FeatureVector{
+		NormalizedVector: []float64{2.0},
+	}
+	res, err := detector.Score(fv)
+	assert.NoError(t, err)
+	assert.InDelta(t, 2.965, res.Score, 0.1)
+	assert.False(t, res.IsAnomaly) // 2.96 < 3.5
+
+	// Deviation of 3.0: 3.0 / 0.6745 ≈ 4.44, above the cutoff, so the result
+	// is flagged and carries the warmup explanation.
+	fv2 := types.FeatureVector{
+		NormalizedVector: []float64{3.0},
+	}
+	res2, err := detector.Score(fv2)
+	assert.NoError(t, err)
+	assert.InDelta(t, 4.44, res2.Score, 0.1)
+	assert.True(t, res2.IsAnomaly)
+	assert.Contains(t, res2.Details, "identity prior")
+}
--- a/internal/detect/rrcf.go
+++ b/internal/detect/rrcf.go
@ -0,0 +1,173 @@
+// Package detect provides anomaly detection algorithms and ensemble logic.
+package detect
+
+import (
+	"fmt"
+	"log"
+	"math"
+	"sync"
+
+	"codeberg.org/pata1704/guenther/pkg/types"
+	"codeberg.org/pata1704/rrcf"
+)
+
+// RRCFDetector wraps rrcf.Forest with the AnomalyDetector interface.
+//
+// Scoring strategy: score-then-insert (online streaming).
+// Each call to Score:
+//  1. Scores the point without inserting (ephemeral key – thread-safe).
+//  2. Inserts the point permanently so the forest stays fresh.
+//
+// All mutable state is guarded by mu; Fit and Score hold it for their whole
+// duration.
+type RRCFDetector struct {
+	mu     sync.Mutex
+	forest *rrcf.Forest
+
+	// thresholdPct is the quantile of scoreWindow used as anomaly threshold.
+	thresholdPct float64
+	numTrees     int
+	treeSize     int
+	// warmup is the number of vectors buffered in buf before scoring starts.
+	warmup int
+	// counter is the key passed to forest.Insert; it advances once per insert.
+	counter int
+	buf     []types.FeatureVector
+
+	// Rolling score window for adaptive threshold calculation.
+	// Uses a FIFO ring buffer; only scores after warmupDiscard are included.
+	scoreWindow   *ringBuffer
+	warmupDiscard int // number of scores to discard after forest initialisation
+	scored        int // total scores seen (including discarded)
+}
+
+// NewRRCFDetector creates an RRCFDetector.
+//
+//   - numTrees:       number of trees in the forest (200 recommended).
+//   - treeSize:       sliding-window capacity per tree (256 recommended).
+//   - warmup:         vectors to buffer before first Score (pass 0 for immediate start).
+//   - thresholdPct:   percentile of rolling score window used as threshold.
+//     E.g. 0.65 means: flag as anomaly if score > 65th percentile of recent scores.
+//
+// Internal defaults:
+//   - warmupDiscard = 10  (discard the first 10 scores; forest is not yet stable)
+//   - scoreWindowMax = 60
+func NewRRCFDetector(numTrees, treeSize, warmup int, thresholdPct float64) *RRCFDetector {
+	return &RRCFDetector{
+		numTrees:      numTrees,
+		treeSize:      treeSize,
+		warmup:        warmup,
+		thresholdPct:  thresholdPct,
+		scoreWindow:   newRingBuffer(60),
+		warmupDiscard: 10,
+	}
+}
+
+// Fit seeds the forest from a slice of FeatureVectors.
+// It replaces any existing forest and resets the internal insert counter.
+// The forest dimension is taken from the first vector's NormalizedVector.
+// An empty input is ignored. Fit always returns nil.
+//
+// Vectors the forest rejects are logged and skipped, so the counter (and the
+// number reported in the log) reflects the points actually inserted. The
+// warmup buffer and the rolling score window are left untouched.
+func (d *RRCFDetector) Fit(vectors []types.FeatureVector) error {
+	if len(vectors) == 0 {
+		return nil
+	}
+	dim := len(vectors[0].NormalizedVector)
+
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	d.forest = rrcf.NewForest(d.numTrees, dim, d.treeSize)
+	d.counter = 0
+	for _, v := range vectors {
+		if err := d.forest.Insert(v.NormalizedVector, d.counter); err != nil {
+			log.Printf("rrcf: fit insert: %v", err)
+			continue
+		}
+		d.counter++
+	}
+	// Report d.counter rather than len(vectors): failed inserts are skipped
+	// above and must not be counted as seeded points.
+	log.Printf("rrcf: forest seeded with %d points (trees=%d, treeSize=%d)",
+		d.counter, d.numTrees, d.treeSize)
+	return nil
+}
+
+// Score returns an AnomalyResult for vector.
+//
+// The detector moves through three phases:
+//
+//  1. Warmup (only when warmup > 0): vectors are buffered until warmup of
+//     them have been collected and are then inserted into the forest in one
+//     batch. Every call in this phase returns a zero-score, non-anomalous
+//     result. Warmup counts as finished once the insert counter has reached
+//     warmup, so a forest already seeded by Fit with at least that many
+//     points skips it.
+//  2. Settling: the first warmupDiscard scores are returned but never flagged
+//     and are kept out of the rolling score window.
+//  3. Steady state: the point is scored, inserted, and compared against the
+//     thresholdPct quantile of the rolling window once it holds 10 scores.
+//
+// An error is returned only when the forest fails to score the vector.
+func (d *RRCFDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	// Lazy forest initialisation on the first Score call.
+	if d.forest == nil {
+		dim := len(vector.NormalizedVector)
+		d.forest = rrcf.NewForest(d.numTrees, dim, d.treeSize)
+	}
+
+	// Warmup buffering. Completion is tracked through d.counter rather than
+	// len(d.buf): the buffer is released after the batch insert, so its
+	// length alone would restart the warmup on every subsequent call and the
+	// detector would never produce a real score.
+	if d.warmup > 0 && d.counter < d.warmup {
+		d.buf = append(d.buf, vector)
+		if len(d.buf) >= d.warmup {
+			for _, v := range d.buf {
+				if err := d.forest.Insert(v.NormalizedVector, d.counter); err != nil {
+					log.Printf("rrcf: warmup insert: %v", err)
+				}
+				// Advance even on failure so the warmup always terminates.
+				d.counter++
+			}
+			d.buf = nil
+			log.Printf("rrcf: warmup complete (%d vectors)", d.warmup)
+		}
+		return types.AnomalyResult{
+			Timestamp: vector.Timestamp,
+			Score:     0,
+			IsAnomaly: false,
+			Method:    "RRCF",
+		}, nil
+	}
+
+	// Score via ephemeral insertion.
+	score, err := d.forest.Score(vector.NormalizedVector)
+	if err != nil {
+		return types.AnomalyResult{}, fmt.Errorf("rrcf: %w", err)
+	}
+
+	// Permanent streaming insert to keep the forest fresh.
+	if err := d.forest.Insert(vector.NormalizedVector, d.counter); err != nil {
+		log.Printf("rrcf: insert: %v", err)
+	}
+	d.counter++
+	d.scored++
+
+	// Discard the first warmupDiscard scores: the forest is still settling
+	// and scores are artificially high, which would anchor the threshold.
+	if d.scored <= d.warmupDiscard {
+		return types.AnomalyResult{
+			Timestamp: vector.Timestamp,
+			Score:     score,
+			IsAnomaly: false,
+			Method:    "RRCF",
+		}, nil
+	}
+
+	// Update rolling score window (ring buffer).
+	d.scoreWindow.push(score)
+
+	// Need at least 10 scores before making decisions.
+	isAnomaly := false
+	var threshold float64
+	if d.scoreWindow.size >= 10 {
+		threshold = d.rollingThreshold()
+		isAnomaly = score > threshold
+	}
+
+	// Confidence stays 0 until a usable (non-zero) threshold exists.
+	confidence := 0.0
+	if threshold > 1e-9 {
+		confidence = math.Min(score/threshold, 1.0)
+	}
+
+	return types.AnomalyResult{
+		Timestamp:  vector.Timestamp,
+		Score:      score,
+		IsAnomaly:  isAnomaly,
+		Confidence: confidence,
+		Method:     "RRCF",
+	}, nil
+}
+
+// rollingThreshold returns the thresholdPct-quantile of the rolling score
+// window, which Score uses as the anomaly cutoff for the current point.
+// Caller must hold d.mu.
+func (d *RRCFDetector) rollingThreshold() float64 {
+	return d.scoreWindow.quantileVal(d.thresholdPct)
+}
+
+// Update is a no-op for RRCF: insertion happens inside Score. A detector that
+// is only ever advanced through Update therefore never ingests new points.
+func (d *RRCFDetector) Update(_ types.FeatureVector) error { return nil }
--- a/internal/detect/scaling.go
+++ b/internal/detect/scaling.go
@ -0,0 +1,299 @@
+package detect
+
+import (
+	"log"
+	"sync"
+	"time"
+
+	"codeberg.org/pata1704/guenther/pkg/types"
+)
+
+// ScalingLevel represents the current detector complexity level.
+type ScalingLevel int
+
+// Scaling levels, ordered from the full ensemble to the cheapest detector.
+// LevelNormal is the zero value.
+const (
+	LevelNormal   ScalingLevel = iota // SEAD Ensemble (full accuracy)
+	LevelHigh                         // COPOD (reduced complexity)
+	LevelCritical                     // MAD (minimal overhead)
+)
+
+// levelName maps ScalingLevel to a human-readable string for logging.
+// A level without an entry yields the empty string.
+var levelName = map[ScalingLevel]string{
+	LevelNormal:   "SEAD Ensemble (Normal)",
+	LevelHigh:     "COPOD (High Load)",
+	LevelCritical: "MAD (Critical Load)",
+}
+
+// ── SwitchableDetector ───────────────────────────────────────────────────────
+
+// SwitchableDetector wraps a SEADDetector and allows runtime switching to
+// lighter-weight sub-detectors (COPOD, MAD) under high CPU load.
+//
+// State consistency goal: all base detectors are meant to be kept up-to-date
+// regardless of which one is currently active, so that switching back to
+// SEAD does not resume from stale internal state.
+//
+// Update-deduplication contract:
+//
+//	SEAD.Score()  calls d.Score() on every base detector, which self-updates.
+//	              → no separate Update() call needed; doing so would double-count.
+//	SEAD.Update() calls d.Update() on every base detector directly.
+//	              → used here when we need to advance inactive detectors
+//	                without scoring through SEAD.
+//
+// For LevelHigh / LevelCritical we call:
+//
+//	s.ensemble.Update(vector)  → advances MAD, RRCF variants via d.Update()
+//	                             COPOD.Update() = COPOD.update() (buffer append only)
+//	active.Score(vector)       → scores + self-updates the active detector
+//	                             (COPOD.Score calls update internally again)
+//
+// This means COPOD receives one Update() + one self-update from Score() per tick.
+// That is intentional: Update() appends to the sliding window buffer; Score()
+// computes the copula and then appends the scored point (score-then-insert).
+// The two operations are not idempotent and must both run for correct behaviour.
+// RRCF and MAD are updated via SEAD.Update() only; their Score() methods are
+// not called when inactive so they do not double-count.
+//
+// NOTE(review): two detectors in this package do not appear to match the
+// contract above – confirm against SEADDetector.Update:
+//   - RRCFDetector.Update is a no-op (insertion happens in its Score), so an
+//     RRCF forest is not advanced while SEAD.Score is bypassed.
+//   - MADDetector.Update calls MADDetector.Score. At LevelCritical the same
+//     vector therefore seems to reach MAD twice per tick (once through
+//     ensemble.Update, once through the direct Score call), which adds it to
+//     the auto-calibration buffer twice while MAD is still warming up.
+type SwitchableDetector struct {
+	mu sync.RWMutex
+
+	ensemble *SEADDetector
+	copod    AnomalyDetector // may be nil if COPOD is not configured
+	mad      AnomalyDetector // may be nil if MAD is not configured
+
+	activeLevel ScalingLevel
+}
+
+// NewSwitchableDetector creates a SwitchableDetector backed by the given
+// SEADDetector, starting at LevelNormal. The "COPOD" and "MAD" sub-detectors
+// are looked up in the ensemble once, for direct access during high-load
+// switching.
+//
+// If a sub-detector is not present in the ensemble, the corresponding field
+// is nil and Score() falls back to the ensemble for that level.
+func NewSwitchableDetector(ensemble *SEADDetector) *SwitchableDetector {
+	sd := &SwitchableDetector{
+		ensemble:    ensemble,
+		activeLevel: LevelNormal,
+	}
+	sd.copod = ensemble.GetDetector("COPOD")
+	sd.mad = ensemble.GetDetector("MAD")
+	return sd
+}
+
+// Fit trains the underlying detectors on the given baseline vectors by
+// delegating to the ensemble's Fit and returning its error unchanged.
+func (s *SwitchableDetector) Fit(vectors []types.FeatureVector) error {
+	return s.ensemble.Fit(vectors)
+}
+
+// Update advances the internal state of the base detectors without scoring
+// by delegating to the ensemble's Update. It does not read or change the
+// active level.
+// NOTE(review): concurrency safety depends on SEADDetector.Update, which is
+// not visible here – confirm before relying on concurrent calls.
+func (s *SwitchableDetector) Update(vector types.FeatureVector) error {
+	return s.ensemble.Update(vector)
+}
+
+// Score returns an AnomalyResult from the currently active detector.
+//
+// All inactive detectors are kept current via SEAD.Update() so that
+// switching back to a heavier detector does not produce stale scores.
+// Safe for concurrent use.
+func (s *SwitchableDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
+	s.mu.RLock()
+	level := s.activeLevel
+	s.mu.RUnlock()
+
+	// LevelNormal: SEAD.Score() handles everything internally.
+	// It scores all base detectors (which self-update) and applies
+	// MWU weight adaptation. No separate Update() needed.
+	if level == LevelNormal {
+		return s.ensemble.Score(vector)
+	}
+
+	// LevelHigh / LevelCritical:
+	// 1. Advance all base detectors via SEAD.Update() so inactive detectors
+	//    (MAD, RRCF variants for LevelHigh; RRCF, COPOD for LevelCritical)
+	//    maintain current state. SEAD weight adaptation is NOT performed here
+	//    because we are bypassing SEAD.Score().
+	if err := s.ensemble.Update(vector); err != nil {
+		// Non-fatal: log and continue. A single missed update is acceptable;
+		// the detector will resync on the next tick.
+		log.Printf("scaling: ensemble update error at level %s: %v", levelName[level], err)
+	}
+
+	// 2. Score via the active sub-detector.
+	//    COPOD.Score() additionally self-updates (score-then-insert), which is
+	//    correct and complementary to the Update() call above (see type doc).
+	//    MAD.Update() internally calls Score(), so it is already current after
+	//    the SEAD.Update() call; MAD.Score() here is pure scoring only.
+	switch level {
+	case LevelHigh:
+		if s.copod == nil {
+			log.Printf("scaling: COPOD unavailable at LevelHigh, falling back to ensemble")
+			return s.ensemble.Score(vector)
+		}
+		res, err := s.copod.Score(vector)
+		if err != nil {
+			return res, err
+		}
+		res.Method = "COPOD (High Load)"
+		return res, nil
+
+	case LevelCritical:
+		if s.mad == nil {
+			log.Printf("scaling: MAD unavailable at LevelCritical, falling back to ensemble")
+			return s.ensemble.Score(vector)
+		}
+		res, err := s.mad.Score(vector)
+		if err != nil {
+			return res, err
+		}
+		res.Method = "MAD (Critical Load)"
+		return res, nil
+
+	default:
+		return s.ensemble.Score(vector)
+	}
+}
+
+// Switch changes the active detection level under the write lock and logs
+// the transition. Requesting the level that is already active does nothing.
+// Safe for concurrent use.
+func (s *SwitchableDetector) Switch(level ScalingLevel) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	if prev := s.activeLevel; prev != level {
+		log.Printf("[SCALING] %s → %s", levelName[prev], levelName[level])
+		s.activeLevel = level
+	}
+}
+
+// ── ScalingController ────────────────────────────────────────────────────────
+
+// ScalingController monitors CPU load and drives a SwitchableDetector through
+// its scaling levels (Normal → High → Critical and back).
+//
+// Level transitions follow a two-phase commit pattern:
+//
+//  1. A CPU measurement moves the desired level to a "pending" state.
+//  2. Only after the pending level has been stable for the configured
+//     duration is Switch() called on the detector.
+//
+// This prevents rapid oscillation under bursty workloads. The required
+// duration is selected by the target level: critDur for Critical, highDur
+// for High, and downDur for Normal.
+//
+// Hysteresis rules (in the dead-band between downThres and highThres):
+//
+//	Critical → High  (one step down, not straight to Normal)
+//	High     → High  (stays until CPU drops below downThres)
+//	Normal   → Normal
+//
+// ScalingController is not safe for concurrent use. ObserveCPU must be
+// called from a single goroutine (the DetectionLayer's processing loop).
+type ScalingController struct {
+	detector *SwitchableDetector
+
+	// Thresholds (CPU percent, 0–100)
+	highThres float64
+	critThres float64
+	downThres float64
+
+	// Required stable duration before a level transition is committed.
+	highDur time.Duration
+	critDur time.Duration
+	downDur time.Duration
+
+	// currentLevel is the level that has been committed to the detector.
+	currentLevel ScalingLevel
+
+	// pendingLevel is the desired level based on recent CPU measurements.
+	// It must remain stable for the corresponding duration before becoming current.
+	pendingLevel ScalingLevel
+
+	// pendingStart is the time at which pendingLevel last changed.
+	// The pending level is committed when time.Since(pendingStart) >= required duration.
+	pendingStart time.Time
+}
+
+// NewScalingController constructs a ScalingController that starts at
+// LevelNormal with the stability timer set to the moment of construction.
+// Threshold arguments are CPU percentages; duration arguments are in seconds
+// (float64 to match YAML config values).
+func NewScalingController(
+	detector *SwitchableDetector,
+	highThres, critThres, downThres float64,
+	highDurSec, critDurSec, downDurSec float64,
+) *ScalingController {
+	// seconds converts a fractional number of seconds into a time.Duration.
+	seconds := func(s float64) time.Duration {
+		return time.Duration(s * float64(time.Second))
+	}
+
+	ctrl := &ScalingController{
+		detector:     detector,
+		currentLevel: LevelNormal,
+		pendingLevel: LevelNormal,
+		pendingStart: time.Now(), // explicit init avoids zero-time edge case
+	}
+	ctrl.highThres, ctrl.critThres, ctrl.downThres = highThres, critThres, downThres
+	ctrl.highDur = seconds(highDurSec)
+	ctrl.critDur = seconds(critDurSec)
+	ctrl.downDur = seconds(downDurSec)
+	return ctrl
+}
+
+// ObserveCPU processes a single CPU measurement and, if warranted, triggers
+// a level switch on the underlying SwitchableDetector.
+//
+// A measurement whose target level differs from the pending one only
+// restarts the stability timer. Once the same target has been observed for
+// at least durationFor(target), it is committed (if it differs from the
+// current level) and the timer is restarted.
+//
+// Must be called from a single goroutine only (not safe for concurrent use).
+func (c *ScalingController) ObserveCPU(cpuPercent float64) {
+	now := time.Now()
+	target := c.desiredLevel(cpuPercent)
+
+	switch {
+	case target != c.pendingLevel:
+		// Phase 1: target changed → restart the stability timer.
+		c.pendingLevel, c.pendingStart = target, now
+
+	case now.Sub(c.pendingStart) >= c.durationFor(target):
+		// Phase 2: target has been stable long enough → commit it.
+		if target != c.currentLevel {
+			c.currentLevel = target
+			c.detector.Switch(target)
+		}
+		c.pendingStart = now
+	}
+}
+
+// desiredLevel computes the target ScalingLevel for a given CPU measurement.
+// Above critThres the target is Critical, above highThres it is High, and
+// below downThres it is Normal. Anything else lies in the dead-band, where
+// the outcome depends on the currently committed level.
+func (c *ScalingController) desiredLevel(cpuPercent float64) ScalingLevel {
+	if cpuPercent > c.critThres {
+		return LevelCritical
+	}
+	if cpuPercent > c.highThres {
+		return LevelHigh
+	}
+	if cpuPercent < c.downThres {
+		return LevelNormal
+	}
+
+	// Dead-band: degrade at most one step to avoid jumping straight from
+	// Critical to Normal on a brief CPU dip. High stays High; Normal stays
+	// Normal.
+	if c.currentLevel == LevelCritical || c.currentLevel == LevelHigh {
+		return LevelHigh
+	}
+	return LevelNormal
+}
+
+// durationFor returns how long a target level must stay pending before it is
+// committed: critDur for Critical, highDur for High, downDur for anything else.
+func (c *ScalingController) durationFor(level ScalingLevel) time.Duration {
+	if level == LevelCritical {
+		return c.critDur
+	}
+	if level == LevelHigh {
+		return c.highDur
+	}
+	return c.downDur
+}
--- a/internal/detect/sead.go
+++ b/internal/detect/sead.go
@ -0,0 +1,507 @@
+// Package detect provides anomaly detection algorithms and ensemble logic.
+package detect
+
+// sead.go – SEAD: Unsupervised Ensemble of Streaming Anomaly Detectors
+//
+// Implementation of Algorithm 1 from:
+//   Shah et al. "SEAD: Unsupervised Ensemble of Streaming Anomaly Detectors"
+//   ICML 2025, Amazon Science.
+//
+// Core algorithm (Multiplicative Weights Update / FTRL with KL-divergence):
+//
+//  1. For each incoming feature vector x_t:
+//     a. Score every base detector: s̃_i(t) = A_i(x_t)
+//     b. Normalise to [0,1] via streaming quantile: s_i(t) = Q(s̃_i(t); history_i)
+//     c. Compute softmax weights: p_i(t) = exp(w_i) / Σ exp(w_j)
+//     d. Output combined score: S_t = Σ p_i(t) · s_i(t)
+//     e. Update weights: w_i(t+1) = w_i(t) − η · ∂L_t/∂w_i
+//        where L_t = S_t + λ · KL(p || π)
+//  2. Update each base detector: A_i(t+1) ← Update(A_i(t), x_t)
+//
+// Streaming quantiles are approximated via a fixed-capacity FIFO ring buffer
+// whose window is scanned on demand (lightweight t-digest substitute). For the
+// handful of base detectors used here at 1 Hz this is negligible memory and
+// CPU overhead.
+//
+// SEAD runs parallel to the existing AVG/MAX/MEDIAN ensemble; it is selected
+// by setting detector.ensemble.method = "sead" in the config.
+
+import (
+	"fmt"
+	"math"
+	"sort"
+	"strings"
+	"sync"
+
+	"codeberg.org/pata1704/guenther/pkg/types"
+)
+
+// ─── FIFO Ring Buffer ─────────────────────────────────────────────────────────
+
+// ringBuffer is a fixed-capacity circular buffer with true FIFO eviction:
+// once full, every new value overwrites the oldest one.
+//
+// Memory: O(cap · 8 bytes). For cap=500 this is 4 KB per detector.
+type ringBuffer struct {
+	// data is the backing storage, allocated once by newRingBuffer.
+	data []float64
+	head int // index of the next write position
+	size int // current number of elements
+	// cap is the maximum number of elements retained (len(data)).
+	cap  int
+}
+
+func newRingBuffer(capacity int) *ringBuffer {
+	if capacity < 10 {
+		capacity = 10
+	}
+	return &ringBuffer{
+		data: make([]float64, capacity),
+		cap:  capacity,
+	}
+}
+
+// push inserts v, overwriting the oldest entry when the buffer is full, and
+// returns the empirical quantile rank of v within the current window.
+//
+// The rank is (number of stored values strictly smaller than v) / (n-1),
+// where n is the window size after the insert. With ties the lowest rank is
+// reported. A window holding a single value returns the neutral rank 0.5.
+//
+// The result is always within [0,1]: v itself is part of the window and is
+// never strictly smaller than itself, so the count is at most n-1. NaN values
+// compare false against everything and are therefore never counted; a NaN v
+// gets rank 0.
+//
+// The rank is computed with a single O(n) scan and no allocation. For non-NaN
+// data this equals the position a binary search would find in the sorted
+// window, without sorting it.
+func (r *ringBuffer) push(v float64) float64 {
+	r.data[r.head] = v
+	r.head = (r.head + 1) % r.cap
+	if r.size < r.cap {
+		r.size++
+	}
+
+	n := r.size
+	if n <= 1 {
+		return 0.5
+	}
+
+	// Until the buffer wraps, head == size, so the live window is always
+	// exactly data[:n]; element order is irrelevant for counting.
+	below := 0
+	for _, x := range r.data[:n] {
+		if x < v {
+			below++
+		}
+	}
+	return float64(below) / float64(n-1)
+}
+
+// quantileVal returns the value at quantile p of the current window without
+// modifying the buffer. The index into the sorted window is int(p·(n-1)),
+// i.e. the lower neighbour is taken (no interpolation).
+//
+// p is expected to lie in [0,1]. Out-of-range input is clamped instead of
+// panicking: p ≤ 0 or NaN selects the minimum, p ≥ 1 (including +Inf) the
+// maximum. An empty buffer yields 0.
+func (r *ringBuffer) quantileVal(p float64) float64 {
+	n := r.size
+	if n == 0 {
+		return 0
+	}
+	sorted := make([]float64, n)
+	for i := range n {
+		sorted[i] = r.data[(r.head-n+i+r.cap)%r.cap]
+	}
+	sort.Float64s(sorted)
+
+	// Clamp in floating point before converting: converting a negative, NaN
+	// or infinite float to int would give a negative or unspecified index.
+	last := n - 1
+	pos := p * float64(last)
+	idx := 0
+	switch {
+	case pos >= float64(last):
+		idx = last
+	case pos > 0:
+		idx = int(pos)
+	}
+	return sorted[idx]
+}
+
+// streamQuantile is an alias of ringBuffer kept for API compatibility.
+// New code should use ringBuffer directly.
+type streamQuantile = ringBuffer
+
+// newStreamQuantile forwards to newRingBuffer; because streamQuantile is an
+// alias, the returned *ringBuffer is also a *streamQuantile.
+func newStreamQuantile(capacity int) *ringBuffer {
+	return newRingBuffer(capacity)
+}
+
+// ─── SEADDetector ─────────────────────────────────────────────────────────────
+
+// SEADDetector implements the SEAD algorithm: an unsupervised online ensemble
+// that adaptively weights N base anomaly detectors using Multiplicative Weights
+// Update (MWU / FTRL with KL-divergence regulariser).
+//
+// Key properties:
+//   - Fully unsupervised: no anomaly labels required.
+//   - Bounded cost per time step: all history is kept in fixed-capacity
+//     windows, so the cost does not grow with stream length.
+//   - Adaptive: detector weights shift as data distribution changes.
+//   - Score-scale agnostic: all base scores are quantile-normalised to [0,1]
+//     before aggregation, preventing any single detector from dominating due
+//     to score magnitude differences.
+//
+// Configuration:
+//   - eta (η): MWU learning rate. Larger → faster adaptation, more noise.
+//     Recommended range: 0.05–0.20 (as documented on SEADConfig.Eta).
+//     Default: 0.1.
+//   - lambda (λ): KL-divergence regularisation strength. 0 = pure MWU (no
+//     pull toward the prior). Positive values pull weights toward π
+//     (uniform). DefaultSEADConfig uses 0.01.
+//   - quantileWindow: number of past scores retained per detector for quantile
+//     normalisation. Default: 300.
+//   - contamination: expected anomaly fraction used to set the decision
+//     threshold as quantile(combinedHistory, 1-contamination). Default: 0.15.
+//   - minDataPoints: minimum scored windows before any anomaly is flagged.
+type SEADDetector struct {
+	detectors []AnomalyDetector // N base detectors (e.g. MAD, RRCF variants, COPOD)
+	names     []string          // human-readable name per detector
+
+	// MWU state
+	weights []float64 // w_i (log-space, unconstrained)
+	eta     float64   // learning rate η
+	lambda  float64   // KL regularisation strength λ
+	prior   []float64 // π – uniform by default
+
+	// Streaming quantile per detector
+	quantiles []*streamQuantile
+
+	// Combined score history for threshold computation.
+	// Uses a FIFO ring buffer so every score is evicted after a fixed number
+	// of time steps, regardless of its magnitude. The capacity is currently
+	// hard-coded to 1000 in NewSEADDetector.
+	contamination   float64
+	combinedHistory *ringBuffer // FIFO ring buffer, capacity=1000
+	minDataPoints   int
+
+	// mu is held by Score while it touches weights, quantiles and
+	// combinedHistory, and by GetDetector and Weights.
+	mu sync.Mutex
+}
+
+// SEADConfig holds all tunable parameters for the SEAD ensemble.
+// NewSEADDetector replaces out-of-range values of Eta, QuantileWindow,
+// Contamination and MinDataPoints with the defaults listed below.
+type SEADConfig struct {
+	// Eta is the MWU learning rate η.
+	// Higher values react faster to distribution shifts but are noisier.
+	// Recommended: 0.05–0.20. Default: 0.10 (also used when Eta ≤ 0).
+	Eta float64
+
+	// Lambda is the KL-divergence regularisation strength.
+	// 0 = pure MWU (no penalty for deviation from prior).
+	// Positive values add stability; use 0.01–0.05.
+	Lambda float64
+
+	// QuantileWindow is the number of past scores retained per detector.
+	// Larger → more stable quantiles but slower adaptation.
+	// Default: 300 (also used when QuantileWindow ≤ 0).
+	QuantileWindow int
+
+	// Contamination is the expected anomaly fraction ∈ (0, 0.5).
+	// Sets the decision threshold at quantile(1-contamination) of combined history.
+	// Default: 0.15 (also used for values ≤ 0 or ≥ 0.5).
+	Contamination float64
+
+	// MinDataPoints is the cold-start guard: anomalies are not flagged until
+	// at least this many windows have been scored. Default: 20 (also used
+	// when MinDataPoints ≤ 0).
+	MinDataPoints int
+}
+
+// DefaultSEADConfig returns sensible defaults for the SEAD ensemble.
+func DefaultSEADConfig() SEADConfig {
+	return SEADConfig{
+		Eta:            0.10,
+		Lambda:         0.01,
+		QuantileWindow: 300,
+		Contamination:  0.15,
+		MinDataPoints:  20,
+	}
+}
+
+// NewSEADDetector constructs a SEAD ensemble from N base detectors.
+//
+//   - detectors: slice of base AnomalyDetector implementations. Must contain
+//     at least one entry and no nil entries.
+//   - names: human-readable labels for each detector (used in Details field).
+//     Must have the same length as detectors.
+//   - cfg: SEAD tuning parameters (use DefaultSEADConfig() for a safe start).
+//
+// Invalid configuration values are replaced rather than rejected:
+// Eta ≤ 0 or NaN → 0.10; QuantileWindow ≤ 0 → 300; Contamination outside
+// (0, 0.5) or NaN → 0.15; MinDataPoints ≤ 0 → 20; Lambda < 0 or NaN → 0
+// (regularisation disabled, since a negative strength would push the weights
+// away from the prior).
+//
+// The detectors and names slices are copied, so later modifications by the
+// caller do not affect the ensemble.
+//
+// Returns an error when detectors is empty, when the lengths differ, or when
+// a detector is nil.
+func NewSEADDetector(
+	detectors []AnomalyDetector,
+	names []string,
+	cfg SEADConfig,
+) (*SEADDetector, error) {
+	n := len(detectors)
+	if n == 0 {
+		return nil, fmt.Errorf("sead: at least one base detector required")
+	}
+	if len(names) != n {
+		return nil, fmt.Errorf("sead: names length %d must match detectors length %d", len(names), n)
+	}
+	// Fail at construction time instead of panicking on the first Score call.
+	for i, d := range detectors {
+		if d == nil {
+			return nil, fmt.Errorf("sead: detector %d (%q) is nil", i, names[i])
+		}
+	}
+
+	// NaN compares false against everything, so it needs an explicit check;
+	// otherwise it would poison the weights and the threshold index.
+	if math.IsNaN(cfg.Eta) || cfg.Eta <= 0 {
+		cfg.Eta = 0.10
+	}
+	if math.IsNaN(cfg.Lambda) || cfg.Lambda < 0 {
+		cfg.Lambda = 0
+	}
+	if cfg.QuantileWindow <= 0 {
+		cfg.QuantileWindow = 300
+	}
+	if math.IsNaN(cfg.Contamination) || cfg.Contamination <= 0 || cfg.Contamination >= 0.5 {
+		cfg.Contamination = 0.15
+	}
+	if cfg.MinDataPoints <= 0 {
+		cfg.MinDataPoints = 20
+	}
+
+	// Uniform prior π = 1/N for all detectors.
+	prior := make([]float64, n)
+	for i := range prior {
+		prior[i] = 1.0 / float64(n)
+	}
+
+	// Initialise weights uniformly in log-space: w_i = 0 → softmax = 1/N.
+	weights := make([]float64, n)
+
+	quantiles := make([]*streamQuantile, n)
+	for i := range quantiles {
+		quantiles[i] = newStreamQuantile(cfg.QuantileWindow)
+	}
+
+	return &SEADDetector{
+		detectors:       append([]AnomalyDetector(nil), detectors...),
+		names:           append([]string(nil), names...),
+		weights:         weights,
+		eta:             cfg.Eta,
+		lambda:          cfg.Lambda,
+		prior:           prior,
+		quantiles:       quantiles,
+		contamination:   cfg.Contamination,
+		combinedHistory: newRingBuffer(1000),
+		minDataPoints:   cfg.MinDataPoints,
+	}, nil
+}
+
+// Fit passes the training vectors to every base detector in order and stops
+// at the first failure, returning that error wrapped with the detector name.
+// SEAD itself has no training phase; only the base detectors are fitted.
+func (s *SEADDetector) Fit(vectors []types.FeatureVector) error {
+	for idx := range s.detectors {
+		err := s.detectors[idx].Fit(vectors)
+		if err == nil {
+			continue
+		}
+		return fmt.Errorf("sead: fit detector %q: %w", s.names[idx], err)
+	}
+	return nil
+}
+
+// Update forwards the feature vector to every base detector in order and
+// stops at the first failure, returning that error wrapped with the detector
+// name.
+func (s *SEADDetector) Update(vector types.FeatureVector) error {
+	for idx := range s.detectors {
+		err := s.detectors[idx].Update(vector)
+		if err == nil {
+			continue
+		}
+		return fmt.Errorf("sead: update detector %q: %w", s.names[idx], err)
+	}
+	return nil
+}
+
+// Score implements Algorithm 1 from the SEAD paper.
+//
+// Steps:
+//  1. Score each base detector → raw scores s̃_i. Update is deliberately not
+//     called here: by design the base detectors are expected to self-update
+//     inside their own Score method (NOTE(review): confirm per detector).
+//  2. Quantile-normalise each s̃_i to ŝ_i ∈ [0,1] via streaming window.
+//     A detector that returned an error or a NaN score contributes the
+//     neutral ŝ_i = 0.5 and its quantile window is left untouched.
+//  3. Compute softmax weights p_i = exp(w_i) / Σ exp(w_j).
+//  4. Combined score S = Σ p_i · ŝ_i.
+//  5. Update weights: w_i -= η · g_i
+//     with g_i = p_i · (ŝ_i − S) + λ · (p_i − π_i).
+//  6. Threshold S against rolling (1-contamination)-quantile of S history.
+//
+// Base detectors are scored before s.mu is taken; the lock only protects the
+// ensemble state (quantile windows, weights, combined history).
+//
+// The returned error is always nil: base-detector failures are absorbed as
+// described in step 2.
+func (s *SEADDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
+	n := len(s.detectors)
+
+	// ── Step 1: Score all base detectors ──────────────────────────────────────
+	// We do NOT call d.Update separately here to avoid double-counting in
+	// detectors that already self-update inside Score.
+	rawScores := make([]float64, n)
+	anomalyFlags := make([]bool, n)
+	usable := make([]bool, n) // false → detector failed or returned NaN
+	for i, d := range s.detectors {
+		res, err := d.Score(vector)
+		if err != nil {
+			continue
+		}
+		anomalyFlags[i] = res.IsAnomaly
+		if math.IsNaN(res.Score) {
+			continue
+		}
+		rawScores[i] = res.Score
+		usable[i] = true
+	}
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// ── Step 2: Quantile-normalise scores to [0,1] ────────────────────────────
+	// Degrade gracefully: an unusable detector is neutral in normalised space
+	// (0.5). A raw placeholder must not be pushed into its window, because raw
+	// scores are on a detector-specific scale: the placeholder would rank
+	// arbitrarily and distort later ranks until evicted.
+	normScores := make([]float64, n)
+	for i, raw := range rawScores {
+		if !usable[i] {
+			normScores[i] = 0.5
+			continue
+		}
+		normScores[i] = s.quantiles[i].push(raw)
+	}
+
+	// ── Step 3: Softmax weights ───────────────────────────────────────────────
+	p := softmax(s.weights)
+
+	// ── Step 4: Combined score ────────────────────────────────────────────────
+	combined := 0.0
+	for i := range p {
+		combined += p[i] * normScores[i]
+	}
+
+	// ── Step 5: Weight update (MWU gradient step) ─────────────────────────────
+	// Loss L(w) = combined(w) + λ · KL(softmax(w) || π)
+	// Step direction g_i = p_i · (ŝ_i - combined) + λ · (p_i - π_i)
+	//
+	// The first term is the exact gradient of the softmax-weighted sum.
+	// The second term is a surrogate for the KL gradient: the exact form is
+	// p_i · (log(p_i/π_i) − KL(p||π)), which (p_i − π_i) matches to first
+	// order near p = π.
+	for i := range s.weights {
+		gradCombined := p[i] * (normScores[i] - combined)
+		gradKL := s.lambda * (p[i] - s.prior[i])
+		s.weights[i] -= s.eta * (gradCombined + gradKL)
+	}
+
+	// ── Step 6: Threshold decision ────────────────────────────────────────────
+	// Use FIFO ring buffer: oldest score is evicted automatically once the
+	// buffer is full, giving the threshold a finite, sliding memory.
+	s.combinedHistory.push(combined)
+	threshold := s.combinedHistory.quantileVal(1.0 - s.contamination)
+
+	// The history never grows beyond its capacity, so cap the warm-up
+	// requirement; otherwise a minDataPoints at or above the capacity would
+	// suppress anomaly flags forever.
+	warmup := min(s.minDataPoints, s.combinedHistory.cap-1)
+	isAnomaly := s.combinedHistory.size > warmup && combined > threshold
+
+	confidence := 0.0
+	if threshold > 1e-9 {
+		confidence = math.Min(combined/threshold, 1.0)
+	}
+
+	return types.AnomalyResult{
+		Timestamp:  vector.Timestamp,
+		Score:      combined,
+		IsAnomaly:  isAnomaly,
+		Confidence: confidence,
+		Method:     "SEAD",
+		Details:    s.detailString(p, normScores, anomalyFlags),
+	}, nil
+}
+
+// GetDetector looks up a base detector by its label. The first detector whose
+// name equals the argument is returned; nil means no such name exists.
+func (s *SEADDetector) GetDetector(name string) AnomalyDetector {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	for idx := range s.names {
+		if s.names[idx] != name {
+			continue
+		}
+		return s.detectors[idx]
+	}
+	return nil
+}
+
+// Weights reports the current detector weights after softmax normalisation.
+// The result is a freshly allocated slice computed while holding s.mu, so it
+// can be read and modified freely by the caller. Intended for logging and
+// diagnostics.
+func (s *SEADDetector) Weights() []float64 {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	probs := softmax(s.weights)
+	return probs
+}
+
+// WeightSummary returns a human-readable string of detector weights.
+func (s *SEADDetector) WeightSummary() string {
+	w := s.Weights()
+	var sb strings.Builder
+	for i, name := range s.names {
+		if i > 0 {
+			sb.WriteString(" | ")
+		}
+		sb.WriteString(fmt.Sprintf("%s=%.3f", name, w[i]))
+	}
+	return sb.String()
+}
+
+// detailString formats one space-separated entry per detector for
+// AnomalyResult.Details: the name, a "!" marker when that detector flagged the
+// window itself, its softmax weight and its normalised score.
+// Caller must hold s.mu.
+func (s *SEADDetector) detailString(p, normScores []float64, flags []bool) string {
+	var out strings.Builder
+	for idx, label := range s.names {
+		if idx > 0 {
+			out.WriteString(" ")
+		}
+		marker := ""
+		if flags[idx] {
+			marker = "!"
+		}
+		out.WriteString(fmt.Sprintf("%s%s:w=%.2f,s=%.2f", label, marker, p[idx], normScores[idx]))
+	}
+	return out.String()
+}
+
+// ─── Math helpers ─────────────────────────────────────────────────────────────
+
+// softmax returns exp(w_i) / Σ exp(w_j) with numerical stability (max subtraction).
+//
+// The result is a newly allocated slice of the same length as w; w itself is
+// not modified. An empty or nil input yields an empty slice instead of
+// panicking.
+func softmax(w []float64) []float64 {
+	out := make([]float64, len(w))
+	if len(w) == 0 {
+		return out
+	}
+
+	// Subtracting the maximum keeps every exponent ≤ 0, so exp cannot overflow.
+	maxW := w[0]
+	for _, v := range w[1:] {
+		if v > maxW {
+			maxW = v
+		}
+	}
+	var sum float64
+	for i, v := range w {
+		out[i] = math.Exp(v - maxW)
+		sum += out[i]
+	}
+	for i := range out {
+		out[i] /= sum
+	}
+	return out
+}
+
+// ─── Factory helpers ──────────────────────────────────────────────────────────
+
+// NewSEADWithAllDetectors constructs a SEAD ensemble from five base detectors:
+// MAD, RRCF-fast, RRCF-mid, RRCF-slow, COPOD.
+//
+// NOTE(review): an earlier version of this comment listed six detectors
+// including IsolationForest, but no IsolationForest is constructed here —
+// confirm whether it was dropped intentionally.
+//
+// SEAD's MWU weight-update naturally up-weights the variant that consistently
+// separates anomalies from normal windows, and adapts when the stream
+// distribution shifts (e.g. time-of-day effects).
+//
+// MAD auto-calibration: the MADDetector buffers the first madCalibSize
+// NormalizedVectors, derives per-feature median and MAD, and starts scoring
+// once calibration is complete. Calibration requires no external tooling.
+// SEAD down-weights MAD automatically during the warmup phase.
+//
+// Zero-valued RRCF settings are replaced by defaults: 50/150/200 trees and
+// tree sizes 32/64/128 for the fast/mid/slow variants, with a threshold
+// percentile of 0.85 for all three. A madCalibSize ≤ 0 becomes 100.
+// Returns an error when the COPOD detector or the ensemble cannot be built.
+func NewSEADWithAllDetectors(
+	copodBufferSize int, copodThreshold float64,
+	rrcfVariants RRCFVariantsConfig,
+	madThreshold float64, madCalibSize int,
+	seadCfg SEADConfig,
+) (*SEADDetector, error) {
+	// ── Defaults for the fast RRCF variant ────────────────────────────────────
+	if rrcfVariants.Fast.NumTrees == 0 {
+		rrcfVariants.Fast.NumTrees = 50
+	}
+	if rrcfVariants.Fast.TreeSize == 0 {
+		rrcfVariants.Fast.TreeSize = 32
+	}
+	if rrcfVariants.Fast.ThresholdPercentile == 0 {
+		rrcfVariants.Fast.ThresholdPercentile = 0.85
+	}
+
+	// ── Defaults for the mid RRCF variant ─────────────────────────────────────
+	if rrcfVariants.Mid.NumTrees == 0 {
+		rrcfVariants.Mid.NumTrees = 150
+	}
+	if rrcfVariants.Mid.TreeSize == 0 {
+		rrcfVariants.Mid.TreeSize = 64
+	}
+	if rrcfVariants.Mid.ThresholdPercentile == 0 {
+		rrcfVariants.Mid.ThresholdPercentile = 0.85
+	}
+
+	// ── Defaults for the slow RRCF variant ────────────────────────────────────
+	if rrcfVariants.Slow.NumTrees == 0 {
+		rrcfVariants.Slow.NumTrees = 200
+	}
+	if rrcfVariants.Slow.TreeSize == 0 {
+		rrcfVariants.Slow.TreeSize = 128
+	}
+	if rrcfVariants.Slow.ThresholdPercentile == 0 {
+		rrcfVariants.Slow.ThresholdPercentile = 0.85
+	}
+
+	// ── Construct base detectors ──────────────────────────────────────────────
+	copod, err := NewCOPODDetector(copodBufferSize, copodThreshold)
+	if err != nil {
+		return nil, fmt.Errorf("sead: copod: %w", err)
+	}
+
+	rrcfFast := NewRRCFDetector(
+		rrcfVariants.Fast.NumTrees, rrcfVariants.Fast.TreeSize,
+		0, rrcfVariants.Fast.ThresholdPercentile,
+	)
+	rrcfMid := NewRRCFDetector(
+		rrcfVariants.Mid.NumTrees, rrcfVariants.Mid.TreeSize,
+		0, rrcfVariants.Mid.ThresholdPercentile,
+	)
+	rrcfSlow := NewRRCFDetector(
+		rrcfVariants.Slow.NumTrees, rrcfVariants.Slow.TreeSize,
+		0, rrcfVariants.Slow.ThresholdPercentile,
+	)
+
+	if madCalibSize <= 0 {
+		madCalibSize = 100
+	}
+	mad := NewMADDetectorAutoCalibrate(madThreshold, madCalibSize)
+
+	// The order of detectors must match the order of names.
+	return NewSEADDetector(
+		[]AnomalyDetector{mad, rrcfFast, rrcfMid, rrcfSlow, copod},
+		[]string{"MAD", "RRCF-fast", "RRCF-mid", "RRCF-slow", "COPOD"},
+		seadCfg,
+	)
+}
--- a/internal/detect/sead_test.go
+++ b/internal/detect/sead_test.go
@ -0,0 +1,61 @@
+package detect
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestRingBuffer_FIFO(t *testing.T) {
+	// 1. Initialize with capacity 10
+	rb := newRingBuffer(10)
+	assert.Equal(t, 10, rb.cap)
+	assert.Equal(t, 0, rb.size)
+
+	// 2. Fill it up
+	for i := 1; i <= 10; i++ {
+		rb.push(float64(i))
+	}
+	assert.Equal(t, 10, rb.size)
+	// head should be at 0 after 10 pushes
+	assert.Equal(t, 0, rb.head)
+
+	// 3. Verify quantile (sorted view)
+	// sorted: [1 2 3 4 5 6 7 8 9 10]
+	// quantile 0.5 (median) of 10 items: index int(0.5 * 9) = 4 -> value 5
+	assert.Equal(t, 5.0, rb.quantileVal(0.5))
+
+	// 4. Push one more to trigger FIFO eviction
+	// Should evict "1" (the oldest)
+	rb.push(11.0)
+	assert.Equal(t, 10, rb.size)
+	assert.Equal(t, 1, rb.head)
+
+	// 5. Verify the oldest (1.0) is gone and 11.0 is present
+	// sorted: [2 3 4 5 6 7 8 9 10 11]
+	// idx = int(0.4 * 9) = 3 -> value at index 3 is 5.0
+	assert.Equal(t, 5.0, rb.quantileVal(0.4))
+	// let's be precise: idx = int(p * 9)
+	// p=0 -> idx 0 (2.0)
+	// p=1 -> idx 9 (11.0)
+	assert.Equal(t, 2.0, rb.quantileVal(0.0))
+	assert.Equal(t, 11.0, rb.quantileVal(1.0))
+}
+
+// TestRingBuffer_Rank checks the rank returned by push, which is
+// (position of v in the sorted window) / (n-1), or 0.5 for a single value.
+// The requested capacities (5 and 4) are below newRingBuffer's minimum and are
+// raised to 10, so nothing is evicted during this test.
+func TestRingBuffer_Rank(t *testing.T) {
+	first := newRingBuffer(5)
+
+	exact := []struct{ value, wantRank float64 }{
+		{10.0, 0.5}, // n=1 → neutral rank
+		{20.0, 1.0}, // n=2, sorted=[10 20], position 1 → 1/1
+		{5.0, 0.0},  // n=3, sorted=[5 10 20], position 0 → 0/2
+	}
+	for _, step := range exact {
+		assert.Equal(t, step.wantRank, first.push(step.value))
+	}
+
+	// n=4, sorted=[5 10 10 20]: ties take the lowest position (1) → 1/3.
+	assert.InDelta(t, 0.3333333333333333, first.push(10.0), 1e-9)
+
+	// n=3, sorted=[1 2 3]: the middle value has position 1 → 1/2.
+	second := newRingBuffer(4)
+	second.push(1.0)
+	second.push(3.0)
+	assert.Equal(t, 0.5, second.push(2.0))
+}
--- a/internal/drain3/masking.go
+++ b/internal/drain3/masking.go
@ -0,0 +1,32 @@
+// Package drain3 provides log stripping via regex-based masking templates,
+// which sit in front of Drain3 template mining.
+package drain3
+
+import (
+	"codeberg.org/pata1704/guenther/internal/config"
+)
+
+// ApplyMasking runs every masking pattern over line, in order, each one
+// operating on the output of the previous one.
+//
+// A pattern with a non-empty Name first records capture group 1 of its first
+// match in params under that name; then all matches are replaced with the
+// pattern's Replace text. Patterns without a Name only mask and never write to
+// params. Patterns whose compiled regex (Re) is nil are skipped.
+//
+// No regex compilation happens here; patterns are expected to arrive
+// pre-compiled (via config.Compile at startup).
+func ApplyMasking(line string, patterns []config.MaskingPattern) (masked string, params map[string]string) {
+	captured := make(map[string]string, len(patterns))
+	current := line
+
+	for idx := range patterns {
+		pat := patterns[idx]
+		re := pat.Re
+		if re == nil {
+			continue
+		}
+		if pat.Name != "" {
+			groups := re.FindStringSubmatch(current)
+			if len(groups) > 1 {
+				captured[pat.Name] = groups[1]
+			}
+		}
+		current = re.ReplaceAllString(current, pat.Replace)
+	}
+	return current, captured
+}
--- a/internal/health/monitor.go
+++ b/internal/health/monitor.go
@ -0,0 +1,111 @@
+package health
+
+import (
+	"context"
+	"encoding/json"
+	"log"
+	"sync"
+	"time"
+
+	"codeberg.org/pata1704/guenther/pkg/types"
+)
+
+// HealthMonitor collects StageHealth snapshots from pipeline stages and
+// periodically prints a JSON report to the standard logger.
+//
+// Stages write to the channel returned by Chan(). The channel is buffered
+// (capacity 100), so a health update does not block the sending stage as long
+// as the buffer has room; once it is full, a plain send blocks until the
+// monitor goroutine catches up.
+//
+// The channel is intentionally private (accessed via Chan()) so that callers
+// cannot close it from outside and cannot see the internal buffer size.
+type HealthMonitor struct {
+	// healthChan carries incoming updates to the goroutine started by Start.
+	healthChan chan types.StageHealth
+
+	// mu guards stages, which holds the latest snapshot per stage name.
+	mu     sync.Mutex
+	stages map[string]*types.StageHealth
+
+	// wg tracks the goroutine started by Start; Wait blocks on it.
+	wg sync.WaitGroup
+}
+
+// NewHealthMonitor creates an idle HealthMonitor with a 100-slot update
+// channel and an empty stage table. Nothing is processed until Start is
+// called.
+func NewHealthMonitor() *HealthMonitor {
+	m := new(HealthMonitor)
+	m.healthChan = make(chan types.StageHealth, 100)
+	m.stages = make(map[string]*types.StageHealth)
+	return m
+}
+
+// Chan returns the write-only channel that pipeline stages use to submit
+// health updates. The channel remains open for the lifetime of the monitor.
+func (m *HealthMonitor) Chan() chan<- types.StageHealth {
+	return m.healthChan
+}
+
+// Start begins the health collection loop and periodic reporting in a
+// background goroutine tracked by Wait.
+//
+// interval controls how often the report is printed (typically 5 s). A
+// non-positive interval falls back to 5 s, because time.NewTicker panics on
+// such values.
+//
+// When ctx is cancelled, the goroutine stores the updates still buffered in
+// the channel and then exits.
+func (m *HealthMonitor) Start(ctx context.Context, interval time.Duration) {
+	if interval <= 0 {
+		interval = 5 * time.Second
+	}
+	ticker := time.NewTicker(interval)
+
+	// record stores the latest snapshot for a stage. h is this call's own
+	// copy of the received value, so the map can safely keep its address.
+	record := func(h types.StageHealth) {
+		m.mu.Lock()
+		defer m.mu.Unlock()
+		m.stages[h.StageName] = &h
+	}
+
+	m.wg.Go(func() {
+		defer ticker.Stop()
+		for {
+			select {
+			case h := <-m.healthChan:
+				record(h)
+
+			case <-ticker.C:
+				m.printReport()
+
+			case <-ctx.Done():
+				// Drain remaining updates before exiting.
+				for {
+					select {
+					case h := <-m.healthChan:
+						record(h)
+					default:
+						return
+					}
+				}
+			}
+		}
+	})
+}
+
+// Wait blocks until the goroutine started by Start has exited, which happens
+// after the context passed to Start is cancelled. It returns immediately if
+// Start was never called.
+func (m *HealthMonitor) Wait() {
+	m.wg.Wait()
+}
+
+// Snapshot copies the latest health record of every stage into a new map,
+// keyed by stage name. The records are copied by value under the lock, so the
+// result is independent of later updates. Useful for tests and metrics
+// endpoints.
+func (m *HealthMonitor) Snapshot() map[string]types.StageHealth {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	copied := make(map[string]types.StageHealth, len(m.stages))
+	for name := range m.stages {
+		copied[name] = *m.stages[name]
+	}
+	return copied
+}
+
+// printReport logs a header, one JSON line per known stage and a footer to
+// the standard logger. It holds m.mu for the whole report. Stages are emitted
+// in map iteration order, i.e. in no particular order.
+func (m *HealthMonitor) printReport() {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	log.Println("── Pipeline Health ──────────────────────────────")
+	for name := range m.stages {
+		stage := m.stages[name]
+		payload, err := json.Marshal(stage)
+		if err == nil {
+			log.Printf("[%s] %s", stage.StageName, payload)
+			continue
+		}
+		log.Printf("[%s] marshal error: %v", stage.StageName, err)
+	}
+	log.Println("─────────────────────────────────────────────────")
+}
--- a/internal/transform/engine.go
+++ b/internal/transform/engine.go
--- a/internal/transform/engine_test.go
+++ b/internal/transform/engine_test.go
@ -0,0 +1,106 @@
+package transform
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"codeberg.org/pata1704/guenther/internal/config"
+	"codeberg.org/pata1704/guenther/pkg/types"
+	"github.com/stretchr/testify/assert"
+)
+
+// TestTransformEngine_Fusion feeds two consecutive metric snapshots through a
+// TransformEngine backed by an in-memory database and checks the aggregated,
+// delta and ratio features of the resulting FeatureVectors, including their
+// positions in NormalizedVector.
+func TestTransformEngine_Fusion(t *testing.T) {
+	logChan := make(chan types.LogEvent, 100)
+	metricChan := make(chan types.MetricSnapshot, 100)
+	serviceStatusChan := make(chan types.ServiceStatus, 100)
+	featureChan := make(chan types.FeatureVector, 100)
+	healthChan := make(chan types.StageHealth, 10)
+
+	cfg := &config.Config{}
+	cfg.Transformation.WindowSize = 1 * time.Second
+	cfg.Transformation.DbPath = ":memory:"
+
+	engine, err := NewTransformEngine(cfg, logChan, metricChan, serviceStatusChan, featureChan, healthChan)
+	// Stop immediately on failure: continuing with a nil engine would only
+	// turn the real error into a nil-pointer panic in engine.Start.
+	if err != nil {
+		t.Fatalf("NewTransformEngine failed: %v", err)
+	}
+
+	baseTime := time.Date(2026, 1, 1, 12, 0, 0, 0, time.Local)
+
+	// 1. Send data for first window
+	metricChan <- types.MetricSnapshot{
+		Timestamp:         baseTime,
+		CPUPercent:        50.0,
+		MemoryUsedMB:      1000,
+		MemoryDirtyMB:     100,
+		NetworkInMBps:     10.0,
+		NetworkOutMBps:    20.0,
+		TCPRetransPerS:    5,
+		NetPacketsInPerS:  100,
+		NetPacketsOutPerS: 200,
+	}
+
+	// 2. Start engine and wait for first window
+	ctx, cancel := context.WithCancel(context.Background())
+	engine.Start(ctx)
+	defer func() {
+		cancel()
+		engine.Wait()
+	}()
+
+	select {
+	case fv := <-featureChan:
+		assert.Equal(t, 50.0, fv.AvgCPUPercent)
+		// Deltas are absolute value on first window because tracker starts at 0
+		assert.Equal(t, 10.0, fv.DeltaNetIn)
+	case <-time.After(2 * time.Second):
+		t.Fatal("Timeout waiting for first FeatureVector")
+	}
+
+	// 3. Send data for second window (triggers deltas)
+	secondTime := baseTime.Add(cfg.Transformation.WindowSize)
+	metricChan <- types.MetricSnapshot{
+		Timestamp:         secondTime,
+		CPUPercent:        60.0,
+		MemoryUsedMB:      1000,
+		MemoryDirtyMB:     200,
+		NetworkInMBps:     15.0, // DeltaNetIn = 15.0 - 10.0 = 5.0
+		NetworkOutMBps:    20.0,
+		TCPRetransPerS:    10, // DeltaTCPRetrans = 10.0 - 5.0 = 5.0
+		NetPacketsInPerS:  150,
+		NetPacketsOutPerS: 200,
+	}
+
+	select {
+	case fv := <-featureChan:
+		// Check original logic
+		assert.Equal(t, 60.0, fv.AvgCPUPercent)
+
+		// Check new delta features
+		assert.Equal(t, 5.0, fv.DeltaNetIn)
+		assert.Equal(t, 5.0, fv.DeltaTCPRetrans)
+
+		// Check ratio features
+		// MemPressure = dirty / (used + 1) = 200/1001
+		expectedPressure := 200.0 / 1001.0
+		assert.InDelta(t, expectedPressure, fv.MemPressure, 1e-9)
+		// NetAsymmetry = in / (out + 1e-3) = 15/20.001
+		expectedAsym := 15.0 / 20.001
+		assert.InDelta(t, expectedAsym, fv.NetAsymmetry, 1e-9)
+
+		// Check NormalizedVector length (should be 45 base + params)
+		assert.GreaterOrEqual(t, len(fv.NormalizedVector), 45)
+
+		// Verify slots 39-44 (Engineered Features tail)
+		nv := fv.NormalizedVector
+		assert.Equal(t, 5.0, nv[39]) // DeltaNetIn
+		assert.Equal(t, 5.0, nv[40]) // DeltaTCPRetrans
+		// TcpRollStd and NetRollStd will have values (even if just 2 pts)
+		assert.Greater(t, nv[41], 0.0)                    // TcpRollStd (10 and 5)
+		assert.Equal(t, 0.0, nv[42])                      // NetRollStd (20 and 20 -> std=0)
+		assert.InDelta(t, expectedPressure, nv[43], 1e-9) // MemPressure
+		assert.InDelta(t, expectedAsym, nv[44], 1e-9)     // NetAsymmetry
+
+	case <-time.After(2 * time.Second):
+		t.Fatal("Timeout waiting for second FeatureVector")
+	}
+}
--- a/internal/transform/schema.go
+++ b/internal/transform/schema.go
@ -0,0 +1,230 @@
+// Package transform contains the DuckDB-backed Tumbling Window Engine.
+package transform
+
+import (
+	"fmt"
+	"strings"
+
+	"codeberg.org/pata1704/guenther/internal/config"
+)
+
+// scalerFeatureNames lists, in order, the features that have a row in
+// scaler_params and that BuildFusionQuery scales as (value - mean) / std.
+// Features not listed here are left unscaled (they are derived from
+// already-scaled inputs or are ratio/delta features).
+var scalerFeatureNames = []string{
+	// CPU (3)
+	"avg_cpu", "max_cpu", "std_cpu",
+	// System/Kernel (7)
+	"avg_iowait", "std_iowait", "avg_softirq", "avg_ctx_switches", "avg_interrupts", "avg_softnet_dropped", "avg_softnet_squeeze",
+	// Network (8)
+	"avg_net_in", "std_net_in", "avg_net_out", "std_net_out", "sum_tcp_retrans", "sum_tcp_fast_retrans", "sum_tcp_timeouts", "avg_net_drops",
+	// Disk (4)
+	"avg_disk_read", "avg_disk_write", "avg_disk_io_ticks", "std_disk_io_ticks",
+	// Log (2)
+	"error_count", "severity_score",
+}
+
+// ScalerFeatureNames returns the ordered list of feature names stored in
+// scaler_params.
+//
+// The result is a fresh copy, so callers may sort or modify it without
+// corrupting the package-level list that BuildFusionQuery iterates over.
+func ScalerFeatureNames() []string {
+	return append([]string(nil), scalerFeatureNames...)
+}
+
+func BuildScalerParamsTable() string {
+	return `CREATE TABLE IF NOT EXISTS scaler_params (
+	feature_name VARCHAR PRIMARY KEY,
+	mean         DOUBLE NOT NULL,
+	std          DOUBLE NOT NULL
+)`
+}
+
+// BuildFitScalerQuery returns the SQL statement that (re)fits scaler_params
+// from the raw_metrics rows with $1 <= timestamp < $2. It uses INSERT OR
+// REPLACE, so rows for an already-present feature_name are overwritten.
+//
+// Despite the column names, "mean" is filled with the median
+// (PERCENTILE_CONT(0.5)) and "std" with the inter-quartile range (P75 - P25,
+// floored at 1e-9) for avg_cpu, avg_iowait, avg_net_in, avg_net_out and
+// avg_disk_io_ticks. max_cpu reuses the cpu_percent median and IQR. Every
+// other metric feature gets its median with std fixed to 1.0; the std_*
+// features and the two log features are fixed to (0.0, 1.0).
+//
+// The m_std_* columns of the stats CTE are computed but never read by the
+// outer SELECT.
+func BuildFitScalerQuery() string {
+	return `
+INSERT OR REPLACE INTO scaler_params (feature_name, mean, std)
+WITH stats AS (
+	SELECT
+		-- CPU
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY cpu_percent) AS m_avg_cpu,
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY cpu_percent) AS m_max_cpu, -- Approximation
+		0.0 AS m_std_cpu, -- Baseline std is often 0 or low
+		-- System
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY cpu_iowait_percent) AS m_avg_iowait,
+		0.0 AS m_std_iowait,
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY cpu_softirq_percent) AS m_avg_softirq,
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY context_switches_s) AS m_avg_ctx_switches,
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY interrupts_s) AS m_avg_interrupts,
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY softnet_dropped_s) AS m_avg_softnet_dropped,
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY softnet_time_squeeze_s) AS m_avg_softnet_squeeze,
+		-- Network
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY net_in_mbps) AS m_avg_net_in,
+		0.0 AS m_std_net_in,
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY net_out_mbps) AS m_avg_net_out,
+		0.0 AS m_std_net_out,
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY tcp_retrans_s) AS m_sum_tcp_retrans,
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY tcp_fast_retrans_s) AS m_sum_tcp_fast_retrans,
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY tcp_timeouts_s) AS m_sum_tcp_timeouts,
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY network_drops_s) AS m_avg_net_drops,
+		-- Disk
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY disk_read_mbps) AS m_avg_disk_read,
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY disk_write_mbps) AS m_avg_disk_write,
+		PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY disk_io_ticks_s) AS m_avg_disk_io_ticks,
+		0.0 AS m_std_disk_io_ticks,
+
+		-- IQRs for scaling
+		(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY cpu_percent) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY cpu_percent)) AS s_avg_cpu,
+		(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY cpu_iowait_percent) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY cpu_iowait_percent)) AS s_avg_iowait,
+		(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY net_in_mbps) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY net_in_mbps)) AS s_avg_net_in,
+		(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY net_out_mbps) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY net_out_mbps)) AS s_avg_net_out,
+		(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY disk_io_ticks_s) - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY disk_io_ticks_s)) AS s_avg_disk_io_ticks
+	FROM raw_metrics
+	WHERE timestamp >= $1 AND timestamp < $2
+)
+SELECT feature_name, mean, std FROM (
+	SELECT 'avg_cpu' AS feature_name, s.m_avg_cpu AS mean, GREATEST(s.s_avg_cpu, 1e-9) AS std FROM stats s UNION ALL
+	SELECT 'max_cpu', s.m_max_cpu, GREATEST(s.s_avg_cpu, 1e-9) FROM stats s UNION ALL
+	SELECT 'std_cpu', 0.0, 1.0 FROM stats s UNION ALL
+	SELECT 'avg_iowait', s.m_avg_iowait, GREATEST(s.s_avg_iowait, 1e-9) FROM stats s UNION ALL
+	SELECT 'std_iowait', 0.0, 1.0 FROM stats s UNION ALL
+	SELECT 'avg_softirq', s.m_avg_softirq, 1.0 FROM stats s UNION ALL
+	SELECT 'avg_ctx_switches', s.m_avg_ctx_switches, 1.0 FROM stats s UNION ALL
+	SELECT 'avg_interrupts', s.m_avg_interrupts, 1.0 FROM stats s UNION ALL
+	SELECT 'avg_softnet_dropped', s.m_avg_softnet_dropped, 1.0 FROM stats s UNION ALL
+	SELECT 'avg_softnet_squeeze', s.m_avg_softnet_squeeze, 1.0 FROM stats s UNION ALL
+	SELECT 'avg_net_in', s.m_avg_net_in, GREATEST(s.s_avg_net_in, 1e-9) FROM stats s UNION ALL
+	SELECT 'std_net_in', 0.0, 1.0 FROM stats s UNION ALL
+	SELECT 'avg_net_out', s.m_avg_net_out, GREATEST(s.s_avg_net_out, 1e-9) FROM stats s UNION ALL
+	SELECT 'std_net_out', 0.0, 1.0 FROM stats s UNION ALL
+	SELECT 'sum_tcp_retrans', s.m_sum_tcp_retrans, 1.0 FROM stats s UNION ALL
+	SELECT 'sum_tcp_fast_retrans', s.m_sum_tcp_fast_retrans, 1.0 FROM stats s UNION ALL
+	SELECT 'sum_tcp_timeouts', s.m_sum_tcp_timeouts, 1.0 FROM stats s UNION ALL
+	SELECT 'avg_net_drops', s.m_avg_net_drops, 1.0 FROM stats s UNION ALL
+	SELECT 'avg_disk_read', s.m_avg_disk_read, 1.0 FROM stats s UNION ALL
+	SELECT 'avg_disk_write', s.m_avg_disk_write, 1.0 FROM stats s UNION ALL
+	SELECT 'avg_disk_io_ticks', s.m_avg_disk_io_ticks, GREATEST(s.s_avg_disk_io_ticks, 1e-9) FROM stats s UNION ALL
+	SELECT 'std_disk_io_ticks', 0.0, 1.0 FROM stats s UNION ALL
+	SELECT 'error_count', 0.0, 1.0 UNION ALL
+	SELECT 'severity_score', 0.0, 1.0
+) t`
+}
+
+func BuildFusionQuery(maskingPatterns []config.MaskingPattern, systemctlServices []string, windowInterval string) string {
+	numericCols := collectNumericCols(maskingPatterns)
+	paramCTE := ""
+	paramSelect := ""
+	paramJoin := ""
+	if len(numericCols) > 0 {
+		var aggs []string
+		for _, col := range numericCols {
+			aggs = append(aggs, fmt.Sprintf("AVG(%s) AS avg_%s", col, col))
+			paramSelect += fmt.Sprintf(", COALESCE(p.avg_%s, 0.0) AS avg_%s", col, col)
+		}
+		paramCTE = fmt.Sprintf(`, param_agg AS (SELECT time_bucket(INTERVAL '%s', event_time) AS ws, %s FROM log_params GROUP BY 1)`, windowInterval, strings.Join(aggs, ", "))
+		paramJoin = "LEFT JOIN param_agg p ON m.ws = p.ws"
+	}
+
+	svcCTE := ""
+	svcSelect := ""
+	svcJoin := ""
+	if len(systemctlServices) > 0 {
+		var svcAggs []string
+		for _, svc := range systemctlServices {
+			safeName := strings.ReplaceAll(strings.ReplaceAll(svc, ".", "_"), "-", "_")
+			svcAggs = append(svcAggs, fmt.Sprintf(`MODE(CASE WHEN active_state = 'active' THEN 1 WHEN active_state = 'failed' THEN -1 ELSE 0 END) AS state_%s`, safeName))
+			svcSelect += fmt.Sprintf(", COALESCE(s.state_%s, 0) AS svc_%s", safeName, safeName)
+		}
+		svcCTE = fmt.Sprintf(`, svc_agg AS (SELECT time_bucket(INTERVAL '%s', timestamp) AS ws, %s FROM service_status GROUP BY 1)`, windowInterval, strings.Join(svcAggs, ", "))
+		svcJoin = "LEFT JOIN svc_agg s ON m.ws = s.ws"
+	}
+
+	var scFields []string
+	for _, name := range scalerFeatureNames {
+		scFields = append(scFields, fmt.Sprintf("COALESCE(MAX(CASE WHEN feature_name='%s' THEN mean END),0) AS m_%s, COALESCE(MAX(CASE WHEN feature_name='%s' THEN std END),1) AS s_%s", name, name, name, name))
+	}
+
+	var normVecFields []string
+	for _, name := range scalerFeatureNames {
+		// DuckDB aggregation aliases match these exactly (see metric_agg and log_agg below)
+		src := name
+		if name == "severity_score" || name == "error_count" {
+			src = "l." + name
+		} else {
+			src = "m." + name
+		}
+		normVecFields = append(normVecFields, fmt.Sprintf("(COALESCE(%s, 0.0) - sc.m_%s) / sc.s_%s AS sc_%s", src, name, name, name))
+	}
+
+	return fmt.Sprintf(`
+WITH metric_agg AS (
+	SELECT
+		time_bucket(INTERVAL '%[1]s', timestamp) AS ws,
+		AVG(cpu_percent) AS avg_cpu, MAX(cpu_percent) AS max_cpu, STDDEV_SAMP(cpu_percent) AS std_cpu,
+		AVG(cpu_iowait_percent) AS avg_iowait, STDDEV_SAMP(cpu_iowait_percent) AS std_iowait,
+		AVG(cpu_softirq_percent) AS avg_softirq, AVG(context_switches_s) AS avg_ctx_switches,
+		AVG(interrupts_s) AS avg_interrupts, AVG(softnet_dropped_s) AS avg_softnet_dropped,
+		AVG(softnet_time_squeeze_s) AS avg_softnet_squeeze,
+		AVG(memory_used_mb) AS avg_mem_used, AVG(memory_cached_mb) AS avg_mem_cached, MAX(memory_dirty_mb) AS max_mem_dirty,
+		AVG(net_in_mbps) AS avg_net_in, STDDEV_SAMP(net_in_mbps) AS std_net_in,
+		AVG(net_out_mbps) AS avg_net_out, STDDEV_SAMP(net_out_mbps) AS std_net_out,
+		SUM(tcp_retrans_s) AS sum_tcp_retrans, SUM(tcp_fast_retrans_s) AS sum_tcp_fast_retrans,
+		SUM(tcp_timeouts_s) AS sum_tcp_timeouts, AVG(network_drops_s) AS avg_net_drops,
+		AVG(disk_read_mbps) AS avg_disk_read, AVG(disk_write_mbps) AS avg_disk_write,
+		AVG(disk_io_ticks_s) AS avg_disk_io_ticks, STDDEV_SAMP(disk_io_ticks_s) AS std_disk_io_ticks,
+		SUM(disk_read_time_s) AS sum_disk_read_time, SUM(disk_write_time_s) AS sum_disk_write_time,
+		SUM(disk_reads_s) AS sum_disk_reads, SUM(disk_writes_s) AS sum_disk_writes,
+		SUM(net_packets_in_s) AS sum_packets_in, SUM(net_packets_out_s) AS sum_packets_out
+	FROM raw_metrics GROUP BY 1
+),
+log_agg AS (
+	SELECT
+		time_bucket(INTERVAL '%[1]s', timestamp) AS ws,
+		COUNT(*) AS log_event_count, COUNT(DISTINCT template_id) AS unique_templates,
+		SUM(CASE WHEN severity = 'ERROR' THEN 1 ELSE 0 END) AS error_count,
+		SUM(CASE 
+			WHEN severity = 'ERROR' THEN 10 
+			WHEN severity = 'WARN' THEN 3 
+			ELSE 1 
+		END) AS severity_score
+	FROM log_events GROUP BY 1
+)%[2]s%[3]s,
+scaler AS (
+	SELECT %[4]s FROM scaler_params
+)
+SELECT m.ws,
+	m.*, l.log_event_count, l.unique_templates, l.error_count, l.severity_score%[5]s%[6]s,
+	%[7]s
+FROM metric_agg m
+LEFT JOIN log_agg l ON m.ws = l.ws
+%[8]s %[9]s
+CROSS JOIN scaler sc
+ORDER BY m.ws DESC LIMIT 1`,
+		windowInterval, paramCTE, svcCTE, strings.Join(scFields, ", "), paramSelect, svcSelect, strings.Join(normVecFields, ", "), paramJoin, svcJoin)
+}
+
+// BuildLogParamsSchema returns the idempotent DDL for log_params: an
+// event_time column followed by one param_<Name> column per named masking
+// pattern, typed through sqlType. Patterns with an empty Name are skipped.
+func BuildLogParamsSchema(patterns []config.MaskingPattern) string {
+	columns := make([]string, 1, len(patterns)+1)
+	columns[0] = "event_time TIMESTAMP WITH TIME ZONE"
+	for i := range patterns {
+		p := patterns[i]
+		if p.Name != "" {
+			columns = append(columns, fmt.Sprintf("param_%s %s", p.Name, sqlType(p.Type)))
+		}
+	}
+	body := strings.Join(columns, ",\n\t")
+	return fmt.Sprintf("CREATE TABLE IF NOT EXISTS log_params (\n\t%s\n)", body)
+}
+
+// sqlType maps a masking-pattern type name to the SQL column type used in
+// log_params: "float" becomes DOUBLE, "int" becomes BIGINT, and every other
+// value (including the empty string) falls back to VARCHAR.
+func sqlType(t string) string {
+	if t == "float" {
+		return "DOUBLE"
+	}
+	if t == "int" {
+		return "BIGINT"
+	}
+	return "VARCHAR"
+}
+
+// collectNumericCols returns the log_params column names ("param_<Name>") of
+// the patterns that BuildFusionQuery averages per window.
+//
+// A pattern qualifies only when BuildLogParamsSchema gives its column a
+// numeric SQL type. Deciding through sqlType keeps the two in sync: a pattern
+// with an unknown or empty Type is stored as VARCHAR and is therefore skipped
+// here instead of producing AVG() over a text column. Patterns without a Name
+// are skipped as well. The result is nil when nothing qualifies.
+func collectNumericCols(patterns []config.MaskingPattern) []string {
+	var cols []string
+	for _, mp := range patterns {
+		if mp.Name == "" || sqlType(mp.Type) == "VARCHAR" {
+			continue
+		}
+		cols = append(cols, "param_"+mp.Name)
+	}
+	return cols
+}
--- a/pkg/types/types.go
+++ b/pkg/types/types.go
@ -0,0 +1,302 @@
+// Package types defines the shared data structures that flow between pipeline
+// stages. All types are value-safe to copy and JSON-serialisable.
+package types
+
+import "time"
+
+// ── LogEvent ─────────────────────────────────────────────────────────────────
+
+// LogEvent represents a single parsed log line after Drain3 template mining.
+type LogEvent struct {
+	// Timestamp associated with the log line.
+	Timestamp  time.Time         `json:"timestamp"`
+	// TemplateID identifies the mined template the line was matched to.
+	TemplateID int               `json:"template_id"`
+	// Params holds the values extracted from the line, as strings.
+	// NOTE(review): keys are presumably the masking-pattern names — confirm.
+	Params     map[string]string `json:"params"`
+	// Severity is the log level label. The log aggregation in
+	// internal/transform treats "ERROR" and "WARN" specially.
+	Severity   string            `json:"severity"`
+	// RawLine is presumably the original, unparsed line text — confirm.
+	RawLine    string            `json:"raw_line"`
+}
+
+// ── ServiceStatus ─────────────────────────────────────────────────────────────
+
+// ServiceStatus represents the state of a systemd service.
+//
+// The fusion query in internal/transform encodes ActiveState as 1 for
+// "active", -1 for "failed" and 0 for any other value.
+type ServiceStatus struct {
+	// Timestamp of the observation.
+	Timestamp   time.Time `json:"timestamp"`
+	// ServiceName is the name of the observed service.
+	ServiceName string    `json:"service_name"`
+	ActiveState string    `json:"active_state"` // e.g. "active", "inactive", "failed"
+	SubState    string    `json:"sub_state"`    // e.g. "running", "dead", "exited"
+}
+
+// ── MetricSnapshot ────────────────────────────────────────────────────────────
+
+// MetricSnapshot is a 1 Hz sample of Linux system metrics collected from /proc.
+//
+// The JSON tags coincide with the raw_metrics column names referenced by the
+// queries in internal/transform/schema.go.
+type MetricSnapshot struct {
+	// Timestamp of the sample.
+	Timestamp time.Time `json:"timestamp"`
+
+	// CPU utilisation, in percent per the field names.
+	CPUPercent        float64 `json:"cpu_percent"`
+	CPUIoWaitPercent  float64 `json:"cpu_iowait_percent"`
+	CPUSoftIrqPercent float64 `json:"cpu_softirq_percent"`
+
+	// Context-switch and interrupt rates (per second per the field names).
+	ContextSwitchesPerS float64 `json:"context_switches_s"`
+	InterruptsPerS      float64 `json:"interrupts_s"`
+
+	// Memory figures, in MB per the field names.
+	MemoryUsedMB   float64 `json:"memory_used_mb"`
+	MemoryCachedMB float64 `json:"memory_cached_mb"`
+	MemoryDirtyMB  float64 `json:"memory_dirty_mb"`
+
+	// Network throughput.
+	NetworkInMBps  float64 `json:"net_in_mbps"`
+	NetworkOutMBps float64 `json:"net_out_mbps"`
+
+	// Network error and drop rates.
+	NetErrorsPerS float64 `json:"network_errors_s"`
+	NetDropsPerS  float64 `json:"network_drops_s"`
+
+	// TCP retransmission and timeout rates.
+	TCPRetransPerS        float64 `json:"tcp_retrans_s"`
+	TCPTimeoutsPerS       float64 `json:"tcp_timeouts_s"`
+	TCPLostRetransmitPerS float64 `json:"tcp_lost_retransmit_s"`
+	TCPFastRetransPerS    float64 `json:"tcp_fast_retrans_s"`
+
+	// Disk throughput and timing.
+	// NOTE(review): the read/write time fields are named "Ms" while their JSON
+	// tags end in "_s" — confirm the unit.
+	DiskReadMBps        float64 `json:"disk_read_mbps"`
+	DiskWriteMBps       float64 `json:"disk_write_mbps"`
+	DiskReadTimeMsPerS  float64 `json:"disk_read_time_s"`
+	DiskWriteTimeMsPerS float64 `json:"disk_write_time_s"`
+	DiskIOTicksPerS     float64 `json:"disk_io_ticks_s"`
+
+	// Softnet counters, as rates.
+	SoftnetDroppedPerS     float64 `json:"softnet_dropped_s"`
+	SoftnetTimeSqueezePerS float64 `json:"softnet_time_squeeze_s"`
+
+	// Packet rates.
+	NetPacketsInPerS  float64 `json:"net_packets_in_s"`
+	NetPacketsOutPerS float64 `json:"net_packets_out_s"`
+
+	// Completed disk operations, as rates.
+	DiskReadsCompletedPerS  float64 `json:"disk_reads_s"`
+	DiskWritesCompletedPerS float64 `json:"disk_writes_s"`
+}
+
+// ── FeatureVector ─────────────────────────────────────────────────────────────
+
+// FeatureVector is the output of the DuckDB Tumbling-Window fusion layer.
+//
+// # NormalizedVector layout
+//
+// NOTE(review): an earlier revision of this comment described a 35-slot layout
+// (Drain params from slot 35). The transform tests instead expect at least 45
+// base slots and pin slots 39–44 to the last six engineered features, which is
+// exactly the ordering of ToFloatSlice. The table below follows that ordering.
+// Slots 0–38 are inferred from it; whether slots 0–23 carry the scaled (sc_*)
+// values should be confirmed against the code that fills NormalizedVector.
+// The per-feature descriptions are carried over from the previous comment.
+//
+//	Slot  0– 2: CPU            avg_cpu  max_cpu  std_cpu
+//	Slot  3– 9: System/Kernel  avg_iowait  std_iowait  avg_softirq
+//	            avg_ctx_switches  avg_interrupts  avg_softnet_dropped
+//	            avg_softnet_squeeze
+//	Slot 10–17: Network        avg_net_in  std_net_in  avg_net_out  std_net_out
+//	            sum_tcp_retrans  sum_tcp_fast_retrans  sum_tcp_timeouts
+//	            avg_net_drops
+//	Slot 18–21: Disk           avg_disk_read  avg_disk_write  avg_disk_io_ticks
+//	            std_disk_io_ticks
+//	Slot 22–23: Log            error_count  severity_score
+//	Slot    24: CPUDelta      – Δavg_cpu vs previous window, %-points (unscaled)
+//	Slot    25: CPURollStd    – rolling σ(avg_cpu, 12 windows) (unscaled)
+//	Slot    26: CPUEfficiency – avg_cpu / (avg_net_out + 1) (unscaled)
+//	Slot    27: DeltaCtx      – Δavg_ctx_switches vs previous window
+//	Slot    28: NetDelta      – Δavg_net_out vs previous window, MBps (unscaled)
+//	Slot 29–36: avg_net_throughput  cpu_per_mb  network_disk_ratio
+//	            retrans_per_packet  retrans_per_mb  avg_disk_latency_ms
+//	            log_count_total  unique_templates
+//	Slot    37: LogDensity    – unique_templates / (log_count + 1) (unscaled)
+//	Slot    38: IOWaitProxy   – avg_disk_io_ticks / (avg_cpu + 1) (unscaled)
+//	Slot    39: DeltaNetIn    – Δavg_net_in vs previous window, MBps (unscaled)
+//	Slot    40: DeltaTCPRetrans – Δsum_tcp_retrans vs previous window (unscaled)
+//	Slot    41: TcpRollStd    – rolling σ(sum_tcp_retrans, 5 windows) (unscaled)
+//	Slot    42: NetRollStd    – rolling σ(avg_net_out, 5 windows) (unscaled)
+//	Slot    43: MemPressure   – avg_dirty_mb / (avg_mem_used + 1) (unscaled)
+//	Slot    44: NetAsymmetry  – avg_net_in / (avg_net_out + 1e-3) (unscaled)
+//	Slot   45+: Drain param averages (unscaled)
+type FeatureVector struct {
+	Timestamp   time.Time `json:"timestamp"`
+	WindowStart time.Time `json:"window_start"`
+	WindowEnd   time.Time `json:"window_end"`
+
+	// CPU aggregations
+	AvgCPUPercent  float64 `json:"avg_cpu"`
+	MaxCPUPercent  float64 `json:"max_cpu"`
+	StdCPUPercent  float64 `json:"std_cpu"`
+	AvgCPUIoWait   float64 `json:"avg_iowait"`
+	StdCPUIoWait   float64 `json:"std_iowait"`
+	AvgCPUSoftIrq  float64 `json:"avg_softirq"`
+	AvgCtxSwitches float64 `json:"avg_ctx_switches"`
+	AvgInterrupts  float64 `json:"avg_interrupts"`
+
+	// Memory aggregations
+	AvgMemUsedMB   float64 `json:"avg_mem_used"`
+	AvgMemCachedMB float64 `json:"avg_mem_cached"`
+	MaxMemDirtyMB  float64 `json:"max_mem_dirty"`
+
+	// Disk aggregations
+	AvgDiskIOTicks   float64 `json:"avg_disk_io_ticks"`
+	StdDiskIOTicks   float64 `json:"std_disk_io_ticks"`
+	AvgDiskReadMBps  float64 `json:"avg_disk_read"`
+	AvgDiskWriteMBps float64 `json:"avg_disk_write"`
+
+	// Network aggregations
+	AvgNetInMBps      float64 `json:"avg_net_in"`
+	StdNetInMBps      float64 `json:"std_net_in"`
+	AvgNetOutMBps     float64 `json:"avg_net_out"`
+	StdNetOutMBps     float64 `json:"std_net_out"`
+	AvgNetDrops       float64 `json:"avg_net_drops"`
+	AvgSoftnetDropped float64 `json:"avg_softnet_dropped"`
+	AvgSoftnetSqueeze float64 `json:"avg_softnet_squeeze"`
+
+	// TCP aggregations
+	SumTCPRetrans     float64 `json:"sum_tcp_retrans"`
+	SumTCPFastRetrans float64 `json:"sum_tcp_fast_retrans"`
+	SumTCPTimeouts    float64 `json:"sum_tcp_timeouts"`
+
+	// Log aggregations
+	ErrorCount    int     `json:"error_count"`
+	SeverityScore float64 `json:"severity_score"`
+
+	// Engineered / Derived features
+	CPUDelta         float64 `json:"cpu_delta"`
+	CPURollStd       float64 `json:"cpu_roll_std"`
+	CPUEfficiency    float64 `json:"cpu_efficiency"`
+	DeltaCtx         float64 `json:"delta_ctx"`
+	NetDelta         float64 `json:"net_delta"`
+	AvgNetThroughput float64 `json:"avg_net_throughput"`
+	CPUPerMB         float64 `json:"cpu_per_mb"`
+	NetworkDiskRatio float64 `json:"network_disk_ratio"`
+	RetransPerPacket float64 `json:"retrans_per_packet"`
+	RetransPerMB     float64 `json:"retrans_per_mb"`
+	AvgDiskLatencyMS float64 `json:"avg_disk_latency_ms"`
+	LogCountTotal    int     `json:"log_count_total"`
+	UniqueTemplates  int     `json:"unique_templates"`
+	LogDensity       float64 `json:"log_density"`
+	IOWaitProxy      float64 `json:"io_wait_proxy"`
+	DeltaNetIn       float64 `json:"delta_net_in"`
+	DeltaTCPRetrans  float64 `json:"delta_tcp_retrans"`
+	TcpRollStd       float64 `json:"tcp_roll_std"`
+	NetRollStd       float64 `json:"net_roll_std"`
+	MemPressure      float64 `json:"mem_pressure"`
+	NetAsymmetry     float64 `json:"net_asymmetry"`
+
+	// Drain parameter aggregations
+	ParamAvg map[string]float64 `json:"param_avg"`
+
+	// ServiceStatuses maps service names to their encoded state (active=1, inactive=0, failed=-1).
+	ServiceStatuses map[string]float64 `json:"service_statuses"`
+
+	// NormalizedVector is the flat float64 slice consumed by anomaly detectors.
+	NormalizedVector []float64 `json:"normalized_vector"`
+}
+
+// ToFloatSlice flattens fv into a []float64 with a fixed column order, meant
+// for offline EDA. The values are raw (unscaled); ML inference should read
+// NormalizedVector instead.
+//
+// Column order: 24 base aggregates, 21 engineered features, then one column
+// per entry of paramNames (a name missing from fv.ParamAvg contributes 0):
+//
+//	[avg_cpu, max_cpu, std_cpu,
+//	 avg_iowait, std_iowait, avg_softirq, avg_ctx_switches, avg_interrupts,
+//	 avg_softnet_dropped, avg_softnet_squeeze,
+//	 avg_net_in, std_net_in, avg_net_out, std_net_out,
+//	 sum_tcp_retrans, sum_tcp_fast_retrans, sum_tcp_timeouts, avg_net_drops,
+//	 avg_disk_read, avg_disk_write, avg_disk_io_ticks, std_disk_io_ticks,
+//	 error_count, severity_score,
+//	 cpu_delta, cpu_roll_std, cpu_efficiency, delta_ctx, net_delta,
+//	 avg_net_throughput, cpu_per_mb, network_disk_ratio, retrans_per_packet,
+//	 retrans_per_mb, avg_disk_latency_ms, log_count_total, unique_templates,
+//	 log_density, io_wait_proxy, delta_net_in, delta_tcp_retrans,
+//	 tcp_roll_std, net_roll_std, mem_pressure, net_asymmetry,
+//	 param_*]
+func (fv FeatureVector) ToFloatSlice(paramNames []string) []float64 {
+	fixed := [...]float64{
+		// Base aggregates: CPU
+		fv.AvgCPUPercent, fv.MaxCPUPercent, fv.StdCPUPercent,
+		// Base aggregates: system / kernel
+		fv.AvgCPUIoWait, fv.StdCPUIoWait, fv.AvgCPUSoftIrq, fv.AvgCtxSwitches, fv.AvgInterrupts,
+		fv.AvgSoftnetDropped, fv.AvgSoftnetSqueeze,
+		// Base aggregates: network and TCP
+		fv.AvgNetInMBps, fv.StdNetInMBps, fv.AvgNetOutMBps, fv.StdNetOutMBps,
+		fv.SumTCPRetrans, fv.SumTCPFastRetrans, fv.SumTCPTimeouts, fv.AvgNetDrops,
+		// Base aggregates: disk
+		fv.AvgDiskReadMBps, fv.AvgDiskWriteMBps, fv.AvgDiskIOTicks, fv.StdDiskIOTicks,
+		// Base aggregates: log
+		float64(fv.ErrorCount), fv.SeverityScore,
+
+		// Engineered features
+		fv.CPUDelta, fv.CPURollStd, fv.CPUEfficiency, fv.DeltaCtx, fv.NetDelta,
+		fv.AvgNetThroughput, fv.CPUPerMB, fv.NetworkDiskRatio, fv.RetransPerPacket,
+		fv.RetransPerMB, fv.AvgDiskLatencyMS, float64(fv.LogCountTotal),
+		float64(fv.UniqueTemplates), fv.LogDensity, fv.IOWaitProxy,
+		fv.DeltaNetIn, fv.DeltaTCPRetrans, fv.TcpRollStd, fv.NetRollStd,
+		fv.MemPressure, fv.NetAsymmetry,
+	}
+
+	// Reserve room for the parameter columns up front so the appends below
+	// never reallocate.
+	vec := make([]float64, len(fixed), len(fixed)+len(paramNames))
+	copy(vec, fixed[:])
+	for i := range paramNames {
+		vec = append(vec, fv.ParamAvg[paramNames[i]])
+	}
+	return vec
+}
+
+// ToNamedMap returns the raw feature values keyed by their JSON names. Each
+// entry of paramNames adds a key "avg_param_<name>" whose value is
+// fv.ParamAvg[name] (0 when the name is absent from the map).
+func (fv FeatureVector) ToNamedMap(paramNames []string) map[string]float64 {
+	named := make(map[string]float64, 45+len(paramNames))
+
+	// CPU and system / kernel
+	named["avg_cpu"] = fv.AvgCPUPercent
+	named["max_cpu"] = fv.MaxCPUPercent
+	named["std_cpu"] = fv.StdCPUPercent
+	named["avg_iowait"] = fv.AvgCPUIoWait
+	named["std_iowait"] = fv.StdCPUIoWait
+	named["avg_softirq"] = fv.AvgCPUSoftIrq
+	named["avg_ctx_switches"] = fv.AvgCtxSwitches
+	named["avg_interrupts"] = fv.AvgInterrupts
+	named["avg_softnet_dropped"] = fv.AvgSoftnetDropped
+	named["avg_softnet_squeeze"] = fv.AvgSoftnetSqueeze
+
+	// Network and TCP
+	named["avg_net_in"] = fv.AvgNetInMBps
+	named["std_net_in"] = fv.StdNetInMBps
+	named["avg_net_out"] = fv.AvgNetOutMBps
+	named["std_net_out"] = fv.StdNetOutMBps
+	named["avg_net_drops"] = fv.AvgNetDrops
+	named["sum_tcp_retrans"] = fv.SumTCPRetrans
+	named["sum_tcp_fast_retrans"] = fv.SumTCPFastRetrans
+	named["sum_tcp_timeouts"] = fv.SumTCPTimeouts
+
+	// Disk
+	named["avg_disk_read"] = fv.AvgDiskReadMBps
+	named["avg_disk_write"] = fv.AvgDiskWriteMBps
+	named["avg_disk_io_ticks"] = fv.AvgDiskIOTicks
+	named["std_disk_io_ticks"] = fv.StdDiskIOTicks
+
+	// Log
+	named["error_count"] = float64(fv.ErrorCount)
+	named["severity_score"] = fv.SeverityScore
+
+	// Engineered / derived features
+	named["cpu_delta"] = fv.CPUDelta
+	named["cpu_roll_std"] = fv.CPURollStd
+	named["cpu_efficiency"] = fv.CPUEfficiency
+	named["delta_ctx"] = fv.DeltaCtx
+	named["net_delta"] = fv.NetDelta
+	named["avg_net_throughput"] = fv.AvgNetThroughput
+	named["cpu_per_mb"] = fv.CPUPerMB
+	named["network_disk_ratio"] = fv.NetworkDiskRatio
+	named["retrans_per_packet"] = fv.RetransPerPacket
+	named["retrans_per_mb"] = fv.RetransPerMB
+	named["avg_disk_latency_ms"] = fv.AvgDiskLatencyMS
+	named["log_count_total"] = float64(fv.LogCountTotal)
+	named["unique_templates"] = float64(fv.UniqueTemplates)
+	named["log_density"] = fv.LogDensity
+	named["io_wait_proxy"] = fv.IOWaitProxy
+	named["delta_net_in"] = fv.DeltaNetIn
+	named["delta_tcp_retrans"] = fv.DeltaTCPRetrans
+	named["tcp_roll_std"] = fv.TcpRollStd
+	named["net_roll_std"] = fv.NetRollStd
+	named["mem_pressure"] = fv.MemPressure
+	named["net_asymmetry"] = fv.NetAsymmetry
+
+	// Drain parameter averages
+	for i := range paramNames {
+		key := paramNames[i]
+		named["avg_param_"+key] = fv.ParamAvg[key]
+	}
+	return named
+}
+
+// ── AnomalyResult ─────────────────────────────────────────────────────────────
+
+// AnomalyResult is the final output of the detection layer.
+type AnomalyResult struct {
+	// Timestamp the result refers to.
+	Timestamp  time.Time `json:"timestamp"`
+	// Score is the detector's anomaly score; its scale is not defined here.
+	Score      float64   `json:"score"`
+	// IsAnomaly is the detector's binary verdict.
+	IsAnomaly  bool      `json:"is_anomaly"`
+	// Confidence accompanies the verdict; its range is not defined here.
+	Confidence float64   `json:"confidence"`
+	// Method names the detector that produced the result.
+	Method     string    `json:"method"`
+	// Details is optional free text; it is omitted from JSON when empty.
+	Details    string    `json:"details,omitempty"`
+}
+
+// ── StageHealth ───────────────────────────────────────────────────────────────
+
+// StageHealth stores per-stage monitoring counters.
+type StageHealth struct {
+	// StageName identifies the pipeline stage the counters belong to.
+	StageName       string    `json:"stage_name"`
+	// EventsProcessed and EventsDropped are event counters for the stage.
+	EventsProcessed uint64    `json:"events_processed"`
+	EventsDropped   uint64    `json:"events_dropped"`
+	// AvgLatencyMs is the average latency, in milliseconds per the name.
+	AvgLatencyMs    float64   `json:"avg_latency_ms"`
+	// Throughput is serialised as "throughput_eps" — presumably events per
+	// second; confirm against the code that fills it.
+	Throughput      float64   `json:"throughput_eps"`
+	// LastUpdate is the time the counters were last refreshed.
+	LastUpdate      time.Time `json:"last_update"`
+}