commit for version used in evaluation of thesis
This commit is contained in:
commit
72635dc7b9
27 changed files with 6084 additions and 0 deletions
98
internal/detect/copod.go
Normal file
98
internal/detect/copod.go
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
// Package detect provides anomaly detection algorithms and ensemble logic.
|
||||
package detect
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"codeberg.org/pata1704/copod"
|
||||
"codeberg.org/pata1704/guenther/pkg/types"
|
||||
)
|
||||
|
||||
// COPODDetector implements the AnomalyDetector interface by wrapping the
|
||||
// external codeberg.org/pata1704/copod package.
|
||||
//
|
||||
// Streaming mode: Score calls Update internally, so the sliding-window buffer
|
||||
// stays current without requiring a separate Update call. Callers (like SEAD)
|
||||
// only need to call Score per time step.
|
||||
//
|
||||
// Fit seeds the buffer with a batch of normal vectors. If Fit is not called
|
||||
// the detector starts cold and returns score=0 until the buffer has enough
|
||||
// points (controlled by bufferSize in the underlying library).
|
||||
type COPODDetector struct {
|
||||
detector *copod.Detector
|
||||
}
|
||||
|
||||
// NewCOPODDetector initialises the streaming COPOD detector wrapper.
|
||||
//
|
||||
// - bufferSize: sliding-window capacity. Recommended: 100–200.
|
||||
// - threshold: score cutoff for standalone IsAnomaly. When used inside
|
||||
// SEAD the threshold is ignored (SEAD applies its own adaptive threshold).
|
||||
func NewCOPODDetector(bufferSize int, threshold float64) (*COPODDetector, error) {
|
||||
det, err := copod.NewDetector(bufferSize, threshold)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("copod: initialize wrapped detector: %w", err)
|
||||
}
|
||||
return &COPODDetector{
|
||||
detector: det,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Fit seeds the COPOD history buffer with a slice of labelled-normal vectors.
|
||||
func (c *COPODDetector) Fit(vectors []types.FeatureVector) error {
|
||||
for _, v := range vectors {
|
||||
if err := c.update(v); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update adds a single observation to the sliding window.
|
||||
// Safe to call concurrently with Score.
|
||||
func (c *COPODDetector) Update(vector types.FeatureVector) error {
|
||||
return c.update(vector)
|
||||
}
|
||||
|
||||
// Score computes the COPOD anomaly score for the given vector and
|
||||
// simultaneously updates the internal sliding window with the scored vector.
|
||||
//
|
||||
// The self-update ensures COPOD's buffer reflects the current data stream
|
||||
// without requiring a separate Update call after every Score. This is
|
||||
// consistent with the RRCF and IsolationForest detectors which also
|
||||
// update themselves inside Score.
|
||||
func (c *COPODDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
|
||||
vec := copod.FeatureVector{
|
||||
NormalizedVector: vector.NormalizedVector,
|
||||
Timestamp: vector.Timestamp,
|
||||
}
|
||||
|
||||
// Score first, then append to the buffer so the scored point does not
|
||||
// bias its own copula calculation (score-then-insert, same as RRCF).
|
||||
res, err := c.detector.Score(vec)
|
||||
if err != nil {
|
||||
return types.AnomalyResult{}, fmt.Errorf("copod: score: %w", err)
|
||||
}
|
||||
|
||||
if err := c.update(vector); err != nil {
|
||||
// Log but don't fail: the score is already computed.
|
||||
log.Printf("copod: update after score: %v", err)
|
||||
}
|
||||
|
||||
return types.AnomalyResult{
|
||||
Timestamp: res.Timestamp,
|
||||
Score: res.Score,
|
||||
IsAnomaly: res.IsAnomaly,
|
||||
Confidence: res.Confidence,
|
||||
Method: res.Method,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// update is the internal helper that adds vector to the copod sliding window.
|
||||
func (c *COPODDetector) update(vector types.FeatureVector) error {
|
||||
vec := copod.FeatureVector{
|
||||
NormalizedVector: vector.NormalizedVector,
|
||||
Timestamp: vector.Timestamp,
|
||||
}
|
||||
return c.detector.Update(vec)
|
||||
}
|
||||
325
internal/detect/ensemble.go
Normal file
325
internal/detect/ensemble.go
Normal file
|
|
@ -0,0 +1,325 @@
|
|||
// Package detect provides anomaly detection algorithms and ensemble logic.
|
||||
package detect
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"codeberg.org/pata1704/guenther/pkg/types"
|
||||
)
|
||||
|
||||
// EnsembleMethod names the score-aggregation strategy applied by
// EnsembleDetector.
type EnsembleMethod string

// Supported aggregation strategies.
const (
	// EnsembleAVG is the arithmetic mean of the normalised sub-scores.
	EnsembleAVG EnsembleMethod = "avg"

	// EnsembleMAX is the largest normalised sub-score (aggressive).
	EnsembleMAX EnsembleMethod = "max"

	// EnsembleMEDIAN is the median of the normalised sub-scores (robust to
	// outliers).
	EnsembleMEDIAN EnsembleMethod = "median"

	// EnsembleSEAD hands scoring over to an embedded SEADDetector, which
	// adapts per-detector weights online. Per the original notes it is
	// selected with detector.ensemble.method = "sead" in the config.
	EnsembleSEAD EnsembleMethod = "sead"
)
|
||||
|
||||
// RRCFVariantConfig describes one named RRCF instance of the SEAD
// multi-horizon ensemble.
type RRCFVariantConfig struct {
	// NumTrees is the forest size. Per the original notes, more trees give
	// smoother, more conservative scores.
	NumTrees int

	// TreeSize is the sliding-window capacity of each tree.
	TreeSize int

	// ThresholdPercentile is the per-model decision threshold applied when
	// the variant is used standalone.
	ThresholdPercentile float64
}
|
||||
|
||||
// RRCFVariantsConfig groups the three RRCF horizon variants used by the SEAD ensemble.
|
||||
// - Fast: short memory, reactive to transient spikes
|
||||
// - Mid: medium memory, balanced sensitivity
|
||||
// - Slow: long memory, detects sustained / slow-drift events
|
||||
type RRCFVariantsConfig struct {
|
||||
Fast RRCFVariantConfig
|
||||
Mid RRCFVariantConfig
|
||||
Slow RRCFVariantConfig
|
||||
}
|
||||
|
||||
// EnsembleDetector implements the AnomalyDetector interface by combining
|
||||
// COPOD and RRCF scores using min-max normalisation.
|
||||
//
|
||||
// Scoring strategy (AVG / MAX / MEDIAN methods):
|
||||
// 1. Each model produces a raw score on its own scale.
|
||||
// 2. Both scores are normalised to [0, 1] using a rolling min/max window.
|
||||
// 3. The combined score is the result of the selected aggregation function.
|
||||
// 4. A window is flagged anomalous when combinedScore > threshold where
|
||||
// threshold = quantile(combinedHistory, 1-contamination).
|
||||
//
|
||||
// SEAD method:
|
||||
//
|
||||
// When method == EnsembleSEAD the detector delegates entirely to an embedded
|
||||
// SEADDetector which wraps all four base detectors and uses Multiplicative
|
||||
// Weights Update (MWU/FTRL) to adapt weights online. The COPOD and RRCF
|
||||
// sub-detectors passed to NewEnsembleDetector are still created but are only
|
||||
// used when method != EnsembleSEAD.
|
||||
type EnsembleDetector struct {
|
||||
method EnsembleMethod
|
||||
|
||||
// sub-detectors for AVG/MAX/MEDIAN methods
|
||||
copod AnomalyDetector
|
||||
rrcf AnomalyDetector
|
||||
|
||||
// SEAD method: fully adaptive ensemble (replaces copod+rrcf when active)
|
||||
sead *SEADDetector
|
||||
|
||||
contamination float64
|
||||
|
||||
mu sync.Mutex
|
||||
copodHistory []float64
|
||||
rrcfHistory []float64
|
||||
combinedHistory []float64
|
||||
historySize int
|
||||
}
|
||||
|
||||
// NewEnsembleDetector initialises the multi-model ensemble.
|
||||
//
|
||||
// - method: "avg" | "max" | "median" | "sead"
|
||||
// - copodBufferSize: sliding-window capacity for COPOD (≥ 100 recommended).
|
||||
// - copodThreshold: per-model threshold passed to COPODDetector.
|
||||
// - rrcfVariants: three-horizon RRCF config (fast/mid/slow). Used by SEAD;
|
||||
// the Mid variant is also used for the classic AVG/MAX/MEDIAN path.
|
||||
// - contamination: expected fraction of anomalies ∈ [0, 0.5).
|
||||
// - seadCfg: SEAD parameters (only used when method == "sead").
|
||||
// Pass detect.DefaultSEADConfig() when method != "sead".
|
||||
func NewEnsembleDetector(
|
||||
method EnsembleMethod,
|
||||
copodBufferSize int, copodThreshold float64,
|
||||
rrcfVariants RRCFVariantsConfig,
|
||||
contamination float64,
|
||||
seadCfg SEADConfig,
|
||||
) (*EnsembleDetector, error) {
|
||||
e := &EnsembleDetector{
|
||||
method: method,
|
||||
contamination: contamination,
|
||||
historySize: 1000,
|
||||
}
|
||||
|
||||
if method == EnsembleSEAD {
|
||||
// Delegate to SEADDetector with all six base detectors (3 RRCF horizons).
|
||||
// MAD is bootstrapped with identity priors (median=0, MAD=1); it will
|
||||
// calibrate itself during the pipeline warm-up phase.
|
||||
sead, err := NewSEADWithAllDetectors(
|
||||
copodBufferSize, copodThreshold,
|
||||
rrcfVariants,
|
||||
3.5, 0, // madThreshold=3.5, madCalibSize=0→default 100 vectors
|
||||
seadCfg,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ensemble: sead: %w", err)
|
||||
}
|
||||
e.sead = sead
|
||||
} else {
|
||||
// Classic AVG/MAX/MEDIAN path: only COPOD + RRCF (Mid variant as default).
|
||||
copodDet, err := NewCOPODDetector(copodBufferSize, copodThreshold)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ensemble: %w", err)
|
||||
}
|
||||
e.copod = copodDet
|
||||
// Use Mid variant defaults for the classic ensemble path.
|
||||
midTrees := rrcfVariants.Mid.NumTrees
|
||||
if midTrees == 0 {
|
||||
midTrees = 150
|
||||
}
|
||||
midSize := rrcfVariants.Mid.TreeSize
|
||||
if midSize == 0 {
|
||||
midSize = 64
|
||||
}
|
||||
midPct := rrcfVariants.Mid.ThresholdPercentile
|
||||
if midPct == 0 {
|
||||
midPct = 0.85
|
||||
}
|
||||
e.rrcf = NewRRCFDetector(midTrees, midSize, 0, midPct)
|
||||
}
|
||||
|
||||
return e, nil
|
||||
}
|
||||
|
||||
// SEAD returns the underlying SEADDetector if the ensemble is in SEAD mode.
|
||||
func (e *EnsembleDetector) SEAD() *SEADDetector {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
return e.sead
|
||||
}
|
||||
|
||||
// Fit seeds the underlying models from a slice of feature vectors.
|
||||
func (e *EnsembleDetector) Fit(vectors []types.FeatureVector) error {
|
||||
if e.method == EnsembleSEAD {
|
||||
return e.sead.Fit(vectors)
|
||||
}
|
||||
if err := e.copod.Fit(vectors); err != nil {
|
||||
return fmt.Errorf("ensemble: fit copod: %w", err)
|
||||
}
|
||||
if err := e.rrcf.Fit(vectors); err != nil {
|
||||
return fmt.Errorf("ensemble: fit rrcf: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update propagates the vector to the underlying models.
|
||||
func (e *EnsembleDetector) Update(vector types.FeatureVector) error {
|
||||
if e.method == EnsembleSEAD {
|
||||
return e.sead.Update(vector)
|
||||
}
|
||||
if err := e.copod.Update(vector); err != nil {
|
||||
return fmt.Errorf("ensemble: update copod: %w", err)
|
||||
}
|
||||
if err := e.rrcf.Update(vector); err != nil {
|
||||
return fmt.Errorf("ensemble: update rrcf: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Score evaluates the feature vector.
|
||||
//
|
||||
// For SEAD method: delegates entirely to the embedded SEADDetector.
|
||||
// For AVG/MAX/MEDIAN: min-max normalises COPOD and RRCF scores and aggregates.
|
||||
func (e *EnsembleDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
|
||||
if e.method == EnsembleSEAD {
|
||||
res, err := e.sead.Score(vector)
|
||||
if err != nil {
|
||||
return types.AnomalyResult{}, fmt.Errorf("ensemble: sead score: %w", err)
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
resCOPOD, err := e.copod.Score(vector)
|
||||
if err != nil {
|
||||
return types.AnomalyResult{}, fmt.Errorf("ensemble: score copod: %w", err)
|
||||
}
|
||||
|
||||
resRRCF, err := e.rrcf.Score(vector)
|
||||
if err != nil {
|
||||
return types.AnomalyResult{}, fmt.Errorf("ensemble: score rrcf: %w", err)
|
||||
}
|
||||
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
e.appendHistory(&e.copodHistory, resCOPOD.Score)
|
||||
e.appendHistory(&e.rrcfHistory, resRRCF.Score)
|
||||
|
||||
normCOPOD := minMaxNorm(resCOPOD.Score, e.copodHistory)
|
||||
normRRCF := minMaxNorm(resRRCF.Score, e.rrcfHistory)
|
||||
|
||||
var combined float64
|
||||
switch e.method {
|
||||
case EnsembleMAX:
|
||||
combined = math.Max(normCOPOD, normRRCF)
|
||||
case EnsembleMEDIAN:
|
||||
// Median of two values = average; kept for future N>2 extension.
|
||||
vals := []float64{normCOPOD, normRRCF}
|
||||
sort.Float64s(vals)
|
||||
combined = vals[len(vals)/2]
|
||||
default: // EnsembleAVG
|
||||
combined = (normCOPOD + normRRCF) / 2.0
|
||||
}
|
||||
|
||||
e.appendHistory(&e.combinedHistory, combined)
|
||||
|
||||
const minDataPoints = 10
|
||||
threshold := quantile(e.combinedHistory, 1.0-e.contamination)
|
||||
isAnomaly := len(e.combinedHistory) > minDataPoints && combined > threshold
|
||||
|
||||
return types.AnomalyResult{
|
||||
Timestamp: vector.Timestamp,
|
||||
Score: combined,
|
||||
IsAnomaly: isAnomaly,
|
||||
Confidence: math.Min(combined/math.Max(threshold, 1e-9), 1.0),
|
||||
Method: e.methodString(string(e.method), resCOPOD.IsAnomaly, resRRCF.IsAnomaly),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// WeightSummary returns the current SEAD detector weights as a human-readable
|
||||
// string. Returns "" when the ensemble is not using SEAD.
|
||||
func (e *EnsembleDetector) WeightSummary() string {
|
||||
if e.method != EnsembleSEAD || e.sead == nil {
|
||||
return ""
|
||||
}
|
||||
return e.sead.WeightSummary()
|
||||
}
|
||||
|
||||
// appendHistory appends v to *h, evicting the oldest entry when full.
|
||||
// Caller must hold e.mu.
|
||||
func (e *EnsembleDetector) appendHistory(h *[]float64, v float64) {
|
||||
*h = append(*h, v)
|
||||
if len(*h) > e.historySize {
|
||||
*h = (*h)[1:]
|
||||
}
|
||||
}
|
||||
|
||||
// methodString builds a concise label for AnomalyResult.Method.
|
||||
func (e *EnsembleDetector) methodString(method string, copodAnomaly, rrcfAnomaly bool) string {
|
||||
var active []string
|
||||
if copodAnomaly {
|
||||
active = append(active, "COPOD")
|
||||
}
|
||||
if rrcfAnomaly {
|
||||
active = append(active, "RRCF")
|
||||
}
|
||||
if len(active) > 0 {
|
||||
return fmt.Sprintf("Ensemble-%s(%s)", strings.ToUpper(method), strings.Join(active, "+"))
|
||||
}
|
||||
return fmt.Sprintf("Ensemble-%s(none)", strings.ToUpper(method))
|
||||
}
|
||||
|
||||
// ── score helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
// minMaxNorm rescales v into [0, 1] using the smallest and largest value
// found in history.
//
// Special cases: an empty history yields 0; a history whose spread is below
// 1e-12 (effectively constant) yields 0.5; results outside [0, 1] are clamped.
func minMaxNorm(v float64, history []float64) float64 {
	if len(history) == 0 {
		return 0
	}

	lo, hi := history[0], history[0]
	for i := 1; i < len(history); i++ {
		sample := history[i]
		if sample < lo {
			lo = sample
		}
		if sample > hi {
			hi = sample
		}
	}

	spread := hi - lo
	if spread < 1e-12 {
		// Degenerate range: every observation is (almost) identical.
		return 0.5
	}

	switch scaled := (v - lo) / spread; {
	case scaled < 0:
		return 0
	case scaled > 1:
		return 1
	default:
		return scaled
	}
}
|
||||
|
||||
// quantile returns the p-th quantile of data (nearest-rank on a sorted copy:
// the element at index floor(n*p)). data itself is not modified.
//
// Edge cases:
//   - empty data yields 0;
//   - p <= 0 or NaN yields the minimum;
//   - p >= 1 (including +Inf) yields the maximum.
//
// p is clamped before the float-to-int conversion so that out-of-range values
// (for example a contamination > 1 giving a negative p) can neither index
// before the slice nor hit implementation-defined conversion results.
func quantile(data []float64, p float64) float64 {
	n := len(data)
	if n == 0 {
		return 0
	}

	sorted := make([]float64, n)
	copy(sorted, data)
	sort.Float64s(sorted)

	switch {
	case !(p > 0): // p <= 0 or NaN
		return sorted[0]
	case p >= 1:
		return sorted[n-1]
	}

	idx := int(float64(n) * p)
	if idx >= n { // guard against floating-point rounding at the top end
		idx = n - 1
	}
	return sorted[idx]
}
|
||||
200
internal/detect/iforest.go
Normal file
200
internal/detect/iforest.go
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
package detect
|
||||
|
||||
import (
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"codeberg.org/pata1704/guenther/pkg/types"
|
||||
"github.com/e-XpertSolutions/go-iforest/iforest"
|
||||
)
|
||||
|
||||
// IsolationForestDetector wraps go-iforest with thread-safe access and
|
||||
// continuous background retraining on non-anomalous data to handle concept drift.
|
||||
//
|
||||
// During the warmup phase (model == nil) incoming vectors are buffered.
|
||||
// Once warmupSize vectors have accumulated, the first training run executes
|
||||
// synchronously so that the detector is never in an undefined trained state
|
||||
// after the first window tick.
|
||||
//
|
||||
// Subsequent retraining is asynchronous: when trainingBuffer reaches
|
||||
// bufferSize the buffer is swapped out under the lock, and training runs in
|
||||
// a detached goroutine. The current model remains active during retraining,
|
||||
// so scoring never blocks.
|
||||
type IsolationForestDetector struct {
|
||||
mu sync.RWMutex
|
||||
model *iforest.Forest
|
||||
trainingBuffer []types.FeatureVector
|
||||
|
||||
// Tuning knobs – set via constructor.
|
||||
numTrees int
|
||||
subSample int
|
||||
contamination float64
|
||||
bufferSize int
|
||||
warmupSize int
|
||||
threshold float64
|
||||
}
|
||||
|
||||
// NewIsolationForestDetector creates a detector with the given parameters.
|
||||
//
|
||||
// - bufferSize: number of non-anomalous vectors to accumulate before
|
||||
// triggering background retraining.
|
||||
// - warmupSize: number of vectors to accumulate before the first (sync)
|
||||
// training run. Must be ≤ bufferSize.
|
||||
// - numTrees: number of isolation trees (typically 100).
|
||||
// - subSample: subsample size per tree (typically 256).
|
||||
// - contamination: expected fraction of anomalies (0 < c < 0.5).
|
||||
// - threshold: score cutoff for IsAnomaly.
|
||||
func NewIsolationForestDetector(
|
||||
bufferSize, warmupSize, numTrees, subSample int,
|
||||
contamination, threshold float64,
|
||||
) *IsolationForestDetector {
|
||||
if warmupSize <= 0 || warmupSize > bufferSize {
|
||||
warmupSize = bufferSize
|
||||
}
|
||||
return &IsolationForestDetector{
|
||||
bufferSize: bufferSize,
|
||||
warmupSize: warmupSize,
|
||||
numTrees: numTrees,
|
||||
subSample: subSample,
|
||||
contamination: contamination,
|
||||
threshold: threshold,
|
||||
}
|
||||
}
|
||||
|
||||
// Fit trains a new Isolation Forest on vectors.
|
||||
// Fit is safe to call concurrently with Score (uses a write lock only while
|
||||
// swapping the model pointer).
|
||||
func (d *IsolationForestDetector) Fit(vectors []types.FeatureVector) error {
|
||||
if len(vectors) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
data := convertToMatrix(vectors)
|
||||
forest := iforest.NewForest(d.numTrees, d.subSample, d.contamination)
|
||||
forest.Train(data)
|
||||
forest.Test(data)
|
||||
|
||||
d.mu.Lock()
|
||||
d.model = forest
|
||||
d.mu.Unlock()
|
||||
|
||||
log.Printf("iforest: trained on %d samples (trees=%d, subsample=%d, contamination=%.3f)",
|
||||
len(vectors), d.numTrees, d.subSample, d.contamination)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Score returns an AnomalyResult for vector.
|
||||
//
|
||||
// Pre-model (warmup) behaviour:
|
||||
// - Vector is appended to trainingBuffer.
|
||||
// - Once warmupSize is reached the first training run executes synchronously
|
||||
// on the calling goroutine so subsequent Score calls have a model.
|
||||
// - Returns score=0, IsAnomaly=false while warming up.
|
||||
//
|
||||
// Post-model behaviour:
|
||||
// - Score is computed via the active model (read-lock only).
|
||||
// - Non-anomalous vectors are appended to trainingBuffer.
|
||||
// - When trainingBuffer reaches bufferSize, a background retrain fires.
|
||||
func (d *IsolationForestDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
|
||||
warmup := types.AnomalyResult{
|
||||
Timestamp: vector.Timestamp,
|
||||
Score: 0,
|
||||
IsAnomaly: false,
|
||||
Method: "IF",
|
||||
}
|
||||
|
||||
// ── warmup phase ──────────────────────────────────────────────────────
|
||||
d.mu.RLock()
|
||||
model := d.model
|
||||
d.mu.RUnlock()
|
||||
|
||||
if model == nil {
|
||||
d.mu.Lock()
|
||||
d.trainingBuffer = append(d.trainingBuffer, vector)
|
||||
bufLen := len(d.trainingBuffer)
|
||||
d.mu.Unlock()
|
||||
|
||||
if bufLen < d.warmupSize {
|
||||
return warmup, nil
|
||||
}
|
||||
|
||||
// Synchronous first fit to eliminate the cold-start gap.
|
||||
d.mu.Lock()
|
||||
buf := d.trainingBuffer
|
||||
d.trainingBuffer = nil
|
||||
d.mu.Unlock()
|
||||
|
||||
if err := d.Fit(buf); err != nil {
|
||||
return warmup, err
|
||||
}
|
||||
|
||||
d.mu.RLock()
|
||||
model = d.model
|
||||
d.mu.RUnlock()
|
||||
|
||||
if model == nil {
|
||||
return warmup, nil // Fit failed silently – defensive
|
||||
}
|
||||
}
|
||||
|
||||
// ── inference ─────────────────────────────────────────────────────────
|
||||
_, scores, err := model.Predict([][]float64{vector.NormalizedVector})
|
||||
if err != nil {
|
||||
return warmup, err
|
||||
}
|
||||
if len(scores) == 0 {
|
||||
return warmup, nil
|
||||
}
|
||||
score := scores[0]
|
||||
|
||||
res := types.AnomalyResult{
|
||||
Timestamp: vector.Timestamp,
|
||||
Score: score,
|
||||
IsAnomaly: score > d.threshold,
|
||||
Confidence: score,
|
||||
Method: "IF",
|
||||
}
|
||||
|
||||
// Buffer non-anomalous vectors for background retraining.
|
||||
if !res.IsAnomaly {
|
||||
if err := d.Update(vector); err != nil {
|
||||
log.Printf("iforest: update buffer: %v", err)
|
||||
}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Update appends a non-anomalous vector to the training buffer.
|
||||
// If the buffer is full it is swapped atomically and a background goroutine
|
||||
// retrains the model on the captured data.
|
||||
func (d *IsolationForestDetector) Update(vector types.FeatureVector) error {
|
||||
d.mu.Lock()
|
||||
d.trainingBuffer = append(d.trainingBuffer, vector)
|
||||
|
||||
if len(d.trainingBuffer) < d.bufferSize {
|
||||
d.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
buf := make([]types.FeatureVector, len(d.trainingBuffer))
|
||||
copy(buf, d.trainingBuffer)
|
||||
d.trainingBuffer = nil
|
||||
d.mu.Unlock()
|
||||
|
||||
go func() {
|
||||
if err := d.Fit(buf); err != nil {
|
||||
log.Printf("iforest: background retrain: %v", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
// ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
func convertToMatrix(vectors []types.FeatureVector) [][]float64 {
|
||||
m := make([][]float64, len(vectors))
|
||||
for i, v := range vectors {
|
||||
m[i] = v.NormalizedVector
|
||||
}
|
||||
return m
|
||||
}
|
||||
148
internal/detect/interface.go
Normal file
148
internal/detect/interface.go
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
package detect
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"codeberg.org/pata1704/guenther/pkg/types"
|
||||
)
|
||||
|
||||
// AnomalyDetector is the common interface for all detection algorithms.
|
||||
// Implementations must be safe for concurrent use.
|
||||
type AnomalyDetector interface {
|
||||
// Fit trains the model on the supplied slice of labelled-normal vectors.
|
||||
Fit(vectors []types.FeatureVector) error
|
||||
// Score returns an anomaly assessment for vector. It must not block.
|
||||
Score(vector types.FeatureVector) (types.AnomalyResult, error)
|
||||
// Update buffers vector for incremental model updates.
|
||||
Update(vector types.FeatureVector) error
|
||||
}
|
||||
|
||||
// DetectionLayer reads FeatureVectors from inputChan, scores them with the
|
||||
// configured AnomalyDetector, and forwards AnomalyResults to outputChan.
|
||||
//
|
||||
// The layer runs a single event-loop goroutine (no additional worker pool is
|
||||
// needed because detection is CPU-bound in a single model, not I/O-bound).
|
||||
// Health metrics are emitted to healthChan every 5 seconds.
|
||||
//
|
||||
// Backpressure: if outputChan is full the result is dropped and a warning is
|
||||
// logged. This prevents the detection goroutine from blocking the upstream
|
||||
// TransformEngine via backpressure handling.
|
||||
type DetectionLayer struct {
|
||||
detector AnomalyDetector
|
||||
inputChan <-chan types.FeatureVector
|
||||
outputChan chan<- types.AnomalyResult
|
||||
healthChan chan<- types.StageHealth
|
||||
|
||||
scalingController *ScalingController // optional
|
||||
|
||||
wg sync.WaitGroup
|
||||
|
||||
mu sync.Mutex
|
||||
processed uint64
|
||||
dropped uint64
|
||||
avgLatency float64
|
||||
}
|
||||
|
||||
// NewDetectionLayer constructs a DetectionLayer wired to the given channels.
|
||||
func NewDetectionLayer(
|
||||
detector AnomalyDetector,
|
||||
input <-chan types.FeatureVector,
|
||||
output chan<- types.AnomalyResult,
|
||||
health chan<- types.StageHealth,
|
||||
) *DetectionLayer {
|
||||
return &DetectionLayer{
|
||||
detector: detector,
|
||||
inputChan: input,
|
||||
outputChan: output,
|
||||
healthChan: health,
|
||||
}
|
||||
}
|
||||
|
||||
// SetScalingController attaches an auto-scaling controller to the layer.
|
||||
func (l *DetectionLayer) SetScalingController(sc *ScalingController) {
|
||||
l.scalingController = sc
|
||||
}
|
||||
|
||||
// Start launches the detection event loop in a background goroutine.
|
||||
// The method is idempotent: calling Start twice panics (close of closed channel).
|
||||
func (l *DetectionLayer) Start(ctx context.Context) {
|
||||
l.wg.Go(func() {
|
||||
reportTicker := time.NewTicker(5 * time.Second)
|
||||
defer reportTicker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case fv := <-l.inputChan:
|
||||
l.handle(fv)
|
||||
|
||||
case <-reportTicker.C:
|
||||
l.emitHealth()
|
||||
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Wait waits for the event loop to exit after context cancellation.
|
||||
func (l *DetectionLayer) Wait() {
|
||||
l.wg.Wait()
|
||||
}
|
||||
|
||||
func (l *DetectionLayer) handle(fv types.FeatureVector) {
|
||||
if l.scalingController != nil {
|
||||
l.scalingController.ObserveCPU(fv.AvgCPUPercent)
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
result, err := l.detector.Score(fv)
|
||||
ms := time.Since(start).Seconds() * 1e3
|
||||
|
||||
l.mu.Lock()
|
||||
l.processed++
|
||||
if l.avgLatency == 0 {
|
||||
l.avgLatency = ms
|
||||
} else {
|
||||
l.avgLatency = l.avgLatency*0.8 + ms*0.2
|
||||
}
|
||||
l.mu.Unlock()
|
||||
|
||||
if err != nil {
|
||||
log.Printf("detection: score error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case l.outputChan <- result:
|
||||
default:
|
||||
l.mu.Lock()
|
||||
l.dropped++
|
||||
l.mu.Unlock()
|
||||
log.Printf("detection: output channel full – dropping result (score=%.4f)", result.Score)
|
||||
}
|
||||
}
|
||||
|
||||
// emitHealth sends a StageHealth snapshot to healthChan.
|
||||
// Non-blocking: skips the report if healthChan is full.
|
||||
func (l *DetectionLayer) emitHealth() {
|
||||
l.mu.Lock()
|
||||
p := l.processed
|
||||
d := l.dropped
|
||||
avg := l.avgLatency
|
||||
l.mu.Unlock()
|
||||
|
||||
select {
|
||||
case l.healthChan <- types.StageHealth{
|
||||
StageName: "detection_layer",
|
||||
EventsProcessed: p,
|
||||
EventsDropped: d,
|
||||
AvgLatencyMs: avg,
|
||||
LastUpdate: time.Now(),
|
||||
}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
254
internal/detect/mad.go
Normal file
254
internal/detect/mad.go
Normal file
|
|
@ -0,0 +1,254 @@
|
|||
// Package detect provides anomaly detection algorithms and ensemble logic.
|
||||
package detect
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math"
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"codeberg.org/pata1704/guenther/pkg/types"
|
||||
)
|
||||
|
||||
// MADDetector scores feature vectors using per-feature Median Absolute
// Deviation (MAD), with statistics that are either supplied up front or
// derived automatically.
//
// # Auto-calibration
//
// NewMADDetectorAutoCalibrate creates a detector without statistics and with
// calibrationSize > 0. Per the original notes it buffers the first
// calibrationSize NormalizedVectors, derives per-feature statistics once the
// buffer is full and scores normally afterwards; during warm-up Score returns
// score=0 / IsAnomaly=false.
//
//	detector := NewMADDetectorAutoCalibrate(3.5, 100)
//
// The original author notes that SEAD down-weights MAD during warm-up because
// all its scores are zero, and starts considering it once calibration is
// complete.
//
// # Calibration contract
//
// medians and mads must be computed from the SAME representation that arrives
// in vector.NormalizedVector – i.e. from the RobustScaler-scaled feature
// vectors, NOT from raw window aggregates.
//
// # Scoring
//
// For each feature i the modified Z-score is:
//
//	score_i = |x_i - median_i| / (1.4826 * MAD_i)
//
// The constant 1.4826 ≈ 1/Φ⁻¹(3/4) makes MAD a consistent estimator of σ
// under normality (Rousseeuw & Croux, 1993). The anomaly score is the maximum
// modified Z-score across all features.
//
// # Fit / Update
//
// Fit replaces the current statistics with values derived from the supplied
// vectors and marks the detector calibrated. Update forwards the vector to
// Score and discards the result.
type MADDetector struct {
	// mu guards every field below.
	mu        sync.Mutex
	threshold float64
	medians   []float64 // per-feature median of NormalizedVector in baseline
	mads      []float64 // per-feature MAD of NormalizedVector in baseline

	// Auto-calibration state. calibrationSize == 0 means disabled.
	calibrationSize int
	calibrationBuf  [][]float64 // NormalizedVectors collected during warm-up
	calibrated      bool
}
|
||||
|
||||
// NewMADDetector creates a MADDetector with pre-calibrated baseline statistics.
|
||||
//
|
||||
// - threshold: anomaly score cutoff (modified Z-score). Typical: 2.5–4.0.
|
||||
// - medians: per-feature median computed from NormalizedVector in baseline.
|
||||
// - mads: per-feature MAD computed from NormalizedVector in baseline.
|
||||
// Zero entries are replaced with 1.0 to avoid division-by-zero.
|
||||
//
|
||||
// Pass nil for medians and mads only when calibrationSize > 0 is set via
|
||||
// NewMADDetectorAutoCalibrate; otherwise all scores will be zero.
|
||||
func NewMADDetector(threshold float64, medians, mads []float64) *MADDetector {
|
||||
return &MADDetector{
|
||||
threshold: threshold,
|
||||
medians: medians,
|
||||
mads: mads,
|
||||
calibrated: len(medians) > 0,
|
||||
}
|
||||
}
|
||||
|
||||
// NewMADDetectorAutoCalibrate creates a MADDetector that derives its own
|
||||
// per-feature statistics from the first calibrationSize NormalizedVectors
|
||||
// it encounters in Score.
|
||||
//
|
||||
// - threshold: modified Z-score cutoff after calibration. Typical: 3.5.
|
||||
// - calibrationSize: number of vectors to buffer before first calibration.
|
||||
// Recommended: 60–200
|
||||
func NewMADDetectorAutoCalibrate(threshold float64, calibrationSize int) *MADDetector {
|
||||
if calibrationSize <= 0 {
|
||||
calibrationSize = 100
|
||||
}
|
||||
// Initialise with "Identity" stats (median=0, mad=1) so the detector is
|
||||
// operational immediately with a global sensitivity of 1.0 (baseline IQR).
|
||||
// Features are already RobustScaled by DuckDB, so this is a sane prior.
|
||||
// Automatic calibration will refine these once the buffer is full.
|
||||
return &MADDetector{
|
||||
threshold: threshold,
|
||||
calibrationSize: calibrationSize,
|
||||
medians: nil, // will be Lazy-init or from buffer
|
||||
mads: nil,
|
||||
}
|
||||
}
|
||||
|
||||
// Fit recomputes per-feature median and MAD from the supplied vectors,
|
||||
// replacing any prior calibration. Safe to call concurrently with Score.
|
||||
func (m *MADDetector) Fit(vectors []types.FeatureVector) error {
|
||||
if len(vectors) == 0 {
|
||||
return nil
|
||||
}
|
||||
raw := make([][]float64, len(vectors))
|
||||
for i, v := range vectors {
|
||||
raw[i] = v.NormalizedVector
|
||||
}
|
||||
medians, mads := computeMADStats(raw)
|
||||
|
||||
m.mu.Lock()
|
||||
m.medians = medians
|
||||
m.mads = mads
|
||||
m.calibrated = true
|
||||
m.calibrationBuf = nil
|
||||
m.mu.Unlock()
|
||||
|
||||
log.Printf("mad: fitted on %d vectors (%d features)", len(vectors), len(medians))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update is a no-op when manual statistics are used. When auto-calibration is
|
||||
// active it is equivalent to calling Score but discards the result.
|
||||
func (m *MADDetector) Update(v types.FeatureVector) error {
|
||||
_, _ = m.Score(v)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Score computes the maximum modified Z-score across all features of vector.
|
||||
//
|
||||
// During the auto-calibration warmup the vector is buffered and a zero-score
|
||||
// result is returned. Once the calibration buffer is full the statistics are
|
||||
// derived automatically and scoring starts on the next call.
|
||||
//
|
||||
// vector.NormalizedVector must contain values on the same scale as the
|
||||
// medians and mads slices (i.e. RobustScaler-scaled values from DuckDB).
|
||||
func (m *MADDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
|
||||
m.mu.Lock()
|
||||
// ── Auto-calibration warmup ───────────────────────────────────────────
|
||||
if !m.calibrated && m.calibrationSize > 0 {
|
||||
if vec := vector.NormalizedVector; len(vec) > 0 {
|
||||
cp := make([]float64, len(vec))
|
||||
copy(cp, vec)
|
||||
m.calibrationBuf = append(m.calibrationBuf, cp)
|
||||
}
|
||||
if len(m.calibrationBuf) >= m.calibrationSize {
|
||||
m.medians, m.mads = computeMADStats(m.calibrationBuf)
|
||||
m.calibrated = true
|
||||
m.calibrationBuf = nil
|
||||
log.Printf("mad: auto-calibrated on %d vectors (%d features)",
|
||||
m.calibrationSize, len(m.medians))
|
||||
}
|
||||
if !m.calibrated {
|
||||
m.mu.Unlock()
|
||||
return m.scoreIdentity(vector), nil
|
||||
}
|
||||
}
|
||||
medians := m.medians
|
||||
mads := m.mads
|
||||
m.mu.Unlock()
|
||||
|
||||
// ── Scoring ───────────────────────────────────────────────────────────
|
||||
maxScore := 0.0
|
||||
for i, val := range vector.NormalizedVector {
|
||||
if i >= len(medians) || i >= len(mads) {
|
||||
break
|
||||
}
|
||||
// Stability floor: prevent explosive Z-scores for features with near-zero variance.
|
||||
// 1e-2 corresponds to 1% of the original baseline IQR.
|
||||
mad := math.Max(mads[i], 0.01)
|
||||
|
||||
// 1.4826 converts MAD to an estimator of standard deviation.
|
||||
score := math.Abs(val-medians[i]) / (1.4826 * mad)
|
||||
if score > maxScore {
|
||||
maxScore = score
|
||||
}
|
||||
}
|
||||
|
||||
return types.AnomalyResult{
|
||||
Timestamp: vector.Timestamp,
|
||||
Score: maxScore,
|
||||
IsAnomaly: maxScore > m.threshold,
|
||||
Confidence: math.Min(maxScore/math.Max(m.threshold, 1e-9), 1.0),
|
||||
Method: "MAD",
|
||||
}, nil
|
||||
}
|
||||
|
||||
// scoreIdentity provides a sane fallback (median=0, mad=1) for pre-scaled data.
|
||||
func (m *MADDetector) scoreIdentity(vector types.FeatureVector) types.AnomalyResult {
|
||||
maxScore := 0.0
|
||||
for _, val := range vector.NormalizedVector {
|
||||
score := math.Abs(val) / 0.6745 // 1/1.4826
|
||||
if score > maxScore {
|
||||
maxScore = score
|
||||
}
|
||||
}
|
||||
res := types.AnomalyResult{
|
||||
Timestamp: vector.Timestamp,
|
||||
Score: maxScore,
|
||||
IsAnomaly: maxScore > m.threshold,
|
||||
Confidence: math.Min(maxScore/math.Max(m.threshold, 1e-9), 1.0),
|
||||
Method: "MAD (warmup)",
|
||||
}
|
||||
if res.IsAnomaly {
|
||||
res.Details = "Detected during MAD auto-calibration warmup period (using identity prior)."
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
// ── calibration helper ────────────────────────────────────────────────────────
|
||||
|
||||
// computeMADStats returns the per-feature median and MAD (median absolute
// deviation from the median) of a matrix given as row vectors.
//
// The number of features is taken from the first row and both result slices
// have that length. A row shorter than the first row contributes no value to
// the features it lacks; values beyond the first row's length are ignored.
// An empty matrix yields (nil, nil). The rows themselves are not modified.
func computeMADStats(rows [][]float64) (medians, mads []float64) {
	if len(rows) == 0 {
		return nil, nil
	}
	nFeatures := len(rows[0])
	medians = make([]float64, nFeatures)
	mads = make([]float64, nFeatures)

	// Scratch slices reused for every feature column. They are truncated and
	// refilled per feature so that a short row can never leave behind a value
	// from the previous column.
	col := make([]float64, 0, len(rows))
	devs := make([]float64, 0, len(rows))
	for f := 0; f < nFeatures; f++ {
		col = col[:0]
		for _, row := range rows {
			if f < len(row) {
				col = append(col, row[f])
			}
		}
		med := median(col) // sorts col in place; order is irrelevant below
		medians[f] = med

		devs = devs[:0]
		for _, v := range col {
			devs = append(devs, math.Abs(v-med))
		}
		mads[f] = median(devs)
	}
	return medians, mads
}

// median returns the median of xs, or 0 for an empty slice. For an even
// number of elements it is the mean of the two middle values.
// xs is modified in-place (sorted).
func median(xs []float64) float64 {
	n := len(xs)
	if n == 0 {
		return 0
	}
	sort.Float64s(xs)
	mid := n / 2
	if n%2 == 1 {
		return xs[mid]
	}
	return (xs[mid-1] + xs[mid]) / 2.0
}
|
||||
114
internal/detect/mad_test.go
Normal file
114
internal/detect/mad_test.go
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
package detect
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"codeberg.org/pata1704/guenther/pkg/types"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestMADDetector_Score(t *testing.T) {
|
||||
detector := NewMADDetector(3.0, []float64{10.0}, []float64{1.0})
|
||||
|
||||
// 1. Score a normal value
|
||||
res, err := detector.Score(types.FeatureVector{
|
||||
Timestamp: time.Now(),
|
||||
NormalizedVector: []float64{11},
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, res.IsAnomaly, "Value 11 should not be an anomaly")
|
||||
|
||||
// 2. Score an extreme outlier
|
||||
res, err = detector.Score(types.FeatureVector{
|
||||
Timestamp: time.Now(),
|
||||
NormalizedVector: []float64{100},
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, res.IsAnomaly, "Value 100 should be an anomaly")
|
||||
assert.Greater(t, res.Score, 3.0)
|
||||
}
|
||||
|
||||
func TestMADDetector_CalibrationStability(t *testing.T) {
|
||||
// 1. Create a detector that auto-calibrates on 100 idle vectors.
|
||||
detector := NewMADDetectorAutoCalibrate(3.5, 100)
|
||||
now := time.Now()
|
||||
|
||||
// 2. Feed 99 perfectly idle vectors.
|
||||
// They should all use "Identity" fallback and return low scores (or 0 if val is 0).
|
||||
for i := 0; i < 99; i++ {
|
||||
fv := types.FeatureVector{
|
||||
Timestamp: now.Add(time.Duration(i) * time.Second),
|
||||
NormalizedVector: []float64{0.0, 0.0},
|
||||
}
|
||||
res, err := detector.Score(fv)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 0.0, res.Score)
|
||||
assert.Contains(t, res.Method, "warmup")
|
||||
}
|
||||
|
||||
// 3. Feed the 100th vector. This triggers calibration.
|
||||
// Since all 100 vectors were 0, the learned medians will be 0 and mads will be 0.
|
||||
fv100 := types.FeatureVector{
|
||||
Timestamp: now.Add(100 * time.Second),
|
||||
NormalizedVector: []float64{0.0, 0.0},
|
||||
}
|
||||
res100, err := detector.Score(fv100)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 0.0, res100.Score)
|
||||
// After this call, mads should be [0.0, 0.0] but clamped to 0.01 during Score.
|
||||
|
||||
// 4. Feed the 101st vector: A "normal" burst (e.g. 1.0 baseline IQR).
|
||||
// Without the floor, this would be 1.0 / (1.48 * 0) -> infinity (clamped).
|
||||
// With the floor (0.01), it should be 1.0 / (1.4826 * 0.01) ≈ 67.45.
|
||||
fv101 := types.FeatureVector{
|
||||
Timestamp: now.Add(101 * time.Second),
|
||||
NormalizedVector: []float64{1.0, 0.0},
|
||||
}
|
||||
res101, err := detector.Score(fv101)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Check that the score is contained.
|
||||
// 1.0 / (1.4826 * 0.01) = 67.449
|
||||
assert.InDelta(t, 67.449, res101.Score, 0.1)
|
||||
assert.True(t, res101.IsAnomaly)
|
||||
assert.Equal(t, "MAD", res101.Method) // No longer "warmup"
|
||||
|
||||
// 5. Test with a very small variance but not 0.
|
||||
// Suppose learned MAD was 0.0001. Score for val=1.0 would be 1.0 / 0.000148... ≈ 6745.
|
||||
// Our floor (0.01) should still clamp this to 67.45.
|
||||
detector.mu.Lock()
|
||||
detector.mads = []float64{0.0001, 0.0}
|
||||
detector.medians = []float64{0.0, 0.0}
|
||||
detector.mu.Unlock()
|
||||
|
||||
resSmall, err := detector.Score(fv101)
|
||||
assert.NoError(t, err)
|
||||
assert.InDelta(t, 67.449, resSmall.Score, 0.1)
|
||||
}
|
||||
|
||||
func TestMADDetector_IdentityPrior(t *testing.T) {
|
||||
detector := NewMADDetectorAutoCalibrate(3.5, 10)
|
||||
|
||||
// Feature vector with a deviation of 2.0 baseline IQR.
|
||||
// Using identity prior (mad=1.0), the score should be:
|
||||
// score = |2.0| / (1.4826 * 1.0) = 2.0 / 1.4826 ≈ 1.3489
|
||||
// Wait, scoreIdentity uses 0.6745 directly: math.Abs(val) / 0.6745
|
||||
// 2.0 / 0.6745 ≈ 2.965
|
||||
fv := types.FeatureVector{
|
||||
NormalizedVector: []float64{2.0},
|
||||
}
|
||||
res, _ := detector.Score(fv)
|
||||
assert.InDelta(t, 2.965, res.Score, 0.1)
|
||||
assert.False(t, res.IsAnomaly) // 2.96 < 3.5
|
||||
|
||||
// Feature vector with deviation of 3.0.
|
||||
// score = 3.0 / 0.6745 ≈ 4.44
|
||||
fv2 := types.FeatureVector{
|
||||
NormalizedVector: []float64{3.0},
|
||||
}
|
||||
res2, _ := detector.Score(fv2)
|
||||
assert.InDelta(t, 4.44, res2.Score, 0.1)
|
||||
assert.True(t, res2.IsAnomaly)
|
||||
assert.Contains(t, res2.Details, "identity prior")
|
||||
}
|
||||
173
internal/detect/rrcf.go
Normal file
173
internal/detect/rrcf.go
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
// Package detect provides anomaly detection algorithms and ensemble logic.
|
||||
package detect
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"sync"
|
||||
|
||||
"codeberg.org/pata1704/guenther/pkg/types"
|
||||
"codeberg.org/pata1704/rrcf"
|
||||
)
|
||||
|
||||
// RRCFDetector wraps pkg/rrcf.Forest with the AnomalyDetector interface.
|
||||
//
|
||||
// Scoring strategy: score-then-insert (online streaming).
|
||||
// Each call to Score:
|
||||
// 1. Scores the point without inserting (ephemeral key – thread-safe).
|
||||
// 2. Inserts the point permanently so the forest stays fresh.
|
||||
type RRCFDetector struct {
|
||||
mu sync.Mutex
|
||||
forest *rrcf.Forest
|
||||
|
||||
thresholdPct float64
|
||||
numTrees int
|
||||
treeSize int
|
||||
warmup int
|
||||
counter int
|
||||
buf []types.FeatureVector
|
||||
|
||||
// Rolling score window for adaptive threshold calculation.
|
||||
// Uses a FIFO ring buffer; only scores after warmupDiscard are included.
|
||||
scoreWindow *ringBuffer
|
||||
warmupDiscard int // number of scores to discard after forest initialisation
|
||||
scored int // total scores seen (including discarded)
|
||||
}
|
||||
|
||||
// NewRRCFDetector creates an RRCFDetector.
|
||||
//
|
||||
// - numTrees: number of trees in the forest (200 recommended).
|
||||
// - treeSize: sliding-window capacity per tree (256 recommended).
|
||||
// - warmup: vectors to buffer before first Score (pass 0 for immediate start).
|
||||
// - thresholdPct: percentile of rolling score window used as threshold.
|
||||
// E.g. 0.65 means: flag as anomaly if score > 65th percentile of recent scores.
|
||||
//
|
||||
// Internal defaults:
|
||||
// - warmupDiscard = 10 (discard the first 10 scores; forest is not yet stable)
|
||||
// - scoreWindowMax = 60
|
||||
func NewRRCFDetector(numTrees, treeSize, warmup int, thresholdPct float64) *RRCFDetector {
|
||||
return &RRCFDetector{
|
||||
numTrees: numTrees,
|
||||
treeSize: treeSize,
|
||||
warmup: warmup,
|
||||
thresholdPct: thresholdPct,
|
||||
scoreWindow: newRingBuffer(60),
|
||||
warmupDiscard: 10,
|
||||
}
|
||||
}
|
||||
|
||||
// Fit seeds the forest from a slice of FeatureVectors.
|
||||
// It replaces any existing forest; the internal insert counter is reset.
|
||||
func (d *RRCFDetector) Fit(vectors []types.FeatureVector) error {
|
||||
if len(vectors) == 0 {
|
||||
return nil
|
||||
}
|
||||
dim := len(vectors[0].NormalizedVector)
|
||||
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
|
||||
d.forest = rrcf.NewForest(d.numTrees, dim, d.treeSize)
|
||||
d.counter = 0
|
||||
for _, v := range vectors {
|
||||
if err := d.forest.Insert(v.NormalizedVector, d.counter); err != nil {
|
||||
log.Printf("rrcf: fit insert: %v", err)
|
||||
continue
|
||||
}
|
||||
d.counter++
|
||||
}
|
||||
log.Printf("rrcf: forest seeded with %d points (trees=%d, treeSize=%d)",
|
||||
len(vectors), d.numTrees, d.treeSize)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Score returns an AnomalyResult for vector.
|
||||
// During the warmup phase (len(buf) < warmup) the vector is buffered and a
|
||||
// zero-score result is returned.
|
||||
func (d *RRCFDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
|
||||
// Lazy forest initialisation on the first Score call.
|
||||
if d.forest == nil {
|
||||
dim := len(vector.NormalizedVector)
|
||||
d.forest = rrcf.NewForest(d.numTrees, dim, d.treeSize)
|
||||
}
|
||||
|
||||
// Warmup buffering.
|
||||
if d.warmup > 0 && len(d.buf) < d.warmup {
|
||||
d.buf = append(d.buf, vector)
|
||||
if len(d.buf) == d.warmup {
|
||||
for _, v := range d.buf {
|
||||
_ = d.forest.Insert(v.NormalizedVector, d.counter)
|
||||
d.counter++
|
||||
}
|
||||
d.buf = nil
|
||||
log.Printf("rrcf: warmup complete (%d vectors)", d.warmup)
|
||||
}
|
||||
return types.AnomalyResult{
|
||||
Timestamp: vector.Timestamp,
|
||||
Score: 0,
|
||||
IsAnomaly: false,
|
||||
Method: "RRCF",
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Score via ephemeral insertion.
|
||||
score, err := d.forest.Score(vector.NormalizedVector)
|
||||
if err != nil {
|
||||
return types.AnomalyResult{}, fmt.Errorf("rrcf: %w", err)
|
||||
}
|
||||
|
||||
// Permanent streaming insert to keep the forest fresh.
|
||||
if err := d.forest.Insert(vector.NormalizedVector, d.counter); err != nil {
|
||||
log.Printf("rrcf: insert: %v", err)
|
||||
}
|
||||
d.counter++
|
||||
d.scored++
|
||||
|
||||
// Discard the first warmupDiscard scores: the forest is still settling
|
||||
// and scores are artificially high, which would anchor the threshold.
|
||||
if d.scored <= d.warmupDiscard {
|
||||
return types.AnomalyResult{
|
||||
Timestamp: vector.Timestamp,
|
||||
Score: score,
|
||||
IsAnomaly: false,
|
||||
Method: "RRCF",
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Update rolling score window (ring buffer).
|
||||
d.scoreWindow.push(score)
|
||||
|
||||
// Need at least 10 scores before making decisions.
|
||||
isAnomaly := false
|
||||
var threshold float64
|
||||
if d.scoreWindow.size >= 10 {
|
||||
threshold = d.rollingThreshold()
|
||||
isAnomaly = score > threshold
|
||||
}
|
||||
|
||||
confidence := 0.0
|
||||
if threshold > 1e-9 {
|
||||
confidence = math.Min(score/threshold, 1.0)
|
||||
}
|
||||
|
||||
return types.AnomalyResult{
|
||||
Timestamp: vector.Timestamp,
|
||||
Score: score,
|
||||
IsAnomaly: isAnomaly,
|
||||
Confidence: confidence,
|
||||
Method: "RRCF",
|
||||
}, nil
|
||||
}
|
||||
|
||||
// rollingThreshold returns the thresholdPct-quantile of the rolling score
// window, as computed by ringBuffer.quantileVal (0 when the window is empty).
// It reads detector state without locking, so the caller must hold d.mu.
func (d *RRCFDetector) rollingThreshold() float64 {
	return d.scoreWindow.quantileVal(d.thresholdPct)
}
|
||||
|
||||
// Update is a no-op for RRCF and always returns nil: points are inserted into
// the forest only by Score (and Fit). A caller that invokes Update without
// Score therefore does not advance the forest.
func (d *RRCFDetector) Update(_ types.FeatureVector) error { return nil }
|
||||
299
internal/detect/scaling.go
Normal file
299
internal/detect/scaling.go
Normal file
|
|
@ -0,0 +1,299 @@
|
|||
package detect
|
||||
|
||||
import (
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"codeberg.org/pata1704/guenther/pkg/types"
|
||||
)
|
||||
|
||||
// ScalingLevel represents the current detector complexity level. Higher
// values select cheaper detectors; the zero value is LevelNormal.
type ScalingLevel int

// Scaling levels in order of increasing load / decreasing detector cost.
const (
	LevelNormal   ScalingLevel = iota // SEAD Ensemble (full accuracy)
	LevelHigh                         // COPOD (reduced complexity)
	LevelCritical                     // MAD (minimal overhead)
)
|
||||
|
||||
// levelName maps ScalingLevel to a human-readable string for logging.
// A level without an entry yields the empty string on lookup.
var levelName = map[ScalingLevel]string{
	LevelNormal:   "SEAD Ensemble (Normal)",
	LevelHigh:     "COPOD (High Load)",
	LevelCritical: "MAD (Critical Load)",
}
|
||||
|
||||
// ── SwitchableDetector ───────────────────────────────────────────────────────
|
||||
|
||||
// SwitchableDetector wraps a SEADDetector and allows runtime switching to
|
||||
// lighter-weight sub-detectors (COPOD, MAD) under high CPU load.
|
||||
//
|
||||
// State consistency guarantee: all base detectors are kept up-to-date
|
||||
// regardless of which one is currently active. This ensures a clean
|
||||
// transition back to SEAD without stale internal state.
|
||||
//
|
||||
// Update-deduplication contract:
|
||||
//
|
||||
// SEAD.Score() calls d.Score() on every base detector, which self-updates.
|
||||
// → no separate Update() call needed; doing so would double-count.
|
||||
// SEAD.Update() calls d.Update() on every base detector directly.
|
||||
// → used here when we need to advance inactive detectors
|
||||
// without scoring through SEAD.
|
||||
//
|
||||
// For LevelHigh / LevelCritical we call:
|
||||
//
|
||||
// s.ensemble.Update(vector) → advances MAD, RRCF variants via d.Update()
|
||||
// COPOD.Update() = COPOD.update() (buffer append only)
|
||||
// active.Score(vector) → scores + self-updates the active detector
|
||||
// (COPOD.Score calls update internally again)
|
||||
//
|
||||
// This means COPOD receives one Update() + one self-update from Score() per tick.
|
||||
// That is intentional: Update() appends to the sliding window buffer; Score()
|
||||
// computes the copula and then appends the scored point (score-then-insert).
|
||||
// The two operations are not idempotent and must both run for correct behaviour.
|
||||
// RRCF and MAD are updated via SEAD.Update() only; their Score() methods are
|
||||
// not called when inactive so they do not double-count.
|
||||
type SwitchableDetector struct {
|
||||
mu sync.RWMutex
|
||||
|
||||
ensemble *SEADDetector
|
||||
copod AnomalyDetector // may be nil if COPOD is not configured
|
||||
mad AnomalyDetector // may be nil if MAD is not configured
|
||||
|
||||
activeLevel ScalingLevel
|
||||
}
|
||||
|
||||
// NewSwitchableDetector creates a SwitchableDetector backed by the given
|
||||
// SEADDetector. COPOD and MAD sub-detectors are extracted from the ensemble
|
||||
// for direct access during high-load switching.
|
||||
//
|
||||
// If a sub-detector is not present in the ensemble, the corresponding field
|
||||
// is nil and Score() falls back to the ensemble for that level.
|
||||
func NewSwitchableDetector(ensemble *SEADDetector) *SwitchableDetector {
|
||||
return &SwitchableDetector{
|
||||
ensemble: ensemble,
|
||||
copod: ensemble.GetDetector("COPOD"),
|
||||
mad: ensemble.GetDetector("MAD"),
|
||||
activeLevel: LevelNormal,
|
||||
}
|
||||
}
|
||||
|
||||
// Fit hands the baseline vectors to the ensemble's Fit and returns its error.
// The ensemble presumably fits every base detector, including the COPOD and
// MAD instances used at higher load levels — confirm in SEADDetector.Fit.
func (s *SwitchableDetector) Fit(vectors []types.FeatureVector) error {
	return s.ensemble.Fit(vectors)
}
|
||||
|
||||
// Update forwards vector to the ensemble's Update and returns its error; no
// score is produced and the active level is not consulted. Whether a given
// base detector actually changes state depends on its own Update
// (RRCFDetector.Update, for example, is a no-op). Concurrency safety is that
// of SEADDetector.Update; this method takes no lock of its own.
func (s *SwitchableDetector) Update(vector types.FeatureVector) error {
	return s.ensemble.Update(vector)
}
|
||||
|
||||
// Score returns an AnomalyResult from the currently active detector.
|
||||
//
|
||||
// All inactive detectors are kept current via SEAD.Update() so that
|
||||
// switching back to a heavier detector does not produce stale scores.
|
||||
// Safe for concurrent use.
|
||||
func (s *SwitchableDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
|
||||
s.mu.RLock()
|
||||
level := s.activeLevel
|
||||
s.mu.RUnlock()
|
||||
|
||||
// LevelNormal: SEAD.Score() handles everything internally.
|
||||
// It scores all base detectors (which self-update) and applies
|
||||
// MWU weight adaptation. No separate Update() needed.
|
||||
if level == LevelNormal {
|
||||
return s.ensemble.Score(vector)
|
||||
}
|
||||
|
||||
// LevelHigh / LevelCritical:
|
||||
// 1. Advance all base detectors via SEAD.Update() so inactive detectors
|
||||
// (MAD, RRCF variants for LevelHigh; RRCF, COPOD for LevelCritical)
|
||||
// maintain current state. SEAD weight adaptation is NOT performed here
|
||||
// because we are bypassing SEAD.Score().
|
||||
if err := s.ensemble.Update(vector); err != nil {
|
||||
// Non-fatal: log and continue. A single missed update is acceptable;
|
||||
// the detector will resync on the next tick.
|
||||
log.Printf("scaling: ensemble update error at level %s: %v", levelName[level], err)
|
||||
}
|
||||
|
||||
// 2. Score via the active sub-detector.
|
||||
// COPOD.Score() additionally self-updates (score-then-insert), which is
|
||||
// correct and complementary to the Update() call above (see type doc).
|
||||
// MAD.Update() internally calls Score(), so it is already current after
|
||||
// the SEAD.Update() call; MAD.Score() here is pure scoring only.
|
||||
switch level {
|
||||
case LevelHigh:
|
||||
if s.copod == nil {
|
||||
log.Printf("scaling: COPOD unavailable at LevelHigh, falling back to ensemble")
|
||||
return s.ensemble.Score(vector)
|
||||
}
|
||||
res, err := s.copod.Score(vector)
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
res.Method = "COPOD (High Load)"
|
||||
return res, nil
|
||||
|
||||
case LevelCritical:
|
||||
if s.mad == nil {
|
||||
log.Printf("scaling: MAD unavailable at LevelCritical, falling back to ensemble")
|
||||
return s.ensemble.Score(vector)
|
||||
}
|
||||
res, err := s.mad.Score(vector)
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
res.Method = "MAD (Critical Load)"
|
||||
return res, nil
|
||||
|
||||
default:
|
||||
return s.ensemble.Score(vector)
|
||||
}
|
||||
}
|
||||
|
||||
// Switch atomically changes the active detection level.
|
||||
// It is a no-op if the requested level equals the current level.
|
||||
// Safe for concurrent use.
|
||||
func (s *SwitchableDetector) Switch(level ScalingLevel) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
if s.activeLevel == level {
|
||||
return
|
||||
}
|
||||
log.Printf("[SCALING] %s → %s", levelName[s.activeLevel], levelName[level])
|
||||
s.activeLevel = level
|
||||
}
|
||||
|
||||
// ── ScalingController ────────────────────────────────────────────────────────
|
||||
|
||||
// ScalingController monitors CPU load and drives a SwitchableDetector through
|
||||
// its scaling levels (Normal → High → Critical and back).
|
||||
//
|
||||
// Level transitions follow a two-phase commit pattern:
|
||||
//
|
||||
// 1. A CPU measurement moves the desired level to a "pending" state.
|
||||
// 2. Only after the pending level has been stable for the configured
|
||||
// duration is Switch() called on the detector.
|
||||
//
|
||||
// This prevents rapid oscillation under bursty workloads.
|
||||
//
|
||||
// Hysteresis rules (in the dead-band between downThres and highThres):
|
||||
//
|
||||
// Critical → High (one step down, not straight to Normal)
|
||||
// High → High (stays until CPU drops below downThres)
|
||||
// Normal → Normal
|
||||
//
|
||||
// ScalingController is not safe for concurrent use. ObserveCPU must be
|
||||
// called from a single goroutine (the DetectionLayer's processing loop).
|
||||
type ScalingController struct {
|
||||
detector *SwitchableDetector
|
||||
|
||||
// Thresholds (CPU percent, 0–100)
|
||||
highThres float64
|
||||
critThres float64
|
||||
downThres float64
|
||||
|
||||
// Required stable duration before a level transition is committed.
|
||||
highDur time.Duration
|
||||
critDur time.Duration
|
||||
downDur time.Duration
|
||||
|
||||
// currentLevel is the level that has been committed to the detector.
|
||||
currentLevel ScalingLevel
|
||||
|
||||
// pendingLevel is the desired level based on recent CPU measurements.
|
||||
// It must remain stable for the corresponding duration before becoming current.
|
||||
pendingLevel ScalingLevel
|
||||
|
||||
// pendingStart is the time at which pendingLevel last changed.
|
||||
// The pending level is committed when time.Since(pendingStart) >= required duration.
|
||||
pendingStart time.Time
|
||||
}
|
||||
|
||||
// NewScalingController constructs a ScalingController.
|
||||
// Duration arguments are in seconds (float64 to match YAML config values).
|
||||
func NewScalingController(
|
||||
detector *SwitchableDetector,
|
||||
highThres, critThres, downThres float64,
|
||||
highDurSec, critDurSec, downDurSec float64,
|
||||
) *ScalingController {
|
||||
return &ScalingController{
|
||||
detector: detector,
|
||||
highThres: highThres,
|
||||
critThres: critThres,
|
||||
downThres: downThres,
|
||||
highDur: time.Duration(highDurSec * float64(time.Second)),
|
||||
critDur: time.Duration(critDurSec * float64(time.Second)),
|
||||
downDur: time.Duration(downDurSec * float64(time.Second)),
|
||||
currentLevel: LevelNormal,
|
||||
pendingLevel: LevelNormal,
|
||||
pendingStart: time.Now(), // explicit init avoids zero-time edge case
|
||||
}
|
||||
}
|
||||
|
||||
// ObserveCPU processes a single CPU measurement and, if warranted, triggers
|
||||
// a level switch on the underlying SwitchableDetector.
|
||||
//
|
||||
// Must be called from a single goroutine only (not safe for concurrent use).
|
||||
func (c *ScalingController) ObserveCPU(cpuPercent float64) {
|
||||
now := time.Now()
|
||||
|
||||
desired := c.desiredLevel(cpuPercent)
|
||||
|
||||
// Phase 1: desired level changed → restart the stability timer.
|
||||
if desired != c.pendingLevel {
|
||||
c.pendingLevel = desired
|
||||
c.pendingStart = now
|
||||
return
|
||||
}
|
||||
|
||||
// Phase 2: desired level has been stable – check if duration is met.
|
||||
if now.Sub(c.pendingStart) < c.durationFor(desired) {
|
||||
return
|
||||
}
|
||||
|
||||
if desired != c.currentLevel {
|
||||
c.currentLevel = desired
|
||||
c.detector.Switch(desired)
|
||||
}
|
||||
c.pendingStart = now
|
||||
}
|
||||
|
||||
// desiredLevel computes the target ScalingLevel for a given CPU measurement,
|
||||
// applying hysteresis in the dead-band between downThres and highThres.
|
||||
func (c *ScalingController) desiredLevel(cpuPercent float64) ScalingLevel {
|
||||
switch {
|
||||
case cpuPercent > c.critThres:
|
||||
return LevelCritical
|
||||
case cpuPercent > c.highThres:
|
||||
return LevelHigh
|
||||
case cpuPercent < c.downThres:
|
||||
return LevelNormal
|
||||
default:
|
||||
// Dead-band: degrade at most one step to avoid jumping straight
|
||||
// from Critical to Normal on a brief CPU dip.
|
||||
switch c.currentLevel {
|
||||
case LevelCritical:
|
||||
return LevelHigh
|
||||
case LevelHigh:
|
||||
return LevelHigh
|
||||
default:
|
||||
return LevelNormal
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// durationFor returns the required stable duration for a given target level.
|
||||
func (c *ScalingController) durationFor(level ScalingLevel) time.Duration {
|
||||
switch level {
|
||||
case LevelCritical:
|
||||
return c.critDur
|
||||
case LevelHigh:
|
||||
return c.highDur
|
||||
default:
|
||||
return c.downDur
|
||||
}
|
||||
}
|
||||
507
internal/detect/sead.go
Normal file
507
internal/detect/sead.go
Normal file
|
|
@ -0,0 +1,507 @@
|
|||
// Package detect provides anomaly detection algorithms and ensemble logic.
|
||||
package detect
|
||||
|
||||
// sead.go – SEAD: Unsupervised Ensemble of Streaming Anomaly Detectors
|
||||
//
|
||||
// Implementation of Algorithm 1 from:
|
||||
// Shah et al. "SEAD: Unsupervised Ensemble of Streaming Anomaly Detectors"
|
||||
// ICML 2025, Amazon Science.
|
||||
//
|
||||
// Core algorithm (Multiplicative Weights Update / FTRL with KL-divergence):
|
||||
//
|
||||
// 1. For each incoming feature vector x_t:
|
||||
// a. Score every base detector: s̃_i(t) = A_i(x_t)
|
||||
// b. Normalise to [0,1] via streaming quantile: s_i(t) = Q(s̃_i(t); history_i)
|
||||
// c. Compute softmax weights: p_i(t) = exp(w_i) / Σ exp(w_j)
|
||||
// d. Output combined score: S_t = Σ p_i(t) · s_i(t)
|
||||
// e. Update weights: w_i(t+1) = w_i(t) − η · ∂L_t/∂w_i
|
||||
// where L_t = S_t + λ · KL(p || π)
|
||||
// 2. Update each base detector: A_i(t+1) ← Update(A_i(t), x_t)
|
||||
//
|
||||
// Streaming quantiles are computed over a fixed-capacity FIFO ring buffer that
// is copied and sorted on every query (a lightweight substitute for a
// t-digest). For N=4 detectors at 1 Hz this is negligible memory and CPU
// overhead.
|
||||
//
|
||||
// SEAD runs parallel to the existing AVG/MAX/MEDIAN ensemble; it is selected
|
||||
// by setting detector.ensemble.method = "sead" in the config.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"codeberg.org/pata1704/guenther/pkg/types"
|
||||
)
|
||||
|
||||
// ─── FIFO Ring Buffer ─────────────────────────────────────────────────────────
|
||||
|
||||
// ringBuffer is a fixed-capacity circular buffer of float64 values with true
// FIFO eviction: once full, every push overwrites the oldest value.
//
// Memory: 8 bytes per slot; for cap=500 that is about 4 KB per buffer.
type ringBuffer struct {
	data []float64 // backing storage, len(data) == cap
	head int       // index of the next write position
	size int       // current number of elements (at most cap)
	cap  int       // fixed capacity
}

// newRingBuffer returns an empty ringBuffer. Capacities below 10 are raised
// to 10.
func newRingBuffer(capacity int) *ringBuffer {
	if capacity < 10 {
		capacity = 10
	}
	return &ringBuffer{
		data: make([]float64, capacity),
		cap:  capacity,
	}
}

// sortedWindow returns the values currently held, as a freshly allocated
// slice in ascending order. The buffer itself is not modified.
func (r *ringBuffer) sortedWindow() []float64 {
	n := r.size
	sorted := make([]float64, n)
	for i := range sorted {
		// (head - n) is the oldest element; + cap keeps the index positive.
		sorted[i] = r.data[(r.head-n+i+r.cap)%r.cap]
	}
	sort.Float64s(sorted)
	return sorted
}

// push inserts v, overwriting the oldest entry when the buffer is full.
// Returns the empirical quantile rank of v within the current window ∈ [0,1]:
// the number of window values smaller than v divided by (size − 1). With a
// single element the rank is defined as 0.5.
func (r *ringBuffer) push(v float64) float64 {
	r.data[r.head] = v
	r.head = (r.head + 1) % r.cap
	if r.size < r.cap {
		r.size++
	}
	if r.size <= 1 {
		return 0.5
	}

	sorted := r.sortedWindow()
	last := len(sorted) - 1
	rank := sort.SearchFloat64s(sorted, v)
	if rank > last {
		// Only reachable when v is NaN (no element compares >= NaN); keep the
		// result inside the documented [0,1] range.
		rank = last
	}
	return float64(rank) / float64(last)
}

// quantileVal returns the value at quantile p without modifying the buffer.
// The value is sorted[int(p·(size−1))], i.e. the lower neighbour without
// interpolation. p is clamped to [0,1]; a NaN p is treated as 0. An empty
// buffer yields 0.
func (r *ringBuffer) quantileVal(p float64) float64 {
	if r.size == 0 {
		return 0
	}
	sorted := r.sortedWindow()
	last := len(sorted) - 1
	if !(p > 0) { // p <= 0 or NaN: would otherwise index below the slice
		return sorted[0]
	}
	if p >= 1 {
		return sorted[last]
	}
	return sorted[int(p*float64(last))]
}

// streamQuantile is an alias kept for API compatibility.
// New code should use ringBuffer directly.
type streamQuantile = ringBuffer

// newStreamQuantile is the compatibility constructor matching streamQuantile;
// it is identical to newRingBuffer.
func newStreamQuantile(capacity int) *ringBuffer {
	return newRingBuffer(capacity)
}
|
||||
|
||||
// ─── SEADDetector ─────────────────────────────────────────────────────────────
|
||||
|
||||
// SEADDetector implements the SEAD algorithm: an unsupervised online ensemble
// that adaptively weights N base anomaly detectors using Multiplicative Weights
// Update (MWU / FTRL with KL-divergence regulariser).
//
// Key properties:
//   - Fully unsupervised: no anomaly labels required.
//   - O(1) in the stream length: all history is kept in fixed-size windows, so
//     the cost of a time step does not grow as the stream gets longer.
//   - Adaptive: detector weights shift as data distribution changes.
//   - Score-scale agnostic: all base scores are quantile-normalised to [0,1]
//     before aggregation, preventing any single detector from dominating due
//     to score magnitude differences.
//
// Configuration (see SEADConfig; defaults are those of DefaultSEADConfig):
//   - eta (η): MWU learning rate. Larger → faster adaptation, more noise.
//     Recommended range: [0.05, 0.3]. Default: 0.1.
//   - lambda (λ): KL-divergence regularisation strength. 0 = pure MWU (no pull
//     toward the prior). Positive values pull weights toward π (uniform).
//     Default: 0.01.
//   - quantileWindow: number of past scores retained per detector for quantile
//     normalisation. Default: 300.
//   - contamination: expected anomaly fraction used to set the decision
//     threshold as quantile(combinedHistory, 1-contamination). Default: 0.15.
//   - minDataPoints: minimum scored windows before any anomaly is flagged.
//     Default: 20.
type SEADDetector struct {
	detectors []AnomalyDetector // N base detectors (e.g. MAD, RRCF, COPOD)
	names     []string          // human-readable name per detector; names[i] labels detectors[i]

	// MWU state
	weights []float64 // w_i (log-space, unconstrained); softmax(weights) yields the mixing weights
	eta     float64   // learning rate η
	lambda  float64   // KL regularisation strength λ
	prior   []float64 // π – uniform by default

	// Streaming quantile per detector
	quantiles []*streamQuantile

	// Combined score history for threshold computation.
	// Uses a FIFO ring buffer of fixed capacity (1000, set in NewSEADDetector)
	// so every score lives exactly that many time steps, regardless of its
	// magnitude.
	contamination   float64
	combinedHistory *ringBuffer // FIFO ring buffer, capacity=1000
	minDataPoints   int

	// mu is held by Score while it reads and updates weights, quantiles and
	// combinedHistory, and by the accessor methods.
	mu sync.Mutex
}
|
||||
|
||||
// SEADConfig holds all tunable parameters for the SEAD ensemble.
//
// NewSEADDetector replaces out-of-range values with the defaults noted on
// each field; Lambda is the only field it uses as given.
type SEADConfig struct {
	// Eta is the MWU learning rate η.
	// Higher values react faster to distribution shifts but are noisier.
	// Recommended: 0.05–0.20. Default: 0.10 (also used when Eta ≤ 0).
	Eta float64

	// Lambda is the KL-divergence regularisation strength.
	// 0 = pure MWU (no penalty for deviation from prior).
	// Positive values add stability; use 0.01–0.05.
	// DefaultSEADConfig sets 0.01; a zero value is kept as 0.
	Lambda float64

	// QuantileWindow is the number of past scores retained per detector.
	// Larger → more stable quantiles but slower adaptation.
	// Default: 300 (also used when QuantileWindow ≤ 0). The underlying ring
	// buffer raises windows smaller than 10 to 10.
	QuantileWindow int

	// Contamination is the expected anomaly fraction ∈ (0, 0.5).
	// Sets the decision threshold at quantile(1-contamination) of combined history.
	// Default: 0.15 (also used when the value is ≤ 0 or ≥ 0.5).
	Contamination float64

	// MinDataPoints is the cold-start guard: Score flags an anomaly only once
	// the combined-score history holds more than this many windows.
	// Default: 20 (also used when MinDataPoints ≤ 0).
	MinDataPoints int
}
|
||||
|
||||
// DefaultSEADConfig returns sensible defaults for the SEAD ensemble.
|
||||
func DefaultSEADConfig() SEADConfig {
|
||||
return SEADConfig{
|
||||
Eta: 0.10,
|
||||
Lambda: 0.01,
|
||||
QuantileWindow: 300,
|
||||
Contamination: 0.15,
|
||||
MinDataPoints: 20,
|
||||
}
|
||||
}
|
||||
|
||||
// NewSEADDetector constructs a SEAD ensemble from N base detectors.
|
||||
//
|
||||
// - detectors: slice of base AnomalyDetector implementations. Must be ≥ 1.
|
||||
// - names: human-readable labels for each detector (used in Details field).
|
||||
// - cfg: SEAD tuning parameters (use DefaultSEADConfig() for a safe start).
|
||||
func NewSEADDetector(
|
||||
detectors []AnomalyDetector,
|
||||
names []string,
|
||||
cfg SEADConfig,
|
||||
) (*SEADDetector, error) {
|
||||
n := len(detectors)
|
||||
if n == 0 {
|
||||
return nil, fmt.Errorf("sead: at least one base detector required")
|
||||
}
|
||||
if len(names) != n {
|
||||
return nil, fmt.Errorf("sead: names length %d must match detectors length %d", len(names), n)
|
||||
}
|
||||
|
||||
if cfg.Eta <= 0 {
|
||||
cfg.Eta = 0.10
|
||||
}
|
||||
if cfg.QuantileWindow <= 0 {
|
||||
cfg.QuantileWindow = 300
|
||||
}
|
||||
if cfg.Contamination <= 0 || cfg.Contamination >= 0.5 {
|
||||
cfg.Contamination = 0.15
|
||||
}
|
||||
if cfg.MinDataPoints <= 0 {
|
||||
cfg.MinDataPoints = 20
|
||||
}
|
||||
|
||||
// Uniform prior π = 1/N for all detectors.
|
||||
prior := make([]float64, n)
|
||||
for i := range prior {
|
||||
prior[i] = 1.0 / float64(n)
|
||||
}
|
||||
|
||||
// Initialise weights uniformly in log-space: w_i = 0 → softmax = 1/N.
|
||||
weights := make([]float64, n)
|
||||
|
||||
quantiles := make([]*streamQuantile, n)
|
||||
for i := range quantiles {
|
||||
quantiles[i] = newStreamQuantile(cfg.QuantileWindow)
|
||||
}
|
||||
|
||||
return &SEADDetector{
|
||||
detectors: detectors,
|
||||
names: names,
|
||||
weights: weights,
|
||||
eta: cfg.Eta,
|
||||
lambda: cfg.Lambda,
|
||||
prior: prior,
|
||||
quantiles: quantiles,
|
||||
contamination: cfg.Contamination,
|
||||
combinedHistory: newRingBuffer(1000),
|
||||
minDataPoints: cfg.MinDataPoints,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Fit seeds all base detectors from labelled-normal vectors.
|
||||
// SEAD itself has no training phase; only the base detectors are fitted.
|
||||
func (s *SEADDetector) Fit(vectors []types.FeatureVector) error {
|
||||
for i, d := range s.detectors {
|
||||
if err := d.Fit(vectors); err != nil {
|
||||
return fmt.Errorf("sead: fit detector %q: %w", s.names[i], err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update propagates the feature vector to all base detectors.
|
||||
func (s *SEADDetector) Update(vector types.FeatureVector) error {
|
||||
for i, d := range s.detectors {
|
||||
if err := d.Update(vector); err != nil {
|
||||
return fmt.Errorf("sead: update detector %q: %w", s.names[i], err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Score implements Algorithm 1 from the SEAD paper.
|
||||
//
|
||||
// Steps:
|
||||
// 1. Score each base detector → raw scores s̃_i.
|
||||
// Each detector also self-updates its internal state (RRCF inserts
|
||||
// the point into the forest; COPOD appends to its copula buffer;
|
||||
// IForest adds to its retraining buffer; MAD buffers for calibration).
|
||||
// 2. Quantile-normalise each s̃_i to ŝ_i ∈ [0,1] via streaming window.
|
||||
// 3. Compute softmax weights p_i = exp(w_i) / Σ exp(w_j).
|
||||
// 4. Combined score S = Σ p_i · ŝ_i.
|
||||
// 5. Update weights: w_i -= η · ∂L/∂w_i
|
||||
// where L = S + λ · KL(p || π).
|
||||
// 6. Threshold S against rolling (1-contamination)-quantile of S history.
|
||||
func (s *SEADDetector) Score(vector types.FeatureVector) (types.AnomalyResult, error) {
|
||||
n := len(s.detectors)
|
||||
|
||||
// ── Step 1: Score all base detectors ──────────────────────────────────────
|
||||
// Each detector's Score method is responsible for self-updating (RRCF inserts
|
||||
// into its forest; COPOD appends to its copula buffer; etc.). We do NOT call
|
||||
// d.Update separately here to avoid double-counting in detectors that already
|
||||
// self-update inside Score.
|
||||
rawScores := make([]float64, n)
|
||||
anomalyFlags := make([]bool, n)
|
||||
for i, d := range s.detectors {
|
||||
res, err := d.Score(vector)
|
||||
if err != nil {
|
||||
// Degrade gracefully: treat failed detector as neutral (score=0.5).
|
||||
rawScores[i] = 0.5
|
||||
} else {
|
||||
rawScores[i] = res.Score
|
||||
anomalyFlags[i] = res.IsAnomaly
|
||||
}
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
// ── Step 2: Quantile-normalise scores to [0,1] ────────────────────────────
|
||||
normScores := make([]float64, n)
|
||||
for i, raw := range rawScores {
|
||||
normScores[i] = s.quantiles[i].push(raw)
|
||||
}
|
||||
|
||||
// ── Step 3: Softmax weights ───────────────────────────────────────────────
|
||||
p := softmax(s.weights)
|
||||
|
||||
// ── Step 4: Combined score ────────────────────────────────────────────────
|
||||
combined := 0.0
|
||||
for i := range p {
|
||||
combined += p[i] * normScores[i]
|
||||
}
|
||||
|
||||
// ── Step 5: Weight update (MWU gradient step) ─────────────────────────────
|
||||
// Loss L(w) = combined(w) + λ · KL(softmax(w) || π)
|
||||
// ∂L/∂w_i = p_i · (ŝ_i - combined) + λ · (p_i - π_i)
|
||||
//
|
||||
// This is the closed-form gradient for softmax + weighted sum + KL penalty.
|
||||
for i := range s.weights {
|
||||
gradCombined := p[i] * (normScores[i] - combined)
|
||||
gradKL := s.lambda * (p[i] - s.prior[i])
|
||||
s.weights[i] -= s.eta * (gradCombined + gradKL)
|
||||
}
|
||||
|
||||
// ── Step 6: Threshold decision ────────────────────────────────────────────
|
||||
// Use FIFO ring buffer: oldest score is evicted automatically after
|
||||
// 1000 time steps, giving the threshold a finite, sliding memory.
|
||||
s.combinedHistory.push(combined)
|
||||
threshold := s.combinedHistory.quantileVal(1.0 - s.contamination)
|
||||
isAnomaly := s.combinedHistory.size > s.minDataPoints && combined > threshold
|
||||
|
||||
confidence := 0.0
|
||||
if threshold > 1e-9 {
|
||||
confidence = math.Min(combined/threshold, 1.0)
|
||||
}
|
||||
|
||||
return types.AnomalyResult{
|
||||
Timestamp: vector.Timestamp,
|
||||
Score: combined,
|
||||
IsAnomaly: isAnomaly,
|
||||
Confidence: confidence,
|
||||
Method: "SEAD",
|
||||
Details: s.detailString(p, normScores, anomalyFlags),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetDetector returns a base detector by name. Returns nil if not found.
|
||||
func (s *SEADDetector) GetDetector(name string) AnomalyDetector {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
for i, n := range s.names {
|
||||
if n == name {
|
||||
return s.detectors[i]
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Weights returns the current softmax-normalised detector weights, one entry
// per base detector in construction order. The slice is freshly allocated by
// softmax, so callers may modify it freely. Useful for logging and
// diagnostics. Thread-safe: the internal weights are read under s.mu.
func (s *SEADDetector) Weights() []float64 {
	s.mu.Lock()
	defer s.mu.Unlock()
	return softmax(s.weights)
}
|
||||
|
||||
// WeightSummary returns a human-readable string of detector weights.
|
||||
func (s *SEADDetector) WeightSummary() string {
|
||||
w := s.Weights()
|
||||
var sb strings.Builder
|
||||
for i, name := range s.names {
|
||||
if i > 0 {
|
||||
sb.WriteString(" | ")
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf("%s=%.3f", name, w[i]))
|
||||
}
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// detailString builds a diagnostic annotation for AnomalyResult.Details.
|
||||
// Caller must hold s.mu.
|
||||
func (s *SEADDetector) detailString(p, normScores []float64, flags []bool) string {
|
||||
var parts []string
|
||||
for i, name := range s.names {
|
||||
flag := ""
|
||||
if flags[i] {
|
||||
flag = "!"
|
||||
}
|
||||
parts = append(parts, fmt.Sprintf("%s%s:w=%.2f,s=%.2f", name, flag, p[i], normScores[i]))
|
||||
}
|
||||
return strings.Join(parts, " ")
|
||||
}
|
||||
|
||||
// ─── Math helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
// softmax returns exp(w_i) / Σ exp(w_j) for every element of w as a newly
// allocated slice; w itself is not modified.
//
// The maximum of w is subtracted before exponentiating, which leaves the
// result unchanged mathematically but keeps exp from overflowing for large
// weights. An empty input yields an empty result.
func softmax(w []float64) []float64 {
	out := make([]float64, len(w))
	if len(w) == 0 {
		// Nothing to normalise; w[0] below would panic.
		return out
	}

	maxW := w[0]
	for _, v := range w[1:] {
		if v > maxW {
			maxW = v
		}
	}

	var sum float64
	for i, v := range w {
		out[i] = math.Exp(v - maxW)
		sum += out[i]
	}
	// sum ≥ 1 for finite input because the maximum contributes exp(0) = 1.
	for i := range out {
		out[i] /= sum
	}
	return out
}
|
||||
|
||||
// ─── Factory helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
// NewSEADWithAllDetectors constructs a SEAD ensemble from five base detectors,
// registered in this order: MAD, RRCF-fast, RRCF-mid, RRCF-slow, COPOD.
//
// SEAD's MWU weight-update naturally up-weights the variant that consistently
// separates anomalies from normal windows, and adapts when the stream
// distribution shifts (e.g. time-of-day effects).
//
// MAD auto-calibration: the MADDetector buffers the first madCalibSize
// NormalizedVectors, derives per-feature median and MAD, and starts scoring
// once calibration is complete. Calibration requires no external tooling.
// SEAD down-weights MAD automatically during the warmup phase.
//
// Zero-valued RRCF settings are replaced per variant:
//
//	variant  NumTrees  TreeSize  ThresholdPercentile
//	Fast     50        32        0.85
//	Mid      150       64        0.85
//	Slow     200       128       0.85
//
// A madCalibSize ≤ 0 is replaced by 100. An error is returned when the COPOD
// detector or the ensemble itself cannot be constructed.
func NewSEADWithAllDetectors(
	copodBufferSize int, copodThreshold float64,
	rrcfVariants RRCFVariantsConfig,
	madThreshold float64, madCalibSize int,
	seadCfg SEADConfig,
) (*SEADDetector, error) {
	// Fill in defaults for every RRCF setting the caller left at zero.
	if rrcfVariants.Fast.NumTrees == 0 {
		rrcfVariants.Fast.NumTrees = 50
	}
	if rrcfVariants.Fast.TreeSize == 0 {
		rrcfVariants.Fast.TreeSize = 32
	}
	if rrcfVariants.Fast.ThresholdPercentile == 0 {
		rrcfVariants.Fast.ThresholdPercentile = 0.85
	}

	if rrcfVariants.Mid.NumTrees == 0 {
		rrcfVariants.Mid.NumTrees = 150
	}
	if rrcfVariants.Mid.TreeSize == 0 {
		rrcfVariants.Mid.TreeSize = 64
	}
	if rrcfVariants.Mid.ThresholdPercentile == 0 {
		rrcfVariants.Mid.ThresholdPercentile = 0.85
	}

	if rrcfVariants.Slow.NumTrees == 0 {
		rrcfVariants.Slow.NumTrees = 200
	}
	if rrcfVariants.Slow.TreeSize == 0 {
		rrcfVariants.Slow.TreeSize = 128
	}
	if rrcfVariants.Slow.ThresholdPercentile == 0 {
		rrcfVariants.Slow.ThresholdPercentile = 0.85
	}

	// ── Construct base detectors ──────────────────────────────────────────────
	copod, err := NewCOPODDetector(copodBufferSize, copodThreshold)
	if err != nil {
		return nil, fmt.Errorf("sead: copod: %w", err)
	}

	// NOTE(review): the third argument (0) is defined by NewRRCFDetector,
	// which is not visible here — confirm its meaning there.
	rrcfFast := NewRRCFDetector(
		rrcfVariants.Fast.NumTrees, rrcfVariants.Fast.TreeSize,
		0, rrcfVariants.Fast.ThresholdPercentile,
	)
	rrcfMid := NewRRCFDetector(
		rrcfVariants.Mid.NumTrees, rrcfVariants.Mid.TreeSize,
		0, rrcfVariants.Mid.ThresholdPercentile,
	)
	rrcfSlow := NewRRCFDetector(
		rrcfVariants.Slow.NumTrees, rrcfVariants.Slow.TreeSize,
		0, rrcfVariants.Slow.ThresholdPercentile,
	)

	if madCalibSize <= 0 {
		madCalibSize = 100
	}
	mad := NewMADDetectorAutoCalibrate(madThreshold, madCalibSize)

	// The detector slice and the name slice must stay index-aligned.
	return NewSEADDetector(
		[]AnomalyDetector{mad, rrcfFast, rrcfMid, rrcfSlow, copod},
		[]string{"MAD", "RRCF-fast", "RRCF-mid", "RRCF-slow", "COPOD"},
		seadCfg,
	)
}
|
||||
61
internal/detect/sead_test.go
Normal file
61
internal/detect/sead_test.go
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
package detect
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// TestRingBuffer_FIFO checks that a full ringBuffer evicts its oldest sample
// on the next push and that quantileVal picks the expected order statistics
// before and after the eviction.
func TestRingBuffer_FIFO(t *testing.T) {
	// 1. Initialize with capacity 10
	rb := newRingBuffer(10)
	assert.Equal(t, 10, rb.cap)
	assert.Equal(t, 0, rb.size)

	// 2. Fill it up
	for i := 1; i <= 10; i++ {
		rb.push(float64(i))
	}
	assert.Equal(t, 10, rb.size)
	// head should be at 0 after 10 pushes
	assert.Equal(t, 0, rb.head)

	// 3. Verify quantile (sorted view)
	// sorted: [1 2 3 4 5 6 7 8 9 10]
	// quantile 0.5 (median) of 10 items: index int(0.5 * 9) = 4 -> value 5
	assert.Equal(t, 5.0, rb.quantileVal(0.5))

	// 4. Push one more to trigger FIFO eviction
	// Should evict "1" (the oldest)
	rb.push(11.0)
	assert.Equal(t, 10, rb.size)
	assert.Equal(t, 1, rb.head)

	// 5. Verify the oldest (1.0) is gone and 11.0 is present
	// sorted: [2 3 4 5 6 7 8 9 10 11]
	// idx = int(0.4 * 9) = 3 -> value at index 3 is 5.0
	assert.Equal(t, 5.0, rb.quantileVal(0.4))
	// Boundary quantiles, with idx = int(p * 9):
	// p=0 -> idx 0 (2.0)
	// p=1 -> idx 9 (11.0)
	assert.Equal(t, 2.0, rb.quantileVal(0.0))
	assert.Equal(t, 11.0, rb.quantileVal(1.0))
}
|
||||
|
||||
// TestRingBuffer_Rank checks the quantile rank returned by push while the
// window is still filling.
func TestRingBuffer_Rank(t *testing.T) {
	// newRingBuffer raises capacities below 10 to 10, so no eviction happens
	// during this test.
	rb := newRingBuffer(5)

	// Rank is float64(rank) / float64(n-1)
	assert.Equal(t, 0.5, rb.push(10.0)) // n=1 -> 0.5
	assert.Equal(t, 1.0, rb.push(20.0)) // n=2, sorted=[10, 20], search(20)->1. 1/(2-1)=1.0
	assert.Equal(t, 0.0, rb.push(5.0))  // n=3, sorted=[5, 10, 20], search(5)->0. 0/2=0.0

	// n=4, sorted=[5 10 10 20], search(10) -> idx 1. 1/(4-1) = 0.333...
	// (ties take the lowest rank)
	assert.InDelta(t, 0.3333333333333333, rb.push(10.0), 1e-9)

	// A value inserted between two existing samples lands on rank 0.5.
	rb = newRingBuffer(4)
	rb.push(1.0)
	rb.push(3.0)
	rank := rb.push(2.0) // n=3, sorted=[1, 2, 3], search(2)->idx 1. 1/(3-1)=0.5
	assert.Equal(t, 0.5, rank)
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue