commit for version used in evaluation of thesis
This commit is contained in:
commit
72635dc7b9
27 changed files with 6084 additions and 0 deletions
203
internal/config/config.go
Normal file
203
internal/config/config.go
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
// Package config provides the pipeline configuration loaded from YAML.
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// MaskingPattern is a single entry in drain.masking_patterns.
//
// Name, Pattern, Replace and Type are read from YAML. Re is derived state:
// it is nil after decoding and is filled in by Config.Compile from Pattern.
type MaskingPattern struct {
	// Name identifies the entry. Config.Compile quotes it in error messages
	// and Config.NumericPatternNames skips entries whose Name is empty.
	Name string `yaml:"name"`

	// Pattern is the regular-expression source passed to regexp.Compile.
	Pattern string `yaml:"pattern"`

	// Replace is the replacement text associated with the pattern.
	// NOTE(review): the substitution itself happens outside this file.
	Replace string `yaml:"replace"`

	// Type classifies the pattern; the values "float" and "int" mark it as
	// numeric for Config.NumericPatternNames.
	Type string `yaml:"type"`

	// Re is the compiled form of Pattern. It is tagged yaml:"-" because an
	// untagged exported field is mapped to the lower-cased key "re", which
	// would let a stray key in the file be decoded into this field.
	Re *regexp.Regexp `yaml:"-"`
}
|
||||
|
||||
// MADConfig holds the settings of the MAD detector.
//
// The defaults mentioned below are documented values only; nothing in this
// file applies them.
type MADConfig struct {
	// Threshold is the modified Z-score cutoff used by IsAnomaly.
	// Recommended range: 3.0–4.0. Default: 3.5.
	Threshold float64 `yaml:"threshold"`

	// CalibrationSize is how many NormalizedVectors are buffered before the
	// automatic per-feature median/MAD calibration runs.
	// A value of 0 selects the default of 100.
	CalibrationSize int `yaml:"calibration_size"`
}
|
||||
|
||||
// COPODConfig holds the settings of the Copula-Based Outlier detector.
type COPODConfig struct {
	// Threshold is read from the "threshold" key.
	// NOTE(review): its scale and comparison direction are defined by the
	// detector, not in this file.
	Threshold float64 `yaml:"threshold"`

	// BufferSize is read from the "buffer_size" key.
	// NOTE(review): presumably the number of samples the detector retains;
	// confirm against the detector implementation.
	BufferSize int `yaml:"buffer_size"`
}
|
||||
|
||||
// RRCFConfig holds the settings of the Robust Random Cut Forest detector.
//
// It applies to the standalone RRCF detector and to the classic
// AVG/MAX/MEDIAN ensemble paths.
type RRCFConfig struct {
	// NumTrees is read from the "num_trees" key.
	NumTrees int `yaml:"num_trees"`

	// TreeSize is read from the "tree_size" key.
	TreeSize int `yaml:"tree_size"`

	// ThresholdPercentile is read from the "threshold_percentile" key.
	// NOTE(review): whether this is expressed in [0, 1] or [0, 100] is not
	// visible here; confirm against the detector.
	ThresholdPercentile float64 `yaml:"threshold_percentile"`
}
|
||||
|
||||
// RRCFVariantConfig describes one named RRCF variant of the SEAD
// multi-horizon ensemble.
type RRCFVariantConfig struct {
	// NumTrees governs score stability: a larger forest gives smoother,
	// more conservative scores.
	NumTrees int `yaml:"num_trees"`

	// TreeSize is the sliding-window capacity of each tree.
	TreeSize int `yaml:"tree_size"`

	// ThresholdPercentile is the decision threshold of this model when it
	// is used standalone.
	ThresholdPercentile float64 `yaml:"threshold_percentile"`
}
|
||||
|
||||
// RRCFVariantsConfig groups the three RRCF variants used by the SEAD ensemble.
|
||||
// Each variant captures anomalies at a different time-horizon:
|
||||
// - Fast: short memory, reactive to transient spikes
|
||||
// - Mid: medium memory, balanced sensitivity
|
||||
// - Slow: long memory, detects sustained / slow-drift events
|
||||
type RRCFVariantsConfig struct {
|
||||
Fast RRCFVariantConfig `yaml:"fast"`
|
||||
Mid RRCFVariantConfig `yaml:"mid"`
|
||||
Slow RRCFVariantConfig `yaml:"slow"`
|
||||
}
|
||||
|
||||
// SEADConfig carries the tunable parameters of the SEAD ensemble.
//
// It is consulted only when EnsembleConfig.Method == "sead". The defaults
// mentioned below are documented values only; nothing in this file applies
// them.
type SEADConfig struct {
	// Eta is the MWU learning rate η ∈ (0, 1]. Larger values follow
	// distribution shifts more quickly at the cost of noisier weights.
	// Recommended range: 0.05–0.20. A value of 0 selects the default of 0.10.
	Eta float64 `yaml:"eta"`

	// Lambda is the strength of the KL-divergence regularisation; 0 means
	// pure MWU (uniform prior). Recommended range: 0.0–0.05. Default: 0.01.
	// NOTE(review): an omitted key and an explicit 0 both decode to 0, so
	// "pure MWU" and "use the default" cannot be told apart from this field
	// alone — confirm how the consumer resolves this.
	Lambda float64 `yaml:"lambda"`

	// QuantileWindow is the number of past scores kept per detector for
	// streaming quantile normalisation. A value of 0 selects the default
	// of 300.
	QuantileWindow int `yaml:"quantile_window"`

	// MinDataPoints is the cold-start guard: nothing is flagged as an
	// anomaly until at least this many windows have been scored. A value
	// of 0 selects the default of 20.
	MinDataPoints int `yaml:"min_data_points"`
}
|
||||
|
||||
// EnsembleConfig manages the routing for the multi-model detector.
|
||||
type EnsembleConfig struct {
|
||||
Enabled bool `yaml:"enabled"`
|
||||
|
||||
// Method selects the score-aggregation strategy.
|
||||
// Allowed values: "avg" (default), "max", "median", "sead".
|
||||
// "sead": adaptive Multiplicative Weights Update ensemble (Shah et al., ICML 2025).
|
||||
Method string `yaml:"method"`
|
||||
|
||||
// Contamination is the expected fraction of anomalous windows ∈ [0, 0.5).
|
||||
// Determines the decision threshold as quantile(1-contamination) of
|
||||
// the rolling combined score history.
|
||||
Contamination float64 `yaml:"contamination"`
|
||||
|
||||
// SEAD tuning parameters (only applied when Method == "sead").
|
||||
SEAD SEADConfig `yaml:"sead"`
|
||||
}
|
||||
|
||||
// AutoScalingConfig holds the thresholds and durations that drive dynamic
// detector switching. All durations are plain numbers of seconds.
type AutoScalingConfig struct {
	// Enabled is read from the "enabled" key.
	Enabled bool `yaml:"enabled"`

	// HighThreshold governs the Normal -> High transition, e.g. 0.75.
	HighThreshold float64 `yaml:"high_threshold"`

	// CritThreshold governs the High -> Critical transition, e.g. 0.90.
	CritThreshold float64 `yaml:"critical_threshold"`

	// HighDuration is the duration paired with HighThreshold, e.g. 30.0.
	HighDuration float64 `yaml:"high_duration"`

	// CritDuration is the duration paired with CritThreshold, e.g. 15.0.
	CritDuration float64 `yaml:"critical_duration"`

	// DownThreshold governs the return to Normal, e.g. 0.50.
	DownThreshold float64 `yaml:"down_threshold"`

	// DownDuration is the duration paired with DownThreshold, e.g. 60.0.
	DownDuration float64 `yaml:"down_duration"`
}
|
||||
|
||||
// DetectorConfig groups all anomaly detection configurations.
|
||||
type DetectorConfig struct {
|
||||
Method string `yaml:"method"`
|
||||
Ensemble EnsembleConfig `yaml:"ensemble"`
|
||||
MAD MADConfig `yaml:"mad"`
|
||||
COPOD COPODConfig `yaml:"copod"`
|
||||
// RRCF is used by the standalone detector and the AVG/MAX/MEDIAN ensemble paths.
|
||||
RRCF RRCFConfig `yaml:"rrcf"`
|
||||
// RRCFVariants configures the three-horizon RRCF instances for the SEAD ensemble.
|
||||
// Defaults are applied automatically when fields are zero.
|
||||
RRCFVariants RRCFVariantsConfig `yaml:"rrcf_variants"`
|
||||
AutoScaling AutoScalingConfig `yaml:"auto_scaling"`
|
||||
}
|
||||
|
||||
// Config is the top-level pipeline configuration.
|
||||
type Config struct {
|
||||
Ingestion struct {
|
||||
LogPath string `yaml:"log_path"`
|
||||
NetInterface string `yaml:"net_interface"`
|
||||
DiskDevice string `yaml:"disk_device"`
|
||||
SystemctlServices []string `yaml:"systemctl_services"`
|
||||
} `yaml:"ingestion"`
|
||||
|
||||
Transformation struct {
|
||||
WindowSize time.Duration `yaml:"window_size"`
|
||||
DbPath string `yaml:"db_path"`
|
||||
} `yaml:"transformation"`
|
||||
|
||||
Drain struct {
|
||||
Depth int `yaml:"depth"`
|
||||
SimThreshold float64 `yaml:"sim_threshold"`
|
||||
MaxChildren int `yaml:"max_children"`
|
||||
MaxClusters int `yaml:"max_clusters"`
|
||||
MaskingPatterns []MaskingPattern `yaml:"masking_patterns"`
|
||||
} `yaml:"drain"`
|
||||
|
||||
Detection DetectorConfig `yaml:"detector"`
|
||||
|
||||
Output struct {
|
||||
FeatureLogPath string `yaml:"feature_log_path"`
|
||||
AnomalyLogPath string `yaml:"anomaly_log_path"`
|
||||
} `yaml:"output"`
|
||||
}
|
||||
|
||||
// LoadConfig reads and decodes the YAML file at path.
|
||||
func LoadConfig(path string) (*Config, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("config: open %q: %w", path, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var cfg Config
|
||||
dec := yaml.NewDecoder(f)
|
||||
dec.KnownFields(false)
|
||||
if err := dec.Decode(&cfg); err != nil {
|
||||
return nil, fmt.Errorf("config: decode %q: %w", path, err)
|
||||
}
|
||||
return &cfg, nil
|
||||
}
|
||||
|
||||
// Compile compiles all MaskingPattern.Pattern strings into *regexp.Regexp.
|
||||
func (c *Config) Compile() error {
|
||||
for i := range c.Drain.MaskingPatterns {
|
||||
mp := &c.Drain.MaskingPatterns[i]
|
||||
re, err := regexp.Compile(mp.Pattern)
|
||||
if err != nil {
|
||||
return fmt.Errorf("config: compile pattern %q: %w", mp.Name, err)
|
||||
}
|
||||
mp.Re = re
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// NumericPatternNames returns the ordered list of MaskingPattern names whose
|
||||
// Type is "float" or "int".
|
||||
func (c *Config) NumericPatternNames() []string {
|
||||
names := make([]string, 0, len(c.Drain.MaskingPatterns))
|
||||
for _, mp := range c.Drain.MaskingPatterns {
|
||||
if mp.Name != "" && (mp.Type == "float" || mp.Type == "int") {
|
||||
names = append(names, mp.Name)
|
||||
}
|
||||
}
|
||||
return names
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue